Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 19 May 2009 00:02:58 +0000 (UTC)
From:      Kip Macy <kmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r192359 - in user/kmacy/releng_7_2_fcs/sys: kern netncp netsmb sys
Message-ID:  <200905190002.n4J02war079181@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kmacy
Date: Tue May 19 00:02:58 2009
New Revision: 192359
URL: http://svn.freebsd.org/changeset/base/192359

Log:
  merge 174647
  Refactor select to reduce contention and hide internal implementation
  details from consumers.
  
   - Track individual selectors on a per-descriptor basis such that there
     are no longer collisions and after sleeping for events only those
     descriptors which triggered events must be rescanned.
   - Protect the selinfo (per descriptor) structure with a mtx pool mutex.
     mtx pool mutexes were chosen to preserve API compatibility with
     existing code which does nothing but bzero() to setup selinfo
     structures.
   - Use a per-thread wait channel rather than a global wait channel.
   - Hide select implementation details in a seltd structure which is
     opaque to the rest of the kernel.
   - Provide a 'selsocket' interface for those kernel consumers who wish to
     select on a socket when they have no fd so they no longer have to
     be aware of select implementation details.
  
  Tested by:	kris
  Reviewed on:	arch

Modified:
  user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c
  user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c
  user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c
  user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
  user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c
  user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c
  user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c
  user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.h
  user/kmacy/releng_7_2_fcs/sys/netsmb/smb_trantcp.c
  user/kmacy/releng_7_2_fcs/sys/sys/proc.h
  user/kmacy/releng_7_2_fcs/sys/sys/selinfo.h
  user/kmacy/releng_7_2_fcs/sys/sys/socketvar.h
  user/kmacy/releng_7_2_fcs/sys/sys/systm.h

Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c	Tue May 19 00:02:58 2009	(r192359)
@@ -1460,7 +1460,8 @@ kqueue_poll(struct file *fp, int events,
 			revents |= events & (POLLIN | POLLRDNORM);
 		} else {
 			selrecord(td, &kq->kq_sel);
-			kq->kq_state |= KQ_SEL;
+			if (SEL_WAITING(&kq->kq_sel))
+				kq->kq_state |= KQ_SEL;
 		}
 	}
 	kqueue_release(kq, 1);
@@ -1553,8 +1554,9 @@ kqueue_close(struct file *fp, struct thr
 	}
 
 	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
-		kq->kq_state &= ~KQ_SEL;
 		selwakeuppri(&kq->kq_sel, PSOCK);
+		if (!SEL_WAITING(&kq->kq_sel))
+			kq->kq_state &= ~KQ_SEL;
 	}
 
 	KQ_UNLOCK(kq);
@@ -1589,8 +1591,9 @@ kqueue_wakeup(struct kqueue *kq)
 		wakeup(kq);
 	}
 	if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
-		kq->kq_state &= ~KQ_SEL;
 		selwakeuppri(&kq->kq_sel, PSOCK);
+		if (!SEL_WAITING(&kq->kq_sel))
+			kq->kq_state &= ~KQ_SEL;
 	}
 	if (!knlist_empty(&kq->kq_sel.si_note))
 		kqueue_schedtask(kq);

Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c	Tue May 19 00:02:58 2009	(r192359)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
+#include <sys/selinfo.h>
 #include <sys/turnstile.h>
 #include <sys/ktr.h>
 #include <sys/umtx.h>
@@ -214,6 +215,7 @@ thread_fini(void *mem, int size)
 	turnstile_free(td->td_turnstile);
 	sleepq_free(td->td_sleepqueue);
 	umtx_thread_fini(td);
+	seltdfini(td);
 }
 
 /*

Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c	Tue May 19 00:02:58 2009	(r192359)
@@ -69,18 +69,60 @@ __FBSDID("$FreeBSD$");
 #include <sys/ktrace.h>
 #endif
 
+#include <sys/ktr.h>
+
 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
 
 static int	pollout(struct pollfd *, struct pollfd *, u_int);
 static int	pollscan(struct thread *, struct pollfd *, u_int);
+static int	pollrescan(struct thread *);
 static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
+static int	selrescan(struct thread *, fd_mask **, fd_mask **);
+static void	selfdalloc(struct thread *, void *);
+static void	selfdfree(struct seltd *, struct selfd *);
 static int	dofileread(struct thread *, int, struct file *, struct uio *,
 		    off_t, int);
 static int	dofilewrite(struct thread *, int, struct file *, struct uio *,
 		    off_t, int);
 static void	doselwakeup(struct selinfo *, int);
+static void	seltdinit(struct thread *);
+static int	seltdwait(struct thread *, int);
+static void	seltdclear(struct thread *);
+
+/*
+ * One seltd per-thread allocated on demand as needed.
+ *
+ *	t - protected by st_mtx
+ * 	k - Only accessed by curthread or read-only
+ */
+struct seltd {
+	STAILQ_HEAD(, selfd)	st_selq;	/* (k) List of selfds. */
+	struct selfd		*st_free1;	/* (k) free fd for read set. */
+	struct selfd		*st_free2;	/* (k) free fd for write set. */
+	struct mtx		st_mtx;		/* Protects struct seltd */
+	struct cv		st_wait;	/* (t) Wait channel. */
+	int			st_flags;	/* (t) SELTD_ flags. */
+};
+
+#define	SELTD_PENDING	0x0001			/* We have pending events. */
+#define	SELTD_RESCAN	0x0002			/* Doing a rescan. */
+
+/*
+ * One selfd allocated per-thread per-file-descriptor.
+ *	f - protected by sf_mtx
+ */
+struct selfd {
+	STAILQ_ENTRY(selfd)	sf_link;	/* (k) fds owned by this td. */
+	TAILQ_ENTRY(selfd)	sf_threads;	/* (f) fds on this selinfo. */
+	struct selinfo		*sf_si;		/* (f) selinfo when linked. */
+	struct mtx		*sf_mtx;	/* Pointer to selinfo mtx. */
+	struct seltd		*sf_td;		/* (k) owning seltd. */
+	void			*sf_cookie;	/* (k) fd or pollfd. */
+};
+
+static uma_zone_t selfd_zone;
 
 #ifndef _SYS_SYSPROTO_H_
 struct read_args {
@@ -630,14 +672,6 @@ out:
 	return (error);
 }
 
-/*
- * sellock and selwait are initialized in selectinit() via SYSINIT.
- */
-struct mtx	sellock;
-struct cv	selwait;
-u_int		nselcoll;	/* Select collisions since boot */
-SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
-
 int
 poll_no_poll(int events)
 {
@@ -695,7 +729,7 @@ kern_select(struct thread *td, int nd, f
 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
 	struct timeval atv, rtv, ttv;
 	int error, timo;
-	u_int ncoll, nbufbytes, ncpbytes, nfdbits;
+	u_int nbufbytes, ncpbytes, nfdbits;
 
 	if (nd < 0)
 		return (EINVAL);
@@ -740,7 +774,7 @@ kern_select(struct thread *td, int nd, f
 			sbp += ncpbytes / sizeof *sbp;			\
 			error = copyin(name, ibits[x], ncpbytes);	\
 			if (error != 0)					\
-				goto done_nosellock;			\
+				goto done;				\
 		}							\
 	} while (0)
 	getbits(fd_in, 0);
@@ -754,7 +788,7 @@ kern_select(struct thread *td, int nd, f
 		atv = *tvp;
 		if (itimerfix(&atv)) {
 			error = EINVAL;
-			goto done_nosellock;
+			goto done;
 		}
 		getmicrouptime(&rtv);
 		timevaladd(&atv, &rtv);
@@ -763,58 +797,31 @@ kern_select(struct thread *td, int nd, f
 		atv.tv_usec = 0;
 	}
 	timo = 0;
-	TAILQ_INIT(&td->td_selq);
-	mtx_lock(&sellock);
-retry:
-	ncoll = nselcoll;
-	thread_lock(td);
-	td->td_flags |= TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-	error = selscan(td, ibits, obits, nd);
-	mtx_lock(&sellock);
-	if (error || td->td_retval[0])
-		goto done;
-	if (atv.tv_sec || atv.tv_usec) {
-		getmicrouptime(&rtv);
-		if (timevalcmp(&rtv, &atv, >=))
-			goto done;
-		ttv = atv;
-		timevalsub(&ttv, &rtv);
-		timo = ttv.tv_sec > 24 * 60 * 60 ?
-		    24 * 60 * 60 * hz : tvtohz(&ttv);
-	}
-
-	/*
-	 * An event of interest may occur while we do not hold
-	 * sellock, so check TDF_SELECT and the number of
-	 * collisions and rescan the file descriptors if
-	 * necessary.
-	 */
-	thread_lock(td);
-	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-		thread_unlock(td);
-		goto retry;
+	seltdinit(td);
+	/* Iterate until the timeout expires or descriptors become ready. */
+	for (;;) {
+		error = selscan(td, ibits, obits, nd);
+		if (error || td->td_retval[0] != 0)
+			break;
+		if (atv.tv_sec || atv.tv_usec) {
+			getmicrouptime(&rtv);
+			if (timevalcmp(&rtv, &atv, >=))
+				break;
+			ttv = atv;
+			timevalsub(&ttv, &rtv);
+			timo = ttv.tv_sec > 24 * 60 * 60 ?
+			    24 * 60 * 60 * hz : tvtohz(&ttv);
+		}
+		error = seltdwait(td, timo);
+		if (error)
+			break;
+		error = selrescan(td, ibits, obits);
+		if (error || td->td_retval[0] != 0)
+			break;
 	}
-	thread_unlock(td);
-
-	if (timo > 0)
-		error = cv_timedwait_sig(&selwait, &sellock, timo);
-	else
-		error = cv_wait_sig(&selwait, &sellock);
-	
-	if (error == 0)
-		goto retry;
+	seltdclear(td);
 
 done:
-	clear_selinfo_list(td);
-	thread_lock(td);
-	td->td_flags &= ~TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-done_nosellock:
 	/* select is not restarted after signals... */
 	if (error == ERESTART)
 		error = EINTR;
@@ -837,6 +844,60 @@ done_nosellock:
 	return (error);
 }
 
+/*
+ * Traverse the list of fds attached to this thread's seltd and check for
+ * completion.
+ */
+static int
+selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
+{
+	struct seltd *stp;
+	struct selfd *sfp;
+	struct selfd *sfn;
+	struct selinfo *si;
+	struct file *fp;
+	int msk, fd;
+	int n = 0;
+	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
+	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
+	struct filedesc *fdp = td->td_proc->p_fd;
+
+	stp = td->td_sel;
+	FILEDESC_SLOCK(fdp);
+	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+		fd = (int)(uintptr_t)sfp->sf_cookie;
+		si = sfp->sf_si;
+		selfdfree(stp, sfp);
+		/* If the selinfo wasn't cleared the event didn't fire. */
+		if (si != NULL)
+			continue;
+		if ((fp = fget_locked(fdp, fd)) == NULL) {
+			FILEDESC_SUNLOCK(fdp);
+			return (EBADF);
+		}
+		for (msk = 0; msk < 3; msk++) {
+			if (ibits[msk] == NULL)
+				continue;
+			if ((ibits[msk][fd/NFDBITS] &
+			    ((fd_mask) 1 << (fd % NFDBITS))) == 0)
+				continue;
+			if (fo_poll(fp, flag[msk], td->td_ucred, td)) {
+				obits[msk][(fd)/NFDBITS] |=
+				    ((fd_mask)1 << ((fd) % NFDBITS));
+				n++;
+			}
+		}
+	}
+	FILEDESC_SUNLOCK(fdp);
+	stp->st_flags = 0;
+	td->td_retval[0] = n;
+	return (0);
+}
+
+/*
+ * Perform the initial filedescriptor scan and register ourselves with
+ * each selinfo.
+ */
 static int
 selscan(td, ibits, obits, nfd)
 	struct thread *td;
@@ -865,6 +926,7 @@ selscan(td, ibits, obits, nfd)
 					FILEDESC_SUNLOCK(fdp);
 					return (EBADF);
 				}
+				selfdalloc(td, (void *)(uintptr_t)fd);
 				if (fo_poll(fp, flag[msk], td->td_ucred,
 				    td)) {
 					obits[msk][(fd)/NFDBITS] |=
@@ -895,7 +957,7 @@ poll(td, uap)
 	struct pollfd smallbits[32];
 	struct timeval atv, rtv, ttv;
 	int error = 0, timo;
-	u_int ncoll, nfds;
+	u_int nfds;
 	size_t ni;
 
 	nfds = uap->nfds;
@@ -911,8 +973,7 @@ poll(td, uap)
 	if ((nfds > lim_cur(td->td_proc, RLIMIT_NOFILE)) &&
 	    (nfds > FD_SETSIZE)) {
 		PROC_UNLOCK(td->td_proc);
-		error = EINVAL;
-		goto done2;
+		return (EINVAL);
 	}
 	PROC_UNLOCK(td->td_proc);
 	ni = nfds * sizeof(struct pollfd);
@@ -922,13 +983,13 @@ poll(td, uap)
 		bits = smallbits;
 	error = copyin(uap->fds, bits, ni);
 	if (error)
-		goto done_nosellock;
+		goto done;
 	if (uap->timeout != INFTIM) {
 		atv.tv_sec = uap->timeout / 1000;
 		atv.tv_usec = (uap->timeout % 1000) * 1000;
 		if (itimerfix(&atv)) {
 			error = EINVAL;
-			goto done_nosellock;
+			goto done;
 		}
 		getmicrouptime(&rtv);
 		timevaladd(&atv, &rtv);
@@ -937,56 +998,31 @@ poll(td, uap)
 		atv.tv_usec = 0;
 	}
 	timo = 0;
-	TAILQ_INIT(&td->td_selq);
-	mtx_lock(&sellock);
-retry:
-	ncoll = nselcoll;
-	thread_lock(td);
-	td->td_flags |= TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-	error = pollscan(td, bits, nfds);
-	mtx_lock(&sellock);
-	if (error || td->td_retval[0])
-		goto done;
-	if (atv.tv_sec || atv.tv_usec) {
-		getmicrouptime(&rtv);
-		if (timevalcmp(&rtv, &atv, >=))
-			goto done;
-		ttv = atv;
-		timevalsub(&ttv, &rtv);
-		timo = ttv.tv_sec > 24 * 60 * 60 ?
-		    24 * 60 * 60 * hz : tvtohz(&ttv);
-	}
-	/*
-	 * An event of interest may occur while we do not hold
-	 * sellock, so check TDF_SELECT and the number of collisions
-	 * and rescan the file descriptors if necessary.
-	 */
-	thread_lock(td);
-	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-		thread_unlock(td);
-		goto retry;
+	seltdinit(td);
+	/* Iterate until the timeout expires or descriptors become ready. */
+	for (;;) {
+		error = pollscan(td, bits, nfds);
+		if (error || td->td_retval[0] != 0)
+			break;
+		if (atv.tv_sec || atv.tv_usec) {
+			getmicrouptime(&rtv);
+			if (timevalcmp(&rtv, &atv, >=))
+				break;
+			ttv = atv;
+			timevalsub(&ttv, &rtv);
+			timo = ttv.tv_sec > 24 * 60 * 60 ?
+			    24 * 60 * 60 * hz : tvtohz(&ttv);
+		}
+		error = seltdwait(td, timo);
+		if (error)
+			break;
+		error = pollrescan(td);
+		if (error || td->td_retval[0] != 0)
+			break;
 	}
-	thread_unlock(td);
-
-	if (timo > 0)
-		error = cv_timedwait_sig(&selwait, &sellock, timo);
-	else
-		error = cv_wait_sig(&selwait, &sellock);
-
-	if (error == 0)
-		goto retry;
+	seltdclear(td);
 
 done:
-	clear_selinfo_list(td);
-	thread_lock(td);
-	td->td_flags &= ~TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-done_nosellock:
 	/* poll is not restarted after signals... */
 	if (error == ERESTART)
 		error = EINTR;
@@ -1000,7 +1036,6 @@ done_nosellock:
 out:
 	if (ni > sizeof(smallbits))
 		free(bits, M_TEMP);
-done2:
 	return (error);
 }
 
@@ -1025,12 +1060,56 @@ pollout(fds, ufds, nfd)
 }
 
 static int
+pollrescan(struct thread *td)
+{
+	struct seltd *stp;
+	struct selfd *sfp;
+	struct selfd *sfn;
+	struct selinfo *si;
+	struct filedesc *fdp;
+	struct file *fp;
+	struct pollfd *fd;
+	int n;
+
+	n = 0;
+	fdp = td->td_proc->p_fd;
+	stp = td->td_sel;
+	FILEDESC_SLOCK(fdp);
+	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+		fd = (struct pollfd *)sfp->sf_cookie;
+		si = sfp->sf_si;
+		selfdfree(stp, sfp);
+		/* If the selinfo wasn't cleared the event didn't fire. */
+		if (si != NULL)
+			continue;
+		fp = fdp->fd_ofiles[fd->fd];
+		if (fp == NULL) {
+			fd->revents = POLLNVAL;
+			n++;
+			continue;
+		}
+		/*
+		 * Note: backend also returns POLLHUP and
+		 * POLLERR if appropriate.
+		 */
+		fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
+		if (fd->revents != 0)
+			n++;
+	}
+	FILEDESC_SUNLOCK(fdp);
+	stp->st_flags = 0;
+	td->td_retval[0] = n;
+	return (0);
+}
+
+
+static int
 pollscan(td, fds, nfd)
 	struct thread *td;
 	struct pollfd *fds;
 	u_int nfd;
 {
-	register struct filedesc *fdp = td->td_proc->p_fd;
+	struct filedesc *fdp = td->td_proc->p_fd;
 	int i;
 	struct file *fp;
 	int n = 0;
@@ -1052,6 +1131,7 @@ pollscan(td, fds, nfd)
 				 * Note: backend also returns POLLHUP and
 				 * POLLERR if appropriate.
 				 */
+				selfdalloc(td, fds);
 				fds->revents = fo_poll(fp, fds->events,
 				    td->td_ucred, td);
 				if (fds->revents != 0)
@@ -1085,23 +1165,90 @@ openbsd_poll(td, uap)
 }
 
 /*
- * Remove the references to the thread from all of the objects we were
- * polling.
- *
- * This code assumes that the underlying owner of the selinfo structure will
- * hold sellock before it changes it, and that it will unlink itself from our
- * list if it goes away.
+ * XXX This was created specifically to support netncp and netsmb.  This
+ * allows the caller to specify a socket to wait for events on.  It returns
+ * 0 if any events matched and an error otherwise.  There is no way to
+ * determine which events fired.
  */
-void
-clear_selinfo_list(td)
-	struct thread *td;
+int
+selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
 {
-	struct selinfo *si;
+	struct timeval atv, rtv, ttv;
+	int error, timo;
+
+	if (tvp != NULL) {
+		atv = *tvp;
+		if (itimerfix(&atv))
+			return (EINVAL);
+		getmicrouptime(&rtv);
+		timevaladd(&atv, &rtv);
+	} else {
+		atv.tv_sec = 0;
+		atv.tv_usec = 0;
+	}
 
-	mtx_assert(&sellock, MA_OWNED);
-	TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
-		si->si_thread = NULL;
-	TAILQ_INIT(&td->td_selq);
+	timo = 0;
+	seltdinit(td);
+	/*
+	 * Iterate until the timeout expires or the socket becomes ready.
+	 */
+	for (;;) {
+		selfdalloc(td, NULL);
+		error = sopoll(so, events, NULL, td);
+		/* error here is actually the ready events. */
+		if (error)
+			return (0);
+		if (atv.tv_sec || atv.tv_usec) {
+			getmicrouptime(&rtv);
+			if (timevalcmp(&rtv, &atv, >=)) {
+				seltdclear(td);
+				return (EWOULDBLOCK);
+			}
+			ttv = atv;
+			timevalsub(&ttv, &rtv);
+			timo = ttv.tv_sec > 24 * 60 * 60 ?
+			    24 * 60 * 60 * hz : tvtohz(&ttv);
+		}
+		error = seltdwait(td, timo);
+		seltdclear(td);
+		if (error)
+			break;
+	}
+	/* XXX Duplicates ncp/smb behavior. */
+	if (error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Preallocate two selfds associated with 'cookie'.  Some fo_poll routines
+ * have two select sets, one for read and another for write.
+ */
+static void
+selfdalloc(struct thread *td, void *cookie)
+{
+	struct seltd *stp;
+
+	stp = td->td_sel;
+	if (stp->st_free1 == NULL)
+		stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+	stp->st_free1->sf_td = stp;
+	stp->st_free1->sf_cookie = cookie;
+	if (stp->st_free2 == NULL)
+		stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+	stp->st_free2->sf_td = stp;
+	stp->st_free2->sf_cookie = cookie;
+}
+
+static void
+selfdfree(struct seltd *stp, struct selfd *sfp)
+{
+	STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
+	mtx_lock(sfp->sf_mtx);
+	if (sfp->sf_si)
+		TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
+	mtx_unlock(sfp->sf_mtx);
+	uma_zfree(selfd_zone, sfp);
 }
 
 /*
@@ -1112,26 +1259,46 @@ selrecord(selector, sip)
 	struct thread *selector;
 	struct selinfo *sip;
 {
+	struct selfd *sfp;
+	struct seltd *stp;
+	struct mtx *mtxp;
 
-	mtx_lock(&sellock);
+	stp = selector->td_sel;
+	/*
+	 * Don't record when doing a rescan.
+	 */
+	if (stp->st_flags & SELTD_RESCAN)
+		return;
+	/*
+	 * Grab one of the preallocated descriptors.
+	 */
+	sfp = NULL;
+	if ((sfp = stp->st_free1) != NULL)
+		stp->st_free1 = NULL;
+	else if ((sfp = stp->st_free2) != NULL)
+		stp->st_free2 = NULL;
+	else
+		panic("selrecord: No free selfd on selq");
+	mtxp = mtx_pool_find(mtxpool_sleep, sip);
+	/*
+	 * Initialize the sfp and queue it in the thread.
+	 */
+	sfp->sf_si = sip;
+	sfp->sf_mtx = mtxp;
+	STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
 	/*
-	 * If the selinfo's thread pointer is NULL then take ownership of it.
-	 *
-	 * If the thread pointer is not NULL and it points to another
-	 * thread, then we have a collision.
-	 *
-	 * If the thread pointer is not NULL and points back to us then leave
-	 * it alone as we've already added pointed it at us and added it to
-	 * our list.
+	 * Now that we've locked the sip, check for initialization.
 	 */
-	if (sip->si_thread == NULL) {
-		sip->si_thread = selector;
-		TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
-	} else if (sip->si_thread != selector) {
-		sip->si_flags |= SI_COLL;
+	mtx_lock(mtxp);
+	if (sip->si_mtx == NULL) {
+		sip->si_mtx = mtxp;
+		TAILQ_INIT(&sip->si_tdlist);
 	}
-
-	mtx_unlock(&sellock);
+	/*
+	 * Add this thread to the list of selfds listening on this selinfo.
+	 */
+	TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
+	mtx_unlock(sip->si_mtx);
 }
 
 /* Wake up a selecting thread. */
@@ -1159,36 +1326,115 @@ doselwakeup(sip, pri)
 	struct selinfo *sip;
 	int pri;
 {
-	struct thread *td;
+	struct selfd *sfp;
+	struct selfd *sfn;
+	struct seltd *stp;
 
-	mtx_lock(&sellock);
-	td = sip->si_thread;
-	if ((sip->si_flags & SI_COLL) != 0) {
-		nselcoll++;
-		sip->si_flags &= ~SI_COLL;
-		cv_broadcastpri(&selwait, pri);
-	}
-	if (td == NULL) {
-		mtx_unlock(&sellock);
+	/* If it's not initialized there can't be any waiters. */
+	if (sip->si_mtx == NULL)
 		return;
+	/*
+	 * Locking the selinfo locks all selfds associated with it.
+	 */
+	mtx_lock(sip->si_mtx);
+	TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
+		/*
+		 * Once we remove this sfp from the list and clear the
+		 * sf_si seltdclear will know to ignore this si.
+		 */
+		TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
+		sfp->sf_si = NULL;
+		stp = sfp->sf_td;
+		mtx_lock(&stp->st_mtx);
+		stp->st_flags |= SELTD_PENDING;
+		cv_broadcastpri(&stp->st_wait, pri);
+		mtx_unlock(&stp->st_mtx);
 	}
-	TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
-	sip->si_thread = NULL;
-	thread_lock(td);
-	td->td_flags &= ~TDF_SELECT;
-	thread_unlock(td);
-	sleepq_remove(td, &selwait);
-	mtx_unlock(&sellock);
+	mtx_unlock(sip->si_mtx);
 }
 
-static void selectinit(void *);
-SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
+static void
+seltdinit(struct thread *td)
+{
+	struct seltd *stp;
+
+	if ((stp = td->td_sel) != NULL)
+		goto out;
+	td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
+	mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
+	cv_init(&stp->st_wait, "select");
+out:
+	stp->st_flags = 0;
+	STAILQ_INIT(&stp->st_selq);
+}
+
+static int
+seltdwait(struct thread *td, int timo)
+{
+	struct seltd *stp;
+	int error;
 
-/* ARGSUSED*/
+	stp = td->td_sel;
+	/*
+	 * An event of interest may occur while we do not hold the seltd
+	 * locked so check the pending flag before we sleep.
+	 */
+	mtx_lock(&stp->st_mtx);
+	/*
+	 * Any further calls to selrecord will be a rescan.
+	 */
+	stp->st_flags |= SELTD_RESCAN;
+	if (stp->st_flags & SELTD_PENDING) {
+		mtx_unlock(&stp->st_mtx);
+		return (0);
+	}
+	if (timo > 0)
+		error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+	else
+		error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
+	mtx_unlock(&stp->st_mtx);
+
+	return (error);
+}
+
+void
+seltdfini(struct thread *td)
+{
+	struct seltd *stp;
+
+	stp = td->td_sel;
+	if (stp == NULL)
+		return;
+	if (stp->st_free1)
+		uma_zfree(selfd_zone, stp->st_free1);
+	if (stp->st_free2)
+		uma_zfree(selfd_zone, stp->st_free2);
+	td->td_sel = NULL;
+	free(stp, M_SELECT);
+}
+
+/*
+ * Remove the references to the thread from all of the objects we were
+ * polling.
+ */
+static void
+seltdclear(struct thread *td)
+{
+	struct seltd *stp;
+	struct selfd *sfp;
+	struct selfd *sfn;
+
+	stp = td->td_sel;
+	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
+		selfdfree(stp, sfp);
+	stp->st_flags = 0;
+}
+
+static void selectinit(void *);
+SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
 static void
-selectinit(dummy)
-	void *dummy;
+selectinit(void *dummy __unused)
 {
-	cv_init(&selwait, "select");
-	mtx_init(&sellock, "sellck", NULL, MTX_DEF);
+	selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL,
+	    NULL, NULL, UMA_ALIGN_PTR, 0);
 }

Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c	Tue May 19 00:02:58 2009	(r192359)
@@ -524,8 +524,9 @@ pipeselwakeup(cpipe)
 
 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
 	if (cpipe->pipe_state & PIPE_SEL) {
-		cpipe->pipe_state &= ~PIPE_SEL;
 		selwakeuppri(&cpipe->pipe_sel, PSOCK);
+		if (!SEL_WAITING(&cpipe->pipe_sel))
+			cpipe->pipe_state &= ~PIPE_SEL;
 	}
 	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
 		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
@@ -1356,12 +1357,14 @@ pipe_poll(fp, events, active_cred, td)
 	if (revents == 0) {
 		if (events & (POLLIN | POLLRDNORM)) {
 			selrecord(td, &rpipe->pipe_sel);
-			rpipe->pipe_state |= PIPE_SEL;
+			if (SEL_WAITING(&rpipe->pipe_sel))
+				rpipe->pipe_state |= PIPE_SEL;
 		}
 
 		if (events & (POLLOUT | POLLWRNORM)) {
 			selrecord(td, &wpipe->pipe_sel);
-			wpipe->pipe_state |= PIPE_SEL;
+			if (SEL_WAITING(&wpipe->pipe_sel))
+				wpipe->pipe_state |= PIPE_SEL;
 		}
 	}
 #ifdef MAC

Modified: user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c	Tue May 19 00:02:58 2009	(r192359)
@@ -179,7 +179,8 @@ sowakeup(struct socket *so, struct sockb
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	selwakeuppri(&sb->sb_sel, PSOCK);
-	sb->sb_flags &= ~SB_SEL;
+	if (!SEL_WAITING(&sb->sb_sel))
+		sb->sb_flags &= ~SB_SEL;
 	if (sb->sb_flags & SB_WAIT) {
 		sb->sb_flags &= ~SB_WAIT;
 		wakeup(&sb->sb_cc);

Modified: user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c	Tue May 19 00:02:58 2009	(r192359)
@@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/mbuf.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
 #include <sys/uio.h>
 
 #include <netncp/ncp.h>
@@ -274,7 +276,9 @@ ncp_request_int(struct ncp_rq *rqp)
 	/*
 	 * Flush out replies on previous reqs
 	 */
-	while (ncp_poll(so, POLLIN) != 0) {
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	while (selsocket(so, POLLIN, &tv, td) == 0) {
 		if (ncp_sock_recv(so, &m, &len) != 0)
 			break;
 		m_freem(m);
@@ -319,7 +323,7 @@ ncp_request_int(struct ncp_rq *rqp)
 		}
 		tv.tv_sec = conn->li.timeout;
 		tv.tv_usec = 0;
-		error = ncp_sock_rselect(so, td, &tv, POLLIN);
+		error = selsocket(so, POLLIN, &tv, td);
 		if (error == EWOULDBLOCK )	/* timeout expired */
 			continue;
 		error = ncp_chkintr(conn, td);
@@ -335,7 +339,9 @@ ncp_request_int(struct ncp_rq *rqp)
 		dosend = 1;	/* resend rq if error */
 		for (;;) {
 			error = 0;
-			if (ncp_poll(so, POLLIN) == 0)
+			tv.tv_sec = 0;
+			tv.tv_usec = 0;
+			if (selsocket(so, POLLIN, &tv, td) != 0)
 				break;
 /*			if (so->so_rcv.sb_cc == 0) {
 				break;

Modified: user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c	Mon May 18 23:36:11 2009	(r192358)
+++ user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c	Tue May 19 00:02:58 2009	(r192359)
@@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$");
 #define ipx_setnullhost(x) ((x).x_host.s_host[0] = 0); \
 	((x).x_host.s_host[1] = 0); ((x).x_host.s_host[2] = 0);
 
-/*int ncp_poll(struct socket *so, int events);*/
 /*static int ncp_getsockname(struct socket *so, caddr_t asa, int *alen);*/
 static int ncp_soconnect(struct socket *so, struct sockaddr *target,
 			 struct thread *td);
@@ -181,110 +180,6 @@ ncp_sock_send(struct socket *so, struct 
 	return error;
 }
 
-int
-ncp_poll(struct socket *so, int events)
-{
-	struct thread *td = curthread;
-	int revents;
-
-	/* Fake up enough state to look like we are in poll(2). */
-	mtx_lock(&sellock);
-	thread_lock(td);
-	td->td_flags |= TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-	TAILQ_INIT(&td->td_selq);
-
-	revents = sopoll(so, events, NULL, td);
-
-	/* Tear down the fake poll(2) state. */
-	mtx_lock(&sellock);
-	clear_selinfo_list(td);
-	thread_lock(td);
-	td->td_flags &= ~TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-	return (revents);
-}
-
-int
-ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv,
-		 int events)
-{
-	struct timeval atv, rtv, ttv;
-	int ncoll, timo, error, revents;
-
-	if (tv) {
-		atv = *tv;
-		if (itimerfix(&atv)) {
-			error = EINVAL;
-			goto done_noproclock;
-		}
-		getmicrouptime(&rtv);
-		timevaladd(&atv, &rtv);
-	}
-	timo = 0;
-	mtx_lock(&sellock);
-
-retry:
-	ncoll = nselcoll;
-	thread_lock(td);
-	td->td_flags |= TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-	TAILQ_INIT(&td->td_selq);
-	revents = sopoll(so, events, NULL, td);
-	mtx_lock(&sellock);
-	if (revents) {
-		error = 0;
-		goto done;
-	}
-	if (tv) {
-		getmicrouptime(&rtv);
-		if (timevalcmp(&rtv, &atv, >=)) {
-			error = EWOULDBLOCK;
-			goto done;
-		}
-		ttv = atv;
-		timevalsub(&ttv, &rtv);
-		timo = tvtohz(&ttv);
-	}
-	/*
-	 * An event of our interest may occur during locking a thread.
-	 * In order to avoid missing the event that occurred during locking
-	 * the process, test TDF_SELECT and rescan file descriptors if
-	 * necessary.
-	 */
-	thread_lock(td);
-	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-		thread_unlock(td);
-		goto retry;
-	}
-	thread_unlock(td);
-
-	if (timo > 0)
-		error = cv_timedwait(&selwait, &sellock, timo);
-	else {
-		cv_wait(&selwait, &sellock);
-		error = 0;
-	}
-
-done:
-	clear_selinfo_list(td);
-
-	thread_lock(td);
-	td->td_flags &= ~TDF_SELECT;
-	thread_unlock(td);
-	mtx_unlock(&sellock);
-
-done_noproclock:

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905190002.n4J02war079181>