Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 11 Mar 2002 22:13:51 -0800
From:      Alfred Perlstein <bright@mu.org>
To:        smp@freebsd.org
Cc:        jhb@FreeBSD.org, davidc@freebsd.org, jake@locore.ca
Subject:   select fix take II.
Message-ID:  <20020312061351.GF92565@elvis.mu.org>
In-Reply-To: <20020311001131.GN26621@elvis.mu.org>
References:  <20020311001131.GN26621@elvis.mu.org>

next in thread | previous in thread | raw e-mail | index | archive | help
* Alfred Perlstein <bright@mu.org> [020310 16:12] wrote:
> Chad David and I have been working on fixing select(2) and poll(2)'s
> locking.  I've been working on pushing pipe read/write ops down
> out from under Giant.

Ok, after further investigation revision 1.79 of sys_generic.c seems
to have been wrong.  I had to pretty much lay the 4.x version of
sys_generic.c next to the 5.x one to get this right.

If you review the patch after it's applied the code is a LOT cleaner
and makes a hell of a lot more sense now.  The XXX near the TDF_SELECT
check is gone because it's obvious what it's for.

So do we brave this delta for the developer preview? :)

Index: dev/bktr/bktr_core.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/bktr/bktr_core.c,v
retrieving revision 1.117
diff -u -r1.117 bktr_core.c
--- dev/bktr/bktr_core.c	12 Sep 2001 08:37:02 -0000	1.117
+++ dev/bktr/bktr_core.c	11 Mar 2002 02:44:14 -0000
@@ -809,7 +809,7 @@
 		}
 
 		/* If someone has a select() on /dev/vbi, inform them */
-		if (bktr->vbi_select.si_pid) {
+		if (SEL_WAITING(&bktr->vbi_select)) {
 			selwakeup(&bktr->vbi_select);
 		}
 
Index: dev/kbd/kbd.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/kbd/kbd.c,v
retrieving revision 1.27
diff -u -r1.27 kbd.c
--- dev/kbd/kbd.c	12 Sep 2001 08:37:06 -0000	1.27
+++ dev/kbd/kbd.c	11 Mar 2002 22:08:08 -0000
@@ -523,8 +523,6 @@
 	bzero(&sc->gkb_q, sizeof(sc->gkb_q));
 #endif
 	clist_alloc_cblocks(&sc->gkb_q, KB_QSIZE, KB_QSIZE/2); /* XXX */
-	sc->gkb_rsel.si_flags = 0;
-	sc->gkb_rsel.si_pid = 0;
 	splx(s);
 
 	return 0;
Index: dev/snp/snp.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/snp/snp.c,v
retrieving revision 1.69
diff -u -r1.69 snp.c
--- dev/snp/snp.c	24 Nov 2001 15:59:46 -0000	1.69
+++ dev/snp/snp.c	11 Mar 2002 02:44:14 -0000
@@ -373,7 +373,6 @@
 		wakeup((caddr_t)snp);
 	}
 	selwakeup(&snp->snp_sel);
-	snp->snp_sel.si_pid = 0;
 
 	return (n);
 }
@@ -447,7 +446,6 @@
 
 detach_notty:
 	selwakeup(&snp->snp_sel);
-	snp->snp_sel.si_pid = 0;
 	if ((snp->snp_flags & SNOOP_OPEN) == 0) 
 		free(snp, M_SNP);
 
Index: dev/sound/pcm/channel.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/sound/pcm/channel.c,v
retrieving revision 1.81
diff -u -r1.81 channel.c
--- dev/sound/pcm/channel.c	24 Feb 2002 00:49:43 -0000	1.81
+++ dev/sound/pcm/channel.c	11 Mar 2002 02:44:14 -0000
@@ -116,7 +116,7 @@
     	struct snd_dbuf *bs = c->bufsoft;
 
 	CHN_LOCKASSERT(c);
-	if (sndbuf_getsel(bs)->si_pid && chn_polltrigger(c))
+	if (SEL_WAITING(sndbuf_getsel(bs)) && chn_polltrigger(c))
 		selwakeup(sndbuf_getsel(bs));
 	wakeup(bs);
 }
Index: dev/usb/ums.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/usb/ums.c,v
retrieving revision 1.48
diff -u -r1.48 ums.c
--- dev/usb/ums.c	15 Feb 2002 22:54:10 -0000	1.48
+++ dev/usb/ums.c	11 Mar 2002 22:38:21 -0000
@@ -344,8 +344,10 @@
 	sc->status.button = sc->status.obutton = 0;
 	sc->status.dx = sc->status.dy = sc->status.dz = 0;
 
+#ifndef __FreeBSD__
 	sc->rsel.si_flags = 0;
 	sc->rsel.si_pid = 0;
+#endif
 
 	sc->dev = make_dev(&ums_cdevsw, device_get_unit(self),
 			UID_ROOT, GID_OPERATOR,
Index: isa/psm.c
===================================================================
RCS file: /home/ncvs/src/sys/isa/psm.c,v
retrieving revision 1.43
diff -u -r1.43 psm.c
--- isa/psm.c	19 Dec 2001 13:32:21 -0000	1.43
+++ isa/psm.c	11 Mar 2002 22:08:51 -0000
@@ -1314,8 +1314,6 @@
     device_busy(devclass_get_device(psm_devclass, unit));
 
     /* Initialize state */
-    sc->rsel.si_flags = 0;
-    sc->rsel.si_pid = 0;
     sc->mode.level = sc->dflt_mode.level;
     sc->mode.protocol = sc->dflt_mode.protocol;
     sc->watchdog = FALSE;
Index: kern/sys_generic.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.92
diff -u -r1.92 sys_generic.c
--- kern/sys_generic.c	9 Mar 2002 22:44:37 -0000	1.92
+++ kern/sys_generic.c	12 Mar 2002 05:46:13 -0000
@@ -80,6 +80,7 @@
 		    size_t, off_t, int);
 static int	dofilewrite(struct thread *, struct file *, int,
 		    const void *, size_t, off_t, int);
+static void	clear_selinfo_list(struct thread *);
 
 /*
  * Read system call.
@@ -696,11 +697,36 @@
 	return (error);
 }
 
+/*
+ * These are initialized in selectinit() via SYSINIT
+ */
+static struct mtx	sellock;
+static struct cv	selwait;
 static int	nselcoll;	/* Select collisions since boot */
-struct cv	selwait;
 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
 
 /*
+ * Remove the references to the thread from all of the objects
+ * we were polling.
+ *
+ * This code assumes that the underlying owner of the selinfo
+ * structure will hold sellock before it changes it, and that
+ * it will unlink itself from our list if it goes away.
+ */
+static void
+clear_selinfo_list(td)
+	struct thread *td;
+{
+	struct selinfo *si;
+
+	mtx_assert(&sellock, MA_OWNED);
+
+	TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
+		si->si_thread = NULL;
+	TAILQ_INIT(&td->td_selq);
+}
+
+/*
  * Select system call.
  */
 #ifndef _SYS_SYSPROTO_H_
@@ -775,7 +801,7 @@
 			sbp += ncpbytes / sizeof *sbp;			\
 			error = copyin(uap->name, ibits[x], ncpbytes);	\
 			if (error != 0)					\
-				goto done_noproclock;			\
+				goto done_nosellock;			\
 		}							\
 	} while (0)
 	getbits(in, 0);
@@ -789,10 +815,10 @@
 		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
 			sizeof (atv));
 		if (error)
-			goto done_noproclock;
+			goto done_nosellock;
 		if (itimerfix(&atv)) {
 			error = EINVAL;
-			goto done_noproclock;
+			goto done_nosellock;
 		}
 		getmicrouptime(&rtv);
 		timevaladd(&atv, &rtv);
@@ -801,61 +827,62 @@
 		atv.tv_usec = 0;
 	}
 	timo = 0;
-	PROC_LOCK(td->td_proc);
+	mtx_lock(&sellock);
 retry:
+
 	ncoll = nselcoll;
 	mtx_lock_spin(&sched_lock);
 	td->td_flags |= TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
-	PROC_UNLOCK(td->td_proc);
+	mtx_unlock(&sellock);
+
+	/* XXX Is there a better place for this? */
+	TAILQ_INIT(&td->td_selq);
 	error = selscan(td, ibits, obits, uap->nd);
-	PROC_LOCK(td->td_proc);
+	mtx_lock(&sellock);
 	if (error || td->td_retval[0])
 		goto done;
 	if (atv.tv_sec || atv.tv_usec) {
 		getmicrouptime(&rtv);
-		if (timevalcmp(&rtv, &atv, >=)) {
-			/*
-			 * An event of our interest may occur during locking a process.
-			 * In order to avoid missing the event that occured during locking
-			 * the process, test TDF_SELECT and rescan file descriptors if
-			 * necessary.
-			 */
-			mtx_lock_spin(&sched_lock);
-			if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-				ncoll = nselcoll;
-				td->td_flags |= TDF_SELECT;
-				mtx_unlock_spin(&sched_lock);
-				PROC_UNLOCK(td->td_proc);
-				error = selscan(td, ibits, obits, uap->nd);
-				PROC_LOCK(td->td_proc);
-			} else
-				mtx_unlock_spin(&sched_lock);
+		if (timevalcmp(&rtv, &atv, >=))
 			goto done;
-		}
 		ttv = atv;
 		timevalsub(&ttv, &rtv);
 		timo = ttv.tv_sec > 24 * 60 * 60 ?
 		    24 * 60 * 60 * hz : tvtohz(&ttv);
 	}
+
+	/*
+	 * An event of interest may occur while we do not hold
+	 * sellock, so check TDF_SELECT and the number of
+	 * collisions and rescan the file descriptors if
+	 * necessary.
+	 */
 	mtx_lock_spin(&sched_lock);
+	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
+		mtx_unlock_spin(&sched_lock);
+		goto retry;
+	}
 	td->td_flags &= ~TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
 
 	if (timo > 0)
-		error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
+		error = cv_timedwait_sig(&selwait, &sellock, timo);
 	else
-		error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
+		error = cv_wait_sig(&selwait, &sellock);
 	
 	if (error == 0)
 		goto retry;
 
 done:
+	clear_selinfo_list(td);
+
 	mtx_lock_spin(&sched_lock);
 	td->td_flags &= ~TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
-	PROC_UNLOCK(td->td_proc);
-done_noproclock:
+	mtx_unlock(&sellock);
+
+done_nosellock:
 	/* select is not restarted after signals... */
 	if (error == ERESTART)
 		error = EINTR;
@@ -967,13 +994,13 @@
 		bits = smallbits;
 	error = copyin(SCARG(uap, fds), bits, ni);
 	if (error)
-		goto done_noproclock;
+		goto done_nosellock;
 	if (SCARG(uap, timeout) != INFTIM) {
 		atv.tv_sec = SCARG(uap, timeout) / 1000;
 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
 		if (itimerfix(&atv)) {
 			error = EINVAL;
-			goto done_noproclock;
+			goto done_nosellock;
 		}
 		getmicrouptime(&rtv);
 		timevaladd(&atv, &rtv);
@@ -982,59 +1009,59 @@
 		atv.tv_usec = 0;
 	}
 	timo = 0;
-	PROC_LOCK(td->td_proc);
+	mtx_lock(&sellock);
 retry:
 	ncoll = nselcoll;
 	mtx_lock_spin(&sched_lock);
 	td->td_flags |= TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
-	PROC_UNLOCK(td->td_proc);
+	mtx_unlock(&sellock);
+
+	/* XXX Is there a better place for this? */
+	TAILQ_INIT(&td->td_selq);
 	error = pollscan(td, (struct pollfd *)bits, nfds);
-	PROC_LOCK(td->td_proc);
+	mtx_lock(&sellock);
 	if (error || td->td_retval[0])
 		goto done;
 	if (atv.tv_sec || atv.tv_usec) {
 		getmicrouptime(&rtv);
-		if (timevalcmp(&rtv, &atv, >=)) {
-			/*
-			 * An event of our interest may occur during locking a process.
-			 * In order to avoid missing the event that occured during locking
-			 * the process, test TDF_SELECT and rescan file descriptors if
-			 * necessary.
-			 */
-			mtx_lock_spin(&sched_lock);
-			if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
-				ncoll = nselcoll;
-				td->td_flags |= TDF_SELECT;
-				mtx_unlock_spin(&sched_lock);
-				PROC_UNLOCK(td->td_proc);
-				error = pollscan(td, (struct pollfd *)bits, nfds);
-				PROC_LOCK(td->td_proc);
-			} else
-				mtx_unlock_spin(&sched_lock);
+		if (timevalcmp(&rtv, &atv, >=))
 			goto done;
-		}
 		ttv = atv;
 		timevalsub(&ttv, &rtv);
 		timo = ttv.tv_sec > 24 * 60 * 60 ?
 		    24 * 60 * 60 * hz : tvtohz(&ttv);
 	}
+	/*
+	 * An event of interest may occur while we do not hold
+	 * sellock, so check TDF_SELECT and the number of collisions
+	 * and rescan the file descriptors if necessary.
+	 */
 	mtx_lock_spin(&sched_lock);
+	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
+		mtx_unlock_spin(&sched_lock);
+		goto retry;
+	}
 	td->td_flags &= ~TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
+
 	if (timo > 0)
-		error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
+		error = cv_timedwait_sig(&selwait, &sellock, timo);
 	else
-		error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
+		error = cv_wait_sig(&selwait, &sellock);
+
 	if (error == 0)
 		goto retry;
 
 done:
+	clear_selinfo_list(td);
+
 	mtx_lock_spin(&sched_lock);
 	td->td_flags &= ~TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
-	PROC_UNLOCK(td->td_proc);
-done_noproclock:
+	mtx_unlock(&sellock);
+
+done_nosellock:
 	/* poll is not restarted after signals... */
 	if (error == ERESTART)
 		error = EINTR;
@@ -1126,18 +1153,6 @@
 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
 }
 
-static int
-find_thread_in_proc(struct proc *p, struct thread *td)
-{
-	struct thread *td2;
-	FOREACH_THREAD_IN_PROC(p, td2) {
-		if (td2 == td) {
-			return (1);
-		}
-	}
-	return (0);
-}
-
 /*
  * Record a select request.
  */
@@ -1146,29 +1161,21 @@
 	struct thread *selector;
 	struct selinfo *sip;
 {
-	struct proc *p;
-	pid_t mypid;
 
-	mypid = selector->td_proc->p_pid;
-	if ((sip->si_pid == mypid) &&
-	    (sip->si_thread == selector)) { /* XXXKSE should be an ID? */
-		return;
-	}
-	if (sip->si_pid &&
-	    (p = pfind(sip->si_pid)) &&
-	    (find_thread_in_proc(p, sip->si_thread))) {
-		mtx_lock_spin(&sched_lock);
-	    	if (sip->si_thread->td_wchan == (caddr_t)&selwait) {
-			mtx_unlock_spin(&sched_lock);
-			PROC_UNLOCK(p);
-			sip->si_flags |= SI_COLL;
-			return;
-		}
-		mtx_unlock_spin(&sched_lock);
-		PROC_UNLOCK(p);
+	mtx_lock(&sellock);
+	/*
+	 * If the thread is not NULL there is another thread
+	 * interested in this object, and we need to flag the
+	 * collision so selwakeup() can broadcast.
+	 */
+	if (sip->si_thread != NULL) {
+		sip->si_flags |= SI_COLL;
+	} else {
+		sip->si_thread = selector;
+		TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
 	}
-	sip->si_pid = mypid;
-	sip->si_thread = selector;
+
+	mtx_unlock(&sellock);
 }
 
 /*
@@ -1176,37 +1183,37 @@
  */
 void
 selwakeup(sip)
-	register struct selinfo *sip;
+	struct selinfo *sip;
 {
 	struct thread *td;
-	register struct proc *p;
 
-	if (sip->si_pid == 0)
-		return;
-	if (sip->si_flags & SI_COLL) {
+	mtx_lock(&sellock);
+	td = sip->si_thread;
+
+	if ((sip->si_flags & SI_COLL) != 0) {
 		nselcoll++;
 		sip->si_flags &= ~SI_COLL;
 		cv_broadcast(&selwait);
 	}
-	p = pfind(sip->si_pid);
-	sip->si_pid = 0;
-	td = sip->si_thread;
-	if (p != NULL) {
-		if (!find_thread_in_proc(p, td)) {
-			PROC_UNLOCK(p); /* lock is in pfind() */;
-			return;
-		}
-		mtx_lock_spin(&sched_lock);
-		if (td->td_wchan == (caddr_t)&selwait) {
-			if (td->td_proc->p_stat == SSLEEP)
-				setrunnable(td);
-			else
-				cv_waitq_remove(td);
-		} else
-			td->td_flags &= ~TDF_SELECT;
-		mtx_unlock_spin(&sched_lock);
-		PROC_UNLOCK(p); /* Lock is in pfind() */
+	
+	if (td == NULL) {
+		mtx_unlock(&sellock);
+		return;
 	}
+
+	TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
+	mtx_lock_spin(&sched_lock);
+	if (td->td_wchan == (caddr_t)&selwait) {
+		if (td->td_proc->p_stat == SSLEEP)
+			setrunnable(td);
+		else
+			cv_waitq_remove(td);
+	} else
+		td->td_flags &= ~TDF_SELECT;
+	mtx_unlock_spin(&sched_lock);
+
+	sip->si_thread = NULL;
+	mtx_unlock(&sellock);
 }
 
 static void selectinit __P((void *));
@@ -1218,4 +1225,5 @@
 	void *dummy;
 {
 	cv_init(&selwait, "select");
+	mtx_init(&sellock, "sellck", MTX_DEF);
 }
Index: kern/tty.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/tty.c,v
retrieving revision 1.165
diff -u -r1.165 tty.c
--- kern/tty.c	2 Mar 2002 12:42:23 -0000	1.165
+++ kern/tty.c	11 Mar 2002 02:44:14 -0000
@@ -2273,7 +2273,7 @@
 	register struct tty *tp;
 {
 
-	if (tp->t_rsel.si_pid != 0)
+	if (SEL_WAITING(&tp->t_rsel))
 		selwakeup(&tp->t_rsel);
 	if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL)
 		pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL));
@@ -2289,7 +2289,7 @@
 	register struct tty *tp;
 {
 
-	if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat)
+	if (SEL_WAITING(&tp->t_wsel) && tp->t_outq.c_cc <= tp->t_olowat)
 		selwakeup(&tp->t_wsel);
 	if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL)
 		pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL));
Index: net/bpf.c
===================================================================
RCS file: /home/ncvs/src/sys/net/bpf.c,v
retrieving revision 1.86
diff -u -r1.86 bpf.c
--- net/bpf.c	14 Dec 2001 22:17:54 -0000	1.86
+++ net/bpf.c	11 Mar 2002 02:44:14 -0000
@@ -514,8 +514,6 @@
 		pgsigio(d->bd_sigio, d->bd_sig, 0);
 
 	selwakeup(&d->bd_sel);
-	/* XXX */
-	d->bd_sel.si_pid = 0;
 }
 
 static void
Index: sys/proc.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/proc.h,v
retrieving revision 1.206
diff -u -r1.206 proc.h
--- sys/proc.h	23 Feb 2002 11:12:57 -0000	1.206
+++ sys/proc.h	11 Mar 2002 22:20:23 -0000
@@ -147,6 +147,7 @@
  *      m - Giant
  *      n - not locked, lazy
  *      o - locked by pgrpsess_lock sx
+ *      p - select lock (sellock)
  *
  * If the locking key specifies two identifiers (for example, p_pptr) then
  * either lock is sufficient for read access, but both locks must be held
@@ -259,6 +260,8 @@
 	TAILQ_ENTRY(thread) td_slpq; 	/* (j) Sleep queue. XXXKSE */ 
 	TAILQ_ENTRY(thread) td_blkq; 	/* (j) Mutex queue. XXXKSE */ 
 	TAILQ_ENTRY(thread) td_runq; 	/* (j) Run queue(s). XXXKSE */ 
+
+	TAILQ_HEAD(, selinfo) td_selq;	/* (p) List of selinfos */
 
 #define	td_startzero td_flags
 	int		td_flags;	/* (j) TDF_* flags. */
Index: sys/selinfo.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/selinfo.h,v
retrieving revision 1.12
diff -u -r1.12 selinfo.h
--- sys/selinfo.h	27 Sep 2001 20:33:15 -0000	1.12
+++ sys/selinfo.h	11 Mar 2002 22:27:05 -0000
@@ -45,12 +45,21 @@
  * notified when I/O becomes possible.
  */
 struct selinfo {
-	pid_t	si_pid;		/* process to be notified */
-	struct	thread *si_thread;	/* thread in that process XXXKSE */
+	TAILQ_ENTRY(selinfo)	si_thrlist;	/* list hung off of thread */
+	struct	thread *si_thread;	/* thread waiting */
 	struct	klist si_note;	/* kernel note list */
 	short	si_flags;	/* see below */
 };
 #define	SI_COLL	0x0001		/* collision occurred */
+
+#define SEL_WAITING(si)	\
+	((si)->si_thread != NULL || ((si)->si_flags & SI_COLL) != 0)
+
+#define SEL_INIT(si) \
+	do {								\
+		(si)->si_thread = NULL;					\
+		(si)->si_flags = 0;					\
+	} while (0)
 
 #ifdef _KERNEL
 struct thread;



To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-smp" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020312061351.GF92565>