Date: Mon, 11 Mar 2002 22:13:51 -0800 From: Alfred Perlstein <bright@mu.org> To: smp@freebsd.org Cc: jhb@FreeBSD.org, davidc@freebsd.org, jake@locore.ca Subject: select fix take II. Message-ID: <20020312061351.GF92565@elvis.mu.org> In-Reply-To: <20020311001131.GN26621@elvis.mu.org> References: <20020311001131.GN26621@elvis.mu.org>
next in thread | previous in thread | raw e-mail | index | archive | help
* Alfred Perlstein <bright@mu.org> [020310 16:12] wrote: > Chad David and I have been working on fixing select(2) and poll(2)'s > locking. I've been working on pushing pipe read/write ops down > out from under Giant. Ok, after further investigation revision 1.79 of sys_generic.c seems to have been wrong. I had to pretty much lay the 4.x version of sys_generic.c next to the 5.x one to get this right. If you review the patch after it's applied the code is a LOT cleaner and makes a hell of a lot more sense now. The XXX near the TDF_SELECT check is gone because it's obvious what it's for. So do we brave this delta for the developer preview? :) Index: dev/bktr/bktr_core.c =================================================================== RCS file: /home/ncvs/src/sys/dev/bktr/bktr_core.c,v retrieving revision 1.117 diff -u -r1.117 bktr_core.c --- dev/bktr/bktr_core.c 12 Sep 2001 08:37:02 -0000 1.117 +++ dev/bktr/bktr_core.c 11 Mar 2002 02:44:14 -0000 @@ -809,7 +809,7 @@ } /* If someone has a select() on /dev/vbi, inform them */ - if (bktr->vbi_select.si_pid) { + if (SEL_WAITING(&bktr->vbi_select)) { selwakeup(&bktr->vbi_select); } Index: dev/kbd/kbd.c =================================================================== RCS file: /home/ncvs/src/sys/dev/kbd/kbd.c,v retrieving revision 1.27 diff -u -r1.27 kbd.c --- dev/kbd/kbd.c 12 Sep 2001 08:37:06 -0000 1.27 +++ dev/kbd/kbd.c 11 Mar 2002 22:08:08 -0000 @@ -523,8 +523,6 @@ bzero(&sc->gkb_q, sizeof(sc->gkb_q)); #endif clist_alloc_cblocks(&sc->gkb_q, KB_QSIZE, KB_QSIZE/2); /* XXX */ - sc->gkb_rsel.si_flags = 0; - sc->gkb_rsel.si_pid = 0; splx(s); return 0; Index: dev/snp/snp.c =================================================================== RCS file: /home/ncvs/src/sys/dev/snp/snp.c,v retrieving revision 1.69 diff -u -r1.69 snp.c --- dev/snp/snp.c 24 Nov 2001 15:59:46 -0000 1.69 +++ dev/snp/snp.c 11 Mar 2002 02:44:14 -0000 @@ -373,7 +373,6 @@ wakeup((caddr_t)snp); } selwakeup(&snp->snp_sel); - snp->snp_sel.si_pid = 0; return (n); } @@ -447,7 +446,6 @@ detach_notty: selwakeup(&snp->snp_sel); - snp->snp_sel.si_pid = 0; if ((snp->snp_flags & SNOOP_OPEN) == 0) free(snp, M_SNP); Index: dev/sound/pcm/channel.c =================================================================== RCS file: /home/ncvs/src/sys/dev/sound/pcm/channel.c,v retrieving revision 1.81 diff -u -r1.81 channel.c --- dev/sound/pcm/channel.c 24 Feb 2002 00:49:43 -0000 1.81 +++ dev/sound/pcm/channel.c 11 Mar 2002 02:44:14 -0000 @@ -116,7 +116,7 @@ struct snd_dbuf *bs = c->bufsoft; CHN_LOCKASSERT(c); - if (sndbuf_getsel(bs)->si_pid && chn_polltrigger(c)) + if (SEL_WAITING(sndbuf_getsel(bs)) && chn_polltrigger(c)) selwakeup(sndbuf_getsel(bs)); wakeup(bs); } Index: dev/usb/ums.c =================================================================== RCS file: /home/ncvs/src/sys/dev/usb/ums.c,v retrieving revision 1.48 diff -u -r1.48 ums.c --- dev/usb/ums.c 15 Feb 2002 22:54:10 -0000 1.48 +++ dev/usb/ums.c 11 Mar 2002 22:38:21 -0000 @@ -344,8 +344,10 @@ sc->status.button = sc->status.obutton = 0; sc->status.dx = sc->status.dy = sc->status.dz = 0; +#ifndef __FreeBSD__ sc->rsel.si_flags = 0; sc->rsel.si_pid = 0; +#endif sc->dev = make_dev(&ums_cdevsw, device_get_unit(self), UID_ROOT, GID_OPERATOR, Index: isa/psm.c =================================================================== RCS file: /home/ncvs/src/sys/isa/psm.c,v retrieving revision 1.43 diff -u -r1.43 psm.c --- isa/psm.c 19 Dec 2001 13:32:21 -0000 1.43 +++ isa/psm.c 11 Mar 2002 22:08:51 -0000 @@ -1314,8 +1314,6 @@ device_busy(devclass_get_device(psm_devclass, unit)); /* Initialize state */ - sc->rsel.si_flags = 0; - sc->rsel.si_pid = 0; sc->mode.level = sc->dflt_mode.level; sc->mode.protocol = sc->dflt_mode.protocol; sc->watchdog = FALSE; Index: kern/sys_generic.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sys_generic.c,v retrieving revision 1.92 diff -u -r1.92 sys_generic.c --- kern/sys_generic.c 9 Mar 2002 22:44:37 -0000 1.92 +++ kern/sys_generic.c 12 Mar 2002 05:46:13 -0000 @@ -80,6 +80,7 @@ size_t, off_t, int); static int dofilewrite(struct thread *, struct file *, int, const void *, size_t, off_t, int); +static void clear_selinfo_list(struct thread *); /* * Read system call. @@ -696,11 +697,36 @@ return (error); } +/* + * These are initialized in selectinit() via SYSINIT + */ +static struct mtx sellock; +static struct cv selwait; static int nselcoll; /* Select collisions since boot */ -struct cv selwait; SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); /* + * Remove the references to the thread from all of the objects + * we were polling. + * + * This code assumes that the underlying owner of the selinfo + * structure will hold sellock before it changes it, and that + * it will unlink itself from our list if it goes away. + */ +static void +clear_selinfo_list(td) + struct thread *td; +{ + struct selinfo *si; + + mtx_assert(&sellock, MA_OWNED); + + TAILQ_FOREACH(si, &td->td_selq, si_thrlist) + si->si_thread = NULL; + TAILQ_INIT(&td->td_selq); +} + +/* * Select system call. */ #ifndef _SYS_SYSPROTO_H_ @@ -775,7 +801,7 @@ sbp += ncpbytes / sizeof *sbp; \ error = copyin(uap->name, ibits[x], ncpbytes); \ if (error != 0) \ - goto done_noproclock; \ + goto done_nosellock; \ } \ } while (0) getbits(in, 0); @@ -789,10 +815,10 @@ error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); if (error) - goto done_noproclock; + goto done_nosellock; if (itimerfix(&atv)) { error = EINVAL; - goto done_noproclock; + goto done_nosellock; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); @@ -801,61 +827,62 @@ atv.tv_usec = 0; } timo = 0; - PROC_LOCK(td->td_proc); + mtx_lock(&sellock); retry: + ncoll = nselcoll; mtx_lock_spin(&sched_lock); td->td_flags |= TDF_SELECT; mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); + mtx_unlock(&sellock); + + /* XXX Is there a better place for this? */ + TAILQ_INIT(&td->td_selq); error = selscan(td, ibits, obits, uap->nd); - PROC_LOCK(td->td_proc); + mtx_lock(&sellock); if (error || td->td_retval[0]) goto done; if (atv.tv_sec || atv.tv_usec) { getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) { - /* - * An event of our interest may occur during locking a process. - * In order to avoid missing the event that occured during locking - * the process, test TDF_SELECT and rescan file descriptors if - * necessary. - */ - mtx_lock_spin(&sched_lock); - if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - ncoll = nselcoll; - td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); - error = selscan(td, ibits, obits, uap->nd); - PROC_LOCK(td->td_proc); - } else - mtx_unlock_spin(&sched_lock); + if (timevalcmp(&rtv, &atv, >=)) goto done; - } ttv = atv; timevalsub(&ttv, &rtv); timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } + + /* + * An event of interest may occur while we do not hold + * sellock, so check TDF_SELECT and the number of + * collisions and rescan the file descriptors if + * necessary. + */ mtx_lock_spin(&sched_lock); + if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { + mtx_unlock_spin(&sched_lock); + goto retry; + } td->td_flags &= ~TDF_SELECT; mtx_unlock_spin(&sched_lock); if (timo > 0) - error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo); + error = cv_timedwait_sig(&selwait, &sellock, timo); else - error = cv_wait_sig(&selwait, &td->td_proc->p_mtx); + error = cv_wait_sig(&selwait, &sellock); if (error == 0) goto retry; done: + clear_selinfo_list(td); + mtx_lock_spin(&sched_lock); td->td_flags &= ~TDF_SELECT; mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); -done_noproclock: + mtx_unlock(&sellock); + +done_nosellock: /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; @@ -967,13 +994,13 @@ bits = smallbits; error = copyin(SCARG(uap, fds), bits, ni); if (error) - goto done_noproclock; + goto done_nosellock; if (SCARG(uap, timeout) != INFTIM) { atv.tv_sec = SCARG(uap, timeout) / 1000; atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; if (itimerfix(&atv)) { error = EINVAL; - goto done_noproclock; + goto done_nosellock; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); @@ -982,59 +1009,59 @@ atv.tv_usec = 0; } timo = 0; - PROC_LOCK(td->td_proc); + mtx_lock(&sellock); retry: ncoll = nselcoll; mtx_lock_spin(&sched_lock); td->td_flags |= TDF_SELECT; mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); + mtx_unlock(&sellock); + + /* XXX Is there a better place for this? */ + TAILQ_INIT(&td->td_selq); error = pollscan(td, (struct pollfd *)bits, nfds); - PROC_LOCK(td->td_proc); + mtx_lock(&sellock); if (error || td->td_retval[0]) goto done; if (atv.tv_sec || atv.tv_usec) { getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) { - /* - * An event of our interest may occur during locking a process. - * In order to avoid missing the event that occured during locking - * the process, test TDF_SELECT and rescan file descriptors if - * necessary. - */ - mtx_lock_spin(&sched_lock); - if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - ncoll = nselcoll; - td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); - error = pollscan(td, (struct pollfd *)bits, nfds); - PROC_LOCK(td->td_proc); - } else - mtx_unlock_spin(&sched_lock); + if (timevalcmp(&rtv, &atv, >=)) goto done; - } ttv = atv; timevalsub(&ttv, &rtv); timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } + /* + * An event of interest may occur while we do not hold + * sellock, so check TDF_SELECT and the number of collisions + * and rescan the file descriptors if necessary. + */ mtx_lock_spin(&sched_lock); + if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { + mtx_unlock_spin(&sched_lock); + goto retry; + } td->td_flags &= ~TDF_SELECT; mtx_unlock_spin(&sched_lock); + if (timo > 0) - error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo); + error = cv_timedwait_sig(&selwait, &sellock, timo); else - error = cv_wait_sig(&selwait, &td->td_proc->p_mtx); + error = cv_wait_sig(&selwait, &sellock); + if (error == 0) goto retry; done: + clear_selinfo_list(td); + mtx_lock_spin(&sched_lock); td->td_flags &= ~TDF_SELECT; mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); -done_noproclock: + mtx_unlock(&sellock); + +done_nosellock: /* poll is not restarted after signals... */ if (error == ERESTART) error = EINTR; @@ -1126,18 +1153,6 @@ return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } -static int -find_thread_in_proc(struct proc *p, struct thread *td) -{ - struct thread *td2; - FOREACH_THREAD_IN_PROC(p, td2) { - if (td2 == td) { - return (1); - } - } - return (0); -} - /* * Record a select request. */ @@ -1146,29 +1161,21 @@ struct thread *selector; struct selinfo *sip; { - struct proc *p; - pid_t mypid; - mypid = selector->td_proc->p_pid; - if ((sip->si_pid == mypid) && - (sip->si_thread == selector)) { /* XXXKSE should be an ID? */ - return; - } - if (sip->si_pid && - (p = pfind(sip->si_pid)) && - (find_thread_in_proc(p, sip->si_thread))) { - mtx_lock_spin(&sched_lock); - if (sip->si_thread->td_wchan == (caddr_t)&selwait) { - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); - sip->si_flags |= SI_COLL; - return; - } - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); + mtx_lock(&sellock); + /* + * If the thread is not NULL there is another thread + * interested in this object, and we need to flag the + * collision so selwakeup() can broadcast. + */ + if (sip->si_thread != NULL) { + sip->si_flags |= SI_COLL; + } else { + sip->si_thread = selector; + TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist); } - sip->si_pid = mypid; - sip->si_thread = selector; + + mtx_unlock(&sellock); } /* @@ -1176,37 +1183,37 @@ */ void selwakeup(sip) - register struct selinfo *sip; + struct selinfo *sip; { struct thread *td; - register struct proc *p; - if (sip->si_pid == 0) - return; - if (sip->si_flags & SI_COLL) { + mtx_lock(&sellock); + td = sip->si_thread; + + if ((sip->si_flags & SI_COLL) != 0) { nselcoll++; sip->si_flags &= ~SI_COLL; cv_broadcast(&selwait); } - p = pfind(sip->si_pid); - sip->si_pid = 0; - td = sip->si_thread; - if (p != NULL) { - if (!find_thread_in_proc(p, td)) { - PROC_UNLOCK(p); /* lock is in pfind() */; - return; - } - mtx_lock_spin(&sched_lock); - if (td->td_wchan == (caddr_t)&selwait) { - if (td->td_proc->p_stat == SSLEEP) - setrunnable(td); - else - cv_waitq_remove(td); - } else - td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(p); /* Lock is in pfind() */ + + if (td == NULL) { + mtx_unlock(&sellock); + return; } + + TAILQ_REMOVE(&td->td_selq, sip, si_thrlist); + mtx_lock_spin(&sched_lock); + if (td->td_wchan == (caddr_t)&selwait) { + if (td->td_proc->p_stat == SSLEEP) + setrunnable(td); + else + cv_waitq_remove(td); + } else + td->td_flags &= ~TDF_SELECT; + mtx_unlock_spin(&sched_lock); + + sip->si_thread = NULL; + mtx_unlock(&sellock); } static void selectinit __P((void *)); @@ -1218,4 +1225,5 @@ void *dummy; { cv_init(&selwait, "select"); + mtx_init(&sellock, "sellck", MTX_DEF); } Index: kern/tty.c =================================================================== RCS file: /home/ncvs/src/sys/kern/tty.c,v retrieving revision 1.165 diff -u -r1.165 tty.c --- kern/tty.c 2 Mar 2002 12:42:23 -0000 1.165 +++ kern/tty.c 11 Mar 2002 02:44:14 -0000 @@ -2273,7 +2273,7 @@ register struct tty *tp; { - if (tp->t_rsel.si_pid != 0) + if (SEL_WAITING(&tp->t_rsel)) selwakeup(&tp->t_rsel); if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL) pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL)); @@ -2289,7 +2289,7 @@ register struct tty *tp; { - if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat) + if (SEL_WAITING(&tp->t_wsel) && tp->t_outq.c_cc <= tp->t_olowat) selwakeup(&tp->t_wsel); if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL) pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL)); Index: net/bpf.c =================================================================== RCS file: /home/ncvs/src/sys/net/bpf.c,v retrieving revision 1.86 diff -u -r1.86 bpf.c --- net/bpf.c 14 Dec 2001 22:17:54 -0000 1.86 +++ net/bpf.c 11 Mar 2002 02:44:14 -0000 @@ -514,8 +514,6 @@ pgsigio(d->bd_sigio, d->bd_sig, 0); selwakeup(&d->bd_sel); - /* XXX */ - d->bd_sel.si_pid = 0; } static void Index: sys/proc.h =================================================================== RCS file: /home/ncvs/src/sys/sys/proc.h,v retrieving revision 1.206 diff -u -r1.206 proc.h --- sys/proc.h 23 Feb 2002 11:12:57 -0000 1.206 +++ sys/proc.h 11 Mar 2002 22:20:23 -0000 @@ -147,6 +147,7 @@ * m - Giant * n - not locked, lazy * o - locked by pgrpsess_lock sx + * p - select lock (sellock) * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held @@ -259,6 +260,8 @@ TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. XXXKSE */ TAILQ_ENTRY(thread) td_blkq; /* (j) Mutex queue. XXXKSE */ TAILQ_ENTRY(thread) td_runq; /* (j) Run queue(s). XXXKSE */ + + TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos */ #define td_startzero td_flags int td_flags; /* (j) TDF_* flags. */ Index: sys/selinfo.h =================================================================== RCS file: /home/ncvs/src/sys/sys/selinfo.h,v retrieving revision 1.12 diff -u -r1.12 selinfo.h --- sys/selinfo.h 27 Sep 2001 20:33:15 -0000 1.12 +++ sys/selinfo.h 11 Mar 2002 22:27:05 -0000 @@ -45,12 +45,21 @@ * notified when I/O becomes possible. */ struct selinfo { - pid_t si_pid; /* process to be notified */ - struct thread *si_thread; /* thread in that process XXXKSE */ + TAILQ_ENTRY(selinfo) si_thrlist; /* list hung off of thread */ + struct thread *si_thread; /* thread waiting */ struct klist si_note; /* kernel note list */ short si_flags; /* see below */ }; #define SI_COLL 0x0001 /* collision occurred */ + +#define SEL_WAITING(si) \ + ((si)->si_thread != NULL || ((si)->si_flags & SI_COLL) != 0) + +#define SEL_INIT(si) \ + do { \ + (si)->si_thread = NULL; \ + (si)->si_flags = 0; \ + } while (0) #ifdef _KERNEL struct thread; To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-smp" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020312061351.GF92565>