Date: Tue, 19 May 2009 00:02:58 +0000 (UTC) From: Kip Macy <kmacy@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r192359 - in user/kmacy/releng_7_2_fcs/sys: kern netncp netsmb sys Message-ID: <200905190002.n4J02war079181@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kmacy Date: Tue May 19 00:02:58 2009 New Revision: 192359 URL: http://svn.freebsd.org/changeset/base/192359 Log: merge 174647 Refactor select to reduce contention and hide internal implementation details from consumers. - Track individual selectors on a per-descriptor basis such that there are no longer collisions and after sleeping for events only those descriptors which triggered events must be rescanned. - Protect the selinfo (per descriptor) structure with a mtx pool mutex. mtx pool mutexes were chosen to preserve API compatibility with existing code which does nothing but bzero() to set up selinfo structures. - Use a per-thread wait channel rather than a global wait channel. - Hide select implementation details in a seltd structure which is opaque to the rest of the kernel. - Provide a 'selsocket' interface for those kernel consumers who wish to select on a socket when they have no fd so they no longer have to be aware of select implementation details. Tested by: kris Reviewed on: arch Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.h user/kmacy/releng_7_2_fcs/sys/netsmb/smb_trantcp.c user/kmacy/releng_7_2_fcs/sys/sys/proc.h user/kmacy/releng_7_2_fcs/sys/sys/selinfo.h user/kmacy/releng_7_2_fcs/sys/sys/socketvar.h user/kmacy/releng_7_2_fcs/sys/sys/systm.h Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c Tue May 19 00:02:58 2009 (r192359) @@ -1460,7 +1460,8 @@ kqueue_poll(struct file *fp, int 
events, revents |= events & (POLLIN | POLLRDNORM); } else { selrecord(td, &kq->kq_sel); - kq->kq_state |= KQ_SEL; + if (SEL_WAITING(&kq->kq_sel)) + kq->kq_state |= KQ_SEL; } } kqueue_release(kq, 1); @@ -1553,8 +1554,9 @@ kqueue_close(struct file *fp, struct thr } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { - kq->kq_state &= ~KQ_SEL; selwakeuppri(&kq->kq_sel, PSOCK); + if (!SEL_WAITING(&kq->kq_sel)) + kq->kq_state &= ~KQ_SEL; } KQ_UNLOCK(kq); @@ -1589,8 +1591,9 @@ kqueue_wakeup(struct kqueue *kq) wakeup(kq); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { - kq->kq_state &= ~KQ_SEL; selwakeuppri(&kq->kq_sel, PSOCK); + if (!SEL_WAITING(&kq->kq_sel)) + kq->kq_state &= ~KQ_SEL; } if (!knlist_empty(&kq->kq_sel.si_note)) kqueue_schedtask(kq); Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/kern/kern_thread.c Tue May 19 00:02:58 2009 (r192359) @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/sched.h> #include <sys/sleepqueue.h> +#include <sys/selinfo.h> #include <sys/turnstile.h> #include <sys/ktr.h> #include <sys/umtx.h> @@ -214,6 +215,7 @@ thread_fini(void *mem, int size) turnstile_free(td->td_turnstile); sleepq_free(td->td_sleepqueue); umtx_thread_fini(td); + seltdfini(td); } /* Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c Tue May 19 00:02:58 2009 (r192359) @@ -69,18 +69,60 @@ __FBSDID("$FreeBSD$"); #include <sys/ktrace.h> #endif +#include <sys/ktr.h> + static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); MALLOC_DEFINE(M_IOV, "iov", 
"large iov's"); static int pollout(struct pollfd *, struct pollfd *, u_int); static int pollscan(struct thread *, struct pollfd *, u_int); +static int pollrescan(struct thread *); static int selscan(struct thread *, fd_mask **, fd_mask **, int); +static int selrescan(struct thread *, fd_mask **, fd_mask **); +static void selfdalloc(struct thread *, void *); +static void selfdfree(struct seltd *, struct selfd *); static int dofileread(struct thread *, int, struct file *, struct uio *, off_t, int); static int dofilewrite(struct thread *, int, struct file *, struct uio *, off_t, int); static void doselwakeup(struct selinfo *, int); +static void seltdinit(struct thread *); +static int seltdwait(struct thread *, int); +static void seltdclear(struct thread *); + +/* + * One seltd per-thread allocated on demand as needed. + * + * t - protected by st_mtx + * k - Only accessed by curthread or read-only + */ +struct seltd { + STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */ + struct selfd *st_free1; /* (k) free fd for read set. */ + struct selfd *st_free2; /* (k) free fd for write set. */ + struct mtx st_mtx; /* Protects struct seltd */ + struct cv st_wait; /* (t) Wait channel. */ + int st_flags; /* (t) SELTD_ flags. */ +}; + +#define SELTD_PENDING 0x0001 /* We have pending events. */ +#define SELTD_RESCAN 0x0002 /* Doing a rescan. */ + +/* + * One selfd allocated per-thread per-file-descriptor. + * f - protected by sf_mtx + */ +struct selfd { + STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */ + TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */ + struct selinfo *sf_si; /* (f) selinfo when linked. */ + struct mtx *sf_mtx; /* Pointer to selinfo mtx. */ + struct seltd *sf_td; /* (k) owning seltd. */ + void *sf_cookie; /* (k) fd or pollfd. */ +}; + +static uma_zone_t selfd_zone; #ifndef _SYS_SYSPROTO_H_ struct read_args { @@ -630,14 +672,6 @@ out: return (error); } -/* - * sellock and selwait are initialized in selectinit() via SYSINIT. 
- */ -struct mtx sellock; -struct cv selwait; -u_int nselcoll; /* Select collisions since boot */ -SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); - int poll_no_poll(int events) { @@ -695,7 +729,7 @@ kern_select(struct thread *td, int nd, f fd_mask *ibits[3], *obits[3], *selbits, *sbp; struct timeval atv, rtv, ttv; int error, timo; - u_int ncoll, nbufbytes, ncpbytes, nfdbits; + u_int nbufbytes, ncpbytes, nfdbits; if (nd < 0) return (EINVAL); @@ -740,7 +774,7 @@ kern_select(struct thread *td, int nd, f sbp += ncpbytes / sizeof *sbp; \ error = copyin(name, ibits[x], ncpbytes); \ if (error != 0) \ - goto done_nosellock; \ + goto done; \ } \ } while (0) getbits(fd_in, 0); @@ -754,7 +788,7 @@ kern_select(struct thread *td, int nd, f atv = *tvp; if (itimerfix(&atv)) { error = EINVAL; - goto done_nosellock; + goto done; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); @@ -763,58 +797,31 @@ kern_select(struct thread *td, int nd, f atv.tv_usec = 0; } timo = 0; - TAILQ_INIT(&td->td_selq); - mtx_lock(&sellock); -retry: - ncoll = nselcoll; - thread_lock(td); - td->td_flags |= TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - - error = selscan(td, ibits, obits, nd); - mtx_lock(&sellock); - if (error || td->td_retval[0]) - goto done; - if (atv.tv_sec || atv.tv_usec) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) - goto done; - ttv = atv; - timevalsub(&ttv, &rtv); - timo = ttv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&ttv); - } - - /* - * An event of interest may occur while we do not hold - * sellock, so check TDF_SELECT and the number of - * collisions and rescan the file descriptors if - * necessary. - */ - thread_lock(td); - if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - thread_unlock(td); - goto retry; + seltdinit(td); + /* Iterate until the timeout expires or descriptors become ready. 
*/ + for (;;) { + error = selscan(td, ibits, obits, nd); + if (error || td->td_retval[0] != 0) + break; + if (atv.tv_sec || atv.tv_usec) { + getmicrouptime(&rtv); + if (timevalcmp(&rtv, &atv, >=)) + break; + ttv = atv; + timevalsub(&ttv, &rtv); + timo = ttv.tv_sec > 24 * 60 * 60 ? + 24 * 60 * 60 * hz : tvtohz(&ttv); + } + error = seltdwait(td, timo); + if (error) + break; + error = selrescan(td, ibits, obits); + if (error || td->td_retval[0] != 0) + break; } - thread_unlock(td); - - if (timo > 0) - error = cv_timedwait_sig(&selwait, &sellock, timo); - else - error = cv_wait_sig(&selwait, &sellock); - - if (error == 0) - goto retry; + seltdclear(td); done: - clear_selinfo_list(td); - thread_lock(td); - td->td_flags &= ~TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - -done_nosellock: /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; @@ -837,6 +844,60 @@ done_nosellock: return (error); } +/* + * Traverse the list of fds attached to this thread's seltd and check for + * completion. + */ +static int +selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits) +{ + struct seltd *stp; + struct selfd *sfp; + struct selfd *sfn; + struct selinfo *si; + struct file *fp; + int msk, fd; + int n = 0; + /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ + static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; + struct filedesc *fdp = td->td_proc->p_fd; + + stp = td->td_sel; + FILEDESC_SLOCK(fdp); + STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { + fd = (int)(uintptr_t)sfp->sf_cookie; + si = sfp->sf_si; + selfdfree(stp, sfp); + /* If the selinfo wasn't cleared the event didn't fire. 
*/ + if (si != NULL) + continue; + if ((fp = fget_locked(fdp, fd)) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + for (msk = 0; msk < 3; msk++) { + if (ibits[msk] == NULL) + continue; + if ((ibits[msk][fd/NFDBITS] & + ((fd_mask) 1 << (fd % NFDBITS))) == 0) + continue; + if (fo_poll(fp, flag[msk], td->td_ucred, td)) { + obits[msk][(fd)/NFDBITS] |= + ((fd_mask)1 << ((fd) % NFDBITS)); + n++; + } + } + } + FILEDESC_SUNLOCK(fdp); + stp->st_flags = 0; + td->td_retval[0] = n; + return (0); +} + +/* + * Perform the initial filedescriptor scan and register ourselves with + * each selinfo. + */ static int selscan(td, ibits, obits, nfd) struct thread *td; @@ -865,6 +926,7 @@ selscan(td, ibits, obits, nfd) FILEDESC_SUNLOCK(fdp); return (EBADF); } + selfdalloc(td, (void *)(uintptr_t)fd); if (fo_poll(fp, flag[msk], td->td_ucred, td)) { obits[msk][(fd)/NFDBITS] |= @@ -895,7 +957,7 @@ poll(td, uap) struct pollfd smallbits[32]; struct timeval atv, rtv, ttv; int error = 0, timo; - u_int ncoll, nfds; + u_int nfds; size_t ni; nfds = uap->nfds; @@ -911,8 +973,7 @@ poll(td, uap) if ((nfds > lim_cur(td->td_proc, RLIMIT_NOFILE)) && (nfds > FD_SETSIZE)) { PROC_UNLOCK(td->td_proc); - error = EINVAL; - goto done2; + return (EINVAL); } PROC_UNLOCK(td->td_proc); ni = nfds * sizeof(struct pollfd); @@ -922,13 +983,13 @@ poll(td, uap) bits = smallbits; error = copyin(uap->fds, bits, ni); if (error) - goto done_nosellock; + goto done; if (uap->timeout != INFTIM) { atv.tv_sec = uap->timeout / 1000; atv.tv_usec = (uap->timeout % 1000) * 1000; if (itimerfix(&atv)) { error = EINVAL; - goto done_nosellock; + goto done; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); @@ -937,56 +998,31 @@ poll(td, uap) atv.tv_usec = 0; } timo = 0; - TAILQ_INIT(&td->td_selq); - mtx_lock(&sellock); -retry: - ncoll = nselcoll; - thread_lock(td); - td->td_flags |= TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - - error = pollscan(td, bits, nfds); - mtx_lock(&sellock); - if (error || td->td_retval[0]) 
- goto done; - if (atv.tv_sec || atv.tv_usec) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) - goto done; - ttv = atv; - timevalsub(&ttv, &rtv); - timo = ttv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&ttv); - } - /* - * An event of interest may occur while we do not hold - * sellock, so check TDF_SELECT and the number of collisions - * and rescan the file descriptors if necessary. - */ - thread_lock(td); - if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - thread_unlock(td); - goto retry; + seltdinit(td); + /* Iterate until the timeout expires or descriptors become ready. */ + for (;;) { + error = pollscan(td, bits, nfds); + if (error || td->td_retval[0] != 0) + break; + if (atv.tv_sec || atv.tv_usec) { + getmicrouptime(&rtv); + if (timevalcmp(&rtv, &atv, >=)) + break; + ttv = atv; + timevalsub(&ttv, &rtv); + timo = ttv.tv_sec > 24 * 60 * 60 ? + 24 * 60 * 60 * hz : tvtohz(&ttv); + } + error = seltdwait(td, timo); + if (error) + break; + error = pollrescan(td); + if (error || td->td_retval[0] != 0) + break; } - thread_unlock(td); - - if (timo > 0) - error = cv_timedwait_sig(&selwait, &sellock, timo); - else - error = cv_wait_sig(&selwait, &sellock); - - if (error == 0) - goto retry; + seltdclear(td); done: - clear_selinfo_list(td); - thread_lock(td); - td->td_flags &= ~TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - -done_nosellock: /* poll is not restarted after signals... 
*/ if (error == ERESTART) error = EINTR; @@ -1000,7 +1036,6 @@ done_nosellock: out: if (ni > sizeof(smallbits)) free(bits, M_TEMP); -done2: return (error); } @@ -1025,12 +1060,56 @@ pollout(fds, ufds, nfd) } static int +pollrescan(struct thread *td) +{ + struct seltd *stp; + struct selfd *sfp; + struct selfd *sfn; + struct selinfo *si; + struct filedesc *fdp; + struct file *fp; + struct pollfd *fd; + int n; + + n = 0; + fdp = td->td_proc->p_fd; + stp = td->td_sel; + FILEDESC_SLOCK(fdp); + STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { + fd = (struct pollfd *)sfp->sf_cookie; + si = sfp->sf_si; + selfdfree(stp, sfp); + /* If the selinfo wasn't cleared the event didn't fire. */ + if (si != NULL) + continue; + fp = fdp->fd_ofiles[fd->fd]; + if (fp == NULL) { + fd->revents = POLLNVAL; + n++; + continue; + } + /* + * Note: backend also returns POLLHUP and + * POLLERR if appropriate. + */ + fd->revents = fo_poll(fp, fd->events, td->td_ucred, td); + if (fd->revents != 0) + n++; + } + FILEDESC_SUNLOCK(fdp); + stp->st_flags = 0; + td->td_retval[0] = n; + return (0); +} + + +static int pollscan(td, fds, nfd) struct thread *td; struct pollfd *fds; u_int nfd; { - register struct filedesc *fdp = td->td_proc->p_fd; + struct filedesc *fdp = td->td_proc->p_fd; int i; struct file *fp; int n = 0; @@ -1052,6 +1131,7 @@ pollscan(td, fds, nfd) * Note: backend also returns POLLHUP and * POLLERR if appropriate. */ + selfdalloc(td, fds); fds->revents = fo_poll(fp, fds->events, td->td_ucred, td); if (fds->revents != 0) @@ -1085,23 +1165,90 @@ openbsd_poll(td, uap) } /* - * Remove the references to the thread from all of the objects we were - * polling. - * - * This code assumes that the underlying owner of the selinfo structure will - * hold sellock before it changes it, and that it will unlink itself from our - * list if it goes away. + * XXX This was created specifically to support netncp and netsmb. This + * allows the caller to specify a socket to wait for events on. 
It returns + * 0 if any events matched and an error otherwise. There is no way to + * determine which events fired. */ -void -clear_selinfo_list(td) - struct thread *td; +int +selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) { - struct selinfo *si; + struct timeval atv, rtv, ttv; + int error, timo; + + if (tvp != NULL) { + atv = *tvp; + if (itimerfix(&atv)) + return (EINVAL); + getmicrouptime(&rtv); + timevaladd(&atv, &rtv); + } else { + atv.tv_sec = 0; + atv.tv_usec = 0; + } - mtx_assert(&sellock, MA_OWNED); - TAILQ_FOREACH(si, &td->td_selq, si_thrlist) - si->si_thread = NULL; - TAILQ_INIT(&td->td_selq); + timo = 0; + seltdinit(td); + /* + * Iterate until the timeout expires or the socket becomes ready. + */ + for (;;) { + selfdalloc(td, NULL); + error = sopoll(so, events, NULL, td); + /* error here is actually the ready events. */ + if (error) + return (0); + if (atv.tv_sec || atv.tv_usec) { + getmicrouptime(&rtv); + if (timevalcmp(&rtv, &atv, >=)) { + seltdclear(td); + return (EWOULDBLOCK); + } + ttv = atv; + timevalsub(&ttv, &rtv); + timo = ttv.tv_sec > 24 * 60 * 60 ? + 24 * 60 * 60 * hz : tvtohz(&ttv); + } + error = seltdwait(td, timo); + seltdclear(td); + if (error) + break; + } + /* XXX Duplicates ncp/smb behavior. */ + if (error == ERESTART) + error = 0; + return (error); +} + +/* + * Preallocate two selfds associated with 'cookie'. Some fo_poll routines + * have two select sets, one for read and another for write. 
+ */ +static void +selfdalloc(struct thread *td, void *cookie) +{ + struct seltd *stp; + + stp = td->td_sel; + if (stp->st_free1 == NULL) + stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); + stp->st_free1->sf_td = stp; + stp->st_free1->sf_cookie = cookie; + if (stp->st_free2 == NULL) + stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); + stp->st_free2->sf_td = stp; + stp->st_free2->sf_cookie = cookie; +} + +static void +selfdfree(struct seltd *stp, struct selfd *sfp) +{ + STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link); + mtx_lock(sfp->sf_mtx); + if (sfp->sf_si) + TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads); + mtx_unlock(sfp->sf_mtx); + uma_zfree(selfd_zone, sfp); } /* @@ -1112,26 +1259,46 @@ selrecord(selector, sip) struct thread *selector; struct selinfo *sip; { + struct selfd *sfp; + struct seltd *stp; + struct mtx *mtxp; - mtx_lock(&sellock); + stp = selector->td_sel; + /* + * Don't record when doing a rescan. + */ + if (stp->st_flags & SELTD_RESCAN) + return; + /* + * Grab one of the preallocated descriptors. + */ + sfp = NULL; + if ((sfp = stp->st_free1) != NULL) + stp->st_free1 = NULL; + else if ((sfp = stp->st_free2) != NULL) + stp->st_free2 = NULL; + else + panic("selrecord: No free selfd on selq"); + mtxp = mtx_pool_find(mtxpool_sleep, sip); + /* + * Initialize the sfp and queue it in the thread. + */ + sfp->sf_si = sip; + sfp->sf_mtx = mtxp; + STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link); /* - * If the selinfo's thread pointer is NULL then take ownership of it. - * - * If the thread pointer is not NULL and it points to another - * thread, then we have a collision. - * - * If the thread pointer is not NULL and points back to us then leave - * it alone as we've already added pointed it at us and added it to - * our list. + * Now that we've locked the sip, check for initialization. 
*/ - if (sip->si_thread == NULL) { - sip->si_thread = selector; - TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist); - } else if (sip->si_thread != selector) { - sip->si_flags |= SI_COLL; + mtx_lock(mtxp); + if (sip->si_mtx == NULL) { + sip->si_mtx = mtxp; + TAILQ_INIT(&sip->si_tdlist); } - - mtx_unlock(&sellock); + /* + * Add this thread to the list of selfds listening on this selinfo. + */ + TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads); + mtx_unlock(sip->si_mtx); } /* Wake up a selecting thread. */ @@ -1159,36 +1326,115 @@ doselwakeup(sip, pri) struct selinfo *sip; int pri; { - struct thread *td; + struct selfd *sfp; + struct selfd *sfn; + struct seltd *stp; - mtx_lock(&sellock); - td = sip->si_thread; - if ((sip->si_flags & SI_COLL) != 0) { - nselcoll++; - sip->si_flags &= ~SI_COLL; - cv_broadcastpri(&selwait, pri); - } - if (td == NULL) { - mtx_unlock(&sellock); + /* If it's not initialized there can't be any waiters. */ + if (sip->si_mtx == NULL) return; + /* + * Locking the selinfo locks all selfds associated with it. + */ + mtx_lock(sip->si_mtx); + TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) { + /* + * Once we remove this sfp from the list and clear the + * sf_si seltdclear will know to ignore this si. 
+ */ + TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads); + sfp->sf_si = NULL; + stp = sfp->sf_td; + mtx_lock(&stp->st_mtx); + stp->st_flags |= SELTD_PENDING; + cv_broadcastpri(&stp->st_wait, pri); + mtx_unlock(&stp->st_mtx); } - TAILQ_REMOVE(&td->td_selq, sip, si_thrlist); - sip->si_thread = NULL; - thread_lock(td); - td->td_flags &= ~TDF_SELECT; - thread_unlock(td); - sleepq_remove(td, &selwait); - mtx_unlock(&sellock); + mtx_unlock(sip->si_mtx); } -static void selectinit(void *); -SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL) +static void +seltdinit(struct thread *td) +{ + struct seltd *stp; + + if ((stp = td->td_sel) != NULL) + goto out; + td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO); + mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF); + cv_init(&stp->st_wait, "select"); +out: + stp->st_flags = 0; + STAILQ_INIT(&stp->st_selq); +} + +static int +seltdwait(struct thread *td, int timo) +{ + struct seltd *stp; + int error; -/* ARGSUSED*/ + stp = td->td_sel; + /* + * An event of interest may occur while we do not hold the seltd + * locked so check the pending flag before we sleep. + */ + mtx_lock(&stp->st_mtx); + /* + * Any further calls to selrecord will be a rescan. + */ + stp->st_flags |= SELTD_RESCAN; + if (stp->st_flags & SELTD_PENDING) { + mtx_unlock(&stp->st_mtx); + return (0); + } + if (timo > 0) + error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo); + else + error = cv_wait_sig(&stp->st_wait, &stp->st_mtx); + mtx_unlock(&stp->st_mtx); + + return (error); +} + +void +seltdfini(struct thread *td) +{ + struct seltd *stp; + + stp = td->td_sel; + if (stp == NULL) + return; + if (stp->st_free1) + uma_zfree(selfd_zone, stp->st_free1); + if (stp->st_free2) + uma_zfree(selfd_zone, stp->st_free2); + td->td_sel = NULL; + free(stp, M_SELECT); +} + +/* + * Remove the references to the thread from all of the objects we were + * polling. 
+ */ +static void +seltdclear(struct thread *td) +{ + struct seltd *stp; + struct selfd *sfp; + struct selfd *sfn; + + stp = td->td_sel; + STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) + selfdfree(stp, sfp); + stp->st_flags = 0; +} + +static void selectinit(void *); +SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL); static void -selectinit(dummy) - void *dummy; +selectinit(void *dummy __unused) { - cv_init(&selwait, "select"); - mtx_init(&sellock, "sellck", NULL, MTX_DEF); + selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); } Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c Tue May 19 00:02:58 2009 (r192359) @@ -524,8 +524,9 @@ pipeselwakeup(cpipe) PIPE_LOCK_ASSERT(cpipe, MA_OWNED); if (cpipe->pipe_state & PIPE_SEL) { - cpipe->pipe_state &= ~PIPE_SEL; selwakeuppri(&cpipe->pipe_sel, PSOCK); + if (!SEL_WAITING(&cpipe->pipe_sel)) + cpipe->pipe_state &= ~PIPE_SEL; } if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) pgsigio(&cpipe->pipe_sigio, SIGIO, 0); @@ -1356,12 +1357,14 @@ pipe_poll(fp, events, active_cred, td) if (revents == 0) { if (events & (POLLIN | POLLRDNORM)) { selrecord(td, &rpipe->pipe_sel); - rpipe->pipe_state |= PIPE_SEL; + if (SEL_WAITING(&rpipe->pipe_sel)) + rpipe->pipe_state |= PIPE_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(td, &wpipe->pipe_sel); - wpipe->pipe_state |= PIPE_SEL; + if (SEL_WAITING(&wpipe->pipe_sel)) + wpipe->pipe_state |= PIPE_SEL; } } #ifdef MAC Modified: user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/kern/uipc_sockbuf.c 
Tue May 19 00:02:58 2009 (r192359) @@ -179,7 +179,8 @@ sowakeup(struct socket *so, struct sockb SOCKBUF_LOCK_ASSERT(sb); selwakeuppri(&sb->sb_sel, PSOCK); - sb->sb_flags &= ~SB_SEL; + if (!SEL_WAITING(&sb->sb_sel)) + sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_cc); Modified: user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/netncp/ncp_rq.c Tue May 19 00:02:58 2009 (r192359) @@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/poll.h> #include <sys/proc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> #include <sys/uio.h> #include <netncp/ncp.h> @@ -274,7 +276,9 @@ ncp_request_int(struct ncp_rq *rqp) /* * Flush out replies on previous reqs */ - while (ncp_poll(so, POLLIN) != 0) { + tv.tv_sec = 0; + tv.tv_usec = 0; + while (selsocket(so, POLLIN, &tv, td) == 0) { if (ncp_sock_recv(so, &m, &len) != 0) break; m_freem(m); @@ -319,7 +323,7 @@ ncp_request_int(struct ncp_rq *rqp) } tv.tv_sec = conn->li.timeout; tv.tv_usec = 0; - error = ncp_sock_rselect(so, td, &tv, POLLIN); + error = selsocket(so, POLLIN, &tv, td); if (error == EWOULDBLOCK ) /* timeout expired */ continue; error = ncp_chkintr(conn, td); @@ -335,7 +339,9 @@ ncp_request_int(struct ncp_rq *rqp) dosend = 1; /* resend rq if error */ for (;;) { error = 0; - if (ncp_poll(so, POLLIN) == 0) + tv.tv_sec = 0; + tv.tv_usec = 0; + if (selsocket(so, POLLIN, &tv, td) != 0) break; /* if (so->so_rcv.sb_cc == 0) { break; Modified: user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c Mon May 18 23:36:11 2009 (r192358) +++ user/kmacy/releng_7_2_fcs/sys/netncp/ncp_sock.c Tue May 19 00:02:58 2009 (r192359) @@ -65,7 +65,6 @@ 
__FBSDID("$FreeBSD$"); #define ipx_setnullhost(x) ((x).x_host.s_host[0] = 0); \ ((x).x_host.s_host[1] = 0); ((x).x_host.s_host[2] = 0); -/*int ncp_poll(struct socket *so, int events);*/ /*static int ncp_getsockname(struct socket *so, caddr_t asa, int *alen);*/ static int ncp_soconnect(struct socket *so, struct sockaddr *target, struct thread *td); @@ -181,110 +180,6 @@ ncp_sock_send(struct socket *so, struct return error; } -int -ncp_poll(struct socket *so, int events) -{ - struct thread *td = curthread; - int revents; - - /* Fake up enough state to look like we are in poll(2). */ - mtx_lock(&sellock); - thread_lock(td); - td->td_flags |= TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - TAILQ_INIT(&td->td_selq); - - revents = sopoll(so, events, NULL, td); - - /* Tear down the fake poll(2) state. */ - mtx_lock(&sellock); - clear_selinfo_list(td); - thread_lock(td); - td->td_flags &= ~TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - - return (revents); -} - -int -ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, - int events) -{ - struct timeval atv, rtv, ttv; - int ncoll, timo, error, revents; - - if (tv) { - atv = *tv; - if (itimerfix(&atv)) { - error = EINVAL; - goto done_noproclock; - } - getmicrouptime(&rtv); - timevaladd(&atv, &rtv); - } - timo = 0; - mtx_lock(&sellock); - -retry: - ncoll = nselcoll; - thread_lock(td); - td->td_flags |= TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - - TAILQ_INIT(&td->td_selq); - revents = sopoll(so, events, NULL, td); - mtx_lock(&sellock); - if (revents) { - error = 0; - goto done; - } - if (tv) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) { - error = EWOULDBLOCK; - goto done; - } - ttv = atv; - timevalsub(&ttv, &rtv); - timo = tvtohz(&ttv); - } - /* - * An event of our interest may occur during locking a thread. 
- * In order to avoid missing the event that occurred during locking - * the process, test TDF_SELECT and rescan file descriptors if - * necessary. - */ - thread_lock(td); - if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - thread_unlock(td); - goto retry; - } - thread_unlock(td); - - if (timo > 0) - error = cv_timedwait(&selwait, &sellock, timo); - else { - cv_wait(&selwait, &sellock); - error = 0; - } - -done: - clear_selinfo_list(td); - - thread_lock(td); - td->td_flags &= ~TDF_SELECT; - thread_unlock(td); - mtx_unlock(&sellock); - -done_noproclock: *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905190002.n4J02war079181>