Date: Tue, 29 Jan 2002 12:22:13 -0800 From: Alfred Perlstein <bright@mu.org> To: smp@freebsd.org, tanimura@freebsd.org Cc: jhb@freebsd.org, kkenn@freebsd.org Subject: Re: select/poll problems with fdlocking Message-ID: <20020129122213.D13686@elvis.mu.org> In-Reply-To: <20020128185345.C13686@elvis.mu.org>; from bright@mu.org on Mon, Jan 28, 2002 at 06:53:45PM -0800 References: <20020128185345.C13686@elvis.mu.org>
next in thread | previous in thread | raw e-mail | index | archive | help
* Alfred Perlstein <bright@mu.org> [020128 18:53] wrote: > (cc'd kkenn to let him know I'm not ignoring his problem) > > I think there's some problems with select and poll. > > specifically: > > 1) if one thread were to close a select/poll'd on descriptor > then we'd leak it because the ofiles entry would be NULL > during the "drop" part of the call. > 2) it's slow to grab and drop the filedesc lock so much. > > I think I'm going to take a shot at holding the lock on the > filedesc for most of the select/poll syscalls as a solution > this _should__ be ok as we shouldn't be sleeping in fo_poll > really... > > Any suggestions/workarounds that I should investigate before > attempting this? I think I undid the damage here. Please review and test the following delta. Index: sys/filedesc.h =================================================================== RCS file: /home/ncvs/src/sys/sys/filedesc.h,v retrieving revision 1.33 diff -u -r1.33 filedesc.h --- sys/filedesc.h 23 Jan 2002 08:28:55 -0000 1.33 +++ sys/filedesc.h 29 Jan 2002 19:45:03 -0000 @@ -153,7 +154,21 @@ void funsetown __P((struct sigio *sigio)); void funsetownlst __P((struct sigiolst *sigiolst)); int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); +static __inline struct file * fget_locked(struct filedesc *, int); void setugidsafety __P((struct thread *td)); + +static __inline struct file * +fget_locked(fdp, fd) + struct filedesc *fdp; + int fd; +{ + struct file *fp; + + if (fd < 0 || (u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + return (NULL); + return (fp); +} #endif /* _KERNEL */ Index: kern/kern_descrip.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.121 diff -u -r1.121 kern_descrip.c --- kern/kern_descrip.c 29 Jan 2002 17:12:10 -0000 1.121 +++ kern/kern_descrip.c 29 Jan 2002 19:42:48 -0000 @@ -1492,9 +1492,7 @@ if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return(EBADF); FILEDESC_LOCK(fdp); - if (fd < 0 || (u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - fp->f_ops == &badfileops) { + if ((fp = fget_locked(fdp, fd)) == NULL) { FILEDESC_UNLOCK(fdp); return(EBADF); } Index: kern/sys_generic.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sys_generic.c,v retrieving revision 1.89 diff -u -r1.89 sys_generic.c --- kern/sys_generic.c 23 Jan 2002 08:28:15 -0000 1.89 +++ kern/sys_generic.c 29 Jan 2002 19:42:45 -0000 @@ -75,9 +75,7 @@ MALLOC_DEFINE(M_IOV, "iov", "large iov's"); static int pollscan __P((struct thread *, struct pollfd *, u_int)); -static int pollholddrop __P((struct thread *, struct pollfd *, u_int, int)); static int selscan __P((struct thread *, fd_mask **, fd_mask **, int)); -static int selholddrop __P((struct thread *, fd_mask *, fd_mask *, int, int)); static int dofileread __P((struct thread *, struct file *, int, void *, size_t, off_t, int)); static int dofilewrite __P((struct thread *, struct file *, int, @@ -729,7 +727,7 @@ */ fd_mask s_selbits[howmany(2048, NFDBITS)]; fd_mask s_heldbits[howmany(2048, NFDBITS)]; - fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits; + fd_mask *ibits[3], *obits[3], *selbits, *sbp; struct timeval atv, rtv, ttv; int ncoll, error, timo, i; u_int nbufbytes, ncpbytes, nfdbits; @@ -761,11 +759,6 @@ selbits = &s_selbits[0]; else selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); - if (2 * ncpbytes <= sizeof s_heldbits) { - bzero(s_heldbits, sizeof(s_heldbits)); - heldbits = &s_heldbits[0]; - } else - heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO); /* * Assign pointers into the bit buffers and fetch the input bits. @@ -773,8 +766,6 @@ * together. */ sbp = selbits; - hibits = heldbits + ncpbytes / sizeof *heldbits; - hobits = heldbits; #define getbits(name, x) \ do { \ if (uap->name == NULL) \ @@ -786,10 +777,6 @@ error = copyin(uap->name, ibits[x], ncpbytes); \ if (error != 0) \ goto done_noproclock; \ - for (i = 0; \ - i < ncpbytes / sizeof ibits[i][0]; \ - i++) \ - hibits[i] |= ibits[x][i]; \ } \ } while (0) getbits(in, 0); @@ -814,7 +801,6 @@ atv.tv_sec = 0; atv.tv_usec = 0; } - selholddrop(td, hibits, hobits, uap->nd, 1); timo = 0; PROC_LOCK(td->td_proc); retry: @@ -870,7 +856,6 @@ td->td_flags &= ~TDF_SELECT; mtx_unlock_spin(&sched_lock); PROC_UNLOCK(td->td_proc); - selholddrop(td, hibits, hobits, uap->nd, 0); done_noproclock: /* select is not restarted after signals... */ if (error == ERESTART) @@ -890,65 +875,11 @@ } if (selbits != &s_selbits[0]) free(selbits, M_SELECT); - if (heldbits != &s_heldbits[0]) - free(heldbits, M_SELECT); mtx_unlock(&Giant); return (error); } -/* - * Used to hold then release a group of fds for select(2). - * Hold (hold == 1) or release (hold == 0) a group of filedescriptors. - * if holding then use ibits setting the bits in obits, otherwise use obits. - */ -static int -selholddrop(td, ibits, obits, nfd, hold) - struct thread *td; - fd_mask *ibits, *obits; - int nfd, hold; -{ - struct filedesc *fdp = td->td_proc->p_fd; - int i, fd; - fd_mask bits; - struct file *fp; - - FILEDESC_LOCK(fdp); - for (i = 0; i < nfd; i += NFDBITS) { - if (hold) - bits = ibits[i/NFDBITS]; - else - bits = obits[i/NFDBITS]; - /* ffs(int mask) not portable, fd_mask is long */ - for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { - if (!(bits & 1)) - continue; - fp = fdp->fd_ofiles[fd]; - if (fp == NULL) { - FILEDESC_UNLOCK(fdp); - return (EBADF); - } - if (hold) { - fhold(fp); - obits[(fd)/NFDBITS] |= - ((fd_mask)1 << ((fd) % NFDBITS)); - } else { - /* XXX: optimize by making a special - * version of fdrop that only unlocks - * the filedesc if needed? This would - * redcuce the number of lock/unlock - * pairs by quite a bit. - */ - FILEDESC_UNLOCK(fdp); - fdrop(fp, td); - FILEDESC_LOCK(fdp); - } - } - } - FILEDESC_UNLOCK(fdp); - return (0); -} - static int selscan(td, ibits, obits, nfd) struct thread *td; @@ -961,7 +892,9 @@ int n = 0; /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; + struct filedesc *fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); for (msk = 0; msk < 3; msk++) { if (ibits[msk] == NULL) continue; @@ -971,17 +904,19 @@ for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { if (!(bits & 1)) continue; - if (fget(td, fd, &fp)) + if ((fp = fget_locked(fdp, fd)) == NULL) { + FILEDESC_UNLOCK(fdp); return (EBADF); + } if (fo_poll(fp, flag[msk], fp->f_cred, td)) { obits[msk][(fd)/NFDBITS] |= ((fd_mask)1 << ((fd) % NFDBITS)); n++; } - fdrop(fp, td); } } } + FILEDESC_UNLOCK(fdp); td->td_retval[0] = n; return (0); } @@ -1010,8 +945,6 @@ int ncoll, error = 0, timo; u_int nfds; size_t ni; - struct pollfd p_heldbits[32]; - struct pollfd *heldbits; nfds = SCARG(uap, nfds); @@ -1033,16 +966,9 @@ bits = malloc(ni, M_TEMP, M_WAITOK); else bits = smallbits; - if (ni > sizeof(p_heldbits)) - heldbits = malloc(ni, M_TEMP, M_WAITOK); - else { - bzero(p_heldbits, sizeof(p_heldbits)); - heldbits = p_heldbits; - } error = copyin(SCARG(uap, fds), bits, ni); if (error) goto done_noproclock; - bcopy(bits, heldbits, ni); if (SCARG(uap, timeout) != INFTIM) { atv.tv_sec = SCARG(uap, timeout) / 1000; atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; @@ -1056,7 +982,6 @@ atv.tv_sec = 0; atv.tv_usec = 0; } - pollholddrop(td, heldbits, nfds, 1); timo = 0; PROC_LOCK(td->td_proc); retry: @@ -1110,7 +1035,6 @@ td->td_flags &= ~TDF_SELECT; mtx_unlock_spin(&sched_lock); PROC_UNLOCK(td->td_proc); - pollholddrop(td, heldbits, nfds, 0); done_noproclock: /* poll is not restarted after signals... */ if (error == ERESTART) @@ -1125,47 +1049,12 @@ out: if (ni > sizeof(smallbits)) free(bits, M_TEMP); - if (ni > sizeof(p_heldbits)) - free(heldbits, M_TEMP); done2: mtx_unlock(&Giant); return (error); } static int -pollholddrop(td, fds, nfd, hold) - struct thread *td; - struct pollfd *fds; - u_int nfd; - int hold; -{ - register struct filedesc *fdp = td->td_proc->p_fd; - int i; - struct file *fp; - - FILEDESC_LOCK(fdp); - for (i = 0; i < nfd; i++, fds++) { - if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) { - fp = fdp->fd_ofiles[fds->fd]; - if (hold) { - if (fp != NULL) { - fhold(fp); - fds->revents = 1; - } else - fds->revents = 0; - } else if(fp != NULL && fds->revents) { - FILE_LOCK(fp); - FILEDESC_UNLOCK(fdp); - fdrop_locked(fp, td); - FILEDESC_LOCK(fdp); - } - } - } - FILEDESC_UNLOCK(fdp); - return (0); -} - -static int pollscan(td, fds, nfd) struct thread *td; struct pollfd *fds; @@ -1176,18 +1065,15 @@ struct file *fp; int n = 0; + FILEDESC_LOCK(fdp); for (i = 0; i < nfd; i++, fds++) { - FILEDESC_LOCK(fdp); if (fds->fd >= fdp->fd_nfiles) { fds->revents = POLLNVAL; n++; - FILEDESC_UNLOCK(fdp); } else if (fds->fd < 0) { fds->revents = 0; - FILEDESC_UNLOCK(fdp); } else { fp = fdp->fd_ofiles[fds->fd]; - FILEDESC_UNLOCK(fdp); if (fp == NULL) { fds->revents = POLLNVAL; n++; @@ -1203,6 +1089,7 @@ } } } + FILEDESC_UNLOCK(fdp); td->td_retval[0] = n; return (0); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-smp" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020129122213.D13686>