Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 29 Jan 2002 12:22:13 -0800
From:      Alfred Perlstein <bright@mu.org>
To:        smp@freebsd.org, tanimura@freebsd.org
Cc:        jhb@freebsd.org, kkenn@freebsd.org
Subject:   Re: select/poll problems with fdlocking
Message-ID:  <20020129122213.D13686@elvis.mu.org>
In-Reply-To: <20020128185345.C13686@elvis.mu.org>; from bright@mu.org on Mon, Jan 28, 2002 at 06:53:45PM -0800
References:  <20020128185345.C13686@elvis.mu.org>

next in thread | previous in thread | raw e-mail | index | archive | help
* Alfred Perlstein <bright@mu.org> [020128 18:53] wrote:
> (cc'd kkenn to let him know I'm not ignoring his problem)
> 
> I think there's some problems with select and poll.
> 
> specifically:
> 
> 1) if one thread were to close a select/poll'd on descriptor
> then we'd leak it because the ofiles entry would be NULL
> during the "drop" part of the call.
> 2) it's slow to grab and drop the filedesc lock so much.
> 
> I think I'm going to take a shot at holding the lock on the
> filedesc for most of the select/poll syscalls as a solution
> this _should__ be ok as we shouldn't be sleeping in fo_poll
> really...
> 
> Any suggestions/workarounds that I should investigate before
> attempting this?

I think I undid the damage here.

Please review and test the following delta.


Index: sys/filedesc.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/filedesc.h,v
retrieving revision 1.33
diff -u -r1.33 filedesc.h
--- sys/filedesc.h	23 Jan 2002 08:28:55 -0000	1.33
+++ sys/filedesc.h	29 Jan 2002 19:45:03 -0000
@@ -153,7 +154,21 @@
 void	funsetown __P((struct sigio *sigio));
 void	funsetownlst __P((struct sigiolst *sigiolst));
 int	getvnode __P((struct filedesc *fdp, int fd, struct file **fpp));
+static __inline struct file *	fget_locked(struct filedesc *, int);
 void	setugidsafety __P((struct thread *td));
+
+static __inline struct file *
+fget_locked(fdp, fd)
+	struct filedesc *fdp;
+	int fd;
+{
+	struct file *fp;
+
+	if (fd < 0 || (u_int)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (NULL);
+	return (fp);
+}
 
 #endif /* _KERNEL */
 
Index: kern/kern_descrip.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.121
diff -u -r1.121 kern_descrip.c
--- kern/kern_descrip.c	29 Jan 2002 17:12:10 -0000	1.121
+++ kern/kern_descrip.c	29 Jan 2002 19:42:48 -0000
@@ -1492,9 +1492,7 @@
 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
 		return(EBADF);
 	FILEDESC_LOCK(fdp);
-	if (fd < 0 || (u_int)fd >= fdp->fd_nfiles ||
-	    (fp = fdp->fd_ofiles[fd]) == NULL ||
-	    fp->f_ops == &badfileops) {
+	if ((fp = fget_locked(fdp, fd)) == NULL) {
 		FILEDESC_UNLOCK(fdp);
 		return(EBADF);
 	}
Index: kern/sys_generic.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.89
diff -u -r1.89 sys_generic.c
--- kern/sys_generic.c	23 Jan 2002 08:28:15 -0000	1.89
+++ kern/sys_generic.c	29 Jan 2002 19:42:45 -0000
@@ -75,9 +75,7 @@
 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
 
 static int	pollscan __P((struct thread *, struct pollfd *, u_int));
-static int	pollholddrop __P((struct thread *, struct pollfd *, u_int, int));
 static int	selscan __P((struct thread *, fd_mask **, fd_mask **, int));
-static int	selholddrop __P((struct thread *, fd_mask *, fd_mask *, int, int));
 static int	dofileread __P((struct thread *, struct file *, int, void *,
 		    size_t, off_t, int));
 static int	dofilewrite __P((struct thread *, struct file *, int,
@@ -729,7 +727,7 @@
 	 */
 	fd_mask s_selbits[howmany(2048, NFDBITS)];
 	fd_mask s_heldbits[howmany(2048, NFDBITS)];
-	fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
+	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
 	struct timeval atv, rtv, ttv;
 	int ncoll, error, timo, i;
 	u_int nbufbytes, ncpbytes, nfdbits;
@@ -761,11 +759,6 @@
 		selbits = &s_selbits[0];
 	else
 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
-	if (2 * ncpbytes <= sizeof s_heldbits) {
-		bzero(s_heldbits, sizeof(s_heldbits));
-		heldbits = &s_heldbits[0];
-	} else
-		heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
 
 	/*
 	 * Assign pointers into the bit buffers and fetch the input bits.
@@ -773,8 +766,6 @@
 	 * together.
 	 */
 	sbp = selbits;
-	hibits = heldbits + ncpbytes / sizeof *heldbits;
-	hobits = heldbits;
 #define	getbits(name, x) \
 	do {								\
 		if (uap->name == NULL)					\
@@ -786,10 +777,6 @@
 			error = copyin(uap->name, ibits[x], ncpbytes);	\
 			if (error != 0)					\
 				goto done_noproclock;			\
-			for (i = 0;					\
-			     i < ncpbytes / sizeof ibits[i][0];		\
-			     i++)					\
-				hibits[i] |= ibits[x][i];		\
 		}							\
 	} while (0)
 	getbits(in, 0);
@@ -814,7 +801,6 @@
 		atv.tv_sec = 0;
 		atv.tv_usec = 0;
 	}
-	selholddrop(td, hibits, hobits, uap->nd, 1);
 	timo = 0;
 	PROC_LOCK(td->td_proc);
 retry:
@@ -870,7 +856,6 @@
 	td->td_flags &= ~TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
 	PROC_UNLOCK(td->td_proc);
-	selholddrop(td, hibits, hobits, uap->nd, 0);
 done_noproclock:
 	/* select is not restarted after signals... */
 	if (error == ERESTART)
@@ -890,65 +875,11 @@
 	}
 	if (selbits != &s_selbits[0])
 		free(selbits, M_SELECT);
-	if (heldbits != &s_heldbits[0])
-		free(heldbits, M_SELECT);
 
 	mtx_unlock(&Giant);
 	return (error);
 }
 
-/*
- * Used to hold then release a group of fds for select(2).
- * Hold (hold == 1) or release (hold == 0) a group of filedescriptors.
- * if holding then use ibits setting the bits in obits, otherwise use obits.
- */
-static int
-selholddrop(td, ibits, obits, nfd, hold)
-	struct thread *td;
-	fd_mask *ibits, *obits;
-	int nfd, hold;
-{
-	struct filedesc *fdp = td->td_proc->p_fd;
-	int i, fd;
-	fd_mask bits;
-	struct file *fp;
-
-	FILEDESC_LOCK(fdp);
-	for (i = 0; i < nfd; i += NFDBITS) {
-		if (hold)
-			bits = ibits[i/NFDBITS];
-		else
-			bits = obits[i/NFDBITS];
-		/* ffs(int mask) not portable, fd_mask is long */
-		for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
-			if (!(bits & 1))
-				continue;
-			fp = fdp->fd_ofiles[fd];
-			if (fp == NULL) {
-				FILEDESC_UNLOCK(fdp);
-				return (EBADF);
-			}
-			if (hold) {
-				fhold(fp);
-				obits[(fd)/NFDBITS] |=
-				    ((fd_mask)1 << ((fd) % NFDBITS));
-			} else {
-				/* XXX: optimize by making a special
-				 * version of fdrop that only unlocks
-				 * the filedesc if needed?  This would
-				 * redcuce the number of lock/unlock
-				 * pairs by quite a bit.
-				 */
-				FILEDESC_UNLOCK(fdp);
-				fdrop(fp, td);
-				FILEDESC_LOCK(fdp);
-			}
-		}
-	}
-	FILEDESC_UNLOCK(fdp);
-	return (0);
-}
-
 static int
 selscan(td, ibits, obits, nfd)
 	struct thread *td;
@@ -961,7 +892,9 @@
 	int n = 0;
 	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
+	struct filedesc *fdp = td->td_proc->p_fd;
 
+	FILEDESC_LOCK(fdp);
 	for (msk = 0; msk < 3; msk++) {
 		if (ibits[msk] == NULL)
 			continue;
@@ -971,17 +904,19 @@
 			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
 				if (!(bits & 1))
 					continue;
-				if (fget(td, fd, &fp))
+				if ((fp = fget_locked(fdp, fd)) == NULL) {
+					FILEDESC_UNLOCK(fdp);
 					return (EBADF);
+				}
 				if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
 					obits[msk][(fd)/NFDBITS] |=
 					    ((fd_mask)1 << ((fd) % NFDBITS));
 					n++;
 				}
-				fdrop(fp, td);
 			}
 		}
 	}
+	FILEDESC_UNLOCK(fdp);
 	td->td_retval[0] = n;
 	return (0);
 }
@@ -1010,8 +945,6 @@
 	int ncoll, error = 0, timo;
 	u_int nfds;
 	size_t ni;
-	struct pollfd p_heldbits[32];
-	struct pollfd *heldbits;
 
 	nfds = SCARG(uap, nfds);
 
@@ -1033,16 +966,9 @@
 		bits = malloc(ni, M_TEMP, M_WAITOK);
 	else
 		bits = smallbits;
-	if (ni > sizeof(p_heldbits))
-		heldbits = malloc(ni, M_TEMP, M_WAITOK);
-	else {
-		bzero(p_heldbits, sizeof(p_heldbits));
-		heldbits = p_heldbits;
-	}
 	error = copyin(SCARG(uap, fds), bits, ni);
 	if (error)
 		goto done_noproclock;
-	bcopy(bits, heldbits, ni);
 	if (SCARG(uap, timeout) != INFTIM) {
 		atv.tv_sec = SCARG(uap, timeout) / 1000;
 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
@@ -1056,7 +982,6 @@
 		atv.tv_sec = 0;
 		atv.tv_usec = 0;
 	}
-	pollholddrop(td, heldbits, nfds, 1);
 	timo = 0;
 	PROC_LOCK(td->td_proc);
 retry:
@@ -1110,7 +1035,6 @@
 	td->td_flags &= ~TDF_SELECT;
 	mtx_unlock_spin(&sched_lock);
 	PROC_UNLOCK(td->td_proc);
-	pollholddrop(td, heldbits, nfds, 0);
 done_noproclock:
 	/* poll is not restarted after signals... */
 	if (error == ERESTART)
@@ -1125,47 +1049,12 @@
 out:
 	if (ni > sizeof(smallbits))
 		free(bits, M_TEMP);
-	if (ni > sizeof(p_heldbits))
-		free(heldbits, M_TEMP);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
 }
 
 static int
-pollholddrop(td, fds, nfd, hold)
-	struct thread *td;
-	struct pollfd *fds;
-	u_int nfd;
-	int hold;
-{
-	register struct filedesc *fdp = td->td_proc->p_fd;
-	int i;
-	struct file *fp;
-
-	FILEDESC_LOCK(fdp);
-	for (i = 0; i < nfd; i++, fds++) {
-		if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) {
-			fp = fdp->fd_ofiles[fds->fd];
-			if (hold) {
-				if (fp != NULL) {
-					fhold(fp);
-					fds->revents = 1;
-				} else
-					fds->revents = 0;
-			} else if(fp != NULL && fds->revents) {
-				FILE_LOCK(fp);
-				FILEDESC_UNLOCK(fdp);
-				fdrop_locked(fp, td);
-				FILEDESC_LOCK(fdp);
-			}
-		}
-	}
-	FILEDESC_UNLOCK(fdp);
-	return (0);
-}
-
-static int
 pollscan(td, fds, nfd)
 	struct thread *td;
 	struct pollfd *fds;
@@ -1176,18 +1065,15 @@
 	struct file *fp;
 	int n = 0;
 
+	FILEDESC_LOCK(fdp);
 	for (i = 0; i < nfd; i++, fds++) {
-		FILEDESC_LOCK(fdp);
 		if (fds->fd >= fdp->fd_nfiles) {
 			fds->revents = POLLNVAL;
 			n++;
-			FILEDESC_UNLOCK(fdp);
 		} else if (fds->fd < 0) {
 			fds->revents = 0;
-			FILEDESC_UNLOCK(fdp);
 		} else {
 			fp = fdp->fd_ofiles[fds->fd];
-			FILEDESC_UNLOCK(fdp);
 			if (fp == NULL) {
 				fds->revents = POLLNVAL;
 				n++;
@@ -1203,6 +1089,7 @@
 			}
 		}
 	}
+	FILEDESC_UNLOCK(fdp);
 	td->td_retval[0] = n;
 	return (0);
 }


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-smp" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020129122213.D13686>