Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 3 Dec 2019 23:11:40 +0000 (UTC)
From:      Vladimir Kondratyev <wulf@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r355372 - stable/12/sys/compat/linux
Message-ID:  <201912032311.xB3NBfLB017927@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: wulf
Date: Tue Dec  3 23:11:40 2019
New Revision: 355372
URL: https://svnweb.freebsd.org/changeset/base/355372

Log:
  MFC r355065 - r355068: Linux epoll improvements.
  
  r355065:
  Linux epoll: Don't deregister file descriptor after EPOLLONESHOT is fired
  
  Linux epoll does not remove descriptor after one-shot event has been triggered.
  Set EV_DISPATCH kqueue flag rather than EV_ONESHOT to get the same behavior.
  
  Required by Linux Steam client.
  
  PR:		240590
  Reported by:	Alex S <iwtcex@gmail.com>
  Reviewed by:	emaste, imp
  Differential Revision:	https://reviews.freebsd.org/D22513
  
  r355066:
  Linux epoll: Check both read and write kqueue events existence in EPOLL_CTL_ADD
  
  Linux epoll EPOLL_CTL_ADD op handler should always check registration
  of both EVFILT_READ and EVFILT_WRITE kevents to decide if supplied
  file descriptor fd is already registered with epoll instance.
  
  Reviewed by:	emaste
  Differential Revision:	https://reviews.freebsd.org/D22515
  
  r355067:
  Linux epoll: Register events with zero event mask
  
  Such events are legal and should be interpreted as EPOLLERR | EPOLLHUP.
  Register a disabled kqueue event in that case as we do not support EPOLLHUP yet.
  
  Required by Linux Steam client.
  
  PR:		240590
  Reported by:	Alex S <iwtcex@gmail.com>
  Reviewed by:	emaste
  Differential Revision:	https://reviews.freebsd.org/D22516
  
  r355068:
  Linux epoll: Allow passing of any negative timeout value to epoll_wait
  
  Linux epoll allows passing of any negative timeout value to epoll_wait()
  to cause unbounded blocking
  
  Reviewed by:	emaste
  Differential Revision:	https://reviews.freebsd.org/D22517

Modified:
  stable/12/sys/compat/linux/linux_event.c
  stable/12/sys/compat/linux/linux_event.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/compat/linux/linux_event.c
==============================================================================
--- stable/12/sys/compat/linux/linux_event.c	Tue Dec  3 23:07:09 2019	(r355371)
+++ stable/12/sys/compat/linux/linux_event.c	Tue Dec  3 23:11:40 2019	(r355372)
@@ -98,14 +98,16 @@ __attribute__((packed))
 #define	LINUX_MAX_EVENTS	(INT_MAX / sizeof(struct epoll_event))
 
 static void	epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
-static int	epoll_to_kevent(struct thread *td, struct file *epfp,
-		    int fd, struct epoll_event *l_event, int *kev_flags,
-		    struct kevent *kevent, int *nkevents);
+static int	epoll_to_kevent(struct thread *td, int fd,
+		    struct epoll_event *l_event, struct kevent *kevent,
+		    int *nkevents);
 static void	kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
 static int	epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
 static int	epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
-static int	epoll_delete_event(struct thread *td, struct file *epfp,
-		    int fd, int filter);
+static int	epoll_register_kevent(struct thread *td, struct file *epfp,
+		    int fd, int filter, unsigned int flags);
+static int	epoll_fd_registered(struct thread *td, struct file *epfp,
+		    int fd);
 static int	epoll_delete_all_events(struct thread *td, struct file *epfp,
 		    int fd);
 
@@ -296,33 +298,38 @@ linux_epoll_create1(struct thread *td, struct linux_ep
 
 /* Structure converting function from epoll to kevent. */
 static int
-epoll_to_kevent(struct thread *td, struct file *epfp,
-    int fd, struct epoll_event *l_event, int *kev_flags,
+epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
     struct kevent *kevent, int *nkevents)
 {
 	uint32_t levents = l_event->events;
 	struct linux_pemuldata *pem;
 	struct proc *p;
+	unsigned short kev_flags = EV_ADD | EV_ENABLE;
 
 	/* flags related to how event is registered */
 	if ((levents & LINUX_EPOLLONESHOT) != 0)
-		*kev_flags |= EV_ONESHOT;
+		kev_flags |= EV_DISPATCH;
 	if ((levents & LINUX_EPOLLET) != 0)
-		*kev_flags |= EV_CLEAR;
+		kev_flags |= EV_CLEAR;
 	if ((levents & LINUX_EPOLLERR) != 0)
-		*kev_flags |= EV_ERROR;
+		kev_flags |= EV_ERROR;
 	if ((levents & LINUX_EPOLLRDHUP) != 0)
-		*kev_flags |= EV_EOF;
+		kev_flags |= EV_EOF;
 
 	/* flags related to what event is registered */
 	if ((levents & LINUX_EPOLL_EVRD) != 0) {
-		EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0);
+		EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0, 0);
 		++(*nkevents);
 	}
 	if ((levents & LINUX_EPOLL_EVWR) != 0) {
-		EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
+		EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
 		++(*nkevents);
 	}
+	/* zero event mask is legal */
+	if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
+		EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
+		++(*nkevents);
+	}
 
 	if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
 		p = td->td_proc;
@@ -451,7 +458,6 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_
 					epoll_kev_copyin};
 	struct epoll_event le;
 	cap_rights_t rights;
-	int kev_flags;
 	int nchanges = 0;
 	int error;
 
@@ -484,9 +490,7 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_
 	ciargs.changelist = kev;
 
 	if (args->op != LINUX_EPOLL_CTL_DEL) {
-		kev_flags = EV_ADD | EV_ENABLE;
-		error = epoll_to_kevent(td, epfp, args->fd, &le,
-		    &kev_flags, kev, &nchanges);
+		error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
 		if (error != 0)
 			goto leave0;
 	}
@@ -499,18 +503,10 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_
 		break;
 
 	case LINUX_EPOLL_CTL_ADD:
-		/*
-		 * kqueue_register() return ENOENT if event does not exists
-		 * and the EV_ADD flag is not set.
-		 */
-		kev[0].flags &= ~EV_ADD;
-		error = kqfd_register(args->epfd, &kev[0], td, M_WAITOK);
-		if (error != ENOENT) {
+		if (epoll_fd_registered(td, epfp, args->fd)) {
 			error = EEXIST;
 			goto leave0;
 		}
-		error = 0;
-		kev[0].flags |= EV_ADD;
 		break;
 
 	case LINUX_EPOLL_CTL_DEL:
@@ -561,13 +557,13 @@ linux_epoll_wait_common(struct thread *td, int epfd, s
 		return (error);
 	if (epfp->f_type != DTYPE_KQUEUE) {
 		error = EINVAL;
-		goto leave1;
+		goto leave;
 	}
 	if (uset != NULL) {
 		error = kern_sigprocmask(td, SIG_SETMASK, uset,
 		    &omask, 0);
 		if (error != 0)
-			goto leave1;
+			goto leave;
 		td->td_pflags |= TDP_OLDMASK;
 		/*
 		 * Make sure that ast() is called on return to
@@ -585,11 +581,12 @@ linux_epoll_wait_common(struct thread *td, int epfd, s
 	coargs.count = 0;
 	coargs.error = 0;
 
-	if (timeout != -1) {
-		if (timeout < 0) {
-			error = EINVAL;
-			goto leave0;
-		}
+	/*
+	 * Linux epoll_wait(2) man page states that timeout of -1 causes caller
+	 * to block indefinitely. Real implementation does it if any negative
+	 * timeout value is passed.
+	 */
+	if (timeout >= 0) {
 		/* Convert from milliseconds to timespec. */
 		ts.tv_sec = timeout / 1000;
 		ts.tv_nsec = (timeout % 1000) * 1000000;
@@ -609,11 +606,10 @@ linux_epoll_wait_common(struct thread *td, int epfd, s
 	if (error == 0)
 		td->td_retval[0] = coargs.count;
 
-leave0:
 	if (uset != NULL)
 		error = kern_sigprocmask(td, SIG_SETMASK, &omask,
 		    NULL, 0);
-leave1:
+leave:
 	fdrop(epfp, td);
 	return (error);
 }
@@ -650,7 +646,8 @@ linux_epoll_pwait(struct thread *td, struct linux_epol
 }
 
 static int
-epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter)
+epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
+    unsigned int flags)
 {
 	struct epoll_copyin_args ciargs;
 	struct kevent kev;
@@ -659,18 +656,36 @@ epoll_delete_event(struct thread *td, struct file *epf
 					epoll_kev_copyin};
 
 	ciargs.changelist = &kev;
-	EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
+	EV_SET(&kev, fd, filter, flags, 0, 0, 0);
 
 	return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
 }
 
 static int
+epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
+{
+	/*
+	 * Set empty filter flags to avoid accidental modification of already
+	 * registered events. In the case of event re-registration:
+	 * 1. If event does not exists kevent() does nothing and returns ENOENT
+	 * 2. If event does exists, it's enabled/disabled state is preserved
+	 *    but fflags, data and udata fields are overwritten. So we can not
+	 *    set socket lowats and store user's context pointer in udata.
+	 */
+	if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
+	    epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
+		return (1);
+
+	return (0);
+}
+
+static int
 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
 {
 	int error1, error2;
 
-	error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ);
-	error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE);
+	error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
+	error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);
 
 	/* return 0 if at least one result positive */
 	return (error1 == 0 ? 0 : error2);

Modified: stable/12/sys/compat/linux/linux_event.h
==============================================================================
--- stable/12/sys/compat/linux/linux_event.h	Tue Dec  3 23:07:09 2019	(r355371)
+++ stable/12/sys/compat/linux/linux_event.h	Tue Dec  3 23:11:40 2019	(r355372)
@@ -45,10 +45,10 @@
 #define	LINUX_EPOLLONESHOT	1u<<30
 #define	LINUX_EPOLLET		1u<<31
 
-#define	LINUX_EPOLL_EVRD	(LINUX_EPOLLIN|LINUX_EPOLLRDNORM	\
-		|LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI)
+#define	LINUX_EPOLL_EVRD	(LINUX_EPOLLIN|LINUX_EPOLLRDNORM)
 #define	LINUX_EPOLL_EVWR	(LINUX_EPOLLOUT|LINUX_EPOLLWRNORM)
 #define	LINUX_EPOLL_EVSUP	(LINUX_EPOLLET|LINUX_EPOLLONESHOT	\
+		|LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI		\
 		|LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP)
 
 #define	LINUX_EPOLL_CTL_ADD	1



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201912032311.xB3NBfLB017927>