Date: Tue, 3 Dec 2019 23:11:40 +0000 (UTC) From: Vladimir Kondratyev <wulf@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r355372 - stable/12/sys/compat/linux Message-ID: <201912032311.xB3NBfLB017927@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: wulf Date: Tue Dec 3 23:11:40 2019 New Revision: 355372 URL: https://svnweb.freebsd.org/changeset/base/355372 Log: MFC r355065 - r355068: Linux epoll improvements. r355065: Linux epoll: Don't deregister file descriptor after EPOLLONESHOT is fired Linux epoll does not remove descriptor after one-shot event has been triggered. Set EV_DISPATCH kqueue flag rather then EV_ONESHOT to get the same behavior. Required by Linux Steam client. PR: 240590 Reported by: Alex S <iwtcex@gmail.com> Reviewed by: emaste, imp Differential Revision: https://reviews.freebsd.org/D22513 r355066: Linux epoll: Check both read and write kqueue events existence in EPOLL_CTL_ADD Linux epoll EPOLL_CTL_ADD op handler should always check registration of both EVFILT_READ and EVFILT_WRITE kevents to deceide if supplied file descriptor fd is already registered with epoll instance. Reviewed by: emaste Differential Revision: https://reviews.freebsd.org/D22515 r355067: Linux epoll: Register events with zero event mask Such an events are legal and should be interpreted as EPOLLERR | EPOLLHUP. Register a disabled kqueue event in that case as we do not support EPOLLHUP yet. Required by Linux Steam client. PR: 240590 Reported by: Alex S <iwtcex@gmail.com> Reviewed by: emaste Differential Revision: https://reviews.freebsd.org/D22516 r355068: Linux epoll: Allow passing of any negative timeout value to epoll_wait Linux epoll allow passing of any negative timeout value to epoll_wait() to cause unbound blocking Reviewed by: emaste Differential Revision: https://reviews.freebsd.org/D22517 Modified: stable/12/sys/compat/linux/linux_event.c stable/12/sys/compat/linux/linux_event.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/compat/linux/linux_event.c ============================================================================== --- stable/12/sys/compat/linux/linux_event.c Tue Dec 3 23:07:09 2019 (r355371) +++ stable/12/sys/compat/linux/linux_event.c Tue Dec 3 23:11:40 2019 (r355372) @@ -98,14 +98,16 @@ __attribute__((packed)) #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); -static int epoll_to_kevent(struct thread *td, struct file *epfp, - int fd, struct epoll_event *l_event, int *kev_flags, - struct kevent *kevent, int *nkevents); +static int epoll_to_kevent(struct thread *td, int fd, + struct epoll_event *l_event, struct kevent *kevent, + int *nkevents); static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); -static int epoll_delete_event(struct thread *td, struct file *epfp, - int fd, int filter); +static int epoll_register_kevent(struct thread *td, struct file *epfp, + int fd, int filter, unsigned int flags); +static int epoll_fd_registered(struct thread *td, struct file *epfp, + int fd); static int epoll_delete_all_events(struct thread *td, struct file *epfp, int fd); @@ -296,33 +298,38 @@ linux_epoll_create1(struct thread *td, struct linux_ep /* Structure converting function from epoll to kevent. */ static int -epoll_to_kevent(struct thread *td, struct file *epfp, - int fd, struct epoll_event *l_event, int *kev_flags, +epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event, struct kevent *kevent, int *nkevents) { uint32_t levents = l_event->events; struct linux_pemuldata *pem; struct proc *p; + unsigned short kev_flags = EV_ADD | EV_ENABLE; /* flags related to how event is registered */ if ((levents & LINUX_EPOLLONESHOT) != 0) - *kev_flags |= EV_ONESHOT; + kev_flags |= EV_DISPATCH; if ((levents & LINUX_EPOLLET) != 0) - *kev_flags |= EV_CLEAR; + kev_flags |= EV_CLEAR; if ((levents & LINUX_EPOLLERR) != 0) - *kev_flags |= EV_ERROR; + kev_flags |= EV_ERROR; if ((levents & LINUX_EPOLLRDHUP) != 0) - *kev_flags |= EV_EOF; + kev_flags |= EV_EOF; /* flags related to what event is registered */ if ((levents & LINUX_EPOLL_EVRD) != 0) { - EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); + EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0, 0); ++(*nkevents); } if ((levents & LINUX_EPOLL_EVWR) != 0) { - EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); + EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); ++(*nkevents); } + /* zero event mask is legal */ + if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) { + EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); + ++(*nkevents); + } if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { p = td->td_proc; @@ -451,7 +458,6 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ epoll_kev_copyin}; struct epoll_event le; cap_rights_t rights; - int kev_flags; int nchanges = 0; int error; @@ -484,9 +490,7 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ ciargs.changelist = kev; if (args->op != LINUX_EPOLL_CTL_DEL) { - kev_flags = EV_ADD | EV_ENABLE; - error = epoll_to_kevent(td, epfp, args->fd, &le, - &kev_flags, kev, &nchanges); + error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges); if (error != 0) goto leave0; } @@ -499,18 +503,10 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ break; case LINUX_EPOLL_CTL_ADD: - /* - * kqueue_register() return ENOENT if event does not exists - * and the EV_ADD flag is not set. - */ - kev[0].flags &= ~EV_ADD; - error = kqfd_register(args->epfd, &kev[0], td, M_WAITOK); - if (error != ENOENT) { + if (epoll_fd_registered(td, epfp, args->fd)) { error = EEXIST; goto leave0; } - error = 0; - kev[0].flags |= EV_ADD; break; case LINUX_EPOLL_CTL_DEL: @@ -561,13 +557,13 @@ linux_epoll_wait_common(struct thread *td, int epfd, s return (error); if (epfp->f_type != DTYPE_KQUEUE) { error = EINVAL; - goto leave1; + goto leave; } if (uset != NULL) { error = kern_sigprocmask(td, SIG_SETMASK, uset, &omask, 0); if (error != 0) - goto leave1; + goto leave; td->td_pflags |= TDP_OLDMASK; /* * Make sure that ast() is called on return to @@ -585,11 +581,12 @@ linux_epoll_wait_common(struct thread *td, int epfd, s coargs.count = 0; coargs.error = 0; - if (timeout != -1) { - if (timeout < 0) { - error = EINVAL; - goto leave0; - } + /* + * Linux epoll_wait(2) man page states that timeout of -1 causes caller + * to block indefinitely. Real implementation does it if any negative + * timeout value is passed. + */ + if (timeout >= 0) { /* Convert from milliseconds to timespec. */ ts.tv_sec = timeout / 1000; ts.tv_nsec = (timeout % 1000) * 1000000; @@ -609,11 +606,10 @@ linux_epoll_wait_common(struct thread *td, int epfd, s if (error == 0) td->td_retval[0] = coargs.count; -leave0: if (uset != NULL) error = kern_sigprocmask(td, SIG_SETMASK, &omask, NULL, 0); -leave1: +leave: fdrop(epfp, td); return (error); } @@ -650,7 +646,8 @@ linux_epoll_pwait(struct thread *td, struct linux_epol } static int -epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) +epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, + unsigned int flags) { struct epoll_copyin_args ciargs; struct kevent kev; @@ -659,18 +656,36 @@ epoll_delete_event(struct thread *td, struct file *epf epoll_kev_copyin}; ciargs.changelist = &kev; - EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); + EV_SET(&kev, fd, filter, flags, 0, 0, 0); return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); } static int +epoll_fd_registered(struct thread *td, struct file *epfp, int fd) +{ + /* + * Set empty filter flags to avoid accidental modification of already + * registered events. In the case of event re-registration: + * 1. If event does not exists kevent() does nothing and returns ENOENT + * 2. If event does exists, it's enabled/disabled state is preserved + * but fflags, data and udata fields are overwritten. So we can not + * set socket lowats and store user's context pointer in udata. + */ + if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || + epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) + return (1); + + return (0); +} + +static int epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) { int error1, error2; - error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); - error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); + error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); + error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); /* return 0 if at least one result positive */ return (error1 == 0 ? 0 : error2); Modified: stable/12/sys/compat/linux/linux_event.h ============================================================================== --- stable/12/sys/compat/linux/linux_event.h Tue Dec 3 23:07:09 2019 (r355371) +++ stable/12/sys/compat/linux/linux_event.h Tue Dec 3 23:11:40 2019 (r355372) @@ -45,10 +45,10 @@ #define LINUX_EPOLLONESHOT 1u<<30 #define LINUX_EPOLLET 1u<<31 -#define LINUX_EPOLL_EVRD (LINUX_EPOLLIN|LINUX_EPOLLRDNORM \ - |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI) +#define LINUX_EPOLL_EVRD (LINUX_EPOLLIN|LINUX_EPOLLRDNORM) #define LINUX_EPOLL_EVWR (LINUX_EPOLLOUT|LINUX_EPOLLWRNORM) #define LINUX_EPOLL_EVSUP (LINUX_EPOLLET|LINUX_EPOLLONESHOT \ + |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI \ |LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP) #define LINUX_EPOLL_CTL_ADD 1
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201912032311.xB3NBfLB017927>