Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 28 Jan 2016 20:24:15 +0000 (UTC)
From:      Eric van Gyzen <vangyzen@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r295012 - in head: sys/kern sys/sys tests/sys/kqueue
Message-ID:  <201601282024.u0SKOFlb004850@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: vangyzen
Date: Thu Jan 28 20:24:15 2016
New Revision: 295012
URL: https://svnweb.freebsd.org/changeset/base/295012

Log:
  kqueue EVFILT_PROC: avoid collision between NOTE_CHILD and NOTE_EXIT
  
  NOTE_CHILD and NOTE_EXIT return something in kevent.data: the parent
  pid (ppid) for NOTE_CHILD and the exit status for NOTE_EXIT.
  Do not let the two events be combined, since one would overwrite
  the other's data.
  
  PR:		180385
  Submitted by:	David A. Bright <david_a_bright@dell.com>
  Reviewed by:	jhb
  MFC after:	1 month
  Sponsored by:	Dell Inc.
  Differential Revision:	https://reviews.freebsd.org/D4900

Modified:
  head/sys/kern/kern_event.c
  head/sys/sys/event.h
  head/tests/sys/kqueue/common.h
  head/tests/sys/kqueue/main.c
  head/tests/sys/kqueue/proc.c

Modified: head/sys/kern/kern_event.c
==============================================================================
--- head/sys/kern/kern_event.c	Thu Jan 28 20:21:15 2016	(r295011)
+++ head/sys/kern/kern_event.c	Thu Jan 28 20:24:15 2016	(r295012)
@@ -373,11 +373,21 @@ filt_procattach(struct knote *kn)
 	kn->kn_flags |= EV_CLEAR;		/* automatically set */
 
 	/*
-	 * internal flag indicating registration done by kernel
+	 * Internal flag indicating registration done by kernel for the
+	 * purposes of getting a NOTE_CHILD notification.
 	 */
-	if (kn->kn_flags & EV_FLAG1) {
+	if (kn->kn_flags & EV_FLAG2) {
+		kn->kn_flags &= ~EV_FLAG2;
 		kn->kn_data = kn->kn_sdata;		/* ppid */
 		kn->kn_fflags = NOTE_CHILD;
+                kn->kn_sfflags &= ~NOTE_EXIT;
+		immediate = 1; /* Force immediate activation of child note. */
+	}
+	/*
+	 * Internal flag indicating registration done by kernel (for other than
+	 * NOTE_CHILD).
+	 */
+	if (kn->kn_flags & EV_FLAG1) {
 		kn->kn_flags &= ~EV_FLAG1;
 	}
 
@@ -385,9 +395,10 @@ filt_procattach(struct knote *kn)
 		knlist_add(&p->p_klist, kn, 1);
 
 	/*
-	 * Immediately activate any exit notes if the target process is a
-	 * zombie.  This is necessary to handle the case where the target
-	 * process, e.g. a child, dies before the kevent is registered.
+	 * Immediately activate any child notes or, in the case of a zombie
+	 * target process, exit notes.  The latter is necessary to handle the
+	 * case where the target process, e.g. a child, dies before the kevent
+	 * is registered.
 	 */
 	if (immediate && filt_proc(kn, NOTE_EXIT))
 		KNOTE_ACTIVATE(kn, 0);
@@ -495,7 +506,7 @@ knote_fork(struct knlist *list, int pid)
 
 		/*
 		 * The NOTE_TRACK case. In addition to the activation
-		 * of the event, we need to register new event to
+		 * of the event, we need to register new events to
 		 * track the child. Drop the locks in preparation for
 		 * the call to kqueue_register().
 		 */
@@ -504,8 +515,27 @@ knote_fork(struct knlist *list, int pid)
 		list->kl_unlock(list->kl_lockarg);
 
 		/*
-		 * Activate existing knote and register a knote with
+		 * Activate existing knote and register tracking knotes with
 		 * new process.
+		 *
+		 * First register a knote to get just the child notice. This
+		 * must be a separate note from a potential NOTE_EXIT
+		 * notification since both NOTE_CHILD and NOTE_EXIT are defined
+		 * to use the data field (in conflicting ways).
+		 */
+		kev.ident = pid;
+		kev.filter = kn->kn_filter;
+		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2;
+		kev.fflags = kn->kn_sfflags;
+		kev.data = kn->kn_id;		/* parent */
+		kev.udata = kn->kn_kevent.udata;/* preserve udata */
+		error = kqueue_register(kq, &kev, NULL, 0);
+		if (error)
+			kn->kn_fflags |= NOTE_TRACKERR;
+
+		/*
+		 * Then register another knote to track other potential events
+		 * from the new process.
 		 */
 		kev.ident = pid;
 		kev.filter = kn->kn_filter;
@@ -1129,7 +1159,7 @@ findkn:
 
 		if (fp->f_type == DTYPE_KQUEUE) {
 			/*
-			 * if we add some inteligence about what we are doing,
+			 * If we add some intelligence about what we are doing,
 			 * we should be able to support events on ourselves.
 			 * We need to know when we are doing this to prevent
 			 * getting both the knlist lock and the kq lock since
@@ -1161,7 +1191,18 @@ findkn:
 			kqueue_expand(kq, fops, kev->ident, waitok);
 
 		KQ_LOCK(kq);
-		if (kq->kq_knhashmask != 0) {
+
+		/*
+		 * If possible, find an existing knote to use for this kevent.
+		 */
+		if (kev->filter == EVFILT_PROC &&
+		    (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
+			/* This is an internal creation of a process tracking
+			 * note. Don't attempt to coalesce this with an
+			 * existing note.
+			 */
+			;			
+		} else if (kq->kq_knhashmask != 0) {
 			struct klist *list;
 
 			list = &kq->kq_knhash[
@@ -1173,7 +1214,7 @@ findkn:
 		}
 	}
 
-	/* knote is in the process of changing, wait for it to stablize. */
+	/* knote is in the process of changing, wait for it to stabilize. */
 	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
 		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 		if (filedesc_unlock) {

Modified: head/sys/sys/event.h
==============================================================================
--- head/sys/sys/event.h	Thu Jan 28 20:21:15 2016	(r295011)
+++ head/sys/sys/event.h	Thu Jan 28 20:24:15 2016	(r295012)
@@ -80,6 +80,7 @@ struct kevent {
 #define EV_SYSFLAGS	0xF000		/* reserved by system */
 #define	EV_DROP		0x1000		/* note should be dropped */
 #define EV_FLAG1	0x2000		/* filter-specific flag */
+#define EV_FLAG2	0x4000		/* filter-specific flag */
 
 /* returned values */
 #define EV_EOF		0x8000		/* EOF detected */

Modified: head/tests/sys/kqueue/common.h
==============================================================================
--- head/tests/sys/kqueue/common.h	Thu Jan 28 20:21:15 2016	(r295011)
+++ head/tests/sys/kqueue/common.h	Thu Jan 28 20:24:15 2016	(r295012)
@@ -46,6 +46,7 @@ int vnode_fd;
 
 extern const char * kevent_to_str(struct kevent *);
 struct kevent * kevent_get(int);
+struct kevent * kevent_get_timeout(int, int);
 
 
 void kevent_cmp(struct kevent *, struct kevent *);

Modified: head/tests/sys/kqueue/main.c
==============================================================================
--- head/tests/sys/kqueue/main.c	Thu Jan 28 20:21:15 2016	(r295011)
+++ head/tests/sys/kqueue/main.c	Thu Jan 28 20:24:15 2016	(r295012)
@@ -69,6 +69,28 @@ kevent_get(int kqfd)
     return (kev);
 }
 
+/* Retrieve a single kevent, specifying a maximum time to wait for it. */
+struct kevent *
+kevent_get_timeout(int kqfd, int seconds)
+{
+    int nfds;
+    struct kevent *kev;
+    struct timespec timeout = {seconds, 0};
+
+    if ((kev = calloc(1, sizeof(*kev))) == NULL)
+	err(1, "out of memory");
+    
+    nfds = kevent(kqfd, NULL, 0, kev, 1, &timeout);
+    if (nfds < 0) {
+        err(1, "kevent(2)");
+    } else if (nfds == 0) {
+        free(kev);
+        kev = NULL;
+    }
+
+    return (kev);
+}
+
 char *
 kevent_fflags_dump(struct kevent *kev)
 {
@@ -82,25 +104,39 @@ kevent_fflags_dump(struct kevent *kev)
 	abort();
 
     /* Not every filter has meaningful fflags */
-    if (kev->filter != EVFILT_VNODE) {
-    	snprintf(buf, 1024, "fflags = %d", kev->fflags);
-	return (buf);
-    }
-
-    snprintf(buf, 1024, "fflags = %d (", kev->fflags);
-    KEVFFL_DUMP(NOTE_DELETE);
-    KEVFFL_DUMP(NOTE_WRITE);
-    KEVFFL_DUMP(NOTE_EXTEND);
+    if (kev->filter == EVFILT_PROC) {
+        snprintf(buf, 1024, "fflags = %x (", kev->fflags);
+        KEVFFL_DUMP(NOTE_EXIT);
+        KEVFFL_DUMP(NOTE_FORK);
+        KEVFFL_DUMP(NOTE_EXEC);
+        KEVFFL_DUMP(NOTE_CHILD);
+        KEVFFL_DUMP(NOTE_TRACKERR);
+        KEVFFL_DUMP(NOTE_TRACK);
+        buf[strlen(buf) - 1] = ')';
+    } else if (kev->filter == EVFILT_PROCDESC) {
+        snprintf(buf, 1024, "fflags = %x (", kev->fflags);
+        KEVFFL_DUMP(NOTE_EXIT);
+        KEVFFL_DUMP(NOTE_FORK);
+        KEVFFL_DUMP(NOTE_EXEC);
+        buf[strlen(buf) - 1] = ')';
+    } else if (kev->filter == EVFILT_VNODE) {
+        snprintf(buf, 1024, "fflags = %x (", kev->fflags);
+        KEVFFL_DUMP(NOTE_DELETE);
+        KEVFFL_DUMP(NOTE_WRITE);
+        KEVFFL_DUMP(NOTE_EXTEND);
 #if HAVE_NOTE_TRUNCATE
-    KEVFFL_DUMP(NOTE_TRUNCATE);
+        KEVFFL_DUMP(NOTE_TRUNCATE);
 #endif
-    KEVFFL_DUMP(NOTE_ATTRIB);
-    KEVFFL_DUMP(NOTE_LINK);
-    KEVFFL_DUMP(NOTE_RENAME);
+        KEVFFL_DUMP(NOTE_ATTRIB);
+        KEVFFL_DUMP(NOTE_LINK);
+        KEVFFL_DUMP(NOTE_RENAME);
 #if HAVE_NOTE_REVOKE
-    KEVFFL_DUMP(NOTE_REVOKE);
+        KEVFFL_DUMP(NOTE_REVOKE);
 #endif
-    buf[strlen(buf) - 1] = ')';
+        buf[strlen(buf) - 1] = ')';
+    } else {
+    	snprintf(buf, 1024, "fflags = %x", kev->fflags);
+    }
 
     return (buf);
 }
@@ -260,6 +296,15 @@ main(int argc, char **argv)
         argc--;
     }
 
+    /*
+     * Some tests fork.  If output is fully buffered,
+     * the children inherit some buffered data and flush
+     * it when they exit, causing some data to be printed twice.
+     * Use line buffering to avoid this problem.
+     */
+    setlinebuf(stdout);
+    setlinebuf(stderr);
+
     test_kqueue();
     test_kqueue_close();
 

Modified: head/tests/sys/kqueue/proc.c
==============================================================================
--- head/tests/sys/kqueue/proc.c	Thu Jan 28 20:21:15 2016	(r295011)
+++ head/tests/sys/kqueue/proc.c	Thu Jan 28 20:24:15 2016	(r295012)
@@ -74,6 +74,172 @@ add_and_delete(void)
 
 }
 
+static void
+proc_track(int sleep_time)
+{
+    char test_id[64];
+    struct kevent kev;
+    pid_t pid;
+    int pipe_fd[2];
+    ssize_t result;
+
+    snprintf(test_id, sizeof(test_id),
+             "kevent(EVFILT_PROC, NOTE_TRACK); sleep %d", sleep_time);
+    test_begin(test_id);
+    test_no_kevents();
+
+    if (pipe(pipe_fd)) {
+        err(1, "pipe (parent) failed! (%s() at %s:%d)",
+            __func__, __FILE__, __LINE__);
+    }
+
+    /* Create a child to track. */
+    pid = fork();
+    if (pid == 0) { /* Child */
+        pid_t grandchild = -1;
+
+        /*
+         * Give the parent a chance to start tracking us.
+         */
+        result = read(pipe_fd[1], test_id, 1);
+        if (result != 1) {
+            err(1, "read from pipe in child failed! (ret %zd) (%s() at %s:%d)",
+                result, __func__, __FILE__, __LINE__);
+        }
+
+        /*
+         * Spawn a grandchild that will immediately exit. If the kernel has bug
+         * 180385, the parent will see a kevent with both NOTE_CHILD and
+         * NOTE_EXIT. If that bug is fixed, it will see two separate kevents
+         * for those notes. Note that this triggers the conditions for
+         * detecting the bug quite reliably on a 1 CPU system (or if the test
+         * process is restricted to a single CPU), but may not trigger it on a
+         * multi-CPU system.
+         */
+        grandchild = fork();
+        if (grandchild == 0) { /* Grandchild */
+            if (sleep_time) sleep(sleep_time);
+            exit(1);
+        } else if (grandchild == -1) { /* Error */
+            err(1, "fork (grandchild) failed! (%s() at %s:%d)",
+                __func__, __FILE__, __LINE__);
+        } else { /* Child (Grandchild Parent) */
+            printf(" -- grandchild created (pid %d)\n", (int) grandchild);
+        }
+        if (sleep_time) sleep(sleep_time);
+        exit(0);
+    } else if (pid == -1) { /* Error */
+        err(1, "fork (child) failed! (%s() at %s:%d)",
+            __func__, __FILE__, __LINE__);
+    }
+    
+    printf(" -- child created (pid %d)\n", (int) pid);
+
+    kevent_add(kqfd, &kev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
+               NOTE_TRACK | NOTE_EXEC | NOTE_EXIT | NOTE_FORK,
+               0, NULL);
+
+    printf(" -- tracking child (pid %d)\n", (int) pid);
+
+    /* Now that we're tracking the child, tell it to proceed. */
+    result = write(pipe_fd[0], test_id, 1);
+    if (result != 1) {
+        err(1, "write to pipe in parent failed! (ret %zd) (%s() at %s:%d)",
+            result, __func__, __FILE__, __LINE__);
+    }
+
+    /*
+     * Several events should be received:
+     *  - NOTE_FORK (from child)
+     *  - NOTE_CHILD (from grandchild)
+     *  - NOTE_EXIT (from grandchild)
+     *  - NOTE_EXIT (from child)
+     *
+     * The NOTE_FORK and NOTE_EXIT from the child could be combined into a
+     * single event, but the NOTE_CHILD and NOTE_EXIT from the grandchild must
+     * not be combined.
+     *
+     * The loop continues until no events are received within a 5 second
+     * period, at which point it is assumed that no more will be coming. The
+     * loop is deliberately designed to attempt to get events even after all
+     * the expected ones are received in case some spurious events are
+     * generated as well as the expected ones.
+     */
+    {
+        int child_exit = 0;
+        int child_fork = 0;
+        int gchild_exit = 0;
+        int gchild_note = 0;
+        pid_t gchild_pid = -1;
+        int done = 0;
+        
+        while (!done)
+        {
+            int handled = 0;
+            struct kevent *kevp;
+
+            kevp = kevent_get_timeout(kqfd, 5);
+            if (kevp == NULL) {
+                done = 1;
+            } else {
+                printf(" -- Received kevent: %s\n", kevent_to_str(kevp));
+            
+                if ((kevp->fflags & NOTE_CHILD) && (kevp->fflags & NOTE_EXIT)) {
+                    errx(1, "NOTE_CHILD and NOTE_EXIT in same kevent: %s", kevent_to_str(kevp));
+                }
+            
+                if (kevp->fflags & NOTE_CHILD) {
+                    if (kevp->data == pid) {
+                        if (!gchild_note) {
+                            ++gchild_note;
+                            gchild_pid = kevp->ident;
+                            ++handled;
+                        } else {
+                            errx(1, "Spurious NOTE_CHILD: %s", kevent_to_str(kevp));
+                        }
+                    }
+                }
+
+                if (kevp->fflags & NOTE_EXIT) {
+                    if ((kevp->ident == pid) && (!child_exit)) {
+                        ++child_exit;
+                        ++handled;
+                    } else if ((kevp->ident == gchild_pid) && (!gchild_exit)) {
+                        ++gchild_exit;
+                        ++handled;
+                    } else {
+                        errx(1, "Spurious NOTE_EXIT: %s", kevent_to_str(kevp));
+                    }
+                }
+
+                if (kevp->fflags & NOTE_FORK) {
+                    if ((kevp->ident == pid) && (!child_fork)) {
+                        ++child_fork;
+                        ++handled;
+                    } else {
+                        errx(1, "Spurious NOTE_FORK: %s", kevent_to_str(kevp));
+                    }
+                }
+
+                if (!handled) {
+                    errx(1, "Spurious kevent: %s", kevent_to_str(kevp));
+                }
+
+                free(kevp);
+            }
+        }
+        
+        /* Make sure all expected events were received. */
+        if (child_exit && child_fork && gchild_exit && gchild_note) {
+            printf(" -- Received all expected events.\n");
+        } else {
+            errx(1, "Did not receive all expected events.");
+        }
+    }
+
+    success();
+}
+
 #ifdef TODO
 static void
 event_trigger(void)
@@ -236,6 +402,8 @@ test_evfilt_proc()
     signal(SIGUSR1, sig_handler);
 
     add_and_delete();
+    proc_track(0); /* Run without sleeping before children exit. */
+    proc_track(1); /* Sleep a bit in the children before exiting. */
 
 #if TODO
     event_trigger();



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601282024.u0SKOFlb004850>