Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 5 Apr 2014 14:09:16 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r264146 - in head/sys: kern sys
Message-ID:  <201404051409.s35E9GYD013475@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Sat Apr  5 14:09:16 2014
New Revision: 264146
URL: http://svnweb.freebsd.org/changeset/base/264146

Log:
  When KN_INFLUX is set on the knote due to kqueue_register() or
  kqueue_scan() unlocking the kqueue to call f_event, knote() or
  knote_fork() should not skip the knote.  The knote is not going to
  disappear during the influx time, and the mutual exclusion between
  scan and knote() is ensured by both code pathes taking knlist lock.
  The race appears since knlist lock is before kq lock, so KN_INFLUX
  must be set, kq lock must be dropped and only then knlist lock can be
  taken.  The window between kq unlock and knlist lock causes lost
  events.
  
  Add a flag KN_SCAN to indicate that KN_INFLUX is set in a manner safe
  for the knote(), and check for it to ignore KN_INFLUX in the knote*()
  as needed.  Also, in knote(), remove the lockless check for the
  KN_INFLUX flag, which could also result in the lost notification.
  
  Reported and tested by:	Kohji Okuno <okuno.kohji@jp.panasonic.com>
  Discussed with:	jmg
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week

Modified:
  head/sys/kern/kern_event.c
  head/sys/sys/event.h

Modified: head/sys/kern/kern_event.c
==============================================================================
--- head/sys/kern/kern_event.c	Sat Apr  5 13:01:44 2014	(r264145)
+++ head/sys/kern/kern_event.c	Sat Apr  5 14:09:16 2014	(r264146)
@@ -474,7 +474,7 @@ knote_fork(struct knlist *list, int pid)
 			continue;
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
 			KQ_UNLOCK(kq);
 			continue;
 		}
@@ -1174,7 +1174,7 @@ findkn:
 	 * but doing so will not reset any filter which has already been
 	 * triggered.
 	 */
-	kn->kn_status |= KN_INFLUX;
+	kn->kn_status |= KN_INFLUX | KN_SCAN;
 	KQ_UNLOCK(kq);
 	KN_LIST_LOCK(kn);
 	kn->kn_kevent.udata = kev->udata;
@@ -1197,7 +1197,7 @@ done_ev_add:
 	KQ_LOCK(kq);
 	if (event)
 		KNOTE_ACTIVATE(kn, 1);
-	kn->kn_status &= ~KN_INFLUX;
+	kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
 	KN_LIST_UNLOCK(kn);
 
 	if ((kev->flags & EV_DISABLE) &&
@@ -1506,7 +1506,7 @@ retry:
 			KQ_LOCK(kq);
 			kn = NULL;
 		} else {
-			kn->kn_status |= KN_INFLUX;
+			kn->kn_status |= KN_INFLUX | KN_SCAN;
 			KQ_UNLOCK(kq);
 			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
 				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
@@ -1515,7 +1515,8 @@ retry:
 				KQ_LOCK(kq);
 				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 				kn->kn_status &=
-				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
+				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX |
+				    KN_SCAN);
 				kq->kq_count--;
 				KN_LIST_UNLOCK(kn);
 				influx = 1;
@@ -1545,7 +1546,7 @@ retry:
 			} else
 				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 			
-			kn->kn_status &= ~(KN_INFLUX);
+			kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
 			KN_LIST_UNLOCK(kn);
 			influx = 1;
 		}
@@ -1865,28 +1866,33 @@ knote(struct knlist *list, long hint, in
 	 */
 	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
 		kq = kn->kn_kq;
-		if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) {
+		KQ_LOCK(kq);
+		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
+			/*
+			 * Do not process the influx notes, except for
+			 * the influx coming from the kq unlock in the
+			 * kqueue_scan().  In the later case, we do
+			 * not interfere with the scan, since the code
+			 * fragment in kqueue_scan() locks the knlist,
+			 * and cannot proceed until we finished.
+			 */
+			KQ_UNLOCK(kq);
+		} else if ((lockflags & KNF_NOKQLOCK) != 0) {
+			kn->kn_status |= KN_INFLUX;
+			KQ_UNLOCK(kq);
+			error = kn->kn_fop->f_event(kn, hint);
 			KQ_LOCK(kq);
-			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
-				KQ_UNLOCK(kq);
-			} else if ((lockflags & KNF_NOKQLOCK) != 0) {
-				kn->kn_status |= KN_INFLUX;
-				KQ_UNLOCK(kq);
-				error = kn->kn_fop->f_event(kn, hint);
-				KQ_LOCK(kq);
-				kn->kn_status &= ~KN_INFLUX;
-				if (error)
-					KNOTE_ACTIVATE(kn, 1);
-				KQ_UNLOCK_FLUX(kq);
-			} else {
-				kn->kn_status |= KN_HASKQLOCK;
-				if (kn->kn_fop->f_event(kn, hint))
-					KNOTE_ACTIVATE(kn, 1);
-				kn->kn_status &= ~KN_HASKQLOCK;
-				KQ_UNLOCK(kq);
-			}
+			kn->kn_status &= ~KN_INFLUX;
+			if (error)
+				KNOTE_ACTIVATE(kn, 1);
+			KQ_UNLOCK_FLUX(kq);
+		} else {
+			kn->kn_status |= KN_HASKQLOCK;
+			if (kn->kn_fop->f_event(kn, hint))
+				KNOTE_ACTIVATE(kn, 1);
+			kn->kn_status &= ~KN_HASKQLOCK;
+			KQ_UNLOCK(kq);
 		}
-		kq = NULL;
 	}
 	if ((lockflags & KNF_LISTLOCKED) == 0)
 		list->kl_unlock(list->kl_lockarg); 

Modified: head/sys/sys/event.h
==============================================================================
--- head/sys/sys/event.h	Sat Apr  5 13:01:44 2014	(r264145)
+++ head/sys/sys/event.h	Sat Apr  5 14:09:16 2014	(r264146)
@@ -207,6 +207,7 @@ struct knote {
 #define KN_MARKER	0x20			/* ignore this knote */
 #define KN_KQUEUE	0x40			/* this knote belongs to a kq */
 #define KN_HASKQLOCK	0x80			/* for _inevent */
+#define	KN_SCAN		0x100			/* flux set in kqueue_scan() */
 	int			kn_sfflags;	/* saved filter flags */
 	intptr_t		kn_sdata;	/* saved data field */
 	union {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201404051409.s35E9GYD013475>