Date: Mon, 26 Dec 2016 19:33:40 +0000 (UTC)
From: Konstantin Belousov <kib@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r310617 - in head/sys: kern sys
Message-ID: <201612261933.uBQJXe7T071333@repo.freebsd.org>
Author: kib
Date: Mon Dec 26 19:33:40 2016
New Revision: 310617

URL: https://svnweb.freebsd.org/changeset/base/310617

Log:
  Make knote KN_INFLUX state counted.  This is the final fix for the
  issue closed by r310302 for knote().

  If the KN_INFLUX | KN_SCAN flags are set for the note passed to
  knote() or knote_fork(), i.e. the knote is being scanned, we might
  erroneously clear KN_INFLUX when finishing the notification.  For
  normal knote() this was fixed in r310302 simply by remembering that
  we do not own KN_INFLUX: there we own the knlist lock, so the scan
  thread cannot clear KN_INFLUX until we drop the lock.  For
  knote_fork() the situation is more complicated, since we must drop
  the knlist lock, a.k.a. the process lock, to register new knotes.

  Change KN_INFLUX into a counter and allow shared ownership of the
  in-flux state between the scan and knote_fork() or knote().  Both
  in-flux setters need to ensure that the knote is not dropped in
  parallel.  The assertion added to knote_drop(), that kn_influx == 1,
  verifies that the in-flux state is not shared when a knote is
  destroyed.

  Since the KBI of struct knote is changed by the addition of the int
  kn_influx field, reorder kn_hook and kn_hookid to fill the pad on
  LP64 arches [1].  This keeps sizeof(struct knote) at the same 128
  bytes on amd64 as before the addition of kn_influx.

  Reviewed by:	markj
  Suggested by:	markj [1]
  Tested by:	pho (previous version)
  Sponsored by:	The FreeBSD Foundation
  Differential revision:	https://reviews.freebsd.org/D8898

Modified:
  head/sys/kern/kern_event.c
  head/sys/sys/event.h

Modified: head/sys/kern/kern_event.c
==============================================================================
--- head/sys/kern/kern_event.c	Mon Dec 26 19:29:04 2016	(r310616)
+++ head/sys/kern/kern_event.c	Mon Dec 26 19:33:40 2016	(r310617)
@@ -193,7 +193,7 @@ static unsigned int kq_calloutmax = 4 *
 SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0,
     "Maximum number of callouts allocated for kqueue");
 
-/* XXX - ensure not KN_INFLUX?? */
+/* XXX - ensure not influx ? */
 #define KNOTE_ACTIVATE(kn, islock) do {					\
 	if ((islock))							\
 		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
@@ -254,6 +254,32 @@ kn_list_unlock(struct knlist *knl)
 	}
 }
 
+static bool
+kn_in_flux(struct knote *kn)
+{
+
+	return (kn->kn_influx > 0);
+}
+
+static void
+kn_enter_flux(struct knote *kn)
+{
+
+	KQ_OWNED(kn->kn_kq);
+	MPASS(kn->kn_influx < INT_MAX);
+	kn->kn_influx++;
+}
+
+static bool
+kn_leave_flux(struct knote *kn)
+{
+
+	KQ_OWNED(kn->kn_kq);
+	MPASS(kn->kn_influx > 0);
+	kn->kn_influx--;
+	return (kn->kn_influx == 0);
+}
+
 #define	KNL_ASSERT_LOCK(knl, islocked) do {				\
 	if (islocked)							\
 		KNL_ASSERT_LOCKED(knl);					\
@@ -498,7 +524,7 @@ knote_fork(struct knlist *list, int pid)
 	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
+		if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
 			KQ_UNLOCK(kq);
 			continue;
 		}
@@ -521,7 +547,7 @@ knote_fork(struct knlist *list, int pid)
 		 * track the child. Drop the locks in preparation for
 		 * the call to kqueue_register().
 		 */
-		kn->kn_status |= KN_INFLUX;
+		kn_enter_flux(kn);
 		KQ_UNLOCK(kq);
 		list->kl_unlock(list->kl_lockarg);
 
@@ -561,7 +587,7 @@ knote_fork(struct knlist *list, int pid)
 		if (kn->kn_fop->f_event(kn, NOTE_FORK))
 			KNOTE_ACTIVATE(kn, 0);
 		KQ_LOCK(kq);
-		kn->kn_status &= ~KN_INFLUX;
+		kn_leave_flux(kn);
 		KQ_UNLOCK_FLUX(kq);
 		list->kl_lock(list->kl_lockarg);
 	}
@@ -1262,7 +1288,7 @@ findkn:
 	}
 
 	/* knote is in the process of changing, wait for it to stabilize. */
-	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+	if (kn != NULL && kn_in_flux(kn)) {
 		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 		if (filedesc_unlock) {
 			FILEDESC_XUNLOCK(td->td_proc->p_fd);
@@ -1306,7 +1332,8 @@ findkn:
 			kn->kn_kevent = *kev;
 			kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
 			    EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT);
-			kn->kn_status = KN_INFLUX|KN_DETACHED;
+			kn->kn_status = KN_DETACHED;
+			kn_enter_flux(kn);
 
 			error = knote_attach(kn, kq);
 			KQ_UNLOCK(kq);
@@ -1330,7 +1357,7 @@ findkn:
 		}
 
 		if (kev->flags & EV_DELETE) {
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			if (!(kn->kn_status & KN_DETACHED))
 				kn->kn_fop->f_detach(kn);
@@ -1348,7 +1375,8 @@ findkn:
 	 * but doing so will not reset any filter which has already been
 	 * triggered.
 	 */
-	kn->kn_status |= KN_INFLUX | KN_SCAN;
+	kn->kn_status |= KN_SCAN;
+	kn_enter_flux(kn);
 	KQ_UNLOCK(kq);
 	knl = kn_list_lock(kn);
 	kn->kn_kevent.udata = kev->udata;
@@ -1383,7 +1411,8 @@ done_ev_add:
 	if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) ==
 	    KN_ACTIVE)
 		knote_enqueue(kn);
-	kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
+	kn->kn_status &= ~KN_SCAN;
+	kn_leave_flux(kn);
 	kn_list_unlock(knl);
 	KQ_UNLOCK_FLUX(kq);
 
@@ -1546,7 +1575,7 @@ kqueue_task(void *arg, int pending)
 
 /*
  * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
- * We treat KN_MARKER knotes as if they are INFLUX.
+ * We treat KN_MARKER knotes as if they are in flux.
  */
 static int
 kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
@@ -1620,7 +1649,7 @@ retry:
 		kn = TAILQ_FIRST(&kq->kq_head);
 
 		if ((kn->kn_status == KN_MARKER && kn != marker) ||
-		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+		    kn_in_flux(kn)) {
 			if (influx) {
 				influx = 0;
 				KQ_FLUX_WAKEUP(kq);
@@ -1643,17 +1672,17 @@ retry:
 				goto retry;
 			goto done;
 		}
 
-		KASSERT((kn->kn_status & KN_INFLUX) == 0,
-		    ("KN_INFLUX set when not suppose to be"));
+		KASSERT(!kn_in_flux(kn),
+		    ("knote %p is unexpectedly in flux", kn));
 		if ((kn->kn_flags & EV_DROP) == EV_DROP) {
 			kn->kn_status &= ~KN_QUEUED;
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			kq->kq_count--;
 			KQ_UNLOCK(kq);
 			/*
-			 * We don't need to lock the list since we've marked
-			 * it _INFLUX.
+			 * We don't need to lock the list since we've
+			 * marked it as in flux.
 			 */
 			if (!(kn->kn_status & KN_DETACHED))
 				kn->kn_fop->f_detach(kn);
@@ -1662,12 +1691,12 @@ retry:
 			continue;
 		} else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
 			kn->kn_status &= ~KN_QUEUED;
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			kq->kq_count--;
 			KQ_UNLOCK(kq);
 			/*
-			 * We don't need to lock the list since we've marked
-			 * it _INFLUX.
+			 * We don't need to lock the list since we've
+			 * marked the knote as being in flux.
 			 */
 			*kevp = kn->kn_kevent;
 			if (!(kn->kn_status & KN_DETACHED))
@@ -1676,7 +1705,8 @@ retry:
 			KQ_LOCK(kq);
 			kn = NULL;
 		} else {
-			kn->kn_status |= KN_INFLUX | KN_SCAN;
+			kn->kn_status |= KN_SCAN;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
 				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
@@ -1684,9 +1714,9 @@ retry:
 			if (kn->kn_fop->f_event(kn, 0) == 0) {
 				KQ_LOCK(kq);
 				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
-				kn->kn_status &=
-				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX |
-				    KN_SCAN);
+				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE |
+				    KN_SCAN);
+				kn_leave_flux(kn);
 				kq->kq_count--;
 				kn_list_unlock(knl);
 				influx = 1;
@@ -1716,7 +1746,8 @@ retry:
 			} else
 				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 
-			kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
+			kn->kn_status &= ~KN_SCAN;
+			kn_leave_flux(kn);
 			kn_list_unlock(knl);
 			influx = 1;
 		}
@@ -1864,12 +1895,12 @@ kqueue_drain(struct kqueue *kq, struct t
 
 	for (i = 0; i < kq->kq_knlistsize; i++) {
 		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
-			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+			if (kn_in_flux(kn)) {
 				kq->kq_state |= KQ_FLUXWAIT;
 				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
 				continue;
 			}
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			if (!(kn->kn_status & KN_DETACHED))
 				kn->kn_fop->f_detach(kn);
@@ -1880,13 +1911,13 @@ kqueue_drain(struct kqueue *kq, struct t
 	if (kq->kq_knhashmask != 0) {
 		for (i = 0; i <= kq->kq_knhashmask; i++) {
 			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
-				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+				if (kn_in_flux(kn)) {
 					kq->kq_state |= KQ_FLUXWAIT;
 					msleep(kq, &kq->kq_lock, PSOCK,
 					    "kqclo2", 0);
 					continue;
 				}
-				kn->kn_status |= KN_INFLUX;
+				kn_enter_flux(kn);
 				KQ_UNLOCK(kq);
 				if (!(kn->kn_status & KN_DETACHED))
 					kn->kn_fop->f_detach(kn);
@@ -2010,7 +2041,6 @@ knote(struct knlist *list, long hint, in
 	struct kqueue *kq;
 	struct knote *kn, *tkn;
 	int error;
-	bool own_influx;
 
 	if (list == NULL)
 		return;
@@ -2021,7 +2051,7 @@ knote(struct knlist *list, long hint, in
 	list->kl_lock(list->kl_lockarg);
 
 	/*
-	 * If we unlock the list lock (and set KN_INFLUX), we can
+	 * If we unlock the list lock (and enter influx), we can
 	 * eliminate the kqueue scheduling, but this will introduce
 	 * four lock/unlock's for each knote to test.  Also, marker
 	 * would be needed to keep iteration position, since filters
@@ -2030,7 +2060,7 @@ knote(struct knlist *list, long hint, in
 	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) {
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
+		if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
			/*
			 * Do not process the influx notes, except for
			 * the influx coming from the kq unlock in the
@@ -2041,14 +2071,11 @@ knote(struct knlist *list, long hint, in
			 */
 			KQ_UNLOCK(kq);
 		} else if ((lockflags & KNF_NOKQLOCK) != 0) {
-			own_influx = (kn->kn_status & KN_INFLUX) == 0;
-			if (own_influx)
-				kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			error = kn->kn_fop->f_event(kn, hint);
 			KQ_LOCK(kq);
-			if (own_influx)
-				kn->kn_status &= ~KN_INFLUX;
+			kn_leave_flux(kn);
 			if (error)
 				KNOTE_ACTIVATE(kn, 1);
 			KQ_UNLOCK_FLUX(kq);
@@ -2070,10 +2097,12 @@ knote(struct knlist *list, long hint, in
 void
 knlist_add(struct knlist *knl, struct knote *kn, int islocked)
 {
+
 	KNL_ASSERT_LOCK(knl, islocked);
 	KQ_NOTOWNED(kn->kn_kq);
-	KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
-	    (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
+	KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn));
+	KASSERT((kn->kn_status & KN_DETACHED) != 0,
+	    ("knote %p was not detached", kn));
 	if (!islocked)
 		knl->kl_lock(knl->kl_lockarg);
 	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
@@ -2089,12 +2118,13 @@ static void
 knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked,
     int kqislocked)
 {
-	KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
+
+	KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked"));
 	KNL_ASSERT_LOCK(knl, knlislocked);
 	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
-	if (!kqislocked)
-		KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
-		    ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
+	KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn));
+	KASSERT((kn->kn_status & KN_DETACHED) == 0,
+	    ("knote %p was already detached", kn));
 	if (!knlislocked)
 		knl->kl_lock(knl->kl_lockarg);
 	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
@@ -2287,30 +2317,30 @@ again:
 	SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & KN_INFLUX)) {
+		if (kn_in_flux(kn)) {
 			KQ_UNLOCK(kq);
 			continue;
 		}
 		knlist_remove_kq(knl, kn, 1, 1);
 		if (killkn) {
-			kn->kn_status |= KN_INFLUX | KN_DETACHED;
+			kn->kn_status |= KN_DETACHED;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			knote_drop(kn, td);
 		} else {
 			/* Make sure cleared knotes disappear soon */
-			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+			kn->kn_flags |= EV_EOF | EV_ONESHOT;
 			KQ_UNLOCK(kq);
 		}
 		kq = NULL;
 	}
 
 	if (!SLIST_EMPTY(&knl->kl_list)) {
-		/* there are still KN_INFLUX remaining */
+		/* there are still in flux knotes remaining */
 		kn = SLIST_FIRST(&knl->kl_list);
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		KASSERT(kn->kn_status & KN_INFLUX,
-		    ("knote removed w/o list lock"));
+		KASSERT(kn_in_flux(kn), ("knote removed w/o list lock"));
 		knl->kl_unlock(knl->kl_lockarg);
 		kq->kq_state |= KQ_FLUXWAIT;
 		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
@@ -2352,7 +2382,7 @@ again:
 	influx = 0;
 	while (kq->kq_knlistsize > fd &&
 	    (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
-		if (kn->kn_status & KN_INFLUX) {
+		if (kn_in_flux(kn)) {
 			/* someone else might be waiting on our knote */
 			if (influx)
 				wakeup(kq);
 			kq->kq_state |= KQ_FLUXWAIT;
@@ -2360,7 +2390,7 @@ again:
 			msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
 			goto again;
 		}
-		kn->kn_status |= KN_INFLUX;
+		kn_enter_flux(kn);
 		KQ_UNLOCK(kq);
 		if (!(kn->kn_status & KN_DETACHED))
 			kn->kn_fop->f_detach(kn);
@@ -2377,7 +2407,7 @@ knote_attach(struct knote *kn, struct kq
 {
 	struct klist *list;
 
-	KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
+	KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn));
 	KQ_OWNED(kq);
 
 	if (kn->kn_fop->f_isfd) {
@@ -2395,7 +2425,7 @@ knote_attach(struct knote *kn, struct kq
 
 /*
  * knote must already have been detached using the f_detach method.
- * no lock need to be held, it is assumed that the KN_INFLUX flag is set
+ * no lock need to be held, it is assumed that the influx state is set
 * to prevent other removal.
 */
@@ -2407,10 +2437,10 @@ knote_drop(struct knote *kn, struct thre
 
 	kq = kn->kn_kq;
 	KQ_NOTOWNED(kq);
-	KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
-	    ("knote_drop called without KN_INFLUX set in kn_status"));
-
 	KQ_LOCK(kq);
+	KASSERT(kn->kn_influx == 1,
+	    ("knote_drop called on %p with influx %d", kn, kn->kn_influx));
+
 	if (kn->kn_fop->f_isfd)
 		list = &kq->kq_knlist[kn->kn_id];
 	else

Modified: head/sys/sys/event.h
==============================================================================
--- head/sys/sys/event.h	Mon Dec 26 19:29:04 2016	(r310616)
+++ head/sys/sys/event.h	Mon Dec 26 19:33:40 2016	(r310617)
@@ -202,8 +202,11 @@ struct filterops {
 };
 
 /*
- * Setting the KN_INFLUX flag enables you to unlock the kq that this knote
- * is on, and modify kn_status as if you had the KQ lock.
+ * An in-flux knote cannot be dropped from its kq while the kq is
+ * unlocked.  If the KN_SCAN flag is not set, a thread can only set
+ * kn_influx when it is exclusive owner of the knote state, and can
+ * modify kn_status as if it had the KQ lock.  KN_SCAN must not be set
+ * on a knote which is already in flux.
 *
 * kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock.
 */
@@ -214,16 +217,18 @@ struct knote {
 	TAILQ_ENTRY(knote)	kn_tqe;
 	struct kqueue	*kn_kq;	/* which queue we are on */
 	struct kevent	kn_kevent;
+	void		*kn_hook;
+	int		kn_hookid;
 	int		kn_status;	/* protected by kq lock */
#define	KN_ACTIVE	0x01		/* event has been triggered */
#define	KN_QUEUED	0x02		/* event is on queue */
#define	KN_DISABLED	0x04		/* event is disabled */
#define	KN_DETACHED	0x08		/* knote is detached */
-#define	KN_INFLUX	0x10		/* knote is in flux */
#define	KN_MARKER	0x20		/* ignore this knote */
#define	KN_KQUEUE	0x40		/* this knote belongs to a kq */
#define	KN_HASKQLOCK	0x80		/* for _inevent */
#define	KN_SCAN		0x100		/* flux set in kqueue_scan() */
+	int		kn_influx;
 	int		kn_sfflags;	/* saved filter flags */
 	intptr_t	kn_sdata;	/* saved data field */
 	union {
@@ -234,8 +239,6 @@ struct knote {
 		void		*p_v;	/* generic other pointer */
 	} kn_ptr;
 	struct filterops *kn_fop;
-	void		*kn_hook;
-	int		kn_hookid;
 
#define kn_id		kn_kevent.ident
#define kn_filter	kn_kevent.filter
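The heart of the change is the switch from a single ownership bit to a
reference count.  Below is a minimal user-space sketch of the same
pattern, for illustration only: a pthread mutex stands in for the kq
lock, assert() stands in for KASSERT/MPASS, and the note_* names are
hypothetical rather than kernel API:

	#include <assert.h>
	#include <limits.h>
	#include <pthread.h>

	struct note {
		pthread_mutex_t	lock;	/* stands in for the kq lock */
		int		influx;	/* counted in-flux state, cf. kn_influx */
	};

	/* Caller holds the lock, like kn_enter_flux() under KQ_OWNED(). */
	static void
	note_enter_flux(struct note *n)
	{
		assert(n->influx < INT_MAX);
		n->influx++;
	}

	/*
	 * Caller holds the lock, like kn_leave_flux().  Returns nonzero
	 * when the last in-flux holder left, i.e. when flux waiters may
	 * be woken up (the KQ_UNLOCK_FLUX() case).
	 */
	static int
	note_leave_flux(struct note *n)
	{
		assert(n->influx > 0);
		n->influx--;
		return (n->influx == 0);
	}

	/*
	 * Destruction requires sole ownership of the in-flux state;
	 * this mirrors the kn_influx == 1 assertion in knote_drop().
	 */
	static void
	note_drop(struct note *n)
	{
		pthread_mutex_lock(&n->lock);
		assert(n->influx == 1);
		pthread_mutex_unlock(&n->lock);
		/* ... free the note here ... */
	}

Because the state is counted, a scanning thread and knote_fork() can
hold the same knote in flux at the same time; the knote can be freed
only after the count drains back to the single dropping owner.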