Date: Wed, 13 Feb 2019 09:03:24 +0000 (UTC) From: Vincenzo Maffione <vmaffione@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r344090 - stable/12/sys/dev/netmap Message-ID: <201902130903.x1D93Oia064565@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: vmaffione Date: Wed Feb 13 09:03:23 2019 New Revision: 344090 URL: https://svnweb.freebsd.org/changeset/base/344090 Log: MFC r343579 netmap: fix lock order reversal related to kqueue usage When using poll(), select() or kevent() on netmap file descriptors, netmap executes the equivalent of NIOCTXSYNC and NIOCRXSYNC commands, before collecting the events that are ready. In other words, the poll/kevent callback has side effects. This is done to avoid the overhead of two system call per iteration (e.g., poll() + ioctl(NIOC*XSYNC)). When the kqueue subsystem invokes the kqueue(9) f_event callback (netmap_knrw), it holds the lock of the struct knlist object associated to the netmap port (the lock is provided at initialization, by calling knlist_init_mtx). However, netmap_knrw() may need to wake up another netmap port (or even the same one), which means that it may need to call knote(). Since knote() needs the lock of the struct knlist object associated to the to-be-wake-up netmap port, it is possible to have a lock order reversal problem (AB/BA deadlock). This change prevents the deadlock by executing the knote() call in a per-selinfo taskqueue, where it is possible to hold a mutex. Reviewed by: aleksandr.fedorov_itglobal.com Differential Revision: https://reviews.freebsd.org/D18956 Modified: stable/12/sys/dev/netmap/netmap.c stable/12/sys/dev/netmap/netmap_freebsd.c stable/12/sys/dev/netmap/netmap_kern.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/dev/netmap/netmap.c ============================================================================== --- stable/12/sys/dev/netmap/netmap.c Wed Feb 13 07:37:33 2019 (r344089) +++ stable/12/sys/dev/netmap/netmap.c Wed Feb 13 09:03:23 2019 (r344090) @@ -830,6 +830,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int struct netmap_kring *kring; u_int n[NR_TXRX]; enum txrx t; + int err = 0; if (na->tx_rings != NULL) { if (netmap_debug & NM_DEBUG_ON) @@ -869,7 +870,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int for (i = 0; i < n[t]; i++) { kring = NMR(na, t)[i]; bzero(kring, sizeof(*kring)); - kring->na = na; kring->notify_na = na; kring->ring_id = i; kring->tx = t; @@ -895,13 +895,21 @@ netmap_krings_create(struct netmap_adapter *na, u_int nm_txrx2str(t), i); nm_prdis("ktx %s h %d c %d t %d", kring->name, kring->rhead, kring->rcur, kring->rtail); + err = nm_os_selinfo_init(&kring->si, kring->name); + if (err) { + netmap_krings_delete(na); + return err; + } mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF); - nm_os_selinfo_init(&kring->si); + kring->na = na; /* setting this field marks the mutex as initialized */ } - nm_os_selinfo_init(&na->si[t]); + err = nm_os_selinfo_init(&na->si[t], na->name); + if (err) { + netmap_krings_delete(na); + return err; + } } - return 0; } @@ -925,7 +933,8 @@ netmap_krings_delete(struct netmap_adapter *na) /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&(*kring)->q_lock); + if ((*kring)->na != NULL) + mtx_destroy(&(*kring)->q_lock); nm_os_selinfo_uninit(&(*kring)->si); } nm_os_free(na->tx_rings); Modified: stable/12/sys/dev/netmap/netmap_freebsd.c ============================================================================== --- stable/12/sys/dev/netmap/netmap_freebsd.c Wed Feb 13 07:37:33 2019 (r344089) +++ stable/12/sys/dev/netmap/netmap_freebsd.c Wed Feb 13 09:03:23 2019 (r344090) @@ -58,6 +58,7 @@ #include <sys/unistd.h> /* RFNOWAIT */ #include <sys/sched.h> /* sched_bind() */ #include <sys/smp.h> /* mp_maxid */ +#include <sys/taskqueue.h> /* taskqueue_enqueue(), taskqueue_create(), ... */ #include <net/if.h> #include <net/if_var.h> #include <net/if_types.h> /* IFT_ETHER */ @@ -75,16 +76,48 @@ /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ -void nm_os_selinfo_init(NM_SELINFO_T *si) { - struct mtx *m = &si->m; - mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); - knlist_init_mtx(&si->si.si_note, m); +static void +nm_kqueue_notify(void *opaque, int pending) +{ + struct nm_selinfo *si = opaque; + + /* We use a non-zero hint to distinguish this notification call + * from the call done in kqueue_scan(), which uses hint=0. + */ + KNOTE_UNLOCKED(&si->si.si_note, /*hint=*/0x100); } +int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) { + int err; + + TASK_INIT(&si->ntfytask, 0, nm_kqueue_notify, si); + si->ntfytq = taskqueue_create(name, M_NOWAIT, + taskqueue_thread_enqueue, &si->ntfytq); + if (si->ntfytq == NULL) + return -ENOMEM; + err = taskqueue_start_threads(&si->ntfytq, 1, PI_NET, "tq %s", name); + if (err) { + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; + return err; + } + + snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name); + mtx_init(&si->m, si->mtxname, NULL, MTX_DEF); + knlist_init_mtx(&si->si.si_note, &si->m); + + return (0); +} + void nm_os_selinfo_uninit(NM_SELINFO_T *si) { - /* XXX kqueue(9) needed; these will mirror knlist_init. */ + if (si->ntfytq == NULL) { + return; /* si was not initialized */ + } + taskqueue_drain(si->ntfytq, &si->ntfytask); + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; knlist_delete(&si->si.si_note, curthread, /*islocked=*/0); knlist_destroy(&si->si.si_note); /* now we don't need the mutex anymore */ @@ -1292,13 +1325,18 @@ nm_os_kctx_destroy(struct nm_kctx *nmk) /* * In addition to calling selwakeuppri(), nm_os_selwakeup() also - * needs to call KNOTE to wake up kqueue listeners. - * We use a non-zero 'hint' argument to inform the netmap_knrw() - * function that it is being called from 'nm_os_selwakeup'; this - * is necessary because when netmap_knrw() is called by the kevent - * subsystem (i.e. kevent_scan()) we also need to call netmap_poll(). - * The knote uses a private mutex associated to the 'si' (see struct - * selinfo, struct nm_selinfo, and nm_os_selinfo_init). + * needs to call knote() to wake up kqueue listeners. + * This operation is deferred to a taskqueue in order to avoid possible + * lock order reversals; these may happen because knote() grabs a + * private lock associated to the 'si' (see struct selinfo, + * struct nm_selinfo, and nm_os_selinfo_init), and nm_os_selwakeup() + * can be called while holding the lock associated to a different + * 'si'. + * When calling knote() we use a non-zero 'hint' argument to inform + * the netmap_knrw() function that it is being called from + * 'nm_os_selwakeup'; this is necessary because when netmap_knrw() is + * called by the kevent subsystem (i.e. kevent_scan()) we also need to + * call netmap_poll(). * * The netmap_kqfilter() function registers one or another f_event * depending on read or write mode. A pointer to the struct @@ -1313,11 +1351,7 @@ void nm_os_selwakeup(struct nm_selinfo *si) { selwakeuppri(&si->si, PI_NET); - /* We use a non-zero hint to distinguish this notification call - * from the call done in kqueue_scan(), which uses hint=0. - */ - KNOTE(&si->si.si_note, /*hint=*/0x100, - mtx_owned(&si->m) ? KNF_LISTLOCKED : 0); + taskqueue_enqueue(si->ntfytq, &si->ntfytask); } void Modified: stable/12/sys/dev/netmap/netmap_kern.h ============================================================================== --- stable/12/sys/dev/netmap/netmap_kern.h Wed Feb 13 07:37:33 2019 (r344089) +++ stable/12/sys/dev/netmap/netmap_kern.h Wed Feb 13 09:03:23 2019 (r344090) @@ -133,7 +133,10 @@ struct netmap_adapter *netmap_getna(if_t ifp); struct nm_selinfo { struct selinfo si; + struct taskqueue *ntfytq; + struct task ntfytask; struct mtx m; + char mtxname[32]; }; @@ -290,7 +293,7 @@ struct netmap_priv_d; struct nm_bdg_args; /* os-specific NM_SELINFO_T initialzation/destruction functions */ -void nm_os_selinfo_init(NM_SELINFO_T *); +int nm_os_selinfo_init(NM_SELINFO_T *, const char *name); void nm_os_selinfo_uninit(NM_SELINFO_T *); const char *nm_dump_buf(char *p, int len, int lim, char *dst);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201902130903.x1D93Oia064565>