Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 6 Apr 2012 06:53:58 +0000 (UTC)
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r233937 - in head/sys: kern net security/mac
Message-ID:  <201204060653.q366rwLa096182@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: melifaro
Date: Fri Apr  6 06:53:58 2012
New Revision: 233937
URL: http://svn.freebsd.org/changeset/base/233937

Log:
  - Improve BPF locking model.
  
  Interface locks and descriptor locks are converted from mutex(9) to rwlock(9).
  This greately improves performance: in most common case we need to acquire 1
  reader lock instead of 2 mutexes.
  
  - Remove filter(descriptor) (reader) lock in bpf_mtap[2]
  This was suggested by glebius@. We protect filter by requesting interface
  writer lock on filter change.
  
  - Cover struct bpf_if under BPF_INTERNAL define. This permits including bpf.h
  without including rwlock stuff. However, this is is temporary solution,
  struct bpf_if should be made opaque for any external caller.
  
  Found by:       Dmitrij Tejblum <tejblum@yandex-team.ru>
  Sponsored by:   Yandex LLC
  
  Reviewed by:    glebius (previous version)
  Reviewed by:    silence on -net@
  Approved by:    (mentor)
  
  MFC after:      3 weeks

Modified:
  head/sys/kern/subr_witness.c
  head/sys/net/bpf.c
  head/sys/net/bpf.h
  head/sys/net/bpf_buffer.c
  head/sys/net/bpf_zerocopy.c
  head/sys/net/bpfdesc.h
  head/sys/security/mac/mac_net.c

Modified: head/sys/kern/subr_witness.c
==============================================================================
--- head/sys/kern/subr_witness.c	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/kern/subr_witness.c	Fri Apr  6 06:53:58 2012	(r233937)
@@ -563,8 +563,8 @@ static struct witness_order_list_entry o
 	 * BPF
 	 */
 	{ "bpf global lock", &lock_class_mtx_sleep },
-	{ "bpf interface lock", &lock_class_mtx_sleep },
-	{ "bpf cdev lock", &lock_class_mtx_sleep },
+	{ "bpf interface lock", &lock_class_rw },
+	{ "bpf cdev lock", &lock_class_rw },
 	{ NULL, NULL },
 	/*
 	 * NFS server

Modified: head/sys/net/bpf.c
==============================================================================
--- head/sys/net/bpf.c	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/net/bpf.c	Fri Apr  6 06:53:58 2012	(r233937)
@@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
@@ -66,6 +68,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 
 #include <net/if.h>
+#define	BPF_INTERNAL
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -207,7 +210,7 @@ bpf_append_bytes(struct bpf_d *d, caddr_
     u_int len)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
@@ -227,7 +230,7 @@ bpf_append_mbuf(struct bpf_d *d, caddr_t
     u_int len)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
@@ -249,7 +252,7 @@ static void
 bpf_buf_reclaimed(struct bpf_d *d)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
@@ -290,7 +293,6 @@ bpf_canfreebuf(struct bpf_d *d)
 static int
 bpf_canwritebuf(struct bpf_d *d)
 {
-
 	BPFD_LOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
@@ -309,7 +311,7 @@ static void
 bpf_buffull(struct bpf_d *d)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_ZBUF:
@@ -325,7 +327,7 @@ void
 bpf_bufheld(struct bpf_d *d)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_ZBUF:
@@ -574,12 +576,12 @@ bpf_attachd(struct bpf_d *d, struct bpf_
 	 * Finally, point the driver's bpf cookie at the interface so
 	 * it will divert packets to bpf.
 	 */
-	BPFIF_LOCK(bp);
+	BPFIF_WLOCK(bp);
 	d->bd_bif = bp;
 	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 
 	bpf_bpfd_cnt++;
-	BPFIF_UNLOCK(bp);
+	BPFIF_WUNLOCK(bp);
 
 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 }
@@ -594,20 +596,24 @@ bpf_detachd(struct bpf_d *d)
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 
+	BPF_LOCK_ASSERT();
+
 	bp = d->bd_bif;
-	BPFIF_LOCK(bp);
-	BPFD_LOCK(d);
-	ifp = d->bd_bif->bif_ifp;
+	BPFIF_WLOCK(bp);
+	BPFD_WLOCK(d);
 
 	/*
 	 * Remove d from the interface's descriptor list.
 	 */
 	LIST_REMOVE(d, bd_next);
 
-	bpf_bpfd_cnt--;
+	ifp = bp->bif_ifp;
 	d->bd_bif = NULL;
-	BPFD_UNLOCK(d);
-	BPFIF_UNLOCK(bp);
+	BPFD_WUNLOCK(d);
+	BPFIF_WUNLOCK(bp);
+
+	/* We're already protected by global lock. */
+	bpf_bpfd_cnt--;
 
 	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
 
@@ -642,16 +648,16 @@ bpf_dtor(void *data)
 {
 	struct bpf_d *d = data;
 
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 	funsetown(&d->bd_sigio);
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	if (d->bd_bif)
 		bpf_detachd(d);
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 #ifdef MAC
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
@@ -689,14 +695,14 @@ bpfopen(struct cdev *dev, int flags, int
 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
 	d->bd_sig = SIGIO;
 	d->bd_direction = BPF_D_INOUT;
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 #ifdef MAC
 	mac_bpfdesc_init(d);
 	mac_bpfdesc_create(td->td_ucred, d);
 #endif
-	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
-	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+	rw_init(&d->bd_lock, "bpf cdev lock");
+	callout_init_rw(&d->bd_callout, &d->bd_lock, 0);
+	knlist_init_rw_reader(&d->bd_sel.si_note, &d->bd_lock);
 
 	return (0);
 }
@@ -725,10 +731,10 @@ bpfread(struct cdev *dev, struct uio *ui
 
 	non_block = ((ioflag & O_NONBLOCK) != 0);
 
-	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPFD_WLOCK(d);
+	BPF_PID_REFRESH_CUR(d);
 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
 		return (EOPNOTSUPP);
 	}
 	if (d->bd_state == BPF_WAITING)
@@ -764,18 +770,18 @@ bpfread(struct cdev *dev, struct uio *ui
 		 * it before using it again.
 		 */
 		if (d->bd_bif == NULL) {
-			BPFD_UNLOCK(d);
+			BPFD_WUNLOCK(d);
 			return (ENXIO);
 		}
 
 		if (non_block) {
-			BPFD_UNLOCK(d);
+			BPFD_WUNLOCK(d);
 			return (EWOULDBLOCK);
 		}
-		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+		error = rw_sleep(d, &d->bd_lock, PRINET|PCATCH,
 		     "bpf", d->bd_rtout);
 		if (error == EINTR || error == ERESTART) {
-			BPFD_UNLOCK(d);
+			BPFD_WUNLOCK(d);
 			return (error);
 		}
 		if (error == EWOULDBLOCK) {
@@ -793,7 +799,7 @@ bpfread(struct cdev *dev, struct uio *ui
 				break;
 
 			if (d->bd_slen == 0) {
-				BPFD_UNLOCK(d);
+				BPFD_WUNLOCK(d);
 				return (0);
 			}
 			ROTATE_BUFFERS(d);
@@ -803,7 +809,7 @@ bpfread(struct cdev *dev, struct uio *ui
 	/*
 	 * At this point, we know we have something in the hold slot.
 	 */
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 
 	/*
 	 * Move data from hold buffer into user space.
@@ -816,12 +822,12 @@ bpfread(struct cdev *dev, struct uio *ui
 	 */
 	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
 
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	d->bd_fbuf = d->bd_hbuf;
 	d->bd_hbuf = NULL;
 	d->bd_hlen = 0;
 	bpf_buf_reclaimed(d);
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 
 	return (error);
 }
@@ -833,7 +839,7 @@ static __inline void
 bpf_wakeup(struct bpf_d *d)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 	if (d->bd_state == BPF_WAITING) {
 		callout_stop(&d->bd_callout);
 		d->bd_state = BPF_IDLE;
@@ -851,7 +857,7 @@ bpf_timed_out(void *arg)
 {
 	struct bpf_d *d = (struct bpf_d *)arg;
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
 		return;
@@ -866,7 +872,7 @@ static int
 bpf_ready(struct bpf_d *d)
 {
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
 		return (1);
@@ -889,7 +895,7 @@ bpfwrite(struct cdev *dev, struct uio *u
 	if (error != 0)
 		return (error);
 
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	d->bd_wcount++;
 	if (d->bd_bif == NULL) {
 		d->bd_wdcount++;
@@ -937,11 +943,11 @@ bpfwrite(struct cdev *dev, struct uio *u
 
 	CURVNET_SET(ifp->if_vnet);
 #ifdef MAC
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	mac_bpfdesc_create_mbuf(d, m);
 	if (mc != NULL)
 		mac_bpfdesc_create_mbuf(d, mc);
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 #endif
 
 	error = (*ifp->if_output)(ifp, m, &dst, NULL);
@@ -970,7 +976,7 @@ static void
 reset_d(struct bpf_d *d)
 {
 
-	mtx_assert(&d->bd_mtx, MA_OWNED);
+	BPFD_WLOCK_ASSERT(d);
 
 	if ((d->bd_hbuf != NULL) &&
 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
@@ -1037,12 +1043,12 @@ bpfioctl(struct cdev *dev, u_long cmd, c
 	/*
 	 * Refresh PID associated with this descriptor.
 	 */
-	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPFD_WLOCK(d);
+	BPF_PID_REFRESH(d, td);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 
 	if (d->bd_locked == 1) {
 		switch (cmd) {
@@ -1108,11 +1114,11 @@ bpfioctl(struct cdev *dev, u_long cmd, c
 		{
 			int n;
 
-			BPFD_LOCK(d);
+			BPFD_WLOCK(d);
 			n = d->bd_slen;
 			if (d->bd_hbuf)
 				n += d->bd_hlen;
-			BPFD_UNLOCK(d);
+			BPFD_WUNLOCK(d);
 
 			*(int *)addr = n;
 			break;
@@ -1163,9 +1169,9 @@ bpfioctl(struct cdev *dev, u_long cmd, c
 	 * Flush read packet buffer.
 	 */
 	case BIOCFLUSH:
-		BPFD_LOCK(d);
+		BPFD_WLOCK(d);
 		reset_d(d);
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
 		break;
 
 	/*
@@ -1488,15 +1494,15 @@ bpfioctl(struct cdev *dev, u_long cmd, c
 			return (EINVAL);
 		}
 
-		BPFD_LOCK(d);
+		BPFD_WLOCK(d);
 		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
 		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
-			BPFD_UNLOCK(d);
+			BPFD_WUNLOCK(d);
 			CURVNET_RESTORE();
 			return (EBUSY);
 		}
 		d->bd_bufmode = *(u_int *)addr;
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
 		break;
 
 	case BIOCGETZMAX:
@@ -1556,7 +1562,12 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 	if (fp->bf_insns == NULL) {
 		if (fp->bf_len != 0)
 			return (EINVAL);
-		BPFD_LOCK(d);
+		/* 
+		 * Protect filter change by interface lock, too.
+		 * The same lock order is used by bpf_detachd().
+		 */
+		BPFIF_WLOCK(d->bd_bif);
+		BPFD_WLOCK(d);
 		if (wfilter)
 			d->bd_wfilter = NULL;
 		else {
@@ -1567,7 +1578,8 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 			if (cmd == BIOCSETF)
 				reset_d(d);
 		}
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
+		BPFIF_WUNLOCK(d->bd_bif);
 		if (old != NULL)
 			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
@@ -1584,7 +1596,12 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
 	    bpf_validate(fcode, (int)flen)) {
-		BPFD_LOCK(d);
+		/* 
+		 * Protect filter change by interface lock, too
+		 * The same lock order is used by bpf_detachd().
+		 */
+		BPFIF_WLOCK(d->bd_bif);
+		BPFD_WLOCK(d);
 		if (wfilter)
 			d->bd_wfilter = fcode;
 		else {
@@ -1595,7 +1612,8 @@ bpf_setf(struct bpf_d *d, struct bpf_pro
 			if (cmd == BIOCSETF)
 				reset_d(d);
 		}
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
+		BPFIF_WUNLOCK(d->bd_bif);
 		if (old != NULL)
 			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
@@ -1659,9 +1677,9 @@ bpf_setif(struct bpf_d *d, struct ifreq 
 
 		bpf_attachd(d, bp);
 	}
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	reset_d(d);
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 	return (0);
 }
 
@@ -1685,8 +1703,8 @@ bpfpoll(struct cdev *dev, int events, st
 	 * Refresh PID associated with this descriptor.
 	 */
 	revents = events & (POLLOUT | POLLWRNORM);
-	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPFD_WLOCK(d);
+	BPF_PID_REFRESH(d, td);
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (bpf_ready(d))
 			revents |= events & (POLLIN | POLLRDNORM);
@@ -1700,7 +1718,7 @@ bpfpoll(struct cdev *dev, int events, st
 			}
 		}
 	}
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 	return (revents);
 }
 
@@ -1720,12 +1738,12 @@ bpfkqfilter(struct cdev *dev, struct kno
 	/*
 	 * Refresh PID associated with this descriptor.
 	 */
-	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPFD_WLOCK(d);
+	BPF_PID_REFRESH_CUR(d);
 	kn->kn_fop = &bpfread_filtops;
 	kn->kn_hook = d;
 	knlist_add(&d->bd_sel.si_note, kn, 1);
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 
 	return (0);
 }
@@ -1744,7 +1762,7 @@ filt_bpfread(struct knote *kn, long hint
 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
 	int ready;
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 	ready = bpf_ready(d);
 	if (ready) {
 		kn->kn_data = d->bd_slen;
@@ -1819,9 +1837,19 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, 
 	int gottime;
 
 	gottime = BPF_TSTAMP_NONE;
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		BPFD_LOCK(d);
+		/*
+		 * We are not using any locks for d here because:
+		 * 1) any filter change is protected by interface
+		 * write lock
+		 * 2) destroying/detaching d is protected by interface
+		 * write lock, too
+		 */
+
+		/* XXX: Do not protect counter for the sake of performance. */
 		++d->bd_rcount;
 		/*
 		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
@@ -1837,6 +1865,11 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, 
 #endif
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 		if (slen != 0) {
+			/*
+			 * Filter matches. Let's to acquire write lock.
+			 */
+			BPFD_WLOCK(d);
+
 			d->bd_fcount++;
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
@@ -1845,10 +1878,10 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, 
 #endif
 				catchpacket(d, pkt, pktlen, slen,
 				    bpf_append_bytes, &bt);
+			BPFD_WUNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #define	BPF_CHECK_DIRECTION(d, r, i)				\
@@ -1857,6 +1890,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, 
 
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
+ * Locking model is explained in bpf_tap().
  */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
@@ -1876,13 +1910,13 @@ bpf_mtap(struct bpf_if *bp, struct mbuf 
 	}
 
 	pktlen = m_length(m, NULL);
-
 	gottime = BPF_TSTAMP_NONE;
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 #ifdef BPF_JITTER
 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -1893,6 +1927,8 @@ bpf_mtap(struct bpf_if *bp, struct mbuf 
 #endif
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
 		if (slen != 0) {
+			BPFD_WLOCK(d);
+
 			d->bd_fcount++;
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
@@ -1901,10 +1937,10 @@ bpf_mtap(struct bpf_if *bp, struct mbuf 
 #endif
 				catchpacket(d, (u_char *)m, pktlen, slen,
 				    bpf_append_mbuf, &bt);
+			BPFD_WUNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 /*
@@ -1938,14 +1974,17 @@ bpf_mtap2(struct bpf_if *bp, void *data,
 	pktlen += dlen;
 
 	gottime = BPF_TSTAMP_NONE;
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
 		if (slen != 0) {
+			BPFD_WLOCK(d);
+
 			d->bd_fcount++;
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
@@ -1954,10 +1993,10 @@ bpf_mtap2(struct bpf_if *bp, void *data,
 #endif
 				catchpacket(d, (u_char *)&mb, pktlen, slen,
 				    bpf_append_mbuf, &bt);
+			BPFD_WUNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #undef	BPF_CHECK_DIRECTION
@@ -2049,7 +2088,7 @@ catchpacket(struct bpf_d *d, u_char *pkt
 	int do_timestamp;
 	int tstype;
 
-	BPFD_LOCK_ASSERT(d);
+	BPFD_WLOCK_ASSERT(d);
 
 	/*
 	 * Detect whether user space has released a buffer back to us, and if
@@ -2196,7 +2235,7 @@ bpf_freed(struct bpf_d *d)
 	}
 	if (d->bd_wfilter != NULL)
 		free((caddr_t)d->bd_wfilter, M_BPF);
-	mtx_destroy(&d->bd_mtx);
+	rw_destroy(&d->bd_lock);
 }
 
 /*
@@ -2228,13 +2267,13 @@ bpfattach2(struct ifnet *ifp, u_int dlt,
 	LIST_INIT(&bp->bif_dlist);
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
-	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+	rw_init(&bp->bif_lock, "bpf interface lock");
 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
 	*driverp = bp;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 
 	bp->bif_hdrlen = hdrlen;
 
@@ -2261,14 +2300,14 @@ bpfdetach(struct ifnet *ifp)
 
 	/* Find all bpf_if struct's which reference ifp and detach them. */
 	do {
-		mtx_lock(&bpf_mtx);
+		BPF_LOCK();
 		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 			if (ifp == bp->bif_ifp)
 				break;
 		}
 		if (bp != NULL)
 			LIST_REMOVE(bp, bif_next);
-		mtx_unlock(&bpf_mtx);
+		BPF_UNLOCK();
 
 		if (bp != NULL) {
 #ifdef INVARIANTS
@@ -2276,11 +2315,11 @@ bpfdetach(struct ifnet *ifp)
 #endif
 			while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
 				bpf_detachd(d);
-				BPFD_LOCK(d);
+				BPFD_WLOCK(d);
 				bpf_wakeup(d);
-				BPFD_UNLOCK(d);
+				BPFD_WUNLOCK(d);
 			}
-			mtx_destroy(&bp->bif_mtx);
+			rw_destroy(&bp->bif_lock);
 			free(bp, M_BPF);
 		}
 	} while (bp != NULL);
@@ -2304,13 +2343,13 @@ bpf_getdltlist(struct bpf_d *d, struct b
 	ifp = d->bd_bif->bif_ifp;
 	n = 0;
 	error = 0;
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp != ifp)
 			continue;
 		if (bfl->bfl_list != NULL) {
 			if (n >= bfl->bfl_len) {
-				mtx_unlock(&bpf_mtx);
+				BPF_UNLOCK();
 				return (ENOMEM);
 			}
 			error = copyout(&bp->bif_dlt,
@@ -2318,7 +2357,7 @@ bpf_getdltlist(struct bpf_d *d, struct b
 		}
 		n++;
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 	bfl->bfl_len = n;
 	return (error);
 }
@@ -2336,19 +2375,19 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 	ifp = d->bd_bif->bif_ifp;
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
 			break;
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 	if (bp != NULL) {
 		opromisc = d->bd_promisc;
 		bpf_detachd(d);
 		bpf_attachd(d, bp);
-		BPFD_LOCK(d);
+		BPFD_WLOCK(d);
 		reset_d(d);
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
 		if (opromisc) {
 			error = ifpromisc(bp->bif_ifp, 1);
 			if (error)
@@ -2386,22 +2425,22 @@ bpf_zero_counters(void)
 	struct bpf_if *bp;
 	struct bpf_d *bd;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
-			BPFD_LOCK(bd);
+			BPFD_WLOCK(bd);
 			bd->bd_rcount = 0;
 			bd->bd_dcount = 0;
 			bd->bd_fcount = 0;
 			bd->bd_wcount = 0;
 			bd->bd_wfcount = 0;
 			bd->bd_zcopy = 0;
-			BPFD_UNLOCK(bd);
+			BPFD_WUNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 }
 
 static void
@@ -2472,24 +2511,24 @@ bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 	if (bpf_bpfd_cnt == 0)
 		return (SYSCTL_OUT(req, 0, 0));
 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
-		mtx_unlock(&bpf_mtx);
+		BPF_UNLOCK();
 		free(xbdbuf, M_BPF);
 		return (ENOMEM);
 	}
 	index = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			xbd = &xbdbuf[index++];
-			BPFD_LOCK(bd);
+			BPFD_RLOCK(bd);
 			bpfstats_fill_xbpf(xbd, bd);
-			BPFD_UNLOCK(bd);
+			BPFD_RUNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
 	free(xbdbuf, M_BPF);
 	return (error);

Modified: head/sys/net/bpf.h
==============================================================================
--- head/sys/net/bpf.h	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/net/bpf.h	Fri Apr  6 06:53:58 2012	(r233937)
@@ -1092,14 +1092,19 @@ SYSCTL_DECL(_net_bpf);
 
 /*
  * Descriptor associated with each attached hardware interface.
+ * FIXME: this structure is exposed to external callers to speed up
+ * bpf_peers_present() call. However we cover all fields not needed by
+ * this function via BPF_INTERNAL define
  */
 struct bpf_if {
 	LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
 	LIST_HEAD(, bpf_d)	bif_dlist;	/* descriptor list */
+#ifdef BPF_INTERNAL
 	u_int bif_dlt;				/* link layer type */
 	u_int bif_hdrlen;		/* length of link header */
 	struct ifnet *bif_ifp;		/* corresponding interface */
-	struct mtx	bif_mtx;	/* mutex for interface */
+	struct rwlock bif_lock;		/* interface lock */
+#endif
 };
 
 void	 bpf_bufheld(struct bpf_d *d);

Modified: head/sys/net/bpf_buffer.c
==============================================================================
--- head/sys/net/bpf_buffer.c	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/net/bpf_buffer.c	Fri Apr  6 06:53:58 2012	(r233937)
@@ -184,9 +184,9 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, 
 {
 	u_int size;
 
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	if (d->bd_bif != NULL) {
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
 		return (EINVAL);
 	}
 	size = *i;
@@ -195,7 +195,7 @@ bpf_buffer_ioctl_sblen(struct bpf_d *d, 
 	else if (size < BPF_MINBUFSIZE)
 		*i = size = BPF_MINBUFSIZE;
 	d->bd_bufsize = size;
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 	return (0);
 }
 

Modified: head/sys/net/bpf_zerocopy.c
==============================================================================
--- head/sys/net/bpf_zerocopy.c	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/net/bpf_zerocopy.c	Fri Apr  6 06:53:58 2012	(r233937)
@@ -515,14 +515,14 @@ bpf_zerocopy_ioctl_rotzbuf(struct thread
 	struct zbuf *bzh;
 
 	bzero(bz, sizeof(*bz));
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	if (d->bd_hbuf == NULL && d->bd_slen != 0) {
 		ROTATE_BUFFERS(d);
 		bzh = (struct zbuf *)d->bd_hbuf;
 		bz->bz_bufa = (void *)bzh->zb_uaddr;
 		bz->bz_buflen = d->bd_hlen;
 	}
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 	return (0);
 }
 
@@ -570,10 +570,10 @@ bpf_zerocopy_ioctl_setzbuf(struct thread
 	 * We only allow buffers to be installed once, so atomically check
 	 * that no buffers are currently installed and install new buffers.
 	 */
-	BPFD_LOCK(d);
+	BPFD_WLOCK(d);
 	if (d->bd_hbuf != NULL || d->bd_sbuf != NULL || d->bd_fbuf != NULL ||
 	    d->bd_bif != NULL) {
-		BPFD_UNLOCK(d);
+		BPFD_WUNLOCK(d);
 		zbuf_free(zba);
 		zbuf_free(zbb);
 		return (EINVAL);
@@ -593,6 +593,6 @@ bpf_zerocopy_ioctl_setzbuf(struct thread
 	 * shared management region.
 	 */
 	d->bd_bufsize = bz->bz_buflen - sizeof(struct bpf_zbuf_header);
-	BPFD_UNLOCK(d);
+	BPFD_WUNLOCK(d);
 	return (0);
 }

Modified: head/sys/net/bpfdesc.h
==============================================================================
--- head/sys/net/bpfdesc.h	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/net/bpfdesc.h	Fri Apr  6 06:53:58 2012	(r233937)
@@ -87,7 +87,7 @@ struct bpf_d {
 	int		bd_sig;		/* signal to send upon packet reception */
 	struct sigio *	bd_sigio;	/* information for async I/O */
 	struct selinfo	bd_sel;		/* bsd select info */
-	struct mtx	bd_mtx;		/* mutex for this descriptor */
+	struct rwlock	bd_lock;	/* per-descriptor lock */
 	struct callout	bd_callout;	/* for BPF timeouts with select */
 	struct label	*bd_label;	/* MAC label for descriptor */
 	u_int64_t	bd_fcount;	/* number of packets which matched filter */
@@ -106,10 +106,19 @@ struct bpf_d {
 #define BPF_WAITING	1		/* waiting for read timeout in select */
 #define BPF_TIMED_OUT	2		/* read timeout has expired in select */
 
-#define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_mtx)
-#define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_mtx)
-#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_mtx, MA_OWNED)
+#define BPFD_RLOCK(bd)		rw_rlock(&(bd)->bd_lock)
+#define BPFD_RUNLOCK(bd)	rw_runlock(&(bd)->bd_lock)
+#define BPFD_WLOCK(bd)		rw_wlock(&(bd)->bd_lock)
+#define BPFD_WUNLOCK(bd)	rw_wunlock(&(bd)->bd_lock)
+#define BPFD_WLOCK_ASSERT(bd)	rw_assert(&(bd)->bd_lock, RA_WLOCKED)
+#define BPFD_LOCK_ASSERT(bd)	rw_assert(&(bd)->bd_lock, RA_LOCKED)
 
+#define BPF_PID_REFRESH(bd, td)	(bd)->bd_pid = (td)->td_proc->p_pid
+#define BPF_PID_REFRESH_CUR(bd)	(bd)->bd_pid = curthread->td_proc->p_pid
+
+#define BPF_LOCK()		mtx_lock(&bpf_mtx)
+#define BPF_UNLOCK()		mtx_unlock(&bpf_mtx)
+#define BPF_LOCK_ASSERT()	mtx_assert(&bpf_mtx, MA_OWNED)
 /*
  * External representation of the bpf descriptor
  */
@@ -144,7 +153,9 @@ struct xbpf_d {
 	u_int64_t	bd_spare[4];
 };
 
-#define BPFIF_LOCK(bif)		mtx_lock(&(bif)->bif_mtx)
-#define BPFIF_UNLOCK(bif)	mtx_unlock(&(bif)->bif_mtx)
+#define BPFIF_RLOCK(bif)	rw_rlock(&(bif)->bif_lock)
+#define BPFIF_RUNLOCK(bif)	rw_runlock(&(bif)->bif_lock)
+#define BPFIF_WLOCK(bif)	rw_wlock(&(bif)->bif_lock)
+#define BPFIF_WUNLOCK(bif)	rw_wunlock(&(bif)->bif_lock)
 
 #endif

Modified: head/sys/security/mac/mac_net.c
==============================================================================
--- head/sys/security/mac/mac_net.c	Fri Apr  6 06:40:17 2012	(r233936)
+++ head/sys/security/mac/mac_net.c	Fri Apr  6 06:53:58 2012	(r233937)
@@ -319,6 +319,7 @@ mac_bpfdesc_create_mbuf(struct bpf_d *d,
 {
 	struct label *label;
 
+	/* Assume reader lock is enough. */
 	BPFD_LOCK_ASSERT(d);
 
 	if (mac_policy_count == 0)
@@ -354,6 +355,7 @@ mac_bpfdesc_check_receive(struct bpf_d *
 {
 	int error;
 
+	/* Assume reader lock is enough. */
 	BPFD_LOCK_ASSERT(d);
 
 	if (mac_policy_count == 0)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204060653.q366rwLa096182>