Date:      Wed, 17 Oct 2012 08:58:42 -0500
From:      Guy Helmer <guy.helmer@gmail.com>
To:        "Alexander V. Chernikov" <melifaro@freebsd.org>
Cc:        freebsd-net@freebsd.org, FreeBSD Stable <freebsd-stable@freebsd.org>
Subject:   Re: 8.3: kernel panic in bpf.c catchpacket()
Message-ID:  <381E3EEC-7EDB-428B-A724-434443E51A53@gmail.com>
In-Reply-To: <1EDA1615-2CDE-405A-A725-AF7CC7D3E273@gmail.com>
References:  <4B5399BF-4EE0-4182-8297-3BB97C4AA884@gmail.com> <59F9A36E-3DB2-4F6F-BB2A-A4C9DA76A70C@gmail.com> <5075C05E.9070800@FreeBSD.org> <1EDA1615-2CDE-405A-A725-AF7CC7D3E273@gmail.com>

On Oct 12, 2012, at 8:54 AM, Guy Helmer <guy.helmer@gmail.com> wrote:

>
> On Oct 10, 2012, at 1:37 PM, Alexander V. Chernikov <melifaro@freebsd.org> wrote:
>
>> On 10.10.2012 00:36, Guy Helmer wrote:
>>>
>>> On Oct 8, 2012, at 8:09 AM, Guy Helmer <guy.helmer@gmail.com> wrote:
>>>
>>>> I'm seeing a consistent new kernel panic in FreeBSD 8.3:
>>>> I'm not seeing how bd_sbuf would be NULL here. Any ideas?
>>>
>>> Since I've not had any replies, I hope nobody minds if I reply with
>>> more information.
>>>
>>> This panic seems to be occasionally triggered now that my user land
>>> code is changing the packet filter a while after the bpf device has
>>> been opened and an initial packet filter was set (previously, my code
>>> did not change the filter after it was initially set).
>>>
>>> I'm focusing on bpf_setf() since that seems to be the place that could
>>> be tickling a problem, and I see that bpf_setf() calls reset_d(d) to
>>> clear the hold buffer. I have manually verified that the BPFD lock is
>>> held during the call to reset_d(), and the lock is held every other
>>> place that the buffers are manipulated, so I haven't been able to find
>>> any place that seems vulnerable to losing one of the bpf buffers. Still
>>> searching, but any help would be appreciated.
>>
>> Can you please check this code on -current?
>> Locking has changed quite significantly some time ago, so there is a
>> good chance that you can get rid of this panic (or discover a different
>> one which is really "new") :).
>
> I'm not ready to run this app on current, so I have merged revs 229898,
> 233937, 233938, 233946, 235744, 235745, 235746, 235747, 236231, 236251,
> 236261, 236262, 236559, and 236806 to my 8.3 checkout to get code that
> should be virtually identical to current without the timestamp changes.
>
> Unfortunately, I have only been able to trigger the panic in my test lab
> once -- so I'm not sure whether a lack of problems with the updated code
> will be indicative of likely success in the field where this has been
> triggered regularly at some sites…
>
> Thanks,
> Guy
>


FWIW, I was able to trigger the panic with the original 8.3 code again in
my test lab. With these changes resulting from merging the revs mentioned
above, I have not seen any panics in my test lab setup in two days of load
testing, and AFAIK, packet capturing seems to be working fine.

I've included the diffs for reference for anyone encountering the issue.

Thanks, Alexander!

Guy

Index: net/bpf.c
===================================================================
--- net/bpf.c	(revision 239830)
+++ net/bpf.c	(working copy)
@@ -43,6 +43,8 @@
 
 #include <sys/types.h>
 #include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
@@ -66,6 +68,7 @@
 #include <sys/socket.h>
 
 #include <net/if.h>
+#define	BPF_INTERNAL
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -139,6 +142,7 @@
 
 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
 static void	bpf_detachd(struct bpf_d *);
+static void	bpf_detachd_locked(struct bpf_d *);
 static void	bpf_freed(struct bpf_d *);
 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
 		    struct sockaddr *, int *, struct bpf_insn *);
@@ -150,7 +154,7 @@
 		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
 		    struct timeval *);
 static void	reset_d(struct bpf_d *);
-static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
+static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int	bpf_setdlt(struct bpf_d *, u_int);
 static void	filt_bpfdetach(struct knote *);
@@ -168,6 +172,12 @@
 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
     bpf_stats_sysctl, "bpf statistics portal");
 
+static VNET_DEFINE(int, bpf_optimize_writers) = 0;
+#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
+SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
+    CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+    "Do not send packets until BPF program is set");
+
 static	d_open_t	bpfopen;
 static	d_read_t	bpfread;
 static	d_write_t	bpfwrite;
@@ -189,7 +199,38 @@
 static struct filterops bpfread_filtops =
 	{ 1, NULL, filt_bpfdetach, filt_bpfread };
 
+eventhandler_tag	bpf_ifdetach_cookie = NULL;
+
 /*
+ * LOCKING MODEL USED BY BPF:
+ * Locks:
+ * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
+ * some global counters and every bpf_if reference.
+ * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
+ * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
+ *   used by bpf_mtap code.
+ *
+ * Lock order:
+ *
+ * Global lock, interface lock, descriptor lock
+ *
+ * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
+ * working model. In many places (like bpf_detachd) we start with BPF descriptor
+ * (and we need to at least rlock it to get reliable interface pointer). This
+ * gives us potential LOR. As a result, we use global lock to protect from bpf_if
+ * change in every such place.
+ *
+ * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
+ * 3) descriptor main wlock.
+ * Reading bd_bif can be protected by any of these locks, typically global lock.
+ *
+ * Changing read/write BPF filter is protected by the same three locks,
+ * the same applies for reading.
+ *
+ * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ */
+
+/*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
  * similar to protosw, et.
@@ -282,7 +323,6 @@
 static int
 bpf_canwritebuf(struct bpf_d *d)
 {
-
 	BPFD_LOCK_ASSERT(d);
 
 	switch (d->bd_bufmode) {
@@ -561,18 +601,93 @@
 static void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
+	int op_w;
+
+	BPF_LOCK_ASSERT();
+
 	/*
-	 * Point d at bp, and add d to the interface's list of listeners.
-	 * Finally, point the driver's bpf cookie at the interface so
-	 * it will divert packets to bpf.
+	 * Save sysctl value to protect from sysctl change
+	 * between reads
 	 */
-	BPFIF_LOCK(bp);
+	op_w = V_bpf_optimize_writers;
+
+	if (d->bd_bif != NULL)
+		bpf_detachd_locked(d);
+	/*
+	 * Point d at bp, and add d to the interface's list.
+	 * Since there are many applicaiotns using BPF for
+	 * sending raw packets only (dhcpd, cdpd are good examples)
+	 * we can delay adding d to the list of active listeners until
+	 * some filter is configured.
+	 */
+
+	BPFIF_WLOCK(bp);
+	BPFD_LOCK(d);
+
 	d->bd_bif = bp;
-	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 
+	if (op_w != 0) {
+		/* Add to writers-only list */
+		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+		/*
+		 * We decrement bd_writer on every filter set operation.
+		 * First BIOCSETF is done by pcap_open_live() to set up
+		 * snap length. After that appliation usually sets its own filter
+		 */
+		d->bd_writer = 2;
+	} else
+		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+	BPFD_UNLOCK(d);
+	BPFIF_WUNLOCK(bp);
+
 	bpf_bpfd_cnt++;
-	BPFIF_UNLOCK(bp);
 
+	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
+	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+	if (op_w == 0)
+		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
+}
+
+/*
+ * Add d to the list of active bp filters.
+ * Reuqires bpf_attachd() to be called before
+ */
+static void
+bpf_upgraded(struct bpf_d *d)
+{
+	struct bpf_if *bp;
+
+	BPF_LOCK_ASSERT();
+
+	bp = d->bd_bif;
+
+	/*
+	 * Filter can be set several times without specifying interface.
+	 * Mark d as reader and exit.
+	 */
+	if (bp == NULL) {
+		BPFD_LOCK(d);
+		d->bd_writer = 0;
+		BPFD_UNLOCK(d);
+		return;
+	}
+
+	BPFIF_WLOCK(bp);
+	BPFD_LOCK(d);
+
+	/* Remove from writers-only list */
+	LIST_REMOVE(d, bd_next);
+	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+	/* Mark d as reader */
+	d->bd_writer = 0;
+
+	BPFD_UNLOCK(d);
+	BPFIF_WUNLOCK(bp);
+
+	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
+
 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 }
 
@@ -582,27 +697,48 @@
 static void
 bpf_detachd(struct bpf_d *d)
 {
+	BPF_LOCK();
+	bpf_detachd_locked(d);
+	BPF_UNLOCK();
+}
+
+static void
+bpf_detachd_locked(struct bpf_d *d)
+{
 	int error;
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 
-	bp = d->bd_bif;
-	BPFIF_LOCK(bp);
+	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+
+	BPF_LOCK_ASSERT();
+
+	/* Check if descriptor is attached */
+	if ((bp = d->bd_bif) == NULL)
+		return;
+
+	BPFIF_WLOCK(bp);
 	BPFD_LOCK(d);
-	ifp = d->bd_bif->bif_ifp;
 
+	/* Save bd_writer value */
+	error = d->bd_writer;
+
 	/*
 	 * Remove d from the interface's descriptor list.
 	 */
 	LIST_REMOVE(d, bd_next);
 
-	bpf_bpfd_cnt--;
+	ifp = bp->bif_ifp;
 	d->bd_bif = NULL;
 	BPFD_UNLOCK(d);
-	BPFIF_UNLOCK(bp);
+	BPFIF_WUNLOCK(bp);
 
-	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+	bpf_bpfd_cnt--;
 
+	/* Call event handler iff d is attached */
+	if (error == 0)
+		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+
 	/*
 	 * Check if this descriptor had requested promiscuous mode.
 	 * If so, turn it off.
@@ -640,10 +776,7 @@
 	d->bd_state = BPF_IDLE;
 	BPFD_UNLOCK(d);
 	funsetown(&d->bd_sigio);
-	mtx_lock(&bpf_mtx);
-	if (d->bd_bif)
-		bpf_detachd(d);
-	mtx_unlock(&bpf_mtx);
+	bpf_detachd(d);
 #ifdef MAC
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
@@ -663,7 +796,7 @@
 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct bpf_d *d;
-	int error;
+	int error, size;
 
 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
 	error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -681,15 +814,19 @@
 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
 	d->bd_sig = SIGIO;
 	d->bd_direction = BPF_D_INOUT;
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 #ifdef MAC
 	mac_bpfdesc_init(d);
 	mac_bpfdesc_create(td->td_ucred, d);
 #endif
-	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
-	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
+	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
+	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
 
+	/* Allocate default buffers */
+	size = d->bd_bufsize;
+	bpf_buffer_ioctl_sblen(d, &size);
+
 	return (0);
 }
 
@@ -718,7 +855,7 @@
 	non_block = ((ioflag & O_NONBLOCK) != 0);
 
 	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
 		BPFD_UNLOCK(d);
 		return (EOPNOTSUPP);
@@ -764,7 +901,7 @@
 			BPFD_UNLOCK(d);
 			return (EWOULDBLOCK);
 		}
-		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
 		     "bpf", d->bd_rtout);
 		if (error == EINTR || error == ERESTART) {
 			BPFD_UNLOCK(d);
@@ -881,8 +1018,9 @@
 	if (error != 0)
 		return (error);
 
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	d->bd_wcount++;
+	/* XXX: locking required */
 	if (d->bd_bif == NULL) {
 		d->bd_wdcount++;
 		return (ENXIO);
@@ -903,6 +1041,7 @@
 	bzero(&dst, sizeof(dst));
 	m = NULL;
 	hlen = 0;
+	/* XXX: bpf_movein() can sleep */
 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
 	    &m, &dst, &hlen, d->bd_wfilter);
 	if (error) {
@@ -962,7 +1101,7 @@
 reset_d(struct bpf_d *d)
 {
 
-	mtx_assert(&d->bd_mtx, MA_OWNED);
+	BPFD_LOCK_ASSERT(d);
 
 	if ((d->bd_hbuf != NULL) &&
 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
@@ -1028,7 +1167,7 @@
 	 * Refresh PID associated with this descriptor.
 	 */
 	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 	if (d->bd_state == BPF_WAITING)
 		callout_stop(&d->bd_callout);
 	d->bd_state = BPF_IDLE;
@@ -1079,7 +1218,9 @@
 	case BIOCGDLTLIST32:
 	case BIOCGRTIMEOUT32:
 	case BIOCSRTIMEOUT32:
+		BPFD_LOCK(d);
 		d->bd_compat32 = 1;
+		BPFD_UNLOCK(d);
 	}
 #endif
 
@@ -1124,7 +1265,9 @@
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufsize;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
@@ -1179,10 +1322,12 @@
 	 * Get current data link type.
 	 */
 	case BIOCGDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			*(u_int *)addr = d->bd_bif->bif_dlt;
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1197,6 +1342,7 @@
 			list32 = (struct bpf_dltlist32 *)addr;
 			dltlist.bfl_len = list32->bfl_len;
 			dltlist.bfl_list = PTRIN(list32->bfl_list);
+			BPF_LOCK();
 			if (d->bd_bif == NULL)
 				error = EINVAL;
 			else {
@@ -1204,31 +1350,37 @@
 				if (error == 0)
 					list32->bfl_len = dltlist.bfl_len;
 			}
+			BPF_UNLOCK();
 			break;
 		}
 #endif
 
 	case BIOCGDLTLIST:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set data link type.
 	 */
 	case BIOCSDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_setdlt(d, *(u_int *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get interface name.
 	 */
 	case BIOCGETIF:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
@@ -1238,13 +1390,16 @@
 			strlcpy(ifr->ifr_name, ifp->if_xname,
 			    sizeof(ifr->ifr_name));
 		}
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
+		BPF_LOCK();
 		error = bpf_setif(d, (struct ifreq *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1327,7 +1482,9 @@
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
+		BPFD_LOCK(d);
 		d->bd_immediate = *(u_int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCVERSION:
@@ -1343,21 +1500,27 @@
 	 * Get "header already complete" flag
 	 */
 	case BIOCGHDRCMPLT:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_hdrcmplt;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Set "header already complete" flag
 	 */
 	case BIOCSHDRCMPLT:
+		BPFD_LOCK(d);
 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
 	 * Get packet direction flag
 	 */
 	case BIOCGDIRECTION:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_direction;
+		BPFD_UNLOCK(d);
 		break;
 
 	/*
@@ -1372,7 +1535,9 @@
 			case BPF_D_IN:
 			case BPF_D_INOUT:
 			case BPF_D_OUT:
+				BPFD_LOCK(d);
 				d->bd_direction = direction;
+				BPFD_UNLOCK(d);
 				break;
 			default:
 				error = EINVAL;
@@ -1381,26 +1546,38 @@
 		break;
 
 	case BIOCFEEDBACK:
+		BPFD_LOCK(d);
 		d->bd_feedback = *(u_int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCLOCK:
+		BPFD_LOCK(d);
 		d->bd_locked = 1;
+		BPFD_UNLOCK(d);
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
+		BPFD_LOCK(d);
 		d->bd_async = *(int *)addr;
+		BPFD_UNLOCK(d);
 		break;
 
 	case FIOSETOWN:
+		/*
+		 * XXX: Add some sort of locking here?
+		 * fsetown() can sleep.
+		 */
 		error = fsetown(*(int *)addr, &d->bd_sigio);
 		break;
 
 	case FIOGETOWN:
+		BPFD_LOCK(d);
 		*(int *)addr = fgetown(&d->bd_sigio);
+		BPFD_UNLOCK(d);
 		break;
 
 	/* This is deprecated, FIOSETOWN should be used instead. */
@@ -1421,16 +1598,23 @@
 
 			if (sig >= NSIG)
 				error = EINVAL;
-			else
+			else {
+				BPFD_LOCK(d);
 				d->bd_sig = sig;
+				BPFD_UNLOCK(d);
+			}
 			break;
 		}
 	case BIOCGRSIG:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_sig;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCGETBUFMODE:
+		BPFD_LOCK(d);
 		*(u_int *)addr = d->bd_bufmode;
+		BPFD_UNLOCK(d);
 		break;
 
 	case BIOCSETBUFMODE:
@@ -1485,95 +1669,130 @@
 /*
  * Set d's packet filter program to fp.  If this file already has a filter,
  * free it and replace it.  Returns EINVAL for bogus requests.
+ *
+ * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
+ * since reading d->bd_bif can't be protected by d or interface lock due to
+ * lock order.
+ *
+ * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
+ * interface read lock to read all filers.
+ *
  */
 static int
 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 {
+#ifdef COMPAT_FREEBSD32
+	struct bpf_program fp_swab;
+	struct bpf_program32 *fp32;
+#endif
 	struct bpf_insn *fcode, *old;
-	u_int wfilter, flen, size;
 #ifdef BPF_JITTER
-	bpf_jit_filter *ofunc;
+	bpf_jit_filter *jfunc, *ofunc;
 #endif
+	size_t size;
+	u_int flen;
+	int need_upgrade;
+
 #ifdef COMPAT_FREEBSD32
-	struct bpf_program32 *fp32;
-	struct bpf_program fp_swab;
-
-	if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
+	switch (cmd) {
+	case BIOCSETF32:
+	case BIOCSETWF32:
+	case BIOCSETFNR32:
 		fp32 = (struct bpf_program32 *)fp;
 		fp_swab.bf_len = fp32->bf_len;
 		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
 		fp = &fp_swab;
-		if (cmd == BIOCSETWF32)
+		switch (cmd) {
+		case BIOCSETF32:
+			cmd = BIOCSETF;
+			break;
+		case BIOCSETWF32:
 			cmd = BIOCSETWF;
+			break;
+		}
+		break;
 	}
 #endif
-	if (cmd == BIOCSETWF) {
-		old = d->bd_wfilter;
-		wfilter = 1;
+
+	fcode = NULL;
 #ifdef BPF_JITTER
-		ofunc = NULL;
+	jfunc = ofunc = NULL;
 #endif
-	} else {
-		wfilter = 0;
-		old = d->bd_rfilter;
-#ifdef BPF_JITTER
-		ofunc = d->bd_bfilter;
-#endif
-	}
-	if (fp->bf_insns == NULL) {
-		if (fp->bf_len != 0)
+	need_upgrade = 0;
+
+	/*
+	 * Check new filter validness before acquiring any locks.
+	 * Allocate memory for new filter, if needed.
+	 */
+	flen = fp->bf_len;
+	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
+		return (EINVAL);
+	size = flen * sizeof(*fp->bf_insns);
+	if (size > 0) {
+		/* We're setting up new filter.  Copy and check actual data. */
+		fcode = malloc(size, M_BPF, M_WAITOK);
+		if (copyin(fp->bf_insns, fcode, size) != 0 ||
+		    !bpf_validate(fcode, flen)) {
+			free(fcode, M_BPF);
 			return (EINVAL);
-		BPFD_LOCK(d);
-		if (wfilter)
-			d->bd_wfilter = NULL;
-		else {
-			d->bd_rfilter = NULL;
-#ifdef BPF_JITTER
-			d->bd_bfilter = NULL;
-#endif
-			if (cmd == BIOCSETF)
-				reset_d(d);
 		}
-		BPFD_UNLOCK(d);
-		if (old != NULL)
-			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
-		if (ofunc != NULL)
-			bpf_destroy_jit_filter(ofunc);
+		/* Filter is copied inside fcode and is perfectly valid. */
+		jfunc = bpf_jitter(fcode, flen);
 #endif
-		return (0);
 	}
-	flen = fp->bf_len;
-	if (flen > bpf_maxinsns)
-		return (EINVAL);
 
-	size = flen * sizeof(*fp->bf_insns);
-	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
-	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
-	    bpf_validate(fcode, (int)flen)) {
-		BPFD_LOCK(d);
-		if (wfilter)
-			d->bd_wfilter = fcode;
-		else {
-			d->bd_rfilter = fcode;
+	BPF_LOCK();
+
+	/*
+	 * Set up new filter.
+	 * Protect filter change by interface lock.
+	 * Additionally, we are protected by global lock here.
+	 */
+	if (d->bd_bif != NULL)
+		BPFIF_WLOCK(d->bd_bif);
+	BPFD_LOCK(d);
+	if (cmd == BIOCSETWF) {
+		old = d->bd_wfilter;
+		d->bd_wfilter = fcode;
+	} else {
+		old = d->bd_rfilter;
+		d->bd_rfilter = fcode;
 #ifdef BPF_JITTER
-			d->bd_bfilter = bpf_jitter(fcode, flen);
+		ofunc = d->bd_bfilter;
+		d->bd_bfilter = jfunc;
 #endif
-			if (cmd == BIOCSETF)
-				reset_d(d);
+		if (cmd == BIOCSETF)
+			reset_d(d);
+
+		if (fcode != NULL) {
+			/*
+			 * Do not require upgrade by first BIOCSETF
+			 * (used to set snaplen) by pcap_open_live().
+			 */
+			if (d->bd_writer != 0 && --d->bd_writer == 0)
+				need_upgrade = 1;
+			CTR4(KTR_NET, "%s: filter function set by pid %d, "
+			    "bd_writer counter %d, need_upgrade %d",
+			    __func__, d->bd_pid, d->bd_writer, need_upgrade);
 		}
-		BPFD_UNLOCK(d);
-		if (old != NULL)
-			free((caddr_t)old, M_BPF);
+	}
+	BPFD_UNLOCK(d);
+	if (d->bd_bif != NULL)
+		BPFIF_WUNLOCK(d->bd_bif);
+	if (old != NULL)
+		free(old, M_BPF);
 #ifdef BPF_JITTER
-		if (ofunc != NULL)
-			bpf_destroy_jit_filter(ofunc);
+	if (ofunc != NULL)
+		bpf_destroy_jit_filter(ofunc);
 #endif
 
-		return (0);
-	}
-	free((caddr_t)fcode, M_BPF);
-	return (EINVAL);
+	/* Move d to active readers list. */
+	if (need_upgrade)
+		bpf_upgraded(d);
+
+	BPF_UNLOCK();
+	return (0);
 }
 
 /*
@@ -1587,28 +1806,30 @@
 	struct bpf_if *bp;
 	struct ifnet *theywant;
 
+	BPF_LOCK_ASSERT();
+
 	theywant = ifunit(ifr->ifr_name);
 	if (theywant == NULL || theywant->if_bpf == NULL)
 		return (ENXIO);
 
 	bp = theywant->if_bpf;
 
+	/* Check if interface is not being detached from BPF */
+	BPFIF_RLOCK(bp);
+	if (bp->flags & BPFIF_FLAG_DYING) {
+		BPFIF_RUNLOCK(bp);
+		return (ENXIO);
+	}
+	BPFIF_RUNLOCK(bp);
+
 	/*
 	 * Behavior here depends on the buffering model.  If we're using
 	 * kernel memory buffers, then we can allocate them here.  If we're
 	 * using zero-copy, then the user process must have registered
 	 * buffers by the time we get here.  If not, return an error.
-	 *
-	 * XXXRW: There are locking issues here with multi-threaded use: what
-	 * if two threads try to set the interface at once?
 	 */
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
-		if (d->bd_sbuf == NULL)
-			bpf_buffer_alloc(d);
-		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
-		break;
-
 	case BPF_BUFMODE_ZBUF:
 		if (d->bd_sbuf == NULL)
 			return (EINVAL);
@@ -1617,15 +1838,8 @@
 	default:
 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
 	}
-	if (bp != d->bd_bif) {
-		if (d->bd_bif)
-			/*
-			 * Detach if attached to something else.
-			 */
-			bpf_detachd(d);
-
+	if (bp != d->bd_bif)
 		bpf_attachd(d, bp);
-	}
 	BPFD_LOCK(d);
 	reset_d(d);
 	BPFD_UNLOCK(d);
@@ -1653,7 +1867,7 @@
 	 */
 	revents = events & (POLLOUT | POLLWRNORM);
 	BPFD_LOCK(d);
-	d->bd_pid = td->td_proc->p_pid;
+	BPF_PID_REFRESH(d, td);
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (bpf_ready(d))
 			revents |= events & (POLLIN | POLLRDNORM);
@@ -1688,7 +1902,7 @@
 	 * Refresh PID associated with this descriptor.
 	 */
 	BPFD_LOCK(d);
-	d->bd_pid = curthread->td_proc->p_pid;
+	BPF_PID_REFRESH_CUR(d);
 	kn->kn_fop = &bpfread_filtops;
 	kn->kn_hook = d;
 	knlist_add(&d->bd_sel.si_note, kn, 1);
@@ -1744,9 +1958,19 @@
 	struct timeval tv;
 
 	gottime = 0;
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		BPFD_LOCK(d);
+		/*
+		 * We are not using any locks for d here because:
+		 * 1) any filter change is protected by interface
+		 * write lock
+		 * 2) destroying/detaching d is protected by interface
+		 * write lock, too
+		 */
+
+		/* XXX: Do not protect counter for the sake of performance. */
 		++d->bd_rcount;
 		/*
 		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
@@ -1762,6 +1986,11 @@
 #endif
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 		if (slen != 0) {
+			/*
+			 * Filter matches. Let's to acquire write lock.
+			 */
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1772,10 +2001,10 @@
 #endif
 				catchpacket(d, pkt, pktlen, slen,
 				    bpf_append_bytes, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #define	BPF_CHECK_DIRECTION(d, r, i)				\
@@ -1784,6 +2013,7 @@
 
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
+ * Locking model is explained in bpf_tap().
  */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
@@ -1806,11 +2036,11 @@
 
 	pktlen = m_length(m, NULL);
 
-	BPFIF_LOCK(bp);
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 #ifdef BPF_JITTER
 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -1821,6 +2051,8 @@
 #endif
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
 		if (slen != 0) {
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1831,10 +2063,10 @@
 #endif
 				catchpacket(d, (u_char *)m, pktlen, slen,
 				    bpf_append_mbuf, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 /*
@@ -1869,14 +2101,17 @@
 	mb.m_len = dlen;
 	pktlen += dlen;
 
-	BPFIF_LOCK(bp);
+
+	BPFIF_RLOCK(bp);
+
 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
 			continue;
-		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
 		if (slen != 0) {
+			BPFD_LOCK(d);
+
 			d->bd_fcount++;
 			if (!gottime) {
 				microtime(&tv);
@@ -1887,10 +2122,10 @@
 #endif
 				catchpacket(d, (u_char *)&mb, pktlen, slen,
 				    bpf_append_mbuf, &tv);
+			BPFD_UNLOCK(d);
 		}
-		BPFD_UNLOCK(d);
 	}
-	BPFIF_UNLOCK(bp);
+	BPFIF_RUNLOCK(bp);
 }
 
 #undef	BPF_CHECK_DIRECTION
@@ -2040,7 +2275,7 @@
 	}
 	if (d->bd_wfilter != NULL)
 		free((caddr_t)d->bd_wfilter, M_BPF);
-	mtx_destroy(&d->bd_mtx);
+	mtx_destroy(&d->bd_lock);
 }
 
 /*
@@ -2070,15 +2305,16 @@
 		panic("bpfattach");
 
 	LIST_INIT(&bp->bif_dlist);
+	LIST_INIT(&bp->bif_wlist);
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
-	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+	rw_init(&bp->bif_lock, "bpf interface lock");
 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
 	*driverp = bp;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 
 	/*
 	 * Compute the length of the bpf header.  This is not necessarily
@@ -2093,10 +2329,9 @@
 }
 
 /*
- * Detach bpf from an interface.  This involves detaching each descriptor
- * associated with the interface, and leaving bd_bif NULL.  Notify each
- * descriptor as it's detached so that any sleepers wake up and get
- * ENXIO.
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface. Notify each descriptor as it's detached
+ * so that any sleepers wake up and get ENXIO.
  */
 void
 bpfdetach(struct ifnet *ifp)
@@ -2109,31 +2344,45 @@
 	ndetached = 0;
 #endif
 
+	BPF_LOCK();
 	/* Find all bpf_if struct's which reference ifp and detach them. */
 	do {
-		mtx_lock(&bpf_mtx);
 		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 			if (ifp == bp->bif_ifp)
 				break;
 		}
 		if (bp != NULL)
 			LIST_REMOVE(bp, bif_next);
-		mtx_unlock(&bpf_mtx);
 
 		if (bp != NULL) {
 #ifdef INVARIANTS
 			ndetached++;
 #endif
 			while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
-				bpf_detachd(d);
+				bpf_detachd_locked(d);
 				BPFD_LOCK(d);
 				bpf_wakeup(d);
 				BPFD_UNLOCK(d);
 			}
-			mtx_destroy(&bp->bif_mtx);
-			free(bp, M_BPF);
+			/* Free writer-only descriptors */
+			while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+				bpf_detachd_locked(d);
+				BPFD_LOCK(d);
+				bpf_wakeup(d);
+				BPFD_UNLOCK(d);
+			}
+
+			/*
+			 * Delay freing bp till interface is detached
+			 * and all routes through this interface are removed.
+			 * Mark bp as detached to restrict new consumers.
+			 */
+			BPFIF_WLOCK(bp);
+			bp->flags |= BPFIF_FLAG_DYING;
+			BPFIF_WUNLOCK(bp);
 		}
 	} while (bp != NULL);
+	BPF_UNLOCK();
 
 #ifdef INVARIANTS
 	if (ndetached == 0)
@@ -2142,6 +2391,37 @@
 }
 
 /*
+ * Interface departure handler.
+ * Note departure event does not guarantee interface is going down.
+ */
+static void
+bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct bpf_if *bp;
+
+	BPF_LOCK();
+	if ((bp = ifp->if_bpf) == NULL) {
+		BPF_UNLOCK();
+		return;
+	}
+
+	/* Check if bpfdetach() was called previously */
+	if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
+		BPF_UNLOCK();
+		return;
+	}
+
+	CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
+	    __func__, bp, ifp);
+
+	ifp->if_bpf = NULL;
+	BPF_UNLOCK();
+
+	rw_destroy(&bp->bif_lock);
+	free(bp, M_BPF);
+}
+
+/*
  * Get a list of available data link type of the interface.
  */
 static int
@@ -2151,24 +2431,22 @@
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	ifp = d->bd_bif->bif_ifp;
 	n = 0;
 	error = 0;
-	mtx_lock(&bpf_mtx);
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp != ifp)
 			continue;
 		if (bfl->bfl_list != NULL) {
-			if (n >= bfl->bfl_len) {
-				mtx_unlock(&bpf_mtx);
+			if (n >= bfl->bfl_len)
 				return (ENOMEM);
-			}
 			error = copyout(&bp->bif_dlt,
 			    bfl->bfl_list + n, sizeof(u_int));
 		}
 		n++;
 	}
-	mtx_unlock(&bpf_mtx);
 	bfl->bfl_len = n;
 	return (error);
 }
@@ -2183,18 +2461,19 @@
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 	ifp = d->bd_bif->bif_ifp;
-	mtx_lock(&bpf_mtx);
+
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
 			break;
 	}
-	mtx_unlock(&bpf_mtx);
+
 	if (bp != NULL) {
 		opromisc = d->bd_promisc;
-		bpf_detachd(d);
 		bpf_attachd(d, bp);
 		BPFD_LOCK(d);
 		reset_d(d);
@@ -2223,6 +2502,11 @@
 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
 	/* For compatibility */
 	make_dev_alias(dev, "bpf0");
+
+	/* Register interface departure handler */
+	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
+		    ifnet_departure_event, bpf_ifdetach, NULL,
+		    EVENTHANDLER_PRI_ANY);
 }
 
 /*
@@ -2236,9 +2520,9 @@
 	struct bpf_if *bp;
 	struct bpf_d *bd;
 
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			BPFD_LOCK(bd);
 			bd->bd_rcount = 0;
@@ -2249,11 +2533,14 @@
 			bd->bd_zcopy = 0;
 			BPFD_UNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 }
 
+/*
+ * Fill filter statistics
+ */
 static void
 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 {
@@ -2261,6 +2548,7 @@
 	bzero(d, sizeof(*d));
 	BPFD_LOCK_ASSERT(bd);
 	d->bd_structsize = sizeof(*d);
+	/* XXX: reading should be protected by global lock */
 	d->bd_immediate = bd->bd_immediate;
 	d->bd_promisc = bd->bd_promisc;
 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -2285,6 +2573,9 @@
 	d->bd_bufmode = bd->bd_bufmode;
 }
 
+/*
+ * Handle `netstat -B' stats request
+ */
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -2322,24 +2613,31 @@
 	if (bpf_bpfd_cnt == 0)
 		return (SYSCTL_OUT(req, 0, 0));
 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
-	mtx_lock(&bpf_mtx);
+	BPF_LOCK();
 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
-		mtx_unlock(&bpf_mtx);
+		BPF_UNLOCK();
 		free(xbdbuf, M_BPF);
 		return (ENOMEM);
 	}
 	index = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		BPFIF_LOCK(bp);
+		BPFIF_RLOCK(bp);
+		/* Send writers-only first */
+		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+			xbd = &xbdbuf[index++];
+			BPFD_LOCK(bd);
+			bpfstats_fill_xbpf(xbd, bd);
+			BPFD_UNLOCK(bd);
+		}
 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
 			xbd = &xbdbuf[index++];
 			BPFD_LOCK(bd);
 			bpfstats_fill_xbpf(xbd, bd);
 			BPFD_UNLOCK(bd);
 		}
-		BPFIF_UNLOCK(bp);
+		BPFIF_RUNLOCK(bp);
 	}
-	mtx_unlock(&bpf_mtx);
+	BPF_UNLOCK();
 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
 	free(xbdbuf, M_BPF);
 	return (error);
Index: net/bpf.h
===================================================================
--- net/bpf.h	(revision 239830)
+++ net/bpf.h	(working copy)
@@ -917,14 +917,21 @@
 
 /*
  * Descriptor associated with each attached hardware interface.
+ * FIXME: this structure is exposed to external callers to speed up
+ * bpf_peers_present() call. However we cover all fields not needed by
+ * this function via BPF_INTERNAL define
  */
 struct bpf_if {
 	LIST_ENTRY(bpf_if)	bif_next;	/* list of all interfaces */
 	LIST_HEAD(, bpf_d)	bif_dlist;	/* descriptor list */
+#ifdef BPF_INTERNAL
 	u_int bif_dlt;				/* link layer type */
 	u_int bif_hdrlen;		/* length of header (with padding) */
 	struct ifnet *bif_ifp;		/* corresponding interface */
-	struct mtx	bif_mtx;	/* mutex for interface */
+	struct rwlock bif_lock;		/* interface lock */
+	LIST_HEAD(, bpf_d)	bif_wlist;	/* writer-only list */
+	int flags;			/* Interface flags */
+#endif
 };
 
 void	 bpf_bufheld(struct bpf_d *d);
Index: net/bpf_buffer.c
===================================================================
--- net/bpf_buffer.c	(revision 239830)
+++ net/bpf_buffer.c	(working copy)
@@ -93,21 +93,6 @@
 SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
     &bpf_maxbufsize, 0, "Default capture buffer in bytes");
 
-void
-bpf_buffer_alloc(struct bpf_d *d)
-{
-
-	KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
-	KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
-	KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
-
-	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-	d->bd_hbuf = NULL;
-	d->bd_slen = 0;
-	d->bd_hlen = 0;
-}
-
 /*
  * Simple data copy to the current kernel buffer.
  */
@@ -183,18 +168,42 @@
 bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
 {
 	u_int size;
+	caddr_t fbuf, sbuf;
 
+	size = *i;
+	if (size > bpf_maxbufsize)
+		*i = size = bpf_maxbufsize;
+	else if (size < BPF_MINBUFSIZE)
+		*i = size = BPF_MINBUFSIZE;
+
+	/* Allocate buffers immediately */
+	fbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+	sbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+
 	BPFD_LOCK(d);
 	if (d->bd_bif != NULL) {
+		/* Interface already attached, unable to change buffers */
 		BPFD_UNLOCK(d);
+		free(fbuf, M_BPF);
+		free(sbuf, M_BPF);
 		return (EINVAL);
 	}
-	size = *i;
-	if (size > bpf_maxbufsize)
-		*i = size = bpf_maxbufsize;
-	else if (size < BPF_MINBUFSIZE)
-		*i = size = BPF_MINBUFSIZE;
+
+	/* Free old buffers if set */
+	if (d->bd_fbuf != NULL)
+		free(d->bd_fbuf, M_BPF);
+	if (d->bd_sbuf != NULL)
+		free(d->bd_sbuf, M_BPF);
+
+	/* Fill in new data */
 	d->bd_bufsize = size;
+	d->bd_fbuf = fbuf;
+	d->bd_sbuf = sbuf;
+
+	d->bd_hbuf = NULL;
+	d->bd_slen = 0;
+	d->bd_hlen = 0;
+
 	BPFD_UNLOCK(d);
 	return (0);
 }
Index: net/bpf_buffer.h
===================================================================
--- net/bpf_buffer.h	(revision 239830)
+++ net/bpf_buffer.h	(working copy)
@@ -36,7 +36,6 @@
 #error "no user-serviceable parts inside"
 #endif
 
-void	bpf_buffer_alloc(struct bpf_d *d);
 void	bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
 	    void *src, u_int len);
 void	bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
Index: net/bpfdesc.h
===================================================================
--- net/bpfdesc.h	(revision 239830)
+++ net/bpfdesc.h	(working copy)
@@ -79,6 +79,7 @@
 	u_char		bd_promisc;	/* true if listening promiscuously */
 	u_char		bd_state;	/* idle, waiting, or timed out */
 	u_char		bd_immediate;	/* true to return on packet arrival */
+	u_char		bd_writer;	/* non-zero if d is writer-only */
 	int		bd_hdrcmplt;	/* false to fill in src lladdr automatically */
 	int		bd_direction;	/* select packet direction */
 	int		bd_feedback;	/* true to feed back sent packets */
@@ -86,7 +87,7 @@
 	int		bd_sig;		/* signal to send upon packet reception */
 	struct sigio *	bd_sigio;	/* information for async I/O */
 	struct selinfo	bd_sel;		/* bsd select info */
-	struct mtx	bd_mtx;		/* mutex for this descriptor */
+	struct mtx	bd_lock;	/* per-descriptor lock */
 	struct callout	bd_callout;	/* for BPF timeouts with select */
 	struct label	*bd_label;	/* MAC label for descriptor */
 	u_int64_t	bd_fcount;	/* number of packets which matched filter */
@@ -105,10 +106,16 @@
 #define BPF_WAITING	1		/* waiting for read timeout in select */
 #define BPF_TIMED_OUT	2		/* read timeout has expired in select */
 
-#define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_mtx)
-#define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_mtx)
-#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_mtx, MA_OWNED)
+#define BPFD_LOCK(bd)		mtx_lock(&(bd)->bd_lock)
+#define BPFD_UNLOCK(bd)		mtx_unlock(&(bd)->bd_lock)
+#define BPFD_LOCK_ASSERT(bd)	mtx_assert(&(bd)->bd_lock, MA_OWNED)
 
+#define BPF_PID_REFRESH(bd, td)	(bd)->bd_pid = (td)->td_proc->p_pid
+#define BPF_PID_REFRESH_CUR(bd)	(bd)->bd_pid = curthread->td_proc->p_pid
+
+#define BPF_LOCK()		mtx_lock(&bpf_mtx)
+#define BPF_UNLOCK()		mtx_unlock(&bpf_mtx)
+#define BPF_LOCK_ASSERT()	mtx_assert(&bpf_mtx, MA_OWNED)
 /*
  * External representation of the bpf descriptor
  */
@@ -143,7 +150,11 @@
 	u_int64_t	bd_spare[4];
 };
 
-#define BPFIF_LOCK(bif)		mtx_lock(&(bif)->bif_mtx)
-#define BPFIF_UNLOCK(bif)	mtx_unlock(&(bif)->bif_mtx)
+#define BPFIF_RLOCK(bif)	rw_rlock(&(bif)->bif_lock)
+#define BPFIF_RUNLOCK(bif)	rw_runlock(&(bif)->bif_lock)
+#define BPFIF_WLOCK(bif)	rw_wlock(&(bif)->bif_lock)
+#define BPFIF_WUNLOCK(bif)	rw_wunlock(&(bif)->bif_lock)
 
+#define BPFIF_FLAG_DYING	1	/* Reject new bpf consumers */
+
 #endif



