Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 16 Apr 2012 10:33:50 +0400
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        Adrian Chadd <adrian@freebsd.org>
Cc:        svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org
Subject:   Re: svn commit: r233937 - in head/sys: kern net security/mac
Message-ID:  <4F8BBD4E.1040106@FreeBSD.org>
In-Reply-To: <CAJ-VmonJ%2BZXrwgrwc3eoDvf6oMmip9zf2TFLpvjqahHgdcZdxw@mail.gmail.com>
References:  <201204060653.q366rwLa096182@svn.freebsd.org> <4F7E9413.20602@FreeBSD.org> <CAJ-VmonJ%2BZXrwgrwc3eoDvf6oMmip9zf2TFLpvjqahHgdcZdxw@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------010400080205040607050602
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

On 16.04.2012 01:17, Adrian Chadd wrote:
> Hi,
>
> This has broken (at least) net80211 and bpf, with LOR:
Yes, it is. Please try the attached patch
>
> # ifconfig wlan1 destroy
> panic: mutex bpf global lock now owned at ....../net/bpf.c:656
>
>
> The stack:
>
> * ieee80211_vap_detach()
> * ether_ifdetach()
> * bpfdetach()
> *<something>  - I bet this is bpf_detachd()
> * _mtx_assert()
>


--------------010400080205040607050602
Content-Type: text/plain;
 name="bpf_cfglocks.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="bpf_cfglocks.diff"

>From 5e621db1dae528f228e94374702d03501138fb1b Mon Sep 17 00:00:00 2001
From: "Alexander V. Chernikov" <melifaro@ipfw.ru>
Date: Wed, 11 Apr 2012 20:04:58 +0400
Subject: [PATCH 1/1] * Final BPF locks patch

---
 sys/net/bpf.c     |  379 +++++++++++++++++++++++++++++++++++++++--------------
 sys/net/bpf.h     |    1 +
 sys/net/bpfdesc.h |    2 +
 3 files changed, 283 insertions(+), 99 deletions(-)

diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index d87efc0..2556be4 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -147,6 +147,7 @@ static int		bpf_bpfd_cnt;
 
 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
 static void	bpf_detachd(struct bpf_d *);
+static void	bpf_detachd_locked(struct bpf_d *);
 static void	bpf_freed(struct bpf_d *);
 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
 		    struct sockaddr *, int *, struct bpf_insn *);
@@ -206,6 +207,37 @@ static struct filterops bpfread_filtops = {
 	.f_event = filt_bpfread,
 };
 
+eventhandler_tag	bpf_ifdetach_cookie = NULL;
+
+/*
+ * LOCKING MODEL USED BY BPF:
+ * Locks:
+ * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
+ * some global counters and every bpf_if reference.
+ * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
+ * 3) Descriptor lock. Rwlock, used to protect BPF buffers and various structure fields
+ *   used by bpf_mtap code.
+ *
+ * Lock order:
+ *
+ * Global lock, interface lock, descriptor lock
+ *
+ * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
+ * working model. In many places (like bpf_detachd) we start with BPF descriptor
+ * (and we need to at least rlock it to get reliable interface pointer). This
+ * gives us potential LOR. As a result, we use global lock to protect from bpf_if
+ * change in every such place.
+ *
+ * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
+ * 3) descriptor main wlock.
+ * Reading bd_bif can be protected by any of these locks, typically global lock.
+ *
+ * Changing read/write BPF filter is protected by the same three locks,
+ * the same applies for reading.
+ *
+ * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ */
+
 /*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
@@ -577,6 +609,14 @@ bad:
 static void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
+	int op_w;
+
+	BPF_LOCK_ASSERT();
+
+	op_w = V_bpf_optimize_writers;
+
+	if (d->bd_bif != NULL)
+		bpf_detachd_locked(d);
 	/*
 	 * Point d at bp, and add d to the interface's list.
 	 * Since there are many applicaiotns using BPF for
@@ -584,11 +624,13 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 	 * we can delay adding d to the list of active listeners until
 	 * some filter is configured.
 	 */
-	d->bd_bif = bp;
 
 	BPFIF_WLOCK(bp);
+	BPFD_WLOCK(d);
+
+	d->bd_bif = bp;
 
-	if (V_bpf_optimize_writers != 0) {
+	if (op_w != 0) {
 		/* Add to writers-only list */
 		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
 		/*
@@ -600,16 +642,15 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 	} else
 		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 
+	BPFD_WUNLOCK(d);
 	BPFIF_WUNLOCK(bp);
 
-	BPF_LOCK();
 	bpf_bpfd_cnt++;
-	BPF_UNLOCK();
 
 	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
 	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
 
-	if (V_bpf_optimize_writers == 0)
+	if (op_w == 0)
 		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 }
 
@@ -622,8 +663,20 @@ bpf_upgraded(struct bpf_d *d)
 {
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
 	bp = d->bd_bif;
 
+	/*
+	 * Filter can be set several times without specifying interface.
+	 * Mark d as reader and exit.
+	 */
+	if (bp == NULL) {
+		BPFD_WLOCK(d);
+		d->bd_writer = 0;
+		BPFD_WUNLOCK(d);
+		return;
+	}
+
 	BPFIF_WLOCK(bp);
 	BPFD_WLOCK(d);
 
@@ -647,15 +700,26 @@ bpf_upgraded(struct bpf_d *d)
 static void
 bpf_detachd(struct bpf_d *d)
 {
+	BPF_LOCK();
+	bpf_detachd_locked(d);
+	BPF_UNLOCK();
+}
+
+/*
+ * Detach a file from its interface.
+ */
+static void
+bpf_detachd_locked(struct bpf_d *d)
+{
 	int error;
 	struct bpf_if *bp;
 	struct ifnet *ifp;
 
 	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
 
-	BPF_LOCK_ASSERT();
+	if ((bp = d->bd_bif) == NULL)
+		return;
 
-	bp = d->bd_bif;
 	BPFIF_WLOCK(bp);
 	BPFD_WLOCK(d);
 
@@ -672,7 +736,6 @@ bpf_detachd(struct bpf_d *d)
 	BPFD_WUNLOCK(d);
 	BPFIF_WUNLOCK(bp);
 
-	/* We're already protected by global lock. */
 	bpf_bpfd_cnt--;
 
 	/* Call event handler iff d is attached */
@@ -716,10 +779,7 @@ bpf_dtor(void *data)
 	d->bd_state = BPF_IDLE;
 	BPFD_WUNLOCK(d);
 	funsetown(&d->bd_sigio);
-	BPF_LOCK();
-	if (d->bd_bif)
-		bpf_detachd(d);
-	BPF_UNLOCK();
+	bpf_detachd(d);
 #ifdef MAC
 	mac_bpfdesc_destroy(d);
 #endif /* MAC */
@@ -959,6 +1019,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 
 	BPF_PID_REFRESH_CUR(d);
 	d->bd_wcount++;
+	/* XXX: locking required */
 	if (d->bd_bif == NULL) {
 		d->bd_wdcount++;
 		return (ENXIO);
@@ -979,6 +1040,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 	bzero(&dst, sizeof(dst));
 	m = NULL;
 	hlen = 0;
+	/* XXX: bpf_movein() can sleep */
 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
 	    &m, &dst, &hlen, d->bd_wfilter);
 	if (error) {
@@ -1158,7 +1220,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	case BIOCGDLTLIST32:
 	case BIOCGRTIMEOUT32:
 	case BIOCSRTIMEOUT32:
+		BPFD_WLOCK(d);
 		d->bd_compat32 = 1;
+		BPFD_WUNLOCK(d);
 	}
 #endif
 
@@ -1176,11 +1240,11 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		{
 			int n;
 
-			BPFD_WLOCK(d);
+			BPFD_RLOCK(d);
 			n = d->bd_slen;
-			if (d->bd_hbuf)
+			if (d->bd_hbuf != 0)
 				n += d->bd_hlen;
-			BPFD_WUNLOCK(d);
+			BPFD_RUNLOCK(d);
 
 			*(int *)addr = n;
 			break;
@@ -1190,12 +1254,14 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		{
 			struct ifnet *ifp;
 
+			BPF_LOCK();
 			if (d->bd_bif == NULL)
 				error = EINVAL;
 			else {
 				ifp = d->bd_bif->bif_ifp;
 				error = (*ifp->if_ioctl)(ifp, cmd, addr);
 			}
+			BPF_UNLOCK();
 			break;
 		}
 
@@ -1203,7 +1269,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get buffer len [for read()].
 	 */
 	case BIOCGBLEN:
+		BPFD_RLOCK(d);
 		*(u_int *)addr = d->bd_bufsize;
+		BPFD_RUNLOCK(d);
 		break;
 
 	/*
@@ -1240,28 +1308,30 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Put interface into promiscuous mode.
 	 */
 	case BIOCPROMISC:
+		BPF_LOCK();
 		if (d->bd_bif == NULL) {
 			/*
 			 * No interface attached yet.
 			 */
 			error = EINVAL;
-			break;
-		}
-		if (d->bd_promisc == 0) {
+		} else if (d->bd_promisc == 0) {
 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
 			if (error == 0)
 				d->bd_promisc = 1;
 		}
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get current data link type.
 	 */
 	case BIOCGDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			*(u_int *)addr = d->bd_bif->bif_dlt;
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1276,6 +1346,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			list32 = (struct bpf_dltlist32 *)addr;
 			dltlist.bfl_len = list32->bfl_len;
 			dltlist.bfl_list = PTRIN(list32->bfl_list);
+			BPF_LOCK();
 			if (d->bd_bif == NULL)
 				error = EINVAL;
 			else {
@@ -1283,31 +1354,37 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 				if (error == 0)
 					list32->bfl_len = dltlist.bfl_len;
 			}
+			BPF_UNLOCK();
 			break;
 		}
 #endif
 
 	case BIOCGDLTLIST:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set data link type.
 	 */
 	case BIOCSDLT:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else
 			error = bpf_setdlt(d, *(u_int *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Get interface name.
 	 */
 	case BIOCGETIF:
+		BPF_LOCK();
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
@@ -1317,13 +1394,16 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			strlcpy(ifr->ifr_name, ifp->if_xname,
 			    sizeof(ifr->ifr_name));
 		}
+		BPF_UNLOCK();
 		break;
 
 	/*
 	 * Set interface.
 	 */
 	case BIOCSETIF:
+		BPF_LOCK();
 		error = bpf_setif(d, (struct ifreq *)addr);
+		BPF_UNLOCK();
 		break;
 
 	/*
@@ -1406,7 +1486,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Set immediate mode.
 	 */
 	case BIOCIMMEDIATE:
+		BPFD_WLOCK(d);
 		d->bd_immediate = *(u_int *)addr;
+		BPFD_WUNLOCK(d);
 		break;
 
 	case BIOCVERSION:
@@ -1422,21 +1504,27 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get "header already complete" flag
 	 */
 	case BIOCGHDRCMPLT:
+		BPFD_RLOCK(d);
 		*(u_int *)addr = d->bd_hdrcmplt;
+		BPFD_RUNLOCK(d);
 		break;
 
 	/*
 	 * Set "header already complete" flag
 	 */
 	case BIOCSHDRCMPLT:
+		BPFD_WLOCK(d);
 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+		BPFD_WUNLOCK(d);
 		break;
 
 	/*
 	 * Get packet direction flag
 	 */
 	case BIOCGDIRECTION:
+		BPFD_RLOCK(d);
 		*(u_int *)addr = d->bd_direction;
+		BPFD_RUNLOCK(d);
 		break;
 
 	/*
@@ -1451,7 +1539,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			case BPF_D_IN:
 			case BPF_D_INOUT:
 			case BPF_D_OUT:
+				BPFD_WLOCK(d);
 				d->bd_direction = direction;
+				BPFD_WUNLOCK(d);
 				break;
 			default:
 				error = EINVAL;
@@ -1463,7 +1553,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	 * Get packet timestamp format and resolution.
 	 */
 	case BIOCGTSTAMP:
+		BPFD_RLOCK(d);
 		*(u_int *)addr = d->bd_tstamp;
+		BPFD_RUNLOCK(d);
 		break;
 
 	/*
@@ -1474,34 +1566,57 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			u_int	func;
 
 			func = *(u_int *)addr;
-			if (BPF_T_VALID(func))
+			if (BPF_T_VALID(func)) {
+				BPFD_WLOCK(d);
 				d->bd_tstamp = func;
-			else
+				BPFD_WUNLOCK(d);
+			} else
 				error = EINVAL;
 		}
 		break;
 
 	case BIOCFEEDBACK:
+		BPFD_WLOCK(d);
 		d->bd_feedback = *(u_int *)addr;
+		BPFD_WUNLOCK(d);
 		break;
 
 	case BIOCLOCK:
+		BPFD_WLOCK(d);
 		d->bd_locked = 1;
+		BPFD_WUNLOCK(d);
 		break;
 
 	case FIONBIO:		/* Non-blocking I/O */
 		break;
 
 	case FIOASYNC:		/* Send signal on receive packets */
+		BPFD_WLOCK(d);
 		d->bd_async = *(int *)addr;
+		BPFD_WUNLOCK(d);
 		break;
 
 	case FIOSETOWN:
-		error = fsetown(*(int *)addr, &d->bd_sigio);
+		{
+			struct sigio *psio = d->bd_sigio;
+
+			/*
+			 * XXX: Add some sort of locking here?
+			 * fsetown() can sleep.
+			 * */
+			error = fsetown(*(int *)addr, &psio);
+			if (error == 0) {
+				BPFD_WLOCK(d);
+				d->bd_sigio = psio;
+				BPFD_WUNLOCK(d);
+			}
+		}
 		break;
 
 	case FIOGETOWN:
+		BPFD_RLOCK(d);
 		*(int *)addr = fgetown(&d->bd_sigio);
+		BPFD_RUNLOCK(d);
 		break;
 
 	/* This is deprecated, FIOSETOWN should be used instead. */
@@ -1522,16 +1637,23 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 
 			if (sig >= NSIG)
 				error = EINVAL;
-			else
+			else {
+				BPFD_WLOCK(d);
 				d->bd_sig = sig;
+				BPFD_WUNLOCK(d);
+			}
 			break;
 		}
 	case BIOCGRSIG:
+		BPFD_RLOCK(d);
 		*(u_int *)addr = d->bd_sig;
+		BPFD_RUNLOCK(d);
 		break;
 
 	case BIOCGETBUFMODE:
+		BPFD_RLOCK(d);
 		*(u_int *)addr = d->bd_bufmode;
+		BPFD_RUNLOCK(d);
 		break;
 
 	case BIOCSETBUFMODE:
@@ -1609,6 +1731,20 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 			cmd = BIOCSETWF;
 	}
 #endif
+
+	flen = fp->bf_len;
+	if ((flen > bpf_maxinsns) || ((fp->bf_insns == NULL) && (flen != 0)))
+		return (EINVAL);
+
+	need_upgrade = 0;
+	size = flen * sizeof(*fp->bf_insns);
+	if (size > 0)
+		fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
+	else
+		fcode = NULL; /* Make compiler happy */
+
+	BPF_LOCK();
+
 	if (cmd == BIOCSETWF) {
 		old = d->bd_wfilter;
 		wfilter = 1;
@@ -1623,13 +1759,12 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 #endif
 	}
 	if (fp->bf_insns == NULL) {
-		if (fp->bf_len != 0)
-			return (EINVAL);
 		/* 
-		 * Protect filter change by interface lock, too.
-		 * The same lock order is used by bpf_detachd().
+		 * Protect filter change by interface lock.
+		 * Additionally, we are protected by global lock here.
 		 */
-		BPFIF_WLOCK(d->bd_bif);
+		if (d->bd_bif != NULL)
+			BPFIF_WLOCK(d->bd_bif);
 		BPFD_WLOCK(d);
 		if (wfilter)
 			d->bd_wfilter = NULL;
@@ -1642,29 +1777,25 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 				reset_d(d);
 		}
 		BPFD_WUNLOCK(d);
-		BPFIF_WUNLOCK(d->bd_bif);
+		if (d->bd_bif != NULL)
+			BPFIF_WUNLOCK(d->bd_bif);
 		if (old != NULL)
 			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
 		if (ofunc != NULL)
 			bpf_destroy_jit_filter(ofunc);
 #endif
+		BPF_UNLOCK();
 		return (0);
 	}
-	flen = fp->bf_len;
-	if (flen > bpf_maxinsns)
-		return (EINVAL);
-
-	need_upgrade = 0;
-	size = flen * sizeof(*fp->bf_insns);
-	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
 	    bpf_validate(fcode, (int)flen)) {
 		/* 
 		 * Protect filter change by interface lock, too
-		 * The same lock order is used by bpf_detachd().
+		 * Additionally, we are protected by global lock here.
 		 */
-		BPFIF_WLOCK(d->bd_bif);
+		if (d->bd_bif != NULL)
+			BPFIF_WLOCK(d->bd_bif);
 		BPFD_WLOCK(d);
 		if (wfilter)
 			d->bd_wfilter = fcode;
@@ -1687,7 +1818,8 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 			    __func__, d->bd_pid, d->bd_writer, need_upgrade);
 		}
 		BPFD_WUNLOCK(d);
-		BPFIF_WUNLOCK(d->bd_bif);
+		if (d->bd_bif != NULL)
+			BPFIF_WUNLOCK(d->bd_bif);
 		if (old != NULL)
 			free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
@@ -1699,8 +1831,10 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 		if (need_upgrade != 0)
 			bpf_upgraded(d);
 
+		BPF_UNLOCK();
 		return (0);
 	}
+	BPF_UNLOCK();
 	free((caddr_t)fcode, M_BPF);
 	return (EINVAL);
 }
@@ -1716,21 +1850,29 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 	struct bpf_if *bp;
 	struct ifnet *theywant;
 
+	BPF_LOCK_ASSERT();
+
 	theywant = ifunit(ifr->ifr_name);
-	if (theywant == NULL || theywant->if_bpf == NULL)
+	if (theywant == NULL || theywant->if_bpf == NULL) {
 		return (ENXIO);
+	}
 
 	bp = theywant->if_bpf;
 
+	BPFIF_RLOCK(bp);
+	if (bp->flags & BPFIF_FLAG_DYING) {
+		BPFIF_RUNLOCK(bp);
+		return (ENXIO);
+	}
+	BPFIF_RUNLOCK(bp);
+
 	/*
 	 * Behavior here depends on the buffering model.  If we're using
 	 * kernel memory buffers, then we can allocate them here.  If we're
 	 * using zero-copy, then the user process must have registered
 	 * buffers by the time we get here.  If not, return an error.
-	 *
-	 * XXXRW: There are locking issues here with multi-threaded use: what
-	 * if two threads try to set the interface at once?
 	 */
+
 	switch (d->bd_bufmode) {
 	case BPF_BUFMODE_BUFFER:
 		if (d->bd_sbuf == NULL)
@@ -1746,15 +1888,9 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 	default:
 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
 	}
-	if (bp != d->bd_bif) {
-		if (d->bd_bif)
-			/*
-			 * Detach if attached to something else.
-			 */
-			bpf_detachd(d);
-
+	if (bp != d->bd_bif)
 		bpf_attachd(d, bp);
-	}
+
 	BPFD_WLOCK(d);
 	reset_d(d);
 	BPFD_WUNLOCK(d);
@@ -1942,22 +2078,22 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 		else
 #endif
 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
-		if (slen != 0) {
-			/*
-			 * Filter matches. Let's to acquire write lock.
-			 */
-			BPFD_WLOCK(d);
+		if (slen == 0)
+			continue;
 
-			d->bd_fcount++;
-			if (gottime < bpf_ts_quality(d->bd_tstamp))
-				gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
+		/*
+		 * Filter matches. Let's acquire write lock.
+		 */
+		BPFD_WLOCK(d);
+
+		d->bd_fcount++;
+		if (gottime < bpf_ts_quality(d->bd_tstamp))
+			gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
 #ifdef MAC
-			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+		if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
 #endif
-				catchpacket(d, pkt, pktlen, slen,
-				    bpf_append_bytes, &bt);
-			BPFD_WUNLOCK(d);
-		}
+			catchpacket(d, pkt, pktlen, slen, bpf_append_bytes, &bt);
+		BPFD_WUNLOCK(d);
 	}
 	BPFIF_RUNLOCK(bp);
 }
@@ -2004,19 +2140,19 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 		else
 #endif
 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
-		if (slen != 0) {
-			BPFD_WLOCK(d);
+		if (slen == 0)
+			continue;
+
+		BPFD_WLOCK(d);
 
-			d->bd_fcount++;
-			if (gottime < bpf_ts_quality(d->bd_tstamp))
-				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
+		d->bd_fcount++;
+		if (gottime < bpf_ts_quality(d->bd_tstamp))
+			gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
-			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+		if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
 #endif
-				catchpacket(d, (u_char *)m, pktlen, slen,
-				    bpf_append_mbuf, &bt);
-			BPFD_WUNLOCK(d);
-		}
+			catchpacket(d, (u_char *)m, pktlen, slen, bpf_append_mbuf, &bt);
+		BPFD_WUNLOCK(d);
 	}
 	BPFIF_RUNLOCK(bp);
 }
@@ -2060,19 +2196,19 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 			continue;
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
-		if (slen != 0) {
-			BPFD_WLOCK(d);
+		if (slen != 0)
+			continue;
+
+		BPFD_WLOCK(d);
 
-			d->bd_fcount++;
-			if (gottime < bpf_ts_quality(d->bd_tstamp))
-				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
+		d->bd_fcount++;
+		if (gottime < bpf_ts_quality(d->bd_tstamp))
+			gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
-			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+		if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
 #endif
-				catchpacket(d, (u_char *)&mb, pktlen, slen,
-				    bpf_append_mbuf, &bt);
-			BPFD_WUNLOCK(d);
-		}
+			catchpacket(d, (u_char *)&mb, pktlen, slen, bpf_append_mbuf, &bt);
+		BPFD_WUNLOCK(d);
 	}
 	BPFIF_RUNLOCK(bp);
 }
@@ -2352,19 +2488,20 @@ bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
 
 	BPF_LOCK();
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
-	BPF_UNLOCK();
 
 	bp->bif_hdrlen = hdrlen;
+	BPF_UNLOCK();
 
 	if (bootverbose)
 		if_printf(ifp, "bpf attached\n");
+	CTR3(KTR_NET, "%s: Attaching BPF instance %p to interface %p",
+	    __func__, bp, ifp);
 }
 
 /*
- * Detach bpf from an interface.  This involves detaching each descriptor
- * associated with the interface, and leaving bd_bif NULL.  Notify each
- * descriptor as it's detached so that any sleepers wake up and get
- * ENXIO.
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface. Notify each descriptor as it's detached
+ * so that any sleepers wake up and get ENXIO.
  */
 void
 bpfdetach(struct ifnet *ifp)
@@ -2378,30 +2515,44 @@ bpfdetach(struct ifnet *ifp)
 #endif
 
 	/* Find all bpf_if struct's which reference ifp and detach them. */
+	BPF_LOCK();
 	do {
-		BPF_LOCK();
 		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 			if (ifp == bp->bif_ifp)
 				break;
 		}
 		if (bp != NULL)
 			LIST_REMOVE(bp, bif_next);
-		BPF_UNLOCK();
 
 		if (bp != NULL) {
 #ifdef INVARIANTS
 			ndetached++;
 #endif
 			while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
-				bpf_detachd(d);
+				bpf_detachd_locked(d);
 				BPFD_WLOCK(d);
 				bpf_wakeup(d);
 				BPFD_WUNLOCK(d);
 			}
-			rw_destroy(&bp->bif_lock);
-			free(bp, M_BPF);
+
+			while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+				bpf_detachd_locked(d);
+				BPFD_WLOCK(d);
+				bpf_wakeup(d);
+				BPFD_WUNLOCK(d);
+			}
+
+			/*
+			 * Delay freing bp till interface is detached
+			 * and all routes through this interface are removed.
+			 * Mark bp as detached to restrict new consumers.
+			 */
+			BPFIF_WLOCK(bp);
+			bp->flags |= BPFIF_FLAG_DYING;
+			BPFIF_WUNLOCK(bp);
 		}
 	} while (bp != NULL);
+	BPF_UNLOCK();
 
 #ifdef INVARIANTS
 	if (ndetached == 0)
@@ -2410,6 +2561,25 @@ bpfdetach(struct ifnet *ifp)
 }
 
 /*
+ * Interface departure handler
+ */
+static void
+bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct bpf_if *bp;
+
+	if ((bp = ifp->if_bpf) == NULL)
+		return;
+
+	CTR3(KTR_NET, "%s: freing BPF instance %p for interface %p",
+	    __func__, bp, ifp);
+
+	ifp->if_bpf = NULL;
+	rw_destroy(&bp->bif_lock);
+	free(bp, M_BPF);
+}
+
+/*
  * Get a list of available data link type of the interface.
  */
 static int
@@ -2419,16 +2589,16 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	ifp = d->bd_bif->bif_ifp;
 	n = 0;
 	error = 0;
-	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp != ifp)
 			continue;
 		if (bfl->bfl_list != NULL) {
 			if (n >= bfl->bfl_len) {
-				BPF_UNLOCK();
 				return (ENOMEM);
 			}
 			error = copyout(&bp->bif_dlt,
@@ -2436,7 +2606,6 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 		}
 		n++;
 	}
-	BPF_UNLOCK();
 	bfl->bfl_len = n;
 	return (error);
 }
@@ -2451,18 +2620,17 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 	struct ifnet *ifp;
 	struct bpf_if *bp;
 
+	BPF_LOCK_ASSERT();
+
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 	ifp = d->bd_bif->bif_ifp;
-	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
 			break;
 	}
-	BPF_UNLOCK();
 	if (bp != NULL) {
 		opromisc = d->bd_promisc;
-		bpf_detachd(d);
 		bpf_attachd(d, bp);
 		BPFD_WLOCK(d);
 		reset_d(d);
@@ -2477,6 +2645,7 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 				d->bd_promisc = 1;
 		}
 	}
+
 	return (bp == NULL ? EINVAL : 0);
 }
 
@@ -2491,6 +2660,11 @@ bpf_drvinit(void *unused)
 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
 	/* For compatibility */
 	make_dev_alias(dev, "bpf0");
+
+	/* Register interface departure handler */
+	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
+		    ifnet_departure_event, bpf_ifdetach, NULL,
+		    EVENTHANDLER_PRI_ANY);
 }
 
 /*
@@ -2522,6 +2696,9 @@ bpf_zero_counters(void)
 	BPF_UNLOCK();
 }
 
+/*
+ * Fill filter statistics
+ */
 static void
 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 {
@@ -2530,6 +2707,7 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	BPFD_LOCK_ASSERT(bd);
 	d->bd_structsize = sizeof(*d);
 	d->bd_immediate = bd->bd_immediate;
+	/* XXX: reading should be protected by global lock */
 	d->bd_promisc = bd->bd_promisc;
 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
 	d->bd_direction = bd->bd_direction;
@@ -2553,6 +2731,9 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	d->bd_bufmode = bd->bd_bufmode;
 }
 
+/*
+ * Handle `netstat -B' stats request
+ */
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
index fa34894..6a7e661 100644
--- a/sys/net/bpf.h
+++ b/sys/net/bpf.h
@@ -1105,6 +1105,7 @@ struct bpf_if {
 	struct ifnet *bif_ifp;		/* corresponding interface */
 	struct rwlock bif_lock;		/* interface lock */
 	LIST_HEAD(, bpf_d)	bif_wlist;	/* writer-only list */
+	int flags;			/* Interface flags */
 #endif
 };
 
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
index 842c018..23b6eb6 100644
--- a/sys/net/bpfdesc.h
+++ b/sys/net/bpfdesc.h
@@ -159,4 +159,6 @@ struct xbpf_d {
 #define BPFIF_WLOCK(bif)	rw_wlock(&(bif)->bif_lock)
 #define BPFIF_WUNLOCK(bif)	rw_wunlock(&(bif)->bif_lock)
 
+#define BPFIF_FLAG_DYING	1	/* Reject new bpf consumers */
+
 #endif
-- 
1.7.9.4


--------------010400080205040607050602--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4F8BBD4E.1040106>