Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 26 Nov 2000 11:05:54 -0600
From:      Jonathan Lemon <jlemon@flugsvamp.com>
To:        smp@freebsd.org
Subject:   patch to make bpf mpsafe
Message-ID:  <20001126110554.H69183@prism.flugsvamp.com>

next in thread | raw e-mail | index | archive | help
  Attached is a patch to make bpf mpsafe (and also cleans up some cruft).
BPF happened to be the first subsystem called by ether_input, so was the
first thing that I looked at.

3 levels of mutexes are added:

   bpf_mtx  - global to BPF, protects BPF interface list, device open
      (bif_next, bif_ifp)
   bpf interface mtx - protects per-interface descriptor list
      (bd_next, bd_bif, ifp->if_bpf)
   bpf descriptor mtx - protects fields in the individual descriptor.

Locking order is top to bottom, but the hierarchy can be entered at
any point.

Network drivers start by locking the interface mutex, then walking the
list, locking each descriptor as it is manipulated.

User level code reading from the bpf descriptor sleeps on the descriptor 
mutex. This has been lightly tested with an mpsafe network driver.  
Comments welcome.
--
Jonathan


Index: bpf.c
===================================================================
RCS file: /ncvs/src/sys/net/bpf.c,v
retrieving revision 1.69
diff -u -r1.69 bpf.c
--- bpf.c	2000/11/03 00:51:41	1.69
+++ bpf.c	2000/11/25 17:42:30
@@ -82,20 +82,8 @@
 
 #if NBPF > 0
 
-/*
- * Older BSDs don't have kernel malloc.
- */
-#if BSD < 199103
-extern bcopy();
-static caddr_t bpf_alloc();
-#include <net/bpf_compat.h>
-#define BPF_BUFSIZE (MCLBYTES-8)
-#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio)
-#else
 #define BPF_BUFSIZE 4096
 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
-#endif
-
 #define PRINET  26			/* interruptible */
 
 /*
@@ -112,6 +100,10 @@
  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
  */
 static struct bpf_if	*bpf_iflist;
+static struct mtx	bpf_mtx;		/* bpf global lock */
+
+#define BPF_LOCK	mtx_enter(&bpf_mtx, MTX_DEF)
+#define BPF_UNLOCK	mtx_exit(&bpf_mtx, MTX_DEF)
 
 static int	bpf_allocbufs __P((struct bpf_d *));
 static void	bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp));
@@ -235,13 +228,8 @@
 	if (m == 0)
 		return (ENOBUFS);
 	if (len > MHLEN) {
-#if BSD >= 199103
 		MCLGET(m, M_WAIT);
 		if ((m->m_flags & M_EXT) == 0) {
-#else
-		MCLGET(m);
-		if (m->m_len != MCLBYTES) {
-#endif
 			error = ENOBUFS;
 			goto bad;
 		}
@@ -286,11 +274,13 @@
 	 * Finally, point the driver's bpf cookie at the interface so
 	 * it will divert packets to bpf.
 	 */
+	BPFIF_LOCK(bp);
 	d->bd_bif = bp;
 	d->bd_next = bp->bif_dlist;
 	bp->bif_dlist = d;
 
 	bp->bif_ifp->if_bpf = bp;
+	BPFIF_UNLOCK(bp);
 }
 
 /*
@@ -324,6 +314,7 @@
 		}
 	}
 	/* Remove d from the interface's descriptor list. */
+	BPFIF_LOCK(bp);
 	p = &bp->bif_dlist;
 	while (*p != d) {
 		p = &(*p)->bd_next;
@@ -336,6 +327,7 @@
 		 * Let the driver know that there are no more listeners.
 		 */
 		d->bd_bif->bif_ifp->if_bpf = 0;
+	BPFIF_UNLOCK(bp);
 	d->bd_bif = 0;
 }
 
@@ -356,13 +348,16 @@
 	if (p->p_prison)
 		return (EPERM);
 
+	BPF_LOCK;
 	d = dev->si_drv1;
 	/*
 	 * Each minor can be opened by only one process.  If the requested 
 	 * minor is in use, return EBUSY.
 	 */
-	if (d)
+	if (d) {
+		BPF_UNLOCK;
 		return (EBUSY);
+	}
 	if ((dev->si_flags & SI_NAMED) == 0)
 		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
 		    "bpf%d", dev2unit(dev));
@@ -372,6 +367,8 @@
 	d->bd_bufsize = bpf_bufsize;
 	d->bd_sig = SIGIO;
 	d->bd_seesent = 1;
+	mtx_init(&d->bd_mtx, devtoname(dev), MTX_DEF);
+	BPF_UNLOCK;
 
 	return (0);
 }
@@ -393,8 +390,10 @@
 
 	funsetown(d->bd_sigio);
 	s = splimp();
+	BPF_LOCK;
 	if (d->bd_bif)
 		bpf_detachd(d);
+	BPF_UNLOCK;
 	splx(s);
 	bpf_freed(d);
 	dev->si_drv1 = 0;
@@ -403,45 +402,7 @@
 	return (0);
 }
 
-/*
- * Support for SunOS, which does not have tsleep.
- */
-#if BSD < 199103
-static
-bpf_timeout(arg)
-	caddr_t arg;
-{
-	struct bpf_d *d = (struct bpf_d *)arg;
-	d->bd_timedout = 1;
-	wakeup(arg);
-}
 
-#define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan)
-
-int
-bpf_sleep(d)
-	register struct bpf_d *d;
-{
-	register int rto = d->bd_rtout;
-	register int st;
-
-	if (rto != 0) {
-		d->bd_timedout = 0;
-		timeout(bpf_timeout, (caddr_t)d, rto);
-	}
-	st = sleep((caddr_t)d, PRINET|PCATCH);
-	if (rto != 0) {
-		if (d->bd_timedout == 0)
-			untimeout(bpf_timeout, (caddr_t)d);
-		else if (st == 0)
-			return EWOULDBLOCK;
-	}
-	return (st != 0) ? EINTR : 0;
-}
-#else
-#define BPF_SLEEP tsleep
-#endif
-
 /*
  * Rotate the packet buffers in descriptor d.  Move the store buffer
  * into the hold slot, and the free buffer into the store slot.
@@ -474,6 +435,7 @@
 		return (EINVAL);
 
 	s = splimp();
+	BPFD_LOCK(d);
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
 	 * ends when the timeout expires or when enough packets
@@ -497,6 +459,7 @@
 		 * it before using it again.
 		 */
 		if (d->bd_bif == NULL) {
+			BPFD_UNLOCK(d);
 			splx(s);
 			return (ENXIO);
 		}
@@ -504,9 +467,10 @@
 		if (ioflag & IO_NDELAY)
 			error = EWOULDBLOCK;
 		else
-			error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf",
-					  d->bd_rtout);
+			error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH,
+			     "bpf", d->bd_rtout);
 		if (error == EINTR || error == ERESTART) {
+			BPFD_UNLOCK(d);
 			splx(s);
 			return (error);
 		}
@@ -525,6 +489,7 @@
 				break;
 
 			if (d->bd_slen == 0) {
+				BPFD_UNLOCK(d);
 				splx(s);
 				return (0);
 			}
@@ -535,6 +500,7 @@
 	/*
 	 * At this point, we know we have something in the hold slot.
 	 */
+	BPFD_UNLOCK(d);
 	splx(s);
 
 	/*
@@ -545,9 +511,11 @@
 	error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
 
 	s = splimp();
+	BPFD_LOCK(d);
 	d->bd_fbuf = d->bd_hbuf;
 	d->bd_hbuf = 0;
 	d->bd_hlen = 0;
+	BPFD_UNLOCK(d);
 	splx(s);
 
 	return (error);
@@ -565,17 +533,9 @@
 	if (d->bd_async && d->bd_sig && d->bd_sigio)
 		pgsigio(d->bd_sigio, d->bd_sig, 0);
 
-#if BSD >= 199103
 	selwakeup(&d->bd_sel);
 	/* XXX */
 	d->bd_sel.si_pid = 0;
-#else
-	if (d->bd_selproc) {
-		selwakeup(d->bd_selproc, (int)d->bd_selcoll);
-		d->bd_selcoll = 0;
-		d->bd_selproc = 0;
-	}
-#endif
 }
 
 static	int
@@ -610,11 +570,9 @@
 		dst.sa_family = pseudo_AF_HDRCMPLT;
 
 	s = splnet();
-#if BSD >= 199103
+	mtx_enter(&Giant, MTX_DEF);
 	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
-#else
-	error = (*ifp->if_output)(ifp, m, &dst);
-#endif
+	mtx_exit(&Giant, MTX_DEF);
 	splx(s);
 	/*
 	 * The driver frees the mbuf.
@@ -624,12 +582,15 @@
 
 /*
  * Reset a descriptor by flushing its packet buffer and clearing the
- * receive and drop counts.  Should be called at splimp.
+ * receive and drop counts.  Should be called with the descriptor mutex
+ * held.
  */
 static void
 reset_d(d)
 	struct bpf_d *d;
 {
+
+	mtx_assert(&d->bd_mtx, MA_OWNED);
 	if (d->bd_hbuf) {
 		/* Free the hold buffer. */
 		d->bd_fbuf = d->bd_hbuf;
@@ -670,7 +631,7 @@
 	int flags;
 	struct proc *p;
 {
-	register struct bpf_d *d = dev->si_drv1;
+	struct bpf_d *d = dev->si_drv1;
 	int s, error = 0;
 
 	switch (cmd) {
@@ -687,9 +648,11 @@
 			int n;
 
 			s = splimp();
+			BPFD_LOCK(d);
 			n = d->bd_slen;
 			if (d->bd_hbuf)
 				n += d->bd_hlen;
+			BPFD_UNLOCK(d);
 			splx(s);
 
 			*(int *)addr = n;
@@ -720,9 +683,6 @@
 	 * Set buffer length.
 	 */
 	case BIOCSBLEN:
-#if BSD < 199103
-		error = EINVAL;
-#else
 		if (d->bd_bif != 0)
 			error = EINVAL;
 		else {
@@ -734,7 +694,6 @@
 				*(u_int *)addr = size = BPF_MINBUFSIZE;
 			d->bd_bufsize = size;
 		}
-#endif
 		break;
 
 	/*
@@ -749,7 +708,9 @@
 	 */
 	case BIOCFLUSH:
 		s = splimp();
+		BPFD_LOCK(d);
 		reset_d(d);
+		BPFD_UNLOCK(d);
 		splx(s);
 		break;
 
@@ -951,8 +912,10 @@
 		if (fp->bf_len != 0)
 			return (EINVAL);
 		s = splimp();
+		BPFD_LOCK(d);
 		d->bd_filter = 0;
 		reset_d(d);
+		BPFD_UNLOCK(d);
 		splx(s);
 		if (old != 0)
 			free((caddr_t)old, M_BPF);
@@ -967,8 +930,10 @@
 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
 	    bpf_validate(fcode, (int)flen)) {
 		s = splimp();
+		BPFD_LOCK(d);
 		d->bd_filter = fcode;
 		reset_d(d);
+		BPFD_UNLOCK(d);
 		splx(s);
 		if (old != 0)
 			free((caddr_t)old, M_BPF);
@@ -990,7 +955,7 @@
 	struct ifreq *ifr;
 {
 	struct bpf_if *bp;
-	int s, error;
+	int error;
 	struct ifnet *theywant;
 
 	theywant = ifunit(ifr->ifr_name);
@@ -1000,11 +965,14 @@
 	/*
 	 * Look through attached interfaces for the named one.
 	 */
+	BPF_LOCK;
 	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
 		struct ifnet *ifp = bp->bif_ifp;
 
 		if (ifp == 0 || ifp != theywant)
 			continue;
+
+		BPF_UNLOCK;
 		/*
 		 * We found the requested interface.
 		 * If it's not up, return an error.
@@ -1020,7 +988,6 @@
 			if (error != 0)
 				return (error);
 		}
-		s = splimp();
 		if (bp != d->bd_bif) {
 			if (d->bd_bif)
 				/*
@@ -1030,10 +997,12 @@
 
 			bpf_attachd(d, bp);
 		}
+		BPFD_LOCK(d);
 		reset_d(d);
-		splx(s);
+		BPFD_UNLOCK(d);
 		return (0);
 	}
+	BPF_UNLOCK;
 	/* Not found. */
 	return (ENXIO);
 }
@@ -1063,12 +1032,14 @@
 		return (ENXIO);
 
 	s = splimp();
+	BPFD_LOCK(d);
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
 			revents |= events & (POLLIN | POLLRDNORM);
 		else
 			selrecord(p, &d->bd_sel);
 	}
+	BPFD_UNLOCK(d);
 	splx(s);
 	return (revents);
 }
@@ -1088,18 +1059,18 @@
 	struct bpf_if *bp;
 	register struct bpf_d *d;
 	register u_int slen;
-	/*
-	 * Note that the ipl does not have to be raised at this point.
-	 * The only problem that could arise here is that if two different
-	 * interfaces shared any data.  This is not the case.
-	 */
+
 	bp = ifp->if_bpf;
+	BPFIF_LOCK(bp);
 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
+		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
 		if (slen != 0)
 			catchpacket(d, pkt, pktlen, slen, bcopy);
+		BPFD_UNLOCK(d);
 	}
+	BPFIF_UNLOCK(bp);
 }
 
 /*
@@ -1146,14 +1117,18 @@
 	for (m0 = m; m0 != 0; m0 = m0->m_next)
 		pktlen += m0->m_len;
 
+	BPFIF_LOCK(bp);
 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
 		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
 			continue;
+		BPFD_LOCK(d);
 		++d->bd_rcount;
 		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
 		if (slen != 0)
 			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
+		BPFD_UNLOCK(d);
 	}
+	BPFIF_UNLOCK(bp);
 }
 
 /*
@@ -1276,6 +1251,7 @@
 	}
 	if (d->bd_filter)
 		free((caddr_t)d->bd_filter, M_BPF);
+	mtx_destroy(&d->bd_mtx);
 }
 
 /*
@@ -1296,9 +1272,12 @@
 	bp->bif_dlist = 0;
 	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
+	mtx_init(&bp->bif_mtx, "bpf interface lock", MTX_DEF);
 
+	BPF_LOCK;
 	bp->bif_next = bpf_iflist;
 	bpf_iflist = bp;
+	BPF_UNLOCK;
 
 	bp->bif_ifp->if_bpf = 0;
 
@@ -1329,6 +1308,7 @@
 	int	s;
 
 	s = splimp();
+	BPF_LOCK;
 
 	/* Locate BPF interface information */
 	bp_prev = NULL;
@@ -1340,25 +1320,30 @@
 
 	/* Interface wasn't attached */
 	if (bp->bif_ifp == NULL) {
+		BPF_UNLOCK;
 		splx(s);
 		printf("bpfdetach: %s%d was not attached\n", ifp->if_name,
 		    ifp->if_unit);
 		return;
 	}
 
-	while ((d = bp->bif_dlist) != NULL) {
-		bpf_detachd(d);
-		bpf_wakeup(d);
-	}
-
 	if (bp_prev) {
 		bp_prev->bif_next = bp->bif_next;
 	} else {
 		bpf_iflist = bp->bif_next;
 	}
 
+	while ((d = bp->bif_dlist) != NULL) {
+		bpf_detachd(d);
+		BPFD_LOCK(d);
+		bpf_wakeup(d);
+		BPFD_UNLOCK(d);
+	}
+
+	mtx_destroy(&bp->bif_mtx);
 	free(bp, M_BPF);
 
+	BPF_UNLOCK;
 	splx(s);
 }
 
@@ -1390,6 +1375,7 @@
 	void *unused;
 {
 
+	mtx_init(&bpf_mtx, "bpf global lock", MTX_DEF);
 	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
 	cdevsw_add(&bpf_cdevsw);
 }


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-smp" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20001126110554.H69183>