From owner-freebsd-smp Sun Nov 26 9: 7:27 2000 Delivered-To: freebsd-smp@freebsd.org Received: from prism.flugsvamp.com (cb58709-a.mdsn1.wi.home.com [24.17.241.9]) by hub.freebsd.org (Postfix) with ESMTP id 0887B37B4C5 for ; Sun, 26 Nov 2000 09:07:12 -0800 (PST) Received: (from jlemon@localhost) by prism.flugsvamp.com (8.11.0/8.11.0) id eAQH5sb95721 for smp@freebsd.org; Sun, 26 Nov 2000 11:05:54 -0600 (CST) (envelope-from jlemon) Date: Sun, 26 Nov 2000 11:05:54 -0600 From: Jonathan Lemon To: smp@freebsd.org Subject: patch to make bpf mpsafe Message-ID: <20001126110554.H69183@prism.flugsvamp.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Mailer: Mutt 1.0pre2i Sender: owner-freebsd-smp@FreeBSD.ORG Precedence: bulk X-Loop: FreeBSD.org Attached is a patch to make bpf mpsafe (and also cleans up some cruft). BPF happened to be the first subsystem called by ether_input, so was the first thing that I looked at. 3 levels of mutexes are added: bpf_mtx - global to BPF, protects BPF interface list, device open (bif_next, bif_ifp) bpf interface mtx - protects per-interface descriptor list (bd_next, bd_bif, ifp->if_bpf) bpf descriptor mtx - protects fields in the individual descriptor. Locking order is top to bottom, but the hierarchy can be entered at any point. Network drivers start by locking the interface mutex, then walking the list, locking each descriptor as it is manipulated. User level code reading from the bpf descriptor sleeps on the descriptor mutex. This has been lightly tested with an mpsafe network driver. Comments welcome. -- Jonathan Index: bpf.c =================================================================== RCS file: /ncvs/src/sys/net/bpf.c,v retrieving revision 1.69 diff -u -r1.69 bpf.c --- bpf.c 2000/11/03 00:51:41 1.69 +++ bpf.c 2000/11/25 17:42:30 @@ -82,20 +82,8 @@ #if NBPF > 0 -/* - * Older BSDs don't have kernel malloc. - */ -#if BSD < 199103 -extern bcopy(); -static caddr_t bpf_alloc(); -#include -#define BPF_BUFSIZE (MCLBYTES-8) -#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio) -#else #define BPF_BUFSIZE 4096 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio) -#endif - #define PRINET 26 /* interruptible */ /* @@ -112,6 +100,10 @@ * bpf_iflist is the list of interfaces; each corresponds to an ifnet */ static struct bpf_if *bpf_iflist; +static struct mtx bpf_mtx; /* bpf global lock */ + +#define BPF_LOCK mtx_enter(&bpf_mtx, MTX_DEF) +#define BPF_UNLOCK mtx_exit(&bpf_mtx, MTX_DEF) static int bpf_allocbufs __P((struct bpf_d *)); static void bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp)); @@ -235,13 +228,8 @@ if (m == 0) return (ENOBUFS); if (len > MHLEN) { -#if BSD >= 199103 MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { -#else - MCLGET(m); - if (m->m_len != MCLBYTES) { -#endif error = ENOBUFS; goto bad; } @@ -286,11 +274,13 @@ * Finally, point the driver's bpf cookie at the interface so * it will divert packets to bpf. */ + BPFIF_LOCK(bp); d->bd_bif = bp; d->bd_next = bp->bif_dlist; bp->bif_dlist = d; bp->bif_ifp->if_bpf = bp; + BPFIF_UNLOCK(bp); } /* @@ -324,6 +314,7 @@ } } /* Remove d from the interface's descriptor list. */ + BPFIF_LOCK(bp); p = &bp->bif_dlist; while (*p != d) { p = &(*p)->bd_next; @@ -336,6 +327,7 @@ * Let the driver know that there are no more listeners. */ d->bd_bif->bif_ifp->if_bpf = 0; + BPFIF_UNLOCK(bp); d->bd_bif = 0; } @@ -356,13 +348,16 @@ if (p->p_prison) return (EPERM); + BPF_LOCK; d = dev->si_drv1; /* * Each minor can be opened by only one process. If the requested * minor is in use, return EBUSY. */ - if (d) + if (d) { + BPF_UNLOCK; return (EBUSY); + } if ((dev->si_flags & SI_NAMED) == 0) make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600, "bpf%d", dev2unit(dev)); @@ -372,6 +367,8 @@ d->bd_bufsize = bpf_bufsize; d->bd_sig = SIGIO; d->bd_seesent = 1; + mtx_init(&d->bd_mtx, devtoname(dev), MTX_DEF); + BPF_UNLOCK; return (0); } @@ -393,8 +390,10 @@ funsetown(d->bd_sigio); s = splimp(); + BPF_LOCK; if (d->bd_bif) bpf_detachd(d); + BPF_UNLOCK; splx(s); bpf_freed(d); dev->si_drv1 = 0; @@ -403,45 +402,7 @@ return (0); } -/* - * Support for SunOS, which does not have tsleep. - */ -#if BSD < 199103 -static -bpf_timeout(arg) - caddr_t arg; -{ - struct bpf_d *d = (struct bpf_d *)arg; - d->bd_timedout = 1; - wakeup(arg); -} -#define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan) - -int -bpf_sleep(d) - register struct bpf_d *d; -{ - register int rto = d->bd_rtout; - register int st; - - if (rto != 0) { - d->bd_timedout = 0; - timeout(bpf_timeout, (caddr_t)d, rto); - } - st = sleep((caddr_t)d, PRINET|PCATCH); - if (rto != 0) { - if (d->bd_timedout == 0) - untimeout(bpf_timeout, (caddr_t)d); - else if (st == 0) - return EWOULDBLOCK; - } - return (st != 0) ? EINTR : 0; -} -#else -#define BPF_SLEEP tsleep -#endif - /* * Rotate the packet buffers in descriptor d. Move the store buffer * into the hold slot, and the free buffer into the store slot. @@ -474,6 +435,7 @@ return (EINVAL); s = splimp(); + BPFD_LOCK(d); /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets @@ -497,6 +459,7 @@ * it before using it again. */ if (d->bd_bif == NULL) { + BPFD_UNLOCK(d); splx(s); return (ENXIO); } @@ -504,9 +467,10 @@ if (ioflag & IO_NDELAY) error = EWOULDBLOCK; else - error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf", - d->bd_rtout); + error = msleep((caddr_t)d, &d->bd_mtx, PRINET|PCATCH, + "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { + BPFD_UNLOCK(d); splx(s); return (error); } @@ -525,6 +489,7 @@ break; if (d->bd_slen == 0) { + BPFD_UNLOCK(d); splx(s); return (0); } @@ -535,6 +500,7 @@ /* * At this point, we know we have something in the hold slot. */ + BPFD_UNLOCK(d); splx(s); /* @@ -545,9 +511,11 @@ error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); s = splimp(); + BPFD_LOCK(d); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = 0; d->bd_hlen = 0; + BPFD_UNLOCK(d); splx(s); return (error); @@ -565,17 +533,9 @@ if (d->bd_async && d->bd_sig && d->bd_sigio) pgsigio(d->bd_sigio, d->bd_sig, 0); -#if BSD >= 199103 selwakeup(&d->bd_sel); /* XXX */ d->bd_sel.si_pid = 0; -#else - if (d->bd_selproc) { - selwakeup(d->bd_selproc, (int)d->bd_selcoll); - d->bd_selcoll = 0; - d->bd_selproc = 0; - } -#endif } static int @@ -610,11 +570,9 @@ dst.sa_family = pseudo_AF_HDRCMPLT; s = splnet(); -#if BSD >= 199103 + mtx_enter(&Giant, MTX_DEF); error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0); -#else - error = (*ifp->if_output)(ifp, m, &dst); -#endif + mtx_exit(&Giant, MTX_DEF); splx(s); /* * The driver frees the mbuf. @@ -624,12 +582,15 @@ /* * Reset a descriptor by flushing its packet buffer and clearing the - * receive and drop counts. Should be called at splimp. + * receive and drop counts. Should be called with the descriptor mutex + * held. */ static void reset_d(d) struct bpf_d *d; { + + mtx_assert(&d->bd_mtx, MA_OWNED); if (d->bd_hbuf) { /* Free the hold buffer. */ d->bd_fbuf = d->bd_hbuf; @@ -670,7 +631,7 @@ int flags; struct proc *p; { - register struct bpf_d *d = dev->si_drv1; + struct bpf_d *d = dev->si_drv1; int s, error = 0; switch (cmd) { @@ -687,9 +648,11 @@ int n; s = splimp(); + BPFD_LOCK(d); n = d->bd_slen; if (d->bd_hbuf) n += d->bd_hlen; + BPFD_UNLOCK(d); splx(s); *(int *)addr = n; @@ -720,9 +683,6 @@ * Set buffer length. */ case BIOCSBLEN: -#if BSD < 199103 - error = EINVAL; -#else if (d->bd_bif != 0) error = EINVAL; else { @@ -734,7 +694,6 @@ *(u_int *)addr = size = BPF_MINBUFSIZE; d->bd_bufsize = size; } -#endif break; /* @@ -749,7 +708,9 @@ */ case BIOCFLUSH: s = splimp(); + BPFD_LOCK(d); reset_d(d); + BPFD_UNLOCK(d); splx(s); break; @@ -951,8 +912,10 @@ if (fp->bf_len != 0) return (EINVAL); s = splimp(); + BPFD_LOCK(d); d->bd_filter = 0; reset_d(d); + BPFD_UNLOCK(d); splx(s); if (old != 0) free((caddr_t)old, M_BPF); @@ -967,8 +930,10 @@ if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && bpf_validate(fcode, (int)flen)) { s = splimp(); + BPFD_LOCK(d); d->bd_filter = fcode; reset_d(d); + BPFD_UNLOCK(d); splx(s); if (old != 0) free((caddr_t)old, M_BPF); @@ -990,7 +955,7 @@ struct ifreq *ifr; { struct bpf_if *bp; - int s, error; + int error; struct ifnet *theywant; theywant = ifunit(ifr->ifr_name); @@ -1000,11 +965,14 @@ /* * Look through attached interfaces for the named one. */ + BPF_LOCK; for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { struct ifnet *ifp = bp->bif_ifp; if (ifp == 0 || ifp != theywant) continue; + + BPF_UNLOCK; /* * We found the requested interface. * If it's not up, return an error. @@ -1020,7 +988,6 @@ if (error != 0) return (error); } - s = splimp(); if (bp != d->bd_bif) { if (d->bd_bif) /* @@ -1030,10 +997,12 @@ bpf_attachd(d, bp); } + BPFD_LOCK(d); reset_d(d); - splx(s); + BPFD_UNLOCK(d); return (0); } + BPF_UNLOCK; /* Not found. */ return (ENXIO); } @@ -1063,12 +1032,14 @@ return (ENXIO); s = splimp(); + BPFD_LOCK(d); if (events & (POLLIN | POLLRDNORM)) { if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &d->bd_sel); } + BPFD_UNLOCK(d); splx(s); return (revents); } @@ -1088,18 +1059,18 @@ struct bpf_if *bp; register struct bpf_d *d; register u_int slen; - /* - * Note that the ipl does not have to be raised at this point. - * The only problem that could arise here is that if two different - * interfaces shared any data. This is not the case. - */ + bp = ifp->if_bpf; + BPFIF_LOCK(bp); for (d = bp->bif_dlist; d != 0; d = d->bd_next) { + BPFD_LOCK(d); ++d->bd_rcount; slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen); if (slen != 0) catchpacket(d, pkt, pktlen, slen, bcopy); + BPFD_UNLOCK(d); } + BPFIF_UNLOCK(bp); } /* @@ -1146,14 +1117,18 @@ for (m0 = m; m0 != 0; m0 = m0->m_next) pktlen += m0->m_len; + BPFIF_LOCK(bp); for (d = bp->bif_dlist; d != 0; d = d->bd_next) { if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL)) continue; + BPFD_LOCK(d); ++d->bd_rcount; slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0); if (slen != 0) catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy); + BPFD_UNLOCK(d); } + BPFIF_UNLOCK(bp); } /* @@ -1276,6 +1251,7 @@ } if (d->bd_filter) free((caddr_t)d->bd_filter, M_BPF); + mtx_destroy(&d->bd_mtx); } /* @@ -1296,9 +1272,12 @@ bp->bif_dlist = 0; bp->bif_ifp = ifp; bp->bif_dlt = dlt; + mtx_init(&bp->bif_mtx, "bpf interface lock", MTX_DEF); + BPF_LOCK; bp->bif_next = bpf_iflist; bpf_iflist = bp; + BPF_UNLOCK; bp->bif_ifp->if_bpf = 0; @@ -1329,6 +1308,7 @@ int s; s = splimp(); + BPF_LOCK; /* Locate BPF interface information */ bp_prev = NULL; @@ -1340,25 +1320,30 @@ /* Interface wasn't attached */ if (bp->bif_ifp == NULL) { + BPF_UNLOCK; splx(s); printf("bpfdetach: %s%d was not attached\n", ifp->if_name, ifp->if_unit); return; } - while ((d = bp->bif_dlist) != NULL) { - bpf_detachd(d); - bpf_wakeup(d); - } - if (bp_prev) { bp_prev->bif_next = bp->bif_next; } else { bpf_iflist = bp->bif_next; } + while ((d = bp->bif_dlist) != NULL) { + bpf_detachd(d); + BPFD_LOCK(d); + bpf_wakeup(d); + BPFD_UNLOCK(d); + } + + mtx_destroy(&bp->bif_mtx); free(bp, M_BPF); + BPF_UNLOCK; splx(s); } @@ -1390,6 +1375,7 @@ void *unused; { + mtx_init(&bpf_mtx, "bpf global lock", MTX_DEF); EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000); cdevsw_add(&bpf_cdevsw); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-smp" in the body of the message