Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 29 Aug 2013 19:35:15 +0000 (UTC)
From:      Adrian Chadd <adrian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r255038 - head/sys/net
Message-ID:  <201308291935.r7TJZFm1045076@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: adrian
Date: Thu Aug 29 19:35:14 2013
New Revision: 255038
URL: http://svnweb.freebsd.org/changeset/base/255038

Log:
  Convert the if_lagg rwlock to an rmlock.
  
  We've been seeing lots of cache line contention (but not lock contention!)
  in our workloads between the various TX and RX threads going on.
  
  The write lock is only grabbed when configuration changes are made - which
  are infrequent.
  
  With this patch, the contention and cycles spent waiting for updates
  disappear.
  
  Sponsored by:	Netflix, Inc.

Modified:
  head/sys/net/if_lagg.c
  head/sys/net/if_lagg.h

Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c	Thu Aug 29 18:36:47 2013	(r255037)
+++ head/sys/net/if_lagg.c	Thu Aug 29 19:35:14 2013	(r255038)
@@ -37,7 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/hash.h>
 #include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 
@@ -233,16 +233,17 @@ lagg_register_vlan(void *arg, struct ifn
 {
         struct lagg_softc       *sc = ifp->if_softc;
         struct lagg_port        *lp;
+        struct rm_priotracker   tracker;
 
         if (ifp->if_softc !=  arg)   /* Not our event */
                 return;
 
-        LAGG_RLOCK(sc);
+        LAGG_RLOCK(sc, &tracker);
         if (!SLIST_EMPTY(&sc->sc_ports)) {
                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
                         EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
         }
-        LAGG_RUNLOCK(sc);
+        LAGG_RUNLOCK(sc, &tracker);
 }
 
 /*
@@ -254,16 +255,17 @@ lagg_unregister_vlan(void *arg, struct i
 {
         struct lagg_softc       *sc = ifp->if_softc;
         struct lagg_port        *lp;
+        struct rm_priotracker   tracker;
 
         if (ifp->if_softc !=  arg)   /* Not our event */
                 return;
 
-        LAGG_RLOCK(sc);
+        LAGG_RLOCK(sc, &tracker);
         if (!SLIST_EMPTY(&sc->sc_ports)) {
                 SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
                         EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
         }
-        LAGG_RUNLOCK(sc);
+        LAGG_RUNLOCK(sc, &tracker);
 }
 
 static int
@@ -322,9 +324,15 @@ lagg_clone_create(struct if_clone *ifc, 
 		}
 	}
 	LAGG_LOCK_INIT(sc);
+	LAGG_CALLOUT_LOCK_INIT(sc);
 	SLIST_INIT(&sc->sc_ports);
 	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
-	callout_init_rw(&sc->sc_callout, &sc->sc_mtx, CALLOUT_SHAREDLOCK);
+
+	/*
+	 * This uses the callout lock rather than the rmlock; one can't
+	 * hold said rmlock during SWI.
+	 */
+	callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
 
 	/* Initialise pseudo media types */
 	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
@@ -389,7 +397,10 @@ lagg_clone_destroy(struct ifnet *ifp)
 	ether_ifdetach(ifp);
 	if_free(ifp);
 
+	/* This grabs sc_callout_mtx, serialising it correctly */
 	callout_drain(&sc->sc_callout);
+
+	/* At this point it's drained; we can free this */
 	counter_u64_free(sc->sc_ipackets);
 	counter_u64_free(sc->sc_opackets);
 	counter_u64_free(sc->sc_ibytes);
@@ -401,6 +412,7 @@ lagg_clone_destroy(struct ifnet *ifp)
 
 	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
 	LAGG_LOCK_DESTROY(sc);
+	LAGG_CALLOUT_LOCK_DESTROY(sc);
 	free(sc, M_DEVBUF);
 }
 
@@ -764,6 +776,7 @@ lagg_port_ioctl(struct ifnet *ifp, u_lon
 	struct lagg_softc *sc;
 	struct lagg_port *lp = NULL;
 	int error = 0;
+	struct rm_priotracker tracker;
 
 	/* Should be checked by the caller */
 	if (ifp->if_type != IFT_IEEE8023ADLAG ||
@@ -778,15 +791,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_lon
 			break;
 		}
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_RUNLOCK(sc);
+			LAGG_RUNLOCK(sc, &tracker);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		break;
 
 	case SIOCSIFCAP:
@@ -955,21 +968,22 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd
 	struct thread *td = curthread;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0;
+	struct rm_priotracker tracker;
 
 	bzero(&rpbuf, sizeof(rpbuf));
 
 	switch (cmd) {
 	case SIOCGLAGG:
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		count = 0;
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 			count++;
 		buflen = count * sizeof(struct lagg_reqport);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 
 		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		ra->ra_proto = sc->sc_proto;
 		if (sc->sc_req != NULL)
 			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
@@ -987,7 +1001,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd
 			buf += sizeof(rpbuf);
 			len -= sizeof(rpbuf);
 		}
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		ra->ra_ports = count;
 		ra->ra_size = count * sizeof(rpbuf);
 		error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -1065,16 +1079,16 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd
 			break;
 		}
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_RUNLOCK(sc);
+			LAGG_RUNLOCK(sc, &tracker);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		break;
 	case SIOCSLAGGPORT:
 		error = priv_check(td, PRIV_NET_LAGG);
@@ -1280,14 +1294,15 @@ lagg_transmit(struct ifnet *ifp, struct 
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	int error, len, mcast;
+	struct rm_priotracker tracker;
 
 	len = m->m_pkthdr.len;
 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	/* We need a Tx algorithm and at least one port */
 	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		m_freem(m);
 		ifp->if_oerrors++;
 		return (ENXIO);
@@ -1296,7 +1311,7 @@ lagg_transmit(struct ifnet *ifp, struct 
 	ETHER_BPF_MTAP(ifp, m);
 
 	error = (*sc->sc_start)(sc, m);
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 
 	if (error == 0) {
 		counter_u64_add(sc->sc_opackets, 1);
@@ -1322,12 +1337,13 @@ lagg_input(struct ifnet *ifp, struct mbu
 	struct lagg_port *lp = ifp->if_lagg;
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *scifp = sc->sc_ifp;
+	struct rm_priotracker tracker;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
 	    sc->sc_proto == LAGG_PROTO_NONE) {
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		m_freem(m);
 		return (NULL);
 	}
@@ -1346,7 +1362,7 @@ lagg_input(struct ifnet *ifp, struct mbu
 		}
 	}
 
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 	return (m);
 }
 
@@ -1367,16 +1383,17 @@ lagg_media_status(struct ifnet *ifp, str
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_port *lp;
+	struct rm_priotracker tracker;
 
 	imr->ifm_status = IFM_AVALID;
 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (LAGG_PORTACTIVE(lp))
 			imr->ifm_status |= IFM_ACTIVE;
 	}
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 }
 
 static void

Modified: head/sys/net/if_lagg.h
==============================================================================
--- head/sys/net/if_lagg.h	Thu Aug 29 18:36:47 2013	(r255037)
+++ head/sys/net/if_lagg.h	Thu Aug 29 19:35:14 2013	(r255038)
@@ -187,7 +187,8 @@ struct lagg_llq {
 
 struct lagg_softc {
 	struct ifnet			*sc_ifp;	/* virtual interface */
-	struct rwlock			sc_mtx;
+	struct rmlock			sc_mtx;
+	struct mtx			sc_call_mtx;
 	int				sc_proto;	/* lagg protocol */
 	u_int				sc_count;	/* number of ports */
 	u_int				sc_active;	/* active port count */
@@ -255,14 +256,19 @@ struct lagg_port {
 	SLIST_ENTRY(lagg_port)		lp_entries;
 };
 
-#define	LAGG_LOCK_INIT(_sc)	rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
-#define	LAGG_LOCK_DESTROY(_sc)	rw_destroy(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK(_sc)		rw_rlock(&(_sc)->sc_mtx)
-#define	LAGG_WLOCK(_sc)		rw_wlock(&(_sc)->sc_mtx)
-#define	LAGG_RUNLOCK(_sc)	rw_runlock(&(_sc)->sc_mtx)
-#define	LAGG_WUNLOCK(_sc)	rw_wunlock(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define	LAGG_WLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define	LAGG_LOCK_INIT(_sc)	rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
+#define	LAGG_LOCK_DESTROY(_sc)	rm_destroy(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK(_sc, _p)	rm_rlock(&(_sc)->sc_mtx, (_p))
+#define	LAGG_WLOCK(_sc)		rm_wlock(&(_sc)->sc_mtx)
+#define	LAGG_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->sc_mtx, (_p))
+#define	LAGG_WUNLOCK(_sc)	rm_wunlock(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define	LAGG_WLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+
+#define	LAGG_CALLOUT_LOCK_INIT(_sc)					\
+	    mtx_init(&(_sc)->sc_call_mtx, "if_lagg callout mutex", NULL,\
+	    MTX_DEF)
+#define	LAGG_CALLOUT_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->sc_call_mtx)
 
 extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
 extern void	(*lagg_linkstate_p)(struct ifnet *, int );



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308291935.r7TJZFm1045076>