Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 3 Jun 2016 13:57:10 +0000 (UTC)
From:      "Bjoern A. Zeeb" <bz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r301270 - in head: share/man/man9 sys/net sys/netinet sys/netinet6
Message-ID:  <201606031357.u53DvAJ0036445@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bz
Date: Fri Jun  3 13:57:10 2016
New Revision: 301270
URL: https://svnweb.freebsd.org/changeset/base/301270

Log:
  Introduce a per-VNET flag to enable/disable netisr processing on that VNET.
  Add accessor functions to toggle the state per VNET.
  The base system (vnet0) will always enable itself with the normal
  registration. We will share the registered protocol handlers in all
  VNETs minimising duplication and management.
  Upon disabling netisr processing for a VNET, drain the netisr queue of
  packets for that VNET.
  
  Update netisr consumers to (de)register on a per-VNET start/teardown using
  VNET_SYS(UN)INIT functionality.
  
  The change should be transparent for non-VIMAGE kernels.
  
  Reviewed by:	gnn (, hiren)
  Obtained from:	projects/vnet
  MFC after:	2 weeks
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D6691

Modified:
  head/share/man/man9/netisr.9
  head/sys/net/if_epair.c
  head/sys/net/if_ethersubr.c
  head/sys/net/netisr.c
  head/sys/net/netisr.h
  head/sys/net/rtsock.c
  head/sys/netinet/if_ether.c
  head/sys/netinet/ip_input.c
  head/sys/netinet6/ip6_input.c

Modified: head/share/man/man9/netisr.9
==============================================================================
--- head/share/man/man9/netisr.9	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/share/man/man9/netisr.9	Fri Jun  3 13:57:10 2016	(r301270)
@@ -27,7 +27,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 11, 2015
+.Dd June 3, 2016
 .Dt NETISR 9
 .Os
 .Sh NAME
@@ -61,6 +61,16 @@
 .Fn netisr_get_cpucount "void"
 .Ft u_int
 .Fn netisr_get_cpuid "u_int cpunumber"
+.Pp
+With optional virtual network stack support enabled via the following kernel
+compile option:
+.Bd -ragged -offset indent
+.Cd "options VIMAGE"
+.Ed
+.Ft void
+.Fn netisr_register_vnet "const struct netisr_handler *nhp"
+.Ft void
+.Fn netisr_unregister_vnet "const struct netisr_handler *nhp"
 .Sh DESCRIPTION
 The
 .Nm
@@ -80,6 +90,16 @@ and may also manage queue limits and sta
 and
 .Fn netisr_setqlimit .
 .Pp
+In case of VIMAGE kernels each virtual network stack (vnet), that is not the
+default base system network stack, calls
+.Fn netisr_register_vnet
+and
+.Fn netisr_unregister_vnet
+to enable or disable packet processing by the
+.Nm
+for each protocol.
+Disabling will also purge any outstanding packet from the protocol queue.
+.Pp
 .Nm
 supports multi-processor execution of handlers, and relies on a combination
 of source ordering and protocol-specific ordering and work-placement

Modified: head/sys/net/if_epair.c
==============================================================================
--- head/sys/net/if_epair.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/net/if_epair.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -959,6 +959,9 @@ vnet_epair_init(const void *unused __unu
 
 	V_epair_cloner = if_clone_advanced(epairname, 0,
 	    epair_clone_match, epair_clone_create, epair_clone_destroy);
+#ifdef VIMAGE
+	netisr_register_vnet(&epair_nh);
+#endif
 }
 VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_epair_init, NULL);
@@ -967,6 +970,9 @@ static void
 vnet_epair_uninit(const void *unused __unused)
 {
 
+#ifdef VIMAGE
+	netisr_unregister_vnet(&epair_nh);
+#endif
 	if_clone_detach(V_epair_cloner);
 }
 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,

Modified: head/sys/net/if_ethersubr.c
==============================================================================
--- head/sys/net/if_ethersubr.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/net/if_ethersubr.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -702,12 +702,16 @@ vnet_ether_init(__unused void *arg)
 	if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil link hook, "
 			"error %d\n", __func__, i);
+#ifdef VIMAGE
+	netisr_register_vnet(&ether_nh);
+#endif
 }
 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_init, NULL);
  
+#ifdef VIMAGE
 static void
-vnet_ether_destroy(__unused void *arg)
+vnet_ether_pfil_destroy(__unused void *arg)
 {
 	int i;
 
@@ -715,8 +719,18 @@ vnet_ether_destroy(__unused void *arg)
 		printf("%s: WARNING: unable to unregister pfil link hook, "
 			"error %d\n", __func__, i);
 }
+VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
+    vnet_ether_pfil_destroy, NULL);
+
+static void
+vnet_ether_destroy(__unused void *arg)
+{
+
+	netisr_unregister_vnet(&ether_nh);
+}
 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_destroy, NULL);
+#endif
 
 
 
@@ -740,7 +754,9 @@ ether_input(struct ifnet *ifp, struct mb
 		 * so assert it is correct here.
 		 */
 		KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
+		CURVNET_SET_QUIET(ifp->if_vnet);
 		netisr_dispatch(NETISR_ETHER, m);
+		CURVNET_RESTORE();
 		m = mn;
 	}
 }

Modified: head/sys/net/netisr.c
==============================================================================
--- head/sys/net/netisr.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/net/netisr.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -210,6 +210,23 @@ SYSCTL_UINT(_net_isr, OID_AUTO, maxprot,
  */
 static struct netisr_proto	netisr_proto[NETISR_MAXPROT];
 
+#ifdef VIMAGE
+/*
+ * The netisr_enable array describes a per-VNET flag for registered
+ * protocols on whether this netisr is active in this VNET or not.
+ * netisr_register() will automatically enable the netisr for the
+ * default VNET and all currently active instances.
+ * netisr_unregister() will disable all active VNETs, including vnet0.
+ * Individual network stack instances can be enabled/disabled by the
+ * netisr_(un)register _vnet() functions.
+ * With this we keep the one netisr_proto per protocol but add a
+ * mechanism to stop netisr processing for vnet teardown.
+ * Apart from that we expect a VNET to always be enabled.
+ */
+static VNET_DEFINE(u_int,	netisr_enable[NETISR_MAXPROT]);
+#define	V_netisr_enable		VNET(netisr_enable)
+#endif
+
 /*
  * Per-CPU workstream data.  See netisr_internal.h for more details.
  */
@@ -352,6 +369,7 @@ sysctl_netisr_dispatch_policy(SYSCTL_HAN
 void
 netisr_register(const struct netisr_handler *nhp)
 {
+	VNET_ITERATOR_DECL(vnet_iter);
 	struct netisr_work *npwp;
 	const char *name;
 	u_int i, proto;
@@ -420,6 +438,22 @@ netisr_register(const struct netisr_hand
 		bzero(npwp, sizeof(*npwp));
 		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
 	}
+
+#ifdef VIMAGE
+	/*
+	 * Test that we are in vnet0 and have a curvnet set.
+	 */
+	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+	KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
+	    __func__, curvnet, vnet0));
+	VNET_LIST_RLOCK_NOSLEEP();
+	VNET_FOREACH(vnet_iter) {
+		CURVNET_SET(vnet_iter);
+		V_netisr_enable[proto] = 1;
+		CURVNET_RESTORE();
+	}
+	VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
 	NETISR_WUNLOCK();
 }
 
@@ -584,6 +618,7 @@ netisr_drain_proto(struct netisr_work *n
 void
 netisr_unregister(const struct netisr_handler *nhp)
 {
+	VNET_ITERATOR_DECL(vnet_iter);
 	struct netisr_work *npwp;
 #ifdef INVARIANTS
 	const char *name;
@@ -602,6 +637,16 @@ netisr_unregister(const struct netisr_ha
 	    ("%s(%u): protocol not registered for %s", __func__, proto,
 	    name));
 
+#ifdef VIMAGE
+	VNET_LIST_RLOCK_NOSLEEP();
+	VNET_FOREACH(vnet_iter) {
+		CURVNET_SET(vnet_iter);
+		V_netisr_enable[proto] = 0;
+		CURVNET_RESTORE();
+	}
+	VNET_LIST_RUNLOCK_NOSLEEP();
+#endif
+
 	netisr_proto[proto].np_name = NULL;
 	netisr_proto[proto].np_handler = NULL;
 	netisr_proto[proto].np_m2flow = NULL;
@@ -616,6 +661,97 @@ netisr_unregister(const struct netisr_ha
 	NETISR_WUNLOCK();
 }
 
+#ifdef VIMAGE
+void
+netisr_register_vnet(const struct netisr_handler *nhp)
+{
+	u_int proto;
+
+	proto = nhp->nh_proto;
+
+	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+	KASSERT(proto < NETISR_MAXPROT,
+	    ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+	NETISR_WLOCK();
+	KASSERT(netisr_proto[proto].np_handler != NULL,
+	    ("%s(%u): protocol not registered for %s", __func__, proto,
+	    nhp->nh_name));
+	
+	V_netisr_enable[proto] = 1;
+	NETISR_WUNLOCK();
+}
+
+static void
+netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
+{
+	struct netisr_workstream *nwsp;
+	struct netisr_work *npwp;
+	struct mbuf *m, *mp, *n, *ne;
+	u_int i;
+
+	KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
+	NETISR_LOCK_ASSERT();
+
+	CPU_FOREACH(i) {
+		nwsp = DPCPU_ID_PTR(i, nws);
+		if (nwsp->nws_intr_event == NULL)
+			continue;
+		npwp = &nwsp->nws_work[proto];
+		NWS_LOCK(nwsp);
+
+		/*
+		 * Rather than dissecting and removing mbufs from the middle
+		 * of the chain, we build a new chain if the packet stays and
+		 * update the head and tail pointers at the end.  All packets
+		 * matching the given vnet are freed.
+		 */
+		m = npwp->nw_head;
+		n = ne = NULL;
+		while (m != NULL) {
+			mp = m;
+			m = m->m_nextpkt;
+			mp->m_nextpkt = NULL;
+			if (mp->m_pkthdr.rcvif->if_vnet != vnet) {
+				if (n == NULL) {
+					n = ne = mp;
+				} else {
+					ne->m_nextpkt = mp;
+					ne = mp;
+				}
+				continue;
+			}
+			/* This is a packet in the selected vnet. Free it. */
+			npwp->nw_len--;
+			m_freem(mp);
+		}
+		npwp->nw_head = n;
+		npwp->nw_tail = ne;
+		NWS_UNLOCK(nwsp);
+	}
+}
+
+void
+netisr_unregister_vnet(const struct netisr_handler *nhp)
+{
+	u_int proto;
+
+	proto = nhp->nh_proto;
+
+	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
+	KASSERT(proto < NETISR_MAXPROT,
+	    ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
+	NETISR_WLOCK();
+	KASSERT(netisr_proto[proto].np_handler != NULL,
+	    ("%s(%u): protocol not registered for %s", __func__, proto,
+	    nhp->nh_name));
+	
+	V_netisr_enable[proto] = 0;
+
+	netisr_drain_proto_vnet(curvnet, proto);
+	NETISR_WUNLOCK();
+}
+#endif
+
 /*
  * Compose the global and per-protocol policies on dispatch, and return the
  * dispatch policy to use.
@@ -906,6 +1042,13 @@ netisr_queue_src(u_int proto, uintptr_t 
 	KASSERT(netisr_proto[proto].np_handler != NULL,
 	    ("%s: invalid proto %u", __func__, proto));
 
+#ifdef VIMAGE
+	if (V_netisr_enable[proto] == 0) {
+		m_freem(m);
+		return (ENOPROTOOPT);
+	}
+#endif
+
 	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
 	    source, m, &cpuid);
 	if (m != NULL) {
@@ -952,6 +1095,13 @@ netisr_dispatch_src(u_int proto, uintptr
 	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
 	    proto));
 
+#ifdef VIMAGE
+	if (V_netisr_enable[proto] == 0) {
+		m_freem(m);
+		return (ENOPROTOOPT);
+	}
+#endif
+
 	dispatch_policy = netisr_get_dispatch(npp);
 	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
 		return (netisr_queue_src(proto, source, m));

Modified: head/sys/net/netisr.h
==============================================================================
--- head/sys/net/netisr.h	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/net/netisr.h	Fri Jun  3 13:57:10 2016	(r301270)
@@ -210,6 +210,10 @@ void	netisr_getqlimit(const struct netis
 void	netisr_register(const struct netisr_handler *nhp);
 int	netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
 void	netisr_unregister(const struct netisr_handler *nhp);
+#ifdef VIMAGE
+void	netisr_register_vnet(const struct netisr_handler *nhp);
+void	netisr_unregister_vnet(const struct netisr_handler *nhp);
+#endif
 
 /*
  * Process a packet destined for a protocol, and attempt direct dispatch.

Modified: head/sys/net/rtsock.c
==============================================================================
--- head/sys/net/rtsock.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/net/rtsock.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -191,15 +191,33 @@ SYSCTL_PROC(_net_route, OID_AUTO, netisr
     "maximum routing socket dispatch queue length");
 
 static void
-rts_init(void)
+vnet_rts_init(void)
 {
 	int tmp;
 
-	if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
-		rtsock_nh.nh_qlimit = tmp;
-	netisr_register(&rtsock_nh);
+	if (IS_DEFAULT_VNET(curvnet)) {
+		if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
+			rtsock_nh.nh_qlimit = tmp;
+		netisr_register(&rtsock_nh);
+	}
+#ifdef VIMAGE
+	 else
+		netisr_register_vnet(&rtsock_nh);
+#endif
+}
+VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+    vnet_rts_init, 0);
+
+#ifdef VIMAGE
+static void
+vnet_rts_uninit(void)
+{
+
+	netisr_unregister_vnet(&rtsock_nh);
 }
-SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
+VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+    vnet_rts_uninit, 0);
+#endif
 
 static int
 raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,

Modified: head/sys/netinet/if_ether.c
==============================================================================
--- head/sys/netinet/if_ether.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/netinet/if_ether.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -143,7 +143,6 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUT
 } while (0)
 
 
-static void	arp_init(void);
 static void	arpintr(struct mbuf *);
 static void	arptimer(void *);
 #ifdef INET
@@ -1337,12 +1336,33 @@ arp_iflladdr(void *arg __unused, struct 
 }
 
 static void
-arp_init(void)
+vnet_arp_init(void)
 {
 
-	netisr_register(&arp_nh);
-	if (IS_DEFAULT_VNET(curvnet))
+	if (IS_DEFAULT_VNET(curvnet)) {
+		netisr_register(&arp_nh);
 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
 		    arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+	}
+#ifdef VIMAGE
+	else
+		netisr_register_vnet(&arp_nh);
+#endif
 }
-SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
+VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
+    vnet_arp_init, 0);
+
+#ifdef VIMAGE
+/*
+ * We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH
+ * lookups after destroying the hash.  Ideally this would go on SI_ORDER_3.5.
+ */
+static void
+vnet_arp_destroy(__unused void *arg)
+{
+
+	netisr_unregister_vnet(&arp_nh);
+}
+VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
+    vnet_arp_destroy, NULL);
+#endif

Modified: head/sys/netinet/ip_input.c
==============================================================================
--- head/sys/netinet/ip_input.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/netinet/ip_input.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -331,8 +331,15 @@ ip_init(void)
 		    __func__);
 
 	/* Skip initialization of globals for non-default instances. */
-	if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+	if (!IS_DEFAULT_VNET(curvnet)) {
+		netisr_register_vnet(&ip_nh);
+#ifdef	RSS
+		netisr_register_vnet(&ip_direct_nh);
+#endif
 		return;
+	}
+#endif
 
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
@@ -366,6 +373,11 @@ ip_destroy(void *unused __unused)
 {
 	int error;
 
+#ifdef	RSS
+	netisr_unregister_vnet(&ip_direct_nh);
+#endif
+	netisr_unregister_vnet(&ip_nh);
+
 	if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to unregister pfil hook, "
 		    "error %d\n", __func__, error);

Modified: head/sys/netinet6/ip6_input.c
==============================================================================
--- head/sys/netinet6/ip6_input.c	Fri Jun  3 11:39:35 2016	(r301269)
+++ head/sys/netinet6/ip6_input.c	Fri Jun  3 13:57:10 2016	(r301270)
@@ -217,8 +217,15 @@ ip6_init(void)
 	V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
 
 	/* Skip global initialization stuff for non-default instances. */
-	if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+	if (!IS_DEFAULT_VNET(curvnet)) {
+		netisr_register_vnet(&ip6_nh);
+#ifdef RSS
+		netisr_register_vnet(&ip6_direct_nh);
+#endif
 		return;
+	}
+#endif
 
 	pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
@@ -310,6 +317,11 @@ ip6_destroy(void *unused __unused)
 {
 	int error;
 
+#ifdef RSS
+	netisr_unregister_vnet(&ip6_direct_nh);
+#endif
+	netisr_unregister_vnet(&ip6_nh);
+
 	if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to unregister pfil hook, "
 		    "error %d\n", __func__, error);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201606031357.u53DvAJ0036445>