Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 14 Jun 2018 14:53:24 +0000 (UTC)
From:      "Andrey V. Elsukov" <ae@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r335141 - head/sys/net
Message-ID:  <201806141453.w5EErO7V020697@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ae
Date: Thu Jun 14 14:53:24 2018
New Revision: 335141
URL: https://svnweb.freebsd.org/changeset/base/335141

Log:
  Convert the if_me(4) driver to use the encap_lookup_t method and to be
  lockless on the data path.

Modified:
  head/sys/net/if_me.c

Modified: head/sys/net/if_me.c
==============================================================================
--- head/sys/net/if_me.c	Thu Jun 14 14:53:01 2018	(r335140)
+++ head/sys/net/if_me.c	Thu Jun 14 14:53:24 2018	(r335141)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,22 +28,20 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/systm.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
-#include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
-#include <sys/systm.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
@@ -68,8 +66,6 @@ __FBSDID("$FreeBSD$");
 #define	MEMTU			(1500 - sizeof(struct mobhdr))
 static const char mename[] = "me";
 static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
-static VNET_DEFINE(struct mtx, me_mtx);
-#define	V_me_mtx	VNET(me_mtx)
 /* Minimal forwarding header RFC 2004 */
 struct mobhdr {
 	uint8_t		mob_proto;	/* protocol */
@@ -82,32 +78,27 @@ struct mobhdr {
 
 struct me_softc {
 	struct ifnet		*me_ifp;
-	LIST_ENTRY(me_softc)	me_list;
-	struct rmlock		me_lock;
 	u_int			me_fibnum;
-	const struct encaptab	*me_ecookie;
 	struct in_addr		me_src;
 	struct in_addr		me_dst;
+
+	CK_LIST_ENTRY(me_softc) chain;
 };
+CK_LIST_HEAD(me_list, me_softc);
 #define	ME2IFP(sc)		((sc)->me_ifp)
 #define	ME_READY(sc)		((sc)->me_src.s_addr != 0)
-#define	ME_LOCK_INIT(sc)	rm_init(&(sc)->me_lock, "me softc")
-#define	ME_LOCK_DESTROY(sc)	rm_destroy(&(sc)->me_lock)
-#define	ME_RLOCK_TRACKER	struct rm_priotracker me_tracker
-#define	ME_RLOCK(sc)		rm_rlock(&(sc)->me_lock, &me_tracker)
-#define	ME_RUNLOCK(sc)		rm_runlock(&(sc)->me_lock, &me_tracker)
-#define	ME_RLOCK_ASSERT(sc)	rm_assert(&(sc)->me_lock, RA_RLOCKED)
-#define	ME_WLOCK(sc)		rm_wlock(&(sc)->me_lock)
-#define	ME_WUNLOCK(sc)		rm_wunlock(&(sc)->me_lock)
-#define	ME_WLOCK_ASSERT(sc)	rm_assert(&(sc)->me_lock, RA_WLOCKED)
+#define	ME_RLOCK()		epoch_enter_preempt(net_epoch_preempt)
+#define	ME_RUNLOCK()		epoch_exit_preempt(net_epoch_preempt)
+#define	ME_WAIT()		epoch_wait_preempt(net_epoch_preempt)
 
-#define	ME_LIST_LOCK_INIT(x)	mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
-#define	ME_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_me_mtx)
-#define	ME_LIST_LOCK(x)		mtx_lock(&V_me_mtx)
-#define	ME_LIST_UNLOCK(x)	mtx_unlock(&V_me_mtx)
+#ifndef ME_HASH_SIZE
+#define	ME_HASH_SIZE	(1 << 4)
+#endif
+static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL;
+#define	V_me_hashtbl		VNET(me_hashtbl)
+#define	ME_HASH(src, dst)	(V_me_hashtbl[\
+    me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)])
 
-static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
-#define	V_me_softc_list	VNET(me_softc_list)
 static struct sx me_ioctl_sx;
 SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
 
@@ -123,21 +114,9 @@ static int	me_output(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static int	me_input(struct mbuf *, int, int, void *);
 
-static int	me_set_tunnel(struct ifnet *, struct sockaddr_in *,
-    struct sockaddr_in *);
-static void	me_delete_tunnel(struct ifnet *);
-static int	me_encapcheck(const struct mbuf *, int, int, void *);
+static int	me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t);
+static void	me_delete_tunnel(struct me_softc *);
 
-#define	ME_MINLEN	(sizeof(struct ip) + sizeof(struct mobhdr) -\
-    sizeof(in_addr_t))
-static const struct encap_config ipv4_encap_cfg = {
-	.proto = IPPROTO_MOBILE,
-	.min_length = ME_MINLEN,
-	.exact_match = (sizeof(in_addr_t) << 4) + 8,
-	.check = me_encapcheck,
-	.input = me_input
-};
-
 SYSCTL_DECL(_net_link);
 static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
     "Minimal Encapsulation for IP (RFC 2004)");
@@ -150,11 +129,32 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
 SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
 
+static uint32_t
+me_hashval(in_addr_t src, in_addr_t dst)
+{
+	uint32_t ret;
+
+	ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
+	return (fnv_32_buf(&dst, sizeof(dst), ret));
+}
+
+static struct me_list *
+me_hashinit(void)
+{
+	struct me_list *hash;
+	int i;
+
+	hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE,
+	    M_IFME, M_WAITOK);
+	for (i = 0; i < ME_HASH_SIZE; i++)
+		CK_LIST_INIT(&hash[i]);
+
+	return (hash);
+}
+
 static void
 vnet_me_init(const void *unused __unused)
 {
-	LIST_INIT(&V_me_softc_list);
-	ME_LIST_LOCK_INIT();
 	V_me_cloner = if_clone_simple(mename, me_clone_create,
 	    me_clone_destroy, 0);
 }
@@ -165,8 +165,9 @@ static void
 vnet_me_uninit(const void *unused __unused)
 {
 
+	if (V_me_hashtbl != NULL)
+		free(V_me_hashtbl, M_IFME);
 	if_clone_detach(V_me_cloner);
-	ME_LIST_LOCK_DESTROY();
 }
 VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_me_uninit, NULL);
@@ -179,7 +180,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_
 	sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
 	sc->me_fibnum = curthread->td_proc->p_fibnum;
 	ME2IFP(sc) = if_alloc(IFT_TUNNEL);
-	ME_LOCK_INIT(sc);
 	ME2IFP(sc)->if_softc = sc;
 	if_initname(ME2IFP(sc), mename, unit);
 
@@ -193,9 +193,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_
 	ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
 	if_attach(ME2IFP(sc));
 	bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
-	ME_LIST_LOCK();
-	LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
-	ME_LIST_UNLOCK();
 	return (0);
 }
 
@@ -206,24 +203,20 @@ me_clone_destroy(struct ifnet *ifp)
 
 	sx_xlock(&me_ioctl_sx);
 	sc = ifp->if_softc;
-	me_delete_tunnel(ifp);
-	ME_LIST_LOCK();
-	LIST_REMOVE(sc, me_list);
-	ME_LIST_UNLOCK();
+	me_delete_tunnel(sc);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	ifp->if_softc = NULL;
 	sx_xunlock(&me_ioctl_sx);
 
+	ME_WAIT();
 	if_free(ifp);
-	ME_LOCK_DESTROY(sc);
 	free(sc, M_IFME);
 }
 
 static int
 me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-	ME_RLOCK_TRACKER;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr_in *src, *dst;
 	struct me_softc *sc;
@@ -251,10 +244,8 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 	error = 0;
 	switch (cmd) {
 	case SIOCSIFPHYADDR:
-		src = (struct sockaddr_in *)
-			&(((struct in_aliasreq *)data)->ifra_addr);
-		dst = (struct sockaddr_in *)
-			&(((struct in_aliasreq *)data)->ifra_dstaddr);
+		src = &((struct in_aliasreq *)data)->ifra_addr;
+		dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
 		if (src->sin_family != dst->sin_family ||
 		    src->sin_family != AF_INET ||
 		    src->sin_len != dst->sin_len ||
@@ -267,17 +258,16 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			error = EADDRNOTAVAIL;
 			break;
 		}
-		error = me_set_tunnel(ifp, src, dst);
+		error = me_set_tunnel(sc, src->sin_addr.s_addr,
+		    dst->sin_addr.s_addr);
 		break;
 	case SIOCDIFPHYADDR:
-		me_delete_tunnel(ifp);
+		me_delete_tunnel(sc);
 		break;
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
-		ME_RLOCK(sc);
 		if (!ME_READY(sc)) {
 			error = EADDRNOTAVAIL;
-			ME_RUNLOCK(sc);
 			break;
 		}
 		src = (struct sockaddr_in *)&ifr->ifr_addr;
@@ -292,7 +282,6 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			src->sin_addr = sc->me_dst;
 			break;
 		}
-		ME_RUNLOCK(sc);
 		error = prison_if(curthread->td_ucred, sintosa(src));
 		if (error != 0)
 			memset(src, 0, sizeof(*src));
@@ -318,81 +307,71 @@ end:
 }
 
 static int
-me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+me_lookup(const struct mbuf *m, int off, int proto, void **arg)
 {
-	ME_RLOCK_TRACKER;
+	const struct ip *ip;
 	struct me_softc *sc;
-	struct ip *ip;
-	int ret;
 
-	sc = (struct me_softc *)arg;
-	if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
-		return (0);
-
-	M_ASSERTPKTHDR(m);
-
-	ret = 0;
-	ME_RLOCK(sc);
-	if (ME_READY(sc)) {
-		ip = mtod(m, struct ip *);
+	MPASS(in_epoch());
+	ip = mtod(m, const struct ip *);
+	CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
+	    ip->ip_src.s_addr), chain) {
 		if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
-		    sc->me_dst.s_addr == ip->ip_src.s_addr)
-			ret = 32 * 2 + 8;
+		    sc->me_dst.s_addr == ip->ip_src.s_addr) {
+			if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
+				return (0);
+			*arg = sc;
+			return (ENCAP_DRV_LOOKUP);
+		}
 	}
-	ME_RUNLOCK(sc);
-	return (ret);
+	return (0);
 }
 
 static int
-me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
-    struct sockaddr_in *dst)
+me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst)
 {
-	struct me_softc *sc, *tsc;
+	struct me_softc *tmp;
 
 	sx_assert(&me_ioctl_sx, SA_XLOCKED);
-	ME_LIST_LOCK();
-	sc = ifp->if_softc;
-	LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
-		if (tsc == sc || !ME_READY(tsc))
+
+	if (V_me_hashtbl == NULL)
+		V_me_hashtbl = me_hashinit();
+
+	if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst)
+		return (0);
+
+	CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) {
+		if (tmp == sc)
 			continue;
-		if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
-		    tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
-			ME_LIST_UNLOCK();
+		if (tmp->me_src.s_addr == src &&
+		    tmp->me_dst.s_addr == dst)
 			return (EADDRNOTAVAIL);
-		}
 	}
-	ME_LIST_UNLOCK();
 
-	ME_WLOCK(sc);
-	sc->me_dst = dst->sin_addr;
-	sc->me_src = src->sin_addr;
-	ME_WUNLOCK(sc);
+	me_delete_tunnel(sc);
+	sc->me_dst.s_addr = dst;
+	sc->me_src.s_addr = src;
+	CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain);
 
-	if (sc->me_ecookie == NULL)
-		sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg,
-		    sc, M_WAITOK);
-	if (sc->me_ecookie != NULL) {
-		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-		if_link_state_change(ifp, LINK_STATE_UP);
-	}
+	ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
+	if_link_state_change(ME2IFP(sc), LINK_STATE_UP);
 	return (0);
 }
 
 static void
-me_delete_tunnel(struct ifnet *ifp)
+me_delete_tunnel(struct me_softc *sc)
 {
-	struct me_softc *sc = ifp->if_softc;
 
 	sx_assert(&me_ioctl_sx, SA_XLOCKED);
-	if (sc->me_ecookie != NULL)
-		ip_encap_detach(sc->me_ecookie);
-	sc->me_ecookie = NULL;
-	ME_WLOCK(sc);
-	sc->me_src.s_addr = 0;
-	sc->me_dst.s_addr = 0;
-	ME_WUNLOCK(sc);
-	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-	if_link_state_change(ifp, LINK_STATE_DOWN);
+	if (ME_READY(sc)) {
+		CK_LIST_REMOVE(sc, chain);
+		ME_WAIT();
+
+		sc->me_src.s_addr = 0;
+		sc->me_dst.s_addr = 0;
+		ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+		if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN);
+	}
 }
 
 static uint16_t
@@ -505,58 +484,48 @@ me_check_nesting(struct ifnet *ifp, struct mbuf *m)
 
 static int
 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
-   struct route *ro)
+   struct route *ro __unused)
 {
 	uint32_t af;
-	int error;
 
-#ifdef MAC
-	error = mac_ifnet_check_transmit(ifp, m);
-	if (error != 0)
-		goto drop;
-#endif
-	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
-	    (ifp->if_flags & IFF_UP) == 0) {
-		error = ENETDOWN;
-		goto drop;
-	}
-
-	error = me_check_nesting(ifp, m);
-	if (error != 0)
-		goto drop;
-
-	m->m_flags &= ~(M_BCAST|M_MCAST);
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
 		af = dst->sa_family;
-	if (af != AF_INET) {
-		error = EAFNOSUPPORT;
-		goto drop;
-	}
-	BPF_MTAP2(ifp, &af, sizeof(af), m);
+	m->m_pkthdr.csum_data = af;
 	return (ifp->if_transmit(ifp, m));
-drop:
-	m_freem(m);
-	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
-	return (error);
 }
 
 static int
 me_transmit(struct ifnet *ifp, struct mbuf *m)
 {
-	ME_RLOCK_TRACKER;
 	struct mobhdr mh;
 	struct me_softc *sc;
 	struct ip *ip;
+	uint32_t af;
 	int error, hlen, plen;
 
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error != 0)
+		goto drop;
+#endif
+	error = ENETDOWN;
+	ME_RLOCK();
 	sc = ifp->if_softc;
-	if (sc == NULL) {
-		error = ENETDOWN;
+	if (sc == NULL || !ME_READY(sc) ||
+	    (ifp->if_flags & IFF_MONITOR) != 0 ||
+	    (ifp->if_flags & IFF_UP) == 0 ||
+	    (error = me_check_nesting(ifp, m) != 0)) {
 		m_freem(m);
 		goto drop;
 	}
+	af = m->m_pkthdr.csum_data;
+	if (af != AF_INET) {
+		error = EAFNOSUPPORT;
+		m_freem(m);
+		goto drop;
+	}
 	if (m->m_len < sizeof(struct ip))
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
@@ -573,13 +542,6 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
 	mh.mob_proto = ip->ip_p;
 	mh.mob_src = ip->ip_src;
 	mh.mob_dst = ip->ip_dst;
-	ME_RLOCK(sc);
-	if (!ME_READY(sc)) {
-		ME_RUNLOCK(sc);
-		error = ENETDOWN;
-		m_freem(m);
-		goto drop;
-	}
 	if (in_hosteq(sc->me_src, ip->ip_src)) {
 		hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
 		mh.mob_flags = 0;
@@ -590,8 +552,8 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
 	plen = m->m_pkthdr.len;
 	ip->ip_src = sc->me_src;
 	ip->ip_dst = sc->me_dst;
+	m->m_flags &= ~(M_BCAST|M_MCAST);
 	M_SETFIB(m, sc->me_fibnum);
-	ME_RUNLOCK(sc);
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
@@ -619,6 +581,7 @@ drop:
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
 	}
+	ME_RUNLOCK();
 	return (error);
 }
 
@@ -628,13 +591,26 @@ me_qflush(struct ifnet *ifp __unused)
 
 }
 
+static const struct encaptab *ecookie = NULL;
+static const struct encap_config me_encap_cfg = {
+	.proto = IPPROTO_MOBILE,
+	.min_length = sizeof(struct ip) + sizeof(struct mobhdr) -
+	    sizeof(in_addr_t),
+	.exact_match = ENCAP_DRV_LOOKUP,
+	.lookup = me_lookup,
+	.input = me_input
+};
+
 static int
 memodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
+		ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK);
+		break;
 	case MOD_UNLOAD:
+		ip_encap_detach(ecookie);
 		break;
 	default:
 		return (EOPNOTSUPP);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201806141453.w5EErO7V020697>