Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 30 Jun 2016 15:01:07 +0000 (UTC)
From:      "Bjoern A. Zeeb" <bz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r302298 - head/sys/contrib/ipfilter/netinet
Message-ID:  <201606301501.u5UF17WV091555@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bz
Date: Thu Jun 30 15:01:07 2016
New Revision: 302298
URL: https://svnweb.freebsd.org/changeset/base/302298

Log:
  Virtualise ipfilter.
  Split initialization and teardown into module (global state) and VNET
  (per virtual network stack) parts.  Virtualise global state, which is
  not "const".
  
  Cleanup eventhandlers, so that we can make use of the passed in argument
  to get the vnet state from the ifp;  disable the "cloner" event as it is
  too early, has no state, and can fire before initialisation (see comment
  in the source).
  
  Handle the dynamic sysctls specially.  The problem is that "ipfmain"
  is the virtualized struct, but the fields used for the sysctls are
  hanging off memory allocated and attached to the virtualized "ipfmain"
  thus standard VNET macros and sysctl handling do not work.
  We still mark them as VNET sysctls to get the proper protection checks
  in the VIMAGE case;  to solve the problem of accessing the right bit
  of memory hanging off each per-VNET ipfmain, we use a dedicated handler
  function wrapping around sysctl_ipf_int() undoing the base calculation
  from kern_sysctl.c and then adding the passed-in offset into the right
  struct depending on handler.  A bit of a mess exposing VNET-internals
  this way but the only way to keep the code without having to massively
  restructure ipf internals.
  
  Approved by:		re (hrs)
  Sponsored by:		The FreeBSD Foundation
  Obtained from:		projects/vnet
  MFC after:		2 weeks
  Reviewed by:		cy
  Differential Revision:	https://reviews.freebsd.org/D7000

Modified:
  head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
  head/sys/contrib/ipfilter/netinet/ip_proxy.c
  head/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c
  head/sys/contrib/ipfilter/netinet/ip_rules.c
  head/sys/contrib/ipfilter/netinet/mlfk_ipl.c

Modified: head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	Thu Jun 30 14:55:49 2016	(r302297)
+++ head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	Thu Jun 30 15:01:07 2016	(r302298)
@@ -62,6 +62,9 @@ static const char rcsid[] = "@(#)$Id$";
 #else
 #define CURVNET_SET(arg)
 #define CURVNET_RESTORE()
+#define	VNET_DEFINE(_t, _v)	_t _v
+#define	VNET_DECLARE(_t, _v)	extern _t _v
+#define	VNET(arg)	arg
 #endif
 #if defined(__osf__)
 # include <netinet/tcp_timer.h>
@@ -102,22 +105,40 @@ MALLOC_DEFINE(M_IPFILTER, "ipfilter", "I
 static	int	ipf_send_ip __P((fr_info_t *, mb_t *));
 static void	ipf_timer_func __P((void *arg));
 
-ipf_main_softc_t ipfmain;
+VNET_DEFINE(ipf_main_softc_t, ipfmain) = {
+	.ipf_running		= -2,
+};
+#define	V_ipfmain		VNET(ipfmain)
 
 # include <sys/conf.h>
 # if defined(NETBSD_PF)
 #  include <net/pfil.h>
 # endif /* NETBSD_PF */
 
+static eventhandler_tag ipf_arrivetag, ipf_departtag;
+#if 0
+/*
+ * Disable the "cloner" event handler;  we are getting interface
+ * events before the firewall is fully initialized and also no vnet
+ * information thus leading to uninitialised memory accesses.
+ * In addition it is unclear why we need it in the first place.
+ * If it turns out to be needed, we'll need a dedicated event handler
+ * for it to deal with the ifc and the correct vnet.
+ */
+static eventhandler_tag ipf_clonetag;
+#endif
 
-static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag;
-
-static void ipf_ifevent(void *arg);
+static void ipf_ifevent(void *arg, struct ifnet *ifp);
 
-static void ipf_ifevent(arg)
+static void ipf_ifevent(arg, ifp)
 	void *arg;
+	struct ifnet *ifp;
 {
-        ipf_sync(arg, NULL);
+
+	CURVNET_SET(ifp->if_vnet);
+	if (V_ipfmain.ipf_running > 0)
+		ipf_sync(&V_ipfmain, NULL);
+	CURVNET_RESTORE();
 }
 
 
@@ -135,8 +156,10 @@ ipf_check_wrapper(void *arg, struct mbuf
 	ip->ip_len = htons(ip->ip_len);
 	ip->ip_off = htons(ip->ip_off);
 #endif
-	rv = ipf_check(&ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT),
+	CURVNET_SET(ifp->if_vnet);
+	rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT),
 		       mp);
+	CURVNET_RESTORE();
 #if (__FreeBSD_version < 1000019)
 	if ((rv == 0) && (*mp != NULL)) {
 		ip = mtod(*mp, struct ip *);
@@ -153,8 +176,13 @@ ipf_check_wrapper(void *arg, struct mbuf
 static int
 ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
-	return (ipf_check(&ipfmain, mtod(*mp, struct ip *),
-			  sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp));
+	int error;
+
+	CURVNET_SET(ifp->if_vnet);
+	error = ipf_check(&V_ipfmain, mtod(*mp, struct ip *),
+			  sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp);
+	CURVNET_RESTORE();
+	return (error);
 }
 # endif
 #if	defined(IPFILTER_LKM)
@@ -215,7 +243,7 @@ ipfattach(softc)
 	}
 
 
-	bzero((char *)ipfmain.ipf_selwait, sizeof(ipfmain.ipf_selwait));
+	bzero((char *)V_ipfmain.ipf_selwait, sizeof(V_ipfmain.ipf_selwait));
 	softc->ipf_running = 1;
 
 	if (softc->ipf_control_forwarding & 1)
@@ -284,38 +312,42 @@ ipfioctl(dev, cmd, data, mode
 	int error = 0, unit = 0;
 	SPL_INT(s);
 
+	CURVNET_SET(TD_TO_VNET(p));
 #if (BSD >= 199306)
         if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE))
 	{
-		ipfmain.ipf_interror = 130001;
+		V_ipfmain.ipf_interror = 130001;
+		CURVNET_RESTORE();
 		return EPERM;
 	}
 #endif
 
 	unit = GET_MINOR(dev);
 	if ((IPL_LOGMAX < unit) || (unit < 0)) {
-		ipfmain.ipf_interror = 130002;
+		V_ipfmain.ipf_interror = 130002;
+		CURVNET_RESTORE();
 		return ENXIO;
 	}
 
-	if (ipfmain.ipf_running <= 0) {
+	if (V_ipfmain.ipf_running <= 0) {
 		if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) {
-			ipfmain.ipf_interror = 130003;
+			V_ipfmain.ipf_interror = 130003;
+			CURVNET_RESTORE();
 			return EIO;
 		}
 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 		    cmd != SIOCGETFS && cmd != SIOCGETFF &&
 		    cmd != SIOCIPFINTERROR) {
-			ipfmain.ipf_interror = 130004;
+			V_ipfmain.ipf_interror = 130004;
+			CURVNET_RESTORE();
 			return EIO;
 		}
 	}
 
 	SPL_NET(s);
 
-	CURVNET_SET(TD_TO_VNET(p));
-	error = ipf_ioctlswitch(&ipfmain, unit, data, cmd, mode, p->p_uid, p);
+	error = ipf_ioctlswitch(&V_ipfmain, unit, data, cmd, mode, p->p_uid, p);
 	CURVNET_RESTORE();
 	if (error != -1) {
 		SPL_X(s);
@@ -563,7 +595,7 @@ ipf_send_icmp_err(type, fin, dst)
 			}
 
 		if (dst == 0) {
-			if (ipf_ifpaddr(&ipfmain, 4, FRI_NORMAL, ifp,
+			if (ipf_ifpaddr(&V_ipfmain, 4, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
@@ -600,7 +632,7 @@ ipf_send_icmp_err(type, fin, dst)
 		xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr);
 		xtra = MIN(xtra, IPV6_MMTU - iclen);
 		if (dst == 0) {
-			if (ipf_ifpaddr(&ipfmain, 6, FRI_NORMAL, ifp,
+			if (ipf_ifpaddr(&V_ipfmain, 6, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
@@ -924,9 +956,9 @@ sendorfree:
     }
 done:
 	if (!error)
-		ipfmain.ipf_frouteok[0]++;
+		V_ipfmain.ipf_frouteok[0]++;
 	else
-		ipfmain.ipf_frouteok[1]++;
+		V_ipfmain.ipf_frouteok[1]++;
 
 	if (has_nhop)
 		fib4_free_nh_ext(fibnum, &nh4);
@@ -1388,13 +1420,15 @@ void
 ipf_event_reg(void)
 {
 	ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \
-					       ipf_ifevent, &ipfmain, \
+					       ipf_ifevent, NULL, \
 					       EVENTHANDLER_PRI_ANY);
 	ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \
-					       ipf_ifevent, &ipfmain, \
+					       ipf_ifevent, NULL, \
 					       EVENTHANDLER_PRI_ANY);
+#if 0
 	ipf_clonetag  = EVENTHANDLER_REGISTER(if_clone_event, ipf_ifevent, \
-					       &ipfmain, EVENTHANDLER_PRI_ANY);
+					       NULL, EVENTHANDLER_PRI_ANY);
+#endif
 }
 
 void
@@ -1406,9 +1440,11 @@ ipf_event_dereg(void)
 	if (ipf_departtag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag);
 	}
+#if 0
 	if (ipf_clonetag != NULL) {
 		EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag);
 	}
+#endif
 }
 
 

Modified: head/sys/contrib/ipfilter/netinet/ip_proxy.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/ip_proxy.c	Thu Jun 30 14:55:49 2016	(r302297)
+++ head/sys/contrib/ipfilter/netinet/ip_proxy.c	Thu Jun 30 15:01:07 2016	(r302298)
@@ -64,6 +64,15 @@ struct file;
 # include <sys/queue.h>
 #endif
 #include <net/if.h>
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) && defined(_KERNEL)
+#include <net/vnet.h>
+#else
+#define CURVNET_SET(arg)
+#define CURVNET_RESTORE()
+#define	VNET_DEFINE(_t, _v)	_t _v
+#define	VNET_DECLARE(_t, _v)	extern _t _v
+#define	VNET(arg)	arg
+#endif
 #ifdef sun
 # include <net/af.h>
 #endif

Modified: head/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c	Thu Jun 30 14:55:49 2016	(r302297)
+++ head/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c	Thu Jun 30 15:01:07 2016	(r302298)
@@ -80,7 +80,9 @@ static void     ipf_p_rpcb_fixlen __P((f
  */
 static	frentry_t	rpcbfr;	/* Skeleton rule for reference by entities
 				   this proxy creates. */
-static	int	rpcbcnt;	/* Upper bound of allocated RPCB sessions. */
+static	VNET_DEFINE(int,	rpcbcnt);
+#define	V_rpcbcnt		VNET(rpcbcnt)
+				/* Upper bound of allocated RPCB sessions. */
 				/* XXX rpcbcnt still requires locking. */
 
 static	int	rpcb_proxy_init = 0;
@@ -107,7 +109,7 @@ static	int	rpcb_proxy_init = 0;
 void
 ipf_p_rpcb_main_load()
 {
-	rpcbcnt = 0;
+	V_rpcbcnt = 0;
 
 	bzero((char *)&rpcbfr, sizeof(rpcbfr));
 	rpcbfr.fr_ref = 1;
@@ -581,7 +583,7 @@ ipf_p_rpcb_insert(rs, rx)
 		return(0);
         }
 
-	if (rpcbcnt == RPCB_MAXREQS)
+	if (V_rpcbcnt == RPCB_MAXREQS)
 		return(-1);
 
 	KMALLOC(rxp, rpcb_xact_t *);
@@ -599,7 +601,7 @@ ipf_p_rpcb_insert(rs, rx)
 
 	rxp->rx_ref = 1;
 
-	++rpcbcnt;
+	++V_rpcbcnt;
 
 	return(0);
 }
@@ -1084,7 +1086,7 @@ ipf_p_rpcb_deref(rs, rx)
 
 	KFREE(rx);
 
-	--rpcbcnt;
+	--V_rpcbcnt;
 }
 
 /* --------------------------------------------------------------------	*/

Modified: head/sys/contrib/ipfilter/netinet/ip_rules.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/ip_rules.c	Thu Jun 30 14:55:49 2016	(r302297)
+++ head/sys/contrib/ipfilter/netinet/ip_rules.c	Thu Jun 30 15:01:07 2016	(r302298)
@@ -32,6 +32,15 @@
 #endif
 #if defined(__FreeBSD__) && (__FreeBSD_version > 220000)
 # include <sys/sockio.h>
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) && defined(_KERNEL)
+#include <net/vnet.h>
+#else
+#define CURVNET_SET(arg)
+#define CURVNET_RESTORE()
+#define	VNET_DEFINE(_t, _v)	_t _v
+#define	VNET_DECLARE(_t, _v)	extern _t _v
+#define	VNET(arg)	arg
+#endif
 #else
 # include <sys/ioctl.h>
 #endif /* FreeBSD */
@@ -51,7 +60,8 @@
 
 #ifdef IPFILTER_COMPILED
 
-extern ipf_main_softc_t ipfmain;
+VNET_DECLARE(ipf_main_softc_t, ipfmain);
+#define	V_ipfmain		VNET(ipfmain)
 
 
 static u_long in_rule__0[] = {
@@ -129,8 +139,8 @@ int ipfrule_add_out_()
 	fp->fr_dsize = sizeof(ipf_rules_out_[0]);
 	fp->fr_family = AF_INET;
 	fp->fr_func = (ipfunc_t)ipfrule_match_out_;
-	err = frrequest(&ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp,
-			ipfmain.ipf_active, 0);
+	err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp,
+			V_ipfmain.ipf_active, 0);
 	return err;
 }
 
@@ -156,9 +166,9 @@ int ipfrule_remove_out_()
 		}
 	}
 	if (err == 0)
-		err = frrequest(&ipfmain, IPL_LOGIPF, SIOCDELFR,
+		err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCDELFR,
 				(caddr_t)&ipfrule_out_,
-				ipfmain.ipf_active, 0);
+				V_ipfmain.ipf_active, 0);
 	if (err)
 		return err;
 
@@ -198,8 +208,8 @@ int ipfrule_add_in_()
 	fp->fr_dsize = sizeof(ipf_rules_in_[0]);
 	fp->fr_family = AF_INET;
 	fp->fr_func = (ipfunc_t)ipfrule_match_in_;
-	err = frrequest(&ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp,
-			ipfmain.ipf_active, 0);
+	err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp,
+			V_ipfmain.ipf_active, 0);
 	return err;
 }
 
@@ -225,9 +235,9 @@ int ipfrule_remove_in_()
 		}
 	}
 	if (err == 0)
-		err = frrequest(&ipfmain, IPL_LOGIPF, SIOCDELFR,
+		err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCDELFR,
 				(caddr_t)&ipfrule_in_,
-				ipfmain.ipf_active, 0);
+				V_ipfmain.ipf_active, 0);
 	if (err)
 		return err;
 

Modified: head/sys/contrib/ipfilter/netinet/mlfk_ipl.c
==============================================================================
--- head/sys/contrib/ipfilter/netinet/mlfk_ipl.c	Thu Jun 30 14:55:49 2016	(r302297)
+++ head/sys/contrib/ipfilter/netinet/mlfk_ipl.c	Thu Jun 30 15:01:07 2016	(r302298)
@@ -7,6 +7,12 @@
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 
+#if defined(KERNEL) || defined(_KERNEL)
+# undef KERNEL
+# undef _KERNEL
+# define	KERNEL  1
+# define	_KERNEL 1
+#endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -18,6 +24,16 @@
 #include <sys/select.h>
 #if __FreeBSD_version >= 500000
 # include <sys/selinfo.h>
+# include <sys/jail.h>
+#endif
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) && defined(_KERNEL)
+#include <net/vnet.h>
+#else
+#define CURVNET_SET(arg)
+#define CURVNET_RESTORE()
+#define	VNET_DEFINE(_t, _v)	_t _v
+#define	VNET_DECLARE(_t, _v)	extern _t _v
+#define	VNET(arg)	arg
 #endif
 #include <net/if.h>
 #include <netinet/in_systm.h>
@@ -33,7 +49,8 @@
 #include "netinet/ip_frag.h"
 #include "netinet/ip_sync.h"
 
-extern ipf_main_softc_t ipfmain;
+VNET_DECLARE(ipf_main_softc_t, ipfmain);
+#define	V_ipfmain		VNET(ipfmain)
 
 #if __FreeBSD_version >= 502116
 static struct cdev *ipf_devs[IPL_LOGSIZE];
@@ -42,10 +59,14 @@ static dev_t ipf_devs[IPL_LOGSIZE];
 #endif
 
 static int sysctl_ipf_int ( SYSCTL_HANDLER_ARGS );
+static int sysctl_ipf_int_nat ( SYSCTL_HANDLER_ARGS );
+static int sysctl_ipf_int_state ( SYSCTL_HANDLER_ARGS );
+static int sysctl_ipf_int_auth ( SYSCTL_HANDLER_ARGS );
+static int sysctl_ipf_int_frag ( SYSCTL_HANDLER_ARGS );
 static int ipf_modload(void);
 static int ipf_modunload(void);
-static int ipf_fbsd_sysctl_create(ipf_main_softc_t*);
-static int ipf_fbsd_sysctl_destroy(ipf_main_softc_t*);
+static int ipf_fbsd_sysctl_create(void);
+static int ipf_fbsd_sysctl_destroy(void);
 
 #if (__FreeBSD_version >= 500024)
 # if (__FreeBSD_version >= 502116)
@@ -70,40 +91,49 @@ static	int	ipfwrite __P((dev_t, struct u
 
 SYSCTL_DECL(_net_inet);
 #define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \
-	SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
-		   ptr, val, sysctl_ipf_int, "I", descr);
-#define SYSCTL_DYN_IPF(parent, nbr, name, access,ptr, val, descr) \
+	SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|CTLFLAG_VNET|access, \
+		   ptr, val, sysctl_ipf_int, "I", descr)
+#define SYSCTL_DYN_IPF_NAT(parent, nbr, name, access,ptr, val, descr) \
+	SYSCTL_ADD_OID(&ipf_clist, SYSCTL_STATIC_CHILDREN(parent), nbr, name, \
+	CTLFLAG_DYN|CTLTYPE_INT|CTLFLAG_VNET|access, ptr, val, sysctl_ipf_int_nat, "I", descr)
+#define SYSCTL_DYN_IPF_STATE(parent, nbr, name, access,ptr, val, descr) \
 	SYSCTL_ADD_OID(&ipf_clist, SYSCTL_STATIC_CHILDREN(parent), nbr, name, \
-	CTLFLAG_DYN|CTLTYPE_INT|access, ptr, val, sysctl_ipf_int, "I", descr)
+	CTLFLAG_DYN|CTLTYPE_INT|CTLFLAG_VNET|access, ptr, val, sysctl_ipf_int_state, "I", descr)
+#define SYSCTL_DYN_IPF_FRAG(parent, nbr, name, access,ptr, val, descr) \
+	SYSCTL_ADD_OID(&ipf_clist, SYSCTL_STATIC_CHILDREN(parent), nbr, name, \
+	CTLFLAG_DYN|CTLTYPE_INT|CTLFLAG_VNET|access, ptr, val, sysctl_ipf_int_frag, "I", descr)
+#define SYSCTL_DYN_IPF_AUTH(parent, nbr, name, access,ptr, val, descr) \
+	SYSCTL_ADD_OID(&ipf_clist, SYSCTL_STATIC_CHILDREN(parent), nbr, name, \
+	CTLFLAG_DYN|CTLTYPE_INT|CTLFLAG_VNET|access, ptr, val, sysctl_ipf_int_auth, "I", descr)
 static struct sysctl_ctx_list ipf_clist;
 #define	CTLFLAG_OFF	0x00800000	/* IPFilter must be disabled */
 #define	CTLFLAG_RWO	(CTLFLAG_RW|CTLFLAG_OFF)
 SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF");
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &ipfmain.ipf_flags, 0, "IPF flags");
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &ipfmain.ipf_pass, 0, "default pass/block");
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &ipfmain.ipf_active, 0, "IPF is active");
+SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_flags), 0, "IPF flags");
+SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_pass), 0, "default pass/block");
+SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &VNET_NAME(ipfmain.ipf_active), 0, "IPF is active");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RWO,
-	   &ipfmain.ipf_tcpidletimeout, 0, "TCP idle timeout in seconds");
+	   &VNET_NAME(ipfmain.ipf_tcpidletimeout), 0, "TCP idle timeout in seconds");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO,
-	   &ipfmain.ipf_tcphalfclosed, 0, "timeout for half closed TCP sessions");
+	   &VNET_NAME(ipfmain.ipf_tcphalfclosed), 0, "timeout for half closed TCP sessions");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO,
-	   &ipfmain.ipf_tcpclosewait, 0, "timeout for TCP sessions in closewait status");
+	   &VNET_NAME(ipfmain.ipf_tcpclosewait), 0, "timeout for TCP sessions in closewait status");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO,
-	   &ipfmain.ipf_tcplastack, 0, "timeout for TCP sessions in last ack status");
+	   &VNET_NAME(ipfmain.ipf_tcplastack), 0, "timeout for TCP sessions in last ack status");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RWO,
-	   &ipfmain.ipf_tcptimeout, 0, "");
+	   &VNET_NAME(ipfmain.ipf_tcptimeout), 0, "");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO,
-	   &ipfmain.ipf_tcpclosed, 0, "");
+	   &VNET_NAME(ipfmain.ipf_tcpclosed), 0, "");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO,
-	   &ipfmain.ipf_udptimeout, 0, "UDP timeout");
+	   &VNET_NAME(ipfmain.ipf_udptimeout), 0, "UDP timeout");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udpacktimeout, CTLFLAG_RWO,
-	   &ipfmain.ipf_udpacktimeout, 0, "");
+	   &VNET_NAME(ipfmain.ipf_udpacktimeout), 0, "");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO,
-	   &ipfmain.ipf_icmptimeout, 0, "ICMP timeout");
+	   &VNET_NAME(ipfmain.ipf_icmptimeout), 0, "ICMP timeout");
 SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_running, CTLFLAG_RD,
-	   &ipfmain.ipf_running, 0, "IPF is running");
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &ipfmain.ipf_chksrc, 0, "");
-SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &ipfmain.ipf_minttl, 0, "");
+	   &VNET_NAME(ipfmain.ipf_running), 0, "IPF is running");
+SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_chksrc), 0, "");
+SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_minttl), 0, "");
 
 #define CDEV_MAJOR 79
 #include <sys/poll.h>
@@ -178,28 +208,61 @@ ipfilter_modevent(module_t mod, int type
 }
 
 
+static void
+vnet_ipf_init(void)
+{
+	char *defpass;
+	int error;
+
+	if (ipf_create_all(&V_ipfmain) == NULL)
+		return;
+
+	error = ipfattach(&V_ipfmain);
+	if (error) {
+		ipf_destroy_all(&V_ipfmain);
+		return;
+	}
+
+	if (FR_ISPASS(V_ipfmain.ipf_pass))
+		defpass = "pass";
+	else if (FR_ISBLOCK(V_ipfmain.ipf_pass))
+		defpass = "block";
+	else
+		defpass = "no-match -> block";
+
+	if (IS_DEFAULT_VNET(curvnet))
+	    printf("%s initialized.  Default = %s all, Logging = %s%s\n",
+		ipfilter_version, defpass,
+#ifdef IPFILTER_LOG
+		"enabled",
+#else
+		"disabled",
+#endif
+#ifdef IPFILTER_COMPILED
+		" (COMPILED)"
+#else
+		""
+#endif
+		);
+}
+VNET_SYSINIT(vnet_ipf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
+    vnet_ipf_init, NULL);
+
 static int
 ipf_modload()
 {
-	char *defpass, *c, *str;
+	char *c, *str;
 	int i, j, error;
 
 	if (ipf_load_all() != 0)
 		return EIO;
 
-	if (ipf_create_all(&ipfmain) == NULL)
-		return EIO;
-
-	if (ipf_fbsd_sysctl_create(&ipfmain) != 0)
+	if (ipf_fbsd_sysctl_create() != 0) {
 		return EIO;
-
-	error = ipfattach(&ipfmain);
-	if (error)
-		return error;
+	}
 
 	for (i = 0; i < IPL_LOGSIZE; i++)
 		ipf_devs[i] = NULL;
-
 	for (i = 0; (str = ipf_devfiles[i]); i++) {
 		c = NULL;
 		for(j = strlen(str); j > 0; j--)
@@ -217,63 +280,48 @@ ipf_modload()
 		return error;
 	ipf_event_reg();
 
-	if (FR_ISPASS(ipfmain.ipf_pass))
-		defpass = "pass";
-	else if (FR_ISBLOCK(ipfmain.ipf_pass))
-		defpass = "block";
-	else
-		defpass = "no-match -> block";
-
-	printf("%s initialized.  Default = %s all, Logging = %s%s\n",
-		ipfilter_version, defpass,
-#ifdef IPFILTER_LOG
-		"enabled",
-#else
-		"disabled",
-#endif
-#ifdef IPFILTER_COMPILED
-		" (COMPILED)"
-#else
-		""
-#endif
-		);
 	return 0;
 }
 
+static void
+vnet_ipf_uninit(void)
+{
+
+	if (V_ipfmain.ipf_refcnt)
+		return;
+
+	if (V_ipfmain.ipf_running >= 0) {
+		if (ipfdetach(&V_ipfmain) != 0)
+			return;
+
+		ipf_destroy_all(&V_ipfmain);
+	}
+
+	V_ipfmain.ipf_running = -2;
+}
+VNET_SYSUNINIT(vnet_ipf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
+    vnet_ipf_uninit, NULL);
 
 static int
 ipf_modunload()
 {
 	int error, i;
 
-	if (ipfmain.ipf_refcnt)
-		return EBUSY;
+	ipf_event_dereg();
 
-	if (ipf_fbsd_sysctl_destroy(&ipfmain) != 0)
-		return EIO;
+	ipf_fbsd_sysctl_destroy();
 
 	error = ipf_pfil_unhook();
 	if (error != 0)
 		return error;
 
-	if (ipfmain.ipf_running >= 0) {
-		error = ipfdetach(&ipfmain);
-		if (error != 0)
-			return error;
-
-		ipf_fbsd_sysctl_destroy(&ipfmain);
-		ipf_destroy_all(&ipfmain);
-		ipf_unload_all();
-	} else
-		error = 0;
-
-	ipfmain.ipf_running = -2;
-
 	for (i = 0; ipf_devfiles[i]; i++) {
 		if (ipf_devs[i] != NULL)
 			destroy_dev(ipf_devs[i]);
 	}
 
+	ipf_unload_all();
+
 	printf("%s unloaded\n", ipfilter_version);
 
 	return error;
@@ -287,7 +335,7 @@ static moduledata_t ipfiltermod = {
 };
 
 
-DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
+DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
 #ifdef	MODULE_VERSION
 MODULE_VERSION(ipfilter, 1);
 #endif
@@ -310,13 +358,87 @@ sysctl_ipf_int ( SYSCTL_HANDLER_ARGS )
 	if (!arg1)
 		error = EPERM;
 	else {
-		if ((oidp->oid_kind & CTLFLAG_OFF) && (ipfmain.ipf_running > 0))
+		if ((oidp->oid_kind & CTLFLAG_OFF) && (V_ipfmain.ipf_running > 0))
 			error = EBUSY;
 		else
 			error = SYSCTL_IN(req, arg1, sizeof(int));
 	}
 	return (error);
 }
+
+/*
+ * In the VIMAGE case kern_sysctl.c already adds the vnet base address given
+ * we set CTLFLAG_VNET to get proper access checks.  Have to undo this.
+ * Then we add the given offset to the specific malloced struct hanging off
+ * virtualized ipfmain struct.
+ */
+static int
+sysctl_ipf_int_nat ( SYSCTL_HANDLER_ARGS )
+{
+
+	if (arg1) {
+		ipf_nat_softc_t *nat_softc;
+
+		nat_softc = V_ipfmain.ipf_nat_soft;
+#ifdef VIMAGE
+		arg1 = (void *)((uintptr_t)arg1 - curvnet->vnet_data_base);
+#endif
+		arg1 = (void *)((uintptr_t)nat_softc + (uintptr_t)arg1);
+	}
+
+	return (sysctl_ipf_int(oidp, arg1, arg2, req));
+}
+
+static int
+sysctl_ipf_int_state ( SYSCTL_HANDLER_ARGS )
+{
+
+	if (arg1) {
+		ipf_state_softc_t *state_softc;
+
+		state_softc = V_ipfmain.ipf_state_soft;
+#ifdef VIMAGE
+		arg1 = (void *)((uintptr_t)arg1 - curvnet->vnet_data_base);
+#endif
+		arg1 = (void *)((uintptr_t)state_softc + (uintptr_t)arg1);
+	}
+
+	return (sysctl_ipf_int(oidp, arg1, arg2, req));
+}
+
+static int
+sysctl_ipf_int_auth ( SYSCTL_HANDLER_ARGS )
+{
+
+	if (arg1) {
+		ipf_auth_softc_t *auth_softc;
+
+		auth_softc = V_ipfmain.ipf_auth_soft;
+#ifdef VIMAGE
+		arg1 = (void *)((uintptr_t)arg1 - curvnet->vnet_data_base);
+#endif
+		arg1 = (void *)((uintptr_t)auth_softc + (uintptr_t)arg1);
+	}
+
+	return (sysctl_ipf_int(oidp, arg1, arg2, req));
+}
+
+static int
+sysctl_ipf_int_frag ( SYSCTL_HANDLER_ARGS )
+{
+
+	if (arg1) {
+		ipf_frag_softc_t *frag_softc;
+
+		frag_softc = V_ipfmain.ipf_frag_soft;
+#ifdef VIMAGE
+		arg1 = (void *)((uintptr_t)arg1 - curvnet->vnet_data_base);
+#endif
+		arg1 = (void *)((uintptr_t)frag_softc + (uintptr_t)arg1);
+	}
+
+	return (sysctl_ipf_int(oidp, arg1, arg2, req));
+}
 #endif
 
 
@@ -335,24 +457,25 @@ ipfpoll(dev_t dev, int events, struct pr
 
 	revents = 0;
 
+	CURVNET_SET(TD_TO_VNET(td));
 	switch (unit)
 	{
 	case IPL_LOGIPF :
 	case IPL_LOGNAT :
 	case IPL_LOGSTATE :
 #ifdef IPFILTER_LOG
-		if ((events & (POLLIN | POLLRDNORM)) && ipf_log_canread(&ipfmain, unit))
+		if ((events & (POLLIN | POLLRDNORM)) && ipf_log_canread(&V_ipfmain, unit))
 			revents |= events & (POLLIN | POLLRDNORM);
 #endif
 		break;
 	case IPL_LOGAUTH :
-		if ((events & (POLLIN | POLLRDNORM)) && ipf_auth_waiting(&ipfmain))
+		if ((events & (POLLIN | POLLRDNORM)) && ipf_auth_waiting(&V_ipfmain))
 			revents |= events & (POLLIN | POLLRDNORM);
 		break;
 	case IPL_LOGSYNC :
-		if ((events & (POLLIN | POLLRDNORM)) && ipf_sync_canread(&ipfmain))
+		if ((events & (POLLIN | POLLRDNORM)) && ipf_sync_canread(&V_ipfmain))
 			revents |= events & (POLLIN | POLLRDNORM);
-		if ((events & (POLLOUT | POLLWRNORM)) && ipf_sync_canwrite(&ipfmain))
+		if ((events & (POLLOUT | POLLWRNORM)) && ipf_sync_canwrite(&V_ipfmain))
 			revents |= events & (POLLOUT | POLLWRNORM);
 		break;
 	case IPL_LOGSCAN :
@@ -362,7 +485,8 @@ ipfpoll(dev_t dev, int events, struct pr
 	}
 
 	if ((revents == 0) && ((events & (POLLIN|POLLRDNORM)) != 0))
-		selrecord(td, &ipfmain.ipf_selwait[unit]);
+		selrecord(td, &V_ipfmain.ipf_selwait[unit]);
+	CURVNET_RESTORE();
 
 	return revents;
 }
@@ -465,22 +589,31 @@ static int ipfread(dev, uio)
 #endif
 	struct uio *uio;
 {
+	int error;
 	int	unit = GET_MINOR(dev);
 
 	if (unit < 0)
 		return ENXIO;
 
-	if (ipfmain.ipf_running < 1)
+	CURVNET_SET(TD_TO_VNET(curthread));
+	if (V_ipfmain.ipf_running < 1) {
+		CURVNET_RESTORE();
 		return EIO;
+	}
 
-	if (unit == IPL_LOGSYNC)
-		return ipf_sync_read(&ipfmain, uio);
+	if (unit == IPL_LOGSYNC) {
+		error = ipf_sync_read(&V_ipfmain, uio);
+		CURVNET_RESTORE();
+		return error;
+	}
 
 #ifdef IPFILTER_LOG
-	return ipf_log_read(&ipfmain, unit, uio);
+	error = ipf_log_read(&V_ipfmain, unit, uio);
 #else
-	return ENXIO;
+	error = ENXIO;
 #endif
+	CURVNET_RESTORE();
+	return error;
 }
 
 
@@ -503,61 +636,57 @@ static int ipfwrite(dev, uio)
 #endif
 	struct uio *uio;
 {
+	int error;
 
-	if (ipfmain.ipf_running < 1)
+	CURVNET_SET(TD_TO_VNET(curthread));
+	if (V_ipfmain.ipf_running < 1) {
+		CURVNET_RESTORE();
 		return EIO;
+	}
 
-	if (GET_MINOR(dev) == IPL_LOGSYNC)
-		return ipf_sync_write(&ipfmain, uio);
+	if (GET_MINOR(dev) == IPL_LOGSYNC) {
+		error = ipf_sync_write(&V_ipfmain, uio);
+		CURVNET_RESTORE();
+		return error;
+	}
 	return ENXIO;
 }
 
 static int
-ipf_fbsd_sysctl_create(main_softc)
-	ipf_main_softc_t *main_softc;
+ipf_fbsd_sysctl_create(void)
 {
-	ipf_nat_softc_t	*nat_softc;
-	ipf_state_softc_t *state_softc;
-	ipf_auth_softc_t *auth_softc;
-	ipf_frag_softc_t *frag_softc;
-
-	nat_softc = main_softc->ipf_nat_soft;
-	state_softc = main_softc->ipf_state_soft;
-	auth_softc = main_softc->ipf_auth_soft;
-	frag_softc = main_softc->ipf_frag_soft;
 
 	sysctl_ctx_init(&ipf_clist);
 
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_defnatage", CTLFLAG_RWO,
-	    &nat_softc->ipf_nat_defage, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_statesize", CTLFLAG_RWO,
-	    &state_softc->ipf_state_size, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_statemax", CTLFLAG_RWO,
-	    &state_softc->ipf_state_max, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_nattable_max", CTLFLAG_RWO,
-	    &nat_softc->ipf_nat_table_max, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_nattable_sz", CTLFLAG_RWO,
-	    &nat_softc->ipf_nat_table_sz, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_natrules_sz", CTLFLAG_RWO,
-	    &nat_softc->ipf_nat_maprules_sz, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_rdrrules_sz", CTLFLAG_RWO,
-	    &nat_softc->ipf_nat_rdrrules_sz, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_hostmap_sz", CTLFLAG_RWO,
-	    &nat_softc->ipf_nat_hostmap_sz, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_authsize", CTLFLAG_RWO,
-	   &auth_softc->ipf_auth_size, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_authused", CTLFLAG_RD,
-	   &auth_softc->ipf_auth_used, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_defaultauthage", CTLFLAG_RW,
-	   &auth_softc->ipf_auth_defaultage, 0, "");
-	SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_ipfrttl", CTLFLAG_RW,
-	   &frag_softc->ipfr_ttl, 0, "");
+	SYSCTL_DYN_IPF_NAT(_net_inet_ipf, OID_AUTO, "fr_defnatage", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_nat_softc_t, ipf_nat_defage), 0, "");
+	SYSCTL_DYN_IPF_STATE(_net_inet_ipf, OID_AUTO, "fr_statesize", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_state_softc_t, ipf_state_size), 0, "");
+	SYSCTL_DYN_IPF_STATE(_net_inet_ipf, OID_AUTO, "fr_statemax", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_state_softc_t, ipf_state_max), 0, "");
+	SYSCTL_DYN_IPF_NAT(_net_inet_ipf, OID_AUTO, "ipf_nattable_max", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_max), 0, "");
+	SYSCTL_DYN_IPF_NAT(_net_inet_ipf, OID_AUTO, "ipf_nattable_sz", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_sz), 0, "");
+	SYSCTL_DYN_IPF_NAT(_net_inet_ipf, OID_AUTO, "ipf_natrules_sz", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_nat_softc_t, ipf_nat_maprules_sz), 0, "");
+	SYSCTL_DYN_IPF_NAT(_net_inet_ipf, OID_AUTO, "ipf_rdrrules_sz", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_nat_softc_t, ipf_nat_rdrrules_sz), 0, "");
+	SYSCTL_DYN_IPF_NAT(_net_inet_ipf, OID_AUTO, "ipf_hostmap_sz", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_nat_softc_t, ipf_nat_hostmap_sz), 0, "");
+	SYSCTL_DYN_IPF_AUTH(_net_inet_ipf, OID_AUTO, "fr_authsize", CTLFLAG_RWO,
+	    (void *)offsetof(ipf_auth_softc_t, ipf_auth_size), 0, "");
+	SYSCTL_DYN_IPF_AUTH(_net_inet_ipf, OID_AUTO, "fr_authused", CTLFLAG_RD,
+	    (void *)offsetof(ipf_auth_softc_t, ipf_auth_used), 0, "");
+	SYSCTL_DYN_IPF_AUTH(_net_inet_ipf, OID_AUTO, "fr_defaultauthage", CTLFLAG_RW,
+	    (void *)offsetof(ipf_auth_softc_t, ipf_auth_defaultage), 0, "");
+	SYSCTL_DYN_IPF_FRAG(_net_inet_ipf, OID_AUTO, "fr_ipfrttl", CTLFLAG_RW,
+	    (void *)offsetof(ipf_frag_softc_t, ipfr_ttl), 0, "");
 	return 0;
 }
 
 static int
-ipf_fbsd_sysctl_destroy(main_softc)
-	ipf_main_softc_t *main_softc;
+ipf_fbsd_sysctl_destroy(void)
 {
 	if (sysctl_ctx_free(&ipf_clist)) {
 		printf("sysctl_ctx_free failed");



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201606301501.u5UF17WV091555>