Date: Wed, 14 Aug 2013 23:43:05 +0300 From: Mikolaj Golub <trociny@FreeBSD.org> To: Marko Zec <zec@fer.hr> Cc: freebsd-virtualization@freebsd.org Subject: Re: RFC: ipfw nat VIMAGE improvements Message-ID: <20130814204303.GA13541@gmail.com> In-Reply-To: <201308141728.31361.zec@fer.hr> References: <20130811200111.GA49895@gmail.com> <201308141728.31361.zec@fer.hr>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
On Wed, Aug 14, 2013 at 05:28:31PM +0200, Marko Zec wrote:
> On Sunday 11 August 2013 22:01:12 Mikolaj Golub wrote:
> > Hi,
> >
> > I would like to commit this patch that fixes some issues related to
> > ipfw nat module load/unload on VIMAGE featured system.
> >
> > Any comments, objections?
>
> Far from being an expert in ipfw, I'm worried that the proposed approach of
> simultaneously acquiring locks on _all_ ipfw instances might be calling for
> trouble:
>
> + VNET_LIST_RLOCK();
> + VNET_FOREACH(vnet_iter) {
> + CURVNET_SET(vnet_iter);
> + IPFW_WLOCK(&V_layer3_chain);
> + CURVNET_RESTORE();
> + }
> ipfw_nat_ptr = ipfw_nat;
> lookup_nat_ptr = lookup_nat;
> ipfw_nat_cfg_ptr = ipfw_nat_cfg;
> ipfw_nat_del_ptr = ipfw_nat_del;
> ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
> ipfw_nat_get_log_ptr = ipfw_nat_get_log;
> - IPFW_WUNLOCK(&V_layer3_chain);
> - V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
> + VNET_FOREACH(vnet_iter) {
> + CURVNET_SET(vnet_iter);
> + IPFW_WUNLOCK(&V_layer3_chain);
> + CURVNET_RESTORE();
> + }
> + VNET_LIST_RUNLOCK();
>
> Why couldn't we introduce a per-vnet flag, say V_ipfw_nat_ready, and use it
> as
>
> #define IPFW_NAT_LOADED (V_ipfw_nat_ready)
>
> instead of current version of that macro:
>
> #define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
>
> I.e., perhaps in ipfw_nat_init() we could first set all the function
> pointers, and then iterate over all vnets and set V_ipfw_nat_ready there.
> In ipfw_nat_destroy() we would first iterate over all vnets to clear the
> flag, before clearing function pointers?
I like your approach. Though instead of iterating over vnets in
ipfw_nat_init/destroy I think it is safe just to set/unset
V_ipfw_nat_ready in vnet_ipfw_nat_init/uninit.
--
Mikolaj Golub
[-- Attachment #2 --]
commit 76323cd328717de5b77d0d1e2e23150c482c630a
Author: Mikolaj Golub <trociny@freebsd.org>
Date: Sun Aug 4 13:49:50 2013 +0300
Make ipfw nat init/uninit work correctly for VIMAGE:
* Do per vnet instance cleanup (previously it was only for vnet0 on
module unload, and led to libalias leaks and possible panics due to
stale pointer dereferences).
* Instead of protecting ipfw hook registration/deregistration with only
the vnet0 lock (which does not prevent pointer access from other
vnets), introduce a per-vnet ipfw_nat_ready variable. The variable is
set after hooks are registered and unset before they are deregistered.
* Devirtualize ifaddr_event_tag as we run only one event handler for
all vnets.
* The ifaddr_change event handler is expected to be called in the
interface's vnet context, so add an assertion to check this.
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index 6317013..b9dc18e 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -142,6 +142,8 @@ VNET_DEFINE(int, verbose_limit);
/* layer3_chain contains the list of rules for layer 3 */
VNET_DEFINE(struct ip_fw_chain, layer3_chain);
+VNET_DEFINE(int, ipfw_nat_ready) = 0;
+
ipfw_nat_t *ipfw_nat_ptr = NULL;
struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c
index 84852db..155eddd 100644
--- a/sys/netpfil/ipfw/ip_fw_nat.c
+++ b/sys/netpfil/ipfw/ip_fw_nat.c
@@ -53,8 +53,7 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h> /* XXX for in_cksum */
-static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
-#define V_ifaddr_event_tag VNET(ifaddr_event_tag)
+static eventhandler_tag ifaddr_event_tag;
static void
ifaddr_change(void *arg __unused, struct ifnet *ifp)
@@ -63,6 +62,8 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp)
struct ifaddr *ifa;
struct ip_fw_chain *chain;
+ KASSERT(curvnet == ifp->if_vnet,
+ ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
chain = &V_layer3_chain;
IPFW_WLOCK(chain);
/* Check every nat entry... */
@@ -589,11 +590,38 @@ ipfw_nat_get_log(struct sockopt *sopt)
return(0);
}
+static int
+vnet_ipfw_nat_init(const void *arg __unused)
+{
+
+ V_ipfw_nat_ready = 1;
+ return (0);
+}
+
+static int
+vnet_ipfw_nat_uninit(const void *arg __unused)
+{
+ struct cfg_nat *ptr, *ptr_temp;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+ IPFW_WLOCK(chain);
+ LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
+ LIST_REMOVE(ptr, _next);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ flush_nat_ptrs(chain, -1 /* flush all */);
+ V_ipfw_nat_ready = 0;
+ IPFW_WUNLOCK(chain);
+ return (0);
+}
+
static void
ipfw_nat_init(void)
{
- IPFW_WLOCK(&V_layer3_chain);
/* init ipfw hooks */
ipfw_nat_ptr = ipfw_nat;
lookup_nat_ptr = lookup_nat;
@@ -601,28 +629,16 @@ ipfw_nat_init(void)
ipfw_nat_del_ptr = ipfw_nat_del;
ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
ipfw_nat_get_log_ptr = ipfw_nat_get_log;
- IPFW_WUNLOCK(&V_layer3_chain);
- V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
- ifaddr_event, ifaddr_change,
+
+ ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
NULL, EVENTHANDLER_PRI_ANY);
}
static void
ipfw_nat_destroy(void)
{
- struct cfg_nat *ptr, *ptr_temp;
- struct ip_fw_chain *chain;
- chain = &V_layer3_chain;
- IPFW_WLOCK(chain);
- LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
- LIST_REMOVE(ptr, _next);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
- }
- EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
- flush_nat_ptrs(chain, -1 /* flush all */);
+ EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
/* deregister ipfw_nat */
ipfw_nat_ptr = NULL;
lookup_nat_ptr = NULL;
@@ -630,7 +646,6 @@ ipfw_nat_destroy(void)
ipfw_nat_del_ptr = NULL;
ipfw_nat_get_cfg_ptr = NULL;
ipfw_nat_get_log_ptr = NULL;
- IPFW_WUNLOCK(chain);
}
static int
@@ -640,11 +655,9 @@ ipfw_nat_modevent(module_t mod, int type, void *unused)
switch (type) {
case MOD_LOAD:
- ipfw_nat_init();
break;
case MOD_UNLOAD:
- ipfw_nat_destroy();
break;
default:
@@ -660,8 +673,25 @@ static moduledata_t ipfw_nat_mod = {
0
};
-DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+/* Define startup order. */
+#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 255)
+#define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1)
+#define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
MODULE_VERSION(ipfw_nat, 1);
+
+SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+ ipfw_nat_init, NULL);
+VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER,
+ vnet_ipfw_nat_init, NULL);
+
+SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+ ipfw_nat_destroy, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL,
+ IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL);
+
/* end of file */
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index a41cdf5..a8d7eea 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -327,9 +327,11 @@ extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
typedef int ipfw_nat_cfg_t(struct sockopt *);
-extern ipfw_nat_t *ipfw_nat_ptr;
-#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
+VNET_DECLARE(int, ipfw_nat_ready);
+#define V_ipfw_nat_ready VNET(ipfw_nat_ready)
+#define IPFW_NAT_LOADED (V_ipfw_nat_ready)
+extern ipfw_nat_t *ipfw_nat_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130814204303.GA13541>
