Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 4 Dec 2018 16:01:26 +0000 (UTC)
From:      "Andrey V. Elsukov" <ae@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r341471 - in head/sys/netpfil/ipfw: . nat64 nptv6
Message-ID:  <201812041601.wB4G1QDT001641@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ae
Date: Tue Dec  4 16:01:25 2018
New Revision: 341471
URL: https://svnweb.freebsd.org/changeset/base/341471

Log:
  Reimplement how net.inet.ip.fw.dyn_keep_states works.
  
  Turning on of this feature allows to keep dynamic states when parent
  rule is deleted. But it works only when the default rule is
  "allow from any to any".
  
  Now when rule with dynamic opcode is going to be deleted, and
  net.inet.ip.fw.dyn_keep_states is enabled, existing states will reference
  named objects corresponding to this rule, and also reference the rule.
  And when ipfw_dyn_lookup_state() will find state for deleted parent rule,
  it will return the pointer to the deleted rule, that is still valid.
  This implementation doesn't support O_LIMIT_PARENT rules.
  
  The refcnt field was added to struct ip_fw to keep reference, also
  next pointer added to be able iterate rules and not damage the content
  when deleted rules are chained.
  
  Named objects are referenced only when states are going to be deleted to
  be able reuse kidx of named objects when new parent rules will be
  installed.
  
  ipfw_dyn_get_count() function was modified and now it also looks into
  dynamic states and constructs maps of existing named objects. This is
  needed to correctly export orphaned states into userland.
  
  ipfw_free_rule() was changed to be global, since now dynamic state can
  free rule, when it is expired and references counters becomes 1.
  
  External actions subsystem also modified, since external actions can be
  deregisterd and instances can be destroyed. In these cases deleted rules,
  that are referenced by orphaned states, must be modified to prevent access
  to freed memory. ipfw_dyn_reset_eaction(), ipfw_reset_eaction_instance()
  functions added for these purposes.
  
  Obtained from:	Yandex LLC
  MFC after:	2 months
  Sponsored by:	Yandex LLC
  Differential Revision:	https://reviews.freebsd.org/D17532

Modified:
  head/sys/netpfil/ipfw/ip_fw_dynamic.c
  head/sys/netpfil/ipfw/ip_fw_eaction.c
  head/sys/netpfil/ipfw/ip_fw_private.h
  head/sys/netpfil/ipfw/ip_fw_sockopt.c
  head/sys/netpfil/ipfw/nat64/nat64lsn_control.c
  head/sys/netpfil/ipfw/nat64/nat64stl_control.c
  head/sys/netpfil/ipfw/nptv6/nptv6.c

Modified: head/sys/netpfil/ipfw/ip_fw_dynamic.c
==============================================================================
--- head/sys/netpfil/ipfw/ip_fw_dynamic.c	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/ip_fw_dynamic.c	Tue Dec  4 16:01:25 2018	(r341471)
@@ -122,6 +122,12 @@ __FBSDID("$FreeBSD$");
 	(d)->bcnt_ ## dir += pktlen;		\
 	} while (0)
 
+#define	DYN_REFERENCED		0x01
+/*
+ * DYN_REFERENCED flag is used to show that state keeps reference to named
+ * object, and this reference should be released when state becomes expired.
+ */
+
 struct dyn_data {
 	void		*parent;	/* pointer to parent rule */
 	uint32_t	chain_id;	/* cached ruleset id */
@@ -129,7 +135,8 @@ struct dyn_data {
 
 	uint32_t	hashval;	/* hash value used for hash resize */
 	uint16_t	fibnum;		/* fib used to send keepalives */
-	uint8_t		_pad[3];
+	uint8_t		_pad[2];
+	uint8_t		flags;		/* internal flags */
 	uint8_t		set;		/* parent rule set number */
 	uint16_t	rulenum;	/* parent rule number */
 	uint32_t	ruleid;		/* parent rule id */
@@ -1399,20 +1406,29 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, c
 			 * should be deleted by dyn_expire_states().
 			 *
 			 * In case when dyn_keep_states is enabled, return
-			 * pointer to default rule and corresponding f_pos
-			 * value.
-			 * XXX: In this case we lose the cache efficiency,
-			 *      since f_pos is not cached, because it seems
-			 *      there is no easy way to atomically switch
-			 *      all fields related to parent rule of given
-			 *      state.
+			 * pointer to deleted rule and f_pos value
+			 * corresponding to penultimate rule.
+			 * When we have enabled V_dyn_keep_states, states
+			 * that become orphaned will get the DYN_REFERENCED
+			 * flag and rule will keep around. So we can return
+			 * it. But since it is not in the rules map, we need
+			 * return such f_pos value, so after the state
+			 * handling if the search will continue, the next rule
+			 * will be the last one - the default rule.
 			 */
 			if (V_layer3_chain.map[data->f_pos] == rule) {
 				data->chain_id = V_layer3_chain.id;
 				info->f_pos = data->f_pos;
 			} else if (V_dyn_keep_states != 0) {
-				rule = V_layer3_chain.default_rule;
-				info->f_pos = V_layer3_chain.n_rules - 1;
+				/*
+				 * The original rule pointer is still usable.
+				 * So, we return it, but f_pos need to be
+				 * changed to point to the penultimate rule.
+				 */
+				MPASS(V_layer3_chain.n_rules > 1);
+				data->chain_id = V_layer3_chain.id;
+				data->f_pos = V_layer3_chain.n_rules - 2;
+				info->f_pos = data->f_pos;
 			} else {
 				rule = NULL;
 				info->direction = MATCH_NONE;
@@ -2112,40 +2128,102 @@ dyn_match_range(uint16_t rulenum, uint8_t set, const i
 	return (1);
 }
 
+static void
+dyn_acquire_rule(struct ip_fw_chain *ch, struct dyn_data *data,
+    struct ip_fw *rule, uint16_t kidx)
+{
+	struct dyn_state_obj *obj;
+
+	/*
+	 * Do not acquire reference twice.
+	 * This can happen when rule deletion executed for
+	 * the same range, but different ruleset id.
+	 */
+	if (data->flags & DYN_REFERENCED)
+		return;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	MPASS(kidx != 0);
+
+	data->flags |= DYN_REFERENCED;
+	/* Reference the named object */
+	obj = SRV_OBJECT(ch, kidx);
+	obj->no.refcnt++;
+	MPASS(obj->no.etlv == IPFW_TLV_STATE_NAME);
+
+	/* Reference the parent rule */
+	rule->refcnt++;
+}
+
+static void
+dyn_release_rule(struct ip_fw_chain *ch, struct dyn_data *data,
+    struct ip_fw *rule, uint16_t kidx)
+{
+	struct dyn_state_obj *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	MPASS(kidx != 0);
+
+	obj = SRV_OBJECT(ch, kidx);
+	if (obj->no.refcnt == 1)
+		dyn_destroy(ch, &obj->no);
+	else
+		obj->no.refcnt--;
+
+	if (--rule->refcnt == 1)
+		ipfw_free_rule(rule);
+}
+
+/*
+ * We do not keep O_LIMIT_PARENT states when V_dyn_keep_states is enabled.
+ * O_LIMIT state is created when new connection is going to be established
+ * and there is no matching state. So, since the old parent rule was deleted
+ * we can't create new states with old parent, and thus we can not account
+ * new connections with already established connections, and can not do
+ * proper limiting.
+ */
 static int
-dyn_match_ipv4_state(struct dyn_ipv4_state *s, const ipfw_range_tlv *rt)
+dyn_match_ipv4_state(struct ip_fw_chain *ch, struct dyn_ipv4_state *s,
+    const ipfw_range_tlv *rt)
 {
+	struct ip_fw *rule;
+	int ret;
 
 	if (s->type == O_LIMIT_PARENT)
 		return (dyn_match_range(s->limit->rulenum,
 		    s->limit->set, rt));
 
-	if (s->type == O_LIMIT)
-		return (dyn_match_range(s->data->rulenum, s->data->set, rt));
+	ret = dyn_match_range(s->data->rulenum, s->data->set, rt);
+	if (ret == 0 || V_dyn_keep_states == 0)
+		return (ret);
 
-	if (V_dyn_keep_states == 0 &&
-	    dyn_match_range(s->data->rulenum, s->data->set, rt))
-		return (1);
-
+	rule = s->data->parent;
+	if (s->type == O_LIMIT)
+		rule = ((struct dyn_ipv4_state *)rule)->limit->parent;
+	dyn_acquire_rule(ch, s->data, rule, s->kidx);
 	return (0);
 }
 
 #ifdef INET6
 static int
-dyn_match_ipv6_state(struct dyn_ipv6_state *s, const ipfw_range_tlv *rt)
+dyn_match_ipv6_state(struct ip_fw_chain *ch, struct dyn_ipv6_state *s,
+    const ipfw_range_tlv *rt)
 {
+	struct ip_fw *rule;
+	int ret;
 
 	if (s->type == O_LIMIT_PARENT)
 		return (dyn_match_range(s->limit->rulenum,
 		    s->limit->set, rt));
 
-	if (s->type == O_LIMIT)
-		return (dyn_match_range(s->data->rulenum, s->data->set, rt));
+	ret = dyn_match_range(s->data->rulenum, s->data->set, rt);
+	if (ret == 0 || V_dyn_keep_states == 0)
+		return (ret);
 
-	if (V_dyn_keep_states == 0 &&
-	    dyn_match_range(s->data->rulenum, s->data->set, rt))
-		return (1);
-
+	rule = s->data->parent;
+	if (s->type == O_LIMIT)
+		rule = ((struct dyn_ipv6_state *)rule)->limit->parent;
+	dyn_acquire_rule(ch, s->data, rule, s->kidx);
 	return (0);
 }
 #endif
@@ -2155,7 +2233,7 @@ dyn_match_ipv6_state(struct dyn_ipv6_state *s, const i
  * @rt can be used to specify the range of states for deletion.
  */
 static void
-dyn_expire_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
+dyn_expire_states(struct ip_fw_chain *ch, ipfw_range_tlv *rt)
 {
 	struct dyn_ipv4_slist expired_ipv4;
 #ifdef INET6
@@ -2163,8 +2241,11 @@ dyn_expire_states(struct ip_fw_chain *chain, ipfw_rang
 	struct dyn_ipv6_state *s6, *s6n, *s6p;
 #endif
 	struct dyn_ipv4_state *s4, *s4n, *s4p;
+	void *rule;
 	int bucket, removed, length, max_length;
 
+	IPFW_UH_WLOCK_ASSERT(ch);
+
 	/*
 	 * Unlink expired states from each bucket.
 	 * With acquired bucket lock iterate entries of each lists:
@@ -2189,7 +2270,8 @@ dyn_expire_states(struct ip_fw_chain *chain, ipfw_rang
 	while (s != NULL) {						\
 		next = CK_SLIST_NEXT(s, entry);				\
 		if ((TIME_LEQ((s)->exp, time_uptime) && extra) ||	\
-		    (rt != NULL && dyn_match_ ## af ## _state(s, rt))) {\
+		    (rt != NULL &&					\
+		     dyn_match_ ## af ## _state(ch, s, rt))) {		\
 			if (prev != NULL)				\
 				CK_SLIST_REMOVE_AFTER(prev, entry);	\
 			else						\
@@ -2201,6 +2283,14 @@ dyn_expire_states(struct ip_fw_chain *chain, ipfw_rang
 				DYN_COUNT_DEC(dyn_parent_count);	\
 			else {						\
 				DYN_COUNT_DEC(dyn_count);		\
+				if (s->data->flags & DYN_REFERENCED) {	\
+					rule = s->data->parent;		\
+					if (s->type == O_LIMIT)		\
+						rule = ((__typeof(s))	\
+						    rule)->limit->parent;\
+					dyn_release_rule(ch, s->data,	\
+					    rule, s->kidx);		\
+				}					\
 				if (s->type == O_LIMIT)	{		\
 					s = s->data->parent;		\
 					DPARENT_COUNT_DEC(s->limit);	\
@@ -2685,6 +2775,42 @@ ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw
 }
 
 /*
+ * Pass through all states and reset eaction for orphaned rules.
+ */
+void
+ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
+    uint16_t default_id, uint16_t instance_id)
+{
+#ifdef INET6
+	struct dyn_ipv6_state *s6;
+#endif
+	struct dyn_ipv4_state *s4;
+	struct ip_fw *rule;
+	uint32_t bucket;
+
+#define	DYN_RESET_EACTION(s, h, b)					\
+	CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) {			\
+		if ((s->data->flags & DYN_REFERENCED) == 0)		\
+			continue;					\
+		rule = s->data->parent;					\
+		if (s->type == O_LIMIT)					\
+			rule = ((__typeof(s))rule)->limit->parent;	\
+		ipfw_reset_eaction(ch, rule, eaction_id,		\
+		    default_id, instance_id);				\
+	}
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	if (V_dyn_count == 0)
+		return;
+	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
+		DYN_RESET_EACTION(s4, ipv4, bucket);
+#ifdef INET6
+		DYN_RESET_EACTION(s6, ipv6, bucket);
+#endif
+	}
+}
+
+/*
  * Returns size of dynamic states in legacy format
  */
 int
@@ -2696,11 +2822,40 @@ ipfw_dyn_len(void)
 
 /*
  * Returns number of dynamic states.
+ * Marks every named object index used by dynamic states with bit in @bmask.
+ * Returns number of named objects accounted in bmask via @nocnt.
  * Used by dump format v1 (current).
  */
 uint32_t
-ipfw_dyn_get_count(void)
+ipfw_dyn_get_count(uint32_t *bmask, int *nocnt)
 {
+#ifdef INET6
+	struct dyn_ipv6_state *s6;
+#endif
+	struct dyn_ipv4_state *s4;
+	uint32_t bucket;
+
+#define	DYN_COUNT_OBJECTS(s, h, b)					\
+	CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) {			\
+		MPASS(s->kidx != 0);					\
+		if (ipfw_mark_object_kidx(bmask, IPFW_TLV_STATE_NAME,	\
+		    s->kidx) != 0)					\
+			(*nocnt)++;					\
+	}
+
+	IPFW_UH_RLOCK_ASSERT(&V_layer3_chain);
+
+	/* No need to pass through all the buckets. */
+	*nocnt = 0;
+	if (V_dyn_count + V_dyn_parent_count == 0)
+		return (0);
+
+	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
+		DYN_COUNT_OBJECTS(s4, ipv4, bucket);
+#ifdef INET6
+		DYN_COUNT_OBJECTS(s6, ipv6, bucket);
+#endif
+	}
 
 	return (V_dyn_count + V_dyn_parent_count);
 }

Modified: head/sys/netpfil/ipfw/ip_fw_eaction.c
==============================================================================
--- head/sys/netpfil/ipfw/ip_fw_eaction.c	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/ip_fw_eaction.c	Tue Dec  4 16:01:25 2018	(r341471)
@@ -252,11 +252,10 @@ destroy_eaction_obj(struct ip_fw_chain *ch, struct nam
  * Resets all eaction opcodes to default handlers.
  */
 static void
-reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eaction_id)
+reset_eaction_rules(struct ip_fw_chain *ch, uint16_t eaction_id,
+    uint16_t instance_id, bool reset_rules)
 {
 	struct named_object *no;
-	struct ip_fw *rule;
-	ipfw_insn *cmd;
 	int i;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
@@ -267,35 +266,32 @@ reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eac
 		panic("Default external action handler is not found");
 	if (eaction_id == no->kidx)
 		panic("Wrong eaction_id");
-	EACTION_DEBUG("replace id %u with %u", eaction_id, no->kidx);
+
+	EACTION_DEBUG("Going to replace id %u with %u", eaction_id, no->kidx);
 	IPFW_WLOCK(ch);
-	for (i = 0; i < ch->n_rules; i++) {
-		rule = ch->map[i];
-		cmd = ACTION_PTR(rule);
-		if (cmd->opcode != O_EXTERNAL_ACTION)
-			continue;
-		if (cmd->arg1 != eaction_id)
-			continue;
-		cmd->arg1 = no->kidx; /* Set to default id */
-		/*
-		 * XXX: we only bump refcount on default_eaction.
-		 * Refcount on the original object will be just
-		 * ignored on destroy. But on default_eaction it
-		 * will be decremented on rule deletion.
-		 */
-		no->refcnt++;
-		/*
-		 * Since named_object related to this instance will be
-		 * also destroyed, truncate the chain of opcodes to
-		 * remove the rest of cmd chain just after O_EXTERNAL_ACTION
-		 * opcode.
-		 */
-		if (rule->act_ofs < rule->cmd_len - 1) {
-			EACTION_DEBUG("truncate rule %d: len %u -> %u",
-			    rule->rulenum, rule->cmd_len, rule->act_ofs + 1);
-			rule->cmd_len = rule->act_ofs + 1;
+	/*
+	 * Reset eaction objects only if it is referenced by rules.
+	 * But always reset objects for orphaned dynamic states.
+	 */
+	if (reset_rules) {
+		for (i = 0; i < ch->n_rules; i++) {
+			/*
+			 * Refcount on the original object will be just
+			 * ignored on destroy. Refcount on default_eaction
+			 * will be decremented on rule deletion, thus we
+			 * need to reference default_eaction object.
+			 */
+			if (ipfw_reset_eaction(ch, ch->map[i], eaction_id,
+			    no->kidx, instance_id) != 0)
+				no->refcnt++;
 		}
 	}
+	/*
+	 * Reset eaction opcodes for orphaned dynamic states.
+	 * Since parent rules are already deleted, we don't need to
+	 * reference named object of default_eaction.
+	 */
+	ipfw_dyn_reset_eaction(ch, eaction_id, no->kidx, instance_id);
 	IPFW_WUNLOCK(ch);
 }
 
@@ -368,12 +364,71 @@ ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eact
 		IPFW_UH_WUNLOCK(ch);
 		return (EINVAL);
 	}
-	if (no->refcnt > 1)
-		reset_eaction_obj(ch, eaction_id);
+	reset_eaction_rules(ch, eaction_id, 0, (no->refcnt > 1));
 	EACTION_DEBUG("External action '%s' with id %u unregistered",
 	    no->name, eaction_id);
 	destroy_eaction_obj(ch, no);
 	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+int
+ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
+    uint16_t eaction_id, uint16_t default_id, uint16_t instance_id)
+{
+	ipfw_insn *cmd, *icmd;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	IPFW_WLOCK_ASSERT(ch);
+
+	cmd = ACTION_PTR(rule);
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != eaction_id)
+		return (0);
+
+	if (instance_id != 0 && rule->act_ofs < rule->cmd_len - 1) {
+		icmd = cmd + 1;
+		if (icmd->opcode != O_EXTERNAL_INSTANCE ||
+		    icmd->arg1 != instance_id)
+			return (0);
+		/* FALLTHROUGH */
+	}
+
+	cmd->arg1 = default_id; /* Set to default id */
+	/*
+	 * Since named_object related to this instance will be
+	 * also destroyed, truncate the chain of opcodes to
+	 * remove the rest of cmd chain just after O_EXTERNAL_ACTION
+	 * opcode.
+	 */
+	if (rule->act_ofs < rule->cmd_len - 1) {
+		EACTION_DEBUG("truncate rule %d: len %u -> %u",
+		    rule->rulenum, rule->cmd_len, rule->act_ofs + 1);
+		rule->cmd_len = rule->act_ofs + 1;
+	}
+	/*
+	 * Return 1 when reset successfully happened.
+	 */
+	return (1);
+}
+
+/*
+ * This function should be called before external action instance is
+ * destroyed. It will reset eaction_id to default_id for rules, where
+ * eaction has instance with id == kidx.
+ */
+int
+ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id,
+    uint16_t kidx)
+{
+	struct named_object *no;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	no = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), eaction_id);
+	if (no == NULL || no->etlv != IPFW_TLV_EACTION)
+		return (EINVAL);
+
+	reset_eaction_rules(ch, eaction_id, kidx, 0);
 	return (0);
 }
 

Modified: head/sys/netpfil/ipfw/ip_fw_private.h
==============================================================================
--- head/sys/netpfil/ipfw/ip_fw_private.h	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/ip_fw_private.h	Tue Dec  4 16:01:25 2018	(r341471)
@@ -146,6 +146,9 @@ enum {
 /*
  * Function definitions.
  */
+int ipfw_chk(struct ip_fw_args *args);
+struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
+    u_int32_t, u_int32_t, int);
 
 /* attach (arg = 1) or detach (arg = 0) hooks */
 int ipfw_attach_hooks(int);
@@ -156,6 +159,7 @@ void ipfw_nat_destroy(void);
 /* In ip_fw_log.c */
 struct ip;
 struct ip_fw_chain;
+
 void ipfw_bpf_init(int);
 void ipfw_bpf_uninit(int);
 void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
@@ -168,6 +172,7 @@ VNET_DECLARE(int, verbose_limit);
 #define	V_verbose_limit		VNET(verbose_limit)
 
 /* In ip_fw_dynamic.c */
+struct sockopt_data;
 
 enum { /* result for matching dynamic rules */
 	MATCH_REVERSE = 0,
@@ -177,19 +182,6 @@ enum { /* result for matching dynamic rules */
 };
 
 /*
- * The lock for dynamic rules is only used once outside the file,
- * and only to release the result of lookup_dyn_rule().
- * Eventually we may implement it with a callback on the function.
- */
-struct ip_fw_chain;
-struct sockopt_data;
-int ipfw_is_dyn_rule(struct ip_fw *rule);
-void ipfw_expire_dyn_states(struct ip_fw_chain *, ipfw_range_tlv *);
-
-struct tcphdr;
-struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
-    u_int32_t, u_int32_t, int);
-/*
  * Macro to determine that we need to do or redo dynamic state lookup.
  * direction == MATCH_UNKNOWN means that this is first lookup, then we need
  * to do lookup.
@@ -219,13 +211,17 @@ struct ip_fw *ipfw_dyn_lookup_state(const struct ip_fw
     const void *ulp, int pktlen, const ipfw_insn *cmd,
     struct ipfw_dyn_info *info);
 
+int ipfw_is_dyn_rule(struct ip_fw *rule);
+void ipfw_expire_dyn_states(struct ip_fw_chain *, ipfw_range_tlv *);
 void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep);
 int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd);
 
 void ipfw_dyn_init(struct ip_fw_chain *);	/* per-vnet initialization */
 void ipfw_dyn_uninit(int);	/* per-vnet deinitialization */
 int ipfw_dyn_len(void);
-uint32_t ipfw_dyn_get_count(void);
+uint32_t ipfw_dyn_get_count(uint32_t *, int *);
+void ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id,
+    uint16_t default_id, uint16_t instance_id);
 
 /* common variables */
 VNET_DECLARE(int, fw_one_pass);
@@ -280,7 +276,9 @@ struct ip_fw {
 	uint32_t	id;		/* rule id			*/
 	uint32_t	cached_id;	/* used by jump_fast		*/
 	uint32_t	cached_pos;	/* used by jump_fast		*/
+	uint32_t	refcnt;		/* number of references		*/
 
+	struct ip_fw	*next;		/* linked list of deleted rules */
 	ipfw_insn	cmd[1];		/* storage for commands		*/
 };
 
@@ -650,7 +648,6 @@ void ipfw_init_skipto_cache(struct ip_fw_chain *chain)
 void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain);
 int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_ctl3(struct sockopt *sopt);
-int ipfw_chk(struct ip_fw_args *args);
 int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
     int locked);
 void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
@@ -659,7 +656,9 @@ void ipfw_reap_rules(struct ip_fw *head);
 void ipfw_init_counters(void);
 void ipfw_destroy_counters(void);
 struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize);
+void ipfw_free_rule(struct ip_fw *rule);
 int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt);
+int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx);
 
 typedef int (sopt_handler_f)(struct ip_fw_chain *ch,
     ip_fw3_opheader *op3, struct sockopt_data *sd);
@@ -758,6 +757,10 @@ uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw
 int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id);
 int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args,
     ipfw_insn *cmd, int *done);
+int ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule,
+    uint16_t eaction_id, uint16_t default_id, uint16_t instance_id);
+int ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id,
+    uint16_t instance_id);
 
 /* In ip_fw_table.c */
 struct table_info;

Modified: head/sys/netpfil/ipfw/ip_fw_sockopt.c
==============================================================================
--- head/sys/netpfil/ipfw/ip_fw_sockopt.c	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/ip_fw_sockopt.c	Tue Dec  4 16:01:25 2018	(r341471)
@@ -161,8 +161,6 @@ static int
 set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule);
 static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd,
     uint16_t *puidx, uint8_t *ptype);
-static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule,
-    uint32_t *bmask);
 static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
     struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti);
 static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd,
@@ -209,14 +207,23 @@ ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rule
 
 	rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO);
 	rule->cntr = uma_zalloc_pcpu(V_ipfw_cntr_zone, M_WAITOK | M_ZERO);
+	rule->refcnt = 1;
 
 	return (rule);
 }
 
-static void
-free_rule(struct ip_fw *rule)
+void
+ipfw_free_rule(struct ip_fw *rule)
 {
 
+	/*
+	 * We don't release refcnt here, since this function
+	 * can be called without any locks held. The caller
+	 * must release reference under IPFW_UH_WLOCK, and then
+	 * call this function if refcount becomes 1.
+	 */
+	if (rule->refcnt > 1)
+		return;
 	uma_zfree_pcpu(V_ipfw_cntr_zone, rule->cntr);
 	free(rule, M_IPFW);
 }
@@ -827,7 +834,7 @@ ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw 
 	/* Unlink rule from everywhere */
 	unref_rule_objects(chain, rule);
 
-	*((struct ip_fw **)rule) = *head;
+	rule->next = *head;
 	*head = rule;
 }
 
@@ -842,8 +849,8 @@ ipfw_reap_rules(struct ip_fw *head)
 	struct ip_fw *rule;
 
 	while ((rule = head) != NULL) {
-		head = *((struct ip_fw **)head);
-		free_rule(rule);
+		head = head->next;
+		ipfw_free_rule(rule);
 	}
 }
 
@@ -2187,6 +2194,7 @@ struct dump_args {
 	uint32_t	rsize;	/* rules size */
 	uint32_t	tcount;	/* number of tables */
 	int		rcounters;	/* counters */
+	uint32_t	*bmask;	/* index bitmask of used named objects */
 };
 
 void
@@ -2223,6 +2231,49 @@ export_objhash_ntlv(struct namedobj_instance *ni, uint
 	return (0);
 }
 
+static int
+export_named_objects(struct namedobj_instance *ni, struct dump_args *da,
+    struct sockopt_data *sd)
+{
+	int error, i;
+
+	for (i = 0; i < IPFW_TABLES_MAX && da->tcount > 0; i++) {
+		if ((da->bmask[i / 32] & (1 << (i % 32))) == 0)
+			continue;
+		if ((error = export_objhash_ntlv(ni, i, sd)) != 0)
+			return (error);
+		da->tcount--;
+	}
+	return (0);
+}
+
+static int
+dump_named_objects(struct ip_fw_chain *ch, struct dump_args *da,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_ctlv *ctlv;
+	int error;
+
+	MPASS(da->tcount > 0);
+	/* Header first */
+	ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
+	if (ctlv == NULL)
+		return (ENOMEM);
+	ctlv->head.type = IPFW_TLV_TBLNAME_LIST;
+	ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) +
+	    sizeof(*ctlv);
+	ctlv->count = da->tcount;
+	ctlv->objsize = sizeof(ipfw_obj_ntlv);
+
+	/* Dump table names first (if any) */
+	error = export_named_objects(ipfw_get_table_objhash(ch), da, sd);
+	if (error != 0)
+		return (error);
+	/* Then dump another named objects */
+	da->bmask += IPFW_TABLES_MAX / 32;
+	return (export_named_objects(CHAIN_TO_SRV(ch), da, sd));
+}
+
 /*
  * Dumps static rules with table TLVs in buffer @sd.
  *
@@ -2230,52 +2281,13 @@ export_objhash_ntlv(struct namedobj_instance *ni, uint
  */
 static int
 dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da,
-    uint32_t *bmask, struct sockopt_data *sd)
+    struct sockopt_data *sd)
 {
-	int error;
-	int i, l;
-	uint32_t tcount;
 	ipfw_obj_ctlv *ctlv;
 	struct ip_fw *krule;
-	struct namedobj_instance *ni;
 	caddr_t dst;
+	int i, l;
 
-	/* Dump table names first (if any) */
-	if (da->tcount > 0) {
-		/* Header first */
-		ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
-		if (ctlv == NULL)
-			return (ENOMEM);
-		ctlv->head.type = IPFW_TLV_TBLNAME_LIST;
-		ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + 
-		    sizeof(*ctlv);
-		ctlv->count = da->tcount;
-		ctlv->objsize = sizeof(ipfw_obj_ntlv);
-	}
-
-	i = 0;
-	tcount = da->tcount;
-	ni = ipfw_get_table_objhash(chain);
-	while (tcount > 0) {
-		if ((bmask[i / 32] & (1 << (i % 32))) == 0) {
-			i++;
-			continue;
-		}
-
-		/* Jump to shared named object bitmask */
-		if (i >= IPFW_TABLES_MAX) {
-			ni = CHAIN_TO_SRV(chain);
-			i -= IPFW_TABLES_MAX;
-			bmask += IPFW_TABLES_MAX / 32;
-		}
-
-		if ((error = export_objhash_ntlv(ni, i, sd)) != 0)
-			return (error);
-
-		i++;
-		tcount--;
-	}
-
 	/* Dump rules */
 	ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
 	if (ctlv == NULL)
@@ -2300,27 +2312,41 @@ dump_static_rules(struct ip_fw_chain *chain, struct du
 	return (0);
 }
 
+int
+ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx)
+{
+	uint32_t bidx;
+
+	/*
+	 * Maintain separate bitmasks for table and non-table objects.
+	 */
+	bidx = (etlv == IPFW_TLV_TBL_NAME) ? 0: IPFW_TABLES_MAX / 32;
+	bidx += kidx / 32;
+	if ((bmask[bidx] & (1 << (kidx % 32))) != 0)
+		return (0);
+
+	bmask[bidx] |= 1 << (kidx % 32);
+	return (1);
+}
+
 /*
  * Marks every object index used in @rule with bit in @bmask.
  * Used to generate bitmask of referenced tables/objects for given ruleset
  * or its part.
- *
- * Returns number of newly-referenced objects.
  */
-static int
-mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule,
-    uint32_t *bmask)
+static void
+mark_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
+    struct dump_args *da)
 {
 	struct opcode_obj_rewrite *rw;
 	ipfw_insn *cmd;
-	int bidx, cmdlen, l, count;
+	int cmdlen, l;
 	uint16_t kidx;
 	uint8_t subtype;
 
 	l = rule->cmd_len;
 	cmd = rule->cmd;
 	cmdlen = 0;
-	count = 0;
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
@@ -2328,21 +2354,9 @@ mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw 
 		if (rw == NULL)
 			continue;
 
-		bidx = kidx / 32;
-		/*
-		 * Maintain separate bitmasks for table and
-		 * non-table objects.
-		 */
-		if (rw->etlv != IPFW_TLV_TBL_NAME)
-			bidx += IPFW_TABLES_MAX / 32;
-
-		if ((bmask[bidx] & (1 << (kidx % 32))) == 0)
-			count++;
-
-		bmask[bidx] |= 1 << (kidx % 32);
+		if (ipfw_mark_object_kidx(da->bmask, rw->etlv, kidx))
+			da->tcount++;
 	}
-
-	return (count);
 }
 
 /*
@@ -2366,13 +2380,12 @@ static int
 dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
+	struct dump_args da;
 	ipfw_cfg_lheader *hdr;
 	struct ip_fw *rule;
 	size_t sz, rnum;
-	uint32_t hdr_flags;
+	uint32_t hdr_flags, *bmask;
 	int error, i;
-	struct dump_args da;
-	uint32_t *bmask;
 
 	hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr));
 	if (hdr == NULL)
@@ -2380,9 +2393,15 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader
 
 	error = 0;
 	bmask = NULL;
-	/* Allocate needed state. Note we allocate 2xspace mask, for table&srv  */
-	if (hdr->flags & IPFW_CFG_GET_STATIC)
-		bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO);
+	memset(&da, 0, sizeof(da));
+	/*
+	 * Allocate needed state.
+	 * Note we allocate 2xspace mask, for table & srv
+	 */
+	if (hdr->flags & (IPFW_CFG_GET_STATIC | IPFW_CFG_GET_STATES))
+		da.bmask = bmask = malloc(
+		    sizeof(uint32_t) * IPFW_TABLES_MAX * 2 / 32, M_TEMP,
+		    M_WAITOK | M_ZERO);
 
 	IPFW_UH_RLOCK(chain);
 
@@ -2391,9 +2410,6 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader
 	 * Prepare used tables bitmask.
 	 */
 	sz = sizeof(ipfw_cfg_lheader);
-	memset(&da, 0, sizeof(da));
-
-	da.b = 0;
 	da.e = chain->n_rules;
 
 	if (hdr->end_rule != 0) {
@@ -2412,24 +2428,25 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader
 			da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv);
 			da.rcount++;
 			/* Update bitmask of used objects for given range */
-			da.tcount += mark_object_kidx(chain, rule, bmask);
+			mark_rule_objects(chain, rule, &da);
 		}
 		/* Add counters if requested */
 		if (hdr->flags & IPFW_CFG_GET_COUNTERS) {
 			da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount;
 			da.rcounters = 1;
 		}
-
-		if (da.tcount > 0)
-			sz += da.tcount * sizeof(ipfw_obj_ntlv) +
-			    sizeof(ipfw_obj_ctlv);
 		sz += da.rsize + sizeof(ipfw_obj_ctlv);
 	}
 
-	if (hdr->flags & IPFW_CFG_GET_STATES)
-		sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) +
-		     sizeof(ipfw_obj_ctlv);
+	if (hdr->flags & IPFW_CFG_GET_STATES) {
+		sz += sizeof(ipfw_obj_ctlv) +
+		    ipfw_dyn_get_count(bmask, &i) * sizeof(ipfw_obj_dyntlv);
+		da.tcount += i;
+	}
 
+	if (da.tcount > 0)
+		sz += da.tcount * sizeof(ipfw_obj_ntlv) +
+		    sizeof(ipfw_obj_ctlv);
 
 	/*
 	 * Fill header anyway.
@@ -2447,8 +2464,14 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader
 	}
 
 	/* STAGE2: Store actual data */
+	if (da.tcount > 0) {
+		error = dump_named_objects(chain, &da, sd);
+		if (error != 0)
+			goto cleanup;
+	}
+
 	if (hdr_flags & IPFW_CFG_GET_STATIC) {
-		error = dump_static_rules(chain, &da, bmask, sd);
+		error = dump_static_rules(chain, &da, sd);
 		if (error != 0)
 			goto cleanup;
 	}
@@ -3027,7 +3050,7 @@ add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *
 	if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) {
 		/* Free allocate krules */
 		for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++)
-			free_rule(ci->krule);
+			ipfw_free_rule(ci->krule);
 	}
 
 	if (cbuf != NULL && cbuf != &rci)
@@ -3851,7 +3874,7 @@ ipfw_ctl(struct sockopt *sopt)
 			import_rule0(&ci);
 			error = commit_rules(chain, &ci, 1);
 			if (error != 0)
-				free_rule(ci.krule);
+				ipfw_free_rule(ci.krule);
 			else if (sopt->sopt_dir == SOPT_GET) {
 				if (is7) {
 					error = convert_rule_to_7(rule);

Modified: head/sys/netpfil/ipfw/nat64/nat64lsn_control.c
==============================================================================
--- head/sys/netpfil/ipfw/nat64/nat64lsn_control.c	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/nat64/nat64lsn_control.c	Tue Dec  4 16:01:25 2018	(r341471)
@@ -256,6 +256,7 @@ nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_ophead
 		return (EBUSY);
 	}
 
+	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
 	nat64lsn_detach_config(ch, cfg);
 	IPFW_UH_WUNLOCK(ch);

Modified: head/sys/netpfil/ipfw/nat64/nat64stl_control.c
==============================================================================
--- head/sys/netpfil/ipfw/nat64/nat64stl_control.c	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/nat64/nat64stl_control.c	Tue Dec  4 16:01:25 2018	(r341471)
@@ -342,6 +342,7 @@ nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_ophead
 		return (EBUSY);
 	}
 
+	ipfw_reset_eaction_instance(ch, V_nat64stl_eid, cfg->no.kidx);
 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
 	nat64stl_detach_config(ch, cfg);
 	IPFW_UH_WUNLOCK(ch);

Modified: head/sys/netpfil/ipfw/nptv6/nptv6.c
==============================================================================
--- head/sys/netpfil/ipfw/nptv6/nptv6.c	Tue Dec  4 15:25:15 2018	(r341470)
+++ head/sys/netpfil/ipfw/nptv6/nptv6.c	Tue Dec  4 16:01:25 2018	(r341471)
@@ -746,6 +746,7 @@ nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader 
 		return (EBUSY);
 	}
 
+	ipfw_reset_eaction_instance(ch, V_nptv6_eid, cfg->no.kidx);
 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201812041601.wB4G1QDT001641>