Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 21 Dec 2009 17:55:10 +0000 (UTC)
From:      Luigi Rizzo <luigi@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r200782 - in user/luigi/ipfw3-head/sys/netinet: . ipfw
Message-ID:  <200912211755.nBLHtAZm035231@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: luigi
Date: Mon Dec 21 17:55:10 2009
New Revision: 200782
URL: http://svn.freebsd.org/changeset/base/200782

Log:
  partial code to reduce contention on the ipfw lock
  and especially remove some O(N) sections of code
  with the write lock held from userland.
  
  1. introduce a IPFW_UH_LOCK to arbitrate requests from
     the upper half of the kernel. Some things, such as 'ipfw show'
     can be done holding this lock in read mode, whereas insert and
     delete require IPFW_UH_WLOCK.
  
  2. introduce a mapping structure to keep rules together. This replaces
     the 'next' chain currently used in ipfw rules. At the moment
     the map is a simple array (sorted by rule number and then rule_id),
     so we can find a rule quickly instead of having to scan the list.
  
  3. when an expensive operation (such as insert or delete) is done
     by userland, we grab IPFW_UH_WLOCK, create a new copy of the map
     without blocking the bottom half of the kernel, then acquire
     IPFW_WLOCK and quickly update pointers to the map and related info.
     After dropping IPFW_LOCK we can then continue the cleanup protected
     by IPFW_UH_LOCK.
  
  4. do not pass pointers to rules through dummynet, netgraph, divert etc,
     but rather pass a <rulenum,id> pair that lets us do a lookup quickly.
  
  All the above does not change the ABI.
  
  5. a similar type of protection should be applied to references from
     dynamic rules to their parent (not done yet, will require changes
     to the structure of dynamic rules which are known to userland too).

Modified:
  user/luigi/ipfw3-head/sys/netinet/ip_fw.h
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw2.c
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_private.h
  user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_sockopt.c

Modified: user/luigi/ipfw3-head/sys/netinet/ip_fw.h
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ip_fw.h	Mon Dec 21 17:41:57 2009	(r200781)
+++ user/luigi/ipfw3-head/sys/netinet/ip_fw.h	Mon Dec 21 17:55:10 2009	(r200782)
@@ -461,7 +461,7 @@ typedef struct _ipfw_insn_icmp6 {
  */
 
 struct ip_fw {
-	struct ip_fw	*next;		/* linked list of rules		*/
+	struct ip_fw	*x_next;		/* linked list of rules		*/
 	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
 	/* 'next_rule' is used to pass up 'set_disable' status		*/
 

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw2.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw2.c	Mon Dec 21 17:41:57 2009	(r200781)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw2.c	Mon Dec 21 17:55:10 2009	(r200782)
@@ -629,31 +629,6 @@ send_reject(struct ip_fw_args *args, int
 	args->m = NULL;
 }
 
-/**
- * Return the pointer to the skipto target.
- *
- * IMPORTANT: this should only be called on SKIPTO rules, and the
- * jump target is taken from the 'rulenum' argument, which may come
- * from the rule itself (direct skipto) or not (tablearg)
- *
- * The function never returns NULL: if the requested rule is not
- * present, it returns the next rule in the chain.
- * This also happens in case of a bogus argument > 65535
- */
-static struct ip_fw *
-lookup_next_rule(struct ip_fw *me, uint32_t rulenum)
-{
-	struct ip_fw *rule;
-
-	for (rule = me->next; rule ; rule = rule->next) {
-		if (rule->rulenum >= rulenum)
-			break;
-	}
-	if (rule == NULL)	/* failure or not a skipto */
-		rule = me->next ? me->next : me;
-	return rule;
-}
-
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
@@ -738,6 +713,14 @@ check_uidgid(ipfw_insn_u32 *insn, int pr
 	return match;
 }
 
+static inline void
+set_match(struct ip_fw_args *args, struct ip_fw *f, struct ip_fw_chain *chain)
+{
+	args->rule = (void *)(uintptr_t)f->rulenum;
+	args->rule_id = f->id;
+	args->chain_id = chain->id;
+}
+
 /*
  * The main check routine for the firewall.
  *
@@ -830,6 +813,7 @@ ipfw_chk(struct ip_fw_args *args)
 	struct ifnet *oif = args->oif;
 
 	struct ip_fw *f = NULL;		/* matching rule */
+	int f_pos = 0;			/* index of f in the array */
 	int retval = 0;
 
 	/*
@@ -1167,22 +1151,14 @@ do {								\
 		 * if still present, otherwise use the default rule.
 		 * XXX If fw_one_pass != 0 then just accept it, though
 		 * the caller should never pass us such packets.
+		 * XXX rule is now the rule number so we can do a lookup
 		 */
 		if (V_fw_one_pass) {
 			IPFW_RUNLOCK(chain);
 			return (IP_FW_PASS);
 		}
-		if (chain->id == args->chain_id) { /* pointer still valid */
-			f = args->rule->next;
-		} else { /* must revalidate the pointer */
-			for (f = chain->rules; f != NULL; f = f->next)
-				if (f == args->rule && f->id == args->rule_id) {
-					f = args->rule->next;
-					break;
-				}
-		}
-		if (f == NULL) /* in case of errors, use default; */
-			f = chain->default_rule;
+		f_pos = ipfw_find_rule(chain, (uint32_t)(args->rule), args->rule_id+1);
+		f = chain->map[f_pos];
 	} else {
 		/*
 		 * Find the starting rule. It can be either the first
@@ -1191,17 +1167,14 @@ do {								\
 		int skipto = mtag ? divert_cookie(mtag) : 0;
 
 		f = chain->rules;
+		f_pos = 0;
 		if (args->eh == NULL && skipto != 0) {
 			if (skipto >= IPFW_DEFAULT_RULE) {
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY); /* invalid */
 			}
-			while (f && f->rulenum <= skipto)
-				f = f->next;
-			if (f == NULL) {	/* drop packet */
-				IPFW_RUNLOCK(chain);
-				return (IP_FW_DENY);
-			}
+			f_pos = ipfw_find_rule(chain, skipto, 0);
+			f = chain->map[f_pos];
 		}
 	}
 	/* reset divert rule to avoid confusion later */
@@ -1229,12 +1202,13 @@ do {								\
 	 * We can restart the inner loop by setting l>0 and f, cmd
 	 * as needed.
 	 */
-	for (; f; f = f->next) {
+	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 
 /* again: */
+		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
@@ -1917,6 +1891,8 @@ do {								\
 					q->pcnt++;
 					q->bcnt += pktlen;
 					f = q->rule;
+					/* the pointer is valid so we can do a lookup */
+					f_pos = ipfw_find_rule(chain, f->rulenum, f->id);
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					ipfw_dyn_unlock();
@@ -1942,9 +1918,7 @@ do {								\
 
 			case O_PIPE:
 			case O_QUEUE:
-				args->rule = f; /* report matching rule */
-				args->rule_id = f->id;
-				args->chain_id = chain->id;
+				set_match(args, f, chain);
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					args->cookie = tablearg;
 				else
@@ -1990,14 +1964,11 @@ do {								\
 					break;
 				}
 				/* skipto: */
-				if (cmd->arg1 == IP_FW_TABLEARG) {
-				    f = lookup_next_rule(f, tablearg);
-				} else { /* direct skipto */
-				    /* update f->next_rule if not set */
-				    if (f->next_rule == NULL)
-					f->next_rule =
-					    lookup_next_rule(f, cmd->arg1);
-				    f = f->next_rule;
+				{
+					int i = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1;
+					if (i <= f->rulenum)
+						i = f->rulenum + 1;	/* no backward jumps */
+					f_pos = ipfw_find_rule(chain, i, 0);
 				}
 				/*
 				 * Skip disabled rules, and
@@ -2005,14 +1976,11 @@ do {								\
 				 * with the correct f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
-				while (f && (V_set_disable & (1 << f->set)))
-					f = f->next;
-				if (f) { /* found a valid rule */
-					l = f->cmd_len;
-					cmd = f->cmd;
-				} else { /* should not happen */
-					l = 0;  /* exit inner loop */
-				}
+				for (; f_pos < chain->n_rules - 1 && (V_set_disable & (1 << chain->map[f_pos]->set)); f_pos++)
+					;
+				f = chain->map[f_pos];
+				l = f->cmd_len;
+				cmd = f->cmd;
 				match = 1;
 				cmdlen = 0;
 				skip_or = 0;
@@ -2077,9 +2045,7 @@ do {								\
 
 			case O_NETGRAPH:
 			case O_NGTEE:
-				args->rule = f;	/* report matching rule */
-				args->rule_id = f->id;
-				args->chain_id = chain->id;
+				set_match(args, f, chain);
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					args->cookie = tablearg;
 				else
@@ -2106,14 +2072,12 @@ do {								\
 				    struct cfg_nat *t;
 				    int nat_id;
 
-				    args->rule = f; /* Report matching rule. */
-				    args->rule_id = f->id;
-				    args->chain_id = chain->id;
+				    set_match(args, f, chain);
 				    t = ((ipfw_insn_nat *)cmd)->nat;
 				    if (t == NULL) {
 					nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
 						tablearg : cmd->arg1;
-					t = (*lookup_nat_ptr)(&V_layer3_chain.nat, nat_id);
+					t = (*lookup_nat_ptr)(&chain->nat, nat_id);
 
 					if (t == NULL) {
 					    retval = IP_FW_DENY;
@@ -2175,9 +2139,7 @@ do {								\
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
-				    args->rule = f;
-				    args->rule_id = f->id;
-				    args->chain_id = chain->id;
+				    set_match(args, f, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
@@ -2310,50 +2272,53 @@ static int
 vnet_ipfw_init(const void *unused)
 {
 	int error;
-	struct ip_fw default_rule;
+	struct ip_fw *rule = NULL;
+	struct ip_fw_chain *chain;
+
+	chain = &V_layer3_chain;
 
 	/* First set up some values that are compile time options */
+	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
+	V_fw_deny_unknown_exthdrs = 1;
 #ifdef IPFIREWALL_VERBOSE
 	V_fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
-
-	error = ipfw_init_tables(&V_layer3_chain);
-	if (error) {
-		panic("init_tables"); /* XXX Marko fix this ! */
-	}
 #ifdef IPFIREWALL_NAT
-	LIST_INIT(&V_layer3_chain.nat);
+	LIST_INIT(chain->nat);
 #endif
 
-	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
-
-	V_fw_deny_unknown_exthdrs = 1;
-
-	V_layer3_chain.rules = NULL;
-	IPFW_LOCK_INIT(&V_layer3_chain);
-
-	bzero(&default_rule, sizeof default_rule);
-	default_rule.act_ofs = 0;
-	default_rule.rulenum = IPFW_DEFAULT_RULE;
-	default_rule.cmd_len = 1;
-	default_rule.set = RESVD_SET;
-	default_rule.cmd[0].len = 1;
-	default_rule.cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
-	error = ipfw_add_rule(&V_layer3_chain, &default_rule);
-
-	if (error != 0) {
-		printf("ipfw2: error %u initializing default rule "
-			"(support disabled)\n", error);
-		IPFW_LOCK_DESTROY(&V_layer3_chain);
-		printf("leaving ipfw_iattach (1) with error %d\n", error);
-		return (error);
+	/* insert the default rule and create the initial map */
+	chain->n_rules = 1;
+	chain->static_len = sizeof(struct ip_fw);
+	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
+	if (chain->map)
+		rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
+	if (rule == NULL) {
+		if (chain->map)
+			free(chain->map, M_IPFW);
+		printf("ipfw2: ENOSPC initializing default rule "
+			"(support disabled)\n");
+		return (ENOSPC);
+	}
+	error = ipfw_init_tables(chain);
+	if (error) {
+		panic("init_tables"); /* XXX Marko fix this ! */
 	}
 
-	V_layer3_chain.default_rule = V_layer3_chain.rules;
+	/* fill and insert the default rule */
+	rule->act_ofs = 0;
+	rule->rulenum = IPFW_DEFAULT_RULE;
+	rule->cmd_len = 1;
+	rule->set = RESVD_SET;
+	rule->cmd[0].len = 1;
+	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
+	chain->rules = chain->default_rule = chain->map[0] = rule;
+	chain->id = rule->id = 1;
 
+	IPFW_LOCK_INIT(chain);
 	ipfw_dyn_init();
 
 	/* First set up some values that are compile time options */

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_private.h
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_private.h	Mon Dec 21 17:41:57 2009	(r200781)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_private.h	Mon Dec 21 17:55:10 2009	(r200782)
@@ -174,9 +174,13 @@ struct ip_fw_chain {
 	struct ip_fw	*rules;		/* list of rules */
 	struct ip_fw	*reap;		/* list of rules to reap */
 	struct ip_fw	*default_rule;
+	int		n_rules;	/* number of static rules */
+	int		static_len;	/* total len of static rules */
+	struct ip_fw    **map;	/* array of rule ptrs to ease lookup */
 	LIST_HEAD(nat_list, cfg_nat) nat;       /* list of nat entries */
 	struct radix_node_head *tables[IPFW_TABLES_MAX];
 	struct rwlock	rwmtx;
+	struct rwlock	uh_lock;	/* lock for upper half */
 	uint32_t	id;		/* ruleset id */
 };
 
@@ -187,9 +191,16 @@ struct sockopt;	/* used by tcp_var.h */
  * so the variable and the macros must be here.
  */
 
-#define	IPFW_LOCK_INIT(_chain) \
-	rw_init(&(_chain)->rwmtx, "IPFW static rules")
-#define	IPFW_LOCK_DESTROY(_chain)	rw_destroy(&(_chain)->rwmtx)
+#define	IPFW_LOCK_INIT(_chain) do {			\
+	rw_init(&(_chain)->rwmtx, "IPFW static rules");	\
+	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
+	} while (0)
+
+#define	IPFW_LOCK_DESTROY(_chain) do {			\
+	rw_destroy(&(_chain)->rwmtx);			\
+	rw_destroy(&(_chain)->uh_lock);			\
+	} while (0)
+
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
@@ -197,7 +208,12 @@ struct sockopt;	/* used by tcp_var.h */
 #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
 #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
 
+#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
+#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
+#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
+#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
 /* In ip_fw_sockopt.c */
+int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
 int ipfw_ctl(struct sockopt *sopt);
 int ipfw_chk(struct ip_fw_args *args);

Modified: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_sockopt.c
==============================================================================
--- user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_sockopt.c	Mon Dec 21 17:41:57 2009	(r200781)
+++ user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_sockopt.c	Mon Dec 21 17:55:10 2009	(r200782)
@@ -80,16 +80,10 @@ MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "Ip
  * static variables followed by global ones
  */
 
-static VNET_DEFINE(u_int32_t, static_count);	/* # of static rules */
-#define	V_static_count			VNET(static_count)
-
-static VNET_DEFINE(u_int32_t, static_len);	/* bytes of static rules */
-#define	V_static_len			VNET(static_len)
-
 #ifdef SYSCTL_NODE
 SYSCTL_DECL(_net_inet_ip_fw);
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
-    CTLFLAG_RD, &VNET_NAME(static_count), 0,
+    CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
 
 #endif /* SYSCTL_NODE */
@@ -101,97 +95,175 @@ SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUT
 static void
 flush_rule_ptrs(struct ip_fw_chain *chain)
 {
+#if 0
 	struct ip_fw *rule;
 
 	IPFW_WLOCK_ASSERT(chain);
 
-	chain->id++;
-
 	for (rule = chain->rules; rule; rule = rule->next)
 		rule->next_rule = NULL;
+#endif
+	chain->id++;
+}
+
+/*
+ * Find the smallest rule >= key, id. 
+ * In case of multiple rules with the same number finds the first one.
+ * We could use bsearch but it is so simple that we code it directly
+ */
+int
+ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
+{
+	int i, lo, hi;
+	struct ip_fw *r;
+
+	printf("looking for rule %d:%d\n", key, id);
+  	for (lo = 0, hi = chain->n_rules - 1; lo < hi;) {
+		printf(" -- looking for rule %d:%d [%d-%d]\n", key, id, lo, hi);
+		i = (lo + hi) / 2;
+		r = chain->map[i];
+		if (r->rulenum < key)
+			lo = i + 1;
+		else if (r->rulenum > key)
+			hi = i; // i - 1;
+		else if (r->id < id)
+			lo = i + 1;
+		else if (r->id > id)
+			hi = i; // i - 1;
+		else
+			hi = i;
+	};
+	printf("search end lo %d hi %d\n",lo, hi);
+	if (hi < 0)
+		hi = 0;
+	if (lo >= chain->n_rules - 1)
+		lo = chain->n_rules - 1;
+	printf(" -- looking for rule %d:%d found at %d\n", key, id, hi);
+	return hi;
+}
+
+/*
+ * allocate a new map, returns the chain locked. extra is the number
+ * of entries to add or delete.
+ */
+static struct ip_fw **
+get_map(struct ip_fw_chain *chain, int extra, int locked)
+{
+	struct ip_fw **map;
+
+	for (;;) {
+		int i = chain->n_rules + extra;
+		map = malloc( (1 + i) * sizeof(struct ip_fw *), M_IPFW, M_WAITOK);
+		if (map == NULL) {
+			printf("%s: cannot allocate map\n", __FUNCTION__);
+			break;
+		}
+		if (!locked)
+			IPFW_UH_WLOCK(chain);
+		if (i >= chain->n_rules + extra)
+			break;
+		/* otherwise we lost the race, free and retry */
+		if (!locked)
+			IPFW_UH_WUNLOCK(chain);
+		free(map, M_IPFW);
+	}
+	return map;
+}
+
+/* swap the maps. It is supposed to be called with IPFW_UH_WLOCK
+ * so we can do the remaining housekeepking outside
+ */
+static struct ip_fw **
+swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
+{
+	struct ip_fw **old_map;
+	int i, lim;
+
+	IPFW_WLOCK(chain);
+	printf("%s %p %d --> %p %d\n", __FUNCTION__,
+		chain->map, chain->n_rules,
+		new_map, new_len);
+	flush_rule_ptrs(chain);
+	lim = chain->n_rules < new_len ? new_len : chain->n_rules;
+	for (i=0; i < lim; i++)
+		printf("%3d %p %p\n", i,
+			(i<chain->n_rules) ? chain->map[i] : NULL,
+			(i<new_len) ? new_map[i] : NULL);
+			
+	/* chain->id incremented inside flush_rule_ptrs() */
+	chain->n_rules = new_len;
+	old_map = chain->map;
+	chain->map = new_map;
+	IPFW_WUNLOCK(chain);
+	return old_map;
 }
 
 /*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
+ * XXX not good for the default rule.
  */
 int
 ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
 {
-	struct ip_fw *rule, *f, *prev;
+	struct ip_fw *rule;
 	int l = RULESIZE(input_rule);
+	int i, insert_before;
+	struct ip_fw **map;	/* the new array of pointers */
 
-	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
+	if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
 		return (EINVAL);
 
-	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
+	rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
 	if (rule == NULL)
 		return (ENOSPC);
+	map = get_map(chain, 1, 0 /* not locked */);
+	if (map == NULL) {
+		free(map, M_IPFW);
+		return ENOSPC;
+	}
 
 	bcopy(input_rule, rule, l);
-
-	rule->next = NULL;
-	rule->next_rule = NULL;
+	rule->x_next = NULL;
+	//rule->next_rule = NULL;
 
 	rule->pcnt = 0;
 	rule->bcnt = 0;
 	rule->timestamp = 0;
 
-	IPFW_WLOCK(chain);
-
-	if (chain->rules == NULL) {	/* default rule */
-		chain->rules = rule;
-		rule->id = ++chain->id;
-		goto done;
-        }
-
-	/*
-	 * If rulenum is 0, find highest numbered rule before the
-	 * default rule, and add autoinc_step
-	 */
 	if (V_autoinc_step < 1)
 		V_autoinc_step = 1;
 	else if (V_autoinc_step > 1000)
 		V_autoinc_step = 1000;
+	/* find the insertion point */
+	insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
+	i = ipfw_find_rule(chain, insert_before, 0);
+	/* duplicate first part */
+	printf("insert %d before %d (%d) at pos %d, +copy %d\n",
+		rule->rulenum, insert_before,
+		i>= 0 ? chain->map[i]->rulenum : -1, i, chain->n_rules - i);
+	if (i > 0)
+		bcopy(chain->map, map, i * sizeof(struct ip_fw *));
+	map[i] = rule;
+	/* duplicate remaining part, we always have the default rule */
+	bcopy(chain->map + i, map + i + 1, sizeof(struct ip_fw *) *(chain->n_rules - i));
 	if (rule->rulenum == 0) {
-		/*
-		 * locate the highest numbered rule before default
-		 */
-		for (f = chain->rules; f; f = f->next) {
-			if (f->rulenum == IPFW_DEFAULT_RULE)
-				break;
-			rule->rulenum = f->rulenum;
-		}
+		/* set the number */
+		rule->rulenum = map[i]->rulenum;
 		if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
 			rule->rulenum += V_autoinc_step;
 		input_rule->rulenum = rule->rulenum;
 	}
 
-	/*
-	 * Now insert the new rule in the right place in the sorted list.
-	 */
-	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
-		if (f->rulenum > rule->rulenum) { /* found the location */
-			if (prev) {
-				rule->next = f;
-				prev->next = rule;
-			} else { /* head insert */
-				rule->next = chain->rules;
-				chain->rules = rule;
-			}
-			break;
-		}
-	}
-	flush_rule_ptrs(chain);
-	/* chain->id incremented inside flush_rule_ptrs() */
-	rule->id = chain->id;
-done:
-	V_static_count++;
-	V_static_len += l;
-	IPFW_WUNLOCK(chain);
+	rule->id = chain->id + 1;
+	map = swap_map(chain, map, chain->n_rules + 1);
+	chain->static_len += l;
+	IPFW_UH_WUNLOCK(chain);
+	if (map)
+		free(map, M_IPFW);
 	DEB(printf("ipfw: installed rule %d, static count now %d\n",
-		rule->rulenum, V_static_count);)
+		rule->rulenum, chain->n_rules);)
 	return (0);
 }
 
@@ -203,28 +275,15 @@ done:
  * @return a pointer to the next entry.
  * Arguments are not checked, so they better be correct.
  */
-static struct ip_fw *
-remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
-    struct ip_fw *prev)
+static void
+remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule)
 {
-	struct ip_fw *n;
 	int l = RULESIZE(rule);
 
-	IPFW_WLOCK_ASSERT(chain);
-
-	n = rule->next;
+	chain->static_len -= l;
 	ipfw_remove_dyn_children(rule);
-	if (prev == NULL)
-		chain->rules = n;
-	else
-		prev->next = n;
-	V_static_count--;
-	V_static_len -= l;
-
-	rule->next = chain->reap;
+	rule->x_next = chain->reap;
 	chain->reap = rule;
-
-	return n;
 }
 
 /*
@@ -238,7 +297,7 @@ ipfw_reap_rules(struct ip_fw *head)
 	struct ip_fw *rule;
 
 	while ((rule = head) != NULL) {
-		head = head->next;
+		head = head->x_next;
 		free(rule, M_IPFW);
 	}
 }
@@ -251,6 +310,7 @@ ipfw_reap_rules(struct ip_fw *head)
 void
 ipfw_free_chain(struct ip_fw_chain *chain, int kill_default)
 {
+#if 0 // XXX
 	struct ip_fw *prev, *rule;
 
 	IPFW_WLOCK_ASSERT(chain);
@@ -259,11 +319,12 @@ ipfw_free_chain(struct ip_fw_chain *chai
 	flush_rule_ptrs(chain); /* more efficient to do outside the loop */
 	for (prev = NULL, rule = chain->rules; rule ; )
 		if (kill_default || rule->set != RESVD_SET)
-			rule = remove_rule(chain, rule, prev);
+			rule = remove_rule(chain, rule);
 		else {
 			prev = rule;
 			rule = rule->next;
 		}
+#endif
 }
 
 /**
@@ -283,9 +344,12 @@ ipfw_free_chain(struct ip_fw_chain *chai
 static int
 del_entry(struct ip_fw_chain *chain, u_int32_t arg)
 {
-	struct ip_fw *prev = NULL, *rule;
+	struct ip_fw *rule;
 	u_int16_t rulenum;	/* rule or old_set */
 	u_int8_t cmd, new_set;
+	int start, end, i, ofs, n;
+	struct ip_fw **map = NULL;
+	int error = 0;
 
 	rulenum = arg & 0xffff;
 	cmd = (arg >> 24) & 0xff;
@@ -301,62 +365,130 @@ del_entry(struct ip_fw_chain *chain, u_i
 			return EINVAL;
 	}
 
-	IPFW_WLOCK(chain);
-	rule = chain->rules;	/* common starting point */
+	IPFW_UH_WLOCK(chain); /* prevent conflicts among the writers */
 	chain->reap = NULL;	/* prepare for deletions */
 	switch (cmd) {
 	case 0:	/* delete rules with given number */
-		/*
-		 * locate first rule to delete
-		 */
-		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
-			;
-		if (rule->rulenum != rulenum) {
-			IPFW_WUNLOCK(chain);
-			return EINVAL;
+		/* locate first rule to delete */
+		start = ipfw_find_rule(chain, rulenum, 0);
+		/* count matching rules and end of range */
+		for (end = start, n = 0; end < chain->n_rules; end++) {
+			rule = chain->map[end];
+			if (rule->rulenum != rulenum)
+				break;
+			if (rule->set != RESVD_SET)
+				n++;
 		}
-
-		/*
-		 * flush pointers outside the loop, then delete all matching
-		 * rules. prev remains the same throughout the cycle.
-		 */
-		flush_rule_ptrs(chain);
-		while (rule->rulenum == rulenum)
-			rule = remove_rule(chain, rule, prev);
+		printf("must delete %d rules between %d and %d\n", n, start, end);
+		/* allocate the map, if needed */
+		if (n > 0)
+			map = get_map(chain, -n, 1 /* locked */);
+		if (n == 0 || map == NULL) {
+			error = EINVAL;
+			goto done;
+		}
+		/* copy the initial part of the map */
+		if (start > 0)
+			bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+		/* copy active rules between start and end */
+		for (i = ofs = start; i < end; i++) {
+			rule = chain->map[i];
+			if (rule->set == RESVD_SET)
+				map[ofs++] = chain->map[i];
+		}
+		/* finally the tail */
+		bcopy(chain->map + end, map + ofs,
+			(chain->n_rules - end) * sizeof(struct ip_fw *));
+		map = swap_map(chain, map, chain->n_rules - n);
+		/* now remove the rules deleted */
+		for (i = start; i < end; i++) {
+			rule = map[i];
+			printf("about to remove rule %p at %d\n",
+				rule, i);
+			if (rule->set != RESVD_SET)
+				remove_rule(chain, rule);
+		}
+		printf("done with removals\n");
 		break;
 
 	case 1:	/* delete all rules with given set number */
-		flush_rule_ptrs(chain);
-		while (rule->rulenum < IPFW_DEFAULT_RULE) {
-			if (rule->set == rulenum)
-				rule = remove_rule(chain, rule, prev);
-			else {
-				prev = rule;
-				rule = rule->next;
+		IPFW_UH_WLOCK(chain);
+		/* locate first rule to delete */
+		for (start = 0; start < chain->n_rules; start++) {
+			rule = chain->map[start];
+			if (rule->set != rulenum)
+				break;
+		}
+		for (n = 0, end = i = start; i < chain->n_rules; i++) {
+			rule = chain->map[i];
+			if (rule->set == rulenum) {
+				end = i;
+				n++;
 			}
 		}
+		end++;	/* first non-matching */
+		printf("must delete %d rules between %d and %d\n", n, start, end);
+		/* allocate the map, if needed */
+		if (n > 0)
+			map = get_map(chain, -n, 1 /* locked */);
+		if (n == 0 || map == NULL) {
+			error = EINVAL;
+			goto done;
+		}
+		/* copy the initial part of the map */
+		if (start > 0)
+			bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+		/* copy reserved rules */
+		for (i = ofs = start; i < end; i++) {
+			rule = chain->map[i];
+			if (rule->set != rulenum)
+				map[ofs++] = chain->map[i];
+		}
+		/* finally the tail */
+		bcopy(chain->map + end, map + ofs,
+			(chain->n_rules - end) * sizeof(struct ip_fw *));
+		map = swap_map(chain, map, chain->n_rules - n);
+		/* now remove the rules deleted */
+		for (i = start; i < end; i++) {
+			rule = map[i];
+			if (rule->set == rulenum)
+				remove_rule(chain, map[i]);
+		}
 		break;
 
 	case 2:	/* move rules with given number to new set */
-		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
+		IPFW_UH_WLOCK(chain);
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->rulenum == rulenum)
 				rule->set = new_set;
+		}
+		IPFW_UH_WUNLOCK(chain);
 		break;
 
 	case 3: /* move rules with given set number to new set */
-		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
+		IPFW_UH_WLOCK(chain);
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->set == rulenum)
 				rule->set = new_set;
+		}
+		IPFW_UH_WUNLOCK(chain);
 		break;
 
 	case 4: /* swap two sets */
-		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
+		IPFW_UH_WLOCK(chain);
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->set == rulenum)
 				rule->set = new_set;
 			else if (rule->set == new_set)
 				rule->set = rulenum;
+		}
+		IPFW_UH_WUNLOCK(chain);
 		break;
 
+#if 0 // XXX case 5 is a restriction of 1
 	case 5: /* delete rules with given number and with given set number.
 		 * rulenum - given rule number;
 		 * new_set - given set number.
@@ -376,16 +508,20 @@ del_entry(struct ip_fw_chain *chain, u_i
 				rule = rule->next;
 			}
 		}
+#endif
 	}
 	/*
 	 * Look for rules to reclaim.  We grab the list before
 	 * releasing the lock then reclaim them w/o the lock to
 	 * avoid a LOR with dummynet.
 	 */
+done:
 	rule = chain->reap;
-	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
 	ipfw_reap_rules(rule);
-	return 0;
+	if (map)
+		free(map, M_IPFW);
+	return error;
 }
 
 /*
@@ -419,6 +555,7 @@ zero_entry(struct ip_fw_chain *chain, u_
 {
 	struct ip_fw *rule;
 	char *msg;
+	int i;
 
 	uint16_t rulenum = arg & 0xffff;
 	uint8_t set = (arg >> 16) & 0xff;
@@ -429,11 +566,12 @@ zero_entry(struct ip_fw_chain *chain, u_
 	if (cmd == 1 && set > RESVD_SET)
 		return (EINVAL);
 
-	IPFW_WLOCK(chain);
+	IPFW_UH_RLOCK(chain);
 	if (rulenum == 0) {
 		V_norule_counter = 0;
-		for (rule = chain->rules; rule; rule = rule->next) {
-			/* Skip rules from another set. */
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
+			/* Skip rules not in our set. */
 			if (cmd == 1 && rule->set != set)
 				continue;
 			clear_counters(rule, log_only);
@@ -442,27 +580,23 @@ zero_entry(struct ip_fw_chain *chain, u_
 		    "Accounting cleared";
 	} else {
 		int cleared = 0;
-		/*
-		 * We can have multiple rules with the same number, so we
-		 * need to clear them all.
-		 */
-		for (rule = chain->rules; rule; rule = rule->next)
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->rulenum == rulenum) {
-				while (rule && rule->rulenum == rulenum) {
-					if (cmd == 0 || rule->set == set)
-						clear_counters(rule, log_only);
-					rule = rule->next;
-				}
+				if (cmd == 0 || rule->set == set)
+					clear_counters(rule, log_only);
 				cleared = 1;
-				break;
 			}
+			if (rule->rulenum > rulenum)
+				break;
+		}
 		if (!cleared) {	/* we did not find any matching rules */
 			IPFW_WUNLOCK(chain);
 			return (EINVAL);
 		}
 		msg = log_only ? "logging count reset" : "cleared";
 	}
-	IPFW_WUNLOCK(chain);
+	IPFW_UH_RUNLOCK(chain);
 
 	if (V_fw_verbose) {
 		int lev = LOG_SECURITY | LOG_NOTICE;
@@ -794,13 +928,16 @@ ipfw_getrules(struct ip_fw_chain *chain,
 	char *bp = buf;
 	char *ep = bp + space;
 	struct ip_fw *rule;
-	int i;
+	int i, ix;
 	time_t	boot_seconds;
 
         boot_seconds = boottime.tv_sec;
 	/* XXX this can take a long time and locking will block packet flow */
-	IPFW_RLOCK(chain);
-	for (rule = chain->rules; rule ; rule = rule->next) {
+	IPFW_UH_RLOCK(chain);
+	printf("%s map %p %d\n", __FUNCTION__, chain->map, chain->n_rules);
+	for (ix = 0; ix < chain->n_rules; ix++) {
+		rule = chain->map[ix];
+		printf("rule %d %p\n", ix, rule);
 		/*
 		 * Verify the entry fits in the buffer in case the
 		 * rules changed between calculating buffer space and
@@ -823,7 +960,7 @@ ipfw_getrules(struct ip_fw_chain *chain,
 			bp += i;
 		}
 	}
-	IPFW_RUNLOCK(chain);
+	IPFW_UH_RUNLOCK(chain);
 	ipfw_get_dynamic(&bp, ep); /* protected by the dynamic lock */
 	return (bp - (char *)buf);
 }
@@ -839,6 +976,7 @@ ipfw_ctl(struct sockopt *sopt)
 	int error;
 	size_t size;
 	struct ip_fw *buf, *rule;
+	struct ip_fw_chain *chain;
 	u_int32_t rulenum[2];
 
 	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
@@ -856,6 +994,7 @@ ipfw_ctl(struct sockopt *sopt)
 			return (error);
 	}
 
+	chain = &V_layer3_chain;
 	error = 0;
 
 	switch (sopt->sopt_name) {
@@ -871,7 +1010,8 @@ ipfw_ctl(struct sockopt *sopt)
 		 * change between calculating the size and returning the
 		 * data in which case we'll just return what fits.
 		 */
-		size = V_static_len;	/* size of static rules */
+		size = chain->static_len;	/* size of static rules */
+		printf("static len %d\n", chain->static_len);
 		size += ipfw_dyn_len();
 
 		if (size >= sopt->sopt_valsize)
@@ -883,7 +1023,7 @@ ipfw_ctl(struct sockopt *sopt)
 		 */
 		buf = malloc(size, M_TEMP, M_WAITOK);
 		error = sooptcopyout(sopt, buf,
-				ipfw_getrules(&V_layer3_chain, buf, size));
+				ipfw_getrules(chain, buf, size));
 		free(buf, M_TEMP);
 		break;
 
@@ -901,10 +1041,10 @@ ipfw_ctl(struct sockopt *sopt)
 		 * the old list without the need for a lock.
 		 */
 
-		IPFW_WLOCK(&V_layer3_chain);
-		ipfw_free_chain(&V_layer3_chain, 0 /* keep default rule */);
-		rule = V_layer3_chain.reap;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200912211755.nBLHtAZm035231>