Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 19 Apr 2012 10:48:25 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r234456 - projects/pf/head/sys/contrib/pf/net
Message-ID:  <201204191048.q3JAmPGS085474@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: glebius
Date: Thu Apr 19 10:48:25 2012
New Revision: 234456
URL: http://svn.freebsd.org/changeset/base/234456

Log:
  Create hash table for the source nodes instead of rb-tree.
  Locking is per hash slot.
  
  At normal runtime these locks are obtained on state creation, prior
  to the id hash lock or key hash lock. The expiry thread also obtains them.

Modified:
  projects/pf/head/sys/contrib/pf/net/pf.c
  projects/pf/head/sys/contrib/pf/net/pf_ioctl.c
  projects/pf/head/sys/contrib/pf/net/pf_lb.c
  projects/pf/head/sys/contrib/pf/net/pfvar.h

Modified: projects/pf/head/sys/contrib/pf/net/pf.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf.c	Thu Apr 19 10:36:23 2012	(r234455)
+++ projects/pf/head/sys/contrib/pf/net/pf.c	Thu Apr 19 10:48:25 2012	(r234456)
@@ -187,13 +187,15 @@ static struct mtx pf_sendqueue_mtx;
 #define	PF_QUEUE_LOCK()		mtx_lock(&pf_sendqueue_mtx);
 #define	PF_QUEUE_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx);
 
-VNET_DEFINE(uma_zone_t,	 pf_src_tree_z);
+VNET_DEFINE(uma_zone_t,	 pf_sources_z);
 VNET_DEFINE(uma_zone_t,	 pf_rule_z);
 VNET_DEFINE(uma_zone_t,	 pf_pooladdr_z);
 VNET_DEFINE(uma_zone_t,	 pf_state_z);
 VNET_DEFINE(uma_zone_t,	 pf_state_key_z);
 VNET_DEFINE(uma_zone_t,	 pf_altq_z);
 
+#define	V_pf_sources_z	VNET(pf_sources_z)
+
 static void		 pf_src_tree_remove_state(struct pf_state *);
 static void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
 			    u_int32_t);
@@ -342,62 +344,69 @@ VNET_DEFINE(struct pf_pool_limit, pf_poo
 		s->rule.ptr->states_cur--;		\
 	} while (0)
 
-static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
-
-VNET_DEFINE(struct pf_src_tree,	 	 tree_src_tracking);
-
 MALLOC_DEFINE(M_PFHASH, "pf hashes", "pf(4) hash header structures");
 /* XXXGL: make static? */
 VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
 VNET_DEFINE(struct pf_idhash *, pf_idhash);
 VNET_DEFINE(u_long, pf_hashmask);
+VNET_DEFINE(struct pf_srchash *, pf_srchash);
+VNET_DEFINE(u_long, pf_srchashmask);
 
 VNET_DEFINE(void *, pf_swi_cookie);
 
-RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
-
-static __inline int
-pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
+/*
+ * Hash function shamelessly taken from ng_netflow(4), trusting
+ * mav@ and melifaro@ data on its decent distribution.
+ */
+static __inline u_int
+pf_hashkey(struct pf_state_key *sk)
 {
-	int	diff;
+	u_int h;
 
-	if (a->rule.ptr > b->rule.ptr)
-		return (1);
-	if (a->rule.ptr < b->rule.ptr)
-		return (-1);
-	if ((diff = a->af - b->af) != 0)
-		return (diff);
-	switch (a->af) {
-#ifdef INET
+#define	FULL_HASH(a1, a2, p1, p2)	\
+	(((a1) ^ ((a1) >> 16) ^		\
+	htons((a2) ^ ((a2) >> 16))) ^	\
+	(p1) ^ htons(p2))
+ 
+#define	ADDR_HASH(a1, a2)		\
+	((a1) ^ ((a1) >> 16) ^		\
+	htons((a2) ^ ((a2) >> 16)))
+
+	switch (sk->af) {
 	case AF_INET:
-		if (a->addr.addr32[0] > b->addr.addr32[0])
-			return (1);
-		if (a->addr.addr32[0] < b->addr.addr32[0])
-			return (-1);
+		switch (sk->proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			h = FULL_HASH(sk->addr[0].v4.s_addr,
+			    sk->addr[1].v4.s_addr, sk->port[0], sk->port[1]);
+			break;
+		default:
+			h = ADDR_HASH(sk->addr[0].v4.s_addr,
+			    sk->addr[1].v4.s_addr);
+			break;
+		}
 		break;
-#endif /* INET */
-#ifdef INET6
 	case AF_INET6:
-		if (a->addr.addr32[3] > b->addr.addr32[3])
-			return (1);
-		if (a->addr.addr32[3] < b->addr.addr32[3])
-			return (-1);
-		if (a->addr.addr32[2] > b->addr.addr32[2])
-			return (1);
-		if (a->addr.addr32[2] < b->addr.addr32[2])
-			return (-1);
-		if (a->addr.addr32[1] > b->addr.addr32[1])
-			return (1);
-		if (a->addr.addr32[1] < b->addr.addr32[1])
-			return (-1);
-		if (a->addr.addr32[0] > b->addr.addr32[0])
-			return (1);
-		if (a->addr.addr32[0] < b->addr.addr32[0])
-			return (-1);
+		switch (sk->proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			h = FULL_HASH(sk->addr[0].v6.__u6_addr.__u6_addr32[3],
+			    sk->addr[1].v6.__u6_addr.__u6_addr32[3],
+			    sk->port[0], sk->port[1]);
+			break;
+		default:
+			h = ADDR_HASH(sk->addr[0].v6.__u6_addr.__u6_addr32[3],
+			    sk->addr[1].v6.__u6_addr.__u6_addr32[3]);
+			break;
+		}
 		break;
-#endif /* INET6 */
+	default:
+		panic("%s: unknown address family %u", __func__, sk->af);
 	}
-	return (0);
+#undef FULL_HASH
+#undef ADDR_HASH
+
+	return (h & V_pf_hashmask);
 }
 
 #ifdef INET6
@@ -557,57 +566,72 @@ pf_src_connlimit(struct pf_state **state
 	return (1);
 }
 
+/*
+ * Find the source node for (src, rule, af).  On a miss with returnlocked set,
+ * return NULL with the hash row still locked so the caller can insert one.
+ */
+struct pf_src_node *
+pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af,
+	int returnlocked)
+{
+	struct pf_srchash *sh;
+	struct pf_src_node *n;
+
+	V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
+
+	sh = &V_pf_srchash[pf_hashsrc(src, af)];
+	PF_HASHROW_LOCK(sh);
+	LIST_FOREACH(n, &sh->nodes, entry)
+		if (n->rule.ptr == rule && n->af == af &&
+		    ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) ||
+		    (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
+			break;
+	if (n != NULL || returnlocked == 0)
+		PF_HASHROW_UNLOCK(sh);
+
+	return (n);
+}
+
 static int
 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
     struct pf_addr *src, sa_family_t af)
 {
-	struct pf_src_node	k;
+
+	KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK ||
+	    rule->rpool.opts & PF_POOL_STICKYADDR),
+	    ("%s for non-tracking rule %p", __func__, rule));
+
+	if (*sn == NULL)
+		*sn = pf_find_src_node(src, rule, af, 1);
 
 	if (*sn == NULL) {
-		k.af = af;
-		PF_ACPY(&k.addr, src, af);
-		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
-		    rule->rpool.opts & PF_POOL_STICKYADDR)
-			k.rule.ptr = rule;
-		else
-			k.rule.ptr = NULL;
-		V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
-		*sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
-	}
-	if (*sn == NULL) {
+		struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)];
+
+		PF_HASHROW_ASSERT(sh);
+
 		if (!rule->max_src_nodes ||
 		    rule->src_nodes < rule->max_src_nodes)
-			(*sn) = uma_zalloc(V_pf_src_tree_z, M_NOWAIT | M_ZERO);
+			(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
 		else
 			V_pf_status.lcounters[LCNT_SRCNODES]++;
-		if ((*sn) == NULL)
+		if ((*sn) == NULL) {
+			PF_HASHROW_UNLOCK(sh);
 			return (-1);
+		}
 
 		pf_init_threshold(&(*sn)->conn_rate,
 		    rule->max_src_conn_rate.limit,
 		    rule->max_src_conn_rate.seconds);
 
 		(*sn)->af = af;
-		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
-		    rule->rpool.opts & PF_POOL_STICKYADDR)
-			(*sn)->rule.ptr = rule;
-		else
-			(*sn)->rule.ptr = NULL;
+		(*sn)->rule.ptr = rule;
 		PF_ACPY(&(*sn)->addr, src, af);
-		if (RB_INSERT(pf_src_tree,
-		    &V_tree_src_tracking, *sn) != NULL) {
-			if (V_pf_status.debug >= PF_DEBUG_MISC) {
-				printf("pf: src_tree insert failed: ");
-				pf_print_host(&(*sn)->addr, 0, af);
-				printf("\n");
-			}
-			uma_zfree(V_pf_src_tree_z, *sn);
-			return (-1);
-		}
+		LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
 		(*sn)->creation = time_second;
 		(*sn)->ruletype = rule->action;
 		if ((*sn)->rule.ptr != NULL)
 			(*sn)->rule.ptr->src_nodes++;
+		PF_HASHROW_UNLOCK(sh);
 		V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
 		V_pf_status.src_nodes++;
 	} else {
@@ -620,57 +644,15 @@ pf_insert_src_node(struct pf_src_node **
 	return (0);
 }
 
-/*
- * Hash function shamelessly taken from ng_netflow(4), trusting
- * mav@ and melifaro@ data on its decent distribution.
- */
-static __inline u_int
-pf_hashkey(struct pf_state_key *sk)
+static void
+pf_remove_src_node(struct pf_src_node *src)
 {
-	u_int h;
-
-#define	FULL_HASH(a1, a2, p1, p2)	\
-	(((a1) ^ ((a1) >> 16) ^		\
-	htons((a2) ^ ((a2) >> 16))) ^	\
-	(p1) ^ htons(p2))
- 
-#define	ADDR_HASH(a1, a2)		\
-	((a1) ^ ((a1) >> 16) ^		\
-	htons((a2) ^ ((a2) >> 16)))
-
-	switch (sk->af) {
-	case AF_INET:
-		switch (sk->proto) {
-		case IPPROTO_TCP:
-		case IPPROTO_UDP:
-			h = FULL_HASH(sk->addr[0].v4.s_addr,
-			    sk->addr[1].v4.s_addr, sk->port[0], sk->port[1]);
-			break;
-		default:
-			h = ADDR_HASH(sk->addr[0].v4.s_addr,
-			    sk->addr[1].v4.s_addr);
-			break;
-		}
-		break;
-	case AF_INET6:
-		switch (sk->proto) {
-		case IPPROTO_TCP:
-		case IPPROTO_UDP:
-			h = FULL_HASH(sk->addr[0].v6.__u6_addr.__u6_addr32[3],
-			    sk->addr[1].v6.__u6_addr.__u6_addr32[3],
-			    sk->port[0], sk->port[1]);
-			break;
-		default:
-			h = ADDR_HASH(sk->addr[0].v6.__u6_addr.__u6_addr32[3],
-			    sk->addr[1].v6.__u6_addr.__u6_addr32[3]);
-			break;
-		}
-		break;
-	default:
-		panic("%s: unknown address family %u", __func__, sk->af);
-	}
+	struct pf_srchash *sh;
 
-	return (h & V_pf_hashmask);
+	sh = &V_pf_srchash[pf_hashsrc(&src->addr, src->af)];
+	PF_HASHROW_LOCK(sh);
+	LIST_REMOVE(src, entry);
+	PF_HASHROW_UNLOCK(sh);
 }
 
 /* Data storage structures initialization. */
@@ -679,6 +661,7 @@ pf_initialize()
 {
 	struct pf_keyhash	*kh;
 	struct pf_idhash	*ih;
+	struct pf_srchash	*sh;
 	u_int i;
 
 	/* States and state keys storage. */
@@ -702,11 +685,16 @@ pf_initialize()
 	}
 
 	/* Source nodes. */
-	V_pf_src_tree_z = uma_zcreate("pf src nodes",
+	V_pf_sources_z = uma_zcreate("pf source nodes",
 	    sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    0);
-	V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_src_tree_z;
-	RB_INIT(&V_tree_src_tracking);
+	V_pf_pool_limits[PF_LIMIT_SRC_NODES].pp = V_pf_sources_z;
+	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
+	V_pf_srchash = malloc((PF_HASHSIZ / 4) * sizeof(struct pf_srchash),
+	  M_PFHASH, M_WAITOK|M_ZERO);
+	V_pf_srchashmask = (PF_HASHSIZ / 4) - 1;
+	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
+		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
 
 	/* ALTQ */
 	V_pf_altq_z = uma_zcreate("pf altq", sizeof(struct pf_altq),
@@ -744,6 +732,7 @@ pf_cleanup()
 {
 	struct pf_keyhash	*kh;
 	struct pf_idhash	*ih;
+	struct pf_srchash	*sh;
 	struct pf_send_entry	*pfse, *next;
 	u_int i;
 
@@ -759,13 +748,20 @@ pf_cleanup()
 	free(V_pf_keyhash, M_PFHASH);
 	free(V_pf_idhash, M_PFHASH);
 
+	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
+		KASSERT(LIST_EMPTY(&sh->nodes),
+		    ("%s: source node hash not empty", __func__));
+		mtx_destroy(&sh->lock);
+	}
+	free(V_pf_srchash, M_PFHASH);
+
 	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
 		m_freem(pfse->pfse_m);
 		free(pfse, M_PFTEMP);
 	}
 	mtx_destroy(&pf_sendqueue_mtx);
 
-	uma_zdestroy(V_pf_src_tree_z);
+	uma_zdestroy(V_pf_sources_z);
 	uma_zdestroy(V_pf_rule_z);
 	uma_zdestroy(V_pf_state_z);
 	uma_zdestroy(V_pf_state_key_z);
@@ -1360,11 +1356,13 @@ pf_state_expires(const struct pf_state *
 void
 pf_purge_expired_src_nodes()
 {
+	struct pf_srchash	*sh;
 	struct pf_src_node	*cur, *next;
+	int i;
 
-	for (cur = RB_MIN(pf_src_tree, &V_tree_src_tracking); cur; cur = next) {
-		next = RB_NEXT(pf_src_tree, &V_tree_src_tracking, cur);
-
+	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
+	    PF_HASHROW_LOCK(sh);
+	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) 
 		if (cur->states <= 0 && cur->expire <= time_second) {
 			if (cur->rule.ptr != NULL) {
 				cur->rule.ptr->src_nodes--;
@@ -1372,11 +1370,12 @@ pf_purge_expired_src_nodes()
 				    cur->rule.ptr->max_src_nodes <= 0)
 					pf_rm_rule(NULL, cur->rule.ptr);
 			}
-			RB_REMOVE(pf_src_tree, &V_tree_src_tracking, cur);
+			LIST_REMOVE(cur, entry);
 			V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 			V_pf_status.src_nodes--;
-			uma_zfree(V_pf_src_tree_z, cur);
+			uma_zfree(V_pf_sources_z, cur);
 		}
+	    PF_HASHROW_UNLOCK(sh);
 	}
 }
 
@@ -3479,16 +3478,16 @@ csfailed:
 		uma_zfree(V_pf_state_key_z, nk);
 
 	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
-		RB_REMOVE(pf_src_tree, &V_tree_src_tracking, sn);
+		pf_remove_src_node(sn);
 		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 		V_pf_status.src_nodes--;
-		uma_zfree(V_pf_src_tree_z, sn);
+		uma_zfree(V_pf_sources_z, sn);
 	}
 	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
-		RB_REMOVE(pf_src_tree, &V_tree_src_tracking, nsn);
+		pf_remove_src_node(nsn);
 		V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 		V_pf_status.src_nodes--;
-		uma_zfree(V_pf_src_tree_z, nsn);
+		uma_zfree(V_pf_sources_z, nsn);
 	}
 	return (PF_DROP);
 }

Modified: projects/pf/head/sys/contrib/pf/net/pf_ioctl.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf_ioctl.c	Thu Apr 19 10:36:23 2012	(r234455)
+++ projects/pf/head/sys/contrib/pf/net/pf_ioctl.c	Thu Apr 19 10:48:25 2012	(r234456)
@@ -3144,21 +3144,27 @@ DIOCGETSTATES_full:
 
 	case DIOCGETSRCNODES: {
 		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
+		struct pf_srchash	*sh;
 		struct pf_src_node	*n, *p, *pstore;
-		u_int32_t		 nr = 0;
+		uint32_t		 i, nr = 0;
 
 		if (psn->psn_len == 0) {
-			PF_LOCK();
-			RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking)
-				nr++;
-			PF_UNLOCK();
+			for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
+			    i++, sh++) {
+				PF_HASHROW_LOCK(sh);
+				LIST_FOREACH(n, &sh->nodes, entry)
+					nr++;
+				PF_HASHROW_UNLOCK(sh);
+			}
 			psn->psn_len = sizeof(struct pf_src_node) * nr;
 			break;
 		}
 
 		p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK);
-		PF_LOCK();
-		RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) {
+		for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
+		    i++, sh++) {
+		    PF_HASHROW_LOCK(sh);
+		    LIST_FOREACH(n, &sh->nodes, entry) {
 			int	secs = time_second, diff;
 
 			if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
@@ -3183,8 +3189,9 @@ DIOCGETSTATES_full:
 				    n->conn_rate.seconds;
 			p++;
 			nr++;
+		    }
+		    PF_HASHROW_UNLOCK(sh);
 		}
-		PF_UNLOCK();
 		error = copyout(pstore, psn->psn_src_nodes,
 		    sizeof(struct pf_src_node) * nr);
 		if (error) {
@@ -3207,13 +3214,21 @@ DIOCGETSTATES_full:
 	}
 
 	case DIOCKILLSRCNODES: {
-		struct pf_src_node	*sn;
 		struct pfioc_src_node_kill *psnk =
 		    (struct pfioc_src_node_kill *)addr;
-		u_int			killed = 0;
+		struct pf_srchash	*sh;
+		struct pf_src_node	*sn;
+		u_int			i, killed = 0;
 
-		PF_LOCK();
-		RB_FOREACH(sn, pf_src_tree, &V_tree_src_tracking) {
+		for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
+		    i++, sh++) {
+		    /*
+		     * XXXGL: we don't ever acquire sources hash lock
+		     * but if we ever do, the below call to pf_clear_srcnodes()
+		     * would lead to a LOR.
+		     */
+		    PF_HASHROW_LOCK(sh);
+		    LIST_FOREACH(sn, &sh->nodes, entry)
 			if (PF_MATCHA(psnk->psnk_src.neg,
 				&psnk->psnk_src.addr.v.a.addr,
 				&psnk->psnk_src.addr.v.a.mask,
@@ -3228,12 +3243,12 @@ DIOCGETSTATES_full:
 				sn->expire = 1;
 				killed++;
 			}
+		    PF_HASHROW_UNLOCK(sh);
 		}
 
 		if (killed > 0)
 			pf_purge_expired_src_nodes();
 
-		PF_UNLOCK();
 		psnk->psnk_killed = killed;
 		break;
 	}
@@ -3415,22 +3430,28 @@ pf_clear_srcnodes(struct pf_src_node *n)
 
 		PF_HASHROW_LOCK(ih);
 		LIST_FOREACH(s, &ih->states, entry) {
-			PF_STATE_LOCK(s);
 			if (n == NULL || n == s->src_node)
 				s->src_node = NULL;
 			if (n == NULL || n == s->nat_src_node)
 				s->nat_src_node = NULL;
-			PF_STATE_UNLOCK(s);
 		}
 		PF_HASHROW_UNLOCK(ih);
 	}
 
 	if (n == NULL) {
-		RB_FOREACH(n, pf_src_tree, &V_tree_src_tracking) {
-			n->expire = 1;
-			n->states = 0;
+		struct pf_srchash *sh;
+
+		for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
+		    i++, sh++) {
+			PF_HASHROW_LOCK(sh);
+			LIST_FOREACH(n, &sh->nodes, entry) {
+				n->expire = 1;
+				n->states = 0;
+			}
+			PF_HASHROW_UNLOCK(sh);
 		}
 	} else {
+		/* XXX: hash slot should already be locked here. */
 		n->expire = 1;
 		n->states = 0;
 	}

Modified: projects/pf/head/sys/contrib/pf/net/pf_lb.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf_lb.c	Thu Apr 19 10:36:23 2012	(r234455)
+++ projects/pf/head/sys/contrib/pf/net/pf_lb.c	Thu Apr 19 10:48:25 2012	(r234456)
@@ -355,24 +355,19 @@ pf_map_addr(sa_family_t af, struct pf_ru
 	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
 	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
 	struct pf_pooladdr	*acur = rpool->cur;
-	struct pf_src_node	 k;
+
+	KASSERT((r->rule_flag & PFRULE_RULESRCTRACK ||
+	    r->rpool.opts & PF_POOL_STICKYADDR),
+	    ("%s for non-tracking rule %p", __func__, r));
 
 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
-		k.af = af;
-		PF_ACPY(&k.addr, saddr, af);
-		if (r->rule_flag & PFRULE_RULESRCTRACK ||
-		    r->rpool.opts & PF_POOL_STICKYADDR)
-			k.rule.ptr = r;
-		else
-			k.rule.ptr = NULL;
-		V_pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
-		*sn = RB_FIND(pf_src_tree, &V_tree_src_tracking, &k);
+		*sn = pf_find_src_node(saddr, r, af, 0);
 		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
 			PF_ACPY(naddr, &(*sn)->raddr, af);
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pf_map_addr: src tracking maps ");
-				pf_print_host(&k.addr, 0, af);
+				pf_print_host(saddr, 0, af);
 				printf(" to ");
 				pf_print_host(naddr, 0, af);
 				printf("\n");

Modified: projects/pf/head/sys/contrib/pf/net/pfvar.h
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pfvar.h	Thu Apr 19 10:36:23 2012	(r234455)
+++ projects/pf/head/sys/contrib/pf/net/pfvar.h	Thu Apr 19 10:48:25 2012	(r234456)
@@ -714,7 +714,7 @@ struct pf_threshold {
 };
 
 struct pf_src_node {
-	RB_ENTRY(pf_src_node) entry;
+	LIST_ENTRY(pf_src_node) entry;
 	struct pf_addr	 addr;
 	struct pf_addr	 raddr;
 	union pf_rule_ptr rule;
@@ -1684,10 +1684,10 @@ struct pf_ifspeed {
 #define	DIOCGIFSPEED	_IOWR('D', 92, struct pf_ifspeed)
 
 #ifdef _KERNEL
-RB_HEAD(pf_src_tree, pf_src_node);
-RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare);
-VNET_DECLARE(struct pf_src_tree,	 tree_src_tracking);
-#define	V_tree_src_tracking		 VNET(tree_src_tracking)
+struct pf_srchash {
+	LIST_HEAD(, pf_src_node)	nodes;	/* nodes hashed to this slot */
+	struct mtx			lock;	/* protects the nodes list */
+};
 
 struct pf_keyhash {
 	LIST_HEAD(, pf_state_key)	keys;
@@ -1706,6 +1706,10 @@ VNET_DECLARE(u_long, pf_hashmask);
 #define V_pf_keyhash	VNET(pf_keyhash)
 #define	V_pf_idhash	VNET(pf_idhash)
 #define	V_pf_hashmask	VNET(pf_hashmask)
+VNET_DECLARE(struct pf_srchash *, pf_srchash);
+VNET_DECLARE(u_long, pf_srchashmask);
+#define	V_pf_srchash	VNET(pf_srchash)
+#define V_pf_srchashmask VNET(pf_srchashmask)
 
 #define PF_IDHASH(s)	(be64toh((s)->id) % (V_pf_hashmask + 1))
 
@@ -1749,8 +1753,6 @@ extern void			 pf_calc_skip_steps(struct
 #ifdef ALTQ
 extern	void			 pf_altq_ifnet_event(struct ifnet *, int);
 #endif
-VNET_DECLARE(uma_zone_t,	 pf_src_tree_z);
-#define	V_pf_src_tree_z		 VNET(pf_src_tree_z)
 VNET_DECLARE(uma_zone_t,	 pf_rule_z);
 #define	V_pf_rule_z		 VNET(pf_rule_z)
 VNET_DECLARE(uma_zone_t,	 pf_state_z);
@@ -1783,6 +1785,27 @@ extern int			 pf_state_insert(struct pfi
 				    struct pf_state *);
 extern void			 pf_free_state(struct pf_state *);
 
+/* Map a source address to its pf_srchash slot; IPv6 uses addr32[3] only. */
+static __inline u_int
+pf_hashsrc(struct pf_addr *addr, sa_family_t af)
+{
+	u_int h;
+
+	switch (af) {
+	case AF_INET:
+		h = addr->v4.s_addr;
+		break;
+	case AF_INET6:
+		h = addr->v6.__u6_addr.__u6_addr32[3];
+		break;
+	default:
+		panic("%s: unknown address family %u", __func__, af);
+	}
+
+	/* Fold the upper 16 bits into the lower ones before masking. */
+	return ((h ^ (h >> 16)) & V_pf_srchashmask);
+}
+
 static __inline void
 pf_ref_state(struct pf_state *s)
 {
@@ -1801,6 +1824,8 @@ pf_release_state(struct pf_state *s)
 extern struct pf_state		*pf_find_state_byid(uint64_t, uint32_t);
 extern struct pf_state		*pf_find_state_all(struct pf_state_key_cmp *,
 				    u_int, int *);
+struct pf_src_node		*pf_find_src_node(struct pf_addr *, struct pf_rule *,
+				    sa_family_t, int);
 extern void			 pf_print_state(struct pf_state *);
 extern void			 pf_print_flags(u_int8_t);
 extern u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204191048.q3JAmPGS085474>