Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 20 Feb 2025 09:35:28 GMT
From:      Kristof Provost <kp@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: c2346c3d3ab7 - main - pf: support source-hash and random with tables and dynifs, not just pools
Message-ID:  <202502200935.51K9ZSQV032062@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=c2346c3d3ab754299d1e75705997db6fb9a49420

commit c2346c3d3ab754299d1e75705997db6fb9a49420
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2025-02-13 13:14:56 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2025-02-20 08:25:51 +0000

    pf: support source-hash and random with tables and dynifs, not just pools
    
    This finally allows to use source-hash for dynamic loadbalancing, eg.
    "rdr-to <hosts> source-hash", instead of just round-robin and least-states.
    
    An older pre-siphash version of this diff was tested by many people.
    
    OK tedu@ benno@
    
    Obtained from:  OpenBSD, reyk <reyk@openbsd.org>, 252a05523f
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
---
 sbin/pfctl/parse.y            | 43 ++++++++++--------------
 share/man/man5/pf.conf.5      | 10 +++---
 sys/netpfil/pf/pf.h           |  5 +++
 sys/netpfil/pf/pf_lb.c        | 74 ++++++++++++++++++++++++++++++++++-------
 tests/sys/netpfil/pf/nat64.sh | 76 +++++++++++++++++++++++++++++++++----------
 5 files changed, 148 insertions(+), 60 deletions(-)

diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
index f3334c961909..8f732bdf268a 100644
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -2747,16 +2747,16 @@ pfrule		: action dir logquick interface route af proto fromto
 				    $5.host->addr.type == PF_ADDR_TABLE ||
 				    DYNIF_MULTIADDR($5.host->addr)))
 					r.route.opts |= PF_POOL_ROUNDROBIN;
-				if ((r.route.opts & PF_POOL_TYPEMASK) !=
-				    PF_POOL_ROUNDROBIN &&
-				    disallow_table($5.host, "tables are only "
-				    "supported in round-robin routing pools"))
-					YYERROR;
-				if ((r.route.opts & PF_POOL_TYPEMASK) !=
-				    PF_POOL_ROUNDROBIN &&
-				    disallow_alias($5.host, "interface (%s) "
-				    "is only supported in round-robin "
-				    "routing pools"))
+				if ($5.host->next != NULL &&
+				    !PF_POOL_DYNTYPE(r.route.opts)) {
+					yyerror("address pool option "
+					    "not supported by type");
+				}
+				if (!PF_POOL_DYNTYPE(r.route.opts) &&
+				    (disallow_table($5.host,
+				    "tables are not supported by pool type") ||
+				    disallow_alias($5.host,
+				    "interface (%s) is not supported by pool type")))
 					YYERROR;
 				if ($5.host->next != NULL) {
 					if ((r.route.opts & PF_POOL_TYPEMASK) !=
@@ -2829,10 +2829,9 @@ pfrule		: action dir logquick interface route af proto fromto
 				r.nat.opts = $9.nat.pool_opts.type;
 				r.nat.opts |= $9.nat.pool_opts.opts;
 
-				if ((r.nat.opts & PF_POOL_TYPEMASK) !=
-				    PF_POOL_ROUNDROBIN &&
-				    disallow_table($9.nat.rdr->host, "tables are only "
-				    "supported in round-robin pools"))
+				if (!PF_POOL_DYNTYPE(r.nat.opts) &&
+				    disallow_table($9.nat.rdr->host, "tables are not "
+				    "supported by pool type"))
 					YYERROR;
 			}
 
@@ -4916,17 +4915,11 @@ natrule		: nataction interface af proto fromto tag tagged rtable
 				    $9->host->addr.type == PF_ADDR_TABLE ||
 				    DYNIF_MULTIADDR($9->host->addr)))
 					r.rdr.opts = PF_POOL_ROUNDROBIN;
-				if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
-				    PF_POOL_ROUNDROBIN &&
-				    disallow_table($9->host, "tables are only "
-				    "supported in round-robin redirection "
-				    "pools"))
-					YYERROR;
-				if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
-				    PF_POOL_ROUNDROBIN &&
-				    disallow_alias($9->host, "interface (%s) "
-				    "is only supported in round-robin "
-				    "redirection pools"))
+				if (!PF_POOL_DYNTYPE(r.rdr.opts) &&
+				    (disallow_table($9->host,
+				    "tables are not supported by pool type") ||
+				    disallow_alias($9->host,
+				    "interface (%s) is not supported by pool type")))
 					YYERROR;
 				if ($9->host->next != NULL) {
 					if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5
index 75cb0b39272f..1192cd3d02e8 100644
--- a/share/man/man5/pf.conf.5
+++ b/share/man/man5/pf.conf.5
@@ -27,7 +27,7 @@
 .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 11, 2025
+.Dd February 13, 2025
 .Dt PF.CONF 5
 .Os
 .Sh NAME
@@ -132,8 +132,8 @@ Tables can also be used for the redirect address of
 .Ar nat
 and
 .Ar rdr
-rules and in the routing options of filter rules, but only for
-.Ar round-robin
+and in the routing options of filter rules, but not for
+.Ar bitmask
 pools.
 .Pp
 Tables can be defined with any of the following
@@ -2335,8 +2335,8 @@ The
 option loops through the redirection address(es).
 .Pp
 When more than one redirection address is specified,
-.Ar round-robin
-is the only permitted pool type.
+.Ar bitmask
+is not permitted as a pool type.
 .It Ar static-port
 With
 .Ar nat
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index dfa86e7f1d6d..b5c0eeaa8f01 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -133,6 +133,11 @@ enum	{ PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
 #define	PF_WSCALE_FLAG		0x80
 #define	PF_WSCALE_MASK		0x0f
 
+#define PF_POOL_DYNTYPE(_o)					\
+	((((_o) & PF_POOL_TYPEMASK) == PF_POOL_ROUNDROBIN) ||	\
+	(((_o) & PF_POOL_TYPEMASK) == PF_POOL_RANDOM) ||	\
+	(((_o) & PF_POOL_TYPEMASK) == PF_POOL_SRCHASH))
+
 #define	PF_LOG			0x01
 #define	PF_LOG_ALL		0x02
 #define	PF_LOG_SOCKET_LOOKUP	0x04
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 83dd63fd2290..d1ba2495dc30 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -73,7 +73,7 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
 
 #define DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x
 
-static void		 pf_hash(struct pf_addr *, struct pf_addr *,
+static uint64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
 			    struct pf_poolhashkey *, sa_family_t);
 static struct pf_krule	*pf_match_translation(struct pf_pdesc *,
 			    int, struct pf_kanchor_stackframe *);
@@ -84,7 +84,7 @@ static int		 pf_get_sport(struct pf_pdesc *, struct pf_krule *,
 			    pf_sn_types_t);
 static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
 
-static void
+static uint64_t
 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
     struct pf_poolhashkey *key, sa_family_t af)
 {
@@ -95,20 +95,23 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
 		uint32_t hash32[2];
 	} h;
 #endif
+	uint64_t	 res = 0;
 
 	_Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
-		hash->addr32[0] = SipHash24(&ctx, (const uint8_t *)key,
+		res = SipHash24(&ctx, (const uint8_t *)key,
 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
+		hash->addr32[0] = res;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
-		h.hash64 = SipHash24(&ctx, (const uint8_t *)key,
+		res = SipHash24(&ctx, (const uint8_t *)key,
 		    &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
+		h.hash64 = res;
 		hash->addr32[0] = h.hash32[0];
 		hash->addr32[1] = h.hash32[1];
 		/*
@@ -120,6 +123,7 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
 		break;
 #endif /* INET6 */
 	}
+	return (res);
 }
 
 static struct pf_krule *
@@ -459,6 +463,8 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 {
 	u_short			 reason = PFRES_MATCH;
 	struct pf_addr		*raddr = NULL, *rmask = NULL;
+	uint64_t		 hashidx;
+	int			 cnt;
 
 	mtx_lock(&rpool->mtx);
 	/* Find the route using chosen algorithm. Store the found route
@@ -472,8 +478,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 #ifdef INET
 		case AF_INET:
 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
-			    (rpool->opts & PF_POOL_TYPEMASK) !=
-			    PF_POOL_ROUNDROBIN) {
+			    !PF_POOL_DYNTYPE(rpool->opts)) {
 				reason = PFRES_MAPFAILED;
 				goto done_pool_mtx;
 			}
@@ -484,8 +489,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 #ifdef INET6
 		case AF_INET6:
 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
-			    (rpool->opts & PF_POOL_TYPEMASK) !=
-			    PF_POOL_ROUNDROBIN) {
+			    !PF_POOL_DYNTYPE(rpool->opts)) {
 				reason = PFRES_MAPFAILED;
 				goto done_pool_mtx;
 			}
@@ -495,7 +499,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 #endif /* INET6 */
 		}
 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
-		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
+		if (!PF_POOL_DYNTYPE(rpool->opts)) {
 			reason = PFRES_MAPFAILED;
 			goto done_pool_mtx; /* unsupported */
 		}
@@ -512,7 +516,28 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
 		break;
 	case PF_POOL_RANDOM:
-		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+			cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
+			rpool->tblidx = (int)arc4random_uniform(cnt);
+			memset(&rpool->counter, 0, sizeof(rpool->counter));
+			if (pfr_pool_get(rpool->cur->addr.p.tbl,
+			    &rpool->tblidx, &rpool->counter, af, NULL)) {
+				reason = PFRES_MAPFAILED;
+				goto done_pool_mtx; /* unsupported */
+			}
+			PF_ACPY(naddr, &rpool->counter, af);
+		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+			cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
+			rpool->tblidx = (int)arc4random_uniform(cnt);
+			memset(&rpool->counter, 0, sizeof(rpool->counter));
+			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+			    &rpool->tblidx, &rpool->counter, af,
+			    pf_islinklocal)) {
+				reason = PFRES_MAPFAILED;
+				goto done_pool_mtx; /* unsupported */
+			}
+			PF_ACPY(naddr, &rpool->counter, af);
+		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
 			switch (af) {
 #ifdef INET
 			case AF_INET:
@@ -554,8 +579,33 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 	    {
 		unsigned char hash[16];
 
-		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
-		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+		hashidx =
+		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+			cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
+			rpool->tblidx = (int)(hashidx % cnt);
+			memset(&rpool->counter, 0, sizeof(rpool->counter));
+			if (pfr_pool_get(rpool->cur->addr.p.tbl,
+			    &rpool->tblidx, &rpool->counter, af, NULL)) {
+				reason = PFRES_MAPFAILED;
+				goto done_pool_mtx; /* unsupported */
+			}
+			PF_ACPY(naddr, &rpool->counter, af);
+		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+			cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
+			rpool->tblidx = (int)(hashidx % cnt);
+			memset(&rpool->counter, 0, sizeof(rpool->counter));
+			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+			    &rpool->tblidx, &rpool->counter, af,
+			    pf_islinklocal)) {
+				reason = PFRES_MAPFAILED;
+				goto done_pool_mtx; /* unsupported */
+			}
+			PF_ACPY(naddr, &rpool->counter, af);
+		} else {
+			PF_POOLMASK(naddr, raddr, rmask,
+			    (struct pf_addr *)&hash, af);
+		}
 		break;
 	    }
 	case PF_POOL_ROUNDROBIN:
diff --git a/tests/sys/netpfil/pf/nat64.sh b/tests/sys/netpfil/pf/nat64.sh
index 9cc6aececc42..68d9191cacb9 100644
--- a/tests/sys/netpfil/pf/nat64.sh
+++ b/tests/sys/netpfil/pf/nat64.sh
@@ -409,7 +409,7 @@ pool_cleanup()
 atf_test_case "table"
 table_head()
 {
-	atf_set descr 'Tables require round-robin'
+	atf_set descr 'Check table restrictions'
 	atf_set require.user root
 }
 
@@ -417,9 +417,18 @@ table_body()
 {
 	pft_init
 
-	echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr>" | \
+	# Round-robin and random are allowed
+	echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> round-robin" | \
+	    atf_check -s exit:0 \
+	    pfctl -f -
+	echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> random" | \
+	    atf_check -s exit:0 \
+	    pfctl -f -
+
+	# bitmask is not
+	echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> bitmask" | \
 	    atf_check -s exit:1 \
-	    -e match:"tables are only supported in round-robin pools" \
+	    -e match:"tables are not supported by pool type" \
 	    pfctl -f -
 }
 
@@ -489,15 +498,11 @@ table_range_cleanup()
 	pft_cleanup
 }
 
-atf_test_case "table_round_robin" "cleanup"
-table_round_robin_head()
+table_common_body()
 {
-	atf_set descr 'Use a table of IPv4 addresses in round-robin mode'
-	atf_set require.user root
-}
+	pool_type=$1
+echo pool_type=${pool_type}
 
-table_round_robin_body()
-{
 	pft_init
 
 	epair_link=$(vnet_mkepair)
@@ -527,7 +532,7 @@ table_round_robin_body()
 	    "set reassemble yes" \
 	    "set state-policy if-bound" \
 	    "table <wanaddrs> { 192.0.2.1, 192.0.2.3, 192.0.2.4 }" \
-	    "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet from <wanaddrs> round-robin"
+	    "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet from <wanaddrs> ${pool_type}"
 
 	# Use pf to count sources
 	jexec dst pfctl -e
@@ -541,13 +546,30 @@ table_round_robin_body()
 	atf_check -s exit:0 -o ignore \
 	    ping6 -c 1 64:ff9b::192.0.2.2
 
-	# Verify on dst that we saw different source addresses
-	atf_check -s exit:0 -o match:".*192.0.2.1.*" \
-	    jexec dst pfctl -ss
-	atf_check -s exit:0 -o match:".*192.0.2.3.*" \
-	    jexec dst pfctl -ss
-	atf_check -s exit:0 -o match:".*192.0.2.4.*" \
-	    jexec dst pfctl -ss
+	# XXX We can't reasonably check pool type random because it's random. It may end
+	# up choosing the same source IP for all three connections.
+	if [ "${pool_type}" == "round-robin" ];
+	then
+		# Verify on dst that we saw different source addresses
+		atf_check -s exit:0 -o match:".*192.0.2.1.*" \
+		    jexec dst pfctl -ss
+		atf_check -s exit:0 -o match:".*192.0.2.3.*" \
+		    jexec dst pfctl -ss
+		atf_check -s exit:0 -o match:".*192.0.2.4.*" \
+		    jexec dst pfctl -ss
+	fi
+}
+
+atf_test_case "table_round_robin" "cleanup"
+table_round_robin_head()
+{
+	atf_set descr 'Use a table of IPv4 addresses in round-robin mode'
+	atf_set require.user root
+}
+
+table_round_robin_body()
+{
+	table_common_body round-robin
 }
 
 table_round_robin_cleanup()
@@ -555,6 +577,23 @@ table_round_robin_cleanup()
 	pft_cleanup
 }
 
+atf_test_case "table_random" "cleanup"
+table_random_head()
+{
+	atf_set descr 'Use a table of IPv4 addresses in random mode'
+	atf_set require.user root
+}
+
+table_random_body()
+{
+	table_common_body random
+}
+
+table_random_cleanup()
+{
+	pft_cleanup
+}
+
 atf_test_case "dummynet" "cleanup"
 dummynet_head()
 {
@@ -775,6 +814,7 @@ atf_init_test_cases()
 	atf_add_test_case "table"
 	atf_add_test_case "table_range"
 	atf_add_test_case "table_round_robin"
+	atf_add_test_case "table_random"
 	atf_add_test_case "dummynet"
 	atf_add_test_case "gateway6"
 	atf_add_test_case "route_to"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202502200935.51K9ZSQV032062>