Date: Thu, 20 Feb 2025 09:35:28 GMT From: Kristof Provost <kp@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: c2346c3d3ab7 - main - pf: support source-hash and random with tables and dynifs, not just pools Message-ID: <202502200935.51K9ZSQV032062@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by kp: URL: https://cgit.FreeBSD.org/src/commit/?id=c2346c3d3ab754299d1e75705997db6fb9a49420 commit c2346c3d3ab754299d1e75705997db6fb9a49420 Author: Kristof Provost <kp@FreeBSD.org> AuthorDate: 2025-02-13 13:14:56 +0000 Commit: Kristof Provost <kp@FreeBSD.org> CommitDate: 2025-02-20 08:25:51 +0000 pf: support source-hash and random with tables and dynifs, not just pools This finally allows to use source-hash for dynamic loadbalancing, eg. "rdr-to <hosts> source-hash", instead of just round-robin and least-states. An older pre-siphash version of this diff was tested by many people. OK tedu@ benno@ Obtained from: OpenBSD, reyk <reyk@openbsd.org>, 252a05523f Sponsored by: Rubicon Communications, LLC ("Netgate") --- sbin/pfctl/parse.y | 43 ++++++++++-------------- share/man/man5/pf.conf.5 | 10 +++--- sys/netpfil/pf/pf.h | 5 +++ sys/netpfil/pf/pf_lb.c | 74 ++++++++++++++++++++++++++++++++++------- tests/sys/netpfil/pf/nat64.sh | 76 +++++++++++++++++++++++++++++++++---------- 5 files changed, 148 insertions(+), 60 deletions(-) diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index f3334c961909..8f732bdf268a 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -2747,16 +2747,16 @@ pfrule : action dir logquick interface route af proto fromto $5.host->addr.type == PF_ADDR_TABLE || DYNIF_MULTIADDR($5.host->addr))) r.route.opts |= PF_POOL_ROUNDROBIN; - if ((r.route.opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN && - disallow_table($5.host, "tables are only " - "supported in round-robin routing pools")) - YYERROR; - if ((r.route.opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN && - disallow_alias($5.host, "interface (%s) " - "is only supported in round-robin " - "routing pools")) + if ($5.host->next != NULL && + !PF_POOL_DYNTYPE(r.route.opts)) { + yyerror("address pool option " + "not supported by type"); + } + if (!PF_POOL_DYNTYPE(r.route.opts) && + (disallow_table($5.host, + "tables are not supported by pool type") || + disallow_alias($5.host, + "interface (%s) is not supported by pool type"))) YYERROR; if ($5.host->next != NULL) { if ((r.route.opts & PF_POOL_TYPEMASK) != @@ -2829,10 +2829,9 @@ pfrule : action dir logquick interface route af proto fromto r.nat.opts = $9.nat.pool_opts.type; r.nat.opts |= $9.nat.pool_opts.opts; - if ((r.nat.opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN && - disallow_table($9.nat.rdr->host, "tables are only " - "supported in round-robin pools")) + if (!PF_POOL_DYNTYPE(r.nat.opts) && + disallow_table($9.nat.rdr->host, "tables are not " + "supported by pool type")) YYERROR; } @@ -4916,17 +4915,11 @@ natrule : nataction interface af proto fromto tag tagged rtable $9->host->addr.type == PF_ADDR_TABLE || DYNIF_MULTIADDR($9->host->addr))) r.rdr.opts = PF_POOL_ROUNDROBIN; - if ((r.rdr.opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN && - disallow_table($9->host, "tables are only " - "supported in round-robin redirection " - "pools")) - YYERROR; - if ((r.rdr.opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN && - disallow_alias($9->host, "interface (%s) " - "is only supported in round-robin " - "redirection pools")) + if (!PF_POOL_DYNTYPE(r.rdr.opts) && + (disallow_table($9->host, + "tables are not supported by pool type") || + disallow_alias($9->host, + "interface (%s) is not supported by pool type"))) YYERROR; if ($9->host->next != NULL) { if ((r.rdr.opts & PF_POOL_TYPEMASK) != diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5 index 75cb0b39272f..1192cd3d02e8 100644 --- a/share/man/man5/pf.conf.5 +++ b/share/man/man5/pf.conf.5 @@ -27,7 +27,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd February 11, 2025 +.Dd February 13, 2025 .Dt PF.CONF 5 .Os .Sh NAME @@ -132,8 +132,8 @@ Tables can also be used for the redirect address of .Ar nat and .Ar rdr -rules and in the routing options of filter rules, but only for -.Ar round-robin +and in the routing options of filter rules, but not for +.Ar bitmask pools. .Pp Tables can be defined with any of the following @@ -2335,8 +2335,8 @@ The option loops through the redirection address(es). .Pp When more than one redirection address is specified, -.Ar round-robin -is the only permitted pool type. +.Ar bitmask +is not permitted as a pool type. .It Ar static-port With .Ar nat diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index dfa86e7f1d6d..b5c0eeaa8f01 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -133,6 +133,11 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f +#define PF_POOL_DYNTYPE(_o) \ + ((((_o) & PF_POOL_TYPEMASK) == PF_POOL_ROUNDROBIN) || \ + (((_o) & PF_POOL_TYPEMASK) == PF_POOL_RANDOM) || \ + (((_o) & PF_POOL_TYPEMASK) == PF_POOL_SRCHASH)) + #define PF_LOG 0x01 #define PF_LOG_ALL 0x02 #define PF_LOG_SOCKET_LOOKUP 0x04 diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 83dd63fd2290..d1ba2495dc30 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -73,7 +73,7 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x -static void pf_hash(struct pf_addr *, struct pf_addr *, +static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); static struct pf_krule *pf_match_translation(struct pf_pdesc *, int, struct pf_kanchor_stackframe *); @@ -84,7 +84,7 @@ static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, pf_sn_types_t); static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); -static void +static uint64_t pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, struct pf_poolhashkey *key, sa_family_t af) { @@ -95,20 +95,23 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, uint32_t hash32[2]; } h; #endif + uint64_t res = 0; _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, ""); switch (af) { #ifdef INET case AF_INET: - hash->addr32[0] = SipHash24(&ctx, (const uint8_t *)key, + res = SipHash24(&ctx, (const uint8_t *)key, &inaddr->addr32[0], sizeof(inaddr->addr32[0])); + hash->addr32[0] = res; break; #endif /* INET */ #ifdef INET6 case AF_INET6: - h.hash64 = SipHash24(&ctx, (const uint8_t *)key, + res = SipHash24(&ctx, (const uint8_t *)key, &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0])); + h.hash64 = res; hash->addr32[0] = h.hash32[0]; hash->addr32[1] = h.hash32[1]; /* @@ -120,6 +123,7 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, break; #endif /* INET6 */ } + return (res); } static struct pf_krule * @@ -459,6 +463,8 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, { u_short reason = PFRES_MATCH; struct pf_addr *raddr = NULL, *rmask = NULL; + uint64_t hashidx; + int cnt; mtx_lock(&rpool->mtx); /* Find the route using chosen algorithm. Store the found route @@ -472,8 +478,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, #ifdef INET case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && - (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) { + !PF_POOL_DYNTYPE(rpool->opts)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } @@ -484,8 +489,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, #ifdef INET6 case AF_INET6: if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && - (rpool->opts & PF_POOL_TYPEMASK) != - PF_POOL_ROUNDROBIN) { + !PF_POOL_DYNTYPE(rpool->opts)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } @@ -495,7 +499,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, #endif /* INET6 */ } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { - if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { + if (!PF_POOL_DYNTYPE(rpool->opts)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -512,7 +516,28 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, PF_POOLMASK(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: - if (init_addr != NULL && PF_AZERO(init_addr, af)) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; + rpool->tblidx = (int)arc4random_uniform(cnt); + memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, af, NULL)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; /* unsupported */ + } + PF_ACPY(naddr, &rpool->counter, af); + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + rpool->tblidx = (int)arc4random_uniform(cnt); + memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, af, + pf_islinklocal)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; /* unsupported */ + } + PF_ACPY(naddr, &rpool->counter, af); + } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { switch (af) { #ifdef INET case AF_INET: @@ -554,8 +579,33 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, { unsigned char hash[16]; - pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); - PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + hashidx = + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; + rpool->tblidx = (int)(hashidx % cnt); + memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, af, NULL)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; /* unsupported */ + } + PF_ACPY(naddr, &rpool->counter, af); + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + rpool->tblidx = (int)(hashidx % cnt); + memset(&rpool->counter, 0, sizeof(rpool->counter)); + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, af, + pf_islinklocal)) { + reason = PFRES_MAPFAILED; + goto done_pool_mtx; /* unsupported */ + } + PF_ACPY(naddr, &rpool->counter, af); + } else { + PF_POOLMASK(naddr, raddr, rmask, + (struct pf_addr *)&hash, af); + } break; } case PF_POOL_ROUNDROBIN: diff --git a/tests/sys/netpfil/pf/nat64.sh b/tests/sys/netpfil/pf/nat64.sh index 9cc6aececc42..68d9191cacb9 100644 --- a/tests/sys/netpfil/pf/nat64.sh +++ b/tests/sys/netpfil/pf/nat64.sh @@ -409,7 +409,7 @@ pool_cleanup() atf_test_case "table" table_head() { - atf_set descr 'Tables require round-robin' + atf_set descr 'Check table restrictions' atf_set require.user root } @@ -417,9 +417,18 @@ table_body() { pft_init - echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr>" | \ + # Round-robin and random are allowed + echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> round-robin" | \ + atf_check -s exit:0 \ + pfctl -f - + echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> random" | \ + atf_check -s exit:0 \ + pfctl -f - + + # bitmask is not + echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> bitmask" | \ atf_check -s exit:1 \ - -e match:"tables are only supported in round-robin pools" \ + -e match:"tables are not supported by pool type" \ pfctl -f - } @@ -489,15 +498,11 @@ table_range_cleanup() pft_cleanup } -atf_test_case "table_round_robin" "cleanup" -table_round_robin_head() +table_common_body() { - atf_set descr 'Use a table of IPv4 addresses in round-robin mode' - atf_set require.user root -} + pool_type=$1 +echo pool_type=${pool_type} -table_round_robin_body() -{ pft_init epair_link=$(vnet_mkepair) @@ -527,7 +532,7 @@ table_round_robin_body() "set reassemble yes" \ "set state-policy if-bound" \ "table <wanaddrs> { 192.0.2.1, 192.0.2.3, 192.0.2.4 }" \ - "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet from <wanaddrs> round-robin" + "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet from <wanaddrs> ${pool_type}" # Use pf to count sources jexec dst pfctl -e @@ -541,13 +546,30 @@ table_round_robin_body() atf_check -s exit:0 -o ignore \ ping6 -c 1 64:ff9b::192.0.2.2 - # Verify on dst that we saw different source addresses - atf_check -s exit:0 -o match:".*192.0.2.1.*" \ - jexec dst pfctl -ss - atf_check -s exit:0 -o match:".*192.0.2.3.*" \ - jexec dst pfctl -ss - atf_check -s exit:0 -o match:".*192.0.2.4.*" \ - jexec dst pfctl -ss + # XXX We can't reasonably check pool type random because it's random. It may end + # up choosing the same source IP for all three connections. + if [ "${pool_type}" == "round-robin" ]; + then + # Verify on dst that we saw different source addresses + atf_check -s exit:0 -o match:".*192.0.2.1.*" \ + jexec dst pfctl -ss + atf_check -s exit:0 -o match:".*192.0.2.3.*" \ + jexec dst pfctl -ss + atf_check -s exit:0 -o match:".*192.0.2.4.*" \ + jexec dst pfctl -ss + fi +} + +atf_test_case "table_round_robin" "cleanup" +table_round_robin_head() +{ + atf_set descr 'Use a table of IPv4 addresses in round-robin mode' + atf_set require.user root +} + +table_round_robin_body() +{ + table_common_body round-robin } table_round_robin_cleanup() @@ -555,6 +577,23 @@ table_round_robin_cleanup() pft_cleanup } +atf_test_case "table_random" "cleanup" +table_random_head() +{ + atf_set descr 'Use a table of IPv4 addresses in random mode' + atf_set require.user root +} + +table_random_body() +{ + table_common_body random +} + +table_random_cleanup() +{ + pft_cleanup +} + atf_test_case "dummynet" "cleanup" dummynet_head() { @@ -775,6 +814,7 @@ atf_init_test_cases() atf_add_test_case "table" atf_add_test_case "table_range" atf_add_test_case "table_round_robin" + atf_add_test_case "table_random" atf_add_test_case "dummynet" atf_add_test_case "gateway6" atf_add_test_case "route_to"
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202502200935.51K9ZSQV032062>