Date:      Mon, 2 Jun 2025 15:30:34 GMT
From:      Kristof Provost <kp@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: fe96610ba943 - main - pf: rework anchor handling to recurse
Message-ID:  <202506021530.552FUYTC049693@gitrepo.freebsd.org>

The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=fe96610ba94332c7c4c59f3f82920d1ef1177bf2

commit fe96610ba94332c7c4c59f3f82920d1ef1177bf2
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2025-05-22 10:02:49 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2025-06-02 15:30:18 +0000

    pf: rework anchor handling to recurse
    
    - percpu anchor stacks
      We don't actually need to pre-allocate per_anchor_stack[] if we use
      'natural' recursion when doing the anchor tree traversal.
    
    O.K. mikeb@, mpi@
    
    Extended because FreeBSD pf still has separate NAT rules processing, which also
    needed this change.
    
    Obtained from:  OpenBSD, sashan <sashan@openbsd.org>, e236f0fa7b
    Obtained from:  OpenBSD, sashan <sashan@openbsd.org>, 5e4ad307dc
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D50579
---
 sys/net/pfvar.h        |  51 ++++-
 sys/netpfil/pf/pf.c    | 557 ++++++++++++++++++++++++-------------------------
 sys/netpfil/pf/pf_lb.c | 146 +++++++++----
 3 files changed, 422 insertions(+), 332 deletions(-)
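
For readers skimming the diff: the heart of the change is that pf_match_rule()
now walks a single ruleset and, for anchor rules, calls pf_step_into_anchor(),
which simply recurses back into pf_match_rule() for each child ruleset, bounded
by PF_ANCHOR_STACK_MAX. This replaces the pre-allocated pf_kanchor_stackframe[]
array and the pf_step_into_anchor()/pf_step_out_of_anchor() bookkeeping. What
follows is a minimal stand-alone sketch of that traversal pattern only; the
types and names here (rule, ruleset, test_ctx, ...) are simplified stand-ins
rather than the kernel's, and wildcard-anchor iteration and the quick/match
bookkeeping are reduced to the bare minimum.

#include <stdio.h>

#define ANCHOR_DEPTH_MAX 32

enum test_status { TEST_FAIL = -1, TEST_OK, TEST_QUICK };

struct ruleset;

struct rule {
	const char	*name;
	int		 matches;	/* does this rule match the packet? */
	int		 quick;		/* stop evaluation on a match? */
	struct ruleset	*anchor;	/* non-NULL: recurse into a child ruleset */
	struct rule	*next;
};

struct ruleset {
	struct rule	*rules;
};

struct test_ctx {
	const struct rule	*rm;	/* last matching rule */
	int			 depth;	/* current anchor nesting depth */
};

static enum test_status match_ruleset(struct test_ctx *, struct ruleset *);

/* Analogue of pf_step_into_anchor(): recurse instead of pushing a stack frame. */
static enum test_status
step_into_anchor(struct test_ctx *ctx, struct rule *r)
{
	enum test_status rv;

	if (ctx->depth >= ANCHOR_DEPTH_MAX)
		return (TEST_FAIL);	/* anchors nested too deeply */

	ctx->depth++;
	rv = match_ruleset(ctx, r->anchor);
	ctx->depth--;

	return (rv);
}

/* Analogue of pf_match_rule(): walk one ruleset, recursing into anchors. */
static enum test_status
match_ruleset(struct test_ctx *ctx, struct ruleset *rs)
{
	enum test_status rv = TEST_OK;

	for (struct rule *r = rs->rules; r != NULL; r = r->next) {
		if (r->anchor != NULL) {
			rv = step_into_anchor(ctx, r);
			if (rv != TEST_OK)
				break;	/* quick match or failure below us */
		} else if (r->matches) {
			ctx->rm = r;	/* remember the last match */
			if (r->quick) {
				rv = TEST_QUICK;
				break;
			}
		}
	}

	return (rv);
}

int
main(void)
{
	/* main ruleset: r1, then an anchor whose child holds a quick rule, then r2 */
	struct rule aq = { "anchor-quick", 1, 1, NULL, NULL };
	struct ruleset child = { &aq };
	struct rule r2 = { "r2", 1, 0, NULL, NULL };
	struct rule a = { "anchor", 0, 0, &child, &r2 };
	struct rule r1 = { "r1", 1, 0, NULL, &a };
	struct ruleset root = { &r1 };
	struct test_ctx ctx = { NULL, 0 };

	enum test_status rv = match_ruleset(&ctx, &root);
	printf("status %d, matched %s\n", rv, ctx.rm ? ctx.rm->name : "none");
	return (0);
}

The trade-off is that anchor nesting now consumes kernel stack instead of a
fixed 32-entry frame array, which is why the recursion keeps the same depth
limit (PF_ANCHOR_STACK_MAX) and, per the new pf_step_into_anchor() in the diff
below, fails the evaluation with PF_TEST_FAIL instead of silently skipping the
anchor when that limit is exceeded.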

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 82d74e08c14f..b3e3260d80a6 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1151,6 +1151,45 @@ struct pf_kstate {
  * Try to not grow the struct beyond that.
  */
 _Static_assert(sizeof(struct pf_kstate) <= 384, "pf_kstate size crosses 384 bytes");
+
+enum pf_test_status {
+	PF_TEST_FAIL = -1,
+	PF_TEST_OK,
+	PF_TEST_QUICK
+};
+
+struct pf_test_ctx {
+	enum pf_test_status	 test_status;
+	struct pf_pdesc		*pd;
+	struct pf_rule_actions	 act;
+	uint8_t			 icmpcode;
+	uint8_t			 icmptype;
+	int			 icmp_dir;
+	int			 state_icmp;
+	int			 tag;
+	int			 rewrite;
+	u_short			 reason;
+	struct pf_src_node	*sns[PF_SN_MAX];
+	struct pf_krule_slist	 rules;
+	struct pf_krule		*nr;
+	struct pf_krule		*tr;
+	struct pf_krule		**rm;
+	struct pf_krule		*a;
+	struct pf_krule		**am;
+	struct pf_kruleset	**rsm;
+	struct pf_kruleset	*arsm;
+	struct pf_kruleset	*aruleset;
+	struct pf_state_key	*sk;
+	struct pf_state_key	*nk;
+	struct tcphdr		*th;
+	struct pf_udp_mapping	*udp_mapping;
+	struct pf_kpool		*nat_pool;
+	uint16_t		 virtual_type;
+	uint16_t		 virtual_id;
+	int			 depth;
+};
+
+#define	PF_ANCHOR_STACK_MAX	32
 #endif
 
 /*
@@ -1411,7 +1450,6 @@ RB_PROTOTYPE(pf_kanchor_node, pf_kanchor, entry_node, pf_kanchor_compare);
 				 PFR_TFLAG_REFDANCHOR	| \
 				 PFR_TFLAG_COUNTERS)
 
-struct pf_kanchor_stackframe;
 struct pf_keth_anchor_stackframe;
 
 struct pfr_table {
@@ -2678,12 +2716,8 @@ int	pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
 #ifdef _KERNEL
 void			 pf_print_host(struct pf_addr *, u_int16_t, sa_family_t);
 
-void			 pf_step_into_anchor(struct pf_kanchor_stackframe *, int *,
-			    struct pf_kruleset **, int, struct pf_krule **,
-			    struct pf_krule **);
-int			 pf_step_out_of_anchor(struct pf_kanchor_stackframe *, int *,
-			    struct pf_kruleset **, int, struct pf_krule **,
-			    struct pf_krule **, int *);
+enum pf_test_status	 pf_step_into_anchor(struct pf_test_ctx *, struct pf_krule *);
+int			 pf_match_rule(struct pf_test_ctx *, struct pf_kruleset *);
 void			 pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *,
 			    int *, struct pf_keth_ruleset **,
 			    struct pf_keth_rule **, struct pf_keth_rule **,
@@ -2706,8 +2740,7 @@ int			 pf_get_transaddr_af(struct pf_krule *,
 			    struct pf_pdesc *);
 u_short			 pf_get_translation(struct pf_pdesc *,
 			    int, struct pf_state_key **, struct pf_state_key **,
-			    struct pf_kanchor_stackframe *, struct pf_krule **,
-			    struct pf_udp_mapping **udp_mapping);
+			    struct pf_test_ctx *, struct pf_udp_mapping **udp_mapping);
 u_short			 pf_get_transaddr(struct pf_pdesc *,
 			    struct pf_state_key **, struct pf_state_key **,
 			    struct pf_krule *, struct pf_udp_mapping **,
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 5c572dfcd425..a5f51d69ab2b 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -4629,13 +4629,6 @@ pf_tag_packet(struct pf_pdesc *pd, int tag)
 	return (0);
 }
 
-#define	PF_ANCHOR_STACKSIZE	32
-struct pf_kanchor_stackframe {
-	struct pf_kruleset	*rs;
-	struct pf_krule		*r;	/* XXX: + match bit */
-	struct pf_kanchor	*child;
-};
-
 /*
  * XXX: We rely on malloc(9) returning pointer aligned addresses.
  */
@@ -4649,80 +4642,42 @@ struct pf_kanchor_stackframe {
 				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
 } while (0)
 
-void
-pf_step_into_anchor(struct pf_kanchor_stackframe *stack, int *depth,
-    struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a)
+enum pf_test_status
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r)
 {
-	struct pf_kanchor_stackframe	*f;
+	enum pf_test_status	rv;
 
 	PF_RULES_RASSERT();
 
-	if (*depth >= PF_ANCHOR_STACKSIZE) {
+	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
 		printf("%s: anchor stack overflow on %s\n",
-		    __func__, (*r)->anchor->name);
-		*r = TAILQ_NEXT(*r, entries);
-		return;
-	} else if (*depth == 0 && a != NULL)
-		*a = *r;
-	f = stack + (*depth)++;
-	f->rs = *rs;
-	f->r = *r;
-	if ((*r)->anchor_wildcard) {
-		struct pf_kanchor_node *parent = &(*r)->anchor->children;
-
-		if ((f->child = RB_MIN(pf_kanchor_node, parent)) == NULL) {
-			*r = NULL;
-			return;
-		}
-		*rs = &f->child->ruleset;
-	} else {
-		f->child = NULL;
-		*rs = &(*r)->anchor->ruleset;
+		    __func__, r->anchor->name);
+		return (PF_TEST_FAIL);
 	}
-	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
-}
 
-int
-pf_step_out_of_anchor(struct pf_kanchor_stackframe *stack, int *depth,
-    struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a,
-    int *match)
-{
-	struct pf_kanchor_stackframe	*f;
-	struct pf_krule *fr;
-	int quick = 0;
-
-	PF_RULES_RASSERT();
+	ctx->depth++;
 
-	do {
-		if (*depth <= 0)
-			break;
-		f = stack + *depth - 1;
-		fr = PF_ANCHOR_RULE(f);
-		if (f->child != NULL) {
-			f->child = RB_NEXT(pf_kanchor_node,
-			    &fr->anchor->children, f->child);
-			if (f->child != NULL) {
-				*rs = &f->child->ruleset;
-				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
-				if (*r == NULL)
-					continue;
-				else
-					break;
+	if (r->anchor_wildcard) {
+		struct pf_kanchor *child;
+		rv = PF_TEST_OK;
+		RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
+			rv = pf_match_rule(ctx, &child->ruleset);
+			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
+				/*
+				 * we either hit a rule with quick action
+				 * (more likely), or hit some runtime
+				 * error (e.g. pool_get() failure).
+				 */
+				break;
 			}
 		}
-		(*depth)--;
-		if (*depth == 0 && a != NULL)
-			*a = NULL;
-		*rs = f->rs;
-		if (match != NULL && *match > *depth) {
-			*match = *depth;
-			if (f->r->quick)
-				quick = 1;
-		}
-		*r = TAILQ_NEXT(fr, entries);
-	} while (*r == NULL);
+	} else {
+		rv = pf_match_rule(ctx, &r->anchor->ruleset);
+	}
 
-	return (quick);
+	ctx->depth--;
+
+	return (rv);
 }
 
 struct pf_keth_anchor_stackframe {
@@ -4749,7 +4704,7 @@ pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
 
 	if (match)
 		*match = 0;
-	if (*depth >= PF_ANCHOR_STACKSIZE) {
+	if (*depth >= PF_ANCHOR_STACK_MAX) {
 		printf("%s: anchor stack overflow on %s\n",
 		    __func__, (*r)->anchor->name);
 		*r = TAILQ_NEXT(*r, entries);
@@ -5240,7 +5195,7 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
 	int asd = 0, match = 0;
 	int tag = -1;
 	uint8_t action;
-	struct pf_keth_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
+	struct pf_keth_anchor_stackframe	anchor_stack[PF_ANCHOR_STACK_MAX];
 
 	MPASS(kif->pfik_ifp->if_vnet == curvnet);
 	NET_EPOCH_ASSERT();
@@ -5495,12 +5450,11 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
 	return (action);
 }
 
-#define PF_TEST_ATTRIB(t, a)\
-	do {				\
-		if (t) {		\
-			r = a;		\
-			goto nextrule;	\
-		}			\
+#define PF_TEST_ATTRIB(t, a)		\
+	if (t) {			\
+		r = a;			\
+		continue;		\
+	} else do {			\
 	} while (0)
 
 static __inline u_short
@@ -5555,132 +5509,18 @@ pf_rule_apply_nat(struct pf_pdesc *pd, struct pf_state_key **skp,
 	return (PFRES_MAX);
 }
 
-static int
-pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
-    struct pf_pdesc *pd, struct pf_krule **am,
-    struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp)
+enum pf_test_status
+pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
 {
-	struct pf_krule		*nr = NULL;
-	struct pf_krule		*r, *a = NULL;
-	struct pf_kruleset	*ruleset = NULL;
-	struct pf_krule_slist	 match_rules;
 	struct pf_krule_item	*ri;
-	struct tcphdr		*th = &pd->hdr.tcp;
-	struct pf_state_key	*sk = NULL, *nk = NULL;
+	struct pf_krule		*r;
+	struct pf_pdesc		*pd = ctx->pd;
 	u_short			 transerror;
-	int			 rewrite = 0;
-	int			 tag = -1;
-	int			 asd = 0;
-	int			 match = 0;
-	int			 state_icmp = 0, icmp_dir;
-	int			 action = PF_PASS;
-	u_int16_t		 virtual_type, virtual_id;
-	u_int16_t		 bproto_sum = 0, bip_sum = 0;
-	u_int8_t		 icmptype = 0, icmpcode = 0;
-	struct pf_kanchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
-	struct pf_udp_mapping	*udp_mapping = NULL;
-	struct pf_kpool		*nat_pool = NULL;
-
-	PF_RULES_RASSERT();
-
-	PF_ACPY(&pd->nsaddr, pd->src, pd->af);
-	PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
-
-	SLIST_INIT(&match_rules);
-
-	if (inp != NULL) {
-		INP_LOCK_ASSERT(inp);
-		pd->lookup.uid = inp->inp_cred->cr_uid;
-		pd->lookup.gid = inp->inp_cred->cr_groups[0];
-		pd->lookup.done = 1;
-	}
-
-	if (pd->ip_sum)
-		bip_sum = *pd->ip_sum;
-
-	switch (pd->virtual_proto) {
-	case IPPROTO_TCP:
-		bproto_sum = th->th_sum;
-		pd->nsport = th->th_sport;
-		pd->ndport = th->th_dport;
-		break;
-	case IPPROTO_UDP:
-		bproto_sum = pd->hdr.udp.uh_sum;
-		pd->nsport = pd->hdr.udp.uh_sport;
-		pd->ndport = pd->hdr.udp.uh_dport;
-		break;
-	case IPPROTO_SCTP:
-		pd->nsport = pd->hdr.sctp.src_port;
-		pd->ndport = pd->hdr.sctp.dest_port;
-		break;
-#ifdef INET
-	case IPPROTO_ICMP:
-		MPASS(pd->af == AF_INET);
-		icmptype = pd->hdr.icmp.icmp_type;
-		icmpcode = pd->hdr.icmp.icmp_code;
-		state_icmp = pf_icmp_mapping(pd, icmptype,
-		    &icmp_dir, &virtual_id, &virtual_type);
-		if (icmp_dir == PF_IN) {
-			pd->nsport = virtual_id;
-			pd->ndport = virtual_type;
-		} else {
-			pd->nsport = virtual_type;
-			pd->ndport = virtual_id;
-		}
-		break;
-#endif /* INET */
-#ifdef INET6
-	case IPPROTO_ICMPV6:
-		MPASS(pd->af == AF_INET6);
-		icmptype = pd->hdr.icmp6.icmp6_type;
-		icmpcode = pd->hdr.icmp6.icmp6_code;
-		state_icmp = pf_icmp_mapping(pd, icmptype,
-		    &icmp_dir, &virtual_id, &virtual_type);
-		if (icmp_dir == PF_IN) {
-			pd->nsport = virtual_id;
-			pd->ndport = virtual_type;
-		} else {
-			pd->nsport = virtual_type;
-			pd->ndport = virtual_id;
-		}
-
-		break;
-#endif /* INET6 */
-	default:
-		pd->nsport = pd->ndport = 0;
-		break;
-	}
-	pd->osport = pd->nsport;
-	pd->odport = pd->ndport;
-
-	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
-
-	/* check packet for BINAT/NAT/RDR */
-	transerror = pf_get_translation(pd, pd->off, &sk, &nk, anchor_stack,
-	    &nr, &udp_mapping);
-	switch (transerror) {
-	default:
-		/* A translation error occurred. */
-		REASON_SET(reason, transerror);
-		goto cleanup;
-	case PFRES_MAX:
-		/* No match. */
-		break;
-	case PFRES_MATCH:
-		KASSERT(sk != NULL, ("%s: null sk", __func__));
-		KASSERT(nk != NULL, ("%s: null nk", __func__));
-		if (nr->log) {
-			PFLOG_PACKET(nr->action, PFRES_MATCH, nr, a,
-			    ruleset, pd, 1, NULL);
-		}
-
-		rewrite += pf_translate_compat(pd, sk, nk, nr, virtual_type);
-		nat_pool = &(nr->rdr);
-	}
 
+	r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr);
 	while (r != NULL) {
-		if (pd->related_rule) {
-			*rm = pd->related_rule;
+		if (ctx->pd->related_rule) {
+			*ctx->rm = ctx->pd->related_rule;
 			break;
 		}
 		pf_counter_u64_add(&r->evaluations, 1);
@@ -5714,7 +5554,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
 			break;
 
 		case IPPROTO_TCP:
-			PF_TEST_ATTRIB((r->flagset & tcp_get_flags(th)) != r->flags,
+			PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th))
+			    != r->flags,
 				TAILQ_NEXT(r, entries));
 			/* FALLTHROUGH */
 		case IPPROTO_SCTP:
@@ -5744,10 +5585,10 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
 		case IPPROTO_ICMP:
 		case IPPROTO_ICMPV6:
 			/* icmp only. type always 0 in other cases */
-			PF_TEST_ATTRIB(r->type && r->type != icmptype + 1,
+			PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1,
 				TAILQ_NEXT(r, entries));
 			/* icmp only. type always 0 in other cases */
-			PF_TEST_ATTRIB(r->code && r->code != icmpcode + 1,
+			PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1,
 				TAILQ_NEXT(r, entries));
 			break;
 
@@ -5762,8 +5603,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
 		PF_TEST_ATTRIB(r->prob &&
 		    r->prob <= arc4random(),
 			TAILQ_NEXT(r, entries));
-		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag,
-		    pd->pf_mtag ? pd->pf_mtag->tag : 0),
+		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r,
+		    &ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0),
 			TAILQ_NEXT(r, entries));
 		PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
 		   r->rcvifnot),
@@ -5773,21 +5614,21 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
 			TAILQ_NEXT(r, entries));
 		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY &&
 		    (pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match(
-		    pf_osfp_fingerprint(pd, th),
+		    pf_osfp_fingerprint(pd, ctx->th),
 		    r->os_fingerprint)),
 			TAILQ_NEXT(r, entries));
 		/* FALLTHROUGH */
 		if (r->tag)
-			tag = r->tag;
+			ctx->tag = r->tag;
 		if (r->anchor == NULL) {
 			if (r->action == PF_MATCH) {
 				/*
 				 * Apply translations before increasing counters,
 				 * in case it fails.
 				 */
-				transerror = pf_rule_apply_nat(pd, &sk, &nk, r,
-				    &nr, &udp_mapping, virtual_type, &rewrite,
-				    &nat_pool);
+				transerror = pf_rule_apply_nat(pd, &ctx->sk, &ctx->nk, r,
+				    &ctx->nr, &ctx->udp_mapping, ctx->virtual_type,
+				    &ctx->rewrite, &ctx->nat_pool);
 				switch (transerror) {
 				case PFRES_MATCH:
 					/* Translation action found in rule and applied successfully */
@@ -5796,16 +5637,16 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
 					break;
 				default:
 					/* Translation action found in rule but failed to apply */
-					REASON_SET(reason, transerror);
-					goto cleanup;
+					REASON_SET(&ctx->reason, transerror);
+					return (PF_TEST_FAIL);
 				}
 				ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO);
 				if (ri == NULL) {
-					REASON_SET(reason, PFRES_MEMORY);
-					goto cleanup;
+					REASON_SET(&ctx->reason, PFRES_MEMORY);
+					return (PF_TEST_FAIL);
 				}
 				ri->r = r;
-				SLIST_INSERT_HEAD(&match_rules, ri, entry);
+				SLIST_INSERT_HEAD(&ctx->rules, ri, entry);
 				pf_counter_u64_critical_enter();
 				pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
 				pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len);
@@ -5813,36 +5654,183 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
 				pf_rule_to_actions(r, &pd->act);
 				if (r->log)
 					PFLOG_PACKET(r->action, PFRES_MATCH, r,
-					    a, ruleset, pd, 1, NULL);
+					    ctx->a, ruleset, pd, 1, NULL);
 			} else {
-				match = asd;
-				*rm = r;
-				*am = a;
-				*rsm = ruleset;
+				/*
+				 * Found the matching rule r.
+				 */
+				*ctx->rm = r;
+				/*
+				 * The anchor rule whose ruleset r belongs to.
+				 */
+				*ctx->am = ctx->a;
+				/*
+				 * The ruleset r belongs to.
+				 */
+				*ctx->rsm = ruleset;
+				/*
+				 * The ruleset the anchor rule itself belongs to.
+				 */
+				ctx->arsm = ctx->aruleset;
 			}
 			if (pd->act.log & PF_LOG_MATCHES)
-				pf_log_matches(pd, r, a, ruleset, &match_rules);
-			if (r->quick)
+				pf_log_matches(pd, r, ctx->a, ruleset, &ctx->rules);
+			if (r->quick) {
+				ctx->test_status = PF_TEST_QUICK;
 				break;
-			r = TAILQ_NEXT(r, entries);
-		} else
-			pf_step_into_anchor(anchor_stack, &asd,
-			    &ruleset, PF_RULESET_FILTER, &r, &a);
-nextrule:
-		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
-		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
-			break;
+			}
+		} else {
+			ctx->a = r;			/* remember anchor */
+			ctx->aruleset = ruleset;	/* and its ruleset */
+			if (ctx->a->quick)
+				ctx->test_status = PF_TEST_QUICK;
+			if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
+				break;
+			}
+		}
+		r = TAILQ_NEXT(r, entries);
+	}
+
+	return (ctx->test_status);
+}
+
+static int
+pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
+    struct pf_pdesc *pd, struct pf_krule **am,
+    struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp)
+{
+	struct pf_krule		*r = NULL;
+	struct pf_kruleset	*ruleset = NULL;
+	struct pf_krule_item	*ri;
+	struct pf_test_ctx	 ctx;
+	u_short			 transerror;
+	int			 action = PF_PASS;
+	u_int16_t		 bproto_sum = 0, bip_sum = 0;
+	enum pf_test_status	 rv;
+
+	PF_RULES_RASSERT();
+
+	bzero(&ctx, sizeof(ctx));
+	ctx.tag = -1;
+	ctx.pd = pd;
+	ctx.rm = rm;
+	ctx.am = am;
+	ctx.rsm = rsm;
+	ctx.th = &pd->hdr.tcp;
+	ctx.reason = *reason;
+	SLIST_INIT(&ctx.rules);
+
+	PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+	PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+
+	if (inp != NULL) {
+		INP_LOCK_ASSERT(inp);
+		pd->lookup.uid = inp->inp_cred->cr_uid;
+		pd->lookup.gid = inp->inp_cred->cr_groups[0];
+		pd->lookup.done = 1;
+	}
+
+	if (pd->ip_sum)
+		bip_sum = *pd->ip_sum;
+
+	switch (pd->virtual_proto) {
+	case IPPROTO_TCP:
+		bproto_sum = ctx.th->th_sum;
+		pd->nsport = ctx.th->th_sport;
+		pd->ndport = ctx.th->th_dport;
+		break;
+	case IPPROTO_UDP:
+		bproto_sum = pd->hdr.udp.uh_sum;
+		pd->nsport = pd->hdr.udp.uh_sport;
+		pd->ndport = pd->hdr.udp.uh_dport;
+		break;
+	case IPPROTO_SCTP:
+		pd->nsport = pd->hdr.sctp.src_port;
+		pd->ndport = pd->hdr.sctp.dest_port;
+		break;
+#ifdef INET
+	case IPPROTO_ICMP:
+		MPASS(pd->af == AF_INET);
+		ctx.icmptype = pd->hdr.icmp.icmp_type;
+		ctx.icmpcode = pd->hdr.icmp.icmp_code;
+		ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+		    &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
+		if (ctx.icmp_dir == PF_IN) {
+			pd->nsport = ctx.virtual_id;
+			pd->ndport = ctx.virtual_type;
+		} else {
+			pd->nsport = ctx.virtual_type;
+			pd->ndport = ctx.virtual_id;
+		}
+		break;
+#endif /* INET */
+#ifdef INET6
+	case IPPROTO_ICMPV6:
+		MPASS(pd->af == AF_INET6);
+		ctx.icmptype = pd->hdr.icmp6.icmp6_type;
+		ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
+		ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+		    &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
+		if (ctx.icmp_dir == PF_IN) {
+			pd->nsport = ctx.virtual_id;
+			pd->ndport = ctx.virtual_type;
+		} else {
+			pd->nsport = ctx.virtual_type;
+			pd->ndport = ctx.virtual_id;
+		}
+
+		break;
+#endif /* INET6 */
+	default:
+		pd->nsport = pd->ndport = 0;
+		break;
 	}
-	r = *rm;
-	a = *am;
-	ruleset = *rsm;
+	pd->osport = pd->nsport;
+	pd->odport = pd->ndport;
 
-	REASON_SET(reason, PFRES_MATCH);
+	/* check packet for BINAT/NAT/RDR */
+	transerror = pf_get_translation(pd, pd->off, &ctx.sk, &ctx.nk, &ctx,
+	    &ctx.udp_mapping);
+	switch (transerror) {
+	default:
+		/* A translation error occurred. */
+		REASON_SET(&ctx.reason, transerror);
+		goto cleanup;
+	case PFRES_MAX:
+		/* No match. */
+		break;
+	case PFRES_MATCH:
+		KASSERT(ctx.sk != NULL, ("%s: null sk", __func__));
+		KASSERT(ctx.nk != NULL, ("%s: null nk", __func__));
+		if (ctx.nr->log) {
+			PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a,
+			    ruleset, pd, 1, NULL);
+		}
+
+		ctx.rewrite += pf_translate_compat(pd, ctx.sk, ctx.nk, ctx.nr, ctx.virtual_type);
+		ctx.nat_pool = &(ctx.nr->rdr);
+	}
+
+	ruleset = &pf_main_ruleset;
+	rv = pf_match_rule(&ctx, ruleset);
+	if (rv == PF_TEST_FAIL) {
+		/*
+		 * Reason has been set in pf_match_rule() already.
+		 */
+		goto cleanup;
+	}
+
+	r = *ctx.rm;			/* matching rule */
+	ctx.a = *ctx.am;		/* rule that defines an anchor containing 'r' */
+	ruleset = *ctx.rsm;		/* ruleset of the anchor defined by the rule 'a' */
+	ctx.aruleset = ctx.arsm;	/* ruleset of the 'a' rule itself */
+
+	REASON_SET(&ctx.reason, PFRES_MATCH);
 
 	/* apply actions for last matching pass/block rule */
 	pf_rule_to_actions(r, &pd->act);
-	transerror = pf_rule_apply_nat(pd, &sk, &nk, r, &nr, &udp_mapping,
-	    virtual_type, &rewrite, &nat_pool);
+	transerror = pf_rule_apply_nat(pd, &ctx.sk, &ctx.nk, r, &ctx.nr, &ctx.udp_mapping,
+	    ctx.virtual_type, &ctx.rewrite, &ctx.nat_pool);
 	switch (transerror) {
 	case PFRES_MATCH:
 		/* Translation action found in rule and applied successfully */
@@ -5851,31 +5839,31 @@ nextrule:
 		break;
 	default:
 		/* Translation action found in rule but failed to apply */
-		REASON_SET(reason, transerror);
+		REASON_SET(&ctx.reason, transerror);
 		goto cleanup;
 	}
 
 	if (r->log) {
-		if (rewrite)
+		if (ctx.rewrite)
 			m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
-		PFLOG_PACKET(r->action, *reason, r, a, ruleset, pd, 1, NULL);
+		PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL);
 	}
 	if (pd->act.log & PF_LOG_MATCHES)
-		pf_log_matches(pd, r, a, ruleset, &match_rules);
+		pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules);
 	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
 	   (r->action == PF_DROP) &&
 	    ((r->rule_flag & PFRULE_RETURNRST) ||
 	    (r->rule_flag & PFRULE_RETURNICMP) ||
 	    (r->rule_flag & PFRULE_RETURN))) {
-		pf_return(r, nr, pd, th, bproto_sum,
-		    bip_sum, reason, r->rtableid);
+		pf_return(r, ctx.nr, pd, ctx.th, bproto_sum,
+		    bip_sum, &ctx.reason, r->rtableid);
 	}
 
 	if (r->action == PF_DROP)
 		goto cleanup;
 
-	if (tag > 0 && pf_tag_packet(pd, tag)) {
-		REASON_SET(reason, PFRES_MEMORY);
+	if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) {
+		REASON_SET(&ctx.reason, PFRES_MEMORY);
 		goto cleanup;
 	}
 	if (pd->act.rtableid >= 0)
@@ -5890,31 +5878,32 @@ nextrule:
 		 */
 		pd->act.rt = r->rt;
 		/* Don't use REASON_SET, pf_map_addr increases the reason counters */
-		*reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr,
+		ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr,
 		    &pd->act.rt_kif, NULL, &sn, &snh, &(r->route), PF_SN_ROUTE);
-		if (*reason != 0)
+		if (ctx.reason != 0)
 			goto cleanup;
 	}
 
 	if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
-	   (!state_icmp && (r->keep_state || nr != NULL ||
+	   (!ctx.state_icmp && (r->keep_state || ctx.nr != NULL ||
 	    (pd->flags & PFDESC_TCP_NORM)))) {
 		bool nat64;
 
-		action = pf_create_state(r, nr, a, pd, nk, sk,
-		    &rewrite, sm, tag, bproto_sum, bip_sum,
-		    &match_rules, udp_mapping, nat_pool, reason);
-		sk = nk = NULL;
+		action = pf_create_state(r, ctx.nr, ctx.a, pd, ctx.nk, ctx.sk,
+		    &ctx.rewrite, sm, ctx.tag, bproto_sum, bip_sum,
+		    &ctx.rules, ctx.udp_mapping, ctx.nat_pool, &ctx.reason);
+		ctx.sk = ctx.nk = NULL;
 		if (action != PF_PASS) {
-			pf_udp_mapping_release(udp_mapping);
-			if (r->log || (nr != NULL && nr->log) ||
-			    *reason == PFRES_MEMORY)
+			pf_udp_mapping_release(ctx.udp_mapping);
+			if (r->log || (ctx.nr != NULL && ctx.nr->log) ||
+			    ctx.reason == PFRES_MEMORY)
 				pd->act.log |= PF_LOG_FORCE;
 			if (action == PF_DROP &&
 			    (r->rule_flag & PFRULE_RETURN))
-				pf_return(r, nr, pd, th,
-				    bproto_sum, bip_sum, reason,
+				pf_return(r, ctx.nr, pd, ctx.th,
+				    bproto_sum, bip_sum, &ctx.reason,
 				    pd->act.rtableid);
+			*reason = ctx.reason;
 			return (action);
 		}
 
@@ -5922,69 +5911,73 @@ nextrule:
 		if (nat64) {
 			int			 ret;
 
-			if (sk == NULL)
-				sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
-			if (nk == NULL)
-				nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
+			if (ctx.sk == NULL)
+				ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
+			if (ctx.nk == NULL)
+				ctx.nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
 
 			if (pd->dir == PF_IN) {
-				ret = pf_translate(pd, &sk->addr[pd->didx],
-				    sk->port[pd->didx], &sk->addr[pd->sidx],
-				    sk->port[pd->sidx], virtual_type,
-				    icmp_dir);
+				ret = pf_translate(pd, &ctx.sk->addr[pd->didx],
+				    ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx],
+				    ctx.sk->port[pd->sidx], ctx.virtual_type,
+				    ctx.icmp_dir);
 			} else {
-				ret = pf_translate(pd, &sk->addr[pd->sidx],
-				    sk->port[pd->sidx], &sk->addr[pd->didx],
-				    sk->port[pd->didx], virtual_type,
-				    icmp_dir);
+				ret = pf_translate(pd, &ctx.sk->addr[pd->sidx],
+				    ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx],
+				    ctx.sk->port[pd->didx], ctx.virtual_type,
+				    ctx.icmp_dir);
 			}
 
 			if (ret < 0)
 				goto cleanup;
 
-			rewrite += ret;
+			ctx.rewrite += ret;
 
-			if (rewrite && sk->af != nk->af)
+			if (ctx.rewrite && ctx.sk->af != ctx.nk->af)
 				action = PF_AFRT;
 		}
 	} else {
-		while ((ri = SLIST_FIRST(&match_rules))) {
-			SLIST_REMOVE_HEAD(&match_rules, entry);
+		while ((ri = SLIST_FIRST(&ctx.rules))) {
+			SLIST_REMOVE_HEAD(&ctx.rules, entry);
 			free(ri, M_PF_RULE_ITEM);
 		}
 
-		uma_zfree(V_pf_state_key_z, sk);
-		uma_zfree(V_pf_state_key_z, nk);
-		sk = nk = NULL;
-		pf_udp_mapping_release(udp_mapping);
+		uma_zfree(V_pf_state_key_z, ctx.sk);
+		uma_zfree(V_pf_state_key_z, ctx.nk);
+		ctx.sk = ctx.nk = NULL;
+		pf_udp_mapping_release(ctx.udp_mapping);
 	}
 
 	/* copy back packet headers if we performed NAT operations */
-	if (rewrite)
+	if (ctx.rewrite)
 		m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
 
 	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
 	    pd->dir == PF_OUT &&
-	    V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m))
+	    V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) {
 		/*
 		 * We want the state created, but we don't
 		 * want to send this in case a partner
 		 * firewall has to know about it to allow
 		 * replies through it.
 		 */
+		*reason = ctx.reason;
 		return (PF_DEFER);
+	}
 
+	*reason = ctx.reason;
 	return (action);
 
 cleanup:
-	while ((ri = SLIST_FIRST(&match_rules))) {
-		SLIST_REMOVE_HEAD(&match_rules, entry);
+	while ((ri = SLIST_FIRST(&ctx.rules))) {
+		SLIST_REMOVE_HEAD(&ctx.rules, entry);
 		free(ri, M_PF_RULE_ITEM);
 	}
 
-	uma_zfree(V_pf_state_key_z, sk);
-	uma_zfree(V_pf_state_key_z, nk);
-	pf_udp_mapping_release(udp_mapping);
+	uma_zfree(V_pf_state_key_z, ctx.sk);
+	uma_zfree(V_pf_state_key_z, ctx.nk);
+	pf_udp_mapping_release(ctx.udp_mapping);
+	*reason = ctx.reason;
 
 	return (PF_DROP);
 }
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 00f25c29e23c..ed87aa2cec4d 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -75,8 +75,9 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
 
 static uint64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
 			    struct pf_poolhashkey *, sa_family_t);
-struct pf_krule		*pf_match_translation(struct pf_pdesc *,
-			    int, struct pf_kanchor_stackframe *);
+struct pf_krule		*pf_match_translation(int, struct pf_test_ctx *);
+static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *,
+			    struct pf_krule *);
 static int		 pf_get_sport(struct pf_pdesc *, struct pf_krule *,
 			    struct pf_addr *, uint16_t *, uint16_t, uint16_t,
 			    struct pf_ksrc_node **, struct pf_srchash **,
@@ -128,25 +129,21 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
 	return (res);
 }
 
-#define PF_TEST_ATTRIB(t, a)\
-	do {				\
-		if (t) {		\
-			r = a;		\
-			goto nextrule;	\
-		}			\
+#define PF_TEST_ATTRIB(t, a)		\
+	if (t) {			\
+		r = a;			\
+		continue;		\
+	} else do {			\
 	} while (0)
 
-struct pf_krule *
-pf_match_translation(struct pf_pdesc *pd,
-    int rs_num, struct pf_kanchor_stackframe *anchor_stack)
+static enum pf_test_status
+pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
 {
-	struct pf_krule		*r, *rm = NULL;
-	struct pf_kruleset	*ruleset = NULL;
-	int			 tag = -1;
+	struct pf_krule		*r;
+	struct pf_pdesc		*pd = ctx->pd;
 	int			 rtableid = -1;
-	int			 asd = 0;
 
-	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
+	r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
 	while (r != NULL) {
 		struct pf_rule_addr	*src = NULL, *dst = NULL;
 		struct pf_addr_wrap	*xdst = NULL;
@@ -188,7 +185,7 @@ pf_match_translation(struct pf_pdesc *pd,
 		    !pf_match_port(dst->port_op, dst->port[0],
 		    dst->port[1], pd->ndport),
 			r->skip[PF_SKIP_DST_PORT]);
-		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag,
+		PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0),
 			TAILQ_NEXT(r, entries));
 		PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
@@ -196,33 +193,101 @@ pf_match_translation(struct pf_pdesc *pd,
 		    &pd->hdr.tcp), r->os_fingerprint)),
 			TAILQ_NEXT(r, entries));
 		if (r->tag)
-			tag = r->tag;
+			ctx->tag = r->tag;
 		if (r->rtableid >= 0)
 			rtableid = r->rtableid;
 		if (r->anchor == NULL) {
-			rm = r;
-			if (rm->action == PF_NONAT ||
-			    rm->action == PF_NORDR ||
-			    rm->action == PF_NOBINAT) {
-				rm = NULL;
+			if (r->action == PF_NONAT ||
+			    r->action == PF_NORDR ||
+			    r->action == PF_NOBINAT) {
+				*ctx->rm = NULL;
+			} else {
+				/*
+				 * Found the matching rule r.
+				 */
+				ctx->tr = r;
+				/*
+				 * The anchor rule whose ruleset r belongs to.
+				 */
+				*ctx->am = ctx->a;
+				/*
+				 * The ruleset r belongs to.
+				 */
+				*ctx->rsm = ruleset;
+				/*
+				 * The ruleset the anchor rule itself belongs to.
+				 */
+				ctx->arsm = ctx->aruleset;
 			}
-			break;
*** 122 LINES SKIPPED ***


