Date: Mon, 2 Jun 2025 15:30:34 GMT From: Kristof Provost <kp@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: fe96610ba943 - main - pf: rework anchor handling to recurse Message-ID: <202506021530.552FUYTC049693@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by kp: URL: https://cgit.FreeBSD.org/src/commit/?id=fe96610ba94332c7c4c59f3f82920d1ef1177bf2 commit fe96610ba94332c7c4c59f3f82920d1ef1177bf2 Author: Kristof Provost <kp@FreeBSD.org> AuthorDate: 2025-05-22 10:02:49 +0000 Commit: Kristof Provost <kp@FreeBSD.org> CommitDate: 2025-06-02 15:30:18 +0000 pf: rework anchor handling to recurse - percpu anchor stacks we actually don't need to pre-allocate per_anchor_stack[], if we use a 'natural' recursion, when doing anchor tree traversal. O.K. mikeb@, mpi@ Extended because FreeBSD pf still has separate NAT rules processing, which also needed this change. Obtained from: OpenBSD, sashan <sashan@openbsd.org>, e236f0fa7b Obtained from: OpenBSD, sashan <sashan@openbsd.org>, 5e4ad307dc Sponsored by: Rubicon Communications, LLC ("Netgate") Differential Revision: https://reviews.freebsd.org/D50579 --- sys/net/pfvar.h | 51 ++++- sys/netpfil/pf/pf.c | 557 ++++++++++++++++++++++++------------------------- sys/netpfil/pf/pf_lb.c | 146 +++++++++---- 3 files changed, 422 insertions(+), 332 deletions(-) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 82d74e08c14f..b3e3260d80a6 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1151,6 +1151,45 @@ struct pf_kstate { * Try to not grow the struct beyond that. 
*/ _Static_assert(sizeof(struct pf_kstate) <= 384, "pf_kstate size crosses 384 bytes"); + +enum pf_test_status { + PF_TEST_FAIL = -1, + PF_TEST_OK, + PF_TEST_QUICK +}; + +struct pf_test_ctx { + enum pf_test_status test_status; + struct pf_pdesc *pd; + struct pf_rule_actions act; + uint8_t icmpcode; + uint8_t icmptype; + int icmp_dir; + int state_icmp; + int tag; + int rewrite; + u_short reason; + struct pf_src_node *sns[PF_SN_MAX]; + struct pf_krule_slist rules; + struct pf_krule *nr; + struct pf_krule *tr; + struct pf_krule **rm; + struct pf_krule *a; + struct pf_krule **am; + struct pf_kruleset **rsm; + struct pf_kruleset *arsm; + struct pf_kruleset *aruleset; + struct pf_state_key *sk; + struct pf_state_key *nk; + struct tcphdr *th; + struct pf_udp_mapping *udp_mapping; + struct pf_kpool *nat_pool; + uint16_t virtual_type; + uint16_t virtual_id; + int depth; +}; + +#define PF_ANCHOR_STACK_MAX 32 #endif /* @@ -1411,7 +1450,6 @@ RB_PROTOTYPE(pf_kanchor_node, pf_kanchor, entry_node, pf_kanchor_compare); PFR_TFLAG_REFDANCHOR | \ PFR_TFLAG_COUNTERS) -struct pf_kanchor_stackframe; struct pf_keth_anchor_stackframe; struct pfr_table { @@ -2678,12 +2716,8 @@ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); #ifdef _KERNEL void pf_print_host(struct pf_addr *, u_int16_t, sa_family_t); -void pf_step_into_anchor(struct pf_kanchor_stackframe *, int *, - struct pf_kruleset **, int, struct pf_krule **, - struct pf_krule **); -int pf_step_out_of_anchor(struct pf_kanchor_stackframe *, int *, - struct pf_kruleset **, int, struct pf_krule **, - struct pf_krule **, int *); +enum pf_test_status pf_step_into_anchor(struct pf_test_ctx *, struct pf_krule *); +int pf_match_rule(struct pf_test_ctx *, struct pf_kruleset *); void pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *, int *, struct pf_keth_ruleset **, struct pf_keth_rule **, struct pf_keth_rule **, @@ -2706,8 +2740,7 @@ int pf_get_transaddr_af(struct pf_krule *, struct pf_pdesc *); u_short 
pf_get_translation(struct pf_pdesc *, int, struct pf_state_key **, struct pf_state_key **, - struct pf_kanchor_stackframe *, struct pf_krule **, - struct pf_udp_mapping **udp_mapping); + struct pf_test_ctx *, struct pf_udp_mapping **udp_mapping); u_short pf_get_transaddr(struct pf_pdesc *, struct pf_state_key **, struct pf_state_key **, struct pf_krule *, struct pf_udp_mapping **, diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 5c572dfcd425..a5f51d69ab2b 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -4629,13 +4629,6 @@ pf_tag_packet(struct pf_pdesc *pd, int tag) return (0); } -#define PF_ANCHOR_STACKSIZE 32 -struct pf_kanchor_stackframe { - struct pf_kruleset *rs; - struct pf_krule *r; /* XXX: + match bit */ - struct pf_kanchor *child; -}; - /* * XXX: We rely on malloc(9) returning pointer aligned addresses. */ @@ -4649,80 +4642,42 @@ struct pf_kanchor_stackframe { ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \ } while (0) -void -pf_step_into_anchor(struct pf_kanchor_stackframe *stack, int *depth, - struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a) +enum pf_test_status +pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r) { - struct pf_kanchor_stackframe *f; + enum pf_test_status rv; PF_RULES_RASSERT(); - if (*depth >= PF_ANCHOR_STACKSIZE) { + if (ctx->depth >= PF_ANCHOR_STACK_MAX) { printf("%s: anchor stack overflow on %s\n", - __func__, (*r)->anchor->name); - *r = TAILQ_NEXT(*r, entries); - return; - } else if (*depth == 0 && a != NULL) - *a = *r; - f = stack + (*depth)++; - f->rs = *rs; - f->r = *r; - if ((*r)->anchor_wildcard) { - struct pf_kanchor_node *parent = &(*r)->anchor->children; - - if ((f->child = RB_MIN(pf_kanchor_node, parent)) == NULL) { - *r = NULL; - return; - } - *rs = &f->child->ruleset; - } else { - f->child = NULL; - *rs = &(*r)->anchor->ruleset; + __func__, r->anchor->name); + return (PF_TEST_FAIL); } - *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); -} -int 
-pf_step_out_of_anchor(struct pf_kanchor_stackframe *stack, int *depth, - struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a, - int *match) -{ - struct pf_kanchor_stackframe *f; - struct pf_krule *fr; - int quick = 0; - - PF_RULES_RASSERT(); + ctx->depth++; - do { - if (*depth <= 0) - break; - f = stack + *depth - 1; - fr = PF_ANCHOR_RULE(f); - if (f->child != NULL) { - f->child = RB_NEXT(pf_kanchor_node, - &fr->anchor->children, f->child); - if (f->child != NULL) { - *rs = &f->child->ruleset; - *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); - if (*r == NULL) - continue; - else - break; + if (r->anchor_wildcard) { + struct pf_kanchor *child; + rv = PF_TEST_OK; + RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) { + rv = pf_match_rule(ctx, &child->ruleset); + if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { + /* + * we either hit a rule with quick action + * (more likely), or hit some runtime + * error (e.g. pool_get() failure). + */ + break; } } - (*depth)--; - if (*depth == 0 && a != NULL) - *a = NULL; - *rs = f->rs; - if (match != NULL && *match > *depth) { - *match = *depth; - if (f->r->quick) - quick = 1; - } - *r = TAILQ_NEXT(fr, entries); - } while (*r == NULL); + } else { + rv = pf_match_rule(ctx, &r->anchor->ruleset); + } - return (quick); + ctx->depth--; + + return (rv); } struct pf_keth_anchor_stackframe { @@ -4749,7 +4704,7 @@ pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth, if (match) *match = 0; - if (*depth >= PF_ANCHOR_STACKSIZE) { + if (*depth >= PF_ANCHOR_STACK_MAX) { printf("%s: anchor stack overflow on %s\n", __func__, (*r)->anchor->name); *r = TAILQ_NEXT(*r, entries); @@ -5240,7 +5195,7 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) int asd = 0, match = 0; int tag = -1; uint8_t action; - struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; + struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACK_MAX]; MPASS(kif->pfik_ifp->if_vnet == 
curvnet); NET_EPOCH_ASSERT(); @@ -5495,12 +5450,11 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) return (action); } -#define PF_TEST_ATTRIB(t, a)\ - do { \ - if (t) { \ - r = a; \ - goto nextrule; \ - } \ +#define PF_TEST_ATTRIB(t, a) \ + if (t) { \ + r = a; \ + continue; \ + } else do { \ } while (0) static __inline u_short @@ -5555,132 +5509,18 @@ pf_rule_apply_nat(struct pf_pdesc *pd, struct pf_state_key **skp, return (PFRES_MAX); } -static int -pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, - struct pf_pdesc *pd, struct pf_krule **am, - struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp) +enum pf_test_status +pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) { - struct pf_krule *nr = NULL; - struct pf_krule *r, *a = NULL; - struct pf_kruleset *ruleset = NULL; - struct pf_krule_slist match_rules; struct pf_krule_item *ri; - struct tcphdr *th = &pd->hdr.tcp; - struct pf_state_key *sk = NULL, *nk = NULL; + struct pf_krule *r; + struct pf_pdesc *pd = ctx->pd; u_short transerror; - int rewrite = 0; - int tag = -1; - int asd = 0; - int match = 0; - int state_icmp = 0, icmp_dir; - int action = PF_PASS; - u_int16_t virtual_type, virtual_id; - u_int16_t bproto_sum = 0, bip_sum = 0; - u_int8_t icmptype = 0, icmpcode = 0; - struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; - struct pf_udp_mapping *udp_mapping = NULL; - struct pf_kpool *nat_pool = NULL; - - PF_RULES_RASSERT(); - - PF_ACPY(&pd->nsaddr, pd->src, pd->af); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); - - SLIST_INIT(&match_rules); - - if (inp != NULL) { - INP_LOCK_ASSERT(inp); - pd->lookup.uid = inp->inp_cred->cr_uid; - pd->lookup.gid = inp->inp_cred->cr_groups[0]; - pd->lookup.done = 1; - } - - if (pd->ip_sum) - bip_sum = *pd->ip_sum; - - switch (pd->virtual_proto) { - case IPPROTO_TCP: - bproto_sum = th->th_sum; - pd->nsport = th->th_sport; - pd->ndport = th->th_dport; - break; - case IPPROTO_UDP: - bproto_sum = pd->hdr.udp.uh_sum; - 
pd->nsport = pd->hdr.udp.uh_sport; - pd->ndport = pd->hdr.udp.uh_dport; - break; - case IPPROTO_SCTP: - pd->nsport = pd->hdr.sctp.src_port; - pd->ndport = pd->hdr.sctp.dest_port; - break; -#ifdef INET - case IPPROTO_ICMP: - MPASS(pd->af == AF_INET); - icmptype = pd->hdr.icmp.icmp_type; - icmpcode = pd->hdr.icmp.icmp_code; - state_icmp = pf_icmp_mapping(pd, icmptype, - &icmp_dir, &virtual_id, &virtual_type); - if (icmp_dir == PF_IN) { - pd->nsport = virtual_id; - pd->ndport = virtual_type; - } else { - pd->nsport = virtual_type; - pd->ndport = virtual_id; - } - break; -#endif /* INET */ -#ifdef INET6 - case IPPROTO_ICMPV6: - MPASS(pd->af == AF_INET6); - icmptype = pd->hdr.icmp6.icmp6_type; - icmpcode = pd->hdr.icmp6.icmp6_code; - state_icmp = pf_icmp_mapping(pd, icmptype, - &icmp_dir, &virtual_id, &virtual_type); - if (icmp_dir == PF_IN) { - pd->nsport = virtual_id; - pd->ndport = virtual_type; - } else { - pd->nsport = virtual_type; - pd->ndport = virtual_id; - } - - break; -#endif /* INET6 */ - default: - pd->nsport = pd->ndport = 0; - break; - } - pd->osport = pd->nsport; - pd->odport = pd->ndport; - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - - /* check packet for BINAT/NAT/RDR */ - transerror = pf_get_translation(pd, pd->off, &sk, &nk, anchor_stack, - &nr, &udp_mapping); - switch (transerror) { - default: - /* A translation error occurred. */ - REASON_SET(reason, transerror); - goto cleanup; - case PFRES_MAX: - /* No match. 
*/ - break; - case PFRES_MATCH: - KASSERT(sk != NULL, ("%s: null sk", __func__)); - KASSERT(nk != NULL, ("%s: null nk", __func__)); - if (nr->log) { - PFLOG_PACKET(nr->action, PFRES_MATCH, nr, a, - ruleset, pd, 1, NULL); - } - - rewrite += pf_translate_compat(pd, sk, nk, nr, virtual_type); - nat_pool = &(nr->rdr); - } + r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { - if (pd->related_rule) { - *rm = pd->related_rule; + if (ctx->pd->related_rule) { + *ctx->rm = ctx->pd->related_rule; break; } pf_counter_u64_add(&r->evaluations, 1); @@ -5714,7 +5554,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, break; case IPPROTO_TCP: - PF_TEST_ATTRIB((r->flagset & tcp_get_flags(th)) != r->flags, + PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th)) + != r->flags, TAILQ_NEXT(r, entries)); /* FALLTHROUGH */ case IPPROTO_SCTP: @@ -5744,10 +5585,10 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, case IPPROTO_ICMP: case IPPROTO_ICMPV6: /* icmp only. type always 0 in other cases */ - PF_TEST_ATTRIB(r->type && r->type != icmptype + 1, + PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1, TAILQ_NEXT(r, entries)); /* icmp only. type always 0 in other cases */ - PF_TEST_ATTRIB(r->code && r->code != icmpcode + 1, + PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1, TAILQ_NEXT(r, entries)); break; @@ -5762,8 +5603,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, PF_TEST_ATTRIB(r->prob && r->prob <= arc4random(), TAILQ_NEXT(r, entries)); - PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag, - pd->pf_mtag ? pd->pf_mtag->tag : 0), + PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, + &ctx->tag, pd->pf_mtag ? 
pd->pf_mtag->tag : 0), TAILQ_NEXT(r, entries)); PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) == r->rcvifnot), @@ -5773,21 +5614,21 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, TAILQ_NEXT(r, entries)); PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match( - pf_osfp_fingerprint(pd, th), + pf_osfp_fingerprint(pd, ctx->th), r->os_fingerprint)), TAILQ_NEXT(r, entries)); /* FALLTHROUGH */ if (r->tag) - tag = r->tag; + ctx->tag = r->tag; if (r->anchor == NULL) { if (r->action == PF_MATCH) { /* * Apply translations before increasing counters, * in case it fails. */ - transerror = pf_rule_apply_nat(pd, &sk, &nk, r, - &nr, &udp_mapping, virtual_type, &rewrite, - &nat_pool); + transerror = pf_rule_apply_nat(pd, &ctx->sk, &ctx->nk, r, + &ctx->nr, &ctx->udp_mapping, ctx->virtual_type, + &ctx->rewrite, &ctx->nat_pool); switch (transerror) { case PFRES_MATCH: /* Translation action found in rule and applied successfully */ @@ -5796,16 +5637,16 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, break; default: /* Translation action found in rule but failed to apply */ - REASON_SET(reason, transerror); - goto cleanup; + REASON_SET(&ctx->reason, transerror); + return (PF_TEST_FAIL); } ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO); if (ri == NULL) { - REASON_SET(reason, PFRES_MEMORY); - goto cleanup; + REASON_SET(&ctx->reason, PFRES_MEMORY); + return (PF_TEST_FAIL); } ri->r = r; - SLIST_INSERT_HEAD(&match_rules, ri, entry); + SLIST_INSERT_HEAD(&ctx->rules, ri, entry); pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); @@ -5813,36 +5654,183 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, pf_rule_to_actions(r, &pd->act); if (r->log) PFLOG_PACKET(r->action, PFRES_MATCH, r, - a, ruleset, pd, 1, NULL); + ctx->a, ruleset, pd, 1, NULL); } else { 
- match = asd; - *rm = r; - *am = a; - *rsm = ruleset; + /* + * found matching r + */ + *ctx->rm = r; + /* + * anchor, with ruleset, where r belongs to + */ + *ctx->am = ctx->a; + /* + * ruleset where r belongs to + */ + *ctx->rsm = ruleset; + /* + * ruleset, where anchor belongs to. + */ + ctx->arsm = ctx->aruleset; } if (pd->act.log & PF_LOG_MATCHES) - pf_log_matches(pd, r, a, ruleset, &match_rules); - if (r->quick) + pf_log_matches(pd, r, ctx->a, ruleset, &ctx->rules); + if (r->quick) { + ctx->test_status = PF_TEST_QUICK; break; - r = TAILQ_NEXT(r, entries); - } else - pf_step_into_anchor(anchor_stack, &asd, - &ruleset, PF_RULESET_FILTER, &r, &a); -nextrule: - if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, - &ruleset, PF_RULESET_FILTER, &r, &a, &match)) - break; + } + } else { + ctx->a = r; /* remember anchor */ + ctx->aruleset = ruleset; /* and its ruleset */ + if (ctx->a->quick) + ctx->test_status = PF_TEST_QUICK; + if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) { + break; + } + } + r = TAILQ_NEXT(r, entries); + } + + return (ctx->test_status); +} + +static int +pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, + struct pf_pdesc *pd, struct pf_krule **am, + struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp) +{ + struct pf_krule *r = NULL; + struct pf_kruleset *ruleset = NULL; + struct pf_krule_item *ri; + struct pf_test_ctx ctx; + u_short transerror; + int action = PF_PASS; + u_int16_t bproto_sum = 0, bip_sum = 0; + enum pf_test_status rv; + + PF_RULES_RASSERT(); + + bzero(&ctx, sizeof(ctx)); + ctx.tag = -1; + ctx.pd = pd; + ctx.rm = rm; + ctx.am = am; + ctx.rsm = rsm; + ctx.th = &pd->hdr.tcp; + ctx.reason = *reason; + SLIST_INIT(&ctx.rules); + + PF_ACPY(&pd->nsaddr, pd->src, pd->af); + PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + + if (inp != NULL) { + INP_LOCK_ASSERT(inp); + pd->lookup.uid = inp->inp_cred->cr_uid; + pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.done = 1; + } + + if (pd->ip_sum) + bip_sum = *pd->ip_sum; 
+ + switch (pd->virtual_proto) { + case IPPROTO_TCP: + bproto_sum = ctx.th->th_sum; + pd->nsport = ctx.th->th_sport; + pd->ndport = ctx.th->th_dport; + break; + case IPPROTO_UDP: + bproto_sum = pd->hdr.udp.uh_sum; + pd->nsport = pd->hdr.udp.uh_sport; + pd->ndport = pd->hdr.udp.uh_dport; + break; + case IPPROTO_SCTP: + pd->nsport = pd->hdr.sctp.src_port; + pd->ndport = pd->hdr.sctp.dest_port; + break; +#ifdef INET + case IPPROTO_ICMP: + MPASS(pd->af == AF_INET); + ctx.icmptype = pd->hdr.icmp.icmp_type; + ctx.icmpcode = pd->hdr.icmp.icmp_code; + ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, + &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type); + if (ctx.icmp_dir == PF_IN) { + pd->nsport = ctx.virtual_id; + pd->ndport = ctx.virtual_type; + } else { + pd->nsport = ctx.virtual_type; + pd->ndport = ctx.virtual_id; + } + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + MPASS(pd->af == AF_INET6); + ctx.icmptype = pd->hdr.icmp6.icmp6_type; + ctx.icmpcode = pd->hdr.icmp6.icmp6_code; + ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, + &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type); + if (ctx.icmp_dir == PF_IN) { + pd->nsport = ctx.virtual_id; + pd->ndport = ctx.virtual_type; + } else { + pd->nsport = ctx.virtual_type; + pd->ndport = ctx.virtual_id; + } + + break; +#endif /* INET6 */ + default: + pd->nsport = pd->ndport = 0; + break; } - r = *rm; - a = *am; - ruleset = *rsm; + pd->osport = pd->nsport; + pd->odport = pd->ndport; - REASON_SET(reason, PFRES_MATCH); + /* check packet for BINAT/NAT/RDR */ + transerror = pf_get_translation(pd, pd->off, &ctx.sk, &ctx.nk, &ctx, + &ctx.udp_mapping); + switch (transerror) { + default: + /* A translation error occurred. */ + REASON_SET(&ctx.reason, transerror); + goto cleanup; + case PFRES_MAX: + /* No match. 
*/ + break; + case PFRES_MATCH: + KASSERT(ctx.sk != NULL, ("%s: null sk", __func__)); + KASSERT(ctx.nk != NULL, ("%s: null nk", __func__)); + if (ctx.nr->log) { + PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a, + ruleset, pd, 1, NULL); + } + + ctx.rewrite += pf_translate_compat(pd, ctx.sk, ctx.nk, ctx.nr, ctx.virtual_type); + ctx.nat_pool = &(ctx.nr->rdr); + } + + ruleset = &pf_main_ruleset; + rv = pf_match_rule(&ctx, ruleset); + if (rv == PF_TEST_FAIL) { + /* + * Reason has been set in pf_match_rule() already. + */ + goto cleanup; + } + + r = *ctx.rm; /* matching rule */ + ctx.a = *ctx.am; /* rule that defines an anchor containing 'r' */ + ruleset = *ctx.rsm; /* ruleset of the anchor defined by the rule 'a' */ + ctx.aruleset = ctx.arsm; /* ruleset of the 'a' rule itself */ + + REASON_SET(&ctx.reason, PFRES_MATCH); /* apply actions for last matching pass/block rule */ pf_rule_to_actions(r, &pd->act); - transerror = pf_rule_apply_nat(pd, &sk, &nk, r, &nr, &udp_mapping, - virtual_type, &rewrite, &nat_pool); + transerror = pf_rule_apply_nat(pd, &ctx.sk, &ctx.nk, r, &ctx.nr, &ctx.udp_mapping, + ctx.virtual_type, &ctx.rewrite, &ctx.nat_pool); switch (transerror) { case PFRES_MATCH: /* Translation action found in rule and applied successfully */ @@ -5851,31 +5839,31 @@ nextrule: break; default: /* Translation action found in rule but failed to apply */ - REASON_SET(reason, transerror); + REASON_SET(&ctx.reason, transerror); goto cleanup; } if (r->log) { - if (rewrite) + if (ctx.rewrite) m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); - PFLOG_PACKET(r->action, *reason, r, a, ruleset, pd, 1, NULL); + PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL); } if (pd->act.log & PF_LOG_MATCHES) - pf_log_matches(pd, r, a, ruleset, &match_rules); + pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules); if (pd->virtual_proto != PF_VPROTO_FRAGMENT && (r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNRST) || (r->rule_flag & PFRULE_RETURNICMP) || 
(r->rule_flag & PFRULE_RETURN))) { - pf_return(r, nr, pd, th, bproto_sum, - bip_sum, reason, r->rtableid); + pf_return(r, ctx.nr, pd, ctx.th, bproto_sum, + bip_sum, &ctx.reason, r->rtableid); } if (r->action == PF_DROP) goto cleanup; - if (tag > 0 && pf_tag_packet(pd, tag)) { - REASON_SET(reason, PFRES_MEMORY); + if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) { + REASON_SET(&ctx.reason, PFRES_MEMORY); goto cleanup; } if (pd->act.rtableid >= 0) @@ -5890,31 +5878,32 @@ nextrule: */ pd->act.rt = r->rt; /* Don't use REASON_SET, pf_map_addr increases the reason counters */ - *reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr, + ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr, &pd->act.rt_kif, NULL, &sn, &snh, &(r->route), PF_SN_ROUTE); - if (*reason != 0) + if (ctx.reason != 0) goto cleanup; } if (pd->virtual_proto != PF_VPROTO_FRAGMENT && - (!state_icmp && (r->keep_state || nr != NULL || + (!ctx.state_icmp && (r->keep_state || ctx.nr != NULL || (pd->flags & PFDESC_TCP_NORM)))) { bool nat64; - action = pf_create_state(r, nr, a, pd, nk, sk, - &rewrite, sm, tag, bproto_sum, bip_sum, - &match_rules, udp_mapping, nat_pool, reason); - sk = nk = NULL; + action = pf_create_state(r, ctx.nr, ctx.a, pd, ctx.nk, ctx.sk, + &ctx.rewrite, sm, ctx.tag, bproto_sum, bip_sum, + &ctx.rules, ctx.udp_mapping, ctx.nat_pool, &ctx.reason); + ctx.sk = ctx.nk = NULL; if (action != PF_PASS) { - pf_udp_mapping_release(udp_mapping); - if (r->log || (nr != NULL && nr->log) || - *reason == PFRES_MEMORY) + pf_udp_mapping_release(ctx.udp_mapping); + if (r->log || (ctx.nr != NULL && ctx.nr->log) || + ctx.reason == PFRES_MEMORY) pd->act.log |= PF_LOG_FORCE; if (action == PF_DROP && (r->rule_flag & PFRULE_RETURN)) - pf_return(r, nr, pd, th, - bproto_sum, bip_sum, reason, + pf_return(r, ctx.nr, pd, ctx.th, + bproto_sum, bip_sum, &ctx.reason, pd->act.rtableid); + *reason = ctx.reason; return (action); } @@ -5922,69 +5911,73 @@ nextrule: if (nat64) { int ret; - if (sk == NULL) - 
sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE]; - if (nk == NULL) - nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK]; + if (ctx.sk == NULL) + ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE]; + if (ctx.nk == NULL) + ctx.nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK]; if (pd->dir == PF_IN) { - ret = pf_translate(pd, &sk->addr[pd->didx], - sk->port[pd->didx], &sk->addr[pd->sidx], - sk->port[pd->sidx], virtual_type, - icmp_dir); + ret = pf_translate(pd, &ctx.sk->addr[pd->didx], + ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx], + ctx.sk->port[pd->sidx], ctx.virtual_type, + ctx.icmp_dir); } else { - ret = pf_translate(pd, &sk->addr[pd->sidx], - sk->port[pd->sidx], &sk->addr[pd->didx], - sk->port[pd->didx], virtual_type, - icmp_dir); + ret = pf_translate(pd, &ctx.sk->addr[pd->sidx], + ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx], + ctx.sk->port[pd->didx], ctx.virtual_type, + ctx.icmp_dir); } if (ret < 0) goto cleanup; - rewrite += ret; + ctx.rewrite += ret; - if (rewrite && sk->af != nk->af) + if (ctx.rewrite && ctx.sk->af != ctx.nk->af) action = PF_AFRT; } } else { - while ((ri = SLIST_FIRST(&match_rules))) { - SLIST_REMOVE_HEAD(&match_rules, entry); + while ((ri = SLIST_FIRST(&ctx.rules))) { + SLIST_REMOVE_HEAD(&ctx.rules, entry); free(ri, M_PF_RULE_ITEM); } - uma_zfree(V_pf_state_key_z, sk); - uma_zfree(V_pf_state_key_z, nk); - sk = nk = NULL; - pf_udp_mapping_release(udp_mapping); + uma_zfree(V_pf_state_key_z, ctx.sk); + uma_zfree(V_pf_state_key_z, ctx.nk); + ctx.sk = ctx.nk = NULL; + pf_udp_mapping_release(ctx.udp_mapping); } /* copy back packet headers if we performed NAT operations */ - if (rewrite) + if (ctx.rewrite) m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && pd->dir == PF_OUT && - V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) + V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) { /* * We want the state 
created, but we dont * want to send this in case a partner * firewall has to know about it to allow * replies through it. */ + *reason = ctx.reason; return (PF_DEFER); + } + *reason = ctx.reason; return (action); cleanup: - while ((ri = SLIST_FIRST(&match_rules))) { - SLIST_REMOVE_HEAD(&match_rules, entry); + while ((ri = SLIST_FIRST(&ctx.rules))) { + SLIST_REMOVE_HEAD(&ctx.rules, entry); free(ri, M_PF_RULE_ITEM); } - uma_zfree(V_pf_state_key_z, sk); - uma_zfree(V_pf_state_key_z, nk); - pf_udp_mapping_release(udp_mapping); + uma_zfree(V_pf_state_key_z, ctx.sk); + uma_zfree(V_pf_state_key_z, ctx.nk); + pf_udp_mapping_release(ctx.udp_mapping); + *reason = ctx.reason; return (PF_DROP); } diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 00f25c29e23c..ed87aa2cec4d 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -75,8 +75,9 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); -struct pf_krule *pf_match_translation(struct pf_pdesc *, - int, struct pf_kanchor_stackframe *); +struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); +static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *, + struct pf_krule *); static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, struct pf_addr *, uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **, struct pf_srchash **, @@ -128,25 +129,21 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, return (res); } -#define PF_TEST_ATTRIB(t, a)\ - do { \ - if (t) { \ - r = a; \ - goto nextrule; \ - } \ +#define PF_TEST_ATTRIB(t, a) \ + if (t) { \ + r = a; \ + continue; \ + } else do { \ } while (0) -struct pf_krule * -pf_match_translation(struct pf_pdesc *pd, - int rs_num, struct pf_kanchor_stackframe *anchor_stack) +static enum pf_test_status +pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) { - struct 
pf_krule *r, *rm = NULL; - struct pf_kruleset *ruleset = NULL; - int tag = -1; + struct pf_krule *r; + struct pf_pdesc *pd = ctx->pd; int rtableid = -1; - int asd = 0; - r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while (r != NULL) { struct pf_rule_addr *src = NULL, *dst = NULL; struct pf_addr_wrap *xdst = NULL; @@ -188,7 +185,7 @@ pf_match_translation(struct pf_pdesc *pd, !pf_match_port(dst->port_op, dst->port[0], dst->port[1], pd->ndport), r->skip[PF_SKIP_DST_PORT]); - PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag, + PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0), TAILQ_NEXT(r, entries)); PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto != @@ -196,33 +193,101 @@ pf_match_translation(struct pf_pdesc *pd, &pd->hdr.tcp), r->os_fingerprint)), TAILQ_NEXT(r, entries)); if (r->tag) - tag = r->tag; + ctx->tag = r->tag; if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { - rm = r; - if (rm->action == PF_NONAT || - rm->action == PF_NORDR || - rm->action == PF_NOBINAT) { - rm = NULL; + if (r->action == PF_NONAT || + r->action == PF_NORDR || + r->action == PF_NOBINAT) { + *ctx->rm = NULL; + } else { + /* + * found matching r + */ + ctx->tr = r; + /* + * anchor, with ruleset, where r belongs to + */ + *ctx->am = ctx->a; + /* + * ruleset where r belongs to + */ + *ctx->rsm = ruleset; + /* + * ruleset, where anchor belongs to. + */ + ctx->arsm = ctx->aruleset; } - break; *** 122 LINES SKIPPED ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202506021530.552FUYTC049693>