Date: Fri, 24 Sep 2021 09:43:55 GMT
From: Kristof Provost <kp@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: 63b3c1c77036 - main - pf: support dummynet
Message-ID: <202109240943.18O9htUL081162@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by kp: URL: https://cgit.FreeBSD.org/src/commit/?id=63b3c1c77036814c85d36fe7a48c704db7c6fc9c commit 63b3c1c77036814c85d36fe7a48c704db7c6fc9c Author: Kristof Provost <kp@FreeBSD.org> AuthorDate: 2021-05-15 11:49:22 +0000 Commit: Kristof Provost <kp@FreeBSD.org> CommitDate: 2021-09-24 09:41:25 +0000 pf: support dummynet Allow pf to use dummynet pipes and queues. We re-use the currently unused IPFW_IS_DUMMYNET flag to allow dummynet to tell us that a packet is being re-injected after being delayed. This is needed to avoid endlessly looping the packet between pf and dummynet. MFC after: 2 weeks Sponsored by: Rubicon Communications, LLC ("Netgate") Differential Revision: https://reviews.freebsd.org/D31904 --- lib/libpfctl/libpfctl.c | 6 ++ lib/libpfctl/libpfctl.h | 3 + sbin/pfctl/parse.y | 41 ++++++++++ sbin/pfctl/pfctl_parser.c | 9 +++ sys/net/pfvar.h | 10 +++ sys/netpfil/ipfw/ip_dn_io.c | 3 +- sys/netpfil/pf/pf.c | 182 +++++++++++++++++++++++++++++++++++++++++++- sys/netpfil/pf/pf_nv.c | 6 ++ 8 files changed, 256 insertions(+), 4 deletions(-) diff --git a/lib/libpfctl/libpfctl.c b/lib/libpfctl/libpfctl.c index 3d52502f9ba8..576b256155fb 100644 --- a/lib/libpfctl/libpfctl.c +++ b/lib/libpfctl/libpfctl.c @@ -480,6 +480,9 @@ pf_nvrule_to_rule(const nvlist_t *nvl, struct pfctl_rule *rule) nvlist_get_number(nvl, "max_src_conn_rate.seconds"); rule->qid = nvlist_get_number(nvl, "qid"); rule->pqid = nvlist_get_number(nvl, "pqid"); + rule->dnpipe = nvlist_get_number(nvl, "dnpipe"); + rule->dnrpipe = nvlist_get_number(nvl, "dnrpipe"); + rule->free_flags = nvlist_get_number(nvl, "dnflags"); rule->prob = nvlist_get_number(nvl, "prob"); rule->cuid = nvlist_get_number(nvl, "cuid"); rule->cpid = nvlist_get_number(nvl, "cpid"); @@ -584,6 +587,9 @@ pfctl_add_rule(int dev, const struct pfctl_rule *r, const char *anchor, r->max_src_conn_rate.limit); nvlist_add_number(nvlr, "max_src_conn_rate.seconds", r->max_src_conn_rate.seconds); + 
nvlist_add_number(nvlr, "dnpipe", r->dnpipe); + nvlist_add_number(nvlr, "dnrpipe", r->dnrpipe); + nvlist_add_number(nvlr, "dnflags", r->free_flags); nvlist_add_number(nvlr, "prob", r->prob); nvlist_add_number(nvlr, "cuid", r->cuid); nvlist_add_number(nvlr, "cpid", r->cpid); diff --git a/lib/libpfctl/libpfctl.h b/lib/libpfctl/libpfctl.h index 70de7627f0a6..f57497b4a88a 100644 --- a/lib/libpfctl/libpfctl.h +++ b/lib/libpfctl/libpfctl.h @@ -114,6 +114,9 @@ struct pfctl_rule { } max_src_conn_rate; u_int32_t qid; u_int32_t pqid; + u_int16_t dnpipe; + u_int16_t dnrpipe; + u_int32_t free_flags; u_int32_t nr; u_int32_t prob; uid_t cuid; diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index dbfe299cf34f..6bcf5a0bc397 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -248,6 +248,9 @@ static struct filter_opts { char *tag; char *match_tag; u_int8_t match_tag_not; + u_int16_t dnpipe; + u_int16_t dnrpipe; + u_int32_t free_flags; u_int rtableid; u_int8_t prio; u_int8_t set_prio[2]; @@ -468,6 +471,7 @@ int parseport(char *, struct range *r, int); %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY MAPEPORTSET %token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME %token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL +%token DNPIPE DNQUEUE %token LOAD RULESET_OPTIMIZATION PRIO %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY @@ -2464,6 +2468,15 @@ pfrule : action dir logquick interface route af proto fromto } #endif + if ($9.dnpipe || $9.dnrpipe) { + r.dnpipe = $9.dnpipe; + r.dnrpipe = $9.dnrpipe; + if ($9.free_flags & PFRULE_DN_IS_PIPE) + r.free_flags |= PFRULE_DN_IS_PIPE; + else + r.free_flags |= PFRULE_DN_IS_QUEUE; + } + expand_rule(&r, $4, $5.host, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, $9.uid, $9.gid, $9.icmpspec, ""); @@ -2565,6 +2578,32 @@ filter_opt : USER uids { } filter_opts.queues = $1; } + | 
DNPIPE number { + filter_opts.dnpipe = $2; + filter_opts.free_flags |= PFRULE_DN_IS_PIPE; + } + | DNPIPE '(' number ')' { + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_PIPE; + } + | DNPIPE '(' number comma number ')' { + filter_opts.dnrpipe = $5; + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_PIPE; + } + | DNQUEUE number { + filter_opts.dnpipe = $2; + filter_opts.free_flags |= PFRULE_DN_IS_QUEUE; + } + | DNQUEUE '(' number comma number ')' { + filter_opts.dnrpipe = $5; + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_QUEUE; + } + | DNQUEUE '(' number ')' { + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_QUEUE; + } | TAG string { filter_opts.tag = $2; } @@ -5592,6 +5631,8 @@ lookup(char *s) { "debug", DEBUG}, { "divert-reply", DIVERTREPLY}, { "divert-to", DIVERTTO}, + { "dnpipe", DNPIPE}, + { "dnqueue", DNQUEUE}, { "drop", DROP}, { "drop-ovl", FRAGDROP}, { "dup-to", DUPTO}, diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c index 89c9bc349ca3..131ad22123e2 100644 --- a/sbin/pfctl/pfctl_parser.c +++ b/sbin/pfctl/pfctl_parser.c @@ -1018,6 +1018,15 @@ print_rule(struct pfctl_rule *r, const char *anchor_call, int verbose, int numer i = 0; while (r->label[i][0]) printf(" label \"%s\"", r->label[i++]); + /* Only dnrpipe as we might do (0, 42) to only queue return traffic. */ + if (r->dnrpipe) + printf(" %s(%d, %d)", + r->free_flags & PFRULE_DN_IS_PIPE ? "dnpipe" : "dnqueue", + r->dnpipe, r->dnrpipe); + else if (r->dnpipe) + printf(" %s %d", + r->free_flags & PFRULE_DN_IS_PIPE ? 
"dnpipe" : "dnqueue", + r->dnpipe); if (r->qname[0] && r->pqname[0]) printf(" queue(%s, %s)", r->qname, r->pqname); else if (r->qname[0]) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 2c5c0037cb92..b2c177fba68c 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -563,6 +563,9 @@ struct pf_kpool { struct pf_rule_actions { uint16_t qid; uint16_t pqid; + uint16_t dnpipe; + uint16_t dnrpipe; /* Reverse direction pipe */ + uint32_t flags; }; union pf_krule_ptr { @@ -608,6 +611,9 @@ struct pf_krule { } max_src_conn_rate; u_int16_t qid; u_int16_t pqid; + u_int16_t dnpipe; + u_int16_t dnrpipe; + u_int32_t free_flags; u_int32_t nr; u_int32_t prob; uid_t cuid; @@ -755,6 +761,8 @@ struct pf_state_cmp { /* was PFSTATE_PFLOW 0x04 */ #define PFSTATE_NOSYNC 0x08 #define PFSTATE_ACK 0x10 +#define PFRULE_DN_IS_PIPE 0x40 +#define PFRULE_DN_IS_QUEUE 0x80 #define PFSTATE_SETPRIO 0x0200 #define PFSTATE_SETMASK (PFSTATE_SETPRIO) @@ -858,6 +866,8 @@ struct pf_kstate { u_int32_t pfsync_time; u_int16_t qid; u_int16_t pqid; + u_int16_t dnpipe; + u_int16_t dnrpipe; u_int16_t tag; u_int8_t log; }; diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index dad5cb087b39..11ad498505f4 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -841,7 +841,8 @@ tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa) m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ dt = (struct dn_pkt_tag *)(mtag + 1); dt->rule = fwa->rule; - dt->rule.info &= IPFW_ONEPASS; /* only keep this info */ + /* only keep this info */ + dt->rule.info &= (IPFW_ONEPASS | IPFW_IS_DUMMYNET); dt->dn_dir = dir; dt->ifp = fwa->flags & IPFW_ARGS_OUT ? 
fwa->ifp : NULL; /* dt->output tame is updated as we move through */ diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 1d492370953a..ac329a37f7bd 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -94,6 +94,13 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp.h> #include <netinet/udp_var.h> +/* dummynet */ +#include <netinet/ip_dummynet.h> +#include <netinet/ip_fw.h> +#include <netpfil/ipfw/dn_heap.h> +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_dn_private.h> + #ifdef INET6 #include <netinet/ip6.h> #include <netinet/icmp6.h> @@ -3310,6 +3317,12 @@ pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a) a->qid = r->qid; if (r->pqid) a->pqid = r->pqid; + if (r->dnpipe) + a->dnpipe = r->dnpipe; + if (r->dnrpipe) + a->dnpipe = r->dnrpipe; + if (r->free_flags & PFRULE_DN_IS_PIPE) + a->flags |= PFRULE_DN_IS_PIPE; } int @@ -3982,6 +3995,9 @@ pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a, s->sync_state = PFSYNC_S_NONE; s->qid = pd->act.qid; s->pqid = pd->act.pqid; + s->dnpipe = pd->act.dnpipe; + s->dnrpipe = pd->act.dnrpipe; + s->state_flags |= pd->act.flags; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; switch (pd->proto) { @@ -6226,6 +6242,64 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a return (0); } +static bool +pf_pdesc_to_dnflow(int dir, const struct pf_pdesc *pd, + const struct pf_krule *r, const struct pf_kstate *s, + struct ip_fw_args *dnflow) +{ + int dndir = r->direction; + + if (s && dndir == PF_INOUT) + dndir = s->direction; + + memset(dnflow, 0, sizeof(*dnflow)); + + if (pd->dport != NULL) + dnflow->f_id.dst_port = ntohs(*pd->dport); + if (pd->sport != NULL) + dnflow->f_id.src_port = ntohs(*pd->sport); + + if (dir == PF_IN) + dnflow->flags |= IPFW_ARGS_IN; + else + dnflow->flags |= IPFW_ARGS_OUT; + + if (dir != dndir && pd->act.dnrpipe) { + dnflow->rule.info = pd->act.dnrpipe; + } + else if (dir == dndir) { + dnflow->rule.info = 
pd->act.dnpipe; + } + else { + return (false); + } + + dnflow->rule.info |= IPFW_IS_DUMMYNET; + if (r->free_flags & PFRULE_DN_IS_PIPE) + dnflow->rule.info |= IPFW_IS_PIPE; + + dnflow->f_id.proto = pd->proto; + dnflow->f_id.extra = dnflow->rule.info; + switch (pd->af) { + case AF_INET: + dnflow->f_id.addr_type = 4; + dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); + dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); + break; + case AF_INET6: + dnflow->flags |= IPFW_ARGS_IP6; + dnflow->f_id.addr_type = 6; + dnflow->f_id.src_ip6 = pd->src->v6; + dnflow->f_id.dst_ip6 = pd->dst->v6; + break; + default: + panic("Invalid AF"); + break; + } + + return (true); +} + #ifdef INET int pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) @@ -6267,10 +6341,11 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * PF_RULES_RLOCK(); - if (__predict_false(ip_divert_ptr != NULL) && + if ((__predict_false(ip_divert_ptr != NULL) || ip_dn_io_ptr != NULL) && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); - if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { + if ((rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) || + (rr->info & IPFW_IS_DUMMYNET)) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; @@ -6405,6 +6480,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * log = action != PF_PASS; goto done; } + pd.sport = &pd.hdr.udp.uh_sport; + pd.dport = &pd.hdr.udp.uh_dport; if (pd.hdr.udp.uh_dport == 0 || ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off || ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) { @@ -6524,6 +6601,47 @@ done: } #endif /* ALTQ */ + if (s && (s->dnpipe || s->dnrpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.dnrpipe = s->dnrpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->dnrpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.dnrpipe = 
r->dnrpipe; + pd.act.flags = r->free_flags; + } + if ((pd.act.dnpipe || pd.act.dnrpipe) && !PACKET_LOOPED(&pd)) { + if (ip_dn_io_ptr == NULL) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } else { + struct ip_fw_args dnflow; + + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + if (s) + PF_STATE_UNLOCK(s); + return (action); + } + + if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) { + ip_dn_io_ptr(m0, &dnflow); + + if (*m0 == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return (action); + } else { + /* This is dummynet fast io processing */ + m_tag_delete(*m0, m_tag_first(*m0)); + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + } + } + } + } + /* * connections redirected to loopback should not match sockets * bound specifically to loopback due to security implications, @@ -6684,6 +6802,7 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; struct m_tag *mtag; + struct m_tag *ipfwtag; struct ip6_hdr *h = NULL; struct pf_krule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_kstate *s = NULL; @@ -6719,7 +6838,19 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb PF_RULES_RLOCK(); /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + if (ip_dn_io_ptr != NULL && + ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); + if (rr->info & IPFW_IS_DUMMYNET) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + goto done; + } + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + m_tag_delete(m, ipfwtag); + } + } else if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -6828,6 +6959,8 @@ pf_test6(int dir, int pflags, struct ifnet 
*ifp, struct mbuf **m0, struct inpcb goto done; } pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2); + pd.sport = &pd.hdr.tcp.th_sport; + pd.dport = &pd.hdr.tcp.th_dport; action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; @@ -6851,6 +6984,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb log = action != PF_PASS; goto done; } + pd.sport = &pd.hdr.udp.uh_sport; + pd.dport = &pd.hdr.udp.uh_dport; if (pd.hdr.udp.uh_dport == 0 || ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off || ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) { @@ -6974,6 +7109,47 @@ done: } #endif /* ALTQ */ + if (s && (s->dnpipe || s->dnrpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.dnrpipe = s->dnrpipe; + pd.act.flags = s->state_flags; + } else { + pd.act.dnpipe = r->dnpipe; + pd.act.dnrpipe = r->dnrpipe; + pd.act.flags = r->free_flags; + } + if ((pd.act.dnpipe || pd.act.dnrpipe) && !PACKET_LOOPED(&pd)) { + if (ip_dn_io_ptr == NULL) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } else { + struct ip_fw_args dnflow; + + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + if (s) + PF_STATE_UNLOCK(s); + return (action); + } + + if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) { + ip_dn_io_ptr(m0, &dnflow); + + if (*m0 == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return (action); + } else { + /* This is dummynet fast io processing */ + m_tag_delete(*m0, m_tag_first(*m0)); + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + } + } + } + } + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c index d1eca90e0ee5..fb2bab77ad8f 100644 --- a/sys/netpfil/pf/pf_nv.c +++ b/sys/netpfil/pf/pf_nv.c @@ -527,6 +527,9 @@ pf_nvrule_to_krule(const nvlist_t *nvl, struct 
pf_krule *rule) sizeof(rule->pqname))); PFNV_CHK(pf_nvstring(nvl, "tagname", rule->tagname, sizeof(rule->tagname))); + PFNV_CHK(pf_nvuint16_opt(nvl, "dnpipe", &rule->dnpipe, 0)); + PFNV_CHK(pf_nvuint16_opt(nvl, "dnrpipe", &rule->dnrpipe, 0)); + PFNV_CHK(pf_nvuint32_opt(nvl, "dnflags", &rule->free_flags, 0)); PFNV_CHK(pf_nvstring(nvl, "match_tagname", rule->match_tagname, sizeof(rule->match_tagname))); PFNV_CHK(pf_nvstring(nvl, "overload_tblname", rule->overload_tblname, @@ -687,6 +690,9 @@ pf_krule_to_nvrule(struct pf_krule *rule) nvlist_add_string(nvl, "ifname", rule->ifname); nvlist_add_string(nvl, "qname", rule->qname); nvlist_add_string(nvl, "pqname", rule->pqname); + nvlist_add_number(nvl, "dnpipe", rule->dnpipe); + nvlist_add_number(nvl, "dnrpipe", rule->dnrpipe); + nvlist_add_number(nvl, "dnflags", rule->free_flags); nvlist_add_string(nvl, "tagname", rule->tagname); nvlist_add_string(nvl, "match_tagname", rule->match_tagname); nvlist_add_string(nvl, "overload_tblname", rule->overload_tblname);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202109240943.18O9htUL081162>