From owner-dev-commits-src-all@freebsd.org Mon Jun 7 12:12:04 2021 Return-Path: Delivered-To: dev-commits-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id ECCE364D175; Mon, 7 Jun 2021 12:12:04 +0000 (UTC) (envelope-from tuexen@freebsd.org) Received: from drew.franken.de (mail-n.franken.de [193.175.24.27]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "*.franken.de", Issuer "Sectigo RSA Domain Validation Secure Server CA" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4FzC0h47yYz4pPP; Mon, 7 Jun 2021 12:12:04 +0000 (UTC) (envelope-from tuexen@freebsd.org) Received: from smtpclient.apple (unknown [IPv6:2a02:8109:1140:c3d:c07f:3a0d:782c:bd56]) (Authenticated sender: macmic) by drew.franken.de (Postfix) with ESMTPSA id 4EB08721E2809; Mon, 7 Jun 2021 14:11:59 +0200 (CEST) From: Michael Tuexen Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: quoted-printable Mime-Version: 1.0 (Mac OS X Mail 14.0 \(3654.100.0.2.22\)) Subject: Re: git: fa50e98328b4 - stable/13 - mend Date: Mon, 7 Jun 2021 14:11:58 +0200 References: <202106070901.15791qUa008332@gitrepo.freebsd.org> To: "src-committers@freebsd.org" , "dev-commits-src-all@freebsd.org" , "dev-commits-src-branches@freebsd.org" In-Reply-To: <202106070901.15791qUa008332@gitrepo.freebsd.org> Message-Id: <1758EB9E-F9AC-4170-ACD0-C5B609405818@freebsd.org> X-Mailer: Apple Mail (2.3654.100.0.2.22) X-Spam-Status: No, score=-2.9 required=5.0 tests=ALL_TRUSTED,BAYES_00 autolearn=disabled version=3.4.1 X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on mail-n.franken.de X-Rspamd-Queue-Id: 4FzC0h47yYz4pPP X-Spamd-Bar: / Authentication-Results: mx1.freebsd.org; none X-Spamd-Result: default: False [0.00 / 15.00]; ASN(0.00)[asn:680, ipnet:193.174.0.0/15, country:DE]; local_wl_from(0.00)[freebsd.org] X-BeenThere: dev-commits-src-all@freebsd.org 
X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commit messages for all branches of the src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 07 Jun 2021 12:12:05 -0000 > On 7. Jun 2021, at 11:01, Michael Tuexen wrote: >=20 > The branch stable/13 has been updated by tuexen: >=20 > URL: = https://cgit.FreeBSD.org/src/commit/?id=3Dfa50e98328b48da4fa8dbd97d0a78796= 2cf249f5 >=20 > commit fa50e98328b48da4fa8dbd97d0a787962cf249f5 > Author: Michael Tuexen > AuthorDate: 2021-04-18 14:08:08 +0000 > Commit: Michael Tuexen > CommitDate: 2021-06-07 09:01:28 +0000 >=20 > mend Not sure how the commit ended up this way, but it is MFCing https://cgit.FreeBSD.org/src/commit/?id=9e644c23000c2f5028b235f6263d17ffb24d3605 and manually resolving the merge conflicts. Best regards Michael > --- > share/man/man4/tcp.4 | 15 +- > sys/netinet/tcp.h | 1 + > sys/netinet/tcp_input.c | 48 ++++- > sys/netinet/tcp_output.c | 80 ++++++-- > sys/netinet/tcp_stacks/bbr.c | 38 +--- > sys/netinet/tcp_stacks/rack.c | 26 +-- > sys/netinet/tcp_subr.c | 462 = ++++++++++++++++++++++++++++++++++++++++-- > sys/netinet/tcp_syncache.c | 127 +++++++++--- > sys/netinet/tcp_syncache.h | 12 +- > sys/netinet/tcp_timewait.c | 84 ++++++-- > sys/netinet/tcp_usrreq.c | 30 +++ > sys/netinet/tcp_var.h | 27 ++- > sys/netinet/toecore.c | 4 +- > sys/netinet6/tcp6_var.h | 2 + > sys/sys/mbuf.h | 1 + > usr.bin/netstat/inet.c | 4 + > usr.bin/sockstat/sockstat.1 | 6 +- > usr.bin/sockstat/sockstat.c | 13 +- > 18 files changed, 822 insertions(+), 158 deletions(-) >=20 > diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 > index d01505e58427..b5735a40b320 100644 > --- a/share/man/man4/tcp.4 > +++ b/share/man/man4/tcp.4 > @@ -34,7 +34,7 @@ > .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 > .\" $FreeBSD$ > .\" > -.Dd April 8, 2021 > +.Dd April 18, 2021 > .Dt TCP 4 > .Os > .Sh NAME > @@ -329,6 +329,9 @@ currently executing. 
> This is typically used after a process or thread inherits a listen > socket from its parent, and sets its CPU affinity to a particular = core. > .El > +.It Dv TCP_REMOTE_UDP_ENCAPS_PORT > +Set and get the remote UDP encapsulation port. > +It can only be set on a closed TCP socket. > .El > .Pp > The option level for the > @@ -752,6 +755,16 @@ A CSV list of template_spec=3Dpercent key-value = pairs which controls the per > template sampling rates when > .Xr stats 3 > sampling is enabled. > +.It Va udp_tunneling_port > +The local UDP encapsulation port. > +A value of 0 indicates that UDP encapsulation is disabled. > +The default is 0. > +.It Va udp_tunneling_overhead > +The overhead taken into account when using UDP encapsulation. > +Since MSS clamping by middleboxes will most likely not work, values = larger than > +8 (the size of the UDP header) are also supported. > +Supported values are between 8 and 1024. > +The default is 8. > .El > .Sh ERRORS > A socket operation may fail with one of the following errors returned: > diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h > index 0b71bd4658f8..d2bf1f8431fd 100644 > --- a/sys/netinet/tcp.h > +++ b/sys/netinet/tcp.h > @@ -183,6 +183,7 @@ struct tcphdr { > #define TCP_RXTLS_MODE 42 /* Receive TLS mode */ > #define TCP_CONGESTION 64 /* get/set congestion control = algorithm */ > #define TCP_CCALGOOPT 65 /* get/set cc algorithm specific = options */ > +#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP = tunneling via the specified port */ > #define TCP_DELACK 72 /* socket option for delayed ack */ > #define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a = RST */ > #define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log = */ > diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c > index 397cbc5084e6..d36f9566ffba 100644 > --- a/sys/netinet/tcp_input.c > +++ b/sys/netinet/tcp_input.c > @@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$"); > #ifdef TCP_OFFLOAD > #include > #endif > +#include 
>=20 > #include >=20 > @@ -573,7 +574,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr = *th, uint8_t iptos) > */ > #ifdef INET6 > int > -tcp6_input(struct mbuf **mp, int *offp, int proto) > +tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t = port) > { > struct mbuf *m; > struct in6_ifaddr *ia6; > @@ -603,12 +604,19 @@ tcp6_input(struct mbuf **mp, int *offp, int = proto) > } >=20 > *mp =3D m; > - return (tcp_input(mp, offp, proto)); > + return (tcp_input_with_port(mp, offp, proto, port)); > +} > + > +int > +tcp6_input(struct mbuf **mp, int *offp, int proto) > +{ > + > + return(tcp6_input_with_port(mp, offp, proto, 0)); > } > #endif /* INET6 */ >=20 > int > -tcp_input(struct mbuf **mp, int *offp, int proto) > +tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t = port) > { > struct mbuf *m =3D *mp; > struct tcphdr *th =3D NULL; > @@ -664,6 +672,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > ip6 =3D mtod(m, struct ip6_hdr *); > th =3D (struct tcphdr *)((caddr_t)ip6 + off0); > tlen =3D sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; > + if (port) > + goto skip6_csum; > if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { > if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) > th->th_sum =3D m->m_pkthdr.csum_data; > @@ -677,7 +687,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > TCPSTAT_INC(tcps_rcvbadsum); > goto drop; > } > - > + skip6_csum: > /* > * Be proactive about unspecified IPv6 address in = source. 
> * As we use all-zero to indicate unbounded/unconnected = pcb, > @@ -718,6 +728,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > tlen =3D ntohs(ip->ip_len) - off0; >=20 > iptos =3D ip->ip_tos; > + if (port) > + goto skip_csum; > if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { > if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) > th->th_sum =3D m->m_pkthdr.csum_data; > @@ -747,8 +759,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > ip->ip_v =3D IPVERSION; > ip->ip_hl =3D off0 >> 2; > } > - > - if (th->th_sum) { > + skip_csum: > + if (th->th_sum && (port =3D=3D 0)) { > TCPSTAT_INC(tcps_rcvbadsum); > goto drop; > } > @@ -1006,6 +1018,11 @@ findpcb: > goto dropwithreset; > } >=20 > + if ((tp->t_port !=3D port) && (tp->t_state > TCPS_LISTEN)) { > + rstreason =3D BANDLIM_RST_CLOSEDPORT; > + goto dropwithreset; > + } > + > #ifdef TCP_OFFLOAD > if (tp->t_flags & TF_TOE) { > tcp_offload_input(tp, m); > @@ -1077,7 +1094,7 @@ findpcb: > * NB: syncache_expand() doesn't unlock > * inp and tcpinfo locks. > */ > - rstreason =3D syncache_expand(&inc, &to, th, = &so, m); > + rstreason =3D syncache_expand(&inc, &to, th, = &so, m, port); > if (rstreason < 0) { > /* > * A failing TCP MD5 signature = comparison > @@ -1157,7 +1174,7 @@ tfo_socket_result: > * causes. > */ > if (thflags & TH_RST) { > - syncache_chkrst(&inc, th, m); > + syncache_chkrst(&inc, th, m, port); > goto dropunlock; > } > /* > @@ -1179,7 +1196,7 @@ tfo_socket_result: > log(LOG_DEBUG, "%s; %s: Listen socket: " > "SYN|ACK invalid, segment = rejected\n", > s, __func__); > - syncache_badack(&inc); /* XXX: Not needed! */ > + syncache_badack(&inc, port); /* XXX: Not = needed! 
*/ > TCPSTAT_INC(tcps_badsyn); > rstreason =3D BANDLIM_RST_OPENPORT; > goto dropwithreset; > @@ -1336,7 +1353,8 @@ tfo_socket_result: > #endif > TCP_PROBE3(debug__input, tp, th, m); > tcp_dooptions(&to, optp, optlen, TO_SYN); > - if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, = iptos)) > + if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, = iptos, > + port)) > goto tfo_socket_result; >=20 > /* > @@ -1467,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr = *th, struct socket *so, > return (newsize); > } >=20 > +int > +tcp_input(struct mbuf **mp, int *offp, int proto) > +{ > + return(tcp_input_with_port(mp, offp, proto, 0)); > +} > + > void > tcp_handle_wakeup(struct tcpcb *tp, struct socket *so) > { > @@ -3671,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, = int mtuoffer, > sizeof (struct ip6_hdr) + sizeof (struct = tcphdr) : > sizeof (struct tcpiphdr); > #else > - const size_t min_protoh =3D sizeof(struct tcpiphdr); > + size_t min_protoh =3D sizeof(struct tcpiphdr); > #endif >=20 > INP_WLOCK_ASSERT(tp->t_inpcb); >=20 > + if (tp->t_port) > + min_protoh +=3D V_tcp_udp_tunneling_overhead; > if (mtuoffer !=3D -1) { > KASSERT(offer =3D=3D -1, ("%s: conflict", __func__)); > offer =3D mtuoffer - min_protoh; > diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c > index e23cdc749e98..5bda2be14df0 100644 > --- a/sys/netinet/tcp_output.c > +++ b/sys/netinet/tcp_output.c > @@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$"); >=20 > #include >=20 > +#include > +#include > #include >=20 > #include > @@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp) > #endif > struct tcphdr *th; > u_char opt[TCP_MAXOLEN]; > - unsigned ipoptlen, optlen, hdrlen; > + unsigned ipoptlen, optlen, hdrlen, ulen; > #if defined(IPSEC) || defined(IPSEC_SUPPORT) > unsigned ipsec_optlen =3D 0; > #endif > @@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp) > struct sackhole *p; > int tso, mtu; > struct tcpopt to; > + struct udphdr *udp =3D NULL; > unsigned int wanted_cookie 
=3D 0; > unsigned int dont_sendalot =3D 0; > #if 0 > @@ -558,6 +561,7 @@ after_sack_rexmit: > #endif >=20 > if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg = && > + (tp->t_port =3D=3D 0) && > ((tp->t_flags & TF_SIGNATURE) =3D=3D 0) && > tp->rcv_numsacks =3D=3D 0 && sack_rxmit =3D=3D 0 && > ipoptlen =3D=3D 0 && !(flags & TH_SYN)) > @@ -800,6 +804,8 @@ send: > /* Maximum segment size. */ > if (flags & TH_SYN) { > to.to_mss =3D tcp_mssopt(&tp->t_inpcb->inp_inc); > + if (tp->t_port) > + to.to_mss -=3D = V_tcp_udp_tunneling_overhead; > to.to_flags |=3D TOF_MSS; >=20 > /* > @@ -887,7 +893,14 @@ send: > !(to.to_flags & TOF_FASTOPEN)) > len =3D 0; > } > - > + if (tp->t_port) { > + if (V_tcp_udp_tunneling_port =3D=3D 0) { > + /* The port was removed?? */ > + SOCKBUF_UNLOCK(&so->so_snd); > + return (EHOSTUNREACH); > + } > + hdrlen +=3D sizeof(struct udphdr); > + } > /* > * Adjust data length if insertion of options will > * bump the packet length beyond the t_maxseg length. > @@ -1140,8 +1153,17 @@ send: > #ifdef INET6 > if (isipv6) { > ip6 =3D mtod(m, struct ip6_hdr *); > - th =3D (struct tcphdr *)(ip6 + 1); > - tcpip_fillheaders(tp->t_inpcb, ip6, th); > + if (tp->t_port) { > + udp =3D (struct udphdr *)((caddr_t)ip6 + = ipoptlen + sizeof(struct ip6_hdr)); > + udp->uh_sport =3D = htons(V_tcp_udp_tunneling_port); > + udp->uh_dport =3D tp->t_port; > + ulen =3D hdrlen + len - sizeof(struct ip6_hdr); > + udp->uh_ulen =3D htons(ulen); > + th =3D (struct tcphdr *)(udp + 1); > + } else { > + th =3D (struct tcphdr *)(ip6 + 1); > + } > + tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th); > } else > #endif /* INET6 */ > { > @@ -1149,8 +1171,16 @@ send: > #ifdef TCPDEBUG > ipov =3D (struct ipovly *)ip; > #endif > - th =3D (struct tcphdr *)(ip + 1); > - tcpip_fillheaders(tp->t_inpcb, ip, th); > + if (tp->t_port) { > + udp =3D (struct udphdr *)((caddr_t)ip + ipoptlen = + sizeof(struct ip)); > + udp->uh_sport =3D = htons(V_tcp_udp_tunneling_port); > + udp->uh_dport =3D 
tp->t_port; > + ulen =3D hdrlen + len - sizeof(struct ip); > + udp->uh_ulen =3D htons(ulen); > + th =3D (struct tcphdr *)(udp + 1); > + } else > + th =3D (struct tcphdr *)(ip + 1); > + tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th); > } >=20 > /* > @@ -1309,7 +1339,6 @@ send: > * checksum extended header and data. > */ > m->m_pkthdr.len =3D hdrlen + len; /* in6_cksum() need this */ > - m->m_pkthdr.csum_data =3D offsetof(struct tcphdr, th_sum); >=20 > #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) > if (to.to_flags & TOF_SIGNATURE) { > @@ -1336,9 +1365,19 @@ send: > * There is no need to fill in ip6_plen right now. > * It will be filled later by ip6_output. > */ > - m->m_pkthdr.csum_flags =3D CSUM_TCP_IPV6; > - th->th_sum =3D in6_cksum_pseudo(ip6, sizeof(struct = tcphdr) + > - optlen + len, IPPROTO_TCP, 0); > + if (tp->t_port) { > + m->m_pkthdr.csum_flags =3D CSUM_UDP_IPV6; > + m->m_pkthdr.csum_data =3D offsetof(struct = udphdr, uh_sum); > + udp->uh_sum =3D in6_cksum_pseudo(ip6, ulen, = IPPROTO_UDP, 0); > + th->th_sum =3D htons(0); > + UDPSTAT_INC(udps_opackets); > + } else { > + m->m_pkthdr.csum_flags =3D CSUM_TCP_IPV6; > + m->m_pkthdr.csum_data =3D offsetof(struct = tcphdr, th_sum); > + th->th_sum =3D in6_cksum_pseudo(ip6, > + sizeof(struct tcphdr) + optlen + len, = IPPROTO_TCP, > + 0); > + } > } > #endif > #if defined(INET6) && defined(INET) > @@ -1346,9 +1385,20 @@ send: > #endif > #ifdef INET > { > - m->m_pkthdr.csum_flags =3D CSUM_TCP; > - th->th_sum =3D in_pseudo(ip->ip_src.s_addr, = ip->ip_dst.s_addr, > - htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + = optlen)); > + if (tp->t_port) { > + m->m_pkthdr.csum_flags =3D CSUM_UDP; > + m->m_pkthdr.csum_data =3D offsetof(struct = udphdr, uh_sum); > + udp->uh_sum =3D in_pseudo(ip->ip_src.s_addr, > + ip->ip_dst.s_addr, htons(ulen + = IPPROTO_UDP)); > + th->th_sum =3D htons(0); > + UDPSTAT_INC(udps_opackets); > + } else { > + m->m_pkthdr.csum_flags =3D CSUM_TCP; > + m->m_pkthdr.csum_data =3D 
offsetof(struct = tcphdr, th_sum); > + th->th_sum =3D in_pseudo(ip->ip_src.s_addr, > + ip->ip_dst.s_addr, htons(sizeof(struct = tcphdr) + > + IPPROTO_TCP + len + optlen)); > + } >=20 > /* IP version must be set here for ipv4/ipv6 checking = later */ > KASSERT(ip->ip_v =3D=3D IPVERSION, > @@ -1473,8 +1523,10 @@ send: > * NB: Don't set DF on small MTU/MSS to have a safe fallback. > */ > if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { > - ip->ip_off |=3D htons(IP_DF); > tp->t_flags2 |=3D TF2_PLPMTU_PMTUD; > + if (tp->t_port =3D=3D 0 || len < V_tcp_minmss) { > + ip->ip_off |=3D htons(IP_DF); > + } > } else { > tp->t_flags2 &=3D ~TF2_PLPMTU_PMTUD; > } > diff --git a/sys/netinet/tcp_stacks/bbr.c = b/sys/netinet/tcp_stacks/bbr.c > index cc20d6bf52ca..1ee8d26446fd 100644 > --- a/sys/netinet/tcp_stacks/bbr.c > +++ b/sys/netinet/tcp_stacks/bbr.c > @@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const = struct timeval *tv) > #endif > struct tcp_bbr *bbr; > struct tcphdr *th; > -#ifdef NETFLIX_TCPOUDP > struct udphdr *udp =3D NULL; > -#endif > u_char opt[TCP_MAXOLEN]; > unsigned ipoptlen, optlen, hdrlen; > -#ifdef NETFLIX_TCPOUDP > unsigned ulen; > -#endif > uint32_t bbr_seq; > uint32_t delay_calc=3D0; > uint8_t doing_tlp =3D 0; > @@ -12991,10 +12987,8 @@ send: > /* Maximum segment size. */ > if (flags & TH_SYN) { > to.to_mss =3D tcp_mssopt(&inp->inp_inc); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) > to.to_mss -=3D = V_tcp_udp_tunneling_overhead; > -#endif > to.to_flags |=3D TOF_MSS; > /* > * On SYN or SYN|ACK transmits on TFO = connections, > @@ -13063,7 +13057,6 @@ send: > !(to.to_flags & TOF_FASTOPEN)) > len =3D 0; > } > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > if (V_tcp_udp_tunneling_port =3D=3D 0) { > /* The port was removed?? 
*/ > @@ -13072,7 +13065,6 @@ send: > } > hdrlen +=3D sizeof(struct udphdr); > } > -#endif > #ifdef INET6 > if (isipv6) > ipoptlen =3D ip6_optlen(tp->t_inpcb); > @@ -13408,7 +13400,6 @@ send: > #ifdef INET6 > if (isipv6) { > ip6 =3D mtod(m, struct ip6_hdr *); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp =3D (struct udphdr *)((caddr_t)ip6 + = ipoptlen + sizeof(struct ip6_hdr)); > udp->uh_sport =3D = htons(V_tcp_udp_tunneling_port); > @@ -13417,17 +13408,9 @@ send: > udp->uh_ulen =3D htons(ulen); > th =3D (struct tcphdr *)(udp + 1); > } else { > -#endif > th =3D (struct tcphdr *)(ip6 + 1); > - > -#ifdef NETFLIX_TCPOUDP > } > -#endif > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip6, th); > + tcpip_fillheaders(inp, tp->t_port, ip6, th); > } else > #endif /* INET6 */ > { > @@ -13435,7 +13418,6 @@ send: > #ifdef TCPDEBUG > ipov =3D (struct ipovly *)ip; > #endif > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp =3D (struct udphdr *)((caddr_t)ip + ipoptlen = + sizeof(struct ip)); > udp->uh_sport =3D = htons(V_tcp_udp_tunneling_port); > @@ -13443,14 +13425,10 @@ send: > ulen =3D hdrlen + len - sizeof(struct ip); > udp->uh_ulen =3D htons(ulen); > th =3D (struct tcphdr *)(udp + 1); > - } else > -#endif > + } else { > th =3D (struct tcphdr *)(ip + 1); > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip, th); > + } > + tcpip_fillheaders(inp, tp->t_port, ip, th); > } > /* > * If we are doing retransmissions, then snd_nxt will not = reflect > @@ -13600,7 +13578,6 @@ send: > * ip6_plen is not need to be filled now, and will be = filled > * in ip6_output. 
> */ > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > m->m_pkthdr.csum_flags =3D CSUM_UDP_IPV6; > m->m_pkthdr.csum_data =3D offsetof(struct = udphdr, uh_sum); > @@ -13608,14 +13585,11 @@ send: > th->th_sum =3D htons(0); > UDPSTAT_INC(udps_opackets); > } else { > -#endif > csum_flags =3D m->m_pkthdr.csum_flags =3D = CSUM_TCP_IPV6; > m->m_pkthdr.csum_data =3D offsetof(struct = tcphdr, th_sum); > th->th_sum =3D in6_cksum_pseudo(ip6, = sizeof(struct tcphdr) + > optlen + len, IPPROTO_TCP, 0); > -#ifdef NETFLIX_TCPOUDP > } > -#endif > } > #endif > #if defined(INET6) && defined(INET) > @@ -13623,7 +13597,6 @@ send: > #endif > #ifdef INET > { > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > m->m_pkthdr.csum_flags =3D CSUM_UDP; > m->m_pkthdr.csum_data =3D offsetof(struct = udphdr, uh_sum); > @@ -13632,15 +13605,12 @@ send: > th->th_sum =3D htons(0); > UDPSTAT_INC(udps_opackets); > } else { > -#endif > csum_flags =3D m->m_pkthdr.csum_flags =3D = CSUM_TCP; > m->m_pkthdr.csum_data =3D offsetof(struct = tcphdr, th_sum); > th->th_sum =3D in_pseudo(ip->ip_src.s_addr, > ip->ip_dst.s_addr, htons(sizeof(struct = tcphdr) + > IPPROTO_TCP + len + optlen)); > -#ifdef NETFLIX_TCPOUDP > } > -#endif > /* IP version must be set here for ipv4/ipv6 checking = later */ > KASSERT(ip->ip_v =3D=3D IPVERSION, > ("%s: IP version incorrect: %d", __func__, = ip->ip_v)); > diff --git a/sys/netinet/tcp_stacks/rack.c = b/sys/netinet/tcp_stacks/rack.c > index 0ee73a95a6d7..12827d1699d0 100644 > --- a/sys/netinet/tcp_stacks/rack.c > +++ b/sys/netinet/tcp_stacks/rack.c > @@ -13008,10 +13008,8 @@ send: > if (flags & TH_SYN) { > tp->snd_nxt =3D tp->iss; > to.to_mss =3D tcp_mssopt(&inp->inp_inc); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) > to.to_mss -=3D = V_tcp_udp_tunneling_overhead; > -#endif > to.to_flags |=3D TOF_MSS; >=20 > /* > @@ -13088,7 +13086,6 @@ send: > !(to.to_flags & TOF_FASTOPEN)) > len =3D 0; > } > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > if (V_tcp_udp_tunneling_port =3D=3D 0) { > /* The 
port was removed?? */ > @@ -13097,7 +13094,6 @@ send: > } > hdrlen +=3D sizeof(struct udphdr); > } > -#endif > #ifdef INET6 > if (isipv6) > ipoptlen =3D ip6_optlen(tp->t_inpcb); > @@ -13372,7 +13368,6 @@ send: > #ifdef INET6 > if (isipv6) { > ip6 =3D mtod(m, struct ip6_hdr *); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp =3D (struct udphdr *)((caddr_t)ip6 + = ipoptlen + sizeof(struct ip6_hdr)); > udp->uh_sport =3D = htons(V_tcp_udp_tunneling_port); > @@ -13380,14 +13375,10 @@ send: > ulen =3D hdrlen + len - sizeof(struct ip6_hdr); > udp->uh_ulen =3D htons(ulen); > th =3D (struct tcphdr *)(udp + 1); > - } else > -#endif > + } else { > th =3D (struct tcphdr *)(ip6 + 1); > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip6, th); > + } > + tcpip_fillheaders(inp, tp->t_port, ip6, th); > } else > #endif /* INET6 */ > { > @@ -13395,7 +13386,6 @@ send: > #ifdef TCPDEBUG > ipov =3D (struct ipovly *)ip; > #endif > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp =3D (struct udphdr *)((caddr_t)ip + ipoptlen = + sizeof(struct ip)); > udp->uh_sport =3D = htons(V_tcp_udp_tunneling_port); > @@ -13403,14 +13393,10 @@ send: > ulen =3D hdrlen + len - sizeof(struct ip); > udp->uh_ulen =3D htons(ulen); > th =3D (struct tcphdr *)(udp + 1); > - } else > -#endif > + } else { > th =3D (struct tcphdr *)(ip + 1); > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip, th); > + } > + tcpip_fillheaders(inp, tp->t_port, ip, th); > } > /* > * Fill in fields, remembering maximum advertised window for use = in > diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c > index dff7767cd9cf..6bdeb3984aee 100644 > --- a/sys/netinet/tcp_subr.c > +++ b/sys/netinet/tcp_subr.c > @@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$"); > #ifdef TCP_OFFLOAD > #include > #endif > +#include > +#include >=20 > #include >=20 > @@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp) > } > } >=20 > +static void > 
+tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb = *inp, > + const struct sockaddr *sa, void *ctx) > +{ > + struct ip *iph; > +#ifdef INET6 > + struct ip6_hdr *ip6; > +#endif > + struct udphdr *uh; > + struct tcphdr *th; > + int thlen; > + uint16_t port; > + > + TCPSTAT_INC(tcps_tunneled_pkts); > + if ((m->m_flags & M_PKTHDR) =3D=3D 0) { > + /* Can't handle one that is not a pkt hdr */ > + TCPSTAT_INC(tcps_tunneled_errs); > + goto out; > + } > + thlen =3D sizeof(struct tcphdr); > + if (m->m_len < off + sizeof(struct udphdr) + thlen && > + (m =3D m_pullup(m, off + sizeof(struct udphdr) + thlen)) =3D=3D= NULL) { > + TCPSTAT_INC(tcps_tunneled_errs); > + goto out; > + } > + iph =3D mtod(m, struct ip *); > + uh =3D (struct udphdr *)((caddr_t)iph + off); > + th =3D (struct tcphdr *)(uh + 1); > + thlen =3D th->th_off << 2; > + if (m->m_len < off + sizeof(struct udphdr) + thlen) { > + m =3D m_pullup(m, off + sizeof(struct udphdr) + thlen); > + if (m =3D=3D NULL) { > + TCPSTAT_INC(tcps_tunneled_errs); > + goto out; > + } else { > + iph =3D mtod(m, struct ip *); > + uh =3D (struct udphdr *)((caddr_t)iph + off); > + th =3D (struct tcphdr *)(uh + 1); > + } > + } > + m->m_pkthdr.tcp_tun_port =3D port =3D uh->uh_sport; > + bcopy(th, uh, m->m_len - off); > + m->m_len -=3D sizeof(struct udphdr); > + m->m_pkthdr.len -=3D sizeof(struct udphdr); > + /* > + * We use the same algorithm for > + * both UDP and TCP for c-sum. So > + * the code in tcp_input will skip > + * the checksum. So we do nothing > + * with the flag (m->m_pkthdr.csum_flags). 
> + */ > + switch (iph->ip_v) { > +#ifdef INET > + case IPVERSION: > + iph->ip_len =3D htons(ntohs(iph->ip_len) - sizeof(struct = udphdr)); > + tcp_input_with_port(&m, &off, IPPROTO_TCP, port); > + break; > +#endif > +#ifdef INET6 > + case IPV6_VERSION >> 4: > + ip6 =3D mtod(m, struct ip6_hdr *); > + ip6->ip6_plen =3D htons(ntohs(ip6->ip6_plen) - = sizeof(struct udphdr)); > + tcp6_input_with_port(&m, &off, IPPROTO_TCP, port); > + break; > +#endif > + default: > + goto out; > + break; > + } > + return; > +out: > + m_freem(m); > +} > + > static int > sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) > { > @@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, = functions_available, > NULL, 0, sysctl_net_inet_list_available, "A", > "list available TCP Function sets"); >=20 > +VNET_DEFINE(int, tcp_udp_tunneling_port) =3D = TCP_TUNNELING_PORT_DEFAULT; > + > +#ifdef INET > +VNET_DEFINE(struct socket *, udp4_tun_socket) =3D NULL; > +#define V_udp4_tun_socket VNET(udp4_tun_socket) > +#endif > +#ifdef INET6 > +VNET_DEFINE(struct socket *, udp6_tun_socket) =3D NULL; > +#define V_udp6_tun_socket VNET(udp6_tun_socket) > +#endif > + > +static void > +tcp_over_udp_stop(void) > +{ > + /* > + * This function assumes sysctl caller holds inp_rinfo_lock() > + * for writting! > + */ > +#ifdef INET > + if (V_udp4_tun_socket !=3D NULL) { > + soclose(V_udp4_tun_socket); > + V_udp4_tun_socket =3D NULL; > + } > +#endif > +#ifdef INET6 > + if (V_udp6_tun_socket !=3D NULL) { > + soclose(V_udp6_tun_socket); > + V_udp6_tun_socket =3D NULL; > + } > +#endif > +} > + > +static int > +tcp_over_udp_start(void) > +{ > + uint16_t port; > + int ret; > +#ifdef INET > + struct sockaddr_in sin; > +#endif > +#ifdef INET6 > + struct sockaddr_in6 sin6; > +#endif > + /* > + * This function assumes sysctl caller holds inp_info_rlock() > + * for writting! 
> + */ > + port =3D V_tcp_udp_tunneling_port; > + if (ntohs(port) =3D=3D 0) { > + /* Must have a port set */ > + return (EINVAL); > + } > +#ifdef INET > + if (V_udp4_tun_socket !=3D NULL) { > + /* Already running -- must stop first */ > + return (EALREADY); > + } > +#endif > +#ifdef INET6 > + if (V_udp6_tun_socket !=3D NULL) { > + /* Already running -- must stop first */ > + return (EALREADY); > + } > +#endif > +#ifdef INET > + if ((ret =3D socreate(PF_INET, &V_udp4_tun_socket, > + SOCK_DGRAM, IPPROTO_UDP, > + curthread->td_ucred, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Call the special UDP hook. */ > + if ((ret =3D udp_set_kernel_tunneling(V_udp4_tun_socket, > + tcp_recv_udp_tunneled_packet, > + tcp_ctlinput_viaudp, > + NULL))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Ok, we have a socket, bind it to the port. */ > + memset(&sin, 0, sizeof(struct sockaddr_in)); > + sin.sin_len =3D sizeof(struct sockaddr_in); > + sin.sin_family =3D AF_INET; > + sin.sin_port =3D htons(port); > + if ((ret =3D sobind(V_udp4_tun_socket, > + (struct sockaddr *)&sin, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > +#endif > +#ifdef INET6 > + if ((ret =3D socreate(PF_INET6, &V_udp6_tun_socket, > + SOCK_DGRAM, IPPROTO_UDP, > + curthread->td_ucred, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Call the special UDP hook. */ > + if ((ret =3D udp_set_kernel_tunneling(V_udp6_tun_socket, > + tcp_recv_udp_tunneled_packet, > + tcp6_ctlinput_viaudp, > + NULL))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Ok, we have a socket, bind it to the port. 
*/ > + memset(&sin6, 0, sizeof(struct sockaddr_in6)); > + sin6.sin6_len =3D sizeof(struct sockaddr_in6); > + sin6.sin6_family =3D AF_INET6; > + sin6.sin6_port =3D htons(port); > + if ((ret =3D sobind(V_udp6_tun_socket, > + (struct sockaddr *)&sin6, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > +#endif > + return (0); > +} > + > +static int > +sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS) > +{ > + int error; > + uint32_t old, new; > + > + old =3D V_tcp_udp_tunneling_port; > + new =3D old; > + error =3D sysctl_handle_int(oidp, &new, 0, req); > + if ((error =3D=3D 0) && > + (req->newptr !=3D NULL)) { > + if ((new < TCP_TUNNELING_PORT_MIN) || > + (new > TCP_TUNNELING_PORT_MAX)) { > + error =3D EINVAL; > + } else { > + V_tcp_udp_tunneling_port =3D new; > + if (old !=3D 0) { > + tcp_over_udp_stop(); > + } > + if (new !=3D 0) { > + error =3D tcp_over_udp_start(); > + } > + } > + } > + return (error); > +} > + > +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port, > + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, > + &VNET_NAME(tcp_udp_tunneling_port), > + 0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU", > + "Tunneling port for tcp over udp"); > + > +VNET_DEFINE(int, tcp_udp_tunneling_overhead) =3D = TCP_TUNNELING_OVERHEAD_DEFAULT; > + > +static int > +sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS) > +{ > + int error, new; > + > + new =3D V_tcp_udp_tunneling_overhead; > + error =3D sysctl_handle_int(oidp, &new, 0, req); > + if (error =3D=3D 0 && req->newptr) { > + if ((new < TCP_TUNNELING_OVERHEAD_MIN) || > + (new > TCP_TUNNELING_OVERHEAD_MAX)) > + error =3D EINVAL; > + else > + V_tcp_udp_tunneling_overhead =3D new; > + } > + return (error); > +} > + > +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead, > + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, > + &VNET_NAME(tcp_udp_tunneling_overhead), > + 0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU", > + 
"MSS reduction when using tcp over udp"); > + > /* > * Exports one (struct tcp_function_info) for each alias/name. > */ > @@ -1305,7 +1558,7 @@ tcp_fini(void *xtp) > * of the tcpcb each time to conserve mbufs. > */ > void > -tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) > +tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, = void *tcp_ptr) > { > struct tcphdr *th =3D (struct tcphdr *)tcp_ptr; >=20 > @@ -1320,7 +1573,10 @@ tcpip_fillheaders(struct inpcb *inp, void = *ip_ptr, void *tcp_ptr) > (inp->inp_flow & IPV6_FLOWINFO_MASK); > ip6->ip6_vfc =3D (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | > (IPV6_VERSION & IPV6_VERSION_MASK); > - ip6->ip6_nxt =3D IPPROTO_TCP; > + if (port =3D=3D 0) > + ip6->ip6_nxt =3D IPPROTO_TCP; > + else > + ip6->ip6_nxt =3D IPPROTO_UDP; > ip6->ip6_plen =3D htons(sizeof(struct tcphdr)); > ip6->ip6_src =3D inp->in6p_laddr; > ip6->ip6_dst =3D inp->in6p_faddr; > @@ -1342,7 +1598,10 @@ tcpip_fillheaders(struct inpcb *inp, void = *ip_ptr, void *tcp_ptr) > ip->ip_off =3D 0; > ip->ip_ttl =3D inp->inp_ip_ttl; > ip->ip_sum =3D 0; > - ip->ip_p =3D IPPROTO_TCP; > + if (port =3D=3D 0) > + ip->ip_p =3D IPPROTO_TCP; > + else > + ip->ip_p =3D IPPROTO_UDP; > ip->ip_src =3D inp->inp_laddr; > ip->ip_dst =3D inp->inp_faddr; > } > @@ -1372,7 +1631,7 @@ tcpip_maketemplate(struct inpcb *inp) > t =3D malloc(sizeof(*t), M_TEMP, M_NOWAIT); > if (t =3D=3D NULL) > return (NULL); > - tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); > + tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void = *)&t->tt_t); > return (t); > } >=20 > @@ -1398,14 +1657,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, = struct tcphdr *th, struct mbuf *m, > struct inpcb *inp; > struct ip *ip; > struct mbuf *optm; > + struct udphdr *uh =3D NULL; > struct tcphdr *nth; > u_char *optp; > #ifdef INET6 > struct ip6_hdr *ip6; > int isipv6; > #endif /* INET6 */ > - int optlen, tlen, win; > + int optlen, tlen, win, ulen; > bool incl_opts; > + uint16_t port; >=20 
> KASSERT(tp !=3D NULL || m !=3D NULL, ("tcp_respond: tp and m = both NULL")); > NET_EPOCH_ASSERT(); > @@ -1423,6 +1684,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, = struct tcphdr *th, struct mbuf *m, > } else > inp =3D NULL; >=20 > + if (m !=3D NULL) { > +#ifdef INET6 > + if (isipv6 && ip6 && (ip6->ip6_nxt =3D=3D IPPROTO_UDP)) > + port =3D m->m_pkthdr.tcp_tun_port; > + else > *** 1128 LINES SKIPPED ***