From owner-dev-commits-src-all@freebsd.org Wed Apr 21 20:01:48 2021 Return-Path: Delivered-To: dev-commits-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 4E9435F07AD; Wed, 21 Apr 2021 20:01:48 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4FQWfN1qwVz4RKh; Wed, 21 Apr 2021 20:01:48 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 322974548; Wed, 21 Apr 2021 20:01:48 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 13LK1mDC089633; Wed, 21 Apr 2021 20:01:48 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 13LK1mpd089632; Wed, 21 Apr 2021 20:01:48 GMT (envelope-from git) Date: Wed, 21 Apr 2021 20:01:48 GMT Message-Id: <202104212001.13LK1mpd089632@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Navdeep Parhar Subject: git: 01d74fe1ffc3 - main - Path MTU discovery hooks for offloaded TCP connections. 
MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: np X-Git-Repository: src X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: 01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2 Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-all@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commit messages for all branches of the src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Apr 2021 20:01:48 -0000 The branch main has been updated by np: URL: https://cgit.FreeBSD.org/src/commit/?id=01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2 commit 01d74fe1ffc32dc7f42dc0fb0c4861276a6b2bd2 Author: Navdeep Parhar AuthorDate: 2021-04-13 00:25:22 +0000 Commit: Navdeep Parhar CommitDate: 2021-04-21 20:00:16 +0000 Path MTU discovery hooks for offloaded TCP connections. Notify the TOE driver when an ICMP type 3 code 4 (Fragmentation needed and DF set) message is received for an offloaded connection. This gives the driver an opportunity to lower the path MTU for the connection and resume transmission, much like what the kernel does for the connections that it handles. 
Reviewed by: glebius@ Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D29755 --- sys/netinet/tcp_offload.c | 11 +++++++ sys/netinet/tcp_offload.h | 3 ++ sys/netinet/tcp_subr.c | 80 ++++++++++++++++++++++++++++++++--------------- sys/netinet/toecore.c | 9 ++++++ sys/netinet/toecore.h | 4 +++ 5 files changed, 81 insertions(+), 26 deletions(-) diff --git a/sys/netinet/tcp_offload.c b/sys/netinet/tcp_offload.c index ba190f0303f1..84a4bc3c31a3 100644 --- a/sys/netinet/tcp_offload.c +++ b/sys/netinet/tcp_offload.c @@ -219,3 +219,14 @@ tcp_offload_detach(struct tcpcb *tp) tod->tod_pcb_detach(tod, tp); } + +void +tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu) +{ + struct toedev *tod = tp->tod; + + KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); + INP_WLOCK_ASSERT(tp->t_inpcb); + + tod->tod_pmtu_update(tod, tp, seq, mtu); +} diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h index 19c120ccdd7d..8f3786e9f7eb 100644 --- a/sys/netinet/tcp_offload.h +++ b/sys/netinet/tcp_offload.h @@ -36,6 +36,8 @@ #error "no user-serviceable parts inside" #endif +#include + extern int registered_toedevs; int tcp_offload_connect(struct socket *, struct sockaddr *); @@ -48,5 +50,6 @@ void tcp_offload_ctloutput(struct tcpcb *, int, int); void tcp_offload_tcp_info(struct tcpcb *, struct tcp_info *); int tcp_offload_alloc_tls_session(struct tcpcb *, struct ktls_session *, int); void tcp_offload_detach(struct tcpcb *); +void tcp_offload_pmtu_update(struct tcpcb *, tcp_seq, int); #endif diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 1ce7a5b1fcf3..b5ecdc6f2307 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2791,6 +2791,21 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, #endif /* INET6 */ #ifdef INET +/* Path MTU to try next when a fragmentation-needed message is received. 
*/ +static inline int +tcp_next_pmtu(const struct icmp *icp, const struct ip *ip) +{ + int mtu = ntohs(icp->icmp_nextmtu); + + /* If no alternative MTU was proposed, try the next smaller one. */ + if (!mtu) + mtu = ip_next_mtu(ntohs(ip->ip_len), 1); + if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr)) + mtu = V_tcp_minmss + sizeof(struct tcpiphdr); + + return (mtu); +} + static void tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port) { @@ -2846,6 +2861,17 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port) !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { tp = intotcpcb(inp); +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) { + /* + * MTU discovery for offloaded connections. Let + * the TOE driver verify seq# and process it. + */ + mtu = tcp_next_pmtu(icp, ip); + tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu); + goto out; + } +#endif if (tp->t_port != port) { goto out; } @@ -2853,24 +2879,11 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port) SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* - * MTU discovery: - * If we got a needfrag set the MTU - * in the route to the suggested new - * value (if given) and then notify. + * MTU discovery: we got a needfrag and + * will potentially try a lower MTU. */ - mtu = ntohs(icp->icmp_nextmtu); - /* - * If no alternative MTU was - * proposed, try the next smaller - * one. - */ - if (!mtu) - mtu = ip_next_mtu( - ntohs(ip->ip_len), 1); - if (mtu < V_tcp_minmss + - sizeof(struct tcpiphdr)) - mtu = V_tcp_minmss + - sizeof(struct tcpiphdr); + mtu = tcp_next_pmtu(icp, ip); + /* * Only process the offered MTU if it * is smaller than the current one. 
@@ -2948,6 +2961,20 @@ tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused) #endif /* INET */ #ifdef INET6 +static inline int +tcp6_next_pmtu(const struct icmp6_hdr *icmp6) +{ + int mtu = ntohl(icmp6->icmp6_mtu); + + /* + * If no alternative MTU was proposed, or the proposed MTU was too + * small, set to the min. + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU - 8; /* XXXNP: what is the adjustment for? */ + return (mtu); +} + static void tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port) { @@ -3039,6 +3066,14 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port) !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { tp = intotcpcb(inp); +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) { + /* MTU discovery for offloaded connections. */ + mtu = tcp6_next_pmtu(icmp6); + tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu); + goto out; + } +#endif if (tp->t_port != port) { goto out; } @@ -3051,15 +3086,8 @@ tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port) * in the route to the suggested new * value (if given) and then notify. */ - mtu = ntohl(icmp6->icmp6_mtu); - /* - * If no alternative MTU was - * proposed, or the proposed - * MTU was too small, set to - * the min. - */ - if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU - 8; + mtu = tcp6_next_pmtu(icmp6); + bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index d8d499a6fde3..5792298d2883 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -199,6 +199,14 @@ toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused, return (EINVAL); } +static void +toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused, + tcp_seq seq __unused, int mtu __unused) +{ + + return; +} + /* * Inform one or more TOE devices about a listening socket. 
*/ @@ -290,6 +298,7 @@ init_toedev(struct toedev *tod) tod->tod_ctloutput = toedev_ctloutput; tod->tod_tcp_info = toedev_tcp_info; tod->tod_alloc_tls_session = toedev_alloc_tls_session; + tod->tod_pmtu_update = toedev_pmtu_update; } /* diff --git a/sys/netinet/toecore.h b/sys/netinet/toecore.h index 36493abf7149..ce796ab54dc5 100644 --- a/sys/netinet/toecore.h +++ b/sys/netinet/toecore.h @@ -35,6 +35,7 @@ #error "no user-serviceable parts inside" #endif +#include #include struct tcpopt; @@ -114,6 +115,9 @@ struct toedev { /* Create a TLS session */ int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *, struct ktls_session *, int); + + /* ICMP fragmentation-needed received, adjust PMTU. */ + void (*tod_pmtu_update)(struct toedev *, struct tcpcb *, tcp_seq, int); }; typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);