From owner-svn-src-head@FreeBSD.ORG Mon Apr 16 13:49:04 2012 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 42170106566B; Mon, 16 Apr 2012 13:49:04 +0000 (UTC) (envelope-from glebius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 2CBA88FC14; Mon, 16 Apr 2012 13:49:04 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q3GDn4Ng036233; Mon, 16 Apr 2012 13:49:04 GMT (envelope-from glebius@svn.freebsd.org) Received: (from glebius@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q3GDn3a2036228; Mon, 16 Apr 2012 13:49:03 GMT (envelope-from glebius@svn.freebsd.org) Message-Id: <201204161349.q3GDn3a2036228@svn.freebsd.org> From: Gleb Smirnoff Date: Mon, 16 Apr 2012 13:49:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r234342 - head/sys/netinet X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 16 Apr 2012 13:49:04 -0000 Author: glebius Date: Mon Apr 16 13:49:03 2012 New Revision: 234342 URL: http://svn.freebsd.org/changeset/base/234342 Log: When we receive an ICMP unreach need fragmentation datagram, we take proposed MTU value from it and update the TCP host cache. Then tcp_mss_update() is called on the corresponding tcpcb. It finds the just allocated entry in the TCP host cache and updates MSS on the tcpcb. And then we do a fast retransmit of what we have in the tcp send buffer. This sequence gets broken if the TCP host cache is exhausted. 
In this case allocation fails, and the subsequent tcp_mss_update() call finds nothing in the cache. The fast retransmit is then done with an unreduced MSS and is immediately answered by the remote host with new ICMP datagrams, and the cycle repeats. This ping-pong can go up to wirespeed. To fix this: - tcp_mss_update() gets new parameter - mtuoffer, that is like offer, but needs to have min_protoh subtracted. - tcp_mtudisc() as notification method renamed to tcp_mtudisc_notify(). - tcp_mtudisc() now accepts not a useless error argument, but proposed MTU value, that is passed to tcp_mss_update() as mtuoffer. Reported by: az Reported by: Andrey Zonov Reviewed by: andre (previous version of patch) Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c ============================================================================== --- head/sys/netinet/tcp_input.c Mon Apr 16 13:41:46 2012 (r234341) +++ head/sys/netinet/tcp_input.c Mon Apr 16 13:49:03 2012 (r234342) @@ -3288,22 +3288,19 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt * are present. Store the upper limit of the length of options plus * data in maxopd. * - * In case of T/TCP, we call this routine during implicit connection - * setup as well (offer = -1), to initialize maxseg from the cached - * MSS of our peer. - * * NOTE that this routine is only called when we process an incoming - * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt(). + * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS + * settings are handled in tcp_mssopt(). */ void -tcp_mss_update(struct tcpcb *tp, int offer, +tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, struct hc_metrics_lite *metricptr, int *mtuflags) { int mss = 0; u_long maxmtu = 0; struct inpcb *inp = tp->t_inpcb; struct hc_metrics_lite metrics; - int origoffer = offer; + int origoffer; #ifdef INET6 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 
1 : 0; size_t min_protoh = isipv6 ? @@ -3315,6 +3312,12 @@ tcp_mss_update(struct tcpcb *tp, int off INP_WLOCK_ASSERT(tp->t_inpcb); + if (mtuoffer != -1) { + KASSERT(offer == -1, ("%s: conflict", __func__)); + offer = mtuoffer - min_protoh; + } + origoffer = offer; + /* Initialize. */ #ifdef INET6 if (isipv6) { @@ -3473,7 +3476,7 @@ tcp_mss(struct tcpcb *tp, int offer) KASSERT(tp != NULL, ("%s: tp == NULL", __func__)); - tcp_mss_update(tp, offer, &metrics, &mtuflags); + tcp_mss_update(tp, offer, -1, &metrics, &mtuflags); mss = tp->t_maxseg; inp = tp->t_inpcb; Modified: head/sys/netinet/tcp_output.c ============================================================================== --- head/sys/netinet/tcp_output.c Mon Apr 16 13:41:46 2012 (r234341) +++ head/sys/netinet/tcp_output.c Mon Apr 16 13:49:03 2012 (r234342) @@ -1293,7 +1293,7 @@ out: */ if (tso) tp->t_flags &= ~TF_TSO; - tcp_mtudisc(tp->t_inpcb, 0); + tcp_mtudisc(tp->t_inpcb, -1); return (0); case EHOSTDOWN: case EHOSTUNREACH: Modified: head/sys/netinet/tcp_subr.c ============================================================================== --- head/sys/netinet/tcp_subr.c Mon Apr 16 13:41:46 2012 (r234341) +++ head/sys/netinet/tcp_subr.c Mon Apr 16 13:49:03 2012 (r234342) @@ -222,6 +222,7 @@ VNET_DEFINE(uma_zone_t, sack_hole_zone); VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); static struct inpcb *tcp_notify(struct inpcb *, int); +static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); @@ -1337,7 +1338,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s return; if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc; + notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; @@ -1410,9 +1411,10 @@ tcp_ctlinput(int cmd, struct sockaddr *s */ if (mtu <= tcp_maxmtu(&inc, NULL)) 
tcp_hc_updatemtu(&inc, mtu); - } - - inp = (*notify)(inp, inetctlerrmap[cmd]); + tcp_mtudisc(inp, mtu); + } else + inp = (*notify)(inp, + inetctlerrmap[cmd]); } } if (inp != NULL) @@ -1452,7 +1454,7 @@ tcp6_ctlinput(int cmd, struct sockaddr * return; if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc; + notify = tcp_mtudisc_notify; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; @@ -1653,12 +1655,19 @@ tcp_drop_syn_sent(struct inpcb *inp, int /* * When `need fragmentation' ICMP is received, update our idea of the MSS - * based on the new value in the route. Also nudge TCP to send something, - * since we know the packet we just sent was dropped. + * based on the new value. Also nudge TCP to send something, since we + * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ +static struct inpcb * +tcp_mtudisc_notify(struct inpcb *inp, int error) +{ + + return (tcp_mtudisc(inp, -1)); +} + struct inpcb * -tcp_mtudisc(struct inpcb *inp, int errno) +tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; @@ -1671,7 +1680,7 @@ tcp_mtudisc(struct inpcb *inp, int errno tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); - tcp_mss_update(tp, -1, NULL, NULL); + tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); Modified: head/sys/netinet/tcp_var.h ============================================================================== --- head/sys/netinet/tcp_var.h Mon Apr 16 13:41:46 2012 (r234341) +++ head/sys/netinet/tcp_var.h Mon Apr 16 13:49:03 2012 (r234342) @@ -675,7 +675,8 @@ void tcp_reass_destroy(void); void tcp_input(struct mbuf *, int); u_long tcp_maxmtu(struct in_conninfo *, int *); u_long tcp_maxmtu6(struct in_conninfo *, int *); -void tcp_mss_update(struct tcpcb *, int, struct hc_metrics_lite *, int *); +void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, 
+ int *); void tcp_mss(struct tcpcb *, int); int tcp_mssopt(struct in_conninfo *); struct inpcb *