Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 May 2018 02:22:34 +0000 (UTC)
From:      Matt Macy <mmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r333346 - head/sys/netinet
Message-ID:  <201805080222.w482MYiX087233@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mmacy
Date: Tue May  8 02:22:34 2018
New Revision: 333346
URL: https://svnweb.freebsd.org/changeset/base/333346

Log:
  Fix spurious retransmit recovery on low latency networks
  
  TCP's smoothed RTT (SRTT) can be much larger than the RTT actually observed. This can happen either because hz restricts the calculable RTT to 10ms in VMs (or 1ms with the default hz of 1000), or simply because SRTT recently incorporated a larger value.
  
  If an ACK arrives before the calculated badrxtwin (now + SRTT/2), which is computed as:
  tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
  
  we'll erroneously reset snd_una to snd_max. If multiple segments were dropped and this happens repeatedly, the transmit rate will be limited to 1 MSS per RTO until we've retransmitted all drops.
  
  Reported by:	rstone
  Reviewed by:	hiren, transport
  Approved by:	sbruno
  MFC after:	1 month
  Differential Revision:	https://reviews.freebsd.org/D8556

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_timer.c

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Tue May  8 01:39:45 2018	(r333345)
+++ head/sys/netinet/tcp_input.c	Tue May  8 02:22:34 2018	(r333346)
@@ -1682,6 +1682,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
+		else if (tp->t_flags & TF_PREVVALID &&
+			 tp->t_badrxtwin != 0 && SEQ_LT(to.to_tsecr, tp->t_badrxtwin))
+			cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
@@ -1794,9 +1797,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 				TCPSTAT_INC(tcps_predack);
 
 				/*
-				 * "bad retransmit" recovery.
+				 * "bad retransmit" recovery without timestamps.
 				 */
-				if (tp->t_rxtshift == 1 &&
+				if ((to.to_flags & TOF_TS) == 0 &&
+				    tp->t_rxtshift == 1 &&
 				    tp->t_flags & TF_PREVVALID &&
 				    (int)(ticks - tp->t_badrxtwin) < 0) {
 					cc_cong_signal(tp, th, CC_RTO_ERR);
@@ -2787,8 +2791,10 @@ process_ACK:
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
-		if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
-		    (int)(ticks - tp->t_badrxtwin) < 0)
+		if (tp->t_rxtshift == 1 &&
+		    tp->t_flags & TF_PREVVALID &&
+		    tp->t_badrxtwin &&
+		    SEQ_LT(to.to_tsecr, tp->t_badrxtwin))
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Tue May  8 01:39:45 2018	(r333345)
+++ head/sys/netinet/tcp_output.c	Tue May  8 02:22:34 2018	(r333346)
@@ -206,7 +206,7 @@ tcp_output(struct tcpcb *tp)
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	unsigned ipsec_optlen = 0;
 #endif
-	int idle, sendalot;
+	int idle, sendalot, curticks;
 	int sack_rxmit, sack_bytes_rxmt;
 	struct sackhole *p;
 	int tso, mtu;
@@ -808,9 +808,12 @@ send:
 		/* Timestamps. */
 		if ((tp->t_flags & TF_RCVD_TSTMP) ||
 		    ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
-			to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+			curticks = tcp_ts_getticks();
+			to.to_tsval = curticks + tp->ts_offset;
 			to.to_tsecr = tp->ts_recent;
 			to.to_flags |= TOF_TS;
+			if (tp->t_rxtshift == 1)
+				tp->t_badrxtwin = curticks;
 		}
 
 		/* Set receive buffer autosizing timestamp. */

Modified: head/sys/netinet/tcp_timer.c
==============================================================================
--- head/sys/netinet/tcp_timer.c	Tue May  8 01:39:45 2018	(r333345)
+++ head/sys/netinet/tcp_timer.c	Tue May  8 02:22:34 2018	(r333346)
@@ -693,7 +693,12 @@ tcp_timer_rexmt(void * xtp)
 			tp->t_flags |= TF_WASCRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASCRECOVERY;
-		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+		if ((tp->t_flags & TF_RCVD_TSTMP) == 0)
+			tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+		/* In the event that we've negotiated timestamps
+		 * badrxtwin will be set to the value that we set
+		 * the retransmitted packet's to_tsval to by tcp_output
+		 */
 		tp->t_flags |= TF_PREVVALID;
 	} else
 		tp->t_flags &= ~TF_PREVVALID;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201805080222.w482MYiX087233>