Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 Dec 2015 21:21:48 +0000 (UTC)
From:      Hiren Panchasara <hiren@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r292003 - head/sys/netinet
Message-ID:  <201512082121.tB8LLmDL059933@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: hiren
Date: Tue Dec  8 21:21:48 2015
New Revision: 292003
URL: https://svnweb.freebsd.org/changeset/base/292003

Log:
  One of the ways to detect loss is to count duplicate acks coming back from the
  other end till it reaches predetermined threshold which is 3 for us right now.
  Once that happens, we trigger fast-retransmit to do loss recovery.
  
  Main problem with the current implementation is that we don't honor SACK
  information well to detect whether an incoming ack is a dupack or not. RFC6675
  has latest recommendations for that. According to it, dupack is a segment that
  arrives carrying a SACK block that identifies previously unknown information
  between snd_una and snd_max even if it carries new data, changes the advertised
  window, or moves the cumulative acknowledgment point.
  
  With the prevalence of Selective ACK (SACK) these days, improper handling can
  lead to delayed loss recovery.
  
  With the fix, new behavior looks like following:
  
  0) th_ack < snd_una --> ignore
  Old acks are ignored.
  1) th_ack == snd_una, !sack_changed --> ignore
  Acks with SACK enabled but without any new SACK info in them are ignored.
  2) th_ack == snd_una, window == old_window --> increment
  Increment on a good dupack.
  3) th_ack == snd_una, window != old_window, sack_changed --> increment
  When SACK enabled, it's okay to have advertized window changed if the ack has
  new SACK info.
  4) th_ack > snd_una --> reset to 0
  Reset to 0 when left edge moves.
  5) th_ack > snd_una, sack_changed --> increment
  Increment if left edge moves but there is new SACK info.
  
  Here, sack_changed is the indicator that incoming ack has previously unknown
  SACK info in it.
  
  Note: This fix is not fully compliant to RFC6675. That may require a few
  changes to current implementation in order to keep per-sackhole dupack counter
  and change to the way we mark/handle sack holes.
  
  PR:			203663
  Reviewed by:		jtl
  MFC after:		3 weeks
  Sponsored by:		Limelight Networks
  Differential Revision:	https://reviews.freebsd.org/D4225

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_sack.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Tue Dec  8 20:20:40 2015	(r292002)
+++ head/sys/netinet/tcp_input.c	Tue Dec  8 21:21:48 2015	(r292003)
@@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tc
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
     int ti_locked)
 {
-	int thflags, acked, ourfinisacked, needoutput = 0;
+	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win;
 	u_long tiwin;
 	char *s;
@@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 	thflags = th->th_flags;
 	inc = &tp->t_inpcb->inp_inc;
 	tp->sackhint.last_sack_ack = 0;
+	sack_changed = 0;
 
 	/*
 	 * If this is either a state-changing packet or current state isn't
@@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes)))
-			tcp_sack_doack(tp, &to, th->th_ack);
+			sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
 		else
 			/*
 			 * Reset the value so that previous (valid) value
@@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tc
 		hhook_run_tcp_est_in(tp, th, &to);
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
-			if (tlen == 0 && tiwin == tp->snd_wnd) {
+			if (tlen == 0 &&
+			    (tiwin == tp->snd_wnd ||
+			    (tp->t_flags & TF_SACK_PERMIT))) {
 				/*
 				 * If this is the first time we've seen a
 				 * FIN from the remote, this is not a
@@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tc
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
-				if (!tcp_timer_active(tp, TT_REXMT) ||
-				    th->th_ack != tp->snd_una)
+				/*
+				 * Following 2 kinds of acks should not affect
+				 * dupack counting:
+				 * 1) Old acks
+				 * 2) Acks with SACK but without any new SACK
+				 * information in them. These could result from
+				 * any anomaly in the network like a switch
+				 * duplicating packets or a possible DoS attack.
+				 */
+				if (th->th_ack != tp->snd_una ||
+				    ((tp->t_flags & TF_SACK_PERMIT) &&
+				    !sack_changed))
+					break;
+				else if (!tcp_timer_active(tp, TT_REXMT))
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
@@ -2608,9 +2623,20 @@ tcp_do_segment(struct mbuf *m, struct tc
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
-			} else
-				tp->t_dupacks = 0;
+			}
 			break;
+		} else {
+			/*
+			 * This ack is advancing the left edge, reset the
+			 * counter.
+			 */
+			tp->t_dupacks = 0;
+			/*
+			 * If this ack also has new SACK info, increment the
+			 * counter as per rfc6675.
+			 */
+			if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+				tp->t_dupacks++;
 		}
 
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
@@ -2629,7 +2655,6 @@ tcp_do_segment(struct mbuf *m, struct tc
 			} else
 				cc_post_recovery(tp, th);
 		}
-		tp->t_dupacks = 0;
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.

Modified: head/sys/netinet/tcp_sack.c
==============================================================================
--- head/sys/netinet/tcp_sack.c	Tue Dec  8 20:20:40 2015	(r292002)
+++ head/sys/netinet/tcp_sack.c	Tue Dec  8 21:21:48 2015	(r292003)
@@ -345,17 +345,22 @@ tcp_sackhole_remove(struct tcpcb *tp, st
  * Process cumulative ACK and the TCP SACK option to update the scoreboard.
  * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
  * the sequence space).
+ * Returns 1 if incoming ACK has previously unknown SACK information,
+ * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
+ * to that (i.e. left edge moving) would also be considered a change in SACK
+ * information which is slightly different than rfc6675.
  */
-void
+int
 tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 {
 	struct sackhole *cur, *temp;
 	struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
-	int i, j, num_sack_blks;
+	int i, j, num_sack_blks, sack_changed;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	num_sack_blks = 0;
+	sack_changed = 0;
 	/*
 	 * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
 	 * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
@@ -392,7 +397,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
 	 * received.
 	 */
 	if (num_sack_blks == 0)
-		return;
+		return (sack_changed);
 
 	/*
 	 * Sort the SACK blocks so we can update the scoreboard with just one
@@ -443,6 +448,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
 			tp->snd_fack = sblkp->end;
 			/* Go to the previous sack block. */
 			sblkp--;
+			sack_changed = 1;
 		} else {
 			/* 
 			 * We failed to add a new hole based on the current 
@@ -459,9 +465,11 @@ tcp_sack_doack(struct tcpcb *tp, struct 
 			    SEQ_LT(tp->snd_fack, sblkp->end))
 				tp->snd_fack = sblkp->end;
 		}
-	} else if (SEQ_LT(tp->snd_fack, sblkp->end))
+	} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
 		/* fack is advanced. */
 		tp->snd_fack = sblkp->end;
+		sack_changed = 1;
+	}
 	/* We must have at least one SACK hole in scoreboard. */
 	KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
 	    ("SACK scoreboard must not be empty"));
@@ -490,6 +498,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
 		tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
 		KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
 		    ("sackhint bytes rtx >= 0"));
+		sack_changed = 1;
 		if (SEQ_LEQ(sblkp->start, cur->start)) {
 			/* Data acks at least the beginning of hole. */
 			if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -545,6 +554,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
 		else
 			sblkp--;
 	}
+	return (sack_changed);
 }
 
 /*

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Tue Dec  8 20:20:40 2015	(r292002)
+++ head/sys/netinet/tcp_var.h	Tue Dec  8 21:21:48 2015	(r292003)
@@ -741,7 +741,7 @@ void	 tcp_hc_update(struct in_conninfo *
 extern	struct pr_usrreqs tcp_usrreqs;
 tcp_seq tcp_new_isn(struct tcpcb *);
 
-void	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+int	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
 void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
 void	 tcp_clean_sackreport(struct tcpcb *tp);
 void	 tcp_sack_adjust(struct tcpcb *tp);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512082121.tB8LLmDL059933>