Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Dec 2015 10:05:52 +0900
From:      Yongmin Cho <yongmincho82@gmail.com>
To:        Hiren Panchasara <hiren@FreeBSD.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r292003 - head/sys/netinet
Message-ID:  <20151210010551.GA11213@yongmincho-All-Series>
In-Reply-To: <201512082121.tB8LLmDL059933@repo.freebsd.org>
References:  <201512082121.tB8LLmDL059933@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
I've been wating for this issue is solved.
I will try to test this implementation.
thanks.

On Tue, Dec 08, 2015 at 09:21:48PM +0000, Hiren Panchasara wrote:
> Author: hiren
> Date: Tue Dec  8 21:21:48 2015
> New Revision: 292003
> URL: https://svnweb.freebsd.org/changeset/base/292003
> 
> Log:
>   One of the ways to detect loss is to count duplicate acks coming back from the
>   other end till it reaches predetermined threshold which is 3 for us right now.
>   Once that happens, we trigger fast-retransmit to do loss recovery.
>   
>   Main problem with the current implementation is that we don't honor SACK
>   information well to detect whether an incoming ack is a dupack or not. RFC6675
>   has latest recommendations for that. According to it, dupack is a segment that
>   arrives carrying a SACK block that identifies previously unknown information
>   between snd_una and snd_max even if it carries new data, changes the advertised
>   window, or moves the cumulative acknowledgment point.
>   
>   With the prevalence of Selective ACK (SACK) these days, improper handling can
>   lead to delayed loss recovery.
>   
>   With the fix, new behavior looks like following:
>   
>   0) th_ack < snd_una --> ignore
>   Old acks are ignored.
>   1) th_ack == snd_una, !sack_changed --> ignore
>   Acks with SACK enabled but without any new SACK info in them are ignored.
>   2) th_ack == snd_una, window == old_window --> increment
>   Increment on a good dupack.
>   3) th_ack == snd_una, window != old_window, sack_changed --> increment
>   When SACK enabled, it's okay to have advertized window changed if the ack has
>   new SACK info.
>   4) th_ack > snd_una --> reset to 0
>   Reset to 0 when left edge moves.
>   5) th_ack > snd_una, sack_changed --> increment
>   Increment if left edge moves but there is new SACK info.
>   
>   Here, sack_changed is the indicator that incoming ack has previously unknown
>   SACK info in it.
>   
>   Note: This fix is not fully compliant to RFC6675. That may require a few
>   changes to current implementation in order to keep per-sackhole dupack counter
>   and change to the way we mark/handle sack holes.
>   
>   PR:			203663
>   Reviewed by:		jtl
>   MFC after:		3 weeks
>   Sponsored by:		Limelight Networks
>   Differential Revision:	https://reviews.freebsd.org/D4225
> 
> Modified:
>   head/sys/netinet/tcp_input.c
>   head/sys/netinet/tcp_sack.c
>   head/sys/netinet/tcp_var.h
> 
> Modified: head/sys/netinet/tcp_input.c
> ==============================================================================
> --- head/sys/netinet/tcp_input.c	Tue Dec  8 20:20:40 2015	(r292002)
> +++ head/sys/netinet/tcp_input.c	Tue Dec  8 21:21:48 2015	(r292003)
> @@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tc
>      struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
>      int ti_locked)
>  {
> -	int thflags, acked, ourfinisacked, needoutput = 0;
> +	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
>  	int rstreason, todrop, win;
>  	u_long tiwin;
>  	char *s;
> @@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tc
>  	thflags = th->th_flags;
>  	inc = &tp->t_inpcb->inp_inc;
>  	tp->sackhint.last_sack_ack = 0;
> +	sack_changed = 0;
>  
>  	/*
>  	 * If this is either a state-changing packet or current state isn't
> @@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tc
>  		if ((tp->t_flags & TF_SACK_PERMIT) &&
>  		    ((to.to_flags & TOF_SACK) ||
>  		     !TAILQ_EMPTY(&tp->snd_holes)))
> -			tcp_sack_doack(tp, &to, th->th_ack);
> +			sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
>  		else
>  			/*
>  			 * Reset the value so that previous (valid) value
> @@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tc
>  		hhook_run_tcp_est_in(tp, th, &to);
>  
>  		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
> -			if (tlen == 0 && tiwin == tp->snd_wnd) {
> +			if (tlen == 0 &&
> +			    (tiwin == tp->snd_wnd ||
> +			    (tp->t_flags & TF_SACK_PERMIT))) {
>  				/*
>  				 * If this is the first time we've seen a
>  				 * FIN from the remote, this is not a
> @@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tc
>  				 * When using TCP ECN, notify the peer that
>  				 * we reduced the cwnd.
>  				 */
> -				if (!tcp_timer_active(tp, TT_REXMT) ||
> -				    th->th_ack != tp->snd_una)
> +				/*
> +				 * Following 2 kinds of acks should not affect
> +				 * dupack counting:
> +				 * 1) Old acks
> +				 * 2) Acks with SACK but without any new SACK
> +				 * information in them. These could result from
> +				 * any anomaly in the network like a switch
> +				 * duplicating packets or a possible DoS attack.
> +				 */
> +				if (th->th_ack != tp->snd_una ||
> +				    ((tp->t_flags & TF_SACK_PERMIT) &&
> +				    !sack_changed))
> +					break;
> +				else if (!tcp_timer_active(tp, TT_REXMT))
>  					tp->t_dupacks = 0;
>  				else if (++tp->t_dupacks > tcprexmtthresh ||
>  				     IN_FASTRECOVERY(tp->t_flags)) {
> @@ -2608,9 +2623,20 @@ tcp_do_segment(struct mbuf *m, struct tc
>  					tp->snd_cwnd = oldcwnd;
>  					goto drop;
>  				}
> -			} else
> -				tp->t_dupacks = 0;
> +			}
>  			break;
> +		} else {
> +			/*
> +			 * This ack is advancing the left edge, reset the
> +			 * counter.
> +			 */
> +			tp->t_dupacks = 0;
> +			/*
> +			 * If this ack also has new SACK info, increment the
> +			 * counter as per rfc6675.
> +			 */
> +			if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
> +				tp->t_dupacks++;
>  		}
>  
>  		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
> @@ -2629,7 +2655,6 @@ tcp_do_segment(struct mbuf *m, struct tc
>  			} else
>  				cc_post_recovery(tp, th);
>  		}
> -		tp->t_dupacks = 0;
>  		/*
>  		 * If we reach this point, ACK is not a duplicate,
>  		 *     i.e., it ACKs something we sent.
> 
> Modified: head/sys/netinet/tcp_sack.c
> ==============================================================================
> --- head/sys/netinet/tcp_sack.c	Tue Dec  8 20:20:40 2015	(r292002)
> +++ head/sys/netinet/tcp_sack.c	Tue Dec  8 21:21:48 2015	(r292003)
> @@ -345,17 +345,22 @@ tcp_sackhole_remove(struct tcpcb *tp, st
>   * Process cumulative ACK and the TCP SACK option to update the scoreboard.
>   * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
>   * the sequence space).
> + * Returns 1 if incoming ACK has previously unknown SACK information,
> + * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
> + * to that (i.e. left edge moving) would also be considered a change in SACK
> + * information which is slightly different than rfc6675.
>   */
> -void
> +int
>  tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
>  {
>  	struct sackhole *cur, *temp;
>  	struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
> -	int i, j, num_sack_blks;
> +	int i, j, num_sack_blks, sack_changed;
>  
>  	INP_WLOCK_ASSERT(tp->t_inpcb);
>  
>  	num_sack_blks = 0;
> +	sack_changed = 0;
>  	/*
>  	 * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
>  	 * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
> @@ -392,7 +397,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>  	 * received.
>  	 */
>  	if (num_sack_blks == 0)
> -		return;
> +		return (sack_changed);
>  
>  	/*
>  	 * Sort the SACK blocks so we can update the scoreboard with just one
> @@ -443,6 +448,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>  			tp->snd_fack = sblkp->end;
>  			/* Go to the previous sack block. */
>  			sblkp--;
> +			sack_changed = 1;
>  		} else {
>  			/* 
>  			 * We failed to add a new hole based on the current 
> @@ -459,9 +465,11 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>  			    SEQ_LT(tp->snd_fack, sblkp->end))
>  				tp->snd_fack = sblkp->end;
>  		}
> -	} else if (SEQ_LT(tp->snd_fack, sblkp->end))
> +	} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
>  		/* fack is advanced. */
>  		tp->snd_fack = sblkp->end;
> +		sack_changed = 1;
> +	}
>  	/* We must have at least one SACK hole in scoreboard. */
>  	KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
>  	    ("SACK scoreboard must not be empty"));
> @@ -490,6 +498,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>  		tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
>  		KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
>  		    ("sackhint bytes rtx >= 0"));
> +		sack_changed = 1;
>  		if (SEQ_LEQ(sblkp->start, cur->start)) {
>  			/* Data acks at least the beginning of hole. */
>  			if (SEQ_GEQ(sblkp->end, cur->end)) {
> @@ -545,6 +554,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>  		else
>  			sblkp--;
>  	}
> +	return (sack_changed);
>  }
>  
>  /*
> 
> Modified: head/sys/netinet/tcp_var.h
> ==============================================================================
> --- head/sys/netinet/tcp_var.h	Tue Dec  8 20:20:40 2015	(r292002)
> +++ head/sys/netinet/tcp_var.h	Tue Dec  8 21:21:48 2015	(r292003)
> @@ -741,7 +741,7 @@ void	 tcp_hc_update(struct in_conninfo *
>  extern	struct pr_usrreqs tcp_usrreqs;
>  tcp_seq tcp_new_isn(struct tcpcb *);
>  
> -void	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
> +int	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
>  void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
>  void	 tcp_clean_sackreport(struct tcpcb *tp);
>  void	 tcp_sack_adjust(struct tcpcb *tp);
> _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20151210010551.GA11213>