From owner-p4-projects@FreeBSD.ORG Sat Jul 18 14:32:55 2009 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 613511065672; Sat, 18 Jul 2009 14:32:55 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 1E51E106566C for ; Sat, 18 Jul 2009 14:32:55 +0000 (UTC) (envelope-from andre@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 0BC338FC14 for ; Sat, 18 Jul 2009 14:32:55 +0000 (UTC) (envelope-from andre@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id n6IEWsfA071280 for ; Sat, 18 Jul 2009 14:32:54 GMT (envelope-from andre@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id n6IEWska071278 for perforce@freebsd.org; Sat, 18 Jul 2009 14:32:54 GMT (envelope-from andre@freebsd.org) Date: Sat, 18 Jul 2009 14:32:54 GMT Message-Id: <200907181432.n6IEWska071278@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to andre@freebsd.org using -f From: Andre Oppermann To: Perforce Change Reviews Cc: Subject: PERFORCE change 166228 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 18 Jul 2009 14:32:56 -0000 http://perforce.freebsd.org/chv.cgi?CH=166228 Change 166228 by andre@andre_t61 on 2009/07/18 14:32:22 Add and improve comments. Complete tcp_sack_nextseg(). Adapt tcp_retransmit() to new sack semantics. Affected files ... .. //depot/projects/tcp_new/netinet/tcp_output.c#13 edit .. //depot/projects/tcp_new/netinet/tcp_sack.c#9 edit Differences ... 
==== //depot/projects/tcp_new/netinet/tcp_output.c#13 (text+ko) ==== @@ -793,23 +793,37 @@ * 5. TCP congestion window validation RFC2861 */ - /* Limited transmit */ - if (tp->snd_dupack < tcp_dupthresh && dlen > *len) + /* + * Limited transmit: transmit new data upon the arrival of the + * first two consecutive duplicate ACKs. + * RFC3042: section 2 + */ + if (tp->snd_dupack < tcp_dupthresh && dlen > *len) { *len = min(dlen, tp->snd_mss); /* up to one mss above cwnd */ - - if (tp->snd_dupack < tcp_dupthresh) return (0); + } - /* Remember the highest byte sent. */ - if (SEQ_LEQ(tp->snd_recover, tp->snd_una)) { + /* + * Remember the highest byte sent yet + * and set snd_rxmit to snd_una. + */ + if (tp->snd_dupack == tcp_dupthresh) { tp->snd_recover = tp->snd_nxt; - //tcp_cc_fr_enter(tp); tp->snd_rxmit = tp->snd_una; + rlen = tcp_sack_firsthole(tp, &rexmit); + } else { + rlen = tcp_sack_nextseg(tp, &tp->snd_rexmit, dlen); } - rlen = tcp_sack_nextseg(tp, &tp->snd_rexmit); + if (rlen == 0) + if (dlen) + *len = dlen; /* XXXAO: pipe! */ + else + *len = 0; + return (0); + } else + rlen = min(rlen, pipe); /* XXXAO: pipe! */ - rlen = min(so->so_snd.sb_cc - SEQ_DELTA(tp->snd_una, tp->snd_rxmit), tp->snd_mss); /* * Fill in headers. ==== //depot/projects/tcp_new/netinet/tcp_sack.c#9 (text+ko) ==== @@ -185,7 +185,13 @@ struct tcp_sack_block *tsb, *tsbn; struct tcp_sack_block sack; - /* Remove any blocks from the scoreboard when full acked. */ + /* + * Remove any blocks from the scoreboard when fully acked. + * When a partial ACK cuts into a sackblock we remove that + * full block. The receiver has either sent incorrect SACK + * information or has flushed data from its reassembly queue + * so that a retransmit has become necessary. + */ RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) { if (SEQ_LT(th_ack, tsb->tsb_blk.start)) break; @@ -193,7 +199,10 @@ tcp_sack_free(tp, tsb); } - /* SACK header but no blocks. 
*/ + /* + * SACK header but no blocks or no SACK information even though + * it was sent before. + */ if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0) return (0); else if (!(to->to_flags & TOF_SACK)) @@ -234,7 +243,10 @@ SEQ_GT(sack.tsb_blk.end, tp->snd_nxt)) continue; - /* Return match that has at least partial overlap to either side. */ + /* + * Return match that has at least partial overlap to either side or + * insert a new sackblock. + */ if ((tsb = RB_FIND(tcp_sackblocks, &tp->snd_sackblocks, &sack)) != NULL) { /* Within an already known block, common case. */ if (SEQ_GEQ(sack.tsb_blk.start, tsb->tsb_blk.start) && @@ -242,6 +254,7 @@ /* * D-SACK, was a duplicate retransmit. * RFC2883: section 5 + * XXXAO: Adjust pipe. */ if (i == 0 && SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss) { //TCPSTAT_INC(); @@ -290,30 +303,91 @@ } /* - * Determine the next start and length of the next hole relative - * to rexmit. + * Determine the start and length of the next hole relative to rexmit. + * RFC3517: section 4 and 5 + * + * NB: When we return zero it is the responsibility of the caller + * to test whether more previously unsent data is available. + * RFC3517: section 4, NextSeg() test (2) and (4) + * + * NB: We return the full size of the current hole to avoid looping + * over the same hole when pipe allows more than one segment to be + * sent. 
+ * RFC3517: section 5, (C.1) and (C.5) */ int -tcp_sack_nextseg(struct tcpcb *tp, tcp_seq *rexmit) +tcp_sack_nextseg(struct tcpcb *tp, tcp_seq *rexmit, int dlen) { - int len = 0; - struct tcp_sack_block *tsb, *tsbn; + int len = 0, blocks = 0, islost = 0, sacked = 0; + struct tcp_sack_block *tsb, *tsbp; struct tcp_sack_block sack; sack.tsb_blk.start = rexmit; sack.tsb_blk.end = rexmit; - if ((tsb = RB_NFIND(tcp_sackblocks, &tp->snd_sackblocks, &sack)) != NULL) { - if (*rexmit < tsb->tsb_blk.start) { - len = SEQ_DELTA(*rexmit, tsb->tsb_blk.start); - } else if ((tsbn = RB_NEXT(tcp_sackblocks, &tp->snd_sackblocks, tsb)) != NULL) { - *rexmit = tsb->tsb_blk.end; - len = SEQ_DELTA(tsb->tsb_blk.end, tsbn->tsb_blk.start); - } else { - *rexmit = tsb->tsb_blk.end; - len = (SEQ_DELTA(tsb->tsb_blk.end, tp->snd_nxt); + /* + * Find the sackblock that covers rexmit, or the first one above it. + * RFC3517: section 4, NextSeg() Test 4 is automatically fulfilled. + */ + if ((tsb = RB_NFIND(tcp_sackblocks, &tp->snd_sackblocks, &sack)) != NULL) + return (0); + + /* + * If rexmit is below the first sackblock start from there, + * otherwise move rexmit to the end of it (ie. the start of the next hole). + */ + if (SEQ_LT(*rexmit, tsb->tsb_blk.start)) { + len = SEQ_DELTA(*rexmit, tsb->tsb_blk.start); + sacked = SEQ_DELTA(*rexmit, tsb->tsb_blk.end); + } else + *rexmit = tsb->tsb_blk.end; + + /* + * Determine whether all conditions for a retransmit are fulfilled: + * DupThresh * SMSS bytes above 'SeqNum' have been SACKed -or- + * DupThresh discontiguous sackblocks have arrived above 'SeqNum'. + * RFC3517: section 4, IsLost() processing + * RFC3517: section 5, NextSeg() test (1.a-c) processing + */ + RB_FOREACH_FROM(tsbp, tcp_sackblocks, tsb) { + /* Get the size of this hole, if the first. 
*/ + if (len == 0) + len = SEQ_DELTA(tsbp->tsb_blk.end, tsb->tsb_blk.start); + sacked += SEQ_DELTA(tsb->tsb_blk.start, tsb->tsb_blk.end); + if (sacked > 3 * tp->snd_mss || blocks++ > 2) { + islost = 1; + break; } } + + /* + * Disambiguate between NextSeg() test (2) and (3). + * If there is more previously unsent data available + * and we don't fulfill tests (1.a-c) then test (2) + * is true and we return zero. Otherwise test (3) + * is true, if len is set, and we get a retransmit. + * RFC3517: section 4, NextSeg() processing + */ + if (!islost && dlen > 0) + len = 0; + + return (len); +} + +/* + * Return the start and length of the first sack hole. + */ +int +tcp_sack_firsthole(struct tcpcb *tp, tcp_seq *rexmit) +{ + int len = 0; + struct tcp_sack_block *tsb; + + *rexmit = tp->snd_una; + + if ((tsb = RB_MIN(tcp_sackblocks, &tp->snd_sackblocks)) != NULL) + len = SEQ_DELTA(*rexmit, tsb->tsb_blk.start); + return (len); }