Date: Thu, 16 Jul 2009 08:42:16 GMT
From: Andre Oppermann <andre@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 166158 for review
Message-ID: <200907160842.n6G8gGiP031921@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=166158 Change 166158 by andre@andre_t61 on 2009/07/16 08:41:58 Flush the queue when a RFC2018 reneg is detected. Add RFC2883 DSACK detection support. Change sysctl names to better reflect their usage. Move tcp_sack_flush up to tcp_sack_free. Add more comments. Affected files ... .. //depot/projects/tcp_new/netinet/tcp_sack.c#7 edit Differences ... ==== //depot/projects/tcp_new/netinet/tcp_sack.c#7 (text+ko) ==== @@ -67,7 +67,14 @@ #endif /* TCPDEBUG */ /* - * Store all SACK blocks of the scoreboard in a ranged red-black tree. + * Implementation of Selective Acknowledgements (SACK) as described in + * RFC2018. + * + * This file implements the data sender part of SACK. It stores all + * received SACK blocks in a scoreboard built on a ranged red-black tree. + * + * The data receiver part (RFC2018: section 4) is part of the reassembly + * queue. */ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); @@ -75,17 +82,20 @@ int tcp_do_sack = 1; SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, &tcp_do_sack, 0, "Enable/Disable TCP SACK support"); -TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack); + +int tcp_do_dsack = 1; +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, dsack, CTLFLAG_RW, + &tcp_do_dsack, 0, "Enable TCP duplicate D-SACK support"); -static int tcp_sack_maxholes = 128; -SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW, - &tcp_sack_maxholes, 0, - "Maximum number of TCP SACK holes allowed per connection"); +static int tcp_sack_maxblocks = 32; +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxblocks, CTLFLAG_RW, + &tcp_sack_maxholes, 0, + "Per connection limit on the number of SACK blocks"); static int tcp_sack_globalmaxholes = 65536; -SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW, - &tcp_sack_globalmaxholes, 0, - "Global maximum number of TCP SACK holes"); +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxblocks, CTLFLAG_RW, + &tcp_sack_globalmaxblocks, 0, + "Global 
total limit on the number of SACK blocks"); static uma_zone_t tcp_sackblock_zone; @@ -94,9 +104,14 @@ { tcp_sackblock_zone = uma_zcreate("tcpsackblk", sizeof(struct tcp_sack_block), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + /* XXXAO: limit is not adjusted when changed at runtime. */ uma_zone_set_max(tcp_sackblock_zone, tcp_sack_globalmaxholes); } +/* + * Compare function implementing the ranged lookup on the RB tree. + * NB: The tree must never have any overlapping elements. + */ static __inline int tcp_sack_cmp(struct tcp_sack_block *a, struct tcp_sack_block *b) { @@ -111,6 +126,26 @@ RB_PROTOTYPE_STATIC(tcp_sackblocks, tcp_sack_block, tsb_rb, tcp_sack_cmp); RB_GENERATE_STATIC(tcp_sackblocks, tcp_sack_block, tsb_rb, tcp_sack_cmp); +/* + * Verify the integrity of the ranged RB tree. + */ +#ifdef INVARIANTS +static int +tcp_sack_verify(struct tcpcb *tp) +{ + struct tcp_sack_block *tsb, *tsbn; + + RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) { + if (SEQ_GEQ(tsb->tsb_blk.start, tsb->tsb_blk.end) || + SEQ_LEQ(tsb->tsb_blk.start, tp->snd_una) || + SEQ_GT(tsb->tsb_blk.end, tp->snd_nxt) || + (tsbn != NULL && SEQ_GEQ(tsb->tsb_blk.end, tsbn->tsb_blk.start))) + return (0); + } + return (1); +} +#endif + static void tcp_sack_free(struct tcpcb *tp, struct tcp_sack_block *tsb) { @@ -121,23 +156,21 @@ uma_zfree(tcp_sackblock_zone, tsb); } -#ifdef INVARIANTS -static int -tcp_sack_verify(struct tcpcb *tp) +void +tcp_sack_flush(struct tcpcb *tp) { struct tcp_sack_block *tsb, *tsbn; RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) { - if (SEQ_GEQ(tsb->tsb_blk.start, tsb->tsb_blk.end) || - SEQ_LEQ(tsb->tsb_blk.start, tp->snd_una) || - SEQ_GT(tsb->tsb_blk.end, tp->snd_nxt) || - (tsbn != NULL && SEQ_GEQ(tsb->tsb_blk.end, tsbn->tsb_blk.start))) - return (0); + tcp_sack_free(tp, tsb); } - return (1); } -#endif +/* + * Update the scoreboard to remember which sequence number ranges + * the receiver has reported as sucessfully received. 
+ * RFC2018: section 5 + */ int tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) { @@ -154,11 +187,10 @@ tcp_sack_free(tp, tsb); } - if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0) { - /* remove all sack blocks, strange reneg */ - tcp_sack_flush(tp); + /* SACK header but no blocks. */ + if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0) return (0); - } else if (!(to->to_flags & TOF_SACK)) + else if (!(to->to_flags & TOF_SACK)) return (0); /* Integrate SACK blocks from segment. */ @@ -169,24 +201,45 @@ sack.tsb_blk.start = ntohl(sack.tsb_blk.start); sack.tsb_blk.end = ntohl(sack.tsb_blk.end); - /* Validity checks on SACK blocks as received from sender. */ - if (SEQ_GT(sack.tsb_blk.start, sack.tsb_blk.end) || + /* + * The receiver has reneged, that is flushed the previously + * reported data from its reassembly queue. + * It is a bit difficult to reliably detect this condition. + * We try our best here to avoid false positives. + * RFC2018: section 8 + */ + if (to->to_nsacks == 1 && !RB_EMPTY(tp->snd_sackblocks) && + to->to_len <= TCP_MAXOLEN - TCPOLEN_SACK && + SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss) + tcp_sack_flush(tp); + } + + /* XXXAO: Experimental explicit reneg. */ + if (sack.tsb_blk.start == sack.tsb_blk.end && + i == 0 && to->to_nsacks > 1 && + SEQ_GT(sack.tsb_blk.start, tp->snd_una) && + SEQ_LEQ(sack.tsb_blk.start, tp->snd_nxt)) { + tcp_sack_flush(tp); + } + + /* Sanity checks on SACK block. */ + if (SEQ_GEQ(sack.tsb_blk.start, sack.tsb_blk.end) || SEQ_LEQ(sack.tsb_blk.start, th_ack) || SEQ_GT(sack.tsb_blk.end, tp->snd_nxt)) continue; - /* XXXAO: Implicit-explicit reneg. */ - if (sack.tsb_blk.start == sack.tsb_blk.end) { - /* Remove all sackblocks. */ - tcp_sack_flush(tp); - continue; - } - /* Return match that has at least partial overlap to either side. */ if ((tsb = RB_FIND(tcp_sackblocks, &tp->snd_sackblocks, &sack)) != NULL) { - /* within a block, was a duplicate retransmit, D-SACK. 
*/ + /* Within an already known block, common case. */ if (SEQ_GEQ(sack.tsb_blk.start, tsb->tsb_blk.start) && SEQ_LEQ(sack.tsb_blk.end, tsb->tsb_blk.end)) { + /* + * D-SACK, was a duplicate retransmit. + * RFC2883: section 5 + */ + if (i == 0 && SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss) { + //TCPSTAT_INC(); + } continue; } /* Extends the end, common case. */ @@ -229,16 +282,6 @@ return (sacked); } -void -tcp_sack_flush(struct tcpcb *tp) -{ - struct tcp_sack_block *tsb, *tsbn; - - RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) { - tcp_sack_free(tp, tsb); - } -} - #ifdef DDB static void db_print_sackblocks(struct tcpcb *tp)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200907160842.n6G8gGiP031921>