From owner-svn-src-all@freebsd.org Mon Sep 2 19:04:04 2019 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 56535E5DC0; Mon, 2 Sep 2019 19:04:04 +0000 (UTC) (envelope-from tuexen@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) server-signature RSA-PSS (4096 bits) client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 46MfdJ1Sxrz4B0Y; Mon, 2 Sep 2019 19:04:04 +0000 (UTC) (envelope-from tuexen@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 15AF81EA95; Mon, 2 Sep 2019 19:04:04 +0000 (UTC) (envelope-from tuexen@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id x82J43U3067594; Mon, 2 Sep 2019 19:04:03 GMT (envelope-from tuexen@FreeBSD.org) Received: (from tuexen@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id x82J43vt067590; Mon, 2 Sep 2019 19:04:03 GMT (envelope-from tuexen@FreeBSD.org) Message-Id: <201909021904.x82J43vt067590@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: tuexen set sender to tuexen@FreeBSD.org using -f From: Michael Tuexen Date: Mon, 2 Sep 2019 19:04:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r351725 - in head/sys/netinet: . tcp_stacks X-SVN-Group: head X-SVN-Commit-Author: tuexen X-SVN-Commit-Paths: in head/sys/netinet: . tcp_stacks X-SVN-Commit-Revision: 351725 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 02 Sep 2019 19:04:04 -0000 Author: tuexen Date: Mon Sep 2 19:04:02 2019 New Revision: 351725 URL: https://svnweb.freebsd.org/changeset/base/351725 Log: This patch improves the DSACK handling to conform with RFC 2883. The lowest SACK block is used when multiple Blocks would be elegible as DSACK blocks ACK blocks get reordered - while maintaining the ordering of SACK blocks not relevant in the DSACK context is maintained. Reviewed by: rrs@, tuexen@ Obtained from: Richard Scheffenegger MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D21038 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_sack.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c ============================================================================== --- head/sys/netinet/tcp_input.c Mon Sep 2 18:32:08 2019 (r351724) +++ head/sys/netinet/tcp_input.c Mon Sep 2 19:04:02 2019 (r351725) @@ -1486,7 +1486,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru } else { tp->rfbuf_cnt += tlen; /* add up */ } - return (newsize); } Modified: head/sys/netinet/tcp_sack.c ============================================================================== --- head/sys/netinet/tcp_sack.c Mon Sep 2 18:32:08 2019 (r351724) +++ head/sys/netinet/tcp_sack.c Mon Sep 2 19:04:02 2019 (r351725) @@ -149,7 +149,109 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, &VNET_NAME(tcp_sack_globalholes), 0, "Global number of TCP SACK holes currently allocated"); + /* + * This function will find overlaps with the currently stored sackblocks + * and add any overlap as a dsack block upfront + */ +void +tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end) +{ + struct sackblk head_blk,mid_blk,saved_blks[MAX_SACK_BLKS]; + int i, j, n, identical; + tcp_seq start, end; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end")); + + if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) { + log(LOG_DEBUG, "\nDSACK update: %d..%d, rcv_nxt: %u\n", + rcv_start, rcv_end, tp->rcv_nxt); + } + + if (SEQ_LT(rcv_end, tp->rcv_nxt) || + ((rcv_end == tp->rcv_nxt) && + (tp->rcv_numsacks > 0 ) && + (tp->sackblks[0].end == tp->rcv_nxt))) { + saved_blks[0].start = rcv_start; + saved_blks[0].end = rcv_end; + } else { + saved_blks[0].start = saved_blks[0].end = 0; + } + + head_blk.start = head_blk.end = 0; + mid_blk.start = rcv_start; + mid_blk.end = rcv_end; + identical = 0; + + for (i = 0; i < tp->rcv_numsacks; i++) { + start = tp->sackblks[i].start; + end = tp->sackblks[i].end; + if (SEQ_LT(rcv_end, start)) { + /* pkt left to sack blk */ + continue; + } + if (SEQ_GT(rcv_start, end)) { + /* pkt right to sack blk */ + continue; + } + if (SEQ_GT(tp->rcv_nxt, end)) { + if ((SEQ_MAX(rcv_start, start) != SEQ_MIN(rcv_end, end)) && + (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) || + (head_blk.start == head_blk.end))) { + head_blk.start = SEQ_MAX(rcv_start, start); + head_blk.end = SEQ_MIN(rcv_end, end); + } + continue; + } + if (((head_blk.start == head_blk.end) || + SEQ_LT(start, head_blk.start)) && + (SEQ_GT(end, rcv_start) && + SEQ_LEQ(start, rcv_end))) { + head_blk.start = start; + head_blk.end = end; + } + mid_blk.start = SEQ_MIN(mid_blk.start, start); + mid_blk.end = SEQ_MAX(mid_blk.end, end); + if ((mid_blk.start == start) && + (mid_blk.end == end)) + identical = 1; + } + if (SEQ_LT(head_blk.start, head_blk.end)) { + /* store overlapping range */ + saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start); + saved_blks[0].end = SEQ_MIN(rcv_end, head_blk.end); + } + n = 1; + /* + * Second, if not ACKed, store the SACK block that + * overlaps with the DSACK block unless it is identical + */ + if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) && + !((mid_blk.start == saved_blks[0].start) && + (mid_blk.end == saved_blks[0].end))) || + identical == 1) { + saved_blks[n].start = mid_blk.start; + saved_blks[n++].end = mid_blk.end; + } + for (j = 0; (j < tp->rcv_numsacks) && (j < MAX_SACK_BLKS-1); j++) { + if (((SEQ_LT(tp->sackblks[j].end, mid_blk.start) || + SEQ_GT(tp->sackblks[j].start, mid_blk.end)) && + (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt)))) + saved_blks[n++] = tp->sackblks[j]; + } + j = 0; + for (i = 0; i < n; i++) { + /* we can end up with a stale inital entry */ + if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) { + tp->sackblks[j++] = saved_blks[i]; + } + } + tp->rcv_numsacks = j; +} + +/* * This function is called upon receipt of new valid data (while not in * header prediction mode), and it updates the ordered list of sacks. */ @@ -170,9 +272,16 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta /* Check arguments. */ KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end")); - /* SACK block for the received segment. */ - head_blk.start = rcv_start; - head_blk.end = rcv_end; + if ((rcv_start == rcv_end) && + (tp->rcv_numsacks >= 1) && + (rcv_end == tp->sackblks[0].end)) { + /* retaining DSACK block below rcv_nxt (todrop) */ + head_blk = tp->sackblks[0]; + } else { + /* SACK block for the received segment. */ + head_blk.start = rcv_start; + head_blk.end = rcv_end; + } /* * Merge updated SACK blocks into head_blk, and save unchanged SACK @@ -266,6 +375,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta */ if (num_saved >= MAX_SACK_BLKS) num_saved--; + } + if ((rcv_start == rcv_end) && + (rcv_start == tp->sackblks[0].end)) { + num_head = 1; } if (num_saved > 0) { /* Modified: head/sys/netinet/tcp_stacks/rack.c ============================================================================== --- head/sys/netinet/tcp_stacks/rack.c Mon Sep 2 18:32:08 2019 (r351724) +++ head/sys/netinet/tcp_stacks/rack.c Mon Sep 2 19:04:02 2019 (r351725) @@ -1783,6 +1783,14 @@ rack_drop_checks(struct tcpopt *to, struct mbuf *m, st TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } + if (tp->t_flags & TF_SACK_PERMIT) { + /* + * record the left, to-be-dropped edge of data + * here, for use as dsack block further down + */ + tcp_update_sack_list(tp, th->th_seq, + th->th_seq + todrop); + } *drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; @@ -4900,7 +4908,8 @@ dodata: /* XXX */ (TCPS_HAVEESTABLISHED(tp->t_state) || tfo_syn)) { if (DELAY_ACK(tp, tlen) || tfo_syn) { - rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); + rack_timer_cancel(tp, rack, + rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { rack->r_wanted_output++; @@ -4934,18 +4943,29 @@ dodata: /* XXX */ * DSACK actually handled in the fastpath * above. */ - tcp_update_sack_list(tp, save_start, save_start + save_tlen); + tcp_update_sack_list(tp, save_start, + save_start + save_tlen); } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) { /* * Cleaning sackblks by using zero length * update. */ - tcp_update_sack_list(tp, save_start, save_start); + if ((tp->rcv_numsacks >= 1) && + (tp->sackblks[0].end == save_start)) { + /* partial overlap, recorded at todrop above */ + tcp_update_sack_list(tp, tp->sackblks[0].start, + tp->sackblks[0].end); + } else { + tcp_update_dsack_list(tp, save_start, + save_start + save_tlen); + } } else if ((tlen > 0) && (tlen >= save_tlen)) { /* Update of sackblks. */ - tcp_update_sack_list(tp, save_start, save_start + save_tlen); + tcp_update_dsack_list(tp, save_start, + save_start + save_tlen); } else if (tlen > 0) { - tcp_update_sack_list(tp, save_start, save_start+tlen); + tcp_update_dsack_list(tp, save_start, + save_start + tlen); } } else { m_freem(m); @@ -4967,7 +4987,8 @@ dodata: /* XXX */ * now. */ if (tp->t_flags & TF_NEEDSYN) { - rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); + rack_timer_cancel(tp, rack, + rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; @@ -4984,7 +5005,8 @@ dodata: /* XXX */ tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: - rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); + rack_timer_cancel(tp, rack, + rack->r_ctl.rc_rcvtime, __LINE__); tcp_state_change(tp, TCPS_CLOSE_WAIT); break; @@ -4993,7 +5015,8 @@ dodata: /* XXX */ * acked so enter the CLOSING state. */ case TCPS_FIN_WAIT_1: - rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); + rack_timer_cancel(tp, rack, + rack->r_ctl.rc_rcvtime, __LINE__); tcp_state_change(tp, TCPS_CLOSING); break; @@ -5003,7 +5026,8 @@ dodata: /* XXX */ * other standard timers. */ case TCPS_FIN_WAIT_2: - rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); + rack_timer_cancel(tp, rack, + rack->r_ctl.rc_rcvtime, __LINE__); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); return (1); @@ -5012,7 +5036,8 @@ dodata: /* XXX */ /* * Return any desired output. */ - if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) { + if ((tp->t_flags & TF_ACKNOW) || + (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) { rack->r_wanted_output++; } INP_WLOCK_ASSERT(tp->t_inpcb); Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c ============================================================================== --- head/sys/netinet/tcp_stacks/rack_bbr_common.c Mon Sep 2 18:32:08 2019 (r351724) +++ head/sys/netinet/tcp_stacks/rack_bbr_common.c Mon Sep 2 19:04:02 2019 (r351725) @@ -495,7 +495,8 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, str * DSACK - add SACK block for dropped range */ if (tp->t_flags & TF_SACK_PERMIT) { - tcp_update_sack_list(tp, th->th_seq, th->th_seq + tlen); + tcp_update_sack_list(tp, th->th_seq, + th->th_seq + todrop); /* * ACK now, as the next in-sequence segment * will clear the DSACK block again Modified: head/sys/netinet/tcp_var.h ============================================================================== --- head/sys/netinet/tcp_var.h Mon Sep 2 18:32:08 2019 (r351724) +++ head/sys/netinet/tcp_var.h Mon Sep 2 19:04:02 2019 (r351725) @@ -939,6 +939,7 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *); tcp_seq tcp_new_isn(struct in_conninfo *); int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); +void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); void tcp_clean_dsack_blocks(struct tcpcb *tp); void tcp_clean_sackreport(struct tcpcb *tp);