Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 9 Jun 2021 00:14:27 GMT
From:      Michael Tuexen <tuexen@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 12e181b67273 - stable/13 - tcp: Fix bugs related to the PUSH bit and rack and an ack war
Message-ID:  <202106090014.1590ERSB071462@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=12e181b6727326f79173182d04eb799eab3bdec7

commit 12e181b6727326f79173182d04eb799eab3bdec7
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2021-05-25 17:23:31 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2021-06-09 00:13:32 +0000

    tcp: Fix bugs related to the PUSH bit and rack and an ack war
    
    Michaels testing with UDP tunneling found an issue with the push bit, which was only partly fixed
    in the last commit. The problem is the left edge gets transmitted before the adjustments are done
    to the send_map, this means that right edge bits must be considered to be added only if
    the entire RSM is being retransmitted.
    
    Now syzkaller also continued to find a crash, which Michael sent me the reproducer for. Turns
    out that the reproducer on default (freebsd) stack made the stack get into an ack-war with itself.
    After fixing the reference issues in rack the same ack-war was found in rack (and bbr). Basically
    what happens is we go into the reassembly code and lose the FIN bit. The trick here is we
    should not be going into the reassembly code if tlen == 0 i.e. the peer never sent you anything.
    That then gets the proper action on the FIN bit but then you end up in LAST_ACK with no
    timers running. This is because the usrclosed function gets called and the FIN's and such have
    already been exchanged. So when we should be entering FIN_WAIT2 (or even FIN_WAIT1) we get
    stuck in LAST_ACK. Fixing this means tweaking the usrclosed function so that we properly
    recognize the condition and drop into FIN_WAIT2 where a timer will allow at least TP_MAXIDLE
    before closing (to allow time for the peer to retransmit its FIN if the ack is lost). Setting the fast_finwait2
    timer can speed this up in testing.
    
    Reviewed by: mtuexen,rscheff
    Sponsored by: Netflix Inc
    Differential Revision:  https://reviews.freebsd.org/D30451
    
    (cherry picked from commit 13c0e198ca275447f9a60a03f730c38c98f19009)
---
 sys/netinet/tcp_input.c       |  6 +++--
 sys/netinet/tcp_stacks/bbr.c  |  6 +++--
 sys/netinet/tcp_stacks/rack.c | 58 +++++++++++++++++++++++++++++--------------
 sys/netinet/tcp_usrreq.c      | 16 ++++++++++++
 4 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 916a7186770c..4ea0f7c5231c 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -3184,8 +3184,10 @@ dodata:							/* XXX */
 			 * when trimming from the head.
 			 */
 			tcp_seq temp = save_start;
-			thflags = tcp_reass(tp, th, &temp, &tlen, m);
-			tp->t_flags |= TF_ACKNOW;
+			if (tlen) {
+				thflags = tcp_reass(tp, th, &temp, &tlen, m);
+				tp->t_flags |= TF_ACKNOW;
+			}
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (save_tlen > 0) &&
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 2a98f5cb43ef..78957bdfb27d 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -8321,8 +8321,10 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			 * trimming from the head.
 			 */
 			tcp_seq temp = save_start;
-			thflags = tcp_reass(tp, th, &temp, &tlen, m);
-			tp->t_flags |= TF_ACKNOW;
+			if (tlen) {
+				thflags = tcp_reass(tp, th, &temp, &tlen, m);
+				tp->t_flags |= TF_ACKNOW;
+			}
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (save_tlen > 0) &&
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 0bdcc20d2b7c..a500c2a18004 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -6017,7 +6017,7 @@ rack_setup_offset_for_rsm(struct rack_sendmap *src_rsm, struct rack_sendmap *rsm
 	struct mbuf *m;
 	uint32_t soff;
 
-	if (src_rsm->orig_m_len != src_rsm->m->m_len) {
+	if (src_rsm->m && (src_rsm->orig_m_len != src_rsm->m->m_len)) {
 		/* Fix up the orig_m_len and possibly the mbuf offset */
 		rack_adjust_orig_mlen(src_rsm);
 	}
@@ -8818,21 +8818,23 @@ more:
 	rack->r_ctl.rc_gp_cumack_ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
 	rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_TRIM_HEAD, th_ack, __LINE__);
 	/* Now we need to move our offset forward too */
-	if (rsm->orig_m_len != rsm->m->m_len) {
+	if (rsm->m && (rsm->orig_m_len != rsm->m->m_len)) {
 		/* Fix up the orig_m_len and possibly the mbuf offset */
 		rack_adjust_orig_mlen(rsm);
 	}
 	rsm->soff += (th_ack - rsm->r_start);
 	rsm->r_start = th_ack;
 	/* Now do we need to move the mbuf fwd too? */
-	while (rsm->soff >= rsm->m->m_len) {
-		rsm->soff -= rsm->m->m_len;
-		rsm->m = rsm->m->m_next;
-		KASSERT((rsm->m != NULL),
-			(" nrsm:%p hit at soff:%u null m",
-			 rsm, rsm->soff));
-	}
-	rsm->orig_m_len = rsm->m->m_len;
+	if (rsm->m) {
+		while (rsm->soff >= rsm->m->m_len) {
+			rsm->soff -= rsm->m->m_len;
+			rsm->m = rsm->m->m_next;
+			KASSERT((rsm->m != NULL),
+				(" nrsm:%p hit at soff:%u null m",
+				 rsm, rsm->soff));
+		}
+		rsm->orig_m_len = rsm->m->m_len;
+	}
 	if (rack->app_limited_needs_set)
 		rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG);
 }
@@ -9655,7 +9657,7 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
 		/* Nothing outstanding */
 		return;
 	}
-	while (rsm->m == m) {
+	while (rsm->m && (rsm->m == m)) {
 		/* one to adjust */
 #ifdef INVARIANTS
 		struct mbuf *tm;
@@ -9676,10 +9678,16 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
 		}
 		rsm->m = tm;
 		rsm->soff = soff;
-		rsm->orig_m_len = rsm->m->m_len;
+		if (tm)
+			rsm->orig_m_len = rsm->m->m_len;
+		else
+			rsm->orig_m_len = 0;
 #else
 		rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff);
-		rsm->orig_m_len = rsm->m->m_len;
+		if (rsm->m)
+			rsm->orig_m_len = rsm->m->m_len;
+		else
+			rsm->orig_m_len = 0;
 #endif
 		rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree,
 			      rsm);
@@ -10058,6 +10066,7 @@ rack_validate_fo_sendwin_up(struct tcpcb *tp, struct tcp_rack *rack)
 	}
 }
 
+
 /*
  * Return value of 1, the TCB is unlocked and most
  * likely gone, return value of 0, the TCP is still
@@ -10227,9 +10236,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			 * trimming from the head.
 			 */
 			tcp_seq temp = save_start;
-
-			thflags = tcp_reass(tp, th, &temp, &tlen, m);
-			tp->t_flags |= TF_ACKNOW;
+			if (tlen) {
+				thflags = tcp_reass(tp, th, &temp, &tlen, m);
+				tp->t_flags |= TF_ACKNOW;
+			}
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (save_tlen > 0) &&
@@ -12190,7 +12200,10 @@ rack_init(struct tcpcb *tp)
 		rsm->r_dupack = 0;
 		if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) {
 			rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff);
-			rsm->orig_m_len = rsm->m->m_len;
+			if (rsm->m)
+				rsm->orig_m_len = rsm->m->m_len;
+			else
+				rsm->orig_m_len = 0;
 		} else {
 			/*
 			 * This can happen if we have a stand-alone FIN or
@@ -15075,6 +15088,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 	uint32_t us_cts;
 	uint32_t if_hw_tsomaxsegcount = 0, startseq;
 	uint32_t if_hw_tsomaxsegsize;
+
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 
@@ -15184,7 +15198,15 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
 	}
 	th->th_seq = htonl(rsm->r_start);
 	th->th_ack = htonl(tp->rcv_nxt);
-	if(rsm->r_flags & RACK_HAD_PUSH)
+	/*
+	 * The PUSH bit should only be applied
+	 * if the full retransmission is made. If
+	 * we are sending less than this is the
+	 * left hand edge and should not have
+	 * the PUSH bit.
+	 */
+	if ((rsm->r_flags & RACK_HAD_PUSH) &&
+	    (len == (rsm->r_end - rsm->r_start)))
 		flags |= TH_PUSH;
 	th->th_flags = flags;
 	th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index bd847426681e..55aa609fd084 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2635,6 +2635,22 @@ tcp_usrclosed(struct tcpcb *tp)
 		tcp_state_change(tp, TCPS_LAST_ACK);
 		break;
 	}
+	if ((tp->t_state == TCPS_LAST_ACK) &&
+	    (tp->t_flags & TF_SENTFIN)) {
+		/*
+		 * If we have reached LAST_ACK, and
+		 * we sent a FIN (e.g. via MSG_EOR), then
+		 * we really should move to either FIN_WAIT_1
+		 * or FIN_WAIT_2 depending on snd_max/snd_una.
+		 */
+		if (tp->snd_una == tp->snd_max) {
+			/* The FIN is acked */
+			tcp_state_change(tp, TCPS_FIN_WAIT_2);
+		} else {
+			/* The FIN is still outstanding */
+			tcp_state_change(tp, TCPS_FIN_WAIT_1);
+		}
+	}
 	if (tp->t_state >= TCPS_FIN_WAIT_2) {
 		soisdisconnected(tp->t_inpcb->inp_socket);
 		/* Prevent the connection hanging in FIN_WAIT_2 forever. */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106090014.1590ERSB071462>