Date: Wed, 9 Jun 2021 00:14:27 GMT From: Michael Tuexen <tuexen@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: 12e181b67273 - stable/13 - tcp: Fix bugs related to the PUSH bit and rack and an ack war Message-ID: <202106090014.1590ERSB071462@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by tuexen: URL: https://cgit.FreeBSD.org/src/commit/?id=12e181b6727326f79173182d04eb799eab3bdec7 commit 12e181b6727326f79173182d04eb799eab3bdec7 Author: Randall Stewart <rrs@FreeBSD.org> AuthorDate: 2021-05-25 17:23:31 +0000 Commit: Michael Tuexen <tuexen@FreeBSD.org> CommitDate: 2021-06-09 00:13:32 +0000 tcp: Fix bugs related to the PUSH bit and rack and an ack war Michaels testing with UDP tunneling found an issue with the push bit, which was only partly fixed in the last commit. The problem is the left edge gets transmitted before the adjustments are done to the send_map, this means that right edge bits must be considered to be added only if the entire RSM is being retransmitted. Now syzkaller also continued to find a crash, which Michael sent me the reproducer for. Turns out that the reproducer on default (freebsd) stack made the stack get into an ack-war with itself. After fixing the reference issues in rack the same ack-war was found in rack (and bbr). Basically what happens is we go into the reassembly code and lose the FIN bit. The trick here is we should not be going into the reassembly code if tlen == 0 i.e. the peer never sent you anything. That then gets the proper action on the FIN bit but then you end up in LAST_ACK with no timers running. This is because the usrclosed function gets called and the FIN's and such have already been exchanged. So when we should be entering FIN_WAIT2 (or even FIN_WAIT1) we get stuck in LAST_ACK. Fixing this means tweaking the usrclosed function so that we properly recognize the condition and drop into FIN_WAIT2 where a timer will allow at least TP_MAXIDLE before closing (to allow time for the peer to retransmit its FIN if the ack is lost). Setting the fast_finwait2 timer can speed this up in testing. Reviewed by: mtuexen,rscheff Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D30451 (cherry picked from commit 13c0e198ca275447f9a60a03f730c38c98f19009) --- sys/netinet/tcp_input.c | 6 +++-- sys/netinet/tcp_stacks/bbr.c | 6 +++-- sys/netinet/tcp_stacks/rack.c | 58 +++++++++++++++++++++++++++++-------------- sys/netinet/tcp_usrreq.c | 16 ++++++++++++ 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 916a7186770c..4ea0f7c5231c 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -3184,8 +3184,10 @@ dodata: /* XXX */ * when trimming from the head. */ tcp_seq temp = save_start; - thflags = tcp_reass(tp, th, &temp, &tlen, m); - tp->t_flags |= TF_ACKNOW; + if (tlen) { + thflags = tcp_reass(tp, th, &temp, &tlen, m); + tp->t_flags |= TF_ACKNOW; + } } if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0) && diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 2a98f5cb43ef..78957bdfb27d 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -8321,8 +8321,10 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, * trimming from the head. */ tcp_seq temp = save_start; - thflags = tcp_reass(tp, th, &temp, &tlen, m); - tp->t_flags |= TF_ACKNOW; + if (tlen) { + thflags = tcp_reass(tp, th, &temp, &tlen, m); + tp->t_flags |= TF_ACKNOW; + } } if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0) && diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 0bdcc20d2b7c..a500c2a18004 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -6017,7 +6017,7 @@ rack_setup_offset_for_rsm(struct rack_sendmap *src_rsm, struct rack_sendmap *rsm struct mbuf *m; uint32_t soff; - if (src_rsm->orig_m_len != src_rsm->m->m_len) { + if (src_rsm->m && (src_rsm->orig_m_len != src_rsm->m->m_len)) { /* Fix up the orig_m_len and possibly the mbuf offset */ rack_adjust_orig_mlen(src_rsm); } @@ -8818,21 +8818,23 @@ more: rack->r_ctl.rc_gp_cumack_ts = rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]; rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_TRIM_HEAD, th_ack, __LINE__); /* Now we need to move our offset forward too */ - if (rsm->orig_m_len != rsm->m->m_len) { + if (rsm->m && (rsm->orig_m_len != rsm->m->m_len)) { /* Fix up the orig_m_len and possibly the mbuf offset */ rack_adjust_orig_mlen(rsm); } rsm->soff += (th_ack - rsm->r_start); rsm->r_start = th_ack; /* Now do we need to move the mbuf fwd too? */ - while (rsm->soff >= rsm->m->m_len) { - rsm->soff -= rsm->m->m_len; - rsm->m = rsm->m->m_next; - KASSERT((rsm->m != NULL), - (" nrsm:%p hit at soff:%u null m", - rsm, rsm->soff)); - } - rsm->orig_m_len = rsm->m->m_len; + if (rsm->m) { + while (rsm->soff >= rsm->m->m_len) { + rsm->soff -= rsm->m->m_len; + rsm->m = rsm->m->m_next; + KASSERT((rsm->m != NULL), + (" nrsm:%p hit at soff:%u null m", + rsm, rsm->soff)); + } + rsm->orig_m_len = rsm->m->m_len; + } if (rack->app_limited_needs_set) rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_BEG); } @@ -9655,7 +9657,7 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una) /* Nothing outstanding */ return; } - while (rsm->m == m) { + while (rsm->m && (rsm->m == m)) { /* one to adjust */ #ifdef INVARIANTS struct mbuf *tm; @@ -9676,10 +9678,16 @@ rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una) } rsm->m = tm; rsm->soff = soff; - rsm->orig_m_len = rsm->m->m_len; + if (tm) + rsm->orig_m_len = rsm->m->m_len; + else + rsm->orig_m_len = 0; #else rsm->m = sbsndmbuf(sb, (rsm->r_start - snd_una), &rsm->soff); - rsm->orig_m_len = rsm->m->m_len; + if (rsm->m) + rsm->orig_m_len = rsm->m->m_len; + else + rsm->orig_m_len = 0; #endif rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); @@ -10058,6 +10066,7 @@ rack_validate_fo_sendwin_up(struct tcpcb *tp, struct tcp_rack *rack) } } + /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still @@ -10227,9 +10236,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, * trimming from the head. */ tcp_seq temp = save_start; - - thflags = tcp_reass(tp, th, &temp, &tlen, m); - tp->t_flags |= TF_ACKNOW; + if (tlen) { + thflags = tcp_reass(tp, th, &temp, &tlen, m); + tp->t_flags |= TF_ACKNOW; + } } if ((tp->t_flags & TF_SACK_PERMIT) && (save_tlen > 0) && @@ -12190,7 +12200,10 @@ rack_init(struct tcpcb *tp) rsm->r_dupack = 0; if (rack->rc_inp->inp_socket->so_snd.sb_mb != NULL) { rsm->m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd, 0, &rsm->soff); - rsm->orig_m_len = rsm->m->m_len; + if (rsm->m) + rsm->orig_m_len = rsm->m->m_len; + else + rsm->orig_m_len = 0; } else { /* * This can happen if we have a stand-alone FIN or @@ -15075,6 +15088,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma uint32_t us_cts; uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegsize; + #ifdef INET6 struct ip6_hdr *ip6 = NULL; @@ -15184,7 +15198,15 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma } th->th_seq = htonl(rsm->r_start); th->th_ack = htonl(tp->rcv_nxt); - if(rsm->r_flags & RACK_HAD_PUSH) + /* + * The PUSH bit should only be applied + * if the full retransmission is made. If + * we are sending less than this is the + * left hand edge and should not have + * the PUSH bit. + */ + if ((rsm->r_flags & RACK_HAD_PUSH) && + (len == (rsm->r_end - rsm->r_start))) flags |= TH_PUSH; th->th_flags = flags; th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale)); diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index bd847426681e..55aa609fd084 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -2635,6 +2635,22 @@ tcp_usrclosed(struct tcpcb *tp) tcp_state_change(tp, TCPS_LAST_ACK); break; } + if ((tp->t_state == TCPS_LAST_ACK) && + (tp->t_flags & TF_SENTFIN)) { + /* + * If we have reached LAST_ACK, and + * we sent a FIN (e.g. via MSG_EOR), then + * we really should move to either FIN_WAIT_1 + * or FIN_WAIT_2 depending on snd_max/snd_una. + */ + if (tp->snd_una == tp->snd_max) { + /* The FIN is acked */ + tcp_state_change(tp, TCPS_FIN_WAIT_2); + } else { + /* The FIN is still outstanding */ + tcp_state_change(tp, TCPS_FIN_WAIT_1); + } + } if (tp->t_state >= TCPS_FIN_WAIT_2) { soisdisconnected(tp->t_inpcb->inp_socket); /* Prevent the connection hanging in FIN_WAIT_2 forever. */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106090014.1590ERSB071462>