Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 10 Apr 2023 20:34:05 GMT
From:      Randall Stewart <rrs@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: a2b33c9a7a95 - main - tcp: Rack - in the absence of LRO fixed rate pacing (loopback or interfaces with no LRO) does not work correctly.
Message-ID:  <202304102034.33AKY513054142@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=a2b33c9a7a952ff35d5061ae1dc76980d40320d3

commit a2b33c9a7a952ff35d5061ae1dc76980d40320d3
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2023-04-10 20:33:56 +0000
Commit:     Randall Stewart <rrs@FreeBSD.org>
CommitDate: 2023-04-10 20:33:56 +0000

    tcp: Rack - in the absence of LRO fixed rate pacing (loopback or interfaces with no LRO) does not work correctly.
    
    Rack is capable of fixed rate or dynamic rate pacing. Both of these can get mixed up when
    LRO is not available. This is because LRO will hold off waking up the tcp connection to
    processing the inbound packets until the pacing timer is up. Without LRO the pacing only
    sort-of works. Sometimes we pace correctly, other times not so much.
    
    This set of changes will make it so pacing works properly in the absence of LRO.
    
    Reviewed by: tuexen
    Sponsored by: Netflix Inc
    Differential Revision:https://reviews.freebsd.org/D39494
---
 sys/netinet/tcp_stacks/rack.c | 54 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index a1dda9889549..1bab253818b5 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -16433,6 +16433,9 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
 	return (0);
 }
 
+#define	TCP_LRO_TS_OPTION \
+    ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | \
+	  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)
 
 static int
 rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
@@ -16458,6 +16461,8 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 	struct tcp_rack *rack;
 	struct rack_sendmap *rsm;
 	int32_t prev_state = 0;
+	int no_output = 0;
+	int slot_remaining = 0;
 #ifdef TCP_ACCOUNTING
 	int ack_val_set = 0xf;
 #endif
@@ -16480,6 +16485,43 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 		 */
 		rack_deferred_init(tp, rack);
 	}
+	/*
+	 * Check to see if we need to skip any output plans. This
+	 * can happen in the non-LRO path where we are pacing and
+	 * must process the ack coming in but need to defer sending
+	 * anything becase a pacing timer is running.
+	 */
+	us_cts = tcp_tv_to_usectick(tv);
+	if ((rack->rc_always_pace == 1) &&
+	    (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
+	    (TSTMP_LT(us_cts, rack->r_ctl.rc_last_output_to))) {
+		/*
+		 * Ok conditions are right for queuing the packets
+		 * but we do have to check the flags in the inp, it
+		 * could be, if a sack is present, we want to be awoken and
+		 * so should process the packets.
+		 */
+		slot_remaining = rack->r_ctl.rc_last_output_to - us_cts;
+		if (rack->rc_inp->inp_flags2 & INP_DONT_SACK_QUEUE) {
+			no_output = 1;
+		} else {
+			/*
+			 * If there is no options, or just a
+			 * timestamp option, we will want to queue
+			 * the packets. This is the same that LRO does
+			 * and will need to change with accurate ECN.
+			 */
+			uint32_t *ts_ptr;
+			int optlen;
+
+			optlen = (th->th_off << 2) - sizeof(struct tcphdr);
+			ts_ptr = (uint32_t *)(th + 1);
+			if ((optlen == 0) ||
+			    ((optlen == TCPOLEN_TSTAMP_APPA) &&
+			     (*ts_ptr == TCP_LRO_TS_OPTION)))
+				no_output = 1;
+		}
+	}
 	if (m->m_flags & M_ACKCMP) {
 		/*
 		 * All compressed ack's are ack's by definition so
@@ -16914,7 +16956,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 			}
 		}
 #endif
-		if (nxt_pkt == 0) {
+		if ((nxt_pkt == 0) && (no_output == 0)) {
 			if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) {
 do_output_now:
 				if (tcp_output(tp) < 0) {
@@ -16927,6 +16969,16 @@ do_output_now:
 			}
 			rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
 			rack_free_trim(rack);
+		} else if ((no_output == 1) &&
+			   (nxt_pkt == 0)  &&
+			   (tcp_in_hpts(rack->rc_inp) == 0)) {
+			/*
+			 * We are not in hpts and we had a pacing timer up. Use
+			 * the remaining time (slot_remaining) to restart the timer.
+			 */
+			KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp));
+			rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0);
+			rack_free_trim(rack);
 		}
 		/* Update any rounds needed */
 		if (rack_verbose_logging &&  tcp_bblogging_on(rack->rc_tp))



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202304102034.33AKY513054142>