Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 9 Jun 2021 00:18:20 GMT
From:      Michael Tuexen <tuexen@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 362f95f52897 - stable/13 - tcp: Add a socket option to rack so we can test various changes to the slop value in timers.
Message-ID:  <202106090018.1590IKo4071822@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=362f95f5289706dfcdf46dac3619a6aa8ee25186

commit 362f95f5289706dfcdf46dac3619a6aa8ee25186
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2021-05-26 10:43:30 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2021-06-09 00:16:00 +0000

    tcp: Add a socket option to rack so we can test various changes to the slop value in timers.
    
    Timer_slop, in TCP, has been 200ms for a long time. This value dates back
    to a time when delayed ack timers were longer and links were slower. A
    200ms timer slop allows 1 MSS to be sent over a 60kbps link. It's possible that
    lowering this value to something more in line with today's delayed ack values (40ms)
    might improve TCP. This bit of code makes it so rack can, via a socket option,
    adjust the timer slop.
    
    Reviewed by: mtuexen
    Sponsored by: Netflix Inc
    Differential Revision:  https://reviews.freebsd.org/D30249
    
    (cherry picked from commit 4f3addd94be5e02e6e425f6119f5409972ab5d14)
---
 sys/netinet/tcp.h                 |  1 +
 sys/netinet/tcp_stacks/rack.c     | 44 +++++++++++++++++++++++++++------------
 sys/netinet/tcp_stacks/tcp_rack.h |  2 ++
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 50f0811a6517..7273cb5104ea 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -305,6 +305,7 @@ struct tcphdr {
 #define TCP_FAST_RSM_HACK 1137 /* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? */
 #define TCP_RACK_PACING_BETA 1138	/* Changing the beta for pacing */
 #define TCP_RACK_PACING_BETA_ECN 1139	/* Changing the beta for ecn with pacing */
+#define TCP_RACK_TIMER_SLOP 1140	/* Set or get the timer slop used */
 
 /* Start of reserved space for third-party user-settable options. */
 #define	TCP_VENDOR	SO_VENDOR
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index a500c2a18004..5556e562c0eb 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -408,8 +408,8 @@ counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
 
 #define	RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
 
-#define	RACK_TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
-	(tv) = (value) + TICKS_2_USEC(tcp_rexmit_slop);	 \
+#define	RACK_TCPT_RANGESET(tv, value, tvmin, tvmax, slop) do {	\
+	(tv) = (value) + slop;	 \
 	if ((u_long)(tv) < (u_long)(tvmin)) \
 		(tv) = (tvmin); \
 	if ((u_long)(tv) > (u_long)(tvmax)) \
@@ -2448,7 +2448,7 @@ rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt)
 		/* Lets capture all the things that make up t_rtxcur */
 		log.u_bbr.applimited = rack_rto_min;
 		log.u_bbr.epoch = rack_rto_max;
-		log.u_bbr.lt_epoch = rtt;
+		log.u_bbr.lt_epoch = rack->r_ctl.timer_slop;
 		log.u_bbr.lost = rack_rto_min;
 		log.u_bbr.pkt_epoch = TICKS_2_USEC(tcp_rexmit_slop);
 		log.u_bbr.rttProp = RACK_REXMTVAL(rack->rc_tp);
@@ -5260,7 +5260,7 @@ rack_get_persists_timer_val(struct tcpcb *tp, struct tcp_rack *rack)
 
 	t = (tp->t_srtt + (tp->t_rttvar << 2));
 	RACK_TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
-	    rack_persist_min, rack_persist_max);
+ 	    rack_persist_min, rack_persist_max, rack->r_ctl.timer_slop);
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 		tp->t_rxtshift++;
 	rack->r_ctl.rc_hpts_flags |= PACE_TMR_PERSIT;
@@ -5526,7 +5526,7 @@ rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
 		rack_timer_cancel(tp, rack, cts, __LINE__);
 		tp->t_rxtshift = 0;
 		RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-			      rack_rto_min, rack_rto_max);
+			      rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 		rack->rc_in_persist = 1;
 	}
 }
@@ -5581,7 +5581,7 @@ rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
 	rack->r_ctl.rc_went_idle_time = 0;
 	tp->t_rxtshift = 0;
 	RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-	   rack_rto_min, rack_rto_max);
+	   rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 	rack->r_ctl.rc_agg_delayed = 0;
 	rack->r_early = 0;
 	rack->r_late = 0;
@@ -6777,7 +6777,7 @@ drop_it:
 		rexmt = max(rack_rto_min, (tp->t_srtt + (tp->t_rttvar << 2))) * tcp_backoff[tp->t_rxtshift];
 
 	RACK_TCPT_RANGESET(tp->t_rxtcur, rexmt,
-	   max(rack_rto_min, rexmt), rack_rto_max);
+	   max(rack_rto_min, rexmt), rack_rto_max, rack->r_ctl.timer_slop);
 	/*
 	 * We enter the path for PLMTUD if connection is established or, if
 	 * connection is FIN_WAIT_1 status, reason for the last is that if
@@ -7702,7 +7702,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
 	 */
 	tp->t_rxtshift = 0;
 	RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-		      max(rack_rto_min, rtt + 2), rack_rto_max);
+		      max(rack_rto_min, rtt + 2), rack_rto_max, rack->r_ctl.timer_slop);
 	rack_log_rtt_sample(rack, rtt);
 	tp->t_softerror = 0;
 }
@@ -7877,7 +7877,7 @@ rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
 	 */
 	tp->t_rxtshift = 0;
 	RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-		      rack_rto_min, rack_rto_max);
+		      rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 	tp->t_softerror = 0;
 	if (to && (to->to_flags & TOF_TS) &&
 	    (ack_type == CUM_ACKED) &&
@@ -9735,7 +9735,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		if (rack->rc_in_persist) {
 			tp->t_rxtshift = 0;
 			RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-				      rack_rto_min, rack_rto_max);
+				      rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 		}
 		if ((th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd)) {
 			rack_strike_dupack(rack);
@@ -9798,7 +9798,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		/* assure we are not backed off */
 		tp->t_rxtshift = 0;
 		RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-			      rack_rto_min, rack_rto_max);
+			      rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 		rack->rc_tlp_in_progress = 0;
 		rack->r_ctl.rc_tlp_cnt_out = 0;
 		/*
@@ -10650,7 +10650,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		m_freem(mfree);
 		tp->t_rxtshift = 0;
 		RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-			      rack_rto_min, rack_rto_max);
+			      rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 		rack->rc_tlp_in_progress = 0;
 		rack->r_ctl.rc_tlp_cnt_out = 0;
 		/*
@@ -12089,6 +12089,7 @@ rack_init(struct tcpcb *tp)
 	rack->r_ctl.rc_lowest_us_rtt = 0xffffffff;
 	rack->r_ctl.rc_highest_us_rtt = 0;
 	rack->r_ctl.bw_rate_cap = rack_bw_rate_cap;
+	rack->r_ctl.timer_slop = TICKS_2_USEC(tcp_rexmit_slop);
 	if (rack_use_cmp_acks)
 		rack->r_use_cmp_ack = 1;
 	if (rack_disable_prr)
@@ -13183,7 +13184,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
 		/* Clear out shifts and such */
 		tp->t_rxtshift = 0;
 		RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
-				   rack_rto_min, rack_rto_max);
+				   rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop);
 		rack->rc_tlp_in_progress = 0;
 		rack->r_ctl.rc_tlp_cnt_out = 0;
 		/* Send recover and snd_nxt must be dragged along */
@@ -18880,6 +18881,19 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
 			rack->r_ctl.rc_saved_beta.beta = optval;
 		}
 		break;
+	case TCP_RACK_TIMER_SLOP:
+		RACK_OPTS_INC(tcp_rack_timer_slop);
+		rack->r_ctl.timer_slop = optval;
+		if (rack->rc_tp->t_srtt) {
+			/*
+			 * If we have an SRTT lets update t_rxtcur
+			 * to have the new slop.
+			 */
+			RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp),
+					   rack_rto_min, rack_rto_max,
+					   rack->r_ctl.timer_slop);
+		}
+		break;
 	case TCP_RACK_PACING_BETA_ECN:
 		RACK_OPTS_INC(tcp_rack_beta_ecn);
 		if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) {
@@ -19526,6 +19540,7 @@ rack_set_sockopt(struct socket *so, struct sockopt *sopt,
 	case TCP_DEFER_OPTIONS:			/*  URL:defer */
 	case TCP_RACK_PACING_BETA:		/*  URL:pacing_beta */
 	case TCP_RACK_PACING_BETA_ECN:		/*  URL:pacing_beta_ecn */
+	case TCP_RACK_TIMER_SLOP:		/*  URL:timer_slop */
 		break;
 	default:
 		/* Filter off all unknown options to the base stack */
@@ -19857,6 +19872,9 @@ rack_get_sockopt(struct socket *so, struct sockopt *sopt,
 	case TCP_SHARED_CWND_TIME_LIMIT:
 		optval = rack->r_limit_scw;
 		break;
+	case TCP_RACK_TIMER_SLOP:
+		optval = rack->r_ctl.timer_slop;
+		break;
 	default:
 		return (tcp_default_ctloutput(so, sopt, inp, tp));
 		break;
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 7f2b2ce4786a..349f6daec2f4 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -245,6 +245,7 @@ struct rack_opts_stats {
 	uint64_t tcp_rack_fastrsm_hack;
 	uint64_t tcp_rack_beta;
 	uint64_t tcp_rack_beta_ecn;
+	uint64_t tcp_rack_timer_slop;
 };
 
 /* RTT shrink reasons */
@@ -341,6 +342,7 @@ struct rack_control {
 	struct rack_sendmap *rc_resend;	/* something we have been asked to
 					 * resend */
 	struct rack_fast_send_blk fsb;	/* The fast-send block */
+	uint32_t timer_slop;
 	uint32_t input_pkt;
 	uint32_t saved_input_pkt;
 	uint32_t rc_hpts_flags;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106090018.1590IKo4071822>