Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 Jun 2021 23:57:16 GMT
From:      Michael Tuexen <tuexen@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: a7619988ebc2 - stable/13 - This brings into sync FreeBSD with the netflix versions of rack and bbr. This fixes several breakages (panics) since the tcp_lro code was committed that have been reported. Quite a few new features are now in rack (prefetching of DGP -- Dynamic Goodput Pacing among the largest). There is also support for ack-war prevention. Documents coming soon on rack.
Message-ID:  <202106082357.158NvG09043885@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=a7619988ebc23fc4db80cddec7c13627f4bb80fe

commit a7619988ebc23fc4db80cddec7c13627f4bb80fe
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2021-05-06 15:22:26 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2021-06-08 23:56:18 +0000

    This brings into sync FreeBSD with the netflix versions of rack and bbr.
    This fixes several breakages (panics) since the tcp_lro code was
    committed that have been reported. Quite a few new features are
    now in rack (prefetching of DGP -- Dynamic Goodput Pacing among the
    largest). There is also support for ack-war prevention. Documents
    coming soon on rack.
    
    Sponsored by:           Netflix
    Reviewed by:            rscheff, mtuexen
    Differential Revision:  https://reviews.freebsd.org/D30036
    
    (cherry picked from commit 5d8fd932e418f03e98b3469c4088a36f0ef34ffe)
---
 sys/netinet/cc/cc.h                      |    7 +-
 sys/netinet/cc/cc_newreno.c              |   34 +-
 sys/netinet/cc/cc_newreno.h              |   13 +-
 sys/netinet/tcp.h                        |   24 +-
 sys/netinet/tcp_accounting.h             |   39 +
 sys/netinet/tcp_input.c                  |   10 +-
 sys/netinet/tcp_log_buf.h                |    8 +-
 sys/netinet/tcp_ratelimit.c              |   29 +-
 sys/netinet/tcp_ratelimit.h              |    4 +-
 sys/netinet/tcp_sack.c                   |   11 +
 sys/netinet/tcp_stacks/bbr.c             |   92 +-
 sys/netinet/tcp_stacks/rack.c            | 9876 ++++++++++++++++++++++--------
 sys/netinet/tcp_stacks/rack_bbr_common.c |  473 +-
 sys/netinet/tcp_stacks/rack_bbr_common.h |   21 +-
 sys/netinet/tcp_stacks/tcp_bbr.h         |    6 +-
 sys/netinet/tcp_stacks/tcp_rack.h        |  201 +-
 sys/netinet/tcp_subr.c                   |  105 +
 sys/netinet/tcp_var.h                    |   12 +
 18 files changed, 8181 insertions(+), 2784 deletions(-)

diff --git a/sys/netinet/cc/cc.h b/sys/netinet/cc/cc.h
index a52cfca9579b..be84a6841d15 100644
--- a/sys/netinet/cc/cc.h
+++ b/sys/netinet/cc/cc.h
@@ -91,15 +91,20 @@ struct cc_var {
 		struct sctp_nets	*sctp;
 	} ccvc;
 	uint16_t	nsegs; /* # segments coalesced into current chain. */
+	uint8_t		labc;  /* Dont use system abc use passed in */
 };
 
 /* cc_var flags. */
 #define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
 #define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
-#define	CCF_UNUSED1		0x0004	/* unused */
+#define	CCF_USE_LOCAL_ABC       0x0004  /* Dont use the system l_abc val */
 #define	CCF_ACKNOW		0x0008	/* Will this ack be sent now? */
 #define	CCF_IPHDR_CE		0x0010	/* Does this packet set CE bit? */
 #define	CCF_TCPHDR_CWR		0x0020	/* Does this packet set CWR bit? */
+#define	CCF_MAX_CWND		0x0040	/* Have we reached maximum cwnd? */
+#define	CCF_CHG_MAX_CWND	0x0080	/* Cubic max_cwnd changed, for K */
+#define	CCF_USR_IWND		0x0100	/* User specified initial window */
+#define	CCF_USR_IWND_INIT_NSEG	0x0200	/* Convert segs to bytes on conn init */
 
 /* ACK types passed to the ack_received() hook. */
 #define	CC_ACK		0x0001	/* Regular in sequence ACK. */
diff --git a/sys/netinet/cc/cc_newreno.c b/sys/netinet/cc/cc_newreno.c
index a924acb0b8d6..55cab36e149a 100644
--- a/sys/netinet/cc/cc_newreno.c
+++ b/sys/netinet/cc/cc_newreno.c
@@ -86,8 +86,8 @@ static void	newreno_cong_signal(struct cc_var *ccv, uint32_t type);
 static void	newreno_post_recovery(struct cc_var *ccv);
 static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);
 
-VNET_DEFINE_STATIC(uint32_t, newreno_beta) = 50;
-VNET_DEFINE_STATIC(uint32_t, newreno_beta_ecn) = 80;
+VNET_DEFINE(uint32_t, newreno_beta) = 50;
+VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
 #define V_newreno_beta VNET(newreno_beta)
 #define V_newreno_beta_ecn VNET(newreno_beta_ecn)
 
@@ -101,11 +101,6 @@ struct cc_algo newreno_cc_algo = {
 	.ctl_output = newreno_ctl_output,
 };
 
-struct newreno {
-	uint32_t beta;
-	uint32_t beta_ecn;
-};
-
 static inline struct newreno *
 newreno_malloc(struct cc_var *ccv)
 {
@@ -182,9 +177,15 @@ newreno_ack_received(struct cc_var *ccv, uint16_t type)
 			 * XXXLAS: Find a way to signal SS after RTO that
 			 * doesn't rely on tcpcb vars.
 			 */
+			uint16_t abc_val;
+
+			if (ccv->flags & CCF_USE_LOCAL_ABC)
+				abc_val = ccv->labc;
+			else
+				abc_val = V_tcp_abc_l_var;
 			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
 				incr = min(ccv->bytes_this_ack,
-				    ccv->nsegs * V_tcp_abc_l_var *
+				    ccv->nsegs * abc_val *
 				    CCV(ccv, t_maxseg));
 			else
 				incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
@@ -237,7 +238,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 	u_int mss;
 
 	cwin = CCV(ccv, snd_cwnd);
-	mss = tcp_maxseg(ccv->ccvc.tcp);
+	mss = tcp_fixed_maxseg(ccv->ccvc.tcp);
 	/*
 	 * Other TCP congestion controls use newreno_cong_signal(), but
 	 * with their own private cc_data. Make sure the cc_data is used
@@ -246,7 +247,15 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 	nreno = (CC_ALGO(ccv->ccvc.tcp) == &newreno_cc_algo) ? ccv->cc_data : NULL;
 	beta = (nreno == NULL) ? V_newreno_beta : nreno->beta;
 	beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn;
-	if (V_cc_do_abe && type == CC_ECN)
+
+	/*
+	 * Note that we only change the backoff for ECN if the
+	 * global sysctl V_cc_do_abe is set <or> the stack itself
+	 * has set a flag in our newreno_flags (due to pacing) telling
+	 * us to use the lower valued back-off.
+	 */
+	if (V_cc_do_abe ||
+	    (nreno && (nreno->newreno_flags & CC_NEWRENO_BETA_ECN) && (type == CC_ECN)))
 		factor = beta_ecn;
 	else
 		factor = beta;
@@ -265,8 +274,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 			    V_cc_do_abe && V_cc_abe_frlossreduce)) {
 				CCV(ccv, snd_ssthresh) =
 				    ((uint64_t)CCV(ccv, snd_ssthresh) *
-				    (uint64_t)beta) /
-				    (100ULL * (uint64_t)beta_ecn);
+				     (uint64_t)beta) / (uint64_t)beta_ecn;
 			}
 			if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
 				CCV(ccv, snd_ssthresh) = cwin;
@@ -352,7 +360,7 @@ newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf)
 			nreno->beta = opt->val;
 			break;
 		case CC_NEWRENO_BETA_ECN:
-			if (!V_cc_do_abe)
+			if ((!V_cc_do_abe) && ((nreno->newreno_flags & CC_NEWRENO_BETA_ECN) == 0))
 				return (EACCES);
 			nreno->beta_ecn = opt->val;
 			break;
diff --git a/sys/netinet/cc/cc_newreno.h b/sys/netinet/cc/cc_newreno.h
index 9e2a3cff5fe9..16cf1757e830 100644
--- a/sys/netinet/cc/cc_newreno.h
+++ b/sys/netinet/cc/cc_newreno.h
@@ -31,12 +31,17 @@
 
 #define CCALGONAME_NEWRENO "newreno"
 
+struct newreno {
+	uint32_t beta;
+	uint32_t beta_ecn;
+	uint32_t newreno_flags;
+};
+
 struct cc_newreno_opts {
-	int			name;
+	int		name;
 	uint32_t	val;
 };
 
-#define CC_NEWRENO_BETA		1
-#define CC_NEWRENO_BETA_ECN	2
-
+#define CC_NEWRENO_BETA		1	/* Beta for normal DUP-ACK/Sack recovery */
+#define CC_NEWRENO_BETA_ECN	2	/* ECN Beta for Abe */
 #endif /* _CC_NEWRENO_H */
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index d2bf1f8431fd..50f0811a6517 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -181,13 +181,24 @@ struct tcphdr {
 #define	TCP_TXTLS_MODE	40	/* Transmit TLS mode */
 #define	TCP_RXTLS_ENABLE 41	/* TLS framing and encryption for receive */
 #define	TCP_RXTLS_MODE	42	/* Receive TLS mode */
+#define	TCP_IWND_NB	43	/* Override initial window (units: bytes) */
+#define	TCP_IWND_NSEG	44	/* Override initial window (units: MSS segs) */
+#define	TCP_LOGID_CNT	46	/* get number of connections with the same ID */
+#define	TCP_LOG_TAG	47	/* configure tag for grouping logs */
+#define	TCP_USER_LOG	48	/* userspace log event */
 #define	TCP_CONGESTION	64	/* get/set congestion control algorithm */
 #define	TCP_CCALGOOPT	65	/* get/set cc algorithm specific options */
+#define	TCP_MAXUNACKTIME 68	/* maximum time without making progress (sec) */
+#define	TCP_MAXPEAKRATE 69	/* maximum peak rate allowed (kbps) */
+#define TCP_IDLE_REDUCE 70	/* Reduce cwnd on idle input */
 #define TCP_REMOTE_UDP_ENCAPS_PORT 71	/* Enable TCP over UDP tunneling via the specified port */
 #define TCP_DELACK  	72	/* socket option for delayed ack */
 #define TCP_FIN_IS_RST 73	/* A fin from the peer is treated has a RST */
 #define TCP_LOG_LIMIT  74	/* Limit to number of records in tcp-log */
 #define TCP_SHARED_CWND_ALLOWED 75 	/* Use of a shared cwnd is allowed */
+#define TCP_PROC_ACCOUNTING 76	/* Do accounting on tcp cpu usage and counts */
+#define TCP_USE_CMP_ACKS 77 	/* The transport can handle the Compressed mbuf acks */
+#define	TCP_PERF_INFO	78	/* retrieve accounting counters */
 #define	TCP_KEEPINIT	128	/* N, time to establish connection */
 #define	TCP_KEEPIDLE	256	/* L,N,X start keeplives after this period */
 #define	TCP_KEEPINTVL	512	/* L,N interval between keepalives */
@@ -201,7 +212,7 @@ struct tcphdr {
 #define TCP_RACK_MBUF_QUEUE   1050 /* Do we allow mbuf queuing if supported */
 #define TCP_RACK_PROP	      1051 /* RACK proportional rate reduction (bool) */
 #define TCP_RACK_TLP_REDUCE   1052 /* RACK TLP cwnd reduction (bool) */
-#define TCP_RACK_PACE_REDUCE  1053 /* RACK Pacing reduction factor (divisor) */
+#define TCP_RACK_PACE_REDUCE  1053 /* RACK Pacingv reduction factor (divisor) */
 #define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send  */
 #define TCP_RACK_PACE_ALWAYS  1055 /* Use the always pace method */
 #define TCP_RACK_PROP_RATE    1056 /* The proportional reduction rate */
@@ -284,6 +295,16 @@ struct tcphdr {
 #define TCP_RACK_PACE_TO_FILL 1127 /* If we are not in recovery, always pace to fill the cwnd in 1 RTT */
 #define TCP_SHARED_CWND_TIME_LIMIT 1128 /* we should limit to low time values the scwnd life */
 #define TCP_RACK_PROFILE 1129	/* Select a profile that sets multiple options */
+#define TCP_HDWR_RATE_CAP 1130 /* Allow hardware rates to cap pacing rate */
+#define TCP_PACING_RATE_CAP 1131 /* Highest rate allowed in pacing in bytes per second (uint64_t) */
+#define TCP_HDWR_UP_ONLY 1132	/* Allow the pacing rate to climb but not descend (with the exception of fill-cw */
+#define TCP_RACK_ABC_VAL 1133	/* Set a local ABC value different then the system default */
+#define TCP_REC_ABC_VAL 1134	/* Do we use the ABC value for recovery or the override one from sysctl  */
+#define TCP_RACK_MEASURE_CNT 1135 /* How many measurements are required in GP pacing */
+#define TCP_DEFER_OPTIONS 1136 /* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */
+#define TCP_FAST_RSM_HACK 1137 /* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? */
+#define TCP_RACK_PACING_BETA 1138	/* Changing the beta for pacing */
+#define TCP_RACK_PACING_BETA_ECN 1139	/* Changing the beta for ecn with pacing */
 
 /* Start of reserved space for third-party user-settable options. */
 #define	TCP_VENDOR	SO_VENDOR
@@ -295,6 +316,7 @@ struct tcphdr {
 #define	TCPI_OPT_WSCALE		0x04
 #define	TCPI_OPT_ECN		0x08
 #define	TCPI_OPT_TOE		0x10
+#define	TCPI_OPT_TFO		0x20
 
 /* Maximum length of log ID. */
 #define TCP_LOG_ID_LEN	64
diff --git a/sys/netinet/tcp_accounting.h b/sys/netinet/tcp_accounting.h
new file mode 100644
index 000000000000..a2e2d4edd46d
--- /dev/null
+++ b/sys/netinet/tcp_accounting.h
@@ -0,0 +1,39 @@
+#ifndef __tcp_accounting_h__
+#define __tcp_accounting_h__
+/*
+ * Return values from tcp_do_ack_accounting
+ * and indexs to the into the tcp_proc_time[]
+ * array.
+ */
+#define ACK_BEHIND	0
+#define ACK_SACK	1
+#define ACK_CUMACK	2
+#define ACK_CUMACK_SACK	3
+#define ACK_DUPACK	4
+#define ACK_RWND	5
+/* Added values for tracking output too  */
+#define SND_BLOCKED	6
+#define SND_LIMITED	7
+#define SND_OUT_DATA 	8
+#define SND_OUT_ACK	9
+#define SND_OUT_FAIL	10
+/* We also count in the counts array two added (MSS sent and ACKS In) */
+#define CNT_OF_MSS_OUT 11
+#define CNT_OF_ACKS_IN 12
+
+/* for the tcpcb we add two more cycle counters */
+#define CYC_HANDLE_MAP 11
+#define CYC_HANDLE_ACK 12
+
+/* Should the tp->xxx array's be alloc'ed? */
+/* #define TCP_NUM_PROC_COUNTERS 11 defined in tcp_var.h */
+/* #define TCP_NUM_CNT_COUNTERS 13 defined in tcp_var.h */
+
+#ifdef _KERNEL
+#ifdef TCP_ACCOUNTING
+extern counter_u64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
+extern counter_u64_t tcp_proc_time[TCP_NUM_PROC_COUNTERS];
+#endif
+#endif
+
+#endif
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index d36f9566ffba..916a7186770c 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -531,7 +531,7 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
 	    (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 void inline
-cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
@@ -549,7 +549,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 			break;
 		}
 
-		if (th->th_flags & TH_CWR)
+		if (flags & TH_CWR)
 			tp->ccv->flags |= CCF_TCPHDR_CWR;
 		else
 			tp->ccv->flags &= ~CCF_TCPHDR_CWR;
@@ -563,6 +563,12 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 	}
 }
 
+void inline
+cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+{
+	cc_ecnpkt_handler_flags(tp, th->th_flags, iptos);
+}
+
 /*
  * TCP input handling is split into multiple parts:
  *   tcp6_input is a thin wrapper around tcp_input for the extended
diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h
index 436383124dce..4e8b79babfe8 100644
--- a/sys/netinet/tcp_log_buf.h
+++ b/sys/netinet/tcp_log_buf.h
@@ -174,7 +174,7 @@ enum tcp_log_events {
 	TCP_LOG_IN = 1,		/* Incoming packet                   1 */
 	TCP_LOG_OUT,		/* Transmit (without other event)    2 */
 	TCP_LOG_RTO,		/* Retransmit timeout                3 */
-	TCP_LOG_TF_ACK,		/* Transmit due to TF_ACK            4 */
+	TCP_LOG_SB_WAKE,	/* Awaken socket buffer              4 */
 	TCP_LOG_BAD_RETRAN,	/* Detected bad retransmission       5 */
 	TCP_LOG_PRR,		/* Doing PRR                         6 */
 	TCP_LOG_REORDER,	/* Detected reorder                  7 */
@@ -200,7 +200,7 @@ enum tcp_log_events {
 	BBR_LOG_DOSEG_DONE,     /* hpts do_segment completes        27 */
 	BBR_LOG_EXIT_GAIN,      /* hpts do_segment completes        28 */
 	BBR_LOG_THRESH_CALC,    /* Doing threshold calculation      29 */
-	BBR_LOG_EXTRACWNDGAIN,	/* Removed                          30 */
+	TCP_LOG_MAPCHG,		/* Map Changes to the sendmap       30 */
 	TCP_LOG_USERSEND, 	/* User level sends data            31 */
 	BBR_RSM_CLEARED,	/* RSM cleared of ACK flags         32 */
 	BBR_LOG_STATE_TARGET, 	/* Log of target at state           33 */
@@ -232,7 +232,9 @@ enum tcp_log_events {
 	TCP_LOG_USER_EVENT,	/* User space event data            59 */
 	TCP_LOG_SENDFILE,	/* sendfile() logging for TCP connections 60 */
 	TCP_LOG_HTTP_T,		/* logging of http request tracking 61 */
-	TCP_LOG_END		/* End (keep at end)                62 */
+	TCP_LOG_ACCOUNTING,	/* Log of TCP Accounting data 62 */
+	TCP_LOG_FSB,		/* FSB information 63 */
+	TCP_LOG_END		/* End (keep at end)                64 */
 };
 
 enum tcp_log_states {
diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c
index 8f2cf3d8d061..c33b2872e91f 100644
--- a/sys/netinet/tcp_ratelimit.c
+++ b/sys/netinet/tcp_ratelimit.c
@@ -367,11 +367,22 @@ rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs)
 				       OID_AUTO, "pacetime", CTLFLAG_RD,
 				       &rs->rs_rlt[i].time_between, 0,
 				       "Time hardware inserts between 1500 byte sends");
-			SYSCTL_ADD_U64(&rs->sysctl_ctx,
+			SYSCTL_ADD_LONG(&rs->sysctl_ctx,
 				       SYSCTL_CHILDREN(rl_rate_num),
 				       OID_AUTO, "rate", CTLFLAG_RD,
-				       &rs->rs_rlt[i].rate, 0,
+				       &rs->rs_rlt[i].rate,
 				       "Rate in bytes per second");
+			SYSCTL_ADD_LONG(&rs->sysctl_ctx,
+				       SYSCTL_CHILDREN(rl_rate_num),
+				       OID_AUTO, "using", CTLFLAG_RD,
+				       &rs->rs_rlt[i].using,
+				       "Number of flows using");
+			SYSCTL_ADD_LONG(&rs->sysctl_ctx,
+				       SYSCTL_CHILDREN(rl_rate_num),
+				       OID_AUTO, "enobufs", CTLFLAG_RD,
+				       &rs->rs_rlt[i].rs_num_enobufs,
+				       "Number of enobufs logged on this rate");
+
 		}
 	}
 #endif
@@ -667,6 +678,8 @@ bail:
 		 */
 		rs->rs_rlt[i].ptbl = rs;
 		rs->rs_rlt[i].tag = NULL;
+		rs->rs_rlt[i].using = 0;
+		rs->rs_rlt[i].rs_num_enobufs = 0;
 		/*
 		 * Calculate the time between.
 		 */
@@ -1063,16 +1076,28 @@ rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error)
 static void
 rl_increment_using(const struct tcp_hwrate_limit_table *rte)
 {
+	struct tcp_hwrate_limit_table *decon_rte;
+
+	decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
+	atomic_add_long(&decon_rte->using, 1);
 }
 
 static void
 rl_decrement_using(const struct tcp_hwrate_limit_table *rte)
 {
+	struct tcp_hwrate_limit_table *decon_rte;
+
+	decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
+	atomic_subtract_long(&decon_rte->using, 1);
 }
 
 void
 tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte)
 {
+	struct tcp_hwrate_limit_table *decon_rte;
+
+	decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
+	atomic_add_long(&decon_rte->rs_num_enobufs, 1);
 }
 
 /*
diff --git a/sys/netinet/tcp_ratelimit.h b/sys/netinet/tcp_ratelimit.h
index b69f0e634b60..8747708e8b5e 100644
--- a/sys/netinet/tcp_ratelimit.h
+++ b/sys/netinet/tcp_ratelimit.h
@@ -43,7 +43,9 @@ struct m_snd_tag;
 struct tcp_hwrate_limit_table {
 	const struct tcp_rate_set *ptbl;	/* Pointer to parent table */
 	struct m_snd_tag *tag;	/* Send tag if needed (chelsio) */
-	uint64_t rate;		/* Rate we get in Bytes per second (Bps) */
+	long	 rate;		/* Rate we get in Bytes per second (Bps) */
+	long	 using;		/* How many flows are using this hdwr rate. */
+	long	 rs_num_enobufs;
 	uint32_t time_between;	/* Time-Gap between packets at this rate */
 	uint32_t flags;
 };
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 9187a9fc66d8..7d1e4077c79c 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -150,6 +150,17 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(tcp_sack_globalholes), 0,
     "Global number of TCP SACK holes currently allocated");
 
+int
+tcp_dsack_block_exists(struct tcpcb *tp)
+{
+	/* Return true if a DSACK block exists */
+	if (tp->rcv_numsacks == 0)
+		return (0);
+	if (SEQ_LEQ(tp->sackblks[0].end, tp->rcv_nxt))
+		return(1);
+	return (0);
+}
+
 /*
  * This function will find overlaps with the currently stored sackblocks
  * and add any overlap as a dsack block upfront
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index fbd5ac231496..f19872245e55 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -3930,6 +3930,9 @@ bbr_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type, struct bbr_s
 	struct tcp_bbr *bbr;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef STATS
+	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
+#endif
 	bbr = (struct tcp_bbr *)tp->t_fb_ptr;
 	switch (type) {
 	case CC_NDUPACK:
@@ -4403,6 +4406,7 @@ bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap
 	nrsm->r_start = start;
 	nrsm->r_end = rsm->r_end;
 	nrsm->r_rtr_cnt = rsm->r_rtr_cnt;
+	nrsm-> r_rtt_not_allowed = rsm->r_rtt_not_allowed;
 	nrsm->r_flags = rsm->r_flags;
 	/* We don't transfer forward the SYN flag */
 	nrsm->r_flags &= ~BBR_HAS_SYN;
@@ -6429,65 +6433,6 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
 		bbr->r_ctl.bbr_smallest_srtt_this_state = rtt;
 }
 
-static void
-bbr_earlier_retran(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap *rsm,
-		   uint32_t t, uint32_t cts, int ack_type)
-{
-	/*
-	 * For this RSM, we acknowledged the data from a previous
-	 * transmission, not the last one we made. This means we did a false
-	 * retransmit.
-	 */
-	if (rsm->r_flags & BBR_HAS_FIN) {
-		/*
-		 * The sending of the FIN often is multiple sent when we
-		 * have everything outstanding ack'd. We ignore this case
-		 * since its over now.
-		 */
-		return;
-	}
-	if (rsm->r_flags & BBR_TLP) {
-		/*
-		 * We expect TLP's to have this occur often
-		 */
-		bbr->rc_tlp_rtx_out = 0;
-		return;
-	}
-	if (ack_type != BBR_CUM_ACKED) {
-		/*
-		 * If it was not a cum-ack we
-		 * don't really know for sure since
-		 * the timestamp could be from some
-		 * other transmission.
-		 */
-		return;
-	}
-
-	if (rsm->r_flags & BBR_WAS_SACKPASS) {
-		/*
-		 * We retransmitted based on a sack and the earlier
-		 * retransmission ack'd it - re-ordering is occuring.
-		 */
-		BBR_STAT_INC(bbr_reorder_seen);
-		bbr->r_ctl.rc_reorder_ts = cts;
-	}
-	/* Back down the loss count */
-	if (rsm->r_flags & BBR_MARKED_LOST) {
-		bbr->r_ctl.rc_lost -= rsm->r_end - rsm->r_start;
-		bbr->r_ctl.rc_lost_bytes -= rsm->r_end - rsm->r_start;
-		rsm->r_flags &= ~BBR_MARKED_LOST;
-		if (SEQ_GT(bbr->r_ctl.rc_lt_lost, bbr->r_ctl.rc_lost))
-			/* LT sampling also needs adjustment */
-			bbr->r_ctl.rc_lt_lost = bbr->r_ctl.rc_lost;
-	}
-	/***** RRS HERE ************************/
-	/* Do we need to do this???            */
-	/* bbr_reset_lt_bw_sampling(bbr, cts); */
-	/***** RRS HERE ************************/
-	BBR_STAT_INC(bbr_badfr);
-	BBR_STAT_ADD(bbr_badfr_bytes, (rsm->r_end - rsm->r_start));
-}
-
 static void
 bbr_set_reduced_rtt(struct tcp_bbr *bbr, uint32_t cts, uint32_t line)
 {
@@ -6869,6 +6814,10 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 		/* Already done */
 		return (0);
 	}
+	if (rsm->r_rtt_not_allowed) {
+		/* Not allowed */
+		return (0);
+	}
 	if (rsm->r_rtr_cnt == 1) {
 		/*
 		 * Only one transmit. Hopefully the normal case.
@@ -6926,7 +6875,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 						    rsm->r_tim_lastsent[i], ack_type, to);
 				if ((i + 1) < rsm->r_rtr_cnt) {
 					/* Likely */
-					bbr_earlier_retran(tp, bbr, rsm, t, cts, ack_type);
+					return (0);
 				} else if (rsm->r_flags & BBR_TLP) {
 					bbr->rc_tlp_rtx_out = 0;
 				}
@@ -6974,7 +6923,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
 				t = 1;
 			bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, uts, BBR_RTT_BY_EARLIER_RET,
 					    rsm->r_tim_lastsent[i], ack_type, to);
-			bbr_earlier_retran(tp, bbr, rsm, t, cts, ack_type);
+			return (0);
 		} else {
 			/*
 			 * Too many prior transmissions, just
@@ -10207,7 +10156,7 @@ bbr_init(struct tcpcb *tp)
 			tp->t_fb_ptr = NULL;
 			return (ENOMEM);
 		}
-		rsm->r_flags = BBR_OVERMAX;
+		rsm->r_rtt_not_allowed = 1;
 		rsm->r_tim_lastsent[0] = cts;
 		rsm->r_rtr_cnt = 1;
 		rsm->r_rtr_bytes = 0;
@@ -10320,6 +10269,10 @@ bbr_fini(struct tcpcb *tp, int32_t tcb_is_purged)
 			counter_u64_add(bbr_flows_whdwr_pacing, -1);
 		else
 			counter_u64_add(bbr_flows_nohdwr_pacing, -1);
+		if (bbr->r_ctl.crte != NULL) {
+			tcp_rel_pacing_rate(bbr->r_ctl.crte, tp);
+			bbr->r_ctl.crte = NULL;
+		}
 		rsm = TAILQ_FIRST(&bbr->r_ctl.rc_map);
 		while (rsm) {
 			TAILQ_REMOVE(&bbr->r_ctl.rc_map, rsm, r_next);
@@ -13463,15 +13416,6 @@ send:
 				th->th_seq = htonl(tp->snd_max);
 				bbr_seq = tp->snd_max;
 			}
-		} else if (flags & TH_RST) {
-			/*
-			 * For a Reset send the last cum ack in sequence
-			 * (this like any other choice may still generate a
-			 * challenge ack, if a ack-update packet is in
-			 * flight).
-			 */
-			th->th_seq = htonl(tp->snd_una);
-			bbr_seq = tp->snd_una;
 		} else {
 			/*
 			 * len == 0 and not persist we use snd_max, sending
@@ -14536,9 +14480,9 @@ bbr_set_sockopt(struct socket *so, struct sockopt *sopt,
 		} else {
 			bbr->bbr_hdw_pace_ena = 0;
 #ifdef RATELIMIT
-			if (bbr->bbr_hdrw_pacing) {
-				bbr->bbr_hdrw_pacing = 0;
-				in_pcbdetach_txrtlmt(bbr->rc_inp);
+			if (bbr->r_ctl.crte != NULL) {
+				tcp_rel_pacing_rate(bbr->r_ctl.crte, tp);
+				bbr->r_ctl.crte = NULL;
 			}
 #endif
 		}
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index a6b439e38ad7..dd60091bbbc0 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -63,7 +63,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/kthread.h>
 #include <sys/kern_prefetch.h>
 #include <sys/protosw.h>
-
+#ifdef TCP_ACCOUNTING
+#include <sys/sched.h>
+#include <machine/cpu.h>
+#endif
 #include <vm/uma.h>
 
 #include <net/route.h>
@@ -91,8 +94,10 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_hpts.h>
 #include <netinet/tcp_ratelimit.h>
+#include <netinet/tcp_accounting.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
+#include <netinet/cc/cc_newreno.h>
 #include <netinet/tcp_fastopen.h>
 #include <netinet/tcp_lro.h>
 #ifdef NETFLIX_SHARED_CWND
@@ -133,6 +138,15 @@ uma_zone_t rack_pcb_zone;
 #define	TICKS2SBT(__t)	(tick_sbt * ((sbintime_t)(__t)))
 #endif
 
+VNET_DECLARE(uint32_t, newreno_beta);
+VNET_DECLARE(uint32_t, newreno_beta_ecn);
+#define V_newreno_beta VNET(newreno_beta)
+#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
+
+
+MALLOC_DEFINE(M_TCPFSB, "tcp_fsb", "TCP fast send block");
+MALLOC_DEFINE(M_TCPDO, "tcp_do", "TCP deferred options");
+
 struct sysctl_ctx_list rack_sysctl_ctx;
 struct sysctl_oid *rack_sysctl_root;
 
@@ -175,30 +189,51 @@ static int32_t rack_tlp_thresh = 1;
 static int32_t rack_tlp_limit = 2;	/* No more than 2 TLPs w-out new data */
 static int32_t rack_tlp_use_greater = 1;
 static int32_t rack_reorder_thresh = 2;
-static int32_t rack_reorder_fade = 60000;	/* 0 - never fade, def 60,000
+static int32_t rack_reorder_fade = 60000000;	/* 0 - never fade, def 60,000,000
 						 * - 60 seconds */
+static uint8_t rack_req_measurements = 1;
 /* Attack threshold detections */
 static uint32_t rack_highest_sack_thresh_seen = 0;
 static uint32_t rack_highest_move_thresh_seen = 0;
-
-static int32_t rack_pkt_delay = 1;
-static int32_t rack_early_recovery = 1;
+static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */
+static int32_t rack_hw_pace_extra_slots = 2;	/* 2 extra MSS time betweens */
+static int32_t rack_hw_rate_caps = 1; /* 1; */
+static int32_t rack_hw_rate_min = 0; /* 1500000;*/
+static int32_t rack_hw_rate_to_low = 0; /* 1200000; */
+static int32_t rack_hw_up_only = 1;
+static int32_t rack_stats_gets_ms_rtt = 1;
+static int32_t rack_prr_addbackmax = 2;
+
+static int32_t rack_pkt_delay = 1000;
 static int32_t rack_send_a_lot_in_prr = 1;
-static int32_t rack_min_to = 1;	/* Number of ms minimum timeout */
+static int32_t rack_min_to = 1000;	/* Number of microsecond  min timeout */
 static int32_t rack_verbose_logging = 0;
 static int32_t rack_ignore_data_after_close = 1;
-static int32_t rack_enable_shared_cwnd = 0;
+static int32_t rack_enable_shared_cwnd = 1;
+static int32_t rack_use_cmp_acks = 1;
+static int32_t rack_use_fsb = 1;
+static int32_t rack_use_rfo = 1;
+static int32_t rack_use_rsm_rfo = 1;
+static int32_t rack_max_abc_post_recovery = 2;
+static int32_t rack_client_low_buf = 0;
+#ifdef TCP_ACCOUNTING
+static int32_t rack_tcp_accounting = 0;
+#endif
 static int32_t rack_limits_scwnd = 1;
 static int32_t rack_enable_mqueue_for_nonpaced = 0;
 static int32_t rack_disable_prr = 0;
 static int32_t use_rack_rr = 1;
 static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */
-static int32_t rack_persist_min = 250;	/* 250ms */
-static int32_t rack_persist_max = 2000;	/* 2 Second */
-static int32_t rack_sack_not_required = 0;	/* set to one to allow non-sack to use rack */
-static int32_t rack_default_init_window = 0; 	/* Use system default */
+static int32_t rack_persist_min = 250000;	/* 250usec */
+static int32_t rack_persist_max = 2000000;	/* 2 Second in usec's */
+static int32_t rack_sack_not_required = 1;	/* set to one to allow non-sack to use rack */
+static int32_t rack_default_init_window = 0;	/* Use system default */
 static int32_t rack_limit_time_with_srtt = 0;
-static int32_t rack_hw_pace_adjust = 0;
+static int32_t rack_autosndbuf_inc = 20;	/* In percentage form */
+static int32_t rack_enobuf_hw_boost_mult = 2;	/* How many times the hw rate we boost slot using time_between */
+static int32_t rack_enobuf_hw_max = 12000;	/* 12 ms in usecs */
+static int32_t rack_enobuf_hw_min = 10000;	/* 10 ms in usecs */
+static int32_t rack_hw_rwnd_factor = 2;		/* How many max_segs the rwnd must be before we hold off sending */
 /*
  * Currently regular tcp has a rto_min of 30ms
  * the backoff goes 12 times so that ends up
@@ -209,23 +244,21 @@ static uint32_t rack_def_data_window = 20;
 static uint32_t rack_goal_bdp = 2;
 static uint32_t rack_min_srtts = 1;
 static uint32_t rack_min_measure_usec = 0;
-static int32_t rack_tlp_min = 10;
-static int32_t rack_rto_min = 30;	/* 30ms same as main freebsd */
-static int32_t rack_rto_max = 4000;	/* 4 seconds */
+static int32_t rack_tlp_min = 10000;	/* 10ms */
+static int32_t rack_rto_min = 30000;	/* 30,000 usec same as main freebsd */
+static int32_t rack_rto_max = 4000000;	/* 4 seconds in usec's */
 static const int32_t rack_free_cache = 2;
 static int32_t rack_hptsi_segments = 40;
 static int32_t rack_rate_sample_method = USE_RTT_LOW;
 static int32_t rack_pace_every_seg = 0;
-static int32_t rack_delayed_ack_time = 200;	/* 200ms */
+static int32_t rack_delayed_ack_time = 40000;	/* 40ms in usecs */
 static int32_t rack_slot_reduction = 4;
 static int32_t rack_wma_divisor = 8;		/* For WMA calculation */
 static int32_t rack_cwnd_block_ends_measure = 0;
 static int32_t rack_rwnd_block_ends_measure = 0;
+static int32_t rack_def_profile = 0;
 
 static int32_t rack_lower_cwnd_at_tlp = 0;
-static int32_t rack_use_proportional_reduce = 0;
-static int32_t rack_proportional_rate = 10;
-static int32_t rack_tlp_max_resend = 2;
 static int32_t rack_limited_retran = 0;
 static int32_t rack_always_send_oldest = 0;
 static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
@@ -247,13 +280,13 @@ static uint32_t rack_probertt_use_min_rtt_entry = 1;	/* Use the min to calculate
 static uint32_t rack_probertt_use_min_rtt_exit = 0;
 static uint32_t rack_probe_rtt_sets_cwnd = 0;
 static uint32_t rack_probe_rtt_safety_val = 2000000;	/* No more than 2 sec in probe-rtt */
-static uint32_t rack_time_between_probertt = 9600000;	/* 9.6 sec in us */
+static uint32_t rack_time_between_probertt = 9600000;	/* 9.6 sec in usecs */
 static uint32_t rack_probertt_gpsrtt_cnt_mul = 0;	/* How many srtt periods does probe-rtt last top fraction */
-static uint32_t rack_probertt_gpsrtt_cnt_div = 0;	/* How many srtt periods does probe-rtt last bottom fraction  */
-static uint32_t rack_min_probertt_hold = 200000;	/* Equal to delayed ack time */
+static uint32_t rack_probertt_gpsrtt_cnt_div = 0;	/* How many srtt periods does probe-rtt last bottom fraction */
+static uint32_t rack_min_probertt_hold = 40000;		/* Equal to delayed ack time */
 static uint32_t rack_probertt_filter_life = 10000000;
 static uint32_t rack_probertt_lower_within = 10;
-static uint32_t rack_min_rtt_movement = 250;	/* Must move at least 250 useconds to count as a lowering */
+static uint32_t rack_min_rtt_movement = 250000;	/* Must move at least 250ms (in microseconds)  to count as a lowering */
 static int32_t rack_pace_one_seg = 0;		/* Shall we pace for less than 1.4Meg 1MSS at a time */
 static int32_t rack_probertt_clear_is = 1;
 static int32_t rack_max_drain_hbp = 1;		/* Extra drain times gpsrtt for highly buffered paths */
@@ -264,7 +297,7 @@ static int32_t rack_max_per_above = 30;		/* When we go to increment stop if abov
 
 /* Timely information */
 /* Combine these two gives the range of 'no change' to bw */
-/* ie the up/down provide the upper and lower bound  */
+/* ie the up/down provide the upper and lower bound */
 static int32_t rack_gp_per_bw_mul_up = 2;	/* 2% */
 static int32_t rack_gp_per_bw_mul_down = 4;	/* 4% */
 static int32_t rack_gp_rtt_maxmul = 3;		/* 3 x maxmin */
@@ -286,6 +319,7 @@ static int32_t rack_timely_int_timely_only = 0;	/* do interim timely's only use
 static int32_t rack_timely_no_stopping = 0;
 static int32_t rack_down_raise_thresh = 100;
 static int32_t rack_req_segs = 1;
+static uint64_t rack_bw_rate_cap = 0;
 
 /* Weird delayed ack mode */
 static int32_t rack_use_imac_dack = 0;
@@ -301,9 +335,14 @@ counter_u64_t rack_unpaced_segments;
 counter_u64_t rack_calc_zero;
 counter_u64_t rack_calc_nonzero;
 counter_u64_t rack_saw_enobuf;
+counter_u64_t rack_saw_enobuf_hw;
 counter_u64_t rack_saw_enetunreach;
 counter_u64_t rack_per_timer_hole;
-
+counter_u64_t rack_large_ackcmp;
+counter_u64_t rack_small_ackcmp;
+#ifdef INVARIANTS
+counter_u64_t rack_adjust_map_bw;
+#endif
 /* Tail loss probe counters */
 counter_u64_t rack_tlp_tot;
 counter_u64_t rack_tlp_newdata;
@@ -313,6 +352,7 @@ counter_u64_t rack_tlp_retran_fail;
 counter_u64_t rack_to_tot;
 counter_u64_t rack_to_arm_rack;
 counter_u64_t rack_to_arm_tlp;
+counter_u64_t rack_hot_alloc;
 counter_u64_t rack_to_alloc;
 counter_u64_t rack_to_alloc_hard;
 counter_u64_t rack_to_alloc_emerg;
@@ -320,6 +360,17 @@ counter_u64_t rack_to_alloc_limited;
 counter_u64_t rack_alloc_limited_conns;
 counter_u64_t rack_split_limited;
 
+#define MAX_NUM_OF_CNTS 13
+counter_u64_t rack_proc_comp_ack[MAX_NUM_OF_CNTS];
+counter_u64_t rack_multi_single_eq;
+counter_u64_t rack_proc_non_comp_ack;
+
+counter_u64_t rack_fto_send;
+counter_u64_t rack_fto_rsm_send;
+counter_u64_t rack_nfto_resend;
+counter_u64_t rack_non_fto_send;
+counter_u64_t rack_extended_rfo;
+
 counter_u64_t rack_sack_proc_all;
 counter_u64_t rack_sack_proc_short;
 counter_u64_t rack_sack_proc_restart;
@@ -342,6 +393,10 @@ counter_u64_t rack_input_idle_reduces;
 counter_u64_t rack_collapsed_win;
 counter_u64_t rack_tlp_does_nada;
 counter_u64_t rack_try_scwnd;
+counter_u64_t rack_hw_pace_init_fail;
+counter_u64_t rack_hw_pace_lost;
+counter_u64_t rack_sbsndptr_right;
+counter_u64_t rack_sbsndptr_wrong;
 
 /* Temp CPU counters */
 counter_u64_t rack_find_high;
@@ -350,6 +405,17 @@ counter_u64_t rack_progress_drops;
 counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE];
 counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
 
+
+#define	RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
+
+#define	RACK_TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+	(tv) = (value) + TICKS_2_USEC(tcp_rexmit_slop);	 \
+	if ((u_long)(tv) < (u_long)(tvmin)) \
+		(tv) = (tvmin); \
+	if ((u_long)(tv) > (u_long)(tvmax)) \
+		(tv) = (tvmax); \
+} while (0)
+
 static void
 rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,  int event, int line);
 
@@ -363,7 +429,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th,
     uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
 static void
 rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack,
-    struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery);
+   uint32_t th_ack, uint16_t nsegs, uint16_t type, int32_t recovery);
 static struct rack_sendmap *rack_alloc(struct tcp_rack *rack);
 static struct rack_sendmap *rack_alloc_limit(struct tcp_rack *rack,
     uint8_t limit_type);
@@ -371,24 +437,21 @@ static struct rack_sendmap *
 rack_check_recovery_mode(struct tcpcb *tp,
     uint32_t tsused);
 static void
-rack_cong_signal(struct tcpcb *tp, struct tcphdr *th,
-    uint32_t type);
+rack_cong_signal(struct tcpcb *tp,
+		 uint32_t type, uint32_t ack);
 static void rack_counter_destroy(void);
 static int
 rack_ctloutput(struct socket *so, struct sockopt *sopt,
     struct inpcb *inp, struct tcpcb *tp);
 static int32_t rack_ctor(void *mem, int32_t size, void *arg, int32_t how);
 static void
-rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line);
+rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line, uint64_t *fill_override);
 static void
 rack_do_segment(struct mbuf *m, struct tcphdr *th,
     struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
     uint8_t iptos);
 static void rack_dtor(void *mem, int32_t size, void *arg);
 static void
-rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm,
-    uint32_t t, uint32_t cts);
-static void
 rack_log_alt_to_to_cancel(struct tcp_rack *rack,
     uint32_t flex1, uint32_t flex2,
     uint32_t flex3, uint32_t flex4,
@@ -416,11 +479,12 @@ static int32_t rack_init(struct tcpcb *tp);
 static void rack_init_sysctls(void);
 static void
 rack_log_ack(struct tcpcb *tp, struct tcpopt *to,
-    struct tcphdr *th);
+    struct tcphdr *th, int entered_rec, int dup_ack_struck);
 static void
 rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
-    uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts,
-    uint8_t pass, struct rack_sendmap *hintrsm, uint32_t us_cts);
+    uint32_t seq_out, uint8_t th_flags, int32_t err, uint64_t ts,
+    struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff);
+
 static void
 rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack,
     struct rack_sendmap *rsm);
@@ -431,7 +495,7 @@ static uint32_t
 rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack,
     struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm,
     uint32_t cts, int *moved_two);
-static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+static void rack_post_recovery(struct tcpcb *tp, uint32_t th_seq);
 static void rack_remxt_tmr(struct tcpcb *tp);
 static int
 rack_set_sockopt(struct socket *so, struct sockopt *sopt,
@@ -446,10 +510,10 @@ static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t
 static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type);
 static uint32_t
 rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack,
-    struct rack_sendmap *rsm, uint32_t ts, int32_t * lenp);
+    struct rack_sendmap *rsm, uint64_t ts, int32_t * lenp, uint16_t add_flag);
 static void
 rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
-    struct rack_sendmap *rsm, uint32_t ts);
+    struct rack_sendmap *rsm, uint64_t ts, uint16_t add_flag);
 static int
 rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
     struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack);
@@ -496,15 +560,182 @@ tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack,
 static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt,
     uint32_t len, uint32_t us_tim, int confidence, struct rack_sendmap *rsm, uint16_t rtrcnt);
 static void
-     tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th);
+     tcp_rack_partialack(struct tcpcb *tp);
+static int
+rack_set_profile(struct tcp_rack *rack, int prof);
+static void
+rack_apply_deferred_options(struct tcp_rack *rack);
 
 int32_t rack_clear_counter=0;
 
+static void
+rack_set_cc_pacing(struct tcp_rack *rack)
+{
+	struct sockopt sopt;
+	struct cc_newreno_opts opt;
+	struct newreno old, *ptr;
+	struct tcpcb *tp;
+	int error;
+
+	if (rack->rc_pacing_cc_set)
+		return;
+
+	tp = rack->rc_tp;
+	if (tp->cc_algo == NULL) {
+		/* Tcb is leaving */
+		printf("No cc algorithm?\n");
+		return;
+	}
+	rack->rc_pacing_cc_set = 1;
+	if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) {
+		/* Not new-reno we can't play games with beta! */
+		printf("cc_algo:%s is not NEWRENO:%s\n",
+		       tp->cc_algo->name, CCALGONAME_NEWRENO);
+		goto out;
+	}
+	ptr = ((struct newreno *)tp->ccv->cc_data);
+	if (CC_ALGO(tp)->ctl_output == NULL)  {
+		/* Huh, why does new_reno no longer have a set function? */
*** 14150 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106082357.158NvG09043885>