Date: Thu, 5 Jan 2023 17:01:39 GMT From: Randall Stewart <rrs@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 26bdd35c39d8 - main - rack and bbr not loading if TCP_RATELIMIT is not configured. Message-ID: <202301051701.305H1d79059526@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by rrs: URL: https://cgit.FreeBSD.org/src/commit/?id=26bdd35c39d8de2672fac8903a161699ffe38a82 commit 26bdd35c39d8de2672fac8903a161699ffe38a82 Author: Randall Stewart <rrs@FreeBSD.org> AuthorDate: 2023-01-05 16:59:21 +0000 Commit: Randall Stewart <rrs@FreeBSD.org> CommitDate: 2023-01-05 16:59:52 +0000 rack and bbr not loading if TCP_RATELIMIT is not configured. So it turns out that rack and bbr still will not load without TCP_RATELIMIT. This needs to be fixed, and let's also at the same time bring tcp_ratelimit up to date where we allow the transports to set a divisor (though still having a default path with the default divisor of 1000) for setting the burst size. Reviewed by: tuexen, gallatin Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D37954 --- sys/netinet/tcp_ratelimit.c | 42 ++++++++++++++++++++++----- sys/netinet/tcp_ratelimit.h | 71 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 101 insertions(+), 12 deletions(-) diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c index dc207d10311c..ca619e66c07d 100644 --- a/sys/netinet/tcp_ratelimit.c +++ b/sys/netinet/tcp_ratelimit.c @@ -255,6 +255,10 @@ static uint32_t wait_time_floor = 8000; /* 8 ms */ static uint32_t rs_hw_floor_mss = 16; static uint32_t num_of_waits_allowed = 1; /* How many time blocks are we willing to wait */ +static uint32_t mss_divisor = RL_DEFAULT_DIVISOR; +static uint32_t even_num_segs = 1; +static uint32_t even_threshold = 4; + SYSCTL_NODE(_net_inet_tcp, OID_AUTO, rl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP Ratelimit stats"); SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, alive, CTLFLAG_RW, @@ -277,6 +281,15 @@ SYSCTL_UINT(_net_inet_tcp_rl, OID_AUTO, hw_floor_mss, CTLFLAG_RW, &rs_hw_floor_mss, 16, "Number of mss that are a minum for hardware pacing?"); +SYSCTL_INT(_net_inet_tcp_rl, OID_AUTO, divisor, CTLFLAG_RW, + &mss_divisor, RL_DEFAULT_DIVISOR, + "The value divided into bytes per second to help establish 
mss size"); +SYSCTL_INT(_net_inet_tcp_rl, OID_AUTO, even, CTLFLAG_RW, + &even_num_segs, 1, + "Do we round mss size up to an even number of segments for delayed ack"); +SYSCTL_INT(_net_inet_tcp_rl, OID_AUTO, eventhresh, CTLFLAG_RW, + &even_threshold, 4, + "At what number of mss do we start rounding up to an even number of mss?"); static void rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs) @@ -1583,8 +1596,8 @@ tcp_log_pacing_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, uint32_t new } uint32_t -tcp_get_pacing_burst_size (struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, - const struct tcp_hwrate_limit_table *te, int *err) +tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, + const struct tcp_hwrate_limit_table *te, int *err, int divisor) { /* * We use the google formula to calculate the @@ -1592,20 +1605,35 @@ tcp_get_pacing_burst_size (struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int c * bw < 24Meg * tso = 2mss * else - * tso = min(bw/1000, 64k) + * tso = min(bw/(div=1000), 64k) * * Note for these calculations we ignore the * packet overhead (enet hdr, ip hdr and tcp hdr). + * We only get the google formula when we have + * divisor = 1000, which is the default for now. */ uint64_t lentim, res, bytes; uint32_t new_tso, min_tso_segs; - bytes = bw / 1000; - if (bytes > (64 * 1000)) - bytes = 64 * 1000; + /* It can't be zero */ + if ((divisor == 0) || + (divisor < RL_MIN_DIVISOR)) { + if (mss_divisor) + bytes = bw / mss_divisor; + else + bytes = bw / 1000; + } else + bytes = bw / divisor; + /* We can't ever send more than 65k in a TSO */ + if (bytes > 0xffff) { + bytes = 0xffff; + } /* Round up */ new_tso = (bytes + segsiz - 1) / segsiz; - if (can_use_1mss && (bw < ONE_POINT_TWO_MEG)) + /* Are we enforcing even boundaries? 
*/ + if (even_num_segs && (new_tso & 1) && (new_tso > even_threshold)) + new_tso++; + if (can_use_1mss) min_tso_segs = 1; else min_tso_segs = 2; diff --git a/sys/netinet/tcp_ratelimit.h b/sys/netinet/tcp_ratelimit.h index b689c9127493..f5d8f1ec6b1a 100644 --- a/sys/netinet/tcp_ratelimit.h +++ b/sys/netinet/tcp_ratelimit.h @@ -36,6 +36,9 @@ struct m_snd_tag; +#define RL_MIN_DIVISOR 50 +#define RL_DEFAULT_DIVISOR 1000 + /* Flags on an individual rate */ #define HDWRPACE_INITED 0x0001 #define HDWRPACE_TAGPRESENT 0x0002 @@ -121,6 +124,14 @@ tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, void tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp); + +uint32_t +tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, + const struct tcp_hwrate_limit_table *te, int *err, int divisor); + +void +tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte); + #else static inline const struct tcp_hwrate_limit_table * tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, @@ -160,8 +171,56 @@ tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp) return (0); } +static inline uint32_t +tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, + const struct tcp_hwrate_limit_table *te, int *err, int divisor) +{ + /* + * We use the google formula to calculate the + * TSO size. I.E. + * bw < 24Meg + * tso = 2mss + * else + * tso = min(bw/(div=1000), 64k) + * + * Note for these calculations we ignore the + * packet overhead (enet hdr, ip hdr and tcp hdr). + * We only get the google formula when we have + * divisor = 1000, which is the default for now. 
+ */ + uint64_t bytes; + uint32_t new_tso, min_tso_segs; + + /* It can't be zero */ + if ((divisor == 0) || + (divisor < RL_MIN_DIVISOR)) { + bytes = bw / RL_DEFAULT_DIVISOR; + } else + bytes = bw / divisor; + /* We can't ever send more than 65k in a TSO */ + if (bytes > 0xffff) { + bytes = 0xffff; + } + /* Round up */ + new_tso = (bytes + segsiz - 1) / segsiz; + if (can_use_1mss) + min_tso_segs = 1; + else + min_tso_segs = 2; + if (new_tso < min_tso_segs) + new_tso = min_tso_segs; + new_tso *= segsiz; + return (new_tso); +} + +/* Do nothing if RATELIMIT is not defined */ +static void +tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte) +{ +} #endif + /* * Given a b/w and a segsiz, and optional hardware * rate limit, return the ideal size to burst @@ -170,13 +229,15 @@ tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp) * limit, if not it will bottom out at 2mss (think * delayed ack). */ -uint32_t +static inline uint32_t tcp_get_pacing_burst_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, - const struct tcp_hwrate_limit_table *te, int *err); - + const struct tcp_hwrate_limit_table *te, int *err) +{ -void -tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte); + return (tcp_get_pacing_burst_size_w_divisor(tp, bw, segsiz, + can_use_1mss, + te, err, 0)); +} #endif #endif
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202301051701.305H1d79059526>