Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 9 Jun 2021 18:00:54 GMT
From:      Randall Stewart <rrs@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: b45daaea95ab - main - tcp: LRO timestamps have lost their previous precision
Message-ID:  <202106091800.159I0sdJ085566@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by rrs:

URL: https://cgit.FreeBSD.org/src/commit/?id=b45daaea95abd8bda52caaacf120f9197caab3e7

commit b45daaea95abd8bda52caaacf120f9197caab3e7
Author:     Randall Stewart <rrs@FreeBSD.org>
AuthorDate: 2021-06-09 17:58:54 +0000
Commit:     Randall Stewart <rrs@FreeBSD.org>
CommitDate: 2021-06-09 17:58:54 +0000

    tcp: LRO timestamps have lost their previous precision
    
    Recently we had a rewrite to tcp_lro.c that was tested but one subtle change
    was the move to a less precise timestamp. This causes all kinds of chaos
    in tcp's that do pacing and needs to be fixed to use the more precise
    time that was there before.
    
    Reviewed by: mtuexen, gallatin, hselasky
    Sponsored by: Netflix Inc
    Differential Revision:  https://reviews.freebsd.org/D30695
---
 sys/netinet/tcp_lro.c | 23 +++++++++++++----------
 sys/netinet/tcp_lro.h |  4 ++--
 sys/sys/time.h        | 11 +++++++++++
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 09fc024c3d73..ea84ca191eca 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -562,16 +562,18 @@ void
 tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
 {
 	struct lro_entry *le, *le_tmp;
-	sbintime_t sbt;
+	uint64_t now, tov;
+	struct bintime bt;
 
 	if (LIST_EMPTY(&lc->lro_active))
 		return;
 
-	/* get timeout time */
-	sbt = getsbinuptime() - tvtosbt(*timeout);
-
+	/* get timeout time and current time in ns */
+	binuptime(&bt);
+	now = bintime2ns(&bt);
+	tov = ((timeout->tv_sec * 1000000000) + (timeout->tv_usec * 1000));
 	LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
-		if (sbt >= le->alloc_time) {
+		if (now >= (bintime2ns(&le->alloc_time) + tov)) {
 			tcp_lro_active_remove(le);
 			tcp_lro_flush(lc, le);
 		}
@@ -610,7 +612,7 @@ tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
 {
 	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
 		union tcp_log_stackspecific log;
-		struct timeval tv;
+		struct timeval tv, btv;
 		uint32_t cts;
 
 		cts = tcp_get_usecs(&tv);
@@ -637,7 +639,8 @@ tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
 		log.u_bbr.cwnd_gain = le->window;
 		log.u_bbr.cur_del_rate = (uintptr_t)m;
 		log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
-		log.u_bbr.flex6 = sbttous(lc->lro_last_queue_time);
+		bintime2timeval(&lc->lro_last_queue_time, &btv);
+		log.u_bbr.flex6 = tcp_tv_to_usectick(&btv);
 		log.u_bbr.flex7 = le->compressed;
 		log.u_bbr.pacing_gain = le->uncompressed;
 		if (in_epoch(net_epoch_preempt))
@@ -1446,7 +1449,7 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
 	CURVNET_SET(lc->ifp->if_vnet);
 
 	/* get current time */
-	lc->lro_last_queue_time = getsbinuptime();
+	binuptime(&lc->lro_last_queue_time);
 
 	/* sort all mbufs according to stream */
 	tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count);
@@ -1739,7 +1742,7 @@ tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_h
 #endif
 	/* If no hardware or arrival stamp on the packet add timestamp */
 	if ((m->m_flags & (M_TSTMP_LRO | M_TSTMP)) == 0) {
-		m->m_pkthdr.rcv_tstmp = sbttons(lc->lro_last_queue_time);
+		m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); 
 		m->m_flags |= M_TSTMP_LRO;
 	}
 
@@ -1834,7 +1837,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
 	int error;
 
 	/* get current time */
-	lc->lro_last_queue_time = getsbinuptime();
+	binuptime(&lc->lro_last_queue_time);
 
 	CURVNET_SET(lc->ifp->if_vnet);
 	error = tcp_lro_rx_common(lc, m, csum, true);
diff --git a/sys/netinet/tcp_lro.h b/sys/netinet/tcp_lro.h
index d2220a626b81..5ff15e2dc97e 100644
--- a/sys/netinet/tcp_lro.h
+++ b/sys/netinet/tcp_lro.h
@@ -140,7 +140,7 @@ struct lro_entry {
 	uint16_t		uncompressed;
 	uint16_t		window;
 	uint16_t		timestamp;	/* flag, not a TCP hdr field. */
-	sbintime_t		alloc_time;	/* time when entry was allocated */
+	struct bintime		alloc_time;	/* time when entry was allocated */
 };
 
 LIST_HEAD(lro_head, lro_entry);
@@ -154,7 +154,7 @@ struct lro_mbuf_sort {
 struct lro_ctrl {
 	struct ifnet	*ifp;
 	struct lro_mbuf_sort *lro_mbuf_data;
-	sbintime_t	lro_last_queue_time;	/* last time data was queued */
+	struct bintime	lro_last_queue_time;	/* last time data was queued */
 	uint64_t	lro_queued;
 	uint64_t	lro_flushed;
 	uint64_t	lro_bad_csum;
diff --git a/sys/sys/time.h b/sys/sys/time.h
index 9bffca204d56..a48aa3fe5548 100644
--- a/sys/sys/time.h
+++ b/sys/sys/time.h
@@ -286,6 +286,17 @@ bintime2timespec(const struct bintime *_bt, struct timespec *_ts)
 	    (uint32_t)(_bt->frac >> 32)) >> 32;
 }
 
+static __inline uint64_t
+bintime2ns(const struct bintime *_bt)
+{
+	uint64_t ret;
+
+	ret = (uint64_t)(_bt->sec) * (uint64_t)1000000000;
+	ret += (((uint64_t)1000000000 *
+		 (uint32_t)(_bt->frac >> 32)) >> 32);
+	return (ret);
+}
+
 static __inline void
 timespec2bintime(const struct timespec *_ts, struct bintime *_bt)
 {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106091800.159I0sdJ085566>