Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 16 Apr 2026 16:30:50 +0000
From:      Andrew Gallatin <gallatin@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: ce33f96fcf2f - main - mlx5e: Ensure rx timestamps are monotonically increasing
Message-ID:  <69e10eba.268b7.77f6bdc6@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by gallatin:

URL: https://cgit.FreeBSD.org/src/commit/?id=ce33f96fcf2f2d0d49c406274bcc64df72fe530e

commit ce33f96fcf2f2d0d49c406274bcc64df72fe530e
Author:     Andrew Gallatin <gallatin@FreeBSD.org>
AuthorDate: 2026-04-16 16:26:07 +0000
Commit:     Andrew Gallatin <gallatin@FreeBSD.org>
CommitDate: 2026-04-16 16:27:27 +0000

    mlx5e: Ensure rx timestamps are monotonically increasing
    
    The clock calibration routine can currently result in rx timestamps
    jumping backwards, which can confuse the TCP stack.
    Ensure they are monotonically increasing by estimating what
    we'd calculate as the next timestamp and clamping the calibration
    so new timestamps are no earlier in time.
    
    Reviewed by: kib, nickbanks_netflix.com
    Tested by: nickbanks_netflix.com
    Differential Revision: https://reviews.freebsd.org/D56427
    Sponsored by: Netflix
---
 sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 50 +++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index fb8b79c8f787..9bcb0dcf8e16 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1134,6 +1134,25 @@ mlx5e_hw_clock(struct mlx5e_priv *priv)
 	return (((uint64_t)hw_h << 32) | hw_l);
 }
 
+/*
+ * Seed the first calibration point so that base_prev and clbr_hw_prev
+ * are always valid.  Called once from mlx5e_create_ifp(), before the
+ * calibration callout is first reset.
+ */
+static void
+mlx5e_seed_calibration(struct mlx5e_priv *priv)
+{
+	struct mlx5e_clbr_point *cp;
+	struct timespec ts;
+
+	cp = &priv->clbr_points[0];
+	cp->clbr_hw_curr = mlx5e_hw_clock(priv);	/* hw clock reading now */
+	nanouptime(&ts);
+	cp->base_curr = mlx5e_timespec2usec(&ts);	/* uptime now, converted */
+	cp->clbr_hw_prev = cp->clbr_hw_curr - 1;	/* hw delta is 1, not 0 */
+	cp->base_prev = cp->base_curr - 1;		/* time delta is 1, not 0 */
+}
+
 /*
  * The calibration callout, it runs either in the context of the
  * thread which enables calibration, or in callout.  It takes the
@@ -1147,6 +1166,9 @@ mlx5e_calibration_callout(void *arg)
 	struct mlx5e_priv *priv;
 	struct mlx5e_clbr_point *next, *curr;
 	struct timespec ts;
+	uint64_t hw_delta_new, hw_delta_old;
+	uint64_t old_nsec, old_projected, old_sec;
+	uint64_t res_n, res_s, res_s_mod, rt_delta_old;
 	int clbr_curr_next;
 
 	priv = arg;
@@ -1175,6 +1197,33 @@ mlx5e_calibration_callout(void *arg)
 	nanouptime(&ts);
 	next->base_curr = mlx5e_timespec2usec(&ts);
 
+	/*
+	 * Ensure monotonicity across calibration transitions.  Compute
+	 * what the old calibration would extrapolate to at the new
+	 * hw_curr.  If the new base_curr is less, clamp it so the new
+	 * slope is at least as steep as the old one.  This prevents
+	 * packets from seeing time go backwards when the slope drops.
+	 *
+	 * Use the same split-seconds technique as mlx5e_mbuf_tstmp()
+	 * to avoid overflowing uint64_t in the multiplication.
+	 */
+	hw_delta_new = next->clbr_hw_curr - curr->clbr_hw_curr;
+	rt_delta_old = curr->base_curr - curr->base_prev;
+	hw_delta_old = curr->clbr_hw_curr - curr->clbr_hw_prev;
+	old_sec = hw_delta_new / priv->cclk;
+	old_nsec = hw_delta_new % priv->cclk;
+	res_s = old_sec * rt_delta_old;
+	res_n = old_nsec * rt_delta_old;
+	res_s_mod = res_s % hw_delta_old;
+	res_s /= hw_delta_old;
+	res_s_mod *= priv->cclk;
+	res_n += res_s_mod;
+	res_n /= hw_delta_old;
+	res_s *= priv->cclk;
+	old_projected = curr->base_curr + res_s + res_n;
+	if (next->base_curr < old_projected)
+		next->base_curr = old_projected;
+
 	curr->clbr_gen = 0;
 	atomic_thread_fence_rel();
 	priv->clbr_curr = clbr_curr_next;
@@ -4887,6 +4936,7 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 	callout_init(&priv->tstmp_clbr, 1);
 	/* Pull out the frequency of the clock in hz */
 	priv->cclk = (uint64_t)MLX5_CAP_GEN(mdev, device_frequency_khz) * 1000ULL;
+	mlx5e_seed_calibration(priv);
 	mlx5e_reset_calibration_callout(priv);
 
 	pa.pa_version = PFIL_VERSION;


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69e10eba.268b7.77f6bdc6>