Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 4 Feb 2021 17:46:39 GMT
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 397aaa3bcc78 - stable/13 - cxgbe(4): Fixes to tx coalescing.
Message-ID:  <202102041746.114HkdxE028400@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by np:

URL: https://cgit.FreeBSD.org/src/commit/?id=397aaa3bcc78196f5c794ebd1de751568562a7a3

commit 397aaa3bcc78196f5c794ebd1de751568562a7a3
Author:     Navdeep Parhar <np@FreeBSD.org>
AuthorDate: 2021-02-01 11:00:09 +0000
Commit:     Navdeep Parhar <np@FreeBSD.org>
CommitDate: 2021-02-04 17:43:36 +0000

    cxgbe(4): Fixes to tx coalescing.
    
    - The behavior implemented in r362905 resulted in delayed transmission
      of packets in some cases, causing performance issues.  Use a different
      heuristic to predict tx requests.
    
    - Add a tunable/sysctl (hw.cxgbe.tx_coalesce) to disable tx coalescing
      entirely.  It can be changed at any time.  There is no change in
      default behavior.
    
    (cherry picked from commit 3447df8bc5b342bd88d565641435284ff620ee2a)
---
 sys/dev/cxgbe/adapter.h |  4 ++-
 sys/dev/cxgbe/t4_main.c |  1 +
 sys/dev/cxgbe/t4_sge.c  | 69 ++++++++++++++++++++++++++++++++++++++++---------
 sys/dev/cxgbe/t4_vf.c   |  1 +
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 37afa7cf7780..4b2f86d00052 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -561,7 +561,7 @@ struct txpkts {
 	uint8_t wr_type;	/* type 0 or type 1 */
 	uint8_t npkt;		/* # of packets in this work request */
 	uint8_t len16;		/* # of 16B pieces used by this work request */
-	uint8_t score;		/* 1-10. coalescing attempted if score > 3 */
+	uint8_t score;
 	uint8_t max_npkt;	/* maximum number of packets allowed */
 	uint16_t plen;		/* total payload (sum of all packets) */
 
@@ -584,6 +584,7 @@ struct sge_txq {
 	struct sglist *gl;
 	__be32 cpl_ctrl0;	/* for convenience */
 	int tc_idx;		/* traffic class */
+	uint64_t last_tx;	/* cycle count when eth_tx was last called */
 	struct txpkts txp;
 
 	struct task tx_reclaim_task;
@@ -599,6 +600,7 @@ struct sge_txq {
 	uint64_t txpkts1_wrs;	/* # of type1 coalesced tx work requests */
 	uint64_t txpkts0_pkts;	/* # of frames in type0 coalesced tx WRs */
 	uint64_t txpkts1_pkts;	/* # of frames in type1 coalesced tx WRs */
+	uint64_t txpkts_flush;	/* # of times txp had to be sent by tx_update */
 	uint64_t raw_wrs;	/* # of raw work requests (alloc_wr_mbuf) */
 	uint64_t vxlan_tso_wrs;	/* # of VXLAN TSO work requests */
 	uint64_t vxlan_txcsum;
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 32a229bed52b..e01429f93825 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -10718,6 +10718,7 @@ clear_stats(struct adapter *sc, u_int port_id)
 				txq->txpkts1_wrs = 0;
 				txq->txpkts0_pkts = 0;
 				txq->txpkts1_pkts = 0;
+				txq->txpkts_flush = 0;
 				txq->raw_wrs = 0;
 				txq->vxlan_tso_wrs = 0;
 				txq->vxlan_txcsum = 0;
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index f9553bbbce67..45f07358f0db 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -212,6 +212,22 @@ static counter_u64_t defrags;
 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, defrags, CTLFLAG_RD, &defrags,
     "Number of mbuf defrags performed");
 
+static int t4_tx_coalesce = 1;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce, CTLFLAG_RWTUN, &t4_tx_coalesce, 0,
+    "tx coalescing allowed");
+
+/*
+ * The driver will make aggressive attempts at tx coalescing if it sees this
+ * many packets eligible for coalescing in quick succession, with no more than
+ * the specified gap in between the eth_tx calls that delivered the packets.
+ */
+static int t4_tx_coalesce_pkts = 32;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_pkts, CTLFLAG_RWTUN,
+    &t4_tx_coalesce_pkts, 0,
+    "# of consecutive packets (1 - 255) that will trigger tx coalescing");
+static int t4_tx_coalesce_gap = 5;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_gap, CTLFLAG_RWTUN,
+    &t4_tx_coalesce_gap, 0, "tx gap (in microseconds)");
 
 static int service_iq(struct sge_iq *, int);
 static int service_iq_fl(struct sge_iq *, int);
@@ -3120,6 +3136,26 @@ set_txupdate_flags(struct sge_txq *txq, u_int avail,
 	}
 }
 
+#if defined(__i386__) || defined(__amd64__)
+extern uint64_t tsc_freq;
+#endif
+
+static inline bool
+record_eth_tx_time(struct sge_txq *txq)
+{
+	const uint64_t cycles = get_cyclecount();
+	const uint64_t last_tx = txq->last_tx;
+#if defined(__i386__) || defined(__amd64__)
+	const uint64_t itg = tsc_freq * t4_tx_coalesce_gap / 1000000;
+#else
+	const uint64_t itg = 0;
+#endif
+
+	MPASS(cycles >= last_tx);
+	txq->last_tx = cycles;
+	return (cycles - last_tx < itg);
+}
+
 /*
  * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to
  * be consumed.  Return the actual number consumed.  0 indicates a stall.
@@ -3137,10 +3173,11 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
 	u_int n, avail, dbdiff;		/* # of hardware descriptors */
 	int i, rc;
 	struct mbuf *m0;
-	bool snd;
+	bool snd, recent_tx;
 	void *wr;	/* start of the last WR written to the ring */
 
 	TXQ_LOCK_ASSERT_OWNED(txq);
+	recent_tx = record_eth_tx_time(txq);
 
 	remaining = IDXDIFF(pidx, cidx, r->size);
 	if (__predict_false(discard_tx(eq))) {
@@ -3159,17 +3196,15 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
 	}
 
 	/* How many hardware descriptors do we have readily available. */
-	if (eq->pidx == eq->cidx) {
+	if (eq->pidx == eq->cidx)
 		avail = eq->sidx - 1;
-		if (txp->score++ >= 5)
-			txp->score = 5;	/* tx is completely idle, reset. */
-	} else
+	else
 		avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
 
 	total = 0;
 	if (remaining == 0) {
-		if (txp->score-- == 1)	/* egr_update had to drain txpkts */
-			txp->score = 1;
+		txp->score = 0;
+		txq->txpkts_flush++;
 		goto send_txpkts;
 	}
 
@@ -3183,7 +3218,17 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
 		if (avail < 2 * SGE_MAX_WR_NDESC)
 			avail += reclaim_tx_descs(txq, 64);
 
-		if (txp->npkt > 0 || remaining > 1 || txp->score > 3 ||
+		if (t4_tx_coalesce == 0 && txp->npkt == 0)
+			goto skip_coalescing;
+		if (cannot_use_txpkts(m0))
+			txp->score = 0;
+		else if (recent_tx) {
+			if (++txp->score == 0)
+				txp->score = UINT8_MAX;
+		} else
+			txp->score = 1;
+		if (txp->npkt > 0 || remaining > 1 ||
+		    txp->score >= t4_tx_coalesce_pkts ||
 		    atomic_load_int(&txq->eq.equiq) != 0) {
 			if (vi->flags & TX_USES_VM_WR)
 				rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd);
@@ -3198,8 +3243,6 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
 			for (i = 0; i < txp->npkt; i++)
 				ETHER_BPF_MTAP(ifp, txp->mb[i]);
 			if (txp->npkt > 1) {
-				if (txp->score++ >= 10)
-					txp->score = 10;
 				MPASS(avail >= tx_len16_to_desc(txp->len16));
 				if (vi->flags & TX_USES_VM_WR)
 					n = write_txpkts_vm_wr(sc, txq);
@@ -3239,7 +3282,7 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
 
 		MPASS(rc != 0 && rc != EAGAIN);
 		MPASS(txp->npkt == 0);
-
+skip_coalescing:
 		n = tx_len16_to_desc(mbuf_len16(m0));
 		if (__predict_false(avail < n)) {
 			avail += reclaim_tx_descs(txq, min(n, 32));
@@ -4304,7 +4347,6 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
 	    M_ZERO | M_WAITOK);
 
 	txp = &txq->txp;
-	txp->score = 5;
 	MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr);
 	txq->txp.max_npkt = min(nitems(txp->mb),
 	    sc->params.max_pkts_per_eth_tx_pkts_wr);
@@ -4363,6 +4405,9 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts",
 	    CTLFLAG_RD, &txq->txpkts1_pkts,
 	    "# of frames tx'd using type1 txpkts work requests");
+	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts_flush",
+	    CTLFLAG_RD, &txq->txpkts_flush,
+	    "# of times txpkts had to be flushed out by an egress-update");
 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD,
 	    &txq->raw_wrs, "# of raw work requests (non-packets)");
 	SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_tso_wrs",
diff --git a/sys/dev/cxgbe/t4_vf.c b/sys/dev/cxgbe/t4_vf.c
index 223ffe7aeaa0..6c736e37faac 100644
--- a/sys/dev/cxgbe/t4_vf.c
+++ b/sys/dev/cxgbe/t4_vf.c
@@ -893,6 +893,7 @@ t4vf_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
 					txq->txpkts1_wrs = 0;
 					txq->txpkts0_pkts = 0;
 					txq->txpkts1_pkts = 0;
+					txq->txpkts_flush = 0;
 					mp_ring_reset_stats(txq->r);
 				}
 			}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202102041746.114HkdxE028400>