Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 5 Jul 2007 00:58:55 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 122896 for review
Message-ID:  <200707050058.l650wtt2068405@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=122896

Change 122896 by kmacy@kmacy_vt-x:opentoe_init on 2007/07/05 00:58:37

	- reduce cpu usage by as much as 25% (40%->30%) by doing txq reclaim
	  more efficiently
		- use mtx_trylock when trying to grab the lock to avoid spinning
		  during long encap loop
		- add per-txq reclaim task
		- if mbufs were successfully reclaimed, try another pass
	
	- reduce contention on txq lock in if_start by returning right away
	  if another thread is already transmitting
		- set/clear flag in txq when running in the t3_encap loop
	
	- fix null pointer panic caused by bad merge 
	- track txq overruns with sysctl
	
	MFP4 after: 2 days	

Affected files ...

.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#20 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_main.c#13 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_osdep.h#11 edit
.. //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#23 edit

Differences ...

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#20 (text+ko) ====

@@ -239,6 +239,8 @@
 struct tx_desc;
 struct tx_sw_desc;
 
+#define TXQ_TRANSMITTING 0x1
+
 struct sge_txq {
 	uint64_t	flags;
 	uint32_t	in_use;
@@ -254,7 +256,9 @@
 	struct tx_sw_desc *sdesc;
 	uint32_t	token;
 	bus_addr_t	phys_addr;
-	struct task     qresume_tsk;
+	struct task     qreclaim_task;
+	struct task     qresume_task;
+	struct port_info *port;
 	uint32_t	cntxt_id;
 	uint64_t	stops;
 	uint64_t	restarts;

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_main.c#13 (text+ko) ====

@@ -1613,7 +1613,11 @@
 	txq = &qs->txq[TXQ_ETH];
 	err = 0;
 
+	if (txq->flags & TXQ_TRANSMITTING)
+		return (EINPROGRESS);
+
 	mtx_lock(&txq->lock);
+	txq->flags |= TXQ_TRANSMITTING;
 	in_use_init = txq->in_use;
 	while ((txq->in_use - in_use_init < txmax) &&
 	    (txq->size > txq->in_use + TX_MAX_DESC)) {
@@ -1651,6 +1655,7 @@
 			break;
 		BPF_MTAP(ifp, m);
 	}
+	txq->flags &= ~TXQ_TRANSMITTING;
 	mtx_unlock(&txq->lock);
 
 	if (__predict_false(err)) {
@@ -1685,8 +1690,7 @@
 
 	do {
 		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
-			taskqueue_enqueue(pi->adapter->tq,
-			    &pi->timer_reclaim_task);
+			taskqueue_enqueue(pi->adapter->tq, &txq->qreclaim_task);
 
 		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
 	} while (error == 0);
@@ -1704,9 +1708,8 @@
 	txq = &qs->txq[TXQ_ETH];
 	
 	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
-		taskqueue_enqueue(pi->adapter->tq,
-		    &pi->timer_reclaim_task);
-	
+		taskqueue_enqueue(pi->adapter->tq, &txq->qreclaim_task);
+
 	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
 	
 	if (err == 0)

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_osdep.h#11 (text+ko) ====

@@ -95,7 +95,7 @@
 #define TX_MAX_SIZE                (1 << 16)    /* 64KB                          */
 #define TX_MAX_SEGS                      36     /* maximum supported by card     */
 #define TX_MAX_DESC                       4     /* max descriptors per packet    */
-#define TX_START_MAX_DESC (TX_MAX_DESC << 2)    /* maximum number of descriptors
+#define TX_START_MAX_DESC (TX_MAX_DESC << 3)    /* maximum number of descriptors
 						 * call to start used per 	 */
 #define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4)    /* maximum tx descriptors
 						 * to clean per iteration        */

==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#23 (text+ko) ====

@@ -67,6 +67,7 @@
 
 uint32_t collapse_free = 0;
 uint32_t mb_free_vec_free = 0;
+int      txq_fills = 0;
 int      collapse_mbufs = 0;
 static int recycle_enable = 1;
 
@@ -187,7 +188,8 @@
 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
 static void sge_timer_cb(void *arg);
 static void sge_timer_reclaim(void *arg, int ncount);
-static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);
+static void sge_txq_reclaim_handler(void *arg, int ncount);
+static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
 
 /**
  *	reclaim_completed_tx - reclaims completed Tx descriptors
@@ -199,14 +201,14 @@
  *	queue's lock held.
  */
 static __inline int
-reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
+reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
 {
 	int reclaimed, reclaim = desc_reclaimable(q);
 	int n = 0;
 
 	mtx_assert(&q->lock, MA_OWNED);
 	if (reclaim > 0) {
-		n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
+		n = free_tx_desc(q, min(reclaim, nbufs), mvec);
 		reclaimed = min(reclaim, nbufs);
 		q->cleaned += reclaimed;
 		q->in_use -= reclaimed;
@@ -482,7 +484,7 @@
 	struct refill_fl_cb_arg cb_arg;
 	void *cl;
 	int err;
-
+	
 	cb_arg.error = 0;
 	while (n--) {
 		/*
@@ -743,7 +745,47 @@
 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 }
 
+static __inline void
+sge_txq_reclaim_(struct sge_txq *txq)
+{
+	int reclaimable, i, n;
+	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
+	struct port_info *p;
 
+	p = txq->port;
+reclaim_more:
+	n = 0;
+	reclaimable = desc_reclaimable(txq);
+	if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
+		n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
+		mtx_unlock(&txq->lock);
+	}
+	if (n == 0)
+		return;
+	
+	for (i = 0; i < n; i++) {
+		m_freem_vec(m_vec[i]);
+	}
+	if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
+	    txq->size - txq->in_use >= TX_START_MAX_DESC) {
+		txq_fills++;
+		p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+		taskqueue_enqueue(p->tq, &p->start_task);
+	}
+
+	if (n)
+		goto reclaim_more;
+}
+
+static void
+sge_txq_reclaim_handler(void *arg, int ncount)
+{
+	struct sge_txq *q = arg;
+
+	sge_txq_reclaim_(q);
+}
+
+
 static void
 sge_timer_reclaim(void *arg, int ncount)
 {
@@ -753,39 +795,15 @@
 	struct sge_qset *qs;
 	struct sge_txq *txq;
 	struct mtx *lock;
-	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
-	int n, reclaimable;
 
 	for (i = 0; i < nqsets; i++) {
 		qs = &sc->sge.qs[i];
 		txq = &qs->txq[TXQ_ETH];
-		reclaimable = desc_reclaimable(txq);
-		if (reclaimable > 0) {
-			mtx_lock(&txq->lock);
-			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
-			mtx_unlock(&txq->lock);
-
-			for (i = 0; i < n; i++) 
-				m_freem_vec(m_vec[i]);
-			
-			if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
-			    txq->size - txq->in_use >= TX_START_MAX_DESC) {
-				p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-				taskqueue_enqueue(p->tq, &p->start_task);
-			}
-		}
+		sge_txq_reclaim_(txq);
 
 		txq = &qs->txq[TXQ_OFLD];
-		reclaimable = desc_reclaimable(txq);
-		if (reclaimable > 0) {
-			mtx_lock(&txq->lock);
-			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
-			mtx_unlock(&txq->lock);
+		sge_txq_reclaim_(txq);
 
-			for (i = 0; i < n; i++)
-				m_freem_vec(m_vec[i]);
-		}
-
 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
 			    &sc->sge.qs[0].rspq.lock;
 
@@ -1223,7 +1241,6 @@
 	wr_lo = htonl(V_WR_TID(txq->token));
 	txsd->m = m0;
 	m_set_priority(m0, txqs.pidx); 
-	m_set_priority(m0, txqs.pidx); 
 
 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
 	check_ring_tx_db(p->adapter, txq);
@@ -1563,8 +1580,8 @@
 	for (i = 0; i < nqsets; ++i) {
 		struct sge_qset *qs = &sc->sge.qs[i];
 		
-		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
-		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
+		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
+		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 	}
 }
 
@@ -1579,18 +1596,18 @@
  *	Tx buffers.  Called with the Tx queue lock held.
  */
 int
-free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
+free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
 {
 	struct tx_sw_desc *d;
-	unsigned int cidx = q->cidx;
+	unsigned int cidx;
 	int nbufs = 0;
 	
 #ifdef T3_TRACE
 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
 		  "reclaiming %u Tx descriptors at cidx %u", n, cidx);
 #endif
+	cidx = q->cidx;
 	d = &q->sdesc[cidx];
-	
 	while (n-- > 0) {
 		DPRINTF("cidx=%d d=%p\n", cidx, d);
 		if (d->m) {
@@ -1599,21 +1616,13 @@
 				bus_dmamap_destroy(q->entry_tag, d->map);
 				d->flags &= ~TX_SW_DESC_MAPPED;
 			}
-
 			if (m_get_priority(d->m) == cidx) {
 				m_vec[nbufs] = d->m;
 				d->m = NULL;
 				nbufs++;
 			} else {
-				printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
+				printf("pri=%jd cidx=%d\n", m_get_priority(d->m), cidx);
 			}
-			if (m_get_priority(d->m) == cidx) {
-				m_vec[nbufs] = d->m;
-				d->m = NULL;
-				nbufs++;
-			} else {
-				printf("pri=%d cidx=%d\n", m_get_priority(d->m), cidx);
-			}
 		}
 		++d;
 		if (++cidx == q->size) {
@@ -1622,7 +1631,7 @@
 		}
 	}
 	q->cidx = cidx;
-
+	
 	return (nbufs);
 }
 
@@ -1748,7 +1757,7 @@
 		return (ret);
 	}
 	ndesc = calc_tx_descs_ofld(m, nsegs);
-again:	cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
+again:	cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
 
 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
 	if (__predict_false(ret)) {
@@ -1805,7 +1814,7 @@
 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
 		
 	mtx_lock(&q->lock);
-again:	cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);
+again:	cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
 
 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
 		unsigned int gen, pidx;
@@ -1944,13 +1953,13 @@
 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 		qs->txq[TXQ_OFLD].restarts++;
-		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
+		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 	}
 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 		qs->txq[TXQ_CTRL].restarts++;
-		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
+		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
 	}
 }
 
@@ -2028,9 +2037,14 @@
 		    device_get_unit(sc->dev), irq_vec_idx, i);
 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
 	}
+	
+	q->txq[TXQ_ETH].port = pi;
 
-	TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q);
-	TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q);
+	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
+	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
+	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
+	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
+	
 	
 	q->fl[0].gen = q->fl[1].gen = 1;
 	q->fl[0].size = p->fl_size;
@@ -2646,6 +2660,10 @@
 	    "collapse_mbufs",
 	    CTLFLAG_RW, &collapse_mbufs,
 	    0, "collapse mbuf chains into iovecs");
+	SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
+	    "txq_overrun",
+	    CTLFLAG_RD, &txq_fills,
+	    0, "#times txq overrun");
 }
 
 /**



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200707050058.l650wtt2068405>