From owner-p4-projects@FreeBSD.ORG Thu Jul 5 00:58:56 2007 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id BADED16A468; Thu, 5 Jul 2007 00:58:56 +0000 (UTC) X-Original-To: perforce@freebsd.org Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 7793C16A41F for ; Thu, 5 Jul 2007 00:58:56 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.freebsd.org (Postfix) with ESMTP id 6769213C43E for ; Thu, 5 Jul 2007 00:58:56 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.8/8.13.8) with ESMTP id l650wugZ068408 for ; Thu, 5 Jul 2007 00:58:56 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.8/8.13.8/Submit) id l650wtt2068405 for perforce@freebsd.org; Thu, 5 Jul 2007 00:58:55 GMT (envelope-from kmacy@freebsd.org) Date: Thu, 5 Jul 2007 00:58:55 GMT Message-Id: <200707050058.l650wtt2068405@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 122896 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 05 Jul 2007 00:58:57 -0000 http://perforce.freebsd.org/chv.cgi?CH=122896 Change 122896 by kmacy@kmacy_vt-x:opentoe_init on 2007/07/05 00:58:37 - reduce cpu usage by as much as 25% (40%->30%) by doing txq reclaim more efficiently - use mtx_trylock when trying to grab the lock to avoid spinning during long encap loop - add per-txq reclaim task - if mbufs were successfully reclaimed, try another pass - reduce contention on txq lock in if_start by returning right away if another thread is already transmitting - set/clear flag in txq when running in the t3_encap loop - fix null pointer panic caused by bad merge - track txq overruns with sysctl MFP4 after: 2 days Affected files ... .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#20 edit .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_main.c#13 edit .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_osdep.h#11 edit .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#23 edit Differences ... ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#20 (text+ko) ==== @@ -239,6 +239,8 @@ struct tx_desc; struct tx_sw_desc; +#define TXQ_TRANSMITTING 0x1 + struct sge_txq { uint64_t flags; uint32_t in_use; @@ -254,7 +256,9 @@ struct tx_sw_desc *sdesc; uint32_t token; bus_addr_t phys_addr; - struct task qresume_tsk; + struct task qreclaim_task; + struct task qresume_task; + struct port_info *port; uint32_t cntxt_id; uint64_t stops; uint64_t restarts; ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_main.c#13 (text+ko) ==== @@ -1613,7 +1613,11 @@ txq = &qs->txq[TXQ_ETH]; err = 0; + if (txq->flags & TXQ_TRANSMITTING) + return (EINPROGRESS); + mtx_lock(&txq->lock); + txq->flags |= TXQ_TRANSMITTING; in_use_init = txq->in_use; while ((txq->in_use - in_use_init < txmax) && (txq->size > txq->in_use + TX_MAX_DESC)) { @@ -1651,6 +1655,7 @@ break; BPF_MTAP(ifp, m); } + txq->flags &= ~TXQ_TRANSMITTING; mtx_unlock(&txq->lock); if (__predict_false(err)) { @@ -1685,8 +1690,7 @@ do { if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC) - taskqueue_enqueue(pi->adapter->tq, - &pi->timer_reclaim_task); + taskqueue_enqueue(pi->adapter->tq, &txq->qreclaim_task); error = cxgb_start_tx(ifp, TX_START_MAX_DESC); } while (error == 0); @@ -1704,9 +1708,8 @@ txq = &qs->txq[TXQ_ETH]; if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC) - taskqueue_enqueue(pi->adapter->tq, - &pi->timer_reclaim_task); - + taskqueue_enqueue(pi->adapter->tq, &txq->qreclaim_task); + err = cxgb_start_tx(ifp, TX_START_MAX_DESC); if (err == 0) ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_osdep.h#11 (text+ko) ==== @@ -95,7 +95,7 @@ #define TX_MAX_SIZE (1 << 16) /* 64KB */ #define TX_MAX_SEGS 36 /* maximum supported by card */ #define TX_MAX_DESC 4 /* max descriptors per packet */ -#define TX_START_MAX_DESC (TX_MAX_DESC << 2) /* maximum number of descriptors +#define TX_START_MAX_DESC (TX_MAX_DESC << 3) /* maximum number of descriptors * call to start used per */ #define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4) /* maximum tx descriptors * to clean per iteration */ ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#23 (text+ko) ==== @@ -67,6 +67,7 @@ uint32_t collapse_free = 0; uint32_t mb_free_vec_free = 0; +int txq_fills = 0; int collapse_mbufs = 0; static int recycle_enable = 1; @@ -187,7 +188,8 @@ static void t3_free_qset(adapter_t *sc, struct sge_qset *q); static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); -static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec); +static void sge_txq_reclaim_handler(void *arg, int ncount); +static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec); /** * reclaim_completed_tx - reclaims completed Tx descriptors @@ -199,14 +201,14 @@ * queue's lock held. */ static __inline int -reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) +reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec) { int reclaimed, reclaim = desc_reclaimable(q); int n = 0; mtx_assert(&q->lock, MA_OWNED); if (reclaim > 0) { - n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); + n = free_tx_desc(q, min(reclaim, nbufs), mvec); reclaimed = min(reclaim, nbufs); q->cleaned += reclaimed; q->in_use -= reclaimed; @@ -482,7 +484,7 @@ struct refill_fl_cb_arg cb_arg; void *cl; int err; - + cb_arg.error = 0; while (n--) { /* @@ -743,7 +745,47 @@ V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } +static __inline void +sge_txq_reclaim_(struct sge_txq *txq) +{ + int reclaimable, i, n; + struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; + struct port_info *p; + p = txq->port; +reclaim_more: + n = 0; + reclaimable = desc_reclaimable(txq); + if (reclaimable > 0 && mtx_trylock(&txq->lock)) { + n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec); + mtx_unlock(&txq->lock); + } + if (n == 0) + return; + + for (i = 0; i < n; i++) { + m_freem_vec(m_vec[i]); + } + if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE && + txq->size - txq->in_use >= TX_START_MAX_DESC) { + txq_fills++; + p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + taskqueue_enqueue(p->tq, &p->start_task); + } + + if (n) + goto reclaim_more; +} + +static void +sge_txq_reclaim_handler(void *arg, int ncount) +{ + struct sge_txq *q = arg; + + sge_txq_reclaim_(q); +} + + static void sge_timer_reclaim(void *arg, int ncount) { @@ -753,39 +795,15 @@ struct sge_qset *qs; struct sge_txq *txq; struct mtx *lock; - struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; - int n, reclaimable; for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; txq = &qs->txq[TXQ_ETH]; - reclaimable = desc_reclaimable(txq); - if (reclaimable > 0) { - mtx_lock(&txq->lock); - n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); - mtx_unlock(&txq->lock); - - for (i = 0; i < n; i++) - m_freem_vec(m_vec[i]); - - if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE && - txq->size - txq->in_use >= TX_START_MAX_DESC) { - p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - taskqueue_enqueue(p->tq, &p->start_task); - } - } + sge_txq_reclaim_(txq); txq = &qs->txq[TXQ_OFLD]; - reclaimable = desc_reclaimable(txq); - if (reclaimable > 0) { - mtx_lock(&txq->lock); - n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); - mtx_unlock(&txq->lock); + sge_txq_reclaim_(txq); - for (i = 0; i < n; i++) - m_freem_vec(m_vec[i]); - } - lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; @@ -1223,7 +1241,6 @@ wr_lo = htonl(V_WR_TID(txq->token)); txsd->m = m0; m_set_priority(m0, txqs.pidx); - m_set_priority(m0, txqs.pidx); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(p->adapter, txq); @@ -1563,8 +1580,8 @@ for (i = 0; i < nqsets; ++i) { struct sge_qset *qs = &sc->sge.qs[i]; - taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); - taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); + taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); + taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } } @@ -1579,18 +1596,18 @@ * Tx buffers. Called with the Tx queue lock held. */ int -free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) +free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec) { struct tx_sw_desc *d; - unsigned int cidx = q->cidx; + unsigned int cidx; int nbufs = 0; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", n, cidx); #endif + cidx = q->cidx; d = &q->sdesc[cidx]; - while (n-- > 0) { DPRINTF("cidx=%d d=%p\n", cidx, d); if (d->m) { @@ -1599,21 +1616,13 @@ bus_dmamap_destroy(q->entry_tag, d->map); d->flags &= ~TX_SW_DESC_MAPPED; } - if (m_get_priority(d->m) == cidx) { m_vec[nbufs] = d->m; d->m = NULL; nbufs++; } else { - printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx); + printf("pri=%jd cidx=%d\n", m_get_priority(d->m), cidx); } - if (m_get_priority(d->m) == cidx) { - m_vec[nbufs] = d->m; - d->m = NULL; - nbufs++; - } else { - printf("pri=%d cidx=%d\n", m_get_priority(d->m), cidx); - } } ++d; if (++cidx == q->size) { @@ -1622,7 +1631,7 @@ } } q->cidx = cidx; - + return (nbufs); } @@ -1748,7 +1757,7 @@ return (ret); } ndesc = calc_tx_descs_ofld(m, nsegs); -again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); +again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec); ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); if (__predict_false(ret)) { @@ -1805,7 +1814,7 @@ struct tx_sw_desc *stx = &q->sdesc[q->pidx]; mtx_lock(&q->lock); -again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); +again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec); while ((m = mbufq_peek(&q->sendq)) != NULL) { unsigned int gen, pidx; @@ -1944,13 +1953,13 @@ should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; - taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); + taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); } if (isset(&qs->txq_stopped, TXQ_CTRL) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; - taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); + taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } } @@ -2028,9 +2037,14 @@ device_get_unit(sc->dev), irq_vec_idx, i); MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); } + + q->txq[TXQ_ETH].port = pi; - TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q); - TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q); + TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); + TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); + TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); + TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); + q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; @@ -2646,6 +2660,10 @@ "collapse_mbufs", CTLFLAG_RW, &collapse_mbufs, 0, "collapse mbuf chains into iovecs"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "txq_overrun", + CTLFLAG_RD, &txq_fills, + 0, "#times txq overrun"); } /**