From owner-p4-projects@FreeBSD.ORG Tue Aug 14 00:44:06 2007
Return-Path: 
Delivered-To: p4-projects@freebsd.org
Received: by hub.freebsd.org (Postfix, from userid 32767)
	id 9636816A41A; Tue, 14 Aug 2007 00:44:06 +0000 (UTC)
Delivered-To: perforce@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 30B3216A417
	for ; Tue, 14 Aug 2007 00:44:06 +0000 (UTC)
	(envelope-from kmacy@freebsd.org)
Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29])
	by mx1.freebsd.org (Postfix) with ESMTP id 1BEA313C465
	for ; Tue, 14 Aug 2007 00:44:06 +0000 (UTC)
	(envelope-from kmacy@freebsd.org)
Received: from repoman.freebsd.org (localhost [127.0.0.1])
	by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id l7E0i5iS029273
	for ; Tue, 14 Aug 2007 00:44:05 GMT
	(envelope-from kmacy@freebsd.org)
Received: (from perforce@localhost)
	by repoman.freebsd.org (8.14.1/8.14.1/Submit) id l7E0i5El029270
	for perforce@freebsd.org; Tue, 14 Aug 2007 00:44:05 GMT
	(envelope-from kmacy@freebsd.org)
Date: Tue, 14 Aug 2007 00:44:05 GMT
Message-Id: <200708140044.l7E0i5El029270@repoman.freebsd.org>
X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f
From: Kip Macy 
To: Perforce Change Reviews 
Cc: 
Subject: PERFORCE change 125128 for review
X-BeenThere: p4-projects@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: p4 projects tree changes
X-List-Received-Date: Tue, 14 Aug 2007 00:44:06 -0000

http://perforce.freebsd.org/chv.cgi?CH=125128

Change 125128 by kmacy@kmacy_home:ethng on 2007/08/14 00:43:10

	Allow any CPU to use any tx queue by protecting the txq with its own
	lock instead of relying on CPU binding.  It isn't clear how much the
	strong affinity buys us without complicating the scheduler, and it
	doesn't really reduce locking because we still have to go through
	the motions of waking up the bound service thread.

	+ This greatly simplifies the logic because we no longer have to
	  special-case the local queue: either the txq lock is free and the
	  queue is not stalled, in which case the packet is transmitted
	  directly, or the queue is busy, in which case the packet is
	  enqueued on the queue's ring buffer.

	+ This also eliminates the need for the rss_hash to be "correct",
	  since an ACK can always be transmitted directly without any
	  queueing delay; the rss_hash simply ends up being there for load
	  balancing and as an affinity hint.

	+ One interesting possibility this approach opens up is an interface
	  that lets the TCP stack tell the driver to put a packet at the
	  *head* of a queue, so that an inbound stream is not slowed down by
	  an outbound stream.

	(A minimal userspace sketch of the trylock-or-enqueue pattern
	follows after the diff.)

Affected files ...

.. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#8 edit

Differences ...
==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_multiq.c#8 (text+ko) ====

@@ -396,11 +396,9 @@
 	    qs->qs_cpuid, curcpu));
 
 	reclaimed = j = 0;
-	while ((reclaimable = desc_reclaimable(txq)) > 0) {
-		critical_enter();
-
+	reclaimable = desc_reclaimable(txq);
+	while (reclaimable > 0) {
 		n = t3_free_tx_desc(txq, min(reclaimable, TX_CLEAN_MAX_DESC), m_vec);
-		critical_exit();
 
 		reclaimed += min(reclaimable, TX_CLEAN_MAX_DESC);
@@ -412,12 +410,11 @@
 			m_freem_vec(m_vec[i]);
 		j++;
 
-		critical_enter();
 		txq->cleaned += reclaimed;
 		txq->in_use -= reclaimed;
 		if (isset(&qs->txq_stopped, TXQ_ETH))
 			clrbit(&qs->txq_stopped, TXQ_ETH);
-		critical_exit();
+		reclaimable = desc_reclaimable(txq);
 	}
 	return (reclaimed);
@@ -455,7 +452,6 @@
 		return (err);
 	}
 
-	immpkt = NULL;
 	if (desc_reclaimable(txq) > 0) {
 		int reclaimed = 0;
@@ -495,70 +491,35 @@
 	return (err);
 }
 
-static int
-cxgb_pcpu_txq_trylock(struct sge_txq *txq)
-{
-	critical_enter();
-	if (txq->flags & TXQ_TRANSMITTING) {
-		critical_exit();
-		DPRINTF("transmit in progress\n");
-		return (0);
-	}
-	txq->flags |= TXQ_TRANSMITTING;
-	critical_exit();
-	return (1);
-}
-
-static void
-cxgb_pcpu_txq_unlock(struct sge_txq *txq)
-{
-	critical_enter();
-	txq->flags &= ~TXQ_TRANSMITTING;
-	critical_exit();
-}
 
 int
 cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
 {
 	uint32_t cookie;
 	int err, qidx, locked;
 	struct port_info *pi;
-	struct sge_qset *immqs, *curqs;
+	struct sge_qset *qs;
 	struct sge_txq *txq = NULL /* gcc is dumb */;
 
 	pi = ifp->if_softc;
-	immqs = curqs = NULL;
+	qs = NULL;
 	err = cookie = locked = 0;
-	sched_pin();
+
 	if (immpkt && (immpkt->m_pkthdr.rss_hash != 0)) {
 		cookie = immpkt->m_pkthdr.rss_hash;
 		qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie);
 		DPRINTF("hash=0x%x qidx=%d cpu=%d\n", immpkt->m_pkthdr.rss_hash, qidx, curcpu);
-		immqs = &pi->adapter->sge.qs[qidx];
-		if (immqs->qs_cpuid != curcpu) {
-			cxgb_pcpu_enqueue_packet_(immqs, immpkt);
-			immpkt = NULL;
-		}
-	}
-	if (curcpu < pi->first_qset || curcpu >= (pi->first_qset + pi->nqsets)) {
-		/*
-		 * If packet isn't tagged and there is no queue for this cpu
-		 */
-		if (immpkt) {
-			immqs = &pi->adapter->sge.qs[pi->first_qset];
-			cxgb_pcpu_enqueue_packet_(immqs, immpkt);
-		}
-		goto done;
-	}
-	curqs = &pi->adapter->sge.qs[curcpu];
-	txq = &curqs->txq[TXQ_ETH];
-	if (cxgb_pcpu_txq_trylock(txq)) {
-		err = cxgb_pcpu_start_(curqs, immpkt, FALSE);
-		cxgb_pcpu_txq_unlock(txq);
+		qs = &pi->adapter->sge.qs[qidx];
+	} else
+		qs = &pi->adapter->sge.qs[pi->first_qset];
+
+	txq = &qs->txq[TXQ_ETH];
+
+	if (mtx_trylock(&txq->lock)) {
+		err = cxgb_pcpu_start_(qs, immpkt, FALSE);
+		mtx_unlock(&txq->lock);
 	} else if (immpkt)
-		err = cxgb_pcpu_enqueue_packet_(curqs, immpkt);
-done:
-	sched_unpin();
+		err = cxgb_pcpu_enqueue_packet_(qs, immpkt);
 
 	return ((err == ENOSPC) ?
 0 : err);
 }
 
@@ -584,7 +545,6 @@
 	IFQ_UNLOCK(&ifp->if_snd);
 	printf("dequeued %d packets\n", i);
 	lhead = ltail = NULL;
-	sched_pin();
 	for (m = head; m != NULL; m = head->m_nextpkt) {
 		calc_cookie = cxgb_pcpu_calc_cookie(ifp, m);
 		qidx = cxgb_pcpu_cookie_to_qidx(pi, calc_cookie);
@@ -608,18 +568,12 @@
 		 * Assume one-to-one mapping of qset to CPU for now XXX
 		 */
 
-		if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
-			(void)cxgb_pcpu_start_(qs, NULL, TRUE);
-			cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
-		} else {
-			/*
+		(void)cxgb_pcpu_start_(qs, NULL, TRUE);
+		/*
 		 * XXX multiple packets
 		 */
 		cxgb_pcpu_enqueue_packet_(qs, lhead);
-
-		}
 	}
-	sched_unpin();
 }
 
 static void
@@ -638,15 +592,16 @@
 	thread_unlock(td);
 	DELAY(qs->qs_cpuid*100000);
-	printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
+	if (bootverbose)
+		printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
 	for (;;) {
 		if (qs->qs_flags & QS_EXITING)
 			break;
 
-		if (cxgb_pcpu_txq_trylock(&qs->txq[TXQ_ETH])) {
+		if (mtx_trylock(&qs->txq[TXQ_ETH].lock)) {
			err = cxgb_pcpu_start_(qs, NULL, TRUE);
-			cxgb_pcpu_txq_unlock(&qs->txq[TXQ_ETH]);
+			mtx_unlock(&qs->txq[TXQ_ETH].lock);
 		} else
 			err = EINPROGRESS;
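
For illustration only, here is a minimal userspace sketch of the
trylock-or-enqueue pattern described in the change description above. It is
not driver code: pthread_mutex_trylock() stands in for the kernel's
mtx_trylock(9), and the hypothetical toy_txq structure and toy_* functions
stand in for sge_txq, cxgb_pcpu_start_() and cxgb_pcpu_enqueue_packet_().

/*
 * Userspace approximation of the trylock-or-enqueue transmit path.
 * Any caller may try the per-queue lock; if it gets it, the packet is
 * sent immediately, otherwise it is deferred onto the queue's ring
 * buffer for the current lock holder (or a service thread) to drain.
 */
#include <pthread.h>
#include <stdio.h>

#define RING_SIZE 256

struct toy_txq {
	pthread_mutex_t lock;              /* per-queue transmit lock */
	const char     *ring[RING_SIZE];   /* deferred-packet ring buffer */
	unsigned        prod, cons;        /* producer/consumer indices */
};

/* Hand the packet to the hardware queue (lock must be held). */
static void
toy_transmit(struct toy_txq *txq, const char *pkt)
{
	(void)txq;
	printf("transmit: %s\n", pkt);
}

/* Defer the packet onto the ring; it is sent later by whoever drains it. */
static int
toy_enqueue(struct toy_txq *txq, const char *pkt)
{
	if (txq->prod - txq->cons == RING_SIZE)
		return (-1);               /* ring full, analogous to ENOSPC */
	txq->ring[txq->prod++ % RING_SIZE] = pkt;
	return (0);
}

/* The pattern from the change: trylock, transmit directly or enqueue. */
static int
toy_start(struct toy_txq *txq, const char *pkt)
{
	if (pthread_mutex_trylock(&txq->lock) == 0) {
		toy_transmit(txq, pkt);
		pthread_mutex_unlock(&txq->lock);
		return (0);
	}
	return (toy_enqueue(txq, pkt));
}

int
main(void)
{
	struct toy_txq txq = { .lock = PTHREAD_MUTEX_INITIALIZER };

	toy_start(&txq, "packet A");       /* lock free: sent directly */
	pthread_mutex_lock(&txq.lock);     /* simulate a busy queue */
	toy_start(&txq, "packet B");       /* lock busy: lands on the ring */
	pthread_mutex_unlock(&txq.lock);
	printf("deferred packets on ring: %u\n", txq.prod - txq.cons);
	return (0);
}

Built with "cc -pthread", the second toy_start() call finds the lock held and
the packet lands on the ring, mirroring the busy-queue path that
cxgb_pcpu_start() takes in the diff when mtx_trylock(&txq->lock) fails.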