From owner-p4-projects@FreeBSD.ORG Sun Apr 1 20:20:55 2007 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id E432316A404; Sun, 1 Apr 2007 20:20:54 +0000 (UTC) X-Original-To: perforce@freebsd.org Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 7862716A40B for ; Sun, 1 Apr 2007 20:20:54 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.freebsd.org (Postfix) with ESMTP id 5D13A13C4C1 for ; Sun, 1 Apr 2007 20:20:54 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.8/8.13.8) with ESMTP id l31KKstv097743 for ; Sun, 1 Apr 2007 20:20:54 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.8/8.13.8/Submit) id l31KKr0O097740 for perforce@freebsd.org; Sun, 1 Apr 2007 20:20:53 GMT (envelope-from kmacy@freebsd.org) Date: Sun, 1 Apr 2007 20:20:53 GMT Message-Id: <200704012020.l31KKr0O097740@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 117123 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 01 Apr 2007 20:20:55 -0000 http://perforce.freebsd.org/chv.cgi?CH=117123 Change 117123 by kmacy@kmacy_vt-x:opentoe_init on 2007/04/01 20:20:02 add entry busdma tag to each queue to allow mappings to be created in parallel track zone and type for rx freelist for simplifying later free convert rx freelists over to only using clusters, allocating the mbuf right before the call to 
get_packet This change alleviates a good portion of the recent (last 2 weeks) 18% performance drop in peak TCP throughput Affected files ... .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#4 edit .. //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#3 edit Differences ... ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_adapter.h#4 (text+ko) ==== @@ -175,7 +175,9 @@ uint64_t empty; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; - struct mtx fl_locks[8]; + bus_dma_tag_t entry_tag; + uma_zone_t zone; + int type; }; struct tx_desc; @@ -201,6 +203,7 @@ uint64_t restarts; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; + bus_dma_tag_t entry_tag; struct mtx lock; }; ==== //depot/projects/opentoe/sys/dev/cxgb/cxgb_sge.c#3 (text+ko) ==== @@ -126,7 +126,7 @@ }; struct rx_sw_desc { /* SW state per Rx descriptor */ - struct mbuf *m; + void *cl; bus_dmamap_t map; int flags; }; @@ -137,6 +137,12 @@ unsigned int pidx; }; +struct refill_fl_cb_arg { + int error; + bus_dma_segment_t seg; + int nseg; +}; + /* * Maps a number of flits to the number of Tx descriptors that can hold them. 
* The formula is @@ -440,6 +446,16 @@ qs->rspq.polling = 0 /* p->polling */; } +static void +refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + struct refill_fl_cb_arg *cb_arg = arg; + + cb_arg->error = error; + cb_arg->seg = segs[0]; + cb_arg->nseg = nseg; + +} /** * refill_fl - refill an SGE free-buffer list @@ -453,41 +469,50 @@ static void refill_fl(adapter_t *sc, struct sge_fl *q, int n) { - bus_dma_segment_t seg; struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; - struct mbuf *m; - int err, nsegs; + void *cl; + int err; + struct refill_fl_cb_arg cb_arg; + cb_arg.error = 0; + while (n--) { - m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, q->buf_size); + /* + * We only allocate a cluster, mbuf allocation happens after rx + */ + cl = m_cljget(NULL, M_DONTWAIT, q->buf_size); - if (m == NULL) { - log(LOG_WARNING, "Failed to allocate mbuf\n"); + if (cl == NULL) { + log(LOG_WARNING, "Failed to allocate cluster\n"); goto done; } if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { - if ((err = bus_dmamap_create(sc->rx_jumbo_dmat, 0, &sd->map))) { + if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); + /* + * XXX free cluster + */ goto done; } sd->flags |= RX_SW_DESC_MAP_CREATED; } - sd->flags |= RX_SW_DESC_INUSE; - m->m_pkthdr.len = m->m_len = q->buf_size; - err = bus_dmamap_load_mbuf_sg(sc->rx_jumbo_dmat, sd->map, m, &seg, - &nsegs, BUS_DMA_NOWAIT); - if (err != 0) { - log(LOG_WARNING, "failure in refill_fl %d\n", err); - m_freem(m); + err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0); + + if (err || cb_arg.error) { + log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); + /* + * XXX free cluster + */ return; } - sd->m = m; - d->addr_lo = htobe32(seg.ds_addr & 0xffffffff); - d->addr_hi = htobe32(((uint64_t)seg.ds_addr >>32) & 0xffffffff); + sd->flags |= RX_SW_DESC_INUSE; + sd->cl = cl; + d->addr_lo = 
htobe32(cb_arg.seg.ds_addr & 0xffffffff); + d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); d->gen2 = htobe32(V_FLD_GEN2(q->gen)); @@ -520,16 +545,17 @@ free_rx_bufs(adapter_t *sc, struct sge_fl *q) { u_int cidx = q->cidx; - + while (q->credits--) { struct rx_sw_desc *d = &q->sdesc[cidx]; if (d->flags & RX_SW_DESC_INUSE) { - bus_dmamap_unload(sc->rx_jumbo_dmat, d->map); - bus_dmamap_destroy(sc->rx_jumbo_dmat, d->map); - m_freem(d->m); + bus_dmamap_unload(q->entry_tag, d->map); + bus_dmamap_destroy(q->entry_tag, d->map); + uma_zfree(q->zone, d->cl); } - d->m = NULL; + + d->cl = NULL; if (++cidx == q->size) cidx = 0; } @@ -552,8 +578,8 @@ static int alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, - bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, - bus_dmamap_t *map) + bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, + bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) { size_t len = nelem * elem_size; void *s = NULL; @@ -584,6 +610,19 @@ bzero(s, len); *(void **)sdesc = s; } + + if (parent_entry_tag == NULL) + return (0); + + if ((err = bus_dma_tag_create(parent_entry_tag, PAGE_SIZE, 0, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, + NULL, NULL, PAGE_SIZE, 1, + PAGE_SIZE, BUS_DMA_ALLOCNOW, + NULL, NULL, entry_tag)) != 0) { + device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); + return (ENOMEM); + } + return (0); } @@ -1361,23 +1400,26 @@ if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, - &q->fl[0].desc, &q->fl[0].sdesc, - &q->fl[0].desc_tag, &q->fl[0].desc_map)) != 0) { + &q->fl[0].desc, &q->fl[0].sdesc, + &q->fl[0].desc_tag, &q->fl[0].desc_map, + sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { printf("error %d from alloc ring fl0\n", ret); goto err; } if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), 
&q->fl[1].phys_addr, - &q->fl[1].desc, &q->fl[1].sdesc, - &q->fl[1].desc_tag, &q->fl[1].desc_map)) != 0) { + &q->fl[1].desc, &q->fl[1].sdesc, + &q->fl[1].desc_tag, &q->fl[1].desc_map, + sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { printf("error %d from alloc ring fl1\n", ret); goto err; } if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, - &q->rspq.phys_addr, &q->rspq.desc, NULL, - &q->rspq.desc_tag, &q->rspq.desc_map)) != 0) { + &q->rspq.phys_addr, &q->rspq.desc, NULL, + &q->rspq.desc_tag, &q->rspq.desc_map, + NULL, NULL)) != 0) { printf("error %d from alloc ring rspq\n", ret); goto err; } @@ -1393,8 +1435,8 @@ if ((ret = alloc_ring(sc, p->txq_size[i], sizeof(struct tx_desc), sz, &q->txq[i].phys_addr, &q->txq[i].desc, - &q->txq[i].sdesc, &q->txq[i].desc_tag, - &q->txq[i].desc_map)) != 0) { + &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map, + sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { printf("error %d from alloc ring tx %i\n", ret, i); goto err; } @@ -1416,7 +1458,13 @@ flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); q->fl[0].buf_size = MCLBYTES; + q->fl[0].zone = zone_clust; + q->fl[0].type = EXT_CLUSTER; + q->fl[1].buf_size = MJUMPAGESIZE; + q->fl[1].zone = zone_jumbop; + q->fl[1].type = EXT_JUMBOP; + q->lro.enabled = lro_default; mtx_lock(&sc->sge.reg_lock); @@ -1614,8 +1662,8 @@ DPRINTF("cidx=%d d=%p\n", cidx, d); if (d->m) { if (d->flags & TX_SW_DESC_MAPPED) { - bus_dmamap_unload(sc->tx_dmat, d->map); - bus_dmamap_destroy(sc->tx_dmat, d->map); + bus_dmamap_unload(q->entry_tag, d->map); + bus_dmamap_destroy(q->entry_tag, d->map); d->flags &= ~TX_SW_DESC_MAPPED; } m_vec[nbufs] = d->m; @@ -1742,12 +1790,14 @@ * threshold and the packet is too big to copy, or (b) the packet should * be copied but there is no memory for the copy. 
*/ + +#include +#include static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, - struct t3_mbuf_hdr *mh, struct rsp_desc *r) + struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m) { - struct mbuf *m = NULL; unsigned int len_cq = ntohl(r->len_cq); struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; @@ -1756,12 +1806,13 @@ uint8_t sopeop = G_RSPD_SOP_EOP(flags); int ret = 0; - prefetch(sd->m->m_data); + prefetch(sd->cl); fl->credits--; - bus_dmamap_sync(adap->rx_jumbo_dmat, sd->map, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(adap->rx_jumbo_dmat, sd->map); - m = sd->m; + bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fl->entry_tag, sd->map); + + m_cljset(m, sd->cl, fl->buf_size); m->m_len = len; switch(sopeop) { @@ -1941,9 +1992,14 @@ rspq->imm_data++; } else if (r->len_cq) { int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; - + struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); + + if (m == NULL) { + log(LOG_WARNING, "failed to get mbuf for packet\n"); + break; + } ethpad = 2; - eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r); + eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r, m); } else { DPRINTF("pure response\n"); rspq->pure_rsps++; @@ -2044,7 +2100,6 @@ adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; struct sge_rspq *q1 = &adap->sge.qs[1].rspq; - t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR);