From owner-p4-projects@FreeBSD.ORG Sat Sep 15 21:02:44 2007 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 838F716A41A; Sat, 15 Sep 2007 21:02:44 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 260C216A418 for ; Sat, 15 Sep 2007 21:02:44 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 042E413C458 for ; Sat, 15 Sep 2007 21:02:44 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id l8FL2hGv012665 for ; Sat, 15 Sep 2007 21:02:43 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id l8FL2hwF012662 for perforce@freebsd.org; Sat, 15 Sep 2007 21:02:43 GMT (envelope-from kmacy@freebsd.org) Date: Sat, 15 Sep 2007 21:02:43 GMT Message-Id: <200709152102.l8FL2hwF012662@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 126446 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 15 Sep 2007 21:02:44 -0000 http://perforce.freebsd.org/chv.cgi?CH=126446 Change 126446 by kmacy@kmacy_home:ethng on 2007/09/15 21:02:32 move refcnt into cluster and tune prefetching slightly Affected files ... .. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#19 edit Differences ... ==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#19 (text+ko) ==== @@ -141,11 +141,18 @@ }; struct rx_sw_desc { /* SW state per Rx descriptor */ - void *cl; - bus_dmamap_t map; - int flags; + union { + void *cl; + uint32_t *ref; + } u; + uint8_t *data; + bus_dmamap_t map; + int flags; }; +#define rxsd_cl u.cl +#define rxsd_ref u.ref + struct txq_state { unsigned int compl; unsigned int gen; @@ -356,8 +363,12 @@ memcpy(mtod(m, uint8_t *), resp->imm_data, len); break; case RSPQ_EOP: - memcpy(cl, resp->imm_data, len); - m_iovappend(m, cl, MSIZE, len, 0); + memcpy(cl, resp->imm_data, len); + /* + * XXX + */ + panic("bad append"); + m_iovappend(m, cl, MSIZE, len, 0, NULL); break; default: bogus_imm++; @@ -534,6 +545,7 @@ struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; void *cl; + uint32_t *ref; int err; cb_arg.error = 0; @@ -541,10 +553,12 @@ /* * We only allocate a cluster, mbuf allocation happens after rx */ - if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) { + if ((cl = m_cljget(NULL, M_DONTWAIT, q->zone)) == NULL) { log(LOG_WARNING, "Failed to allocate cluster\n"); goto done; } + ref = cl; + if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); @@ -553,7 +567,8 @@ } sd->flags |= RX_SW_DESC_MAP_CREATED; } - err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, + + err = bus_dmamap_load(q->entry_tag, sd->map, (uint32_t *)cl + 1, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { @@ -565,7 +580,8 @@ } sd->flags |= RX_SW_DESC_INUSE; - sd->cl = cl; + sd->rxsd_cl = cl; + sd->data = (uint8_t *)(sd->rxsd_ref + 1); d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); @@ -607,9 +623,9 @@ if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); - uma_zfree(q->zone, d->cl); + uma_zfree(q->zone, d->rxsd_cl); } - d->cl = NULL; + d->rxsd_cl = NULL; if (++cidx == q->size) cidx = 0; } @@ -1250,7 +1266,7 @@ uint32_t wr_hi, wr_lo, sgl_flits; struct tx_desc *txd; - DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset); + #if defined(IFNET_MULTIQUEUE) && defined(STRICT_AFFINITY) KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d", qs->qs_cpuid, curcpu)); @@ -1263,7 +1279,8 @@ txsd = &txq->sdesc[txq->pidx]; txd = &txq->desc[txq->pidx]; - DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan); + DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset); + DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, p->txpkt_intf, p->tx_chan); /* * XXX handle checksum, TSO, and VLAN here * @@ -1779,7 +1796,7 @@ { struct tx_sw_desc *txsd; unsigned int cidx; - int i, reclaimed, nbufs; + int i, iter, reclaimed, nbufs; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], @@ -1787,10 +1804,16 @@ #endif cidx = q->cidx; txsd = &q->sdesc[cidx]; + prefetch(txsd); reclaimed = nbufs = 0; - - for (reclaimed = 0; reclaimed < reclaimable;) { - DPRINTF("cidx=%d d=%p\n", cidx, d); + for (iter = reclaimed = 0; reclaimed < reclaimable; iter++) { + if ((iter & 0x1) == 0) { + prefetch(txsd + 1); + prefetch(txsd + 2); + prefetch(txsd + 3); + prefetch(txsd + 4); + } + DPRINTF("cidx=%d d=%p\n", cidx, txsd); if (txsd->count > 0) { if (nbufs + txsd->count > m_vec_size) break; @@ -1798,13 +1821,17 @@ bus_dmamap_unload(q->entry_tag, txsd->map); txsd->flags &= ~TX_SW_DESC_MAPPED; } - if (m_get_priority(txsd->m[0]) == cidx) { - for (i = 0; i < txsd->count; i++, nbufs++) - m_vec[nbufs] = txsd->m[i]; - txsd->count = 0; - } else { + for (i = 0; i < txsd->count; i++, nbufs++) { + prefetch(txsd->m[i]); + m_vec[nbufs] = txsd->m[i]; + } + txsd->count = 0; + +#ifdef DIAGNOSTIC + if (m_get_priority(txsd->m[0]) != cidx) printf("pri=%d cidx=%d\n", (int)m_get_priority(txsd->m[0]), cidx); - } +#endif + } else q->txq_skipped++; @@ -2254,13 +2281,18 @@ q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); - q->fl[0].buf_size = MCLBYTES; + q->fl[0].buf_size = MCLBYTES - sizeof(uint32_t); q->fl[0].zone = zone_clust; q->fl[0].type = EXT_CLUSTER; - q->fl[1].buf_size = MJUMPAGESIZE; - q->fl[1].zone = zone_jumbop; - q->fl[1].type = EXT_JUMBOP; - + if (jumbo_phys_contig) { + q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t); + q->fl[1].zone = zone_jumbo9; + q->fl[1].type = EXT_JUMBO9; + } else { + q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t); + q->fl[1].zone = zone_jumbop; + q->fl[1].type = EXT_JUMBOP; + } q->lro.enabled = lro_default; mtx_lock(&sc->sge.reg_lock); @@ -2372,7 +2404,9 @@ /* * adjust after conversion to mbuf chain */ - m_adj(m, sizeof(*cpl) + ethpad); + m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); + m->m_len -= (sizeof(*cpl) + ethpad); + m->m_data += (sizeof(*cpl) + ethpad); (*ifp->if_input)(ifp, m); } @@ -2406,17 +2440,24 @@ uint32_t len = G_RSPD_LEN(len_cq); uint32_t flags = ntohl(r->flags); uint8_t sopeop = G_RSPD_SOP_EOP(flags); + uint32_t *ref; int ret = 0; - prefetch(sd->cl); + prefetch(sd->rxsd_cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fl->entry_tag, sd->map); - m_cljset(m, sd->cl, fl->type); + ref = sd->rxsd_ref; + m_cljset(m, sd->rxsd_cl, fl->type, sd->rxsd_ref); + *ref = 1; m->m_len = len; - + /* + * bump past the refcnt address + */ + m->m_data = sd->data; + switch(sopeop) { case RSPQ_SOP_EOP: DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); @@ -2474,9 +2515,11 @@ uint32_t flags = ntohl(r->flags); uint8_t sopeop = G_RSPD_SOP_EOP(flags); void *cl; + uint32_t *ref = NULL; int ret = 0; - - prefetch(sd->cl); + + prefetch((sd + 1)->rxsd_cl); + prefetch((sd + 2)->rxsd_cl); DPRINTF("rx cpu=%d\n", curcpu); fl->credits--; @@ -2484,17 +2527,22 @@ if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { cl = mtod(m, void *); - memcpy(cl, sd->cl, len); + memcpy(cl, sd->data, len); recycle_rx_buf(adap, fl, fl->cidx); } else { - cl = sd->cl; bus_dmamap_unload(fl->entry_tag, sd->map); + cl = sd->rxsd_cl; + ref = sd->rxsd_ref; } + switch(sopeop) { case RSPQ_SOP_EOP: DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); - if (cl == sd->cl) - m_cljset(m, cl, fl->type); + if (cl == sd->rxsd_cl) { + m_cljset(m, cl, fl->type, ref); + *ref = 1; + m->m_data = sd->data; + } m->m_len = m->m_pkthdr.len = len; ret = 1; goto done; @@ -2513,8 +2561,8 @@ ret = 1; break; } - m_iovappend(m, cl, fl->buf_size, len, 0); - + m_iovappend(m, cl, fl->buf_size, len, sizeof(uint32_t), ref); + *ref = 1; done: if (++fl->cidx == fl->size) fl->cidx = 0; @@ -2710,7 +2758,7 @@ if (eop) { prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); - prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); + prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); if (eth) { t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,