From owner-p4-projects@FreeBSD.ORG Mon Sep 24 05:02:22 2007 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 65FB916A41A; Mon, 24 Sep 2007 05:02:22 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 1C87016A419 for ; Mon, 24 Sep 2007 05:02:22 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id E8B7213C45B for ; Mon, 24 Sep 2007 05:02:21 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id l8O52Lpj090989 for ; Mon, 24 Sep 2007 05:02:21 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id l8O52LEU090986 for perforce@freebsd.org; Mon, 24 Sep 2007 05:02:21 GMT (envelope-from kmacy@freebsd.org) Date: Mon, 24 Sep 2007 05:02:21 GMT Message-Id: <200709240502.l8O52LEU090986@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 126763 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 24 Sep 2007 05:02:22 -0000 http://perforce.freebsd.org/chv.cgi?CH=126763 Change 126763 by kmacy@kmacy_home:ethng on 2007/09/24 05:01:29 avoid extra cache/tlb misses and allocations by embedding the mbuf in the cluster on receive Affected files ... .. //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#23 edit Differences ... ==== //depot/projects/ethng/src/sys/dev/cxgb/cxgb_sge.c#23 (text+ko) ==== @@ -140,18 +140,13 @@ }; struct rx_sw_desc { /* SW state per Rx descriptor */ - union { - void *cl; - uint32_t *ref; - } u; - uint8_t *data; + caddr_t rxsd_cl; + uint32_t *rxsd_ref; + caddr_t data; bus_dmamap_t map; int flags; }; -#define rxsd_cl u.cl -#define rxsd_ref u.ref - struct txq_state { unsigned int compl; unsigned int gen; @@ -547,8 +542,7 @@ struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; - void *cl; - uint32_t *ref; + caddr_t cl; int err; cb_arg.error = 0; @@ -560,7 +554,6 @@ log(LOG_WARNING, "Failed to allocate cluster\n"); goto done; } - ref = cl; if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { @@ -571,7 +564,8 @@ sd->flags |= RX_SW_DESC_MAP_CREATED; } #if !defined(__i386__) && !defined(__amd64__) - err = bus_dmamap_load(q->entry_tag, sd->map, (uint32_t *)cl + 1, q->buf_size, + err = bus_dmamap_load(q->entry_tag, sd->map, + cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t), q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { @@ -582,11 +576,13 @@ return; } #else - cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)((uint32_t *)cl + 1)); + cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + sizeof(struct m_hdr) + + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t))); #endif sd->flags |= RX_SW_DESC_INUSE; sd->rxsd_cl = cl; - sd->data = (uint8_t *)(sd->rxsd_ref + 1); + sd->rxsd_ref = (uint32_t *)(cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_)); + sd->data = cl + sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); @@ -1026,7 +1022,7 @@ * packet. Ethernet packets require addition of WR and CPL headers. */ static __inline unsigned int -calc_tx_descs(const struct mbuf *m, int nsegs, int tsoinfo) +calc_tx_descs(const struct mbuf *m, int nsegs) { unsigned int flits; @@ -1035,7 +1031,7 @@ flits = sgl_len(nsegs) + 2; #ifdef TSO_SUPPORTED - if (tsoinfo) + if (m->m_pkthdr.csum_flags & CSUM_TSO) flits++; #endif return flits_to_desc(flits); @@ -1057,7 +1053,7 @@ goto done; } else #endif - err = bus_dmamap_load_mvec_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0); + err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0); if (err == 0) { goto done; @@ -1473,7 +1469,7 @@ return (err); m0 = *m; #endif - ndesc = calc_tx_descs(mi, nsegs, tso_info); + ndesc = calc_tx_descs(m0, nsegs); sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); @@ -1487,7 +1483,7 @@ write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(pi->adapter, txq); - if ((m0->m_type == MT_DATA) && (m0->m_flags & M_EXT)) { + if ((m0->m_type == MT_DATA) && ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT)) { m0->m_flags = 0; m_free(m0); } @@ -2343,15 +2339,15 @@ q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); - q->fl[0].buf_size = MCLBYTES - sizeof(uint32_t); + q->fl[0].buf_size = (MCLBYTES - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_)); q->fl[0].zone = zone_clust; q->fl[0].type = EXT_CLUSTER; if (jumbo_phys_contig) { - q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t); + q->fl[1].buf_size = MJUM9BYTES - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_); q->fl[1].zone = zone_jumbo9; q->fl[1].type = EXT_JUMBO9; } else { - q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t); + q->fl[1].buf_size = MJUMPAGESIZE - sizeof(uint32_t) - sizeof(struct m_hdr) - sizeof(struct pkthdr) - sizeof(struct m_ext_); q->fl[1].zone = zone_jumbop; q->fl[1].type = EXT_JUMBOP; } @@ -2567,9 +2563,32 @@ } #else +static void +init_cluster_mbuf(caddr_t cl, int flags, int type) +{ + struct mbuf *m; + int header_size; + + header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); + + bzero(cl, header_size); + m = (struct mbuf *)cl; + + SLIST_INIT(&m->m_pkthdr.tags); + m->m_type = MT_DATA; + m->m_flags = flags | M_NOFREE | M_EXT; + m->m_data = cl + header_size; + m->m_ext.ext_buf = cl; + m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); + m->m_ext.ext_size = m_getsizefromtype(type); + m->m_ext.ext_type = type; + *(m->m_ext.ref_cnt) = 1; + DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); +} + static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, - struct mbuf *m, struct rsp_desc *r) + struct mbuf **m, struct rsp_desc *r) { unsigned int len_cq = ntohl(r->len_cq); @@ -2579,8 +2598,8 @@ uint32_t flags = ntohl(r->flags); uint8_t sopeop = G_RSPD_SOP_EOP(flags); void *cl; - uint32_t *ref = NULL; int ret = 0; + struct mbuf *m0; prefetch((sd + 1)->rxsd_cl); prefetch((sd + 2)->rxsd_cl); @@ -2590,43 +2609,46 @@ bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { - cl = mtod(m, void *); + if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) + goto skip_recycle; + cl = mtod(m0, void *); memcpy(cl, sd->data, len); recycle_rx_buf(adap, fl, fl->cidx); + *m = m0; } else { + skip_recycle: bus_dmamap_unload(fl->entry_tag, sd->map); cl = sd->rxsd_cl; - ref = sd->rxsd_ref; + *m = m0 = (struct mbuf *)cl; } switch(sopeop) { case RSPQ_SOP_EOP: DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); - if (cl == sd->rxsd_cl) { - m_cljset(m, cl, fl->type, ref); - *ref = 1; - m->m_data = sd->data; - } - m->m_len = m->m_pkthdr.len = len; + if (cl == sd->rxsd_cl) + init_cluster_mbuf(cl, M_PKTHDR, fl->type); + m0->m_len = m0->m_pkthdr.len = len; ret = 1; goto done; break; case RSPQ_NSOP_NEOP: DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); + panic("chaining unsupported"); ret = 0; break; case RSPQ_SOP: DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); - m_iovinit(m); + panic("chaining unsupported"); + m_iovinit(m0); ret = 0; break; case RSPQ_EOP: DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); + panic("chaining unsupported"); ret = 1; break; } - m_iovappend(m, cl, fl->buf_size, len, sizeof(uint32_t), ref); - *ref = 1; + m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); done: if (++fl->cidx == fl->size) fl->cidx = 0; @@ -2779,17 +2801,9 @@ eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m); #else - if (rspq->rspq_mbuf == NULL) - rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); - if (rspq->rspq_mbuf == NULL) { - rspq->next_holdoff = NOMEM_INTR_DELAY; - log(LOG_WARNING, "failed to get mbuf for packet\n"); - break; - } else { - rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash; - rspq->rspq_mbuf->m_next = rspq->rspq_mbuf->m_nextpkt = NULL; - } - eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r); + eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); + rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash; + #endif ethpad = 2; } else {