From: Navdeep Parhar <np@FreeBSD.org>
Date: Tue, 4 Feb 2020 00:51:11 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r357481 - in head: share/man/man4 sys/dev/cxgbe
Message-Id: <202002040051.0140pBXw069165@repo.freebsd.org>

Author: np
Date: Tue Feb 4 00:51:10 2020
New Revision: 357481
URL: https://svnweb.freebsd.org/changeset/base/357481

Log:
  cxgbe(4): Retire the allow_mbufs_in_cluster optimization.

  This simplifies the driver's rx fast path as well as the bookkeeping code
  that tracks various rx buffer sizes and layouts.

  MFC after:	1 week
  Sponsored by:	Chelsio Communications

Modified:
  head/share/man/man4/cxgbe.4
  head/sys/dev/cxgbe/adapter.h
  head/sys/dev/cxgbe/t4_main.c
  head/sys/dev/cxgbe/t4_netmap.c
  head/sys/dev/cxgbe/t4_sge.c

Modified: head/share/man/man4/cxgbe.4
==============================================================================
--- head/share/man/man4/cxgbe.4	Tue Feb 4 00:06:16 2020	(r357480)
+++ head/share/man/man4/cxgbe.4	Tue Feb 4 00:51:10 2020	(r357481)
@@ -317,11 +317,6 @@ Allow the hardware to deliver multiple frames in the s
 opportunistically.
 The default is -1 which lets the driver decide.
 0 or 1 explicitly disable or enable this feature.
-.It Va hw.cxgbe.allow_mbufs_in_cluster
-1 allows the driver to lay down one or more mbufs within the receive buffer
-opportunistically.
-This is the default.
-0 prohibits the driver from doing so.
.It Va hw.cxgbe.largest_rx_cluster .It Va hw.cxgbe.safest_rx_cluster Sizes of rx clusters. Modified: head/sys/dev/cxgbe/adapter.h ============================================================================== --- head/sys/dev/cxgbe/adapter.h Tue Feb 4 00:06:16 2020 (r357480) +++ head/sys/dev/cxgbe/adapter.h Tue Feb 4 00:51:10 2020 (r357481) @@ -316,15 +316,6 @@ struct port_info { #define IS_MAIN_VI(vi) ((vi) == &((vi)->pi->vi[0])) -/* Where the cluster came from, how it has been carved up. */ -struct cluster_layout { - int8_t zidx; - int8_t hwidx; - uint16_t region1; /* mbufs laid out within this region */ - /* region2 is the DMA region */ - uint16_t region3; /* cluster_metadata within this region */ -}; - struct cluster_metadata { uma_zone_t zone; caddr_t cl; @@ -334,7 +325,8 @@ struct cluster_metadata { struct fl_sdesc { caddr_t cl; uint16_t nmbuf; /* # of driver originated mbufs with ref on cluster */ - struct cluster_layout cll; + int16_t moff; /* offset of metadata from cl */ + uint8_t zidx; }; struct tx_desc { @@ -466,20 +458,17 @@ struct sge_eq { char lockname[16]; }; -struct sw_zone_info { +struct rx_buf_info { uma_zone_t zone; /* zone that this cluster comes from */ - int size; /* size of cluster: 2K, 4K, 9K, 16K, etc. */ - int type; /* EXT_xxx type of the cluster */ - int8_t head_hwidx; - int8_t tail_hwidx; + uint16_t size1; /* same as size of cluster: 2K/4K/9K/16K. + * hwsize[hwidx1] = size1. No spare. */ + uint16_t size2; /* hwsize[hwidx2] = size2. + * spare in cluster = size1 - size2. */ + int8_t hwidx1; /* SGE bufsize idx for size1 */ + int8_t hwidx2; /* SGE bufsize idx for size2 */ + uint8_t type; /* EXT_xxx type of the cluster */ }; -struct hw_buf_info { - int8_t zidx; /* backpointer to zone; -ve means unused */ - int8_t next; /* next hwidx for this zone; -1 means no more */ - int size; -}; - enum { NUM_MEMWIN = 3, @@ -519,7 +508,8 @@ struct sge_fl { struct mtx fl_lock; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ struct fl_sdesc *sdesc; /* KVA of software descriptor ring */ - struct cluster_layout cll_def; /* default refill zone, layout */ + uint16_t zidx; /* refill zone idx */ + uint16_t safe_zidx; uint16_t lowat; /* # of buffers <= this means fl needs help */ int flags; uint16_t buf_boundary; @@ -537,8 +527,6 @@ struct sge_fl { u_int rx_offset; /* offset in fl buf (when buffer packing) */ volatile uint32_t *udb; - uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */ - uint64_t mbuf_inlined; /* # of mbuf created within clusters */ uint64_t cl_allocated; /* # of clusters allocated */ uint64_t cl_recycled; /* # of clusters recycled */ uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */ @@ -555,7 +543,6 @@ struct sge_fl { bus_dmamap_t desc_map; char lockname[16]; bus_addr_t ba; /* bus address of descriptor ring */ - struct cluster_layout cll_alt; /* alternate refill zone, layout */ }; struct mp_ring; @@ -781,10 +768,8 @@ struct sge { struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ - int8_t safe_hwidx1; /* may not have room for metadata */ - int8_t safe_hwidx2; /* with room for metadata and maybe more */ - struct sw_zone_info sw_zone_info[SW_ZONE_SIZES]; - struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES]; + int8_t safe_zidx; + struct rx_buf_info rx_buf_info[SW_ZONE_SIZES]; }; struct devnames { Modified: head/sys/dev/cxgbe/t4_main.c ============================================================================== --- head/sys/dev/cxgbe/t4_main.c Tue Feb 4 00:06:16 2020 (r357480) 
+++ head/sys/dev/cxgbe/t4_main.c Tue Feb 4 00:51:10 2020 (r357481) @@ -10291,8 +10291,6 @@ clear_stats(struct adapter *sc, u_int port_id) rxq->rxcsum = 0; rxq->vlan_extraction = 0; - rxq->fl.mbuf_allocated = 0; - rxq->fl.mbuf_inlined = 0; rxq->fl.cl_allocated = 0; rxq->fl.cl_recycled = 0; rxq->fl.cl_fast_recycled = 0; @@ -10334,8 +10332,6 @@ clear_stats(struct adapter *sc, u_int port_id) #endif #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { - ofld_rxq->fl.mbuf_allocated = 0; - ofld_rxq->fl.mbuf_inlined = 0; ofld_rxq->fl.cl_allocated = 0; ofld_rxq->fl.cl_recycled = 0; ofld_rxq->fl.cl_fast_recycled = 0; Modified: head/sys/dev/cxgbe/t4_netmap.c ============================================================================== --- head/sys/dev/cxgbe/t4_netmap.c Tue Feb 4 00:06:16 2020 (r357480) +++ head/sys/dev/cxgbe/t4_netmap.c Tue Feb 4 00:51:10 2020 (r357481) @@ -345,7 +345,7 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; int rc, i, j, hwidx, defq, nrssq; - struct hw_buf_info *hwb; + struct rx_buf_info *rxb; ASSERT_SYNCHRONIZED_OP(sc); @@ -353,17 +353,22 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (EAGAIN); - hwb = &sc->sge.hw_buf_info[0]; - for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { - if (hwb->size == NETMAP_BUF_SIZE(na)) + rxb = &sc->sge.rx_buf_info[0]; + for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { + if (rxb->size1 == NETMAP_BUF_SIZE(na)) { + hwidx = rxb->hwidx1; break; + } + if (rxb->size2 == NETMAP_BUF_SIZE(na)) { + hwidx = rxb->hwidx2; + break; + } } - if (i >= SGE_FLBUF_SIZES) { + if (i >= SW_ZONE_SIZES) { if_printf(ifp, "no hwidx for netmap buffer size %d.\n", NETMAP_BUF_SIZE(na)); return (ENXIO); } - hwidx = i; /* Must set caps before calling netmap_reset */ nm_set_native_flags(na); Modified: head/sys/dev/cxgbe/t4_sge.c ============================================================================== --- head/sys/dev/cxgbe/t4_sge.c Tue Feb 4 00:06:16 2020 (r357480) +++ head/sys/dev/cxgbe/t4_sge.c Tue Feb 4 00:51:10 2020 (r357481) @@ -148,16 +148,6 @@ SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN "payload pack boundary (bytes)"); /* - * Allow the driver to create mbuf(s) in a cluster allocated for rx. - * 0: never; always allocate mbufs from the zone_mbuf UMA zone. - * 1: ok to create mbuf(s) within a cluster if there is room. - */ -static int allow_mbufs_in_cluster = 1; -SYSCTL_INT(_hw_cxgbe, OID_AUTO, allow_mbufs_in_cluster, CTLFLAG_RDTUN, - &allow_mbufs_in_cluster, 0, - "Allow driver to create mbufs within a rx cluster"); - -/* * Largest rx cluster size that the driver is allowed to allocate. 
*/ static int largest_rx_cluster = MJUM16BYTES; @@ -284,8 +274,7 @@ static int refill_fl(struct adapter *, struct sge_fl * static void refill_sfl(void *); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct adapter *, struct sge_fl *); -static void find_best_refill_source(struct adapter *, struct sge_fl *, int); -static void find_safe_refill_source(struct adapter *, struct sge_fl *); +static int find_refill_source(struct adapter *, int, bool); static void add_fl_to_sfl(struct adapter *, struct sge_fl *); static inline void get_pkt_gl(struct mbuf *, struct sglist *); @@ -670,24 +659,19 @@ setup_pad_and_pack_boundaries(struct adapter *sc) void t4_tweak_chip_settings(struct adapter *sc) { - int i; + int i, reg; uint32_t v, m; int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); - static int sge_flbuf_sizes[] = { + static int sw_buf_sizes[] = { MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, - MJUMPAGESIZE - CL_METADATA_SIZE, - MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, #endif MJUM9BYTES, - MJUM16BYTES, - MCLBYTES - MSIZE - CL_METADATA_SIZE, - MJUM9BYTES - CL_METADATA_SIZE, - MJUM16BYTES - CL_METADATA_SIZE, + MJUM16BYTES }; KASSERT(sc->flags & MASTER_PF, @@ -710,13 +694,16 @@ t4_tweak_chip_settings(struct adapter *sc) V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); - KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, - ("%s: hw buffer size table too big", __func__)); t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096); t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536); - for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { - t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE15 - (4 * i), - sge_flbuf_sizes[i]); + reg = A_SGE_FL_BUFFER_SIZE2; + for (i = 0; i < nitems(sw_buf_sizes); i++) { + MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); + t4_write_reg(sc, reg, sw_buf_sizes[i]); + reg += 4; + MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); + t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE); + reg += 4; } v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | @@ -793,11 +780,11 @@ t4_tweak_chip_settings(struct adapter *sc) } /* - * SGE wants the buffer to be at least 64B and then a multiple of 16. If - * padding is in use, the buffer's start and end need to be aligned to the pad - * boundary as well. We'll just make sure that the size is a multiple of the - * boundary here, it is up to the buffer allocation code to make sure the start - * of the buffer is aligned as well. + * SGE wants the buffer to be at least 64B and then a multiple of 16. Its + * address mut be 16B aligned. If padding is in use the buffer's start and end + * need to be aligned to the pad boundary as well. We'll just make sure that + * the size is a multiple of the pad boundary here, it is up to the buffer + * allocation code to make sure the start of the buffer is aligned. */ static inline int hwsz_ok(struct adapter *sc, int hwsz) @@ -826,8 +813,7 @@ t4_read_chip_settings(struct adapter *sc) MJUM9BYTES, MJUM16BYTES }; - struct sw_zone_info *swz, *safe_swz; - struct hw_buf_info *hwb; + struct rx_buf_info *rxb; m = F_RXPKTCPLMODE; v = F_RXPKTCPLMODE; @@ -846,114 +832,51 @@ t4_read_chip_settings(struct adapter *sc) rc = EINVAL; } - /* Filter out unusable hw buffer sizes entirely (mark with -2). 
*/ - hwb = &s->hw_buf_info[0]; - for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { - r = sc->params.sge.sge_fl_buffer_size[i]; - hwb->size = r; - hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; - hwb->next = -1; - } + s->safe_zidx = -1; + rxb = &s->rx_buf_info[0]; + for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { + rxb->size1 = sw_buf_sizes[i]; + rxb->zone = m_getzone(rxb->size1); + rxb->type = m_gettype(rxb->size1); + rxb->size2 = 0; + rxb->hwidx1 = -1; + rxb->hwidx2 = -1; + for (j = 0; j < SGE_FLBUF_SIZES; j++) { + int hwsize = sp->sge_fl_buffer_size[j]; - /* - * Create a sorted list in decreasing order of hw buffer sizes (and so - * increasing order of spare area) for each software zone. - * - * If padding is enabled then the start and end of the buffer must align - * to the pad boundary; if packing is enabled then they must align with - * the pack boundary as well. Allocations from the cluster zones are - * aligned to min(size, 4K), so the buffer starts at that alignment and - * ends at hwb->size alignment. If mbuf inlining is allowed the - * starting alignment will be reduced to MSIZE and the driver will - * exercise appropriate caution when deciding on the best buffer layout - * to use. - */ - n = 0; /* no usable buffer size to begin with */ - swz = &s->sw_zone_info[0]; - safe_swz = NULL; - for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { - int8_t head = -1, tail = -1; - - swz->size = sw_buf_sizes[i]; - swz->zone = m_getzone(swz->size); - swz->type = m_gettype(swz->size); - - if (swz->size < PAGE_SIZE) { - MPASS(powerof2(swz->size)); - if (fl_pad && (swz->size % sp->pad_boundary != 0)) + if (!hwsz_ok(sc, hwsize)) continue; - } - if (swz->size == safest_rx_cluster) - safe_swz = swz; + /* hwidx for size1 */ + if (rxb->hwidx1 == -1 && rxb->size1 == hwsize) + rxb->hwidx1 = j; - hwb = &s->hw_buf_info[0]; - for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { - if (hwb->zidx != -1 || hwb->size > swz->size) + /* hwidx for size2 (buffer packing) */ + if (rxb->size1 - CL_METADATA_SIZE < hwsize) continue; -#ifdef INVARIANTS - if (fl_pad) - MPASS(hwb->size % sp->pad_boundary == 0); -#endif - hwb->zidx = i; - if (head == -1) - head = tail = j; - else if (hwb->size < s->hw_buf_info[tail].size) { - s->hw_buf_info[tail].next = j; - tail = j; - } else { - int8_t *cur; - struct hw_buf_info *t; - - for (cur = &head; *cur != -1; cur = &t->next) { - t = &s->hw_buf_info[*cur]; - if (hwb->size == t->size) { - hwb->zidx = -2; - break; - } - if (hwb->size > t->size) { - hwb->next = *cur; - *cur = j; - break; - } + n = rxb->size1 - hwsize - CL_METADATA_SIZE; + if (n == 0) { + rxb->hwidx2 = j; + rxb->size2 = hwsize; + break; /* stop looking */ + } + if (rxb->hwidx2 != -1) { + if (n < sp->sge_fl_buffer_size[rxb->hwidx2] - + hwsize - CL_METADATA_SIZE) { + rxb->hwidx2 = j; + rxb->size2 = hwsize; } + } else if (n <= 2 * CL_METADATA_SIZE) { + rxb->hwidx2 = j; + rxb->size2 = hwsize; } } - swz->head_hwidx = head; - swz->tail_hwidx = tail; - - if (tail != -1) { - n++; - if (swz->size - s->hw_buf_info[tail].size >= - CL_METADATA_SIZE) - sc->flags |= BUF_PACKING_OK; - } + if (rxb->hwidx2 != -1) + sc->flags |= BUF_PACKING_OK; + if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster) + s->safe_zidx = i; } - if (n == 0) { - device_printf(sc->dev, "no usable SGE FL buffer size.\n"); - rc = EINVAL; - } - s->safe_hwidx1 = -1; - s->safe_hwidx2 = -1; - if (safe_swz != NULL) { - s->safe_hwidx1 = safe_swz->head_hwidx; - for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { - int spare; - - hwb = &s->hw_buf_info[i]; -#ifdef INVARIANTS - if 
(fl_pad) - MPASS(hwb->size % sp->pad_boundary == 0); -#endif - spare = safe_swz->size - hwb->size; - if (spare >= CL_METADATA_SIZE) { - s->safe_hwidx2 = i; - break; - } - } - } - if (sc->flags & IS_VF) return (0); @@ -1012,7 +935,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_l struct sge_params *sp = &sc->params.sge; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", - CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_bufsizes, "A", "freelist buffer sizes"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, @@ -1608,6 +1531,20 @@ last_flit_to_ns(struct adapter *sc, uint64_t lf) return (n * 1000000 / sc->params.vpd.cclk); } +static inline void +move_to_next_rxbuf(struct sge_fl *fl) +{ + + fl->rx_offset = 0; + if (__predict_false((++fl->cidx & 7) == 0)) { + uint16_t cidx = fl->cidx >> 3; + + if (__predict_false(cidx == fl->sidx)) + fl->cidx = cidx = 0; + fl->hw_cidx = cidx; + } +} + /* * Deals with interrupts on an iq+fl queue. */ @@ -1618,8 +1555,8 @@ service_iq_fl(struct sge_iq *iq, int budget) struct sge_fl *fl; struct adapter *sc = iq->adapter; struct iq_desc *d = &iq->desc[iq->cidx]; - int ndescs = 0, limit; - int rsp_type, refill, starved; + int ndescs, limit; + int rsp_type, starved; uint32_t lq; uint16_t fl_hw_cidx; struct mbuf *m0; @@ -1631,10 +1568,7 @@ service_iq_fl(struct sge_iq *iq, int budget) KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); MPASS(iq->flags & IQ_HAS_FL); - limit = budget ? budget : iq->qsize / 16; - fl = &rxq->fl; - fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ - + ndescs = 0; #if defined(INET) || defined(INET6) if (iq->flags & IQ_ADJ_CREDIT) { MPASS(sort_before_lro(lro)); @@ -1652,22 +1586,34 @@ service_iq_fl(struct sge_iq *iq, int budget) MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); #endif + limit = budget ? 
budget : iq->qsize / 16; + fl = &rxq->fl; + fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { rmb(); - refill = 0; m0 = NULL; rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); lq = be32toh(d->rsp.pldbuflen_qid); switch (rsp_type) { case X_RSPD_TYPE_FLBUF: + if (lq & F_RSPD_NEWBUF) { + if (fl->rx_offset > 0) + move_to_next_rxbuf(fl); + lq = G_RSPD_LEN(lq); + } + if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) { + FL_LOCK(fl); + refill_fl(sc, fl, 64); + FL_UNLOCK(fl); + fl_hw_cidx = fl->hw_cidx; + } m0 = get_fl_payload(sc, fl, lq); if (__predict_false(m0 == NULL)) goto out; - refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; if (iq->flags & IQ_RX_TIMESTAMP) { /* @@ -1727,7 +1673,6 @@ service_iq_fl(struct sge_iq *iq, int budget) t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); - ndescs = 0; #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED && @@ -1736,20 +1681,10 @@ service_iq_fl(struct sge_iq *iq, int budget) tcp_lro_flush_inactive(lro, &lro_timeout); } #endif - if (budget) { - FL_LOCK(fl); - refill_fl(sc, fl, 32); - FL_UNLOCK(fl); - + if (budget) return (EINPROGRESS); - } + ndescs = 0; } - if (refill) { - FL_LOCK(fl); - refill_fl(sc, fl, 32); - FL_UNLOCK(fl); - fl_hw_cidx = fl->hw_cidx; - } } out: #if defined(INET) || defined(INET6) @@ -1777,28 +1712,11 @@ out: return (0); } -static inline int -cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) -{ - int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; - - if (rc) - MPASS(cll->region3 >= CL_METADATA_SIZE); - - return (rc); -} - static inline struct cluster_metadata * -cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, - caddr_t cl) +cl_metadata(struct fl_sdesc *sd) { - if (cl_has_metadata(fl, cll)) { - struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; - - return ((struct cluster_metadata *)(cl + swz->size) - 1); - } - return (NULL); + return ((void *)(sd->cl + sd->moff)); } static void @@ -1811,14 +1729,11 @@ rxb_free(struct mbuf *m) } /* - * The mbuf returned by this function could be allocated from zone_mbuf or - * constructed in spare room in the cluster. 
- * - * The mbuf carries the payload in one of these ways - * a) frame inside the mbuf (mbuf from zone_mbuf) - * b) m_cljset (for clusters without metadata) zone_mbuf - * c) m_extaddref (cluster with metadata) inline mbuf - * d) m_extaddref (cluster with metadata) zone_mbuf + * The mbuf returned comes from zone_muf and carries the payload in one of these + * ways + * a) complete frame inside the mbuf + * b) m_cljset (for clusters without metadata) + * d) m_extaddref (cluster with metadata) */ static struct mbuf * get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, @@ -1826,125 +1741,86 @@ get_scatter_segment(struct adapter *sc, struct sge_fl { struct mbuf *m; struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; - struct cluster_layout *cll = &sd->cll; - struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; - struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; - struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); + struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; + struct cluster_metadata *clm; int len, blen; caddr_t payload; - blen = hwb->size - fl->rx_offset; /* max possible in this buf */ - len = min(remaining, blen); - payload = sd->cl + cll->region1 + fl->rx_offset; if (fl->flags & FL_BUF_PACKING) { - const u_int l = fr_offset + len; - const u_int pad = roundup2(l, fl->buf_boundary) - l; + u_int l, pad; - if (fl->rx_offset + len + pad < hwb->size) + blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */ + len = min(remaining, blen); + payload = sd->cl + fl->rx_offset; + + l = fr_offset + len; + pad = roundup2(l, fl->buf_boundary) - l; + if (fl->rx_offset + len + pad < rxb->size2) blen = len + pad; - MPASS(fl->rx_offset + blen <= hwb->size); + MPASS(fl->rx_offset + blen <= rxb->size2); } else { MPASS(fl->rx_offset == 0); /* not packing */ + blen = rxb->size1; + len = min(remaining, blen); + payload = sd->cl; } - - if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { - - /* - * Copy payload into a freshly allocated mbuf. - */ - - m = fr_offset == 0 ? - m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); - if (m == NULL) + if (fr_offset == 0) { + m = m_gethdr(M_NOWAIT, MT_DATA); + if (__predict_false(m == NULL)) return (NULL); - fl->mbuf_allocated++; + m->m_pkthdr.len = remaining; + } else { + m = m_get(M_NOWAIT, MT_DATA); + if (__predict_false(m == NULL)) + return (NULL); + } + m->m_len = len; + if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { /* copy data to mbuf */ bcopy(payload, mtod(m, caddr_t), len); - - } else if (sd->nmbuf * MSIZE < cll->region1) { - - /* - * There's spare room in the cluster for an mbuf. Create one - * and associate it with the payload that's in the cluster. - */ - - MPASS(clm != NULL); - m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); - /* No bzero required */ - if (m_init(m, M_NOWAIT, MT_DATA, - fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE)) - return (NULL); - fl->mbuf_inlined++; + if (fl->flags & FL_BUF_PACKING) { + fl->rx_offset += blen; + MPASS(fl->rx_offset <= rxb->size2); + if (fl->rx_offset < rxb->size2) + return (m); /* without advancing the cidx */ + } + } else if (fl->flags & FL_BUF_PACKING) { + clm = cl_metadata(sd); if (sd->nmbuf++ == 0) { clm->refcount = 1; - clm->zone = swz->zone; + clm->zone = rxb->zone; clm->cl = sd->cl; counter_u64_add(extfree_refs, 1); } m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm, NULL); - } else { - /* - * Grab an mbuf from zone_mbuf and associate it with the - * payload in the cluster. - */ - - m = fr_offset == 0 ? 
- m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); - if (m == NULL) - return (NULL); - fl->mbuf_allocated++; - if (clm != NULL) { - if (sd->nmbuf++ == 0) { - clm->refcount = 1; - clm->zone = swz->zone; - clm->cl = sd->cl; - counter_u64_add(extfree_refs, 1); - } - m_extaddref(m, payload, blen, &clm->refcount, - rxb_free, clm, NULL); - } else { - m_cljset(m, sd->cl, swz->type); - sd->cl = NULL; /* consumed, not a recycle candidate */ - } - } - if (fr_offset == 0) - m->m_pkthdr.len = remaining; - m->m_len = len; - - if (fl->flags & FL_BUF_PACKING) { fl->rx_offset += blen; - MPASS(fl->rx_offset <= hwb->size); - if (fl->rx_offset < hwb->size) + MPASS(fl->rx_offset <= rxb->size2); + if (fl->rx_offset < rxb->size2) return (m); /* without advancing the cidx */ + } else { + m_cljset(m, sd->cl, rxb->type); + sd->cl = NULL; /* consumed, not a recycle candidate */ } - if (__predict_false(++fl->cidx % 8 == 0)) { - uint16_t cidx = fl->cidx / 8; + move_to_next_rxbuf(fl); - if (__predict_false(cidx == fl->sidx)) - fl->cidx = cidx = 0; - fl->hw_cidx = cidx; - } - fl->rx_offset = 0; - return (m); } static struct mbuf * -get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) +get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen) { struct mbuf *m0, *m, **pnext; u_int remaining; - const u_int total = G_RSPD_LEN(len_newbuf); if (__predict_false(fl->flags & FL_BUF_RESUME)) { M_ASSERTPKTHDR(fl->m0); - MPASS(fl->m0->m_pkthdr.len == total); - MPASS(fl->remaining < total); + MPASS(fl->m0->m_pkthdr.len == plen); + MPASS(fl->remaining < plen); m0 = fl->m0; pnext = fl->pnext; @@ -1953,31 +1829,20 @@ get_fl_payload(struct adapter *sc, struct sge_fl *fl, goto get_segment; } - if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { - fl->rx_offset = 0; - if (__predict_false(++fl->cidx % 8 == 0)) { - uint16_t cidx = fl->cidx / 8; - - if (__predict_false(cidx == fl->sidx)) - fl->cidx = cidx = 0; - fl->hw_cidx = cidx; - } - } - /* * Payload starts at rx_offset in the current hw buffer. Its length is * 'len' and it may span multiple hw buffers. 
*/ - m0 = get_scatter_segment(sc, fl, 0, total); + m0 = get_scatter_segment(sc, fl, 0, plen); if (m0 == NULL) return (NULL); - remaining = total - m0->m_len; + remaining = plen - m0->m_len; pnext = &m0->m_next; while (remaining > 0) { get_segment: MPASS(fl->rx_offset == 0); - m = get_scatter_segment(sc, fl, total - remaining, remaining); + m = get_scatter_segment(sc, fl, plen - remaining, remaining); if (__predict_false(m == NULL)) { fl->m0 = m0; fl->pnext = pnext; @@ -2202,7 +2067,8 @@ t4_update_fl_bufsize(struct ifnet *ifp) fl = &rxq->fl; FL_LOCK(fl); - find_best_refill_source(sc, fl, maxp); + fl->zidx = find_refill_source(sc, maxp, + fl->flags & FL_BUF_PACKING); FL_UNLOCK(fl); } #ifdef TCP_OFFLOAD @@ -2210,7 +2076,8 @@ t4_update_fl_bufsize(struct ifnet *ifp) fl = &ofld_rxq->fl; FL_LOCK(fl); - find_best_refill_source(sc, fl, maxp); + fl->zidx = find_refill_source(sc, maxp, + fl->flags & FL_BUF_PACKING); FL_UNLOCK(fl); } #endif @@ -3096,8 +2963,8 @@ init_fl(struct adapter *sc, struct sge_fl *fl, int qsi ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ fl->flags |= FL_BUF_PACKING; - find_best_refill_source(sc, fl, maxp); - find_safe_refill_source(sc, fl); + fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING); + fl->safe_zidx = sc->sge.safe_zidx; } static inline void @@ -3459,10 +3326,6 @@ add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_l } SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 0, "producer index"); - SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", - CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); - SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", - CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", @@ -4347,7 +4210,7 @@ ring_fl_db(struct adapter *sc, struct sge_fl *fl) { uint32_t n, v; - n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); + n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx); MPASS(n > 0); wmb(); @@ -4373,8 +4236,7 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int n struct fl_sdesc *sd; uintptr_t pa; caddr_t cl; - struct cluster_layout *cll; - struct sw_zone_info *swz; + struct rx_buf_info *rxb; struct cluster_metadata *clm; uint16_t max_pidx; uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ @@ -4392,8 +4254,6 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int n d = &fl->desc[fl->pidx]; sd = &fl->sdesc[fl->pidx]; - cll = &fl->cll_def; /* default layout */ - swz = &sc->sge.sw_zone_info[cll->zidx]; while (n > 0) { @@ -4408,11 +4268,6 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int n * fit within a single mbuf each. */ fl->cl_fast_recycled++; -#ifdef INVARIANTS - clm = cl_metadata(sc, fl, &sd->cll, sd->cl); - if (clm != NULL) - MPASS(clm->refcount == 1); -#endif goto recycled; } @@ -4421,7 +4276,7 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int n * without metadata always take the fast recycle path * when they're recycled. 
*/ - clm = cl_metadata(sc, fl, &sd->cll, sd->cl); + clm = cl_metadata(sd); MPASS(clm != NULL); if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { @@ -4432,32 +4287,34 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int n sd->cl = NULL; /* gave up my reference */ } MPASS(sd->cl == NULL); -alloc: - cl = uma_zalloc(swz->zone, M_NOWAIT); - if (__predict_false(cl == NULL)) { - if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || - fl->cll_def.zidx == fl->cll_alt.zidx) + rxb = &sc->sge.rx_buf_info[fl->zidx]; + cl = uma_zalloc(rxb->zone, M_NOWAIT); + if (__predict_false(cl == NULL) && fl->zidx != fl->safe_zidx) { + rxb = &sc->sge.rx_buf_info[fl->safe_zidx]; + cl = uma_zalloc(rxb->zone, M_NOWAIT); + if (__predict_false(cl == NULL)) break; - - /* fall back to the safe zone */ - cll = &fl->cll_alt; - swz = &sc->sge.sw_zone_info[cll->zidx]; - goto alloc; } fl->cl_allocated++; n--; pa = pmap_kextract((vm_offset_t)cl); - pa += cll->region1; sd->cl = cl; - sd->cll = *cll; - *d = htobe64(pa | cll->hwidx); + sd->zidx = fl->zidx; + + if (fl->flags & FL_BUF_PACKING) { + *d = htobe64(pa | rxb->hwidx2); + sd->moff = rxb->size2; + } else { + *d = htobe64(pa | rxb->hwidx1); + sd->moff = 0; + } recycled: sd->nmbuf = 0; d++; sd++; - if (__predict_false(++fl->pidx % 8 == 0)) { - uint16_t pidx = fl->pidx / 8; + if (__predict_false((++fl->pidx & 7) == 0)) { + uint16_t pidx = fl->pidx >> 3; if (__predict_false(pidx == fl->sidx)) { fl->pidx = 0; @@ -4465,7 +4322,7 @@ recycled: sd = fl->sdesc; d = fl->desc; } - if (pidx == max_pidx) + if (n < 8 || pidx == max_pidx) break; if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) @@ -4473,7 +4330,7 @@ recycled: } } - if (fl->pidx / 8 != fl->dbidx) + if ((fl->pidx >> 3) != fl->dbidx) ring_fl_db(sc, fl); return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); @@ -4518,7 +4375,6 @@ free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) { struct fl_sdesc *sd; struct cluster_metadata *clm; - struct cluster_layout *cll; int i; sd = fl->sdesc; @@ -4526,13 +4382,15 @@ free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) if (sd->cl == NULL) continue; - cll = &sd->cll; - clm = cl_metadata(sc, fl, cll, sd->cl); if (sd->nmbuf == 0) - uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); - else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { - uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); - counter_u64_add(extfree_rels, 1); + uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl); + else if (fl->flags & FL_BUF_PACKING) { + clm = cl_metadata(sd); + if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { + uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, + sd->cl); + counter_u64_add(extfree_rels, 1); + } } sd->cl = NULL; } @@ -5428,182 +5286,42 @@ get_flit(struct sglist_seg *segs, int nsegs, int idx) return (0); } -static void -find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) +static int +find_refill_source(struct adapter *sc, int maxp, bool packing) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
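
The heart of the change, for readers skimming the diff: the per-buffer cluster_layout is gone, and each software cluster zone now carries exactly two candidate hardware buffer sizes. size1/hwidx1 describe the whole cluster (no spare); size2/hwidx2 leave a spare region at the end of the cluster for the shared cluster_metadata used when buffer packing is enabled, and refill_fl() records that offset in sd->moff. The standalone C sketch below illustrates that pairing and the per-refill choice; it is not driver code, and the cluster sizes, index arithmetic, and CL_METADATA_SIZE value are illustrative assumptions only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CL_METADATA_SIZE 64	/* illustrative stand-in (a cache line) */

struct rx_buf_info {
	int size1;		/* full cluster size: 2K, 4K, 9K or 16K */
	int size2;		/* hw buffer size leaving a spare for metadata */
	int8_t hwidx1;		/* SGE_FL_BUFFER_SIZE index holding size1 */
	int8_t hwidx2;		/* SGE_FL_BUFFER_SIZE index holding size2 */
};

/*
 * Mirror of the new register programming: starting at SGE_FL_BUFFER_SIZE2
 * (index 2), each cluster size is written twice, once as-is and once minus
 * room for metadata, so zone i ends up with hwidx1 = 2 + 2*i and
 * hwidx2 = 3 + 2*i in the simple case where the driver programmed the chip.
 */
static void
build_rx_buf_info(struct rx_buf_info *rxb, const int *cl_sizes, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		rxb[i].size1 = cl_sizes[i];
		rxb[i].hwidx1 = (int8_t)(2 + 2 * i);
		rxb[i].size2 = cl_sizes[i] - CL_METADATA_SIZE;
		rxb[i].hwidx2 = (int8_t)(3 + 2 * i);
	}
}

/*
 * The per-refill decision: with buffer packing the hardware is told about
 * size2 bytes and the metadata lives at offset size2 inside the cluster;
 * without packing the whole cluster is the rx buffer and no metadata is kept.
 */
static void
pick_layout(const struct rx_buf_info *rxb, bool packing, int8_t *hwidx,
    int *moff)
{
	if (packing) {
		*hwidx = rxb->hwidx2;
		*moff = rxb->size2;
	} else {
		*hwidx = rxb->hwidx1;
		*moff = 0;
	}
}

int
main(void)
{
	const int cl_sizes[] = { 2048, 4096, 9216, 16384 };
	struct rx_buf_info rxb[4];
	int8_t hwidx;
	int moff;

	build_rx_buf_info(rxb, cl_sizes, 4);

	pick_layout(&rxb[1], true, &hwidx, &moff);
	printf("4K cluster, packing:    hwidx %d, metadata at offset %d\n",
	    hwidx, moff);
	pick_layout(&rxb[1], false, &hwidx, &moff);
	printf("4K cluster, no packing: hwidx %d, no metadata (offset %d)\n",
	    hwidx, moff);
	return (0);
}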