Date: Sat, 16 Jan 2021 08:29:48 GMT From: Vincenzo Maffione <vmaffione@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 2968dde3de6f - main - axgbe: driver changes for netmap support Message-ID: <202101160829.10G8TmEf018024@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by vmaffione: URL: https://cgit.FreeBSD.org/src/commit/?id=2968dde3de6f6274517904add6a0a8e2e3f9662b commit 2968dde3de6f6274517904add6a0a8e2e3f9662b Author: Vincenzo Maffione <vmaffione@FreeBSD.org> AuthorDate: 2021-01-16 08:23:19 +0000 Commit: Vincenzo Maffione <vmaffione@FreeBSD.org> CommitDate: 2021-01-16 08:29:33 +0000 axgbe: driver changes for netmap support AMD 10GbE hardware is designed to have two buffers per receive descriptor to support split header feature. For this purpose, the driver was designed to use 2 iflib freelists per receive queue. So, that buffers from 2 freelists are used to refill an entry in the receive descriptor. The current design holds good with regular data traffic. But, when netmap comes into play, the current design will not fit in. The current netmap interfaces and netmap implementation in iflib doesn't seem to accomodate the design of 2 freelists per receive queue. So, exercising Netmap capability with inbuilt tools like bridge, pkt-gen doesn't work with the 2 freelists driver design. So, the driver design is changed to accomodate the current netmap interfaces and netmap implementation in iflib by using single freelist per receive queue approach when Netmap capability is exercised without disturbing the current 2 freelists approach. The dev.ax.sph_enable tunable can be set to 0 to configure the single free list mode. Thanks to Stephan Dewt for his Initial set of code changes for the stated problem. Submitted by: rajesh1.kumar_amd.com Approved by: vmaffione MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D27797 --- sys/dev/axgbe/if_axgbe_pci.c | 100 +++++++++++++++++++++++++++++++----- sys/dev/axgbe/xgbe-dev.c | 33 +++++++++--- sys/dev/axgbe/xgbe-sysctl.c | 4 ++ sys/dev/axgbe/xgbe-txrx.c | 120 +++++++++++++++++++++++++++---------------- sys/dev/axgbe/xgbe.h | 6 +++ 5 files changed, 198 insertions(+), 65 deletions(-) diff --git a/sys/dev/axgbe/if_axgbe_pci.c b/sys/dev/axgbe/if_axgbe_pci.c index 7c4645792321..fe42aaac6568 100644 --- a/sys/dev/axgbe/if_axgbe_pci.c +++ b/sys/dev/axgbe/if_axgbe_pci.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rman.h> #include <sys/socket.h> #include <sys/sysctl.h> +#include <sys/systm.h> #include <net/if.h> #include <net/if_media.h> @@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$"); MALLOC_DEFINE(M_AXGBE, "axgbe", "axgbe data"); extern struct if_txrx axgbe_txrx; +static int axgbe_sph_enable; /* Function prototypes */ static void *axgbe_register(device_t); @@ -252,16 +254,11 @@ static struct if_shared_ctx axgbe_sctx_init = { .isc_vendor_info = axgbe_vendor_info_array, .isc_driver_version = XGBE_DRV_VERSION, - .isc_nrxd_min = {XGBE_RX_DESC_CNT_MIN, XGBE_RX_DESC_CNT_MIN}, - .isc_nrxd_default = {XGBE_RX_DESC_CNT_DEFAULT, XGBE_RX_DESC_CNT_DEFAULT}, - .isc_nrxd_max = {XGBE_RX_DESC_CNT_MAX, XGBE_RX_DESC_CNT_MAX}, .isc_ntxd_min = {XGBE_TX_DESC_CNT_MIN}, .isc_ntxd_default = {XGBE_TX_DESC_CNT_DEFAULT}, .isc_ntxd_max = {XGBE_TX_DESC_CNT_MAX}, - .isc_nfl = 2, .isc_ntxqs = 1, - .isc_nrxqs = 2, .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_NEED_ETHER_PAD, }; @@ -269,6 +266,44 @@ static struct if_shared_ctx axgbe_sctx_init = { static void * axgbe_register(device_t dev) { + int axgbe_nfl; + int axgbe_nrxqs; + int error, i; + char *value = NULL; + + value = kern_getenv("dev.ax.sph_enable"); + if (value) { + axgbe_sph_enable = strtol(value, NULL, 10); + freeenv(value); + } else { + /* + * No tunable found, generate one with default values + * Note: only a reboot will reveal the new kenv + */ + error = kern_setenv("dev.ax.sph_enable", "1"); + if (error) { + printf("Error setting tunable, using default driver values\n"); + } + axgbe_sph_enable = 1; + } + + if (!axgbe_sph_enable) { + axgbe_nfl = 1; + axgbe_nrxqs = 1; + } else { + axgbe_nfl = 2; + axgbe_nrxqs = 2; + } + + axgbe_sctx_init.isc_nfl = axgbe_nfl; + axgbe_sctx_init.isc_nrxqs = axgbe_nrxqs; + + for (i = 0 ; i < axgbe_nrxqs ; i++) { + axgbe_sctx_init.isc_nrxd_min[i] = XGBE_RX_DESC_CNT_MIN; + axgbe_sctx_init.isc_nrxd_default[i] = XGBE_RX_DESC_CNT_DEFAULT; + axgbe_sctx_init.isc_nrxd_max[i] = XGBE_RX_DESC_CNT_MAX; + } + return (&axgbe_sctx_init); } @@ -1292,6 +1327,9 @@ axgbe_if_attach_post(if_ctx_t ctx) if_softc_ctx_t scctx = sc->scctx; int i, ret; + /* set split header support based on tunable */ + pdata->sph_enable = axgbe_sph_enable; + /* Initialize ECC timestamps */ pdata->tx_sec_period = ticks; pdata->tx_ded_period = ticks; @@ -1404,6 +1442,8 @@ axgbe_if_attach_post(if_ctx_t ctx) axgbe_sysctl_init(pdata); + axgbe_pci_init(pdata); + return (0); } /* axgbe_if_attach_post */ @@ -1491,7 +1531,12 @@ axgbe_pci_init(struct xgbe_prv_data *pdata) struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; int ret = 0; - + + if (!__predict_false((test_bit(XGBE_DOWN, &pdata->dev_state)))) { + axgbe_printf(1, "%s: Starting when XGBE_UP\n", __func__); + return; + } + hw_if->init(pdata); ret = phy_if->phy_start(pdata); @@ -1656,7 +1701,11 @@ axgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *va, uint64_t *pa, int nrxqs, MPASS(scctx->isc_nrxqsets > 0); MPASS(scctx->isc_nrxqsets == nrxqsets); - MPASS(nrxqs == 2); + if (!pdata->sph_enable) { + MPASS(nrxqs == 1); + } else { + MPASS(nrxqs == 2); + } axgbe_printf(1, "%s: rxqsets %d/%d rxqs %d\n", __func__, scctx->isc_nrxqsets, nrxqsets, nrxqs); @@ -2258,15 +2307,40 @@ axgbe_if_media_change(if_ctx_t ctx) static int axgbe_if_promisc_set(if_ctx_t ctx, int flags) { - struct axgbe_if_softc *sc = iflib_get_softc(ctx); + struct axgbe_if_softc *sc = iflib_get_softc(ctx); + struct xgbe_prv_data *pdata = &sc->pdata; + struct ifnet *ifp = pdata->netdev; - if (XGMAC_IOREAD_BITS(&sc->pdata, MAC_PFR, PR) == 1) - return (0); + axgbe_printf(1, "%s: MAC_PFR 0x%x drv_flags 0x%x if_flags 0x%x\n", + __func__, XGMAC_IOREAD(pdata, MAC_PFR), ifp->if_drv_flags, ifp->if_flags); - XGMAC_IOWRITE_BITS(&sc->pdata, MAC_PFR, PR, 1); - XGMAC_IOWRITE_BITS(&sc->pdata, MAC_PFR, VTFE, 0); + if (ifp->if_flags & IFF_PPROMISC) { - return (0); + axgbe_printf(1, "User requested to enter promisc mode\n"); + + if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == 1) { + axgbe_printf(1, "Already in promisc mode\n"); + return (0); + } + + axgbe_printf(1, "Entering promisc mode\n"); + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 1); + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0); + } else { + + axgbe_printf(1, "User requested to leave promisc mode\n"); + + if (XGMAC_IOREAD_BITS(pdata, MAC_PFR, PR) == 0) { + axgbe_printf(1, "Already not in promisc mode\n"); + return (0); + } + + axgbe_printf(1, "Leaving promisc mode\n"); + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, PR, 0); + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1); + } + + return (0); } static uint64_t diff --git a/sys/dev/axgbe/xgbe-dev.c b/sys/dev/axgbe/xgbe-dev.c index 1cfd557a4995..95161802ed8e 100644 --- a/sys/dev/axgbe/xgbe-dev.c +++ b/sys/dev/axgbe/xgbe-dev.c @@ -293,12 +293,14 @@ xgbe_config_tso_mode(struct xgbe_prv_data *pdata) { unsigned int i; + int tso_enabled = (if_getcapenable(pdata->netdev) & IFCAP_TSO); + for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; - axgbe_printf(0, "Enabling TSO in channel %d\n", i); - XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, 1); + axgbe_printf(1, "TSO in channel %d %s\n", i, tso_enabled ? "enabled" : "disabled"); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, tso_enabled ? 1 : 0); } } @@ -306,15 +308,33 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) { unsigned int i; + int sph_enable_flag = XGMAC_IOREAD_BITS(pdata, MAC_HWF1R, SPHEN); + + axgbe_printf(1, "sph_enable %d sph feature enabled?: %d\n", + pdata->sph_enable, sph_enable_flag); + + if (pdata->sph_enable && sph_enable_flag) + axgbe_printf(0, "SPH Enabled\n"); for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->rx_ring) break; + if (pdata->sph_enable && sph_enable_flag) { + /* Enable split header feature */ + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1); + } else { + /* Disable split header feature */ + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); + } - XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1); + /* per-channel confirmation of SPH being disabled/enabled */ + int val = XGMAC_DMA_IOREAD_BITS(pdata->channel[i], DMA_CH_CR, SPH); + axgbe_printf(0, "%s: SPH %s in channel %d\n", __func__, + (val ? "enabled" : "disabled"), i); } - XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); + if (pdata->sph_enable && sph_enable_flag) + XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } static int @@ -953,8 +973,8 @@ xgbe_config_rx_mode(struct xgbe_prv_data *pdata) { unsigned int pr_mode, am_mode; - pr_mode = ((pdata->netdev->if_drv_flags & IFF_PPROMISC) != 0); - am_mode = ((pdata->netdev->if_drv_flags & IFF_ALLMULTI) != 0); + pr_mode = ((pdata->netdev->if_flags & IFF_PPROMISC) != 0); + am_mode = ((pdata->netdev->if_flags & IFF_ALLMULTI) != 0); xgbe_set_promiscuous_mode(pdata, pr_mode); xgbe_set_all_multicast_mode(pdata, am_mode); @@ -1352,7 +1372,6 @@ xgbe_dev_read(struct xgbe_channel *channel) if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, CTXT)) { /* TODO - Timestamp Context Descriptor */ - XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT, 1); XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, diff --git a/sys/dev/axgbe/xgbe-sysctl.c b/sys/dev/axgbe/xgbe-sysctl.c index eee7c61170de..a3e777e0ca26 100644 --- a/sys/dev/axgbe/xgbe-sysctl.c +++ b/sys/dev/axgbe/xgbe-sysctl.c @@ -1614,6 +1614,10 @@ axgbe_sysctl_init(struct xgbe_prv_data *pdata) SYSCTL_ADD_UINT(clist, top, OID_AUTO, "axgbe_debug_level", CTLFLAG_RWTUN, &pdata->debug_level, 0, "axgbe log level -- higher is verbose"); + SYSCTL_ADD_UINT(clist, top, OID_AUTO, "sph_enable", + CTLFLAG_RDTUN, &pdata->sph_enable, 1, + "shows the split header feature state (1 - enable, 0 - disable"); + SYSCTL_ADD_PROC(clist, top, OID_AUTO, "xgmac_register", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, pdata, 0, sysctl_xgmac_reg_addr_handler, "IU", diff --git a/sys/dev/axgbe/xgbe-txrx.c b/sys/dev/axgbe/xgbe-txrx.c index c6872e584f81..e798bfa37335 100644 --- a/sys/dev/axgbe/xgbe-txrx.c +++ b/sys/dev/axgbe/xgbe-txrx.c @@ -379,20 +379,9 @@ axgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) axgbe_printf(1, "--> %s: flush txq %d pidx %d cur %d dirty %d\n", __func__, txqid, pidx, ring->cur, ring->dirty); - MPASS(ring->cur == pidx); - if (__predict_false(ring->cur != pidx)) { - axgbe_error("--> %s: cur(%d) ne pidx(%d)\n", __func__, - ring->cur, pidx); - } - - wmb(); - /* Ring Doorbell */ - if (XGMAC_DMA_IOREAD(channel, DMA_CH_TDTR_LO) != - lower_32_bits(rdata->rdata_paddr)) { - XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO, - lower_32_bits(rdata->rdata_paddr)); - } + XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO, + lower_32_bits(rdata->rdata_paddr)); } static int @@ -466,6 +455,7 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) unsigned int inte; uint8_t count = iru->iru_count; int i, j; + bool config_intr = false; axgbe_printf(1, "--> %s: rxq %d fl %d pidx %d count %d ring cur %d " "dirty %d\n", __func__, iru->iru_qsidx, iru->iru_flidx, @@ -473,7 +463,7 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) for (i = iru->iru_pidx, j = 0 ; j < count ; i++, j++) { - if (i == XGBE_RX_DESC_CNT_DEFAULT) + if (i == sc->scctx->isc_nrxd[0]) i = 0; rdata = XGBE_GET_DESC_DATA(ring, i); @@ -485,29 +475,42 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) "pidx %d\n", __func__, ring->cur, ring->dirty, j, i); } - /* Assuming split header is enabled */ - if (iru->iru_flidx == 0) { + if (pdata->sph_enable) { + if (iru->iru_flidx == 0) { + + /* Fill header/buffer1 address */ + rdesc->desc0 = + cpu_to_le32(lower_32_bits(iru->iru_paddrs[j])); + rdesc->desc1 = + cpu_to_le32(upper_32_bits(iru->iru_paddrs[j])); + } else { + + /* Fill data/buffer2 address */ + rdesc->desc2 = + cpu_to_le32(lower_32_bits(iru->iru_paddrs[j])); + rdesc->desc3 = + cpu_to_le32(upper_32_bits(iru->iru_paddrs[j])); + config_intr = true; + } + } else { /* Fill header/buffer1 address */ - rdesc->desc0 = + rdesc->desc0 = rdesc->desc2 = cpu_to_le32(lower_32_bits(iru->iru_paddrs[j])); - rdesc->desc1 = + rdesc->desc1 = rdesc->desc3 = cpu_to_le32(upper_32_bits(iru->iru_paddrs[j])); - } else { - /* Fill data/buffer2 address */ - rdesc->desc2 = - cpu_to_le32(lower_32_bits(iru->iru_paddrs[j])); - rdesc->desc3 = - cpu_to_le32(upper_32_bits(iru->iru_paddrs[j])); + config_intr = true; + } + + if (config_intr) { if (!rx_usecs && !rx_frames) { /* No coalescing, interrupt for every descriptor */ inte = 1; } else { /* Set interrupt based on Rx frame coalescing setting */ - if (rx_frames && - !(((ring->dirty + 1) &(ring->rdesc_count - 1)) % rx_frames)) + if (rx_frames && !((ring->dirty + 1) % rx_frames)) inte = 1; else inte = 0; @@ -520,6 +523,8 @@ axgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) wmb(); ring->dirty = ((ring->dirty + 1) & (ring->rdesc_count - 1)); + + config_intr = false; } } @@ -539,15 +544,16 @@ axgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx, qidx_t pidx) axgbe_printf(1, "--> %s: rxq %d fl %d pidx %d cur %d dirty %d\n", __func__, qsidx, flidx, pidx, ring->cur, ring->dirty); - if (flidx == 1) { - - rdata = XGBE_GET_DESC_DATA(ring, pidx); + rdata = XGBE_GET_DESC_DATA(ring, pidx); + /* + * update RX descriptor tail pointer in hardware to indicate + * that new buffers are present in the allocated memory region + */ + if (!pdata->sph_enable || flidx == 1) { XGMAC_DMA_IOWRITE(channel, DMA_CH_RDTR_LO, lower_32_bits(rdata->rdata_paddr)); } - - wmb(); } static int @@ -560,12 +566,17 @@ axgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t idx, qidx_t budget) struct xgbe_ring_data *rdata; struct xgbe_ring_desc *rdesc; unsigned int cur; - int count; + int count = 0; uint8_t incomplete = 1, context_next = 0, running = 0; axgbe_printf(1, "--> %s: rxq %d idx %d budget %d cur %d dirty %d\n", __func__, qsidx, idx, budget, ring->cur, ring->dirty); + if (__predict_false(test_bit(XGBE_DOWN, &pdata->dev_state))) { + axgbe_printf(0, "%s: Polling when XGBE_DOWN\n", __func__); + return (count); + } + cur = ring->cur; for (count = 0; count <= budget; ) { @@ -609,11 +620,14 @@ static unsigned int xgbe_rx_buf1_len(struct xgbe_prv_data *pdata, struct xgbe_ring_data *rdata, struct xgbe_packet_data *packet) { + unsigned int ret = 0; - /* Always zero if not the first descriptor */ - if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) { - axgbe_printf(1, "%s: Not First\n", __func__); - return (0); + if (pdata->sph_enable) { + /* Always zero if not the first descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) { + axgbe_printf(1, "%s: Not First\n", __func__); + return (0); + } } /* First descriptor with split header, return header length */ @@ -623,21 +637,33 @@ xgbe_rx_buf1_len(struct xgbe_prv_data *pdata, struct xgbe_ring_data *rdata, } /* First descriptor but not the last descriptor and no split header, - * so the full buffer was used + * so the full buffer was used, 256 represents the hardcoded value of + * a max header split defined in the hardware */ if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) { axgbe_printf(1, "%s: Not last %d\n", __func__, pdata->rx_buf_size); - return (256); + if (pdata->sph_enable) { + return (256); + } else { + return (pdata->rx_buf_size); + } } /* First descriptor and last descriptor and no split header, so - * calculate how much of the buffer was used + * calculate how much of the buffer was used, we can return the + * segment length or the remaining bytes of the packet */ axgbe_printf(1, "%s: pkt_len %d buf_size %d\n", __func__, rdata->rx.len, pdata->rx_buf_size); - return (min_t(unsigned int, 256, rdata->rx.len)); + if (pdata->sph_enable) { + ret = min_t(unsigned int, 256, rdata->rx.len); + } else { + ret = rdata->rx.len; + } + + return (ret); } static unsigned int @@ -712,8 +738,10 @@ read_again: /* Get the data length in the descriptor buffers */ buf1_len = xgbe_rx_buf1_len(pdata, rdata, packet); len += buf1_len; - buf2_len = xgbe_rx_buf2_len(pdata, rdata, packet, len); - len += buf2_len; + if (pdata->sph_enable) { + buf2_len = xgbe_rx_buf2_len(pdata, rdata, packet, len); + len += buf2_len; + } } else buf1_len = buf2_len = 0; @@ -724,8 +752,10 @@ read_again: axgbe_add_frag(pdata, ri, prev_cur, buf1_len, i, 0); i++; - axgbe_add_frag(pdata, ri, prev_cur, buf2_len, i, 1); - i++; + if (pdata->sph_enable) { + axgbe_add_frag(pdata, ri, prev_cur, buf2_len, i, 1); + i++; + } if (!last || context_next) goto read_again; @@ -758,7 +788,7 @@ read_again: } if (__predict_false(len == 0)) - axgbe_error("%s: Zero len packet\n", __func__); + axgbe_printf(1, "%s: Discarding Zero len packet\n", __func__); if (__predict_false(len > max_len)) axgbe_error("%s: Big packet %d/%d\n", __func__, len, max_len); diff --git a/sys/dev/axgbe/xgbe.h b/sys/dev/axgbe/xgbe.h index a5a78cd68c39..766c0c6f551a 100644 --- a/sys/dev/axgbe/xgbe.h +++ b/sys/dev/axgbe/xgbe.h @@ -1296,6 +1296,12 @@ struct xgbe_prv_data { uint64_t rx_coalesce_usecs; unsigned int debug_level; + + /* + * Toggles the split header feature. + * This requires a complete restart. + */ + unsigned int sph_enable; }; struct axgbe_if_softc {
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101160829.10G8TmEf018024>