Date: Wed, 11 Nov 2020 21:27:17 +0000 (UTC) From: Vincenzo Maffione <vmaffione@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r367599 - stable/12/sys/net Message-ID: <202011112127.0ABLRHTB024896@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: vmaffione Date: Wed Nov 11 21:27:16 2020 New Revision: 367599 URL: https://svnweb.freebsd.org/changeset/base/367599 Log: MFC r367093, r367117 iflib: add per-tx-queue netmap timer The way netmap TX is handled in iflib when TX interrupts are not used (IFC_NETMAP_TX_IRQ not set) has some issues: - The netmap_tx_irq() function gets called by iflib_timer(), which gets scheduled with tick granularity (hz). This is not frequent enough for 10Gbps NICs and beyond (e.g., ixgbe or ixl). The end result is that the transmitting netmap application is not woken up fast enough to saturate the link with small packets. - The iflib_timer() functions also calls isc_txd_credits_update() to ask for more TX completion updates. However, this violates the netmap requirement that only txsync can access the TX queue for datapath operations. Only netmap_tx_irq() may be called out of the txsync context. This change introduces per-tx-queue netmap timers, using microsecond granularity to ensure that netmap_tx_irq() can be called often enough to allow for maximum packet rate. The timer routine simply calls netmap_tx_irq() to wake up the netmap application. The latter will wake up and call txsync to collect TX completion updates. This change brings back line rate speed with small packets for ixgbe. For the time being, timer expiration is hardcoded to 90 microseconds, in order to avoid introducing a new sysctl. We may eventually implement an adaptive expiration period or use another deferred work mechanism in place of timers. Also, fix the timers usage to make sure that each queue is serviced by a different CPU. PR: 248652 Reported by: sg@efficientip.com Modified: stable/12/sys/net/iflib.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/net/iflib.c ============================================================================== --- stable/12/sys/net/iflib.c Wed Nov 11 18:45:06 2020 (r367598) +++ stable/12/sys/net/iflib.c Wed Nov 11 21:27:16 2020 (r367599) @@ -348,6 +348,9 @@ struct iflib_txq { qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; +#ifdef DEV_NETMAP + struct callout ift_netmap_timer; +#endif /* DEV_NETMAP */ if_txsd_vec_t ift_sds; uint8_t ift_qstatus; @@ -763,6 +766,7 @@ iflib_num_tx_descs(if_ctx_t ctx) MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init); +static void iflib_netmap_timer(void *arg); /* * device-specific sysctl variables: @@ -928,6 +932,8 @@ netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring return (0); } +#define NETMAP_TX_TIMER_US 90 + /* * Reconcile kernel and user view of the transmit ring. * @@ -1057,9 +1063,8 @@ iflib_netmap_txsync(struct netmap_kring *kring, int fl * Second part: reclaim buffers for completed transmissions. * * If there are unclaimed buffers, attempt to reclaim them. - * If none are reclaimed, and TX IRQs are not in use, do an initial - * minimal delay, then trigger the tx handler which will spin in the - * group task queue. + * If we don't manage to reclaim them all, and TX IRQs are not in use, + * trigger a per-tx-queue timer to try again later. */ if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { if (iflib_tx_credits_update(ctx, txq)) { @@ -1068,11 +1073,14 @@ iflib_netmap_txsync(struct netmap_kring *kring, int fl kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } + if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { - callout_reset_on(&txq->ift_timer, hz < 2000 ? 1 : hz / 1000, - iflib_timer, txq, txq->ift_timer.c_cpu); - } + callout_reset_sbt_on(&txq->ift_netmap_timer, + NETMAP_TX_TIMER_US * SBT_1US, SBT_1US, + iflib_netmap_timer, txq, + txq->ift_netmap_timer.c_cpu, 0); + } return (0); } @@ -1275,28 +1283,16 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) } static void -iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t txq, uint32_t *reset_on) +iflib_netmap_timer(void *arg) { - struct netmap_kring *kring; - uint16_t txqid; + iflib_txq_t txq = arg; + if_ctx_t ctx = txq->ift_ctx; - txqid = txq->ift_id; - kring = netmap_kring_on(NA(ctx->ifc_ifp), txqid, NR_TX); - if (kring == NULL) - return; - - if (kring->nr_hwcur != nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1)) { - bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, - BUS_DMASYNC_POSTREAD); - if (ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) - netmap_tx_irq(ctx->ifc_ifp, txqid); - if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) { - if (hz < 2000) - *reset_on = 1; - else - *reset_on = hz / 1000; - } - } + /* + * Wake up the netmap application, to give it a chance to + * call txsync and reclaim more completed TX buffers. + */ + netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) @@ -1308,8 +1304,6 @@ iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t tx #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) -#define netmap_tx_irq(ifp, qid) do {} while (0) -#define iflib_netmap_timer_adjust(ctx, txq, reset_on) #endif @@ -2367,7 +2361,6 @@ iflib_timer(void *arg) if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; uint64_t this_tick = ticks; - uint32_t reset_on = hz / 2; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; @@ -2392,17 +2385,13 @@ iflib_timer(void *arg) } txq->ift_cleaned_prev = txq->ift_cleaned; } -#ifdef DEV_NETMAP - if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) - iflib_netmap_timer_adjust(ctx, txq, &reset_on); -#endif /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, hz / 2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: @@ -2476,6 +2465,9 @@ iflib_init_locked(if_ctx_t ctx) for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); +#ifdef DEV_NETMAP + callout_stop(&txq->ift_netmap_timer); +#endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } @@ -2565,6 +2557,9 @@ iflib_stop(if_ctx_t ctx) CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); +#ifdef DEV_NETMAP + callout_stop(&txq->ift_netmap_timer); +#endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ @@ -3904,7 +3899,6 @@ _task_fn_admin(void *context) iflib_txq_t txq; int i; bool oactive, running, do_reset, do_watchdog, in_detach; - uint32_t reset_on = hz / 2; STATE_LOCK(ctx); running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); @@ -3932,12 +3926,8 @@ _task_fn_admin(void *context) } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { -#ifdef DEV_NETMAP - reset_on = hz / 2; - if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) - iflib_netmap_timer_adjust(ctx, txq, &reset_on); -#endif - callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, hz / 2, iflib_timer, txq, + txq->ift_timer.c_cpu); } IFDI_LINK_INTR_ENABLE(ctx); if (do_reset) @@ -5090,6 +5080,9 @@ iflib_pseudo_deregister(if_ctx_t ctx) tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); +#ifdef DEV_NETMAP + callout_drain(&txq->ift_netmap_timer); +#endif /* DEV_NETMAP */ if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } @@ -5175,6 +5168,9 @@ iflib_device_deregister(if_ctx_t ctx) tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); +#ifdef DEV_NETMAP + callout_drain(&txq->ift_netmap_timer); +#endif /* DEV_NETMAP */ if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } @@ -5584,8 +5580,6 @@ iflib_queues_alloc(if_ctx_t ctx) } else { txq->ift_br_offset = 0; } - /* XXX fix this */ - txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); @@ -5598,6 +5592,11 @@ iflib_queues_alloc(if_ctx_t ctx) device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); + txq->ift_timer.c_cpu = cpu; +#ifdef DEV_NETMAP + callout_init_mtx(&txq->ift_netmap_timer, &txq->ift_mtx, 0); + txq->ift_netmap_timer.c_cpu = cpu; +#endif /* DEV_NETMAP */ err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202011112127.0ABLRHTB024896>
