Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 7 May 2019 08:28:35 +0000 (UTC)
From:      Marius Strobl <marius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r347221 - head/sys/net
Message-ID:  <201905070828.x478SZ3t019398@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: marius
Date: Tue May  7 08:28:35 2019
New Revision: 347221
URL: https://svnweb.freebsd.org/changeset/base/347221

Log:
  o Use iflib_fast_intr_rxtx() also for "legacy" interrupts, i. e. INTx and
    MSI. Unlike as with iflib_fast_intr_ctx(), the former will also enqueue
    _task_fn_tx() in addition to _task_fn_rx() if appropriate, bringing TCP
    TX throughput of EM-class devices on par with the MSI-X case and, thus,
    close to wirespeed/pre-iflib(4) times again. [1]
    Note that independently of the interrupt type, the UDP performance with
    these MACs still is abysmal and nowhere near to where it was before the
    conversion of em(4) to iflib(4).
  o In iflib_init_locked(), announce which free list failed to set up.
  o In _task_fn_tx() when running netmap(4), issue ifdi_intr_enable instead
    of the ifdi_tx_queue_intr_enable method in case of a "legacy" interrupt
    as the latter is valid with MSI-X only.
  o Instead of adding the missing - and apparently convoluted enough that a
    DBG_COUNTER_INC was put into a wrong spot in _task_fn_rx() - checks for
    ifdi_{r,t}x_queue_intr_enable being available in the MSI-X case also to
    iflib_fast_intr_rxtx(), factor these out to iflib_device_register() and
    make the checks fail gracefully rather than panic. This avoids invoking
    the checks at runtime over and over again in iflib_fast_intr_rxtx() and
    _task_fn_{r,t}x() - even if it's just in case of INVARIANTS - and makes
    these functions more readable.
  o In iflib_rx_structures_setup(), only initialize LRO resources if device
    and driver have LRO capability in order to not waste memory. Also, free
    the LRO resources again if setting them up fails for one of the queues.
    However, don't bother invoking iflib_rx_sds_free() in that case because
    iflib_rx_structures_setup() doesn't call iflib_rxsd_alloc() either (and
    iflib_{device,pseudo}_register() will issue iflib_rx_sds_free() in case
    of failure via iflib_rx_structures_free(), but there definitely is some
    asymmetry left to be fixed, though).
  o Similarly, free LRO resources again in iflib_rx_structures_free().
  o In iflib_irq_set_affinity(), handle get_core_offset() errors gracefully
    instead of panicing (but only in case of INVARIANTS). This is a follow-
    up to r344132, as such driver bugs shouldn't be fatal.
  o Likewise, handle unknown iflib_intr_type_t in iflib_irq_alloc_generic()
    gracefully, too.
  o Bring yet more sanity to iflib_msix_init():
    - If the device doesn't provide enough MSI-X vectors or not all vectors
      can be allocate so the expected number of queues in addition to admin
      interrupts can't be supported, try MSI next (and then INTx) as proper
      MSI-X vector distribution can't be assured in such cases. In essence,
      this change brings r254008 forward to iflib(4). Also, this is the fix
      alluded to in the commit message of r343934.
    - If the MSI-X allocation has failed, don't prematurely announce MSI is
      going to be used as the latter in fact may not be available either.
    - When falling back to MSI, only release the MSI-X table resource again
      if it was allocated in iflib_msix_init(), i. e. isn't supplied by the
      driver, in the first place.
  o In mp_ndesc_handler(), handle unknown type arguments gracefully, too.
  
  PR:		235031 (likely) [1]
  Reviewed by:	shurd
  Differential Revision:	https://reviews.freebsd.org/D20175

Modified:
  head/sys/net/iflib.c

Modified: head/sys/net/iflib.c
==============================================================================
--- head/sys/net/iflib.c	Tue May  7 08:14:30 2019	(r347220)
+++ head/sys/net/iflib.c	Tue May  7 08:28:35 2019	(r347221)
@@ -1461,6 +1461,7 @@ iflib_fast_intr_rxtx(void *arg)
 	void *sc;
 	int i, cidx, result;
 	qidx_t txqid;
+	bool intr_enable, intr_legacy;
 
 	if (!iflib_started)
 		return (FILTER_STRAY);
@@ -1474,6 +1475,8 @@ iflib_fast_intr_rxtx(void *arg)
 
 	ctx = rxq->ifr_ctx;
 	sc = ctx->ifc_softc;
+	intr_enable = false;
+	intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY);
 	MPASS(rxq->ifr_ntxqirq);
 	for (i = 0; i < rxq->ifr_ntxqirq; i++) {
 		txqid = rxq->ifr_txqid[i];
@@ -1481,7 +1484,10 @@ iflib_fast_intr_rxtx(void *arg)
 		bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
 		    BUS_DMASYNC_POSTREAD);
 		if (!ctx->isc_txd_credits_update(sc, txqid, false)) {
-			IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
+			if (intr_legacy)
+				intr_enable = true;
+			else
+				IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
 			continue;
 		}
 		GROUPTASK_ENQUEUE(&txq->ift_task);
@@ -1493,9 +1499,14 @@ iflib_fast_intr_rxtx(void *arg)
 	if (iflib_rxd_avail(ctx, rxq, cidx, 1))
 		GROUPTASK_ENQUEUE(gtask);
 	else {
-		IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+		if (intr_legacy)
+			intr_enable = true;
+		else
+			IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
 		DBG_COUNTER_INC(rx_intr_enables);
 	}
+	if (intr_enable)
+		IFDI_INTR_ENABLE(ctx);
 	return (FILTER_HANDLED);
 }
 
@@ -2352,7 +2363,9 @@ iflib_init_locked(if_ctx_t ctx)
 		}
 		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
 			if (iflib_fl_setup(fl)) {
-				device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n");
+				device_printf(ctx->ifc_dev,
+				    "setting up free list %d failed - "
+				    "check cluster settings\n", j);
 				goto done;
 			}
 		}
@@ -3733,7 +3746,10 @@ _task_fn_tx(void *context)
 		    BUS_DMASYNC_POSTREAD);
 		if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false))
 			netmap_tx_irq(ifp, txq->ift_id);
-		IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
+		if (ctx->ifc_flags & IFC_LEGACY)
+			IFDI_INTR_ENABLE(ctx);
+		else
+			IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
 		return;
 	}
 #endif
@@ -3752,13 +3768,8 @@ _task_fn_tx(void *context)
 		ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
 	if (ctx->ifc_flags & IFC_LEGACY)
 		IFDI_INTR_ENABLE(ctx);
-	else {
-#ifdef INVARIANTS
-		int rc =
-#endif
-			IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
-			KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
-	}
+	else
+		IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
 }
 
 static void
@@ -3790,14 +3801,9 @@ _task_fn_rx(void *context)
 	if (more == false || (more = iflib_rxeof(rxq, budget)) == false) {
 		if (ctx->ifc_flags & IFC_LEGACY)
 			IFDI_INTR_ENABLE(ctx);
-		else {
-#ifdef INVARIANTS
-			int rc =
-#endif
-				IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
-			KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
-			DBG_COUNTER_INC(rx_intr_enables);
-		}
+		else
+			IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+		DBG_COUNTER_INC(rx_intr_enables);
 	}
 	if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
 		return;
@@ -4532,15 +4538,14 @@ unref_ctx_core_offset(if_ctx_t ctx)
 int
 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
 {
-	int err, rid, msix;
 	if_ctx_t ctx;
 	if_t ifp;
 	if_softc_ctx_t scctx;
-	int i;
-	uint16_t main_txq;
-	uint16_t main_rxq;
+	kobjop_desc_t kobj_desc;
+	kobj_method_t *kobj_method;
+	int err, i, msix, rid;
+	uint16_t main_rxq, main_txq;
 
-
 	ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO);
 
 	if (sc == NULL) {
@@ -4699,11 +4704,43 @@ iflib_device_register(device_t dev, void *sc, if_share
 	 * interrupt storm.
 	 */
 	IFDI_INTR_DISABLE(ctx);
-	if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
-		device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
-		goto fail_queues;
-	}
-	if (msix <= 1) {
+
+	if (msix > 1) {
+		/*
+		 * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable
+		 * aren't the default NULL implementation.
+		 */
+		kobj_desc = &ifdi_rx_queue_intr_enable_desc;
+		kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL,
+		    kobj_desc);
+		if (kobj_method == &kobj_desc->deflt) {
+			device_printf(dev,
+			    "MSI-X requires ifdi_rx_queue_intr_enable method");
+			err = EOPNOTSUPP;
+			goto fail_queues;
+		}
+		kobj_desc = &ifdi_tx_queue_intr_enable_desc;
+		kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL,
+		    kobj_desc);
+		if (kobj_method == &kobj_desc->deflt) {
+			device_printf(dev,
+			    "MSI-X requires ifdi_tx_queue_intr_enable method");
+			err = EOPNOTSUPP;
+			goto fail_queues;
+		}
+
+		/*
+		 * Assign the MSI-X vectors.
+		 * Note that the default NULL ifdi_msix_intr_assign method will
+		 * fail here, too.
+		 */
+		err = IFDI_MSIX_INTR_ASSIGN(ctx, msix);
+		if (err != 0) {
+			device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n",
+			    err);
+			goto fail_queues;
+		}
+	} else {
 		rid = 0;
 		if (scctx->isc_intr == IFLIB_INTR_MSI) {
 			MPASS(msix == 1);
@@ -4743,6 +4780,7 @@ iflib_device_register(device_t dev, void *sc, if_share
 	iflib_add_pfil(ctx);
 	ctx->ifc_flags |= IFC_INIT_DONE;
 	CTX_UNLOCK(ctx);
+
 	return (0);
 
 fail_detach:
@@ -4957,6 +4995,7 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sc
 	iflib_add_device_sysctl_post(ctx);
 	ctx->ifc_flags |= IFC_INIT_DONE;
 	CTX_UNLOCK(ctx);
+
 	return (0);
 fail_detach:
 	ether_ifdetach(ctx->ifc_ifp);
@@ -5349,7 +5388,6 @@ iflib_register(if_ctx_t ctx)
 	return (0);
 }
 
-
 static int
 iflib_queues_alloc(if_ctx_t ctx)
 {
@@ -5608,17 +5646,20 @@ iflib_rx_structures_setup(if_ctx_t ctx)
 	iflib_rxq_t rxq = ctx->ifc_rxqs;
 	int q;
 #if defined(INET6) || defined(INET)
-	int i, err;
+	int err, i;
 #endif
 
 	for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
 #if defined(INET6) || defined(INET)
-		tcp_lro_free(&rxq->ifr_lc);
-		if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
-		    TCP_LRO_ENTRIES, min(1024,
-		    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) {
-			device_printf(ctx->ifc_dev, "LRO Initialization failed!\n");
-			goto fail;
+		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) {
+			err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
+			    TCP_LRO_ENTRIES, min(1024,
+			    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]));
+			if (err != 0) {
+				device_printf(ctx->ifc_dev,
+				    "LRO Initialization failed!\n");
+				goto fail;
+			}
 		}
 #endif
 		IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
@@ -5627,14 +5668,14 @@ iflib_rx_structures_setup(if_ctx_t ctx)
 #if defined(INET6) || defined(INET)
 fail:
 	/*
-	 * Free RX software descriptors allocated so far, we will only handle
+	 * Free LRO resources allocated so far, we will only handle
 	 * the rings that completed, the failing case will have
-	 * cleaned up for itself. 'q' failed, so its the terminus.
+	 * cleaned up for itself.  'q' failed, so its the terminus.
 	 */
 	rxq = ctx->ifc_rxqs;
 	for (i = 0; i < q; ++i, rxq++) {
-		iflib_rx_sds_free(rxq);
-		rxq->ifr_cq_cidx = 0;
+		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
+			tcp_lro_free(&rxq->ifr_lc);
 	}
 	return (err);
 #endif
@@ -5649,9 +5690,12 @@ static void
 iflib_rx_structures_free(if_ctx_t ctx)
 {
 	iflib_rxq_t rxq = ctx->ifc_rxqs;
+	int i;
 
-	for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
+	for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
 		iflib_rx_sds_free(rxq);
+		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
+			tcp_lro_free(&rxq->ifr_lc);
 	}
 	free(ctx->ifc_rxqs, M_IFLIB);
 	ctx->ifc_rxqs = NULL;
@@ -5822,7 +5866,10 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, ifl
 		co += ctx->ifc_softc_ctx.isc_nrxqsets;
 	cpuid = find_nth(ctx, qid + co);
 	tid = get_core_offset(ctx, type, qid);
-	MPASS(tid >= 0);
+	if (tid < 0) {
+		device_printf(dev, "get_core_offset failed\n");
+		return (EOPNOTSUPP);
+	}
 	cpuid = find_close_core(cpuid, tid);
 	err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res,
 	    name);
@@ -5834,7 +5881,7 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, ifl
 	if (cpuid > ctx->ifc_cpuid_highest)
 		ctx->ifc_cpuid_highest = cpuid;
 #endif
-	return 0;
+	return (0);
 }
 
 int
@@ -5894,7 +5941,9 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, in
 		intr_fast = iflib_fast_intr_ctx;
 		break;
 	default:
-		panic("unknown net intr type");
+		device_printf(ctx->ifc_dev, "%s: unknown net intr type\n",
+		    __func__);
+		return (EINVAL);
 	}
 
 	info->ifi_filter = filter;
@@ -6005,11 +6054,12 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filte
 	info->ifi_filter = filter;
 	info->ifi_filter_arg = filter_arg;
 	info->ifi_task = gtask;
-	info->ifi_ctx = ctx;
+	info->ifi_ctx = q;
 
 	dev = ctx->ifc_dev;
 	/* We allocate a single interrupt resource */
-	if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0)
+	if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_rxtx,
+	    NULL, info, name)) != 0)
 		return (err);
 	GROUPTASK_INIT(gtask, 0, fn, q);
 	res = irq->ii_res;
@@ -6174,9 +6224,8 @@ iflib_msix_init(if_ctx_t ctx)
 	device_t dev = ctx->ifc_dev;
 	if_shared_ctx_t sctx = ctx->ifc_sctx;
 	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
-	int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs;
-	int iflib_num_tx_queues, iflib_num_rx_queues;
-	int err, admincnt, bar;
+	int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues;
+	int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors;
 
 	iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
 	iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
@@ -6185,8 +6234,6 @@ iflib_msix_init(if_ctx_t ctx)
 		device_printf(dev, "msix_init qsets capped at %d\n",
 		    imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets));
 
-	bar = ctx->ifc_softc_ctx.isc_msix_bar;
-	admincnt = sctx->isc_admin_intrcnt;
 	/* Override by tuneable */
 	if (scctx->isc_disable_msix)
 		goto msi;
@@ -6197,6 +6244,8 @@ iflib_msix_init(if_ctx_t ctx)
 			device_printf(dev, "MSI-X not supported or disabled\n");
 		goto msi;
 	}
+
+	bar = ctx->ifc_softc_ctx.isc_msix_bar;
 	/*
 	 * bar == -1 => "trust me I know what I'm doing"
 	 * Some drivers are for hardware that is so shoddily
@@ -6212,6 +6261,8 @@ iflib_msix_init(if_ctx_t ctx)
 			goto msi;
 		}
 	}
+
+	admincnt = sctx->isc_admin_intrcnt;
 #if IFLIB_DEBUG
 	/* use only 1 qset in debug mode */
 	queuemsgs = min(msgs - admincnt, 1);
@@ -6263,11 +6314,30 @@ iflib_msix_init(if_ctx_t ctx)
 		rx_queues = min(rx_queues, tx_queues);
 	}
 
+	vectors = rx_queues + admincnt;
+	if (msgs < vectors) {
+		device_printf(dev,
+		    "insufficient number of MSI-X vectors "
+		    "(supported %d, need %d)\n", msgs, vectors);
+		goto msi;
+	}
+
 	device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues,
 	    tx_queues);
-
-	vectors = rx_queues + admincnt;
+	msgs = vectors;
 	if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
+		if (vectors != msgs) {
+			device_printf(dev,
+			    "Unable to allocate sufficient MSI-X vectors "
+			    "(got %d, need %d)\n", vectors, msgs);
+			pci_release_msi(dev);
+			if (bar != -1) {
+				bus_release_resource(dev, SYS_RES_MEMORY, bar,
+				    ctx->ifc_msix_mem);
+				ctx->ifc_msix_mem = NULL;
+			}
+			goto msi;
+		}
 		device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
 		    vectors);
 		scctx->isc_vectors = vectors;
@@ -6278,12 +6348,15 @@ iflib_msix_init(if_ctx_t ctx)
 		return (vectors);
 	} else {
 		device_printf(dev,
-		    "failed to allocate %d MSI-X vectors, err: %d - using MSI\n",
-		    vectors, err);
-		bus_release_resource(dev, SYS_RES_MEMORY, bar,
-		    ctx->ifc_msix_mem);
-		ctx->ifc_msix_mem = NULL;
+		    "failed to allocate %d MSI-X vectors, err: %d\n", vectors,
+		    err);
+		if (bar != -1) {
+			bus_release_resource(dev, SYS_RES_MEMORY, bar,
+			    ctx->ifc_msix_mem);
+			ctx->ifc_msix_mem = NULL;
+		}
 	}
+
 msi:
 	vectors = pci_msi_count(dev);
 	scctx->isc_nrxqsets = 1;
@@ -6345,8 +6418,6 @@ mp_ndesc_handler(SYSCTL_HANDLER_ARGS)
 	char *p, *next;
 	int nqs, rc, i;
 
-	MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER);
-
 	nqs = 8;
 	switch(type) {
 	case IFLIB_NTXD_HANDLER:
@@ -6360,7 +6431,8 @@ mp_ndesc_handler(SYSCTL_HANDLER_ARGS)
 			nqs = ctx->ifc_sctx->isc_nrxqs;
 		break;
 	default:
-			panic("unhandled type");
+		printf("%s: unhandled type\n", __func__);
+		return (EINVAL);
 	}
 	if (nqs == 0)
 		nqs = 8;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905070828.x478SZ3t019398>