Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 20 Oct 2021 13:57:45 GMT
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 8ec8f56ce25b - stable/13 - cxgbe(4): Separate the sw- and hw-specific parts of resource allocations
Message-ID:  <202110201357.19KDvjsm097065@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by np:

URL: https://cgit.FreeBSD.org/src/commit/?id=8ec8f56ce25b0bcb95bba5ae52d284bcbd88617e

commit 8ec8f56ce25b0bcb95bba5ae52d284bcbd88617e
Author:     Navdeep Parhar <np@FreeBSD.org>
AuthorDate: 2021-04-26 21:03:06 +0000
Commit:     Navdeep Parhar <np@FreeBSD.org>
CommitDate: 2021-10-20 13:55:38 +0000

    cxgbe(4): Separate the sw- and hw-specific parts of resource allocations
    
    The driver uses both software resources (locks, callouts, memory for
    descriptors and for bookkeeping, sysctls, etc.) and hardware resources
    (VIs, DMA queues, TCAM entries, etc.) to operate the NIC.  This commit
    splits the single *_ALLOCATED flag used to track all these resources
    into separate *_SW_ALLOCATED and *_HW_ALLOCATED flags.
    
    This is the simplified pseudocode that now applies to most queues (foo
    can be ctrlq/txq/rxq/ofld_txq/ofld_rxq):
    
    /* Idempotent */
    alloc_foo
    {
            if (!SW_ALLOCATED)
                    init_iq/init_eq/init_fl         no-fail sw init
                    alloc_iq_fl/alloc_eq/alloc_wrq  may-fail sw alloc
                    add_foo_sysctls, etc.           no-fail post-alloc items
            if (!HW_ALLOCATED)
                    alloc_iq_fl_hwq/alloc_eq_hwq    hw resource allocation
    }
    
    /* Idempotent */
    free_foo
    {
            if (HW_ALLOCATED)
                    free_iq_fl_hwq/free_eq_hwq      release hw resources
            if (SW_ALLOCATED)
                    free_iq_fl/free_eq/free_wrq     release sw resources
    }
    
    The routines that take the driver to FULL_INIT_DONE and VI_INIT_DONE and
    back are now all idempotent.  The quiesce routines pay attention to the
    HW_ALLOCATED flag and will not wait on the hardware for pidx/cidx
    updates and other completions if this flag is not set.
    
    Sponsored by:   Chelsio Communications
    
    (cherry picked from commit 43bbae19483fbde0a91e61acad8a6e71e334c8b8)
---
 sys/dev/cxgbe/adapter.h       |   44 +-
 sys/dev/cxgbe/t4_filter.c     |    9 +-
 sys/dev/cxgbe/t4_main.c       |  376 +++++++----
 sys/dev/cxgbe/t4_netmap.c     |   11 +-
 sys/dev/cxgbe/t4_sched.c      |   13 +-
 sys/dev/cxgbe/t4_sge.c        | 1466 ++++++++++++++++++++++++-----------------
 sys/dev/cxgbe/tom/t4_cpl_io.c |    2 +-
 7 files changed, 1135 insertions(+), 786 deletions(-)

diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 7f6e1ceb1550..8809a10269a1 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -231,7 +231,13 @@ struct vi_info {
 	struct mtx tick_mtx;
 	struct callout tick;
 
-	struct sysctl_ctx_list ctx;	/* from ifconfig up to driver detach */
+	struct sysctl_ctx_list ctx;
+	struct sysctl_oid *rxq_oid;
+	struct sysctl_oid *txq_oid;
+	struct sysctl_oid *nm_rxq_oid;
+	struct sysctl_oid *nm_txq_oid;
+	struct sysctl_oid *ofld_rxq_oid;
+	struct sysctl_oid *ofld_txq_oid;
 
 	uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */
 };
@@ -345,11 +351,12 @@ CTASSERT(sizeof(struct iq_desc) == IQ_ESIZE);
 
 enum {
 	/* iq flags */
-	IQ_ALLOCATED	= (1 << 0),	/* firmware resources allocated */
+	IQ_SW_ALLOCATED	= (1 << 0),	/* sw resources allocated */
 	IQ_HAS_FL	= (1 << 1),	/* iq associated with a freelist */
 	IQ_RX_TIMESTAMP	= (1 << 2),	/* provide the SGE rx timestamp */
 	IQ_LRO_ENABLED	= (1 << 3),	/* iq is an eth rxq with LRO enabled */
 	IQ_ADJ_CREDIT	= (1 << 4),	/* hw is off by 1 credit for this iq */
+	IQ_HW_ALLOCATED	= (1 << 5),	/* fw/hw resources allocated */
 
 	/* iq state */
 	IQS_DISABLED	= 0,
@@ -393,12 +400,13 @@ struct sge_iq {
 	int8_t   intr_pktc_idx;	/* packet count threshold index */
 	uint8_t  gen;		/* generation bit */
 	uint8_t  intr_params;	/* interrupt holdoff parameters */
-	uint8_t  intr_next;	/* XXX: holdoff for next interrupt */
+	int8_t   cong;		/* congestion settings */
 	uint16_t qsize;		/* size (# of entries) of the queue */
 	uint16_t sidx;		/* index of the entry with the status page */
 	uint16_t cidx;		/* consumer index */
 	uint16_t cntxt_id;	/* SGE context id for the iq */
 	uint16_t abs_id;	/* absolute SGE id for the iq */
+	int16_t intr_idx;	/* interrupt used by the queue */
 
 	STAILQ_ENTRY(sge_iq) link;
 
@@ -408,13 +416,14 @@ struct sge_iq {
 };
 
 enum {
+	/* eq type */
 	EQ_CTRL		= 1,
 	EQ_ETH		= 2,
 	EQ_OFLD		= 3,
 
 	/* eq flags */
-	EQ_TYPEMASK	= 0x3,		/* 2 lsbits hold the type (see above) */
-	EQ_ALLOCATED	= (1 << 2),	/* firmware resources allocated */
+	EQ_SW_ALLOCATED	= (1 << 0),	/* sw resources allocated */
+	EQ_HW_ALLOCATED	= (1 << 1),	/* hw/fw resources allocated */
 	EQ_ENABLED	= (1 << 3),	/* open for business */
 	EQ_QFLUSH	= (1 << 4),	/* if_qflush in progress */
 };
@@ -432,10 +441,12 @@ struct sge_eq {
 	unsigned int flags;	/* MUST be first */
 	unsigned int cntxt_id;	/* SGE context id for the eq */
 	unsigned int abs_id;	/* absolute SGE id for the eq */
+	uint8_t type;		/* EQ_CTRL/EQ_ETH/EQ_OFLD */
+	uint8_t doorbells;
+	uint8_t tx_chan;	/* tx channel used by the eq */
 	struct mtx eq_lock;
 
 	struct tx_desc *desc;	/* KVA of descriptor ring */
-	uint8_t doorbells;
 	volatile uint32_t *udb;	/* KVA of doorbell (lies within BAR2) */
 	u_int udb_qid;		/* relative qid within the doorbell page */
 	uint16_t sidx;		/* index of the entry with the status page */
@@ -443,9 +454,9 @@ struct sge_eq {
 	uint16_t pidx;		/* producer idx (desc idx) */
 	uint16_t equeqidx;	/* EQUEQ last requested at this pidx */
 	uint16_t dbidx;		/* pidx of the most recent doorbell */
-	uint16_t iqid;		/* iq that gets egr_update for the eq */
-	uint8_t tx_chan;	/* tx channel used by the eq */
+	uint16_t iqid;		/* cached iq->cntxt_id (see iq below) */
 	volatile u_int equiq;	/* EQUIQ outstanding */
+	struct sge_iq *iq;	/* iq that receives egr_update for the eq */
 
 	bus_dma_tag_t desc_tag;
 	bus_dmamap_t desc_map;
@@ -922,7 +933,9 @@ struct adapter {
 	uint16_t iscsicaps;
 	uint16_t fcoecaps;
 
-	struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */
+	struct sysctl_ctx_list ctx;
+	struct sysctl_oid *ctrlq_oid;
+	struct sysctl_oid *fwq_oid;
 
 	struct mtx sc_lock;
 	char lockname[16];
@@ -1201,10 +1214,8 @@ int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *);
 void doom_vi(struct adapter *, struct vi_info *);
 void end_synchronized_op(struct adapter *, int);
 int update_mac_settings(struct ifnet *, int);
-int adapter_full_init(struct adapter *);
-int adapter_full_uninit(struct adapter *);
-int vi_full_init(struct vi_info *);
-int vi_full_uninit(struct vi_info *);
+int adapter_init(struct adapter *);
+int vi_init(struct vi_info *);
 void vi_sysctls(struct vi_info *);
 int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int);
 int alloc_atid(struct adapter *, void *);
@@ -1243,11 +1254,9 @@ struct sge_nm_rxq;
 void cxgbe_nm_attach(struct vi_info *);
 void cxgbe_nm_detach(struct vi_info *);
 void service_nm_rxq(struct sge_nm_rxq *);
-int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int,
-    struct sysctl_oid *);
+int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int);
 int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *);
-int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int,
-    struct sysctl_oid *);
+int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int);
 int free_nm_txq(struct vi_info *, struct sge_nm_txq *);
 #endif
 
@@ -1266,6 +1275,7 @@ int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
     bus_addr_t *, void **);
 int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
     void *);
+void free_fl_buffers(struct adapter *, struct sge_fl *);
 int t4_setup_adapter_queues(struct adapter *);
 int t4_teardown_adapter_queues(struct adapter *);
 int t4_setup_vi_queues(struct vi_info *);
diff --git a/sys/dev/cxgbe/t4_filter.c b/sys/dev/cxgbe/t4_filter.c
index 3afab0d1d6b9..cddd2c96a620 100644
--- a/sys/dev/cxgbe/t4_filter.c
+++ b/sys/dev/cxgbe/t4_filter.c
@@ -960,11 +960,10 @@ set_filter(struct adapter *sc, struct t4_filter *t)
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
 	if (rc)
 		return (rc);
-	if (!(sc->flags & FULL_INIT_DONE) &&
-	    ((rc = adapter_full_init(sc)) != 0)) {
-		end_synchronized_op(sc, 0);
-		return (rc);
-	}
+
+	if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_init(sc)) != 0))
+		goto done;
+
 	if (t->fs.hash) {
 		if (__predict_false(ti->hftid_hash_4t == NULL)) {
 			rc = alloc_hftid_hash(&sc->tids, HASH_NOWAIT);
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 5f969829c2d1..e5c11402d9ab 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -725,10 +725,15 @@ static int fixup_link_config(struct port_info *);
 static int apply_link_config(struct port_info *);
 static int cxgbe_init_synchronized(struct vi_info *);
 static int cxgbe_uninit_synchronized(struct vi_info *);
-static void quiesce_txq(struct adapter *, struct sge_txq *);
-static void quiesce_wrq(struct adapter *, struct sge_wrq *);
-static void quiesce_iq(struct adapter *, struct sge_iq *);
-static void quiesce_fl(struct adapter *, struct sge_fl *);
+static int adapter_full_init(struct adapter *);
+static void adapter_full_uninit(struct adapter *);
+static int vi_full_init(struct vi_info *);
+static void vi_full_uninit(struct vi_info *);
+static int alloc_extra_vi(struct adapter *, struct port_info *, struct vi_info *);
+static void quiesce_txq(struct sge_txq *);
+static void quiesce_wrq(struct sge_wrq *);
+static void quiesce_iq_fl(struct adapter *, struct sge_iq *, struct sge_fl *);
+static void quiesce_vi(struct vi_info *);
 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
     driver_intr_t *, void *, char *);
 static int t4_free_irq(struct adapter *, struct irq *);
@@ -1132,6 +1137,13 @@ t4_attach(device_t dev)
 
 	refcount_init(&sc->vxlan_refcount, 0);
 
+	sc->ctrlq_oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(sc->dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "ctrlq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "control queues");
+	sc->fwq_oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(sc->dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "fwq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "firmware event queue");
+
 	rc = t4_map_bars_0_and_4(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
@@ -1429,6 +1441,7 @@ t4_attach(device_t dev)
 		for_each_vi(pi, j, vi) {
 			vi->pi = pi;
 			vi->adapter = sc;
+			vi->first_intr = -1;
 			vi->qsize_rxq = t4_qsize_rxq;
 			vi->qsize_txq = t4_qsize_txq;
 
@@ -1680,9 +1693,7 @@ t4_detach_common(device_t dev)
 	}
 
 	device_delete_children(dev);
-
-	if (sc->flags & FULL_INIT_DONE)
-		adapter_full_uninit(sc);
+	adapter_full_uninit(sc);
 
 	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
 		t4_fw_bye(sc, sc->mbox);
@@ -1797,9 +1808,32 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
 {
 	struct ifnet *ifp;
 	struct sbuf *sb;
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid_list *children;
 	struct pfil_head_args pa;
 	struct adapter *sc = vi->adapter;
 
+	ctx = device_get_sysctl_ctx(vi->dev);
+	children = SYSCTL_CHILDREN(device_get_sysctl_tree(vi->dev));
+	vi->rxq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rxq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "NIC rx queues");
+	vi->txq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "txq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "NIC tx queues");
+#ifdef DEV_NETMAP
+	vi->nm_rxq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "nm_rxq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "netmap rx queues");
+	vi->nm_txq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "nm_txq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "netmap tx queues");
+#endif
+#ifdef TCP_OFFLOAD
+	vi->ofld_rxq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "ofld_rxq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE rx queues");
+#endif
+#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
+	vi->ofld_txq_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "ofld_txq",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE/ETHOFLD tx queues");
+#endif
+
 	vi->xact_addr_filt = -1;
 	mtx_init(&vi->tick_mtx, "vi tick", NULL, MTX_DEF);
 	callout_init_mtx(&vi->tick, &vi->tick_mtx, 0);
@@ -5536,12 +5570,10 @@ cxgbe_init_synchronized(struct vi_info *vi)
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 		return (0);	/* already running */
 
-	if (!(sc->flags & FULL_INIT_DONE) &&
-	    ((rc = adapter_full_init(sc)) != 0))
+	if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_init(sc)) != 0))
 		return (rc);	/* error message displayed already */
 
-	if (!(vi->flags & VI_INIT_DONE) &&
-	    ((rc = vi_full_init(vi)) != 0))
+	if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_init(vi)) != 0))
 		return (rc); /* error message displayed already */
 
 	rc = update_mac_settings(ifp, XGMAC_ALL);
@@ -5836,31 +5868,36 @@ write_global_rss_key(struct adapter *sc)
 #endif
 }
 
-int
+/*
+ * Idempotent.
+ */
+static int
 adapter_full_init(struct adapter *sc)
 {
 	int rc, i;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
-	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
-	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
-	    ("%s: FULL_INIT_DONE already", __func__));
+
+	if (!(sc->flags & ADAP_SYSCTL_CTX)) {
+		sysctl_ctx_init(&sc->ctx);
+		sc->flags |= ADAP_SYSCTL_CTX;
+	}
 
 	/*
 	 * queues that belong to the adapter (not any particular port).
 	 */
 	rc = t4_setup_adapter_queues(sc);
 	if (rc != 0)
-		goto done;
+		return (rc);
 
 	for (i = 0; i < nitems(sc->tq); i++) {
+		if (sc->tq[i] != NULL)
+			continue;
 		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &sc->tq[i]);
 		if (sc->tq[i] == NULL) {
-			device_printf(sc->dev,
-			    "failed to allocate task queue %d\n", i);
-			rc = ENOMEM;
-			goto done;
+			CH_ERR(sc, "failed to allocate task queue %d\n", i);
+			return (ENOMEM);
 		}
 		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
 		    device_get_nameunit(sc->dev), i);
@@ -5875,20 +5912,41 @@ adapter_full_init(struct adapter *sc)
 		callout_reset_sbt(&sc->ktls_tick, SBT_1MS, 0, ktls_tick, sc,
 		    C_HARDCLOCK);
 #endif
-	sc->flags |= FULL_INIT_DONE;
-done:
+	return (0);
+}
+
+int
+adapter_init(struct adapter *sc)
+{
+	int rc;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
+	    ("%s: FULL_INIT_DONE already", __func__));
+
+	rc = adapter_full_init(sc);
 	if (rc != 0)
 		adapter_full_uninit(sc);
+	else
+		sc->flags |= FULL_INIT_DONE;
 
 	return (rc);
 }
 
-int
+/*
+ * Idempotent.
+ */
+static void
 adapter_full_uninit(struct adapter *sc)
 {
 	int i;
 
-	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+	/* Do this before freeing the adapter queues. */
+	if (sc->flags & ADAP_SYSCTL_CTX) {
+		sysctl_ctx_free(&sc->ctx);
+		sc->flags &= ~ADAP_SYSCTL_CTX;
+	}
 
 	t4_teardown_adapter_queues(sc);
 
@@ -5898,8 +5956,6 @@ adapter_full_uninit(struct adapter *sc)
 	}
 
 	sc->flags &= ~FULL_INIT_DONE;
-
-	return (0);
 }
 
 #ifdef RSS
@@ -5967,12 +6023,13 @@ hashen_to_hashconfig(int hashen)
 }
 #endif
 
-int
+/*
+ * Idempotent.
+ */
+static int
 vi_full_init(struct vi_info *vi)
 {
 	struct adapter *sc = vi->adapter;
-	struct ifnet *ifp = vi->ifp;
-	uint16_t *rss;
 	struct sge_rxq *rxq;
 	int rc, i, j;
 #ifdef RSS
@@ -5982,59 +6039,60 @@ vi_full_init(struct vi_info *vi)
 #endif
 
 	ASSERT_SYNCHRONIZED_OP(sc);
-	KASSERT((vi->flags & VI_INIT_DONE) == 0,
-	    ("%s: VI_INIT_DONE already", __func__));
 
-	sysctl_ctx_init(&vi->ctx);
-	vi->flags |= VI_SYSCTL_CTX;
+	if (!(vi->flags & VI_SYSCTL_CTX)) {
+		sysctl_ctx_init(&vi->ctx);
+		vi->flags |= VI_SYSCTL_CTX;
+	}
 
 	/*
 	 * Allocate tx/rx/fl queues for this VI.
 	 */
 	rc = t4_setup_vi_queues(vi);
 	if (rc != 0)
-		goto done;	/* error message displayed already */
+		return (rc);
 
 	/*
 	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
 	 */
 	if (vi->nrxq > vi->rss_size) {
-		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
+		CH_ALERT(vi, "nrxq (%d) > hw RSS table size (%d); "
 		    "some queues will never receive traffic.\n", vi->nrxq,
 		    vi->rss_size);
 	} else if (vi->rss_size % vi->nrxq) {
-		if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
+		CH_ALERT(vi, "nrxq (%d), hw RSS table size (%d); "
 		    "expect uneven traffic distribution.\n", vi->nrxq,
 		    vi->rss_size);
 	}
 #ifdef RSS
 	if (vi->nrxq != nbuckets) {
-		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
+		CH_ALERT(vi, "nrxq (%d) != kernel RSS buckets (%d);"
 		    "performance will be impacted.\n", vi->nrxq, nbuckets);
 	}
 #endif
-	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
+	if (vi->rss == NULL)
+		vi->rss = malloc(vi->rss_size * sizeof (*vi->rss), M_CXGBE,
+		    M_ZERO | M_WAITOK);
 	for (i = 0; i < vi->rss_size;) {
 #ifdef RSS
 		j = rss_get_indirection_to_bucket(i);
 		j %= vi->nrxq;
 		rxq = &sc->sge.rxq[vi->first_rxq + j];
-		rss[i++] = rxq->iq.abs_id;
+		vi->rss[i++] = rxq->iq.abs_id;
 #else
 		for_each_rxq(vi, j, rxq) {
-			rss[i++] = rxq->iq.abs_id;
+			vi->rss[i++] = rxq->iq.abs_id;
 			if (i == vi->rss_size)
 				break;
 		}
 #endif
 	}
 
-	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
-	    vi->rss_size);
+	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size,
+	    vi->rss, vi->rss_size);
 	if (rc != 0) {
-		free(rss, M_CXGBE);
-		if_printf(ifp, "rss_config failed: %d\n", rc);
-		goto done;
+		CH_ERR(vi, "rss_config failed: %d\n", rc);
+		return (rc);
 	}
 
 #ifdef RSS
@@ -6057,40 +6115,52 @@ vi_full_init(struct vi_info *vi)
 	MPASS((extra & hashconfig) == 0);
 
 	if (extra) {
-		if_printf(ifp,
+		CH_ALERT(vi,
 		    "global RSS config (0x%x) cannot be accommodated.\n",
 		    hashconfig);
 	}
 	if (extra & RSS_HASHTYPE_RSS_IPV4)
-		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
+		CH_ALERT(vi, "IPv4 2-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
-		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
+		CH_ALERT(vi, "TCP/IPv4 4-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_IPV6)
-		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
+		CH_ALERT(vi, "IPv6 2-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
-		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
+		CH_ALERT(vi, "TCP/IPv6 4-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
-		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
+		CH_ALERT(vi, "UDP/IPv4 4-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
-		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
+		CH_ALERT(vi, "UDP/IPv6 4-tuple hashing forced on.\n");
 #else
 	vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
 	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
 	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
 	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
 #endif
-	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
+	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, vi->rss[0],
+	    0, 0);
 	if (rc != 0) {
-		free(rss, M_CXGBE);
-		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
-		goto done;
+		CH_ERR(vi, "rss hash/defaultq config failed: %d\n", rc);
+		return (rc);
 	}
 
-	vi->rss = rss;
-	vi->flags |= VI_INIT_DONE;
-done:
+	return (0);
+}
+
+int
+vi_init(struct vi_info *vi)
+{
+	int rc;
+
+	ASSERT_SYNCHRONIZED_OP(vi->adapter);
+	KASSERT((vi->flags & VI_INIT_DONE) == 0,
+	    ("%s: VI_INIT_DONE already", __func__));
+
+	rc = vi_full_init(vi);
 	if (rc != 0)
 		vi_full_uninit(vi);
+	else
+		vi->flags |= VI_INIT_DONE;
 
 	return (rc);
 }
@@ -6098,118 +6168,149 @@ done:
 /*
  * Idempotent.
  */
-int
+static void
 vi_full_uninit(struct vi_info *vi)
 {
-	struct port_info *pi = vi->pi;
-	struct adapter *sc = pi->adapter;
-	int i;
-	struct sge_rxq *rxq;
-	struct sge_txq *txq;
-#ifdef TCP_OFFLOAD
-	struct sge_ofld_rxq *ofld_rxq;
-#endif
-#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
-	struct sge_ofld_txq *ofld_txq;
-#endif
 
 	if (vi->flags & VI_INIT_DONE) {
-
-		/* Need to quiesce queues.  */
-
-		/* XXX: Only for the first VI? */
-		if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
-			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
-
-		for_each_txq(vi, i, txq) {
-			quiesce_txq(sc, txq);
-		}
-
-#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
-		for_each_ofld_txq(vi, i, ofld_txq) {
-			quiesce_wrq(sc, &ofld_txq->wrq);
-		}
-#endif
-
-		for_each_rxq(vi, i, rxq) {
-			quiesce_iq(sc, &rxq->iq);
-			quiesce_fl(sc, &rxq->fl);
-		}
-
-#ifdef TCP_OFFLOAD
-		for_each_ofld_rxq(vi, i, ofld_rxq) {
-			quiesce_iq(sc, &ofld_rxq->iq);
-			quiesce_fl(sc, &ofld_rxq->fl);
-		}
-#endif
+		quiesce_vi(vi);
 		free(vi->rss, M_CXGBE);
 		free(vi->nm_rss, M_CXGBE);
 	}
 
+	/* Do this before freeing the VI queues. */
+	if (vi->flags & VI_SYSCTL_CTX) {
+		sysctl_ctx_free(&vi->ctx);
+		vi->flags &= ~VI_SYSCTL_CTX;
+	}
+
 	t4_teardown_vi_queues(vi);
 	vi->flags &= ~VI_INIT_DONE;
-
-	return (0);
 }
 
 static void
-quiesce_txq(struct adapter *sc, struct sge_txq *txq)
+quiesce_txq(struct sge_txq *txq)
 {
 	struct sge_eq *eq = &txq->eq;
 	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
 
-	(void) sc;	/* unused */
-
-#ifdef INVARIANTS
-	TXQ_LOCK(txq);
-	MPASS((eq->flags & EQ_ENABLED) == 0);
-	TXQ_UNLOCK(txq);
-#endif
+	MPASS(eq->flags & EQ_SW_ALLOCATED);
+	MPASS(!(eq->flags & EQ_ENABLED));
 
 	/* Wait for the mp_ring to empty. */
 	while (!mp_ring_is_idle(txq->r)) {
 		mp_ring_check_drainage(txq->r, 4096);
 		pause("rquiesce", 1);
 	}
+	MPASS(txq->txp.npkt == 0);
 
-	/* Then wait for the hardware to finish. */
-	while (spg->cidx != htobe16(eq->pidx))
-		pause("equiesce", 1);
-
-	/* Finally, wait for the driver to reclaim all descriptors. */
-	while (eq->cidx != eq->pidx)
-		pause("dquiesce", 1);
+	if (eq->flags & EQ_HW_ALLOCATED) {
+		/*
+		 * Hardware is alive and working normally.  Wait for it to
+		 * finish and then wait for the driver to catch up and reclaim
+		 * all descriptors.
+		 */
+		while (spg->cidx != htobe16(eq->pidx))
+			pause("equiesce", 1);
+		while (eq->cidx != eq->pidx)
+			pause("dquiesce", 1);
+	} else {
+		/*
+		 * Hardware is unavailable.  Discard all pending tx and reclaim
+		 * descriptors directly.
+		 */
+		TXQ_LOCK(txq);
+		while (eq->cidx != eq->pidx) {
+			struct mbuf *m, *nextpkt;
+			struct tx_sdesc *txsd;
+
+			txsd = &txq->sdesc[eq->cidx];
+			for (m = txsd->m; m != NULL; m = nextpkt) {
+				nextpkt = m->m_nextpkt;
+				m->m_nextpkt = NULL;
+				m_freem(m);
+			}
+			IDXINCR(eq->cidx, txsd->desc_used, eq->sidx);
+		}
+		spg->pidx = spg->cidx = htobe16(eq->cidx);
+		TXQ_UNLOCK(txq);
+	}
 }
 
 static void
-quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
+quiesce_wrq(struct sge_wrq *wrq)
 {
 
 	/* XXXTX */
 }
 
 static void
-quiesce_iq(struct adapter *sc, struct sge_iq *iq)
+quiesce_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl)
 {
-	(void) sc;	/* unused */
-
 	/* Synchronize with the interrupt handler */
 	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
 		pause("iqfree", 1);
+
+	if (fl != NULL) {
+		MPASS(iq->flags & IQ_HAS_FL);
+
+		mtx_lock(&sc->sfl_lock);
+		FL_LOCK(fl);
+		fl->flags |= FL_DOOMED;
+		FL_UNLOCK(fl);
+		callout_stop(&sc->sfl_callout);
+		mtx_unlock(&sc->sfl_lock);
+
+		KASSERT((fl->flags & FL_STARVING) == 0,
+		    ("%s: still starving", __func__));
+
+		/* Release all buffers if hardware is no longer available. */
+		if (!(iq->flags & IQ_HW_ALLOCATED))
+			free_fl_buffers(sc, fl);
+	}
 }
 
+/*
+ * Wait for all activity on all the queues of the VI to complete.  It is assumed
+ * that no new work is being enqueued by the hardware or the driver.  That part
+ * should be arranged before calling this function.
+ */
 static void
-quiesce_fl(struct adapter *sc, struct sge_fl *fl)
+quiesce_vi(struct vi_info *vi)
 {
-	mtx_lock(&sc->sfl_lock);
-	FL_LOCK(fl);
-	fl->flags |= FL_DOOMED;
-	FL_UNLOCK(fl);
-	callout_stop(&sc->sfl_callout);
-	mtx_unlock(&sc->sfl_lock);
+	int i;
+	struct adapter *sc = vi->adapter;
+	struct sge_rxq *rxq;
+	struct sge_txq *txq;
+#ifdef TCP_OFFLOAD
+	struct sge_ofld_rxq *ofld_rxq;
+#endif
+#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
+	struct sge_ofld_txq *ofld_txq;
+#endif
+
+	if (!(vi->flags & VI_INIT_DONE))
+		return;
+
+	for_each_txq(vi, i, txq) {
+		quiesce_txq(txq);
+	}
+
+#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
+	for_each_ofld_txq(vi, i, ofld_txq) {
+		quiesce_wrq(&ofld_txq->wrq);
+	}
+#endif
 
-	KASSERT((fl->flags & FL_STARVING) == 0,
-	    ("%s: still starving", __func__));
+	for_each_rxq(vi, i, rxq) {
+		quiesce_iq_fl(sc, &rxq->iq, &rxq->fl);
+	}
+
+#ifdef TCP_OFFLOAD
+	for_each_ofld_rxq(vi, i, ofld_rxq) {
+		quiesce_iq_fl(sc, &ofld_rxq->iq, &ofld_rxq->fl);
+	}
+#endif
 }
 
 static int
@@ -11119,16 +11220,11 @@ toe_capability(struct vi_info *vi, bool enable)
 		 * and receive CPLs to/from the TOE even if the ifnet for this
 		 * port has never been UP'd administratively.
 		 */
-		if (!(vi->flags & VI_INIT_DONE)) {
-			rc = vi_full_init(vi);
-			if (rc)
-				return (rc);
-		}
-		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
-			rc = vi_full_init(&pi->vi[0]);
-			if (rc)
-				return (rc);
-		}
+		if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_init(vi)) != 0))
+			return (rc);
+		if (!(pi->vi[0].flags & VI_INIT_DONE) &&
+		    ((rc = vi_init(&pi->vi[0])) != 0))
+			return (rc);
 
 		if (isset(&sc->offload_map, pi->port_id)) {
 			/* TOE is enabled on another VI of this port. */
@@ -11239,7 +11335,7 @@ t4_activate_uld(struct adapter *sc, int id)
 	SLIST_FOREACH(ui, &t4_uld_list, link) {
 		if (ui->uld_id == id) {
 			if (!(sc->flags & FULL_INIT_DONE)) {
-				rc = adapter_full_init(sc);
+				rc = adapter_init(sc);
 				if (rc != 0)
 					break;
 			}
diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c
index 6078dd89829d..8c5b9f500661 100644
--- a/sys/dev/cxgbe/t4_netmap.c
+++ b/sys/dev/cxgbe/t4_netmap.c
@@ -125,9 +125,10 @@ static int free_nm_txq_hwq(struct vi_info *, struct sge_nm_txq *);
 
 int
 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx,
-    int idx, struct sysctl_oid *oid)
+    int idx)
 {
 	int rc;
+	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children;
 	struct sysctl_ctx_list *ctx;
 	char name[16];
@@ -161,7 +162,7 @@ alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx,
 	nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID;
 
 	ctx = &vi->ctx;
-	children = SYSCTL_CHILDREN(oid);
+	children = SYSCTL_CHILDREN(vi->nm_rxq_oid);
 
 	snprintf(name, sizeof(name), "%d", idx);
 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name,
@@ -211,8 +212,7 @@ free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
 }
 
 int
-alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx,
-    struct sysctl_oid *oid)
+alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx)
 {
 	int rc;
 	size_t len;
@@ -220,7 +220,8 @@ alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx,
 	struct adapter *sc = pi->adapter;
 	struct netmap_adapter *na = NA(vi->ifp);
 	char name[16];
-	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
+	struct sysctl_oid *oid;
+	struct sysctl_oid_list *children = SYSCTL_CHILDREN(vi->nm_txq_oid);
 
 	len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len;
 	rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map,
diff --git a/sys/dev/cxgbe/t4_sched.c b/sys/dev/cxgbe/t4_sched.c
index 99a157eca08c..0cb44e1d4ffb 100644
--- a/sys/dev/cxgbe/t4_sched.c
+++ b/sys/dev/cxgbe/t4_sched.c
@@ -296,8 +296,8 @@ bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
 	int rc, old_idx;
 	uint32_t fw_mnem, fw_class;
 
-	if (!(txq->eq.flags & EQ_ALLOCATED))
-		return (EAGAIN);
+	if (!(txq->eq.flags & EQ_HW_ALLOCATED))
+		return (ENXIO);
 
 	mtx_lock(&sc->tc_lock);
 	if (txq->tc_idx == -2) {
@@ -565,16 +565,13 @@ int
 sysctl_tc(SYSCTL_HANDLER_ARGS)
 {
 	struct vi_info *vi = arg1;
-	struct port_info *pi;
-	struct adapter *sc;
+	struct adapter *sc = vi->adapter;
 	struct sge_txq *txq;
 	int qidx = arg2, rc, tc_idx;
 
-	MPASS(qidx >= 0 && qidx < vi->ntxq);
-	pi = vi->pi;
-	sc = pi->adapter;
-	txq = &sc->sge.txq[vi->first_txq + qidx];
+	MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq);
 
+	txq = &sc->sge.txq[qidx];
 	tc_idx = txq->tc_idx;
 	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
 	if (rc != 0 || req->newptr == NULL)
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index c09ba29213a1..2f6c7e2e7914 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -234,53 +234,66 @@ static int service_iq_fl(struct sge_iq *, int);
 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
 static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *,
     u_int);
-static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
+static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
+    int, int);
 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t,
-    uint16_t, char *);
+    struct sge_iq *, char *);
 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *,
-    int, int);
-static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *);
+    struct sysctl_ctx_list *, struct sysctl_oid *);
+static void free_iq_fl(struct adapter *, struct sge_iq *, struct sge_fl *);
 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
     struct sge_iq *);
 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *,
     struct sysctl_oid *, struct sge_fl *);
+static int alloc_iq_fl_hwq(struct vi_info *, struct sge_iq *, struct sge_fl *);
+static int free_iq_fl_hwq(struct adapter *, struct sge_iq *, struct sge_fl *);
 static int alloc_fwq(struct adapter *);
-static int free_fwq(struct adapter *);
-static int alloc_ctrlq(struct adapter *, struct sge_wrq *, int,
-    struct sysctl_oid *);
-static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int,
-    struct sysctl_oid *);
-static int free_rxq(struct vi_info *, struct sge_rxq *);
+static void free_fwq(struct adapter *);
+static int alloc_ctrlq(struct adapter *, int);
+static void free_ctrlq(struct adapter *, int);
+static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, int);
+static void free_rxq(struct vi_info *, struct sge_rxq *);
+static void add_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
+    struct sge_rxq *);
 #ifdef TCP_OFFLOAD
 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int,
-    struct sysctl_oid *);
-static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *);
+    int);
+static void free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *);
+static void add_ofld_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
+    struct sge_ofld_rxq *);
 #endif
 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
 #endif
-static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *);
-static int free_eq(struct adapter *, struct sge_eq *);
-static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *,
+static int alloc_eq(struct adapter *, struct sge_eq *, struct sysctl_ctx_list *,
     struct sysctl_oid *);
-static int free_wrq(struct adapter *, struct sge_wrq *);
-static int alloc_txq(struct vi_info *, struct sge_txq *, int,
-    struct sysctl_oid *);
-static int free_txq(struct vi_info *, struct sge_txq *);
+static void free_eq(struct adapter *, struct sge_eq *);
+static void add_eq_sysctls(struct adapter *, struct sysctl_ctx_list *,
+    struct sysctl_oid *, struct sge_eq *);
+static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *);
+static int free_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *);
+static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *,
+    struct sysctl_ctx_list *, struct sysctl_oid *);
+static void free_wrq(struct adapter *, struct sge_wrq *);
+static void add_wrq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
+    struct sge_wrq *);
*** 2021 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202110201357.19KDvjsm097065>