Date: Mon, 29 Sep 2025 14:37:54 GMT From: Navdeep Parhar <np@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 3d76a4feeead - main - cxgbe(4): Updates for T7 CIM multicore operation Message-ID: <202509291437.58TEbsq5017927@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by np: URL: https://cgit.FreeBSD.org/src/commit/?id=3d76a4feeead2bbda7792a3c4ca534fd4c159721 commit 3d76a4feeead2bbda7792a3c4ca534fd4c159721 Author: Navdeep Parhar <np@FreeBSD.org> AuthorDate: 2025-09-29 09:17:51 +0000 Commit: Navdeep Parhar <np@FreeBSD.org> CommitDate: 2025-09-29 14:26:00 +0000 cxgbe(4): Updates for T7 CIM multicore operation T7 has a multicore microprocessor and each core has its own queue configuration, inbound/outbound queues, and logic analyzer. A work request involving a tid can only be handled on queues where (tid & tid_qid_sel_mask) == (eq->cntxt_id & tid_qid_sel_mask). MFC after: 3 days Sponsored by: Chelsio Communications --- sys/dev/cxgbe/adapter.h | 14 +- sys/dev/cxgbe/t4_main.c | 553 ++++++++++++++++++++++++++++++----------- sys/dev/cxgbe/t4_netmap.c | 10 +- sys/dev/cxgbe/t4_sge.c | 77 ++++-- sys/dev/cxgbe/tom/t4_connect.c | 6 + sys/dev/cxgbe/tom/t4_listen.c | 2 + sys/dev/cxgbe/tom/t4_tom.c | 31 ++- sys/dev/cxgbe/tom/t4_tom.h | 2 + 8 files changed, 515 insertions(+), 180 deletions(-) diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 36c3b48cccbd..e3906f8058a7 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -1,8 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2011 Chelsio Communications, Inc. - * All rights reserved. + * Copyright (c) 2011, 2025 Chelsio Communications. * Written by: Navdeep Parhar <np@FreeBSD.org> * * Redistribution and use in source and binary forms, with or without @@ -770,6 +769,16 @@ struct sge_ofld_txq { counter_u64_t tx_toe_tls_octets; } __aligned(CACHE_LINE_SIZE); +static inline int +ofld_txq_group(int val, int mask) +{ + const uint32_t ngroup = 1 << bitcount32(mask); + const int mshift = ffs(mask) - 1; + const uint32_t gmask = ngroup - 1; + + return (val >> mshift & gmask); +} + #define INVALID_NM_RXQ_CNTXT_ID ((uint16_t)(-1)) struct sge_nm_rxq { /* Items used by the driver rx ithread are in this cacheline. 
*/ @@ -837,6 +846,7 @@ struct sge_nm_txq { } __aligned(CACHE_LINE_SIZE); struct sge { + int nctrlq; /* total # of control queues */ int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx queues */ int nofldrxq; /* total # of TOE rx queues */ diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 588d59418a71..df6314ca6e18 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -1,8 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2011 Chelsio Communications, Inc. - * All rights reserved. + * Copyright (c) 2011, 2025 Chelsio Communications. * Written by: Navdeep Parhar <np@FreeBSD.org> * * Redistribution and use in source and binary forms, with or without @@ -859,11 +858,13 @@ static int sysctl_vdd(SYSCTL_HANDLER_ARGS); static int sysctl_reset_sensor(SYSCTL_HANDLER_ARGS); static int sysctl_loadavg(SYSCTL_HANDLER_ARGS); static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); -static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); +static int sysctl_cim_ibq(SYSCTL_HANDLER_ARGS); +static int sysctl_cim_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); +static int sysctl_cim_qcfg_t7(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tid_stats(SYSCTL_HANDLER_ARGS); @@ -1569,6 +1570,7 @@ t4_attach(device_t dev) sc->intr_count = iaq.nirq; s = &sc->sge; + s->nctrlq = max(sc->params.nports, sc->params.ncores); s->nrxq = nports * iaq.nrxq; s->ntxq = nports * iaq.ntxq; if (num_vis > 1) { @@ -1623,7 +1625,7 @@ t4_attach(device_t dev) MPASS(s->niq <= s->iqmap_sz); MPASS(s->neq <= s->eqmap_sz); - s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE, + s->ctrlq = malloc(s->nctrlq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = 
malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); @@ -4564,8 +4566,27 @@ calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, iaq->nrxq_vi = t4_nrxq_vi; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) if (is_offload(sc) || is_ethoffload(sc)) { - iaq->nofldtxq = t4_nofldtxq; - iaq->nofldtxq_vi = t4_nofldtxq_vi; + if (sc->params.tid_qid_sel_mask == 0) { + iaq->nofldtxq = t4_nofldtxq; + iaq->nofldtxq_vi = t4_nofldtxq_vi; + } else { + iaq->nofldtxq = roundup(t4_nofldtxq, sc->params.ncores); + iaq->nofldtxq_vi = roundup(t4_nofldtxq_vi, + sc->params.ncores); + if (iaq->nofldtxq != t4_nofldtxq) + device_printf(sc->dev, + "nofldtxq updated (%d -> %d) for correct" + " operation with %d firmware cores.\n", + t4_nofldtxq, iaq->nofldtxq, + sc->params.ncores); + if (iaq->num_vis > 1 && + iaq->nofldtxq_vi != t4_nofldtxq_vi) + device_printf(sc->dev, + "nofldtxq_vi updated (%d -> %d) for correct" + " operation with %d firmware cores.\n", + t4_nofldtxq_vi, iaq->nofldtxq_vi, + sc->params.ncores); + } } #endif #ifdef TCP_OFFLOAD @@ -4666,6 +4687,10 @@ calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, if (iaq->nofldrxq > 0) { iaq->nofldrxq = 1; iaq->nofldtxq = 1; + if (sc->params.tid_qid_sel_mask == 0) + iaq->nofldtxq = 1; + else + iaq->nofldtxq = sc->params.ncores; } iaq->nnmtxq = 0; iaq->nnmrxq = 0; @@ -4678,9 +4703,10 @@ done: MPASS(iaq->nirq > 0); MPASS(iaq->nrxq > 0); MPASS(iaq->ntxq > 0); - if (itype == INTR_MSI) { + if (itype == INTR_MSI) MPASS(powerof2(iaq->nirq)); - } + if (sc->params.tid_qid_sel_mask != 0) + MPASS(iaq->nofldtxq % sc->params.ncores == 0); } static int @@ -5640,6 +5666,14 @@ get_params__post_init(struct adapter *sc) } } + if (sc->params.ncores > 1) { + MPASS(chip_id(sc) >= CHELSIO_T7); + + param[0] = FW_PARAM_DEV(TID_QID_SEL_MASK); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val); + sc->params.tid_qid_sel_mask = rc == 0 ? 
val[0] : 0; + } + /* * The parameters that follow may not be available on all firmwares. We * query them individually rather than in a compound query because old @@ -7622,6 +7656,150 @@ vi_tick(void *arg) callout_schedule(&vi->tick, hz); } +/* CIM inbound queues */ +static const char *t4_ibq[CIM_NUM_IBQ] = { + "ibq_tp0", "ibq_tp1", "ibq_ulp", "ibq_sge0", "ibq_sge1", "ibq_ncsi" +}; +static const char *t7_ibq[CIM_NUM_IBQ_T7] = { + "ibq_tp0", "ibq_tp1", "ibq_tp2", "ibq_tp3", "ibq_ulp", "ibq_sge0", + "ibq_sge1", "ibq_ncsi", NULL, "ibq_ipc1", "ibq_ipc2", "ibq_ipc3", + "ibq_ipc4", "ibq_ipc5", "ibq_ipc6", "ibq_ipc7" +}; +static const char *t7_ibq_sec[] = { + "ibq_tp0", "ibq_tp1", "ibq_tp2", "ibq_tp3", "ibq_ulp", "ibq_sge0", + NULL, NULL, NULL, "ibq_ipc0" +}; + +/* CIM outbound queues */ +static const char *t4_obq[CIM_NUM_OBQ_T5] = { + "obq_ulp0", "obq_ulp1", "obq_ulp2", "obq_ulp3", "obq_sge", "obq_ncsi", + "obq_sge_rx_q0", "obq_sge_rx_q1" /* These two are T5/T6 only */ +}; +static const char *t7_obq[CIM_NUM_OBQ_T7] = { + "obq_ulp0", "obq_ulp1", "obq_ulp2", "obq_ulp3", "obq_sge", "obq_ncsi", + "obq_sge_rx_q0", NULL, NULL, "obq_ipc1", "obq_ipc2", "obq_ipc3", + "obq_ipc4", "obq_ipc5", "obq_ipc6", "obq_ipc7" +}; +static const char *t7_obq_sec[] = { + "obq_ulp0", "obq_ulp1", "obq_ulp2", "obq_ulp3", "obq_sge", NULL, + "obq_sge_rx_q0", NULL, NULL, "obq_ipc0" +}; + +static void +cim_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *c0) +{ + struct sysctl_oid *oid; + struct sysctl_oid_list *children1; + int i, j, qcount; + char s[16]; + const char **qname; + + oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "cim", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "CIM block"); + c0 = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_U8(ctx, c0, OID_AUTO, "ncores", CTLFLAG_RD, NULL, + sc->params.ncores, "# of active CIM cores"); + + for (i = 0; i < sc->params.ncores; i++) { + snprintf(s, sizeof(s), "%u", i); + oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, s, + CTLFLAG_RD | 
CTLFLAG_MPSAFE, NULL, "CIM core"); + children1 = SYSCTL_CHILDREN(oid); + + /* + * CTLFLAG_SKIP because the misc.devlog sysctl already displays + * the log for all cores. Use this sysctl to get the log for a + * particular core only. + */ + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, "devlog", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, + sc, i, sysctl_devlog, "A", "firmware's device log"); + + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, "loadavg", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, i, + sysctl_loadavg, "A", + "microprocessor load averages (select firmwares only)"); + + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, "qcfg", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, i, + chip_id(sc) > CHELSIO_T6 ? sysctl_cim_qcfg_t7 : sysctl_cim_qcfg, + "A", "Queue configuration"); + + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, "la", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, i, + sysctl_cim_la, "A", "Logic analyzer"); + + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, "ma_la", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, i, + sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); + + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, "pif_la", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, i, + sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); + + /* IBQs */ + switch (chip_id(sc)) { + case CHELSIO_T4: + case CHELSIO_T5: + case CHELSIO_T6: + qname = &t4_ibq[0]; + qcount = nitems(t4_ibq); + break; + case CHELSIO_T7: + default: + if (i == 0) { + qname = &t7_ibq[0]; + qcount = nitems(t7_ibq); + } else { + qname = &t7_ibq_sec[0]; + qcount = nitems(t7_ibq_sec); + } + break; + } + MPASS(qcount <= sc->chip_params->cim_num_ibq); + for (j = 0; j < qcount; j++) { + if (qname[j] == NULL) + continue; + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, qname[j], + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, + (i << 16) | j, sysctl_cim_ibq, "A", NULL); + } + + /* OBQs */ + switch (chip_id(sc)) { + case CHELSIO_T4: + qname = t4_obq; + qcount = CIM_NUM_OBQ; + 
break; + case CHELSIO_T5: + case CHELSIO_T6: + qname = t4_obq; + qcount = nitems(t4_obq); + break; + case CHELSIO_T7: + default: + if (i == 0) { + qname = t7_obq; + qcount = nitems(t7_obq); + } else { + qname = t7_obq_sec; + qcount = nitems(t7_obq_sec); + } + break; + } + MPASS(qcount <= sc->chip_params->cim_num_obq); + for (j = 0; j < qcount; j++) { + if (qname[j] == NULL) + continue; + SYSCTL_ADD_PROC(ctx, children1, OID_AUTO, qname[j], + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, + (i << 16) | j, sysctl_cim_obq, "A", NULL); + } + } +} + /* * Should match fw_caps_config_<foo> enums in t4fw_interface.h */ @@ -7766,11 +7944,6 @@ t4_sysctls(struct adapter *sc) CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, sysctl_reset_sensor, "I", "reset the chip's temperature sensor."); - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_loadavg, "A", - "microprocessor load averages (debug firmwares only)"); - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "core_vdd", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, sysctl_vdd, "I", "core Vdd (in mV)"); @@ -7802,81 +7975,7 @@ t4_sysctls(struct adapter *sc) CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, sysctl_cctrl, "A", "congestion control"); - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1, - sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2, - sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 3, - sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", - CTLTYPE_STRING 
| CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 4, - sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 5, - sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_cim_la, "A", "CIM logic analyzer"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); - - if (chip_id(sc) > CHELSIO_T4) { - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", - "CIM OBQ 6 (SGE0-RX)"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, - 7 + CIM_NUM_IBQ, 
sysctl_cim_ibq_obq, "A", - "CIM OBQ 7 (SGE1-RX)"); - } - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); - - SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_cim_qcfg, "A", "CIM queue configuration"); + cim_sysctls(sc, ctx, children); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, @@ -7891,8 +7990,8 @@ t4_sysctls(struct adapter *sc) sysctl_tid_stats, "A", "tid stats"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - sysctl_devlog, "A", "firmware's device log"); + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, -1, + sysctl_devlog, "A", "firmware's device log (all cores)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, @@ -9207,6 +9306,10 @@ sysctl_loadavg(SYSCTL_HANDLER_ARGS) struct sbuf *sb; int rc; uint32_t param, val; + uint8_t coreid = (uint8_t)arg2; + + KASSERT(coreid < sc->params.ncores, + ("%s: bad coreid %u\n", __func__, coreid)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg"); if (rc) @@ -9215,7 +9318,8 @@ sysctl_loadavg(SYSCTL_HANDLER_ARGS) rc = ENXIO; else { param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | - V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD); + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD) | + V_FW_PARAMS_PARAM_Y(coreid); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); } end_synchronized_op(sc, 0); @@ -9281,50 +9385,30 @@ done: return (rc); } -static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { - "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ - "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ - "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ -}; - static int -sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) 
+sysctl_cim_ibq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; - int rc, i, n, qid = arg2; + int rc, i, n, qid, coreid; uint32_t *buf, *p; - char *qtype; - u_int cim_num_obq = sc->chip_params->cim_num_obq; - KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, - ("%s: bad qid %d\n", __func__, qid)); + qid = arg2 & 0xffff; + coreid = arg2 >> 16; - if (qid < CIM_NUM_IBQ) { - /* inbound queue */ - qtype = "IBQ"; - n = 4 * CIM_IBQ_SIZE; - buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); - mtx_lock(&sc->reg_lock); - if (hw_off_limits(sc)) - rc = -ENXIO; - else - rc = t4_read_cim_ibq(sc, qid, buf, n); - mtx_unlock(&sc->reg_lock); - } else { - /* outbound queue */ - qtype = "OBQ"; - qid -= CIM_NUM_IBQ; - n = 4 * cim_num_obq * CIM_OBQ_SIZE; - buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); - mtx_lock(&sc->reg_lock); - if (hw_off_limits(sc)) - rc = -ENXIO; - else - rc = t4_read_cim_obq(sc, qid, buf, n); - mtx_unlock(&sc->reg_lock); - } + KASSERT(qid >= 0 && qid < sc->chip_params->cim_num_ibq, + ("%s: bad ibq qid %d\n", __func__, qid)); + KASSERT(coreid >= 0 && coreid < sc->params.ncores, + ("%s: bad coreid %d\n", __func__, coreid)); + n = 4 * CIM_IBQ_SIZE; + buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); + mtx_lock(&sc->reg_lock); + if (hw_off_limits(sc)) + rc = -ENXIO; + else + rc = t4_read_cim_ibq_core(sc, coreid, qid, buf, n); + mtx_unlock(&sc->reg_lock); if (rc < 0) { rc = -rc; goto done; @@ -9336,12 +9420,58 @@ sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) rc = ENOMEM; goto done; } - - sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); + rc = sbuf_finish(sb); + sbuf_delete(sb); +done: + free(buf, M_CXGBE); + return (rc); +} + +static int +sysctl_cim_obq(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc, i, n, qid, coreid; + uint32_t *buf, *p; + + qid = 
arg2 & 0xffff; + coreid = arg2 >> 16; + + KASSERT(qid >= 0 && qid < sc->chip_params->cim_num_obq, + ("%s: bad obq qid %d\n", __func__, qid)); + KASSERT(coreid >= 0 && coreid < sc->params.ncores, + ("%s: bad coreid %d\n", __func__, coreid)); + + n = 6 * CIM_OBQ_SIZE * 4; + buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); + mtx_lock(&sc->reg_lock); + if (hw_off_limits(sc)) + rc = -ENXIO; + else + rc = t4_read_cim_obq_core(sc, coreid, qid, buf, n); + mtx_unlock(&sc->reg_lock); + if (rc < 0) { + rc = -rc; + goto done; + } + n = rc * sizeof(uint32_t); /* rc has # of words actually read */ + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + goto done; + + sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); + if (sb == NULL) { + rc = ENOMEM; + goto done; + } + for (i = 0, p = buf; i < n; i += 16, p += 4) + sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], + p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: @@ -9412,7 +9542,7 @@ sbuf_cim_la6(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg) } static int -sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags) +sbuf_cim_la(struct adapter *sc, int coreid, struct sbuf *sb, int flags) { uint32_t cfg, *buf; int rc; @@ -9427,9 +9557,10 @@ sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags) if (hw_off_limits(sc)) rc = ENXIO; else { - rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); + rc = -t4_cim_read_core(sc, 1, coreid, A_UP_UP_DBG_LA_CFG, 1, + &cfg); if (rc == 0) - rc = -t4_cim_read_la(sc, buf, NULL); + rc = -t4_cim_read_la_core(sc, coreid, buf, NULL); } mtx_unlock(&sc->reg_lock); if (rc == 0) { @@ -9446,6 +9577,7 @@ static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; + int coreid = arg2; struct sbuf *sb; int rc; @@ -9453,7 +9585,7 @@ sysctl_cim_la(SYSCTL_HANDLER_ARGS) if (sb == NULL) return (ENOMEM); - rc = sbuf_cim_la(sc, sb, M_WAITOK); + rc = sbuf_cim_la(sc, coreid, sb, M_WAITOK); if (rc == 0) rc = sbuf_finish(sb); 
sbuf_delete(sb); @@ -9490,7 +9622,7 @@ dump_cimla(struct adapter *sc) device_get_nameunit(sc->dev)); return; } - rc = sbuf_cim_la(sc, &sb, M_WAITOK); + rc = sbuf_cim_la(sc, 0, &sb, M_WAITOK); if (rc == 0) { rc = sbuf_finish(&sb); if (rc == 0) { @@ -9614,6 +9746,13 @@ sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; + static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { + "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ + "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ + "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ + }; + + MPASS(chip_id(sc) < CHELSIO_T7); cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { @@ -9665,6 +9804,104 @@ sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) return (rc); } +static int +sysctl_cim_qcfg_t7(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + u_int coreid = arg2; + struct sbuf *sb; + int rc, i; + u_int addr; + uint16_t base[CIM_NUM_IBQ_T7 + CIM_NUM_OBQ_T7]; + uint16_t size[CIM_NUM_IBQ_T7 + CIM_NUM_OBQ_T7]; + uint16_t thres[CIM_NUM_IBQ_T7]; + uint32_t obq_wr[2 * CIM_NUM_OBQ_T7], *wr = obq_wr; + uint32_t stat[4 * (CIM_NUM_IBQ_T7 + CIM_NUM_OBQ_T7)], *p = stat; + static const char * const qname_ibq_t7[] = { + "TP0", "TP1", "TP2", "TP3", "ULP", "SGE0", "SGE1", "NC-SI", + "RSVD", "IPC1", "IPC2", "IPC3", "IPC4", "IPC5", "IPC6", "IPC7", + }; + static const char * const qname_obq_t7[] = { + "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", "SGE0-RX", + "RSVD", "RSVD", "IPC1", "IPC2", "IPC3", "IPC4", "IPC5", + "IPC6", "IPC7" + }; + static const char * const qname_ibq_sec_t7[] = { + "TP0", "TP1", "TP2", "TP3", "ULP", "SGE0", "RSVD", "RSVD", + "RSVD", "IPC0", "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", + }; + static const char * const qname_obq_sec_t7[] = { + "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "RSVD", "SGE0-RX", + "RSVD", "RSVD", "IPC0", "RSVD", "RSVD", 
"RSVD", "RSVD", + "RSVD", "RSVD", + }; + + MPASS(chip_id(sc) >= CHELSIO_T7); + + mtx_lock(&sc->reg_lock); + if (hw_off_limits(sc)) + rc = ENXIO; + else { + rc = -t4_cim_read_core(sc, 1, coreid, + A_T7_UP_IBQ_0_SHADOW_RDADDR, 4 * CIM_NUM_IBQ_T7, stat); + if (rc != 0) + goto unlock; + + rc = -t4_cim_read_core(sc, 1, coreid, + A_T7_UP_OBQ_0_SHADOW_RDADDR, 4 * CIM_NUM_OBQ_T7, + &stat[4 * CIM_NUM_IBQ_T7]); + if (rc != 0) + goto unlock; + + addr = A_T7_UP_OBQ_0_SHADOW_REALADDR; + for (i = 0; i < CIM_NUM_OBQ_T7 * 2; i++, addr += 8) { + rc = -t4_cim_read_core(sc, 1, coreid, addr, 1, + &obq_wr[i]); + if (rc != 0) + goto unlock; + } + t4_read_cimq_cfg_core(sc, coreid, base, size, thres); + } +unlock: + mtx_unlock(&sc->reg_lock); + if (rc) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); + if (sb == NULL) + return (ENOMEM); + + sbuf_printf(sb, + " Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); + + for (i = 0; i < CIM_NUM_IBQ_T7; i++, p += 4) { + if (!size[i]) + continue; + + sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", + coreid == 0 ? qname_ibq_t7[i] : qname_ibq_sec_t7[i], + base[i], size[i], thres[i], G_IBQRDADDR(p[0]) & 0xfff, + G_IBQWRADDR(p[1]) & 0xfff, G_QUESOPCNT(p[3]), + G_QUEEOPCNT(p[3]), G_T7_QUEREMFLITS(p[2]) * 16); + } + + for ( ; i < CIM_NUM_IBQ_T7 + CIM_NUM_OBQ_T7; i++, p += 4, wr += 2) { + if (!size[i]) + continue; + + sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", + coreid == 0 ? 
qname_obq_t7[i - CIM_NUM_IBQ_T7] : + qname_obq_sec_t7[i - CIM_NUM_IBQ_T7], + base[i], size[i], G_QUERDADDR(p[0]) & 0xfff, + wr[0] << 1, G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), + G_T7_QUEREMFLITS(p[2]) * 16); + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + return (rc); +} + static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { @@ -9807,18 +10044,25 @@ static const char * const devlog_facility_strings[] = { }; static int -sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags) +sbuf_devlog(struct adapter *sc, int coreid, struct sbuf *sb, int flags) { int i, j, rc, nentries, first = 0; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; + uint32_t addr, size; uint64_t ftstamp = UINT64_MAX; + KASSERT(coreid >= 0 && coreid < sc->params.ncores, + ("%s: bad coreid %d\n", __func__, coreid)); + if (dparams->addr == 0) return (ENXIO); + size = dparams->size / sc->params.ncores; + addr = dparams->addr + coreid * size; + MPASS(flags == M_WAITOK || flags == M_NOWAIT); - buf = malloc(dparams->size, M_CXGBE, M_ZERO | flags); + buf = malloc(size, M_CXGBE, M_ZERO | flags); if (buf == NULL) return (ENOMEM); @@ -9826,13 +10070,12 @@ sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags) if (hw_off_limits(sc)) rc = ENXIO; else - rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, - dparams->size); + rc = read_via_memwin(sc, 1, addr, (void *)buf, size); mtx_unlock(&sc->reg_lock); if (rc != 0) goto done; - nentries = dparams->size / sizeof(struct fw_devlog_e); + nentries = size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; @@ -9884,14 +10127,24 @@ static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; - int rc; + int rc, i, coreid = arg2; struct sbuf *sb; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); - - rc = sbuf_devlog(sc, sb, M_WAITOK); + if (coreid == -1) { + /* -1 means all cores */ + for (i = rc = 0; i < sc->params.ncores && rc == 0; i++) { + if 
(sc->params.ncores > 0) + sbuf_printf(sb, "=== CIM core %u ===\n", i); + rc = sbuf_devlog(sc, i, sb, M_WAITOK); + } + } else { + KASSERT(coreid >= 0 && coreid < sc->params.ncores, + ("%s: bad coreid %d\n", __func__, coreid)); + rc = sbuf_devlog(sc, coreid, sb, M_WAITOK); + } if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); @@ -9901,7 +10154,7 @@ sysctl_devlog(SYSCTL_HANDLER_ARGS) static void dump_devlog(struct adapter *sc) { - int rc; + int rc, i; struct sbuf sb; if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb) { @@ -9909,13 +10162,15 @@ dump_devlog(struct adapter *sc) device_get_nameunit(sc->dev)); return; } - rc = sbuf_devlog(sc, &sb, M_WAITOK); + for (i = rc = 0; i < sc->params.ncores && rc == 0; i++) { + if (sc->params.ncores > 0) + sbuf_printf(&sb, "=== CIM core %u ===\n", i); + rc = sbuf_devlog(sc, i, &sb, M_WAITOK); + } if (rc == 0) { - rc = sbuf_finish(&sb); - if (rc == 0) { - log(LOG_DEBUG, "%s: device log follows.\n%s", - device_get_nameunit(sc->dev), sbuf_data(&sb)); - } + sbuf_finish(&sb); + log(LOG_DEBUG, "%s: device log follows.\n%s", + device_get_nameunit(sc->dev), sbuf_data(&sb)); } sbuf_delete(&sb); } diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c index 12aaca6ee77a..0135bec6e2c1 100644 --- a/sys/dev/cxgbe/t4_netmap.c +++ b/sys/dev/cxgbe/t4_netmap.c @@ -424,9 +424,13 @@ alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); - if (nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID) - c.alloc_to_len16 |= htobe32(F_FW_EQ_ETH_CMD_ALLOC); - else + if (nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID) { + const int core = sc->params.ncores > 1 ? 
+ nm_txq->nid % sc->params.ncores : 0; + + c.alloc_to_len16 |= htobe32(F_FW_EQ_ETH_CMD_ALLOC | + V_FW_EQ_ETH_CMD_COREGROUP(core)); + } else c.eqid_pkd = htobe32(V_FW_EQ_ETH_CMD_EQID(nm_txq->cntxt_id)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index ce506f6c9192..46f95606743c 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -1,8 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2011 Chelsio Communications, Inc. - * All rights reserved. + * Copyright (c) 2011, 2025 Chelsio Communications. * Written by: Navdeep Parhar <np@FreeBSD.org> * * Redistribution and use in source and binary forms, with or without @@ -259,17 +258,20 @@ static void free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); static void add_ofld_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_ofld_rxq *); #endif -static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); -static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); +static int ctrl_eq_alloc(struct adapter *, struct sge_eq *, int); +static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *, + int); #if defined(TCP_OFFLOAD) || defined(RATELIMIT) -static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); +static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *, + int); #endif static int alloc_eq(struct adapter *, struct sge_eq *, struct sysctl_ctx_list *, struct sysctl_oid *); static void free_eq(struct adapter *, struct sge_eq *); static void add_eq_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_eq *); -static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *); +static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *, + int); static int free_eq_hwq(struct adapter *, struct vi_info 
*, struct sge_eq *); static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, struct sysctl_ctx_list *, struct sysctl_oid *); @@ -1064,9 +1066,9 @@ t4_setup_adapter_queues(struct adapter *sc) */ /* - * Control queues, one per port. + * Control queues. At least one per port and per internal core. */ - for_each_port(sc, i) { + for (i = 0; i < sc->sge.nctrlq; i++) { rc = alloc_ctrlq(sc, i); if (rc != 0) return (rc); @@ -1087,7 +1089,7 @@ t4_teardown_adapter_queues(struct adapter *sc) if (sc->sge.ctrlq != NULL) { MPASS(!(sc->flags & IS_VF)); /* VFs don't allocate ctrlq. */ - for_each_port(sc, i) + for (i = 0; i < sc->sge.nctrlq; i++) free_ctrlq(sc, i); } free_fwq(sc); @@ -3849,7 +3851,7 @@ alloc_ctrlq(struct adapter *sc, int idx) struct sysctl_oid *oid; struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; - MPASS(idx < sc->params.nports); + MPASS(idx < sc->sge.nctrlq); if (!(ctrlq->eq.flags & EQ_SW_ALLOCATED)) { MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); @@ -3861,8 +3863,8 @@ alloc_ctrlq(struct adapter *sc, int idx) snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev), idx); - init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, idx, - &sc->sge.fwq, name); + init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, + idx % sc->params.nports, &sc->sge.fwq, name); rc = alloc_wrq(sc, NULL, ctrlq, &sc->ctx, oid); if (rc != 0) { CH_ERR(sc, "failed to allocate ctrlq%d: %d\n", idx, rc); @@ -3877,7 +3879,7 @@ alloc_ctrlq(struct adapter *sc, int idx) MPASS(ctrlq->nwr_pending == 0); MPASS(ctrlq->ndesc_needed == 0); - rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq); + rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq, idx); if (rc != 0) { CH_ERR(sc, "failed to create hw ctrlq%d: %d\n", idx, rc); return (rc); @@ -4265,18 +4267,20 @@ qsize_to_fthresh(int qsize) } static int -ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) +ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq, int idx) { - int rc, cntxt_id; + int rc, cntxt_id, core; struct fw_eq_ctrl_cmd c; int qsize = eq->sidx + 
sc->params.sge.spg_len / EQ_ESIZE; + core = sc->params.tid_qid_sel_mask != 0 ? idx % sc->params.ncores : 0; bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | *** 226 LINES SKIPPED ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202509291437.58TEbsq5017927>