Date: Tue, 14 Apr 2026 09:14:18 +0000 From: Sumit Saxena <ssaxena@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Cc: Sreekanth Reddy <sreekanth.reddy@broadcom.com> Subject: git: d2b96f654a67 - main - iflib: Fix panic observed while doing sysctl -a with if_bnxt unload Message-ID: <69de056a.1efdc.319ecdb4@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by ssaxena: URL: https://cgit.FreeBSD.org/src/commit/?id=d2b96f654a672f6059c5c623c276dcd76841ed12 commit d2b96f654a672f6059c5c623c276dcd76841ed12 Author: Sreekanth Reddy <sreekanth.reddy@broadcom.com> AuthorDate: 2026-04-13 06:28:08 +0000 Commit: Sumit Saxena <ssaxena@FreeBSD.org> CommitDate: 2026-04-14 09:13:34 +0000 iflib: Fix panic observed while doing sysctl -a with if_bnxt unload The kernel panic call trace below was observed when running sysctl -a while the if_bnxt driver was being unloaded: Fatal trap 9: general protection fault while in kernel mode KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe02a7569940 vpanic() at vpanic+0x136/frame 0xfffffe02a7569a70 panic() at panic+0x43/frame 0xfffffe02a7569ad0 trap_fatal() at trap_fatal+0x68/frame 0xfffffe02a7569af0 calltrap() at calltrap+0x8/frame 0xfffffe02a7569af0 trap 0x9, rip = 0xffffffff80c0b411, rsp = 0xfffffe02a7569bc0, rbp = 0xfffffe02a7569be0 --- sysctl_handle_counter_u64() at sysctl_handle_counter_u64+0x61/frame 0xfffffe02a7569be0 sysctl_root_handler_locked() at sysctl_root_handler_locked+0x9c/frame 0xfffffe02a7569c30 sysctl_root() at sysctl_root+0x22f/frame 0xfffffe02a7569cb0 userland_sysctl() at userland_sysctl+0x196/frame 0xfffffe02a7569d50 sys___sysctl() at sys___sysctl+0x65/frame 0xfffffe02a7569e00 amd64_syscall() at amd64_syscall+0x169/frame 0xfffffe02a7569f30 fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe02a7569f30 Root Cause: iflib adds per-device sysctl nodes under the device tree using the device sysctl context. Some of those nodes are counter sysctls that point at fields inside txq->ift_br. When the if_bnxt driver is unloaded, iflib_device_deregister runs and calls iflib_tx_structures_free, which frees each txq's ift_br. The device sysctl tree is only freed when the device is destroyed. If sysctl -a runs during unload, it can still traverse the device tree and call sysctl_handle_counter_u64 for those nodes. 
The handler does counter_u64_fetch(*(counter_u64_t *)arg1). By then arg1 can point into freed memory, which leads to a use-after-free kernel panic. Fix: iflib now uses its own sysctl context for all iflib-related nodes instead of using the device’s context, and the iflib sysctl context is now removed before any queue/ring memory is freed. MFC after: 2 weeks Reviewed by: gallatin, ssaxena, #iflib Differential Revision: https://reviews.freebsd.org/D55981 --- sys/net/iflib.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index f9d0b1af0f83..186c41d9f839 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -190,6 +190,7 @@ struct iflib_ctx { struct ifmedia ifc_media; struct ifmedia *ifc_mediap; + struct sysctl_ctx_list ifc_sysctl_ctx; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; @@ -5293,6 +5294,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_queues: + sysctl_ctx_free(&ctx->ifc_sysctl_ctx); + ctx->ifc_sysctl_node = NULL; taskqueue_free(ctx->ifc_tq); iflib_tqg_detach(ctx); iflib_tx_structures_free(ctx); @@ -5332,6 +5335,9 @@ iflib_device_deregister(if_ctx_t ctx) if_t ifp = ctx->ifc_ifp; device_t dev = ctx->ifc_dev; + sysctl_ctx_free(&ctx->ifc_sysctl_ctx); + ctx->ifc_sysctl_node = NULL; + /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev, "Vlan in use, detach first\n"); @@ -6787,62 +6793,61 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; - struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; - ctx_list = device_get_sysctl_ctx(dev); + sysctl_ctx_init(&ctx->ifc_sysctl_ctx); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); - ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, + ctx->ifc_sysctl_node = node = 
SYSCTL_ADD_NODE(&ctx->ifc_sysctl_ctx, child, OID_AUTO, "iflib", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); - SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", + SYSCTL_ADD_CONST_STRING(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); - SYSCTL_ADD_BOOL(ctx_list, oid_list, OID_AUTO, "simple_tx", + SYSCTL_ADD_BOOL(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "simple_tx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_simple_tx, 0, "use simple tx ring"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", + SYSCTL_ADD_INT(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSI-X (default 0)"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the RX budget"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, "cause TX to abdicate instead of running to completion"); ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; - 
SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "core_offset", CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, "offset to start using cores at"); - SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", + SYSCTL_ADD_U8(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "separate_txrx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, "use separate cores for TX and RX"); - SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores", + SYSCTL_ADD_U8(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "use_logical_cores", CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0, "try to make use of logical cores for TX and RX"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "use_extra_msix_vectors", + SYSCTL_ADD_U16(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "use_extra_msix_vectors", CTLFLAG_RDTUN, &ctx->ifc_sysctl_extra_msix_vectors, 0, "attempt to reserve the given number of extra MSI-X vectors during driver load for the creation of additional interfaces later"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "allocated_msix_vectors", + SYSCTL_ADD_INT(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "allocated_msix_vectors", CTLFLAG_RDTUN, &ctx->ifc_softc_ctx.isc_vectors, 0, "total # of MSI-X vectors allocated by driver"); /* XXX change for per-queue sizes */ - SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", + SYSCTL_ADD_PROC(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of TX descriptors to use, 0 = use default #"); - SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", + SYSCTL_ADD_PROC(&ctx->ifc_sysctl_ctx, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of RX descriptors to use, 0 = use default #"); @@ -6853,9 +6858,8 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) 
{ if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; - device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; - struct sysctl_ctx_list *ctx_list; + struct sysctl_ctx_list *ctx_list = &ctx->ifc_sysctl_ctx; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; @@ -6864,7 +6868,6 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; - ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node);home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69de056a.1efdc.319ecdb4>
