Date: Wed, 7 Apr 2021 10:56:05 GMT From: Ka Ho Ng <khng@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: d7ffa208d929 - stable/13 - AMD-vi: Fix IOMMU device interrupts being overridden Message-ID: <202104071056.137Au5cr046747@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by khng: URL: https://cgit.FreeBSD.org/src/commit/?id=d7ffa208d92933e0c72564bfd422aec49dca2fd4 commit d7ffa208d92933e0c72564bfd422aec49dca2fd4 Author: Ka Ho Ng <khng@FreeBSD.org> AuthorDate: 2021-03-22 09:33:43 +0000 Commit: Ka Ho Ng <khng@FreeBSD.org> CommitDate: 2021-04-07 10:55:38 +0000 AMD-vi: Fix IOMMU device interrupts being overridden Currently, AMD-vi PCI-e passthrough will lead to the following lines in dmesg: "kernel: CPU0: local APIC error 0x40 ivhd0: Error: completion failed tail:0x720, head:0x0." After some tracing, the problem is due to the interaction with amdvi_alloc_intr_resources() and pci_driver_added(). In ivrs_drv, the identification of AMD-vi IVHD is done by walking over the ACPI IVRS table and ivhdX device_ts are added under the acpi bus, while there are no driver handling the corresponding IOMMU PCI function. In amdvi_alloc_intr_resources(), the MSI intr are allocated with the ivhdX device_t instead of the IOMMU PCI function device_t. bus_setup_intr() is called on ivhdX. the IOMMU pci function device_t is only used for pci_enable_msi(). Since bus_setup_intr() is not called on IOMMU pci function, the IOMMU PCI function device_t's dinfo->cfg.msi is never updated to reflect the supposed msi_data and msi_addr. So the msi_data and msi_addr stay in the value 0. When pci_driver_added() tried to loop over the children of a pci bus, and do pci_cfg_restore() on each of them, msi_addr and msi_data with value 0 will be written to the MSI capability of the IOMMU pci function, thus explaining the errors in dmesg. This change includes an amdiommu driver which currently does attaching, detaching and providing DEVMETHODs for setting up and tearing down interrupt. The purpose of the driver is to prevent pci_driver_added() from calling pci_cfg_restore() on the IOMMU PCI function device_t. The introduction of the amdiommu driver handles allocation of an IRQ resource within the IOMMU PCI function, so that the dinfo->cfg.msi is populated. This has been tested on EPYC Rome 7282 with Radeon 5700XT GPU. Sponsored by: The FreeBSD Foundation Reviewed by: jhb Approved by: philip (mentor) MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D28984 (cherry picked from commit 74ada297e8978b8efda3dffdd1bb24aee7c5faa4) --- sys/amd64/vmm/amd/amdiommu.c | 185 +++++++++++++++++++++++++++++++++++++++++ sys/amd64/vmm/amd/amdvi_hw.c | 89 +++----------------- sys/amd64/vmm/amd/amdvi_priv.h | 5 +- sys/amd64/vmm/amd/ivhd_if.m | 46 ++++++++++ sys/amd64/vmm/amd/ivrs_drv.c | 6 ++ sys/modules/vmm/Makefile | 3 + 6 files changed, 253 insertions(+), 81 deletions(-) diff --git a/sys/amd64/vmm/amd/amdiommu.c b/sys/amd64/vmm/amd/amdiommu.c new file mode 100644 index 000000000000..4ded23dff003 --- /dev/null +++ b/sys/amd64/vmm/amd/amdiommu.c @@ -0,0 +1,185 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 The FreeBSD Fondation + * + * Portions of this software were developed by Ka Ho Ng + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/rman.h> + +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> + +#include "amdvi_priv.h" +#include "ivhd_if.h" + +struct amdiommu_softc { + struct resource *event_res; /* Event interrupt resource. */ + void *event_tag; /* Event interrupt tag. */ + int event_rid; +}; + +static int amdiommu_probe(device_t); +static int amdiommu_attach(device_t); +static int amdiommu_detach(device_t); +static int ivhd_setup_intr(device_t, driver_intr_t, void *, + const char *); +static int ivhd_teardown_intr(device_t); + +static device_method_t amdiommu_methods[] = { + /* device interface */ + DEVMETHOD(device_probe, amdiommu_probe), + DEVMETHOD(device_attach, amdiommu_attach), + DEVMETHOD(device_detach, amdiommu_detach), + DEVMETHOD(ivhd_setup_intr, ivhd_setup_intr), + DEVMETHOD(ivhd_teardown_intr, ivhd_teardown_intr), + DEVMETHOD_END +}; +static driver_t amdiommu_driver = { + "amdiommu", + amdiommu_methods, + sizeof(struct amdiommu_softc), +}; + +static int +amdiommu_probe(device_t dev) +{ + int error; + int capoff; + + /* + * Check base class and sub-class + */ + if (pci_get_class(dev) != PCIC_BASEPERIPH || + pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU) + return (ENXIO); + + /* + * A IOMMU capability block carries a 0Fh capid. + */ + error = pci_find_cap(dev, PCIY_SECDEV, &capoff); + if (error) + return (ENXIO); + + /* + * bit [18:16] == 011b indicates the capability block is IOMMU + * capability block. If the field is not set to 011b, bail out. + */ + if ((pci_read_config(dev, capoff + 2, 2) & 0x7) != 0x3) + return (ENXIO); + + return (BUS_PROBE_SPECIFIC); +} + +static int +amdiommu_attach(device_t dev) +{ + + device_set_desc(dev, "AMD-Vi/IOMMU PCI function"); + return (0); +} + +static int +amdiommu_detach(device_t dev) +{ + + return (0); +} + +static int +ivhd_setup_intr(device_t dev, driver_intr_t handler, void *arg, + const char *desc) +{ + struct amdiommu_softc *sc; + int error, msicnt; + + sc = device_get_softc(dev); + msicnt = 1; + if (sc->event_res != NULL) + panic("%s is called without intr teardown", __func__); + sc->event_rid = 1; + + error = pci_alloc_msi(dev, &msicnt); + if (error) { + device_printf(dev, "Couldn't find event MSI IRQ resource.\n"); + return (ENOENT); + } + + sc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &sc->event_rid, RF_ACTIVE); + if (sc->event_res == NULL) { + device_printf(dev, "Unable to allocate event INTR resource.\n"); + error = ENOMEM; + goto fail; + } + + error = bus_setup_intr(dev, sc->event_res, INTR_TYPE_MISC | INTR_MPSAFE, + NULL, handler, arg, &sc->event_tag); + if (error) { + device_printf(dev, "Fail to setup event intr\n"); + goto fail; + } + + bus_describe_intr(dev, sc->event_res, sc->event_tag, "%s", desc); + return (0); + +fail: + ivhd_teardown_intr(dev); + return (error); +} + +static int +ivhd_teardown_intr(device_t dev) +{ + struct amdiommu_softc *sc; + + sc = device_get_softc(dev); + + if (sc->event_res != NULL) { + bus_teardown_intr(dev, sc->event_res, sc->event_tag); + sc->event_tag = NULL; + } + if (sc->event_res != NULL) { + bus_release_resource(dev, SYS_RES_IRQ, sc->event_rid, + sc->event_res); + sc->event_res = NULL; + } + pci_release_msi(dev); + return (0); +} + +static devclass_t amdiommu_devclass; + +/* This driver has to be loaded before ivhd */ +DRIVER_MODULE(amdiommu, pci, amdiommu_driver, amdiommu_devclass, 0, 0); +MODULE_DEPEND(amdiommu, pci, 1, 1, 1); diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c index 132ae8389f2a..3581b43ea4de 100644 --- a/sys/amd64/vmm/amd/amdvi_hw.c +++ b/sys/amd64/vmm/amd/amdvi_hw.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> #include <machine/pci_cfgreg.h> +#include "ivhd_if.h" #include "pcib_if.h" #include "io/iommu.h" @@ -771,99 +772,33 @@ amdvi_free_evt_intr_res(device_t dev) { struct amdvi_softc *softc; + device_t mmio_dev; softc = device_get_softc(dev); - if (softc->event_tag != NULL) { - bus_teardown_intr(dev, softc->event_res, softc->event_tag); - } - if (softc->event_res != NULL) { - bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid, - softc->event_res); - } - bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid); - PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)), - dev, 1, &softc->event_irq); + mmio_dev = softc->pci_dev; + + IVHD_TEARDOWN_INTR(mmio_dev); } static bool amdvi_alloc_intr_resources(struct amdvi_softc *softc) { struct amdvi_ctrl *ctrl; - device_t dev, pcib; - device_t mmio_dev; - uint64_t msi_addr; - uint32_t msi_data; + device_t dev, mmio_dev; int err; dev = softc->dev; - pcib = device_get_parent(device_get_parent(dev)); - mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid), - PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid)); - if (device_is_attached(mmio_dev)) { - device_printf(dev, - "warning: IOMMU device is claimed by another driver %s\n", - device_get_driver(mmio_dev)->name); - } - - softc->event_irq = -1; - softc->event_rid = 0; - - /* - * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one - * interrupt. XXX: Enable MSI/X support. - */ - err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq); - if (err) { - device_printf(dev, - "Couldn't find event MSI IRQ resource.\n"); - return (ENOENT); - } - - err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid, - softc->event_irq, 1); - if (err) { - device_printf(dev, "Couldn't set event MSI resource.\n"); - return (ENXIO); - } - - softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, - &softc->event_rid, RF_ACTIVE); - if (!softc->event_res) { - device_printf(dev, - "Unable to allocate event INTR resource.\n"); - return (ENOMEM); - } - - if (bus_setup_intr(dev, softc->event_res, - INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr, - softc, &softc->event_tag)) { - device_printf(dev, "Fail to setup event intr\n"); - bus_release_resource(softc->dev, SYS_RES_IRQ, - softc->event_rid, softc->event_res); - softc->event_res = NULL; - return (ENXIO); - } - - bus_describe_intr(dev, softc->event_res, softc->event_tag, - "fault"); - - err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr, - &msi_data); - if (err) { - device_printf(dev, - "Event interrupt config failed, err=%d.\n", - err); - amdvi_free_evt_intr_res(softc->dev); - return (err); - } + mmio_dev = softc->pci_dev; /* Clear interrupt status bits. */ ctrl = softc->ctrl; ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR; - /* Now enable MSI interrupt. */ - pci_enable_msi(mmio_dev, msi_addr, msi_data); - return (0); + err = IVHD_SETUP_INTR(mmio_dev, amdvi_event_intr, softc, "fault"); + if (err) + device_printf(dev, "Interrupt setup failed on %s\n", + device_get_nameunit(mmio_dev)); + return (err); } static void diff --git a/sys/amd64/vmm/amd/amdvi_priv.h b/sys/amd64/vmm/amd/amdvi_priv.h index 2db6914f08f4..0eae7ca6ca4c 100644 --- a/sys/amd64/vmm/amd/amdvi_priv.h +++ b/sys/amd64/vmm/amd/amdvi_priv.h @@ -375,17 +375,14 @@ enum IvrsType struct amdvi_softc { struct amdvi_ctrl *ctrl; /* Control area. */ device_t dev; /* IOMMU device. */ + device_t pci_dev; /* IOMMU PCI function device. */ enum IvrsType ivhd_type; /* IOMMU IVHD type. */ bool iotlb; /* IOTLB supported by IOMMU */ struct amdvi_cmd *cmd; /* Command descriptor area. */ int cmd_max; /* Max number of commands. */ uint64_t cmp_data; /* Command completion write back. */ struct amdvi_event *event; /* Event descriptor area. */ - struct resource *event_res; /* Event interrupt resource. */ - void *event_tag; /* Event interrupt tag. */ int event_max; /* Max number of events. */ - int event_irq; - int event_rid; /* ACPI various flags. */ uint32_t ivhd_flag; /* ACPI IVHD flag. */ uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */ diff --git a/sys/amd64/vmm/amd/ivhd_if.m b/sys/amd64/vmm/amd/ivhd_if.m new file mode 100644 index 000000000000..eaae01ae0131 --- /dev/null +++ b/sys/amd64/vmm/amd/ivhd_if.m @@ -0,0 +1,46 @@ +#- +# Copyright (c) 2021 The FreeBSD Fondation +# +# Portions of this software were developed by Ka Ho Ng +# under sponsorship from the FreeBSD Foundation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/bus.h> + +INTERFACE ivhd; + +METHOD int setup_intr { + device_t dev; + driver_intr_t handler; + void *arg; + const char *desc; +}; + +METHOD int teardown_intr { + device_t dev; +}; diff --git a/sys/amd64/vmm/amd/ivrs_drv.c b/sys/amd64/vmm/amd/ivrs_drv.c index d33229623a96..6291895c212f 100644 --- a/sys/amd64/vmm/amd/ivrs_drv.c +++ b/sys/amd64/vmm/amd/ivrs_drv.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016, Anish Gupta (anish@freebsd.org) + * Copyright (c) 2021 The FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include <contrib/dev/acpica/include/acpi.h> #include <contrib/dev/acpica/include/accommon.h> #include <dev/acpica/acpivar.h> +#include <dev/pci/pcireg.h> +#include <dev/pci/pcivar.h> #include "io/iommu.h" #include "amdvi_priv.h" @@ -628,6 +631,9 @@ ivhd_attach(device_t dev) softc->dev = dev; ivhd = ivhd_hdrs[unit]; KASSERT(ivhd, ("ivhd is NULL")); + softc->pci_dev = pci_find_bsf(PCI_RID2BUS(ivhd->Header.DeviceId), + PCI_RID2SLOT(ivhd->Header.DeviceId), + PCI_RID2FUNC(ivhd->Header.DeviceId)); softc->ivhd_type = ivhd->Header.Type; softc->pci_seg = ivhd->PciSegmentGroup; diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index b5d62c358272..ef0d9dcb6786 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -51,6 +51,9 @@ SRCS+= ept.c \ # amd-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/amd SRCS+= vmcb.c \ + amdiommu.c \ + ivhd_if.c \ + ivhd_if.h \ svm.c \ svm_support.S \ npt.c \
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202104071056.137Au5cr046747>