From owner-svn-src-all@FreeBSD.ORG Wed Jul 11 02:57:19 2012 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D236C1065670; Wed, 11 Jul 2012 02:57:19 +0000 (UTC) (envelope-from grehan@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id BA9108FC15; Wed, 11 Jul 2012 02:57:19 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q6B2vJFr018102; Wed, 11 Jul 2012 02:57:19 GMT (envelope-from grehan@svn.freebsd.org) Received: (from grehan@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q6B2vJvf018096; Wed, 11 Jul 2012 02:57:19 GMT (envelope-from grehan@svn.freebsd.org) Message-Id: <201207110257.q6B2vJvf018096@svn.freebsd.org> From: Peter Grehan Date: Wed, 11 Jul 2012 02:57:19 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r238360 - in head/sys/dev/virtio: . balloon block network pci X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 11 Jul 2012 02:57:19 -0000 Author: grehan Date: Wed Jul 11 02:57:19 2012 New Revision: 238360 URL: http://svn.freebsd.org/changeset/base/238360 Log: Various VirtIO improvements PCI: - Properly handle interrupt fallback from MSIX to MSI to legacy. The host may not have sufficient resources to support MSIX, so we must be able to fallback to legacy interrupts. - Add interface to get the (sub) vendor and device IDs. - Rename flags to VTPCI_FLAG_* like other VirtIO drivers. Block: - No longer allocate vtblk_requests from separate UMA zone. malloc(9) from M_DEVBUF is sufficient. Assert segment counts at allocation. - More verbose error and debug messages. Network: - Remove stray write once variable. Virtqueue: - Shuffle code around in preparation of converting the mb()s to the appropriate atomic(9) operations. - Only walk the descriptor chain when freeing if INVARIANTS is defined since the result is only KASSERT()ed. Submitted by: Bryan Venteicher (bryanv@daemoninthecloset.org) Modified: head/sys/dev/virtio/balloon/virtio_balloon.c head/sys/dev/virtio/block/virtio_blk.c head/sys/dev/virtio/network/if_vtnet.c head/sys/dev/virtio/pci/virtio_pci.c head/sys/dev/virtio/pci/virtio_pci.h head/sys/dev/virtio/virtio.c head/sys/dev/virtio/virtio.h head/sys/dev/virtio/virtio_ring.h head/sys/dev/virtio/virtqueue.c head/sys/dev/virtio/virtqueue.h Modified: head/sys/dev/virtio/balloon/virtio_balloon.c ============================================================================== --- head/sys/dev/virtio/balloon/virtio_balloon.c Wed Jul 11 01:04:59 2012 (r238359) +++ head/sys/dev/virtio/balloon/virtio_balloon.c Wed Jul 11 02:57:19 2012 (r238360) @@ -412,7 +412,6 @@ vtballoon_send_page_frames(struct vtball * interrupt handler will wake us up. */ VTBALLOON_LOCK(sc); - while ((c = virtqueue_dequeue(vq, NULL)) == NULL) msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0); VTBALLOON_UNLOCK(sc); Modified: head/sys/dev/virtio/block/virtio_blk.c ============================================================================== --- head/sys/dev/virtio/block/virtio_blk.c Wed Jul 11 01:04:59 2012 (r238359) +++ head/sys/dev/virtio/block/virtio_blk.c Wed Jul 11 02:57:19 2012 (r238360) @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include @@ -119,7 +118,7 @@ static int vtblk_shutdown(device_t); static int vtblk_open(struct disk *); static int vtblk_close(struct disk *); static int vtblk_ioctl(struct disk *, u_long, void *, int, - struct thread *); + struct thread *); static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t); static void vtblk_strategy(struct bio *); @@ -193,7 +192,7 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_ mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED) #define VTBLK_DISK_NAME "vtbd" -#define VTBLK_QUIESCE_TIMEOUT (30 * hz) +#define VTBLK_QUIESCE_TIMEOUT (30 * hz) /* * Each block request uses at least two segments - one for the header @@ -201,8 +200,6 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_ */ #define VTBLK_MIN_SEGMENTS 2 -static uma_zone_t vtblk_req_zone; - static device_method_t vtblk_methods[] = { /* Device methods. */ DEVMETHOD(device_probe, vtblk_probe), @@ -236,19 +233,8 @@ vtblk_modevent(module_t mod, int type, v switch (type) { case MOD_LOAD: - vtblk_req_zone = uma_zcreate("vtblk_request", - sizeof(struct vtblk_request), - NULL, NULL, NULL, NULL, 0, 0); - break; case MOD_QUIESCE: case MOD_UNLOAD: - if (uma_zone_get_cur(vtblk_req_zone) > 0) - error = EBUSY; - else if (type == MOD_UNLOAD) { - uma_zdestroy(vtblk_req_zone); - vtblk_req_zone = NULL; - } - break; case MOD_SHUTDOWN: break; default: @@ -316,7 +302,7 @@ vtblk_attach(device_t dev) } sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg); - if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { + if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { error = EINVAL; device_printf(dev, "fewer than minimum number of segments " "allowed: %d\n", sc->vtblk_max_nsegs); @@ -493,7 +479,6 @@ vtblk_dump(void *arg, void *virtual, vm_ int error; dp = arg; - error = 0; if ((sc = dp->d_drv1) == NULL) return (ENXIO); @@ -539,7 +524,7 @@ vtblk_strategy(struct bio *bp) return; } -#ifdef INVARIANTS +#ifdef INVARIANTS /* * Prevent read/write buffers spanning too many segments from * getting into the queue. This should only trip if d_maxsize @@ -547,13 +532,13 @@ vtblk_strategy(struct bio *bp) */ if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { int nsegs, max_nsegs; - + nsegs = sglist_count(bp->bio_data, bp->bio_bcount); max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS; KASSERT(nsegs <= max_nsegs, - ("bio spanned too many segments: %d, max: %d", - nsegs, max_nsegs)); + ("bio %p spanned too many segments: %d, max: %d", + bp, nsegs, max_nsegs)); } #endif @@ -800,27 +785,22 @@ vtblk_execute_request(struct vtblk_softc VTBLK_LOCK_ASSERT(sc); sglist_reset(sg); - error = sglist_append(sg, &req->vbr_hdr, - sizeof(struct virtio_blk_outhdr)); - KASSERT(error == 0, ("error adding header to sglist")); - KASSERT(sg->sg_nseg == 1, - ("header spanned multiple segments: %d", sg->sg_nseg)); + + sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr)); if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { error = sglist_append(sg, bp->bio_data, bp->bio_bcount); - KASSERT(error == 0, ("error adding buffer to sglist")); + if (error || sg->sg_nseg == sg->sg_maxseg) + panic("%s: data buffer too big bio:%p error:%d", + __FUNCTION__, bp, error); /* BIO_READ means the host writes into our buffer. */ if (bp->bio_cmd == BIO_READ) - writable += sg->sg_nseg - 1; + writable = sg->sg_nseg - 1; } - error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); - KASSERT(error == 0, ("error adding ack to sglist")); writable++; - - KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS, - ("fewer than min segments: %d", sg->sg_nseg)); + sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); readable = sg->sg_nseg - writable; @@ -995,12 +975,10 @@ vtblk_flush_dump(struct vtblk_softc *sc) static int vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req) { - device_t dev; struct virtqueue *vq; struct vtblk_request *r; int error; - dev = sc->vtblk_dev; vq = sc->vtblk_vq; if (!virtqueue_empty(vq)) @@ -1013,12 +991,12 @@ vtblk_poll_request(struct vtblk_softc *s virtqueue_notify(vq); r = virtqueue_poll(vq, NULL); - KASSERT(r == req, ("unexpected request response")); + KASSERT(r == req, ("unexpected request response: %p/%p", r, req)); error = vtblk_request_error(req); if (error && bootverbose) { - device_printf(dev, "vtblk_poll_request: IO error: %d\n", - error); + device_printf(sc->vtblk_dev, + "%s: IO error: %d\n", __FUNCTION__, error); } return (error); @@ -1090,6 +1068,20 @@ vtblk_drain(struct vtblk_softc *sc) vtblk_free_requests(sc); } +#ifdef INVARIANTS +static void +vtblk_request_invariants(struct vtblk_request *req) +{ + int hdr_nsegs, ack_nsegs; + + hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)); + ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)); + + KASSERT(hdr_nsegs == 1, ("request header crossed page boundary")); + KASSERT(ack_nsegs == 1, ("request ack crossed page boundary")); +} +#endif + static int vtblk_alloc_requests(struct vtblk_softc *sc) { @@ -1107,10 +1099,14 @@ vtblk_alloc_requests(struct vtblk_softc nreqs /= VTBLK_MIN_SEGMENTS; for (i = 0; i < nreqs; i++) { - req = uma_zalloc(vtblk_req_zone, M_NOWAIT); + req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT); if (req == NULL) return (ENOMEM); +#ifdef INVARIANTS + vtblk_request_invariants(req); +#endif + sc->vtblk_request_count++; vtblk_enqueue_request(sc, req); } @@ -1128,10 +1124,11 @@ vtblk_free_requests(struct vtblk_softc * while ((req = vtblk_dequeue_request(sc)) != NULL) { sc->vtblk_request_count--; - uma_zfree(vtblk_req_zone, req); + free(req, M_DEVBUF); } - KASSERT(sc->vtblk_request_count == 0, ("leaked requests")); + KASSERT(sc->vtblk_request_count == 0, + ("leaked requests: %d", sc->vtblk_request_count)); } static struct vtblk_request * Modified: head/sys/dev/virtio/network/if_vtnet.c ============================================================================== --- head/sys/dev/virtio/network/if_vtnet.c Wed Jul 11 01:04:59 2012 (r238359) +++ head/sys/dev/virtio/network/if_vtnet.c Wed Jul 11 02:57:19 2012 (r238360) @@ -748,11 +748,9 @@ vtnet_is_link_up(struct vtnet_softc *sc) static void vtnet_update_link_status(struct vtnet_softc *sc) { - device_t dev; struct ifnet *ifp; int link; - dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; link = vtnet_is_link_up(sc); Modified: head/sys/dev/virtio/pci/virtio_pci.c ============================================================================== --- head/sys/dev/virtio/pci/virtio_pci.c Wed Jul 11 01:04:59 2012 (r238359) +++ head/sys/dev/virtio/pci/virtio_pci.c Wed Jul 11 02:57:19 2012 (r238360) @@ -57,12 +57,15 @@ struct vtpci_softc { struct resource *vtpci_msix_res; uint64_t vtpci_features; uint32_t vtpci_flags; -#define VIRTIO_PCI_FLAG_NO_MSI 0x0001 -#define VIRTIO_PCI_FLAG_MSI 0x0002 -#define VIRTIO_PCI_FLAG_NO_MSIX 0x0010 -#define VIRTIO_PCI_FLAG_MSIX 0x0020 -#define VIRTIO_PCI_FLAG_SHARED_MSIX 0x0040 +#define VTPCI_FLAG_NO_MSI 0x0001 +#define VTPCI_FLAG_NO_MSIX 0x0002 +#define VTPCI_FLAG_LEGACY 0x1000 +#define VTPCI_FLAG_MSI 0x2000 +#define VTPCI_FLAG_MSIX 0x4000 +#define VTPCI_FLAG_SHARED_MSIX 0x8000 +#define VTPCI_FLAG_ITYPE_MASK 0xF000 + /* This "bus" will only ever have one child. */ device_t vtpci_child_dev; struct virtio_feature_desc *vtpci_child_feat_desc; @@ -80,7 +83,8 @@ struct vtpci_softc { int vtpci_nvqs; struct vtpci_virtqueue { struct virtqueue *vq; - + /* Device did not provide a callback for this virtqueue. */ + int no_intr; /* Index into vtpci_intr_res[] below. Unused, then -1. */ int ires_idx; } vtpci_vqx[VIRTIO_MAX_VIRTQUEUES]; @@ -130,24 +134,39 @@ static void vtpci_describe_features(stru uint64_t); static void vtpci_probe_and_attach_child(struct vtpci_softc *); -static int vtpci_alloc_interrupts(struct vtpci_softc *, int, int, - struct vq_alloc_info *); -static int vtpci_alloc_intr_resources(struct vtpci_softc *, int, - struct vq_alloc_info *); -static int vtpci_alloc_msi(struct vtpci_softc *); -static int vtpci_alloc_msix(struct vtpci_softc *, int); +static int vtpci_alloc_msix(struct vtpci_softc *, int); +static int vtpci_alloc_msi(struct vtpci_softc *); +static int vtpci_alloc_intr_msix_pervq(struct vtpci_softc *); +static int vtpci_alloc_intr_msix_shared(struct vtpci_softc *); +static int vtpci_alloc_intr_msi(struct vtpci_softc *); +static int vtpci_alloc_intr_legacy(struct vtpci_softc *); +static int vtpci_alloc_intr_resources(struct vtpci_softc *); + +static int vtpci_setup_legacy_interrupt(struct vtpci_softc *, + enum intr_type); +static int vtpci_setup_msix_interrupts(struct vtpci_softc *, + enum intr_type); +static int vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type); + static int vtpci_register_msix_vector(struct vtpci_softc *, int, int); +static int vtpci_set_host_msix_vectors(struct vtpci_softc *); +static int vtpci_reinit_virtqueue(struct vtpci_softc *, int); static void vtpci_free_interrupts(struct vtpci_softc *); static void vtpci_free_virtqueues(struct vtpci_softc *); +static void vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *); static void vtpci_release_child_resources(struct vtpci_softc *); static void vtpci_reset(struct vtpci_softc *); +static void vtpci_select_virtqueue(struct vtpci_softc *, int); + static int vtpci_legacy_intr(void *); static int vtpci_vq_shared_intr(void *); static int vtpci_vq_intr(void *); static int vtpci_config_intr(void *); +#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt + /* * I/O port read/write wrappers. */ @@ -252,7 +271,7 @@ vtpci_attach(device_t dev) } if (pci_find_cap(dev, PCIY_MSI, NULL) != 0) - sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI; + sc->vtpci_flags |= VTPCI_FLAG_NO_MSI; if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { rid = PCIR_BAR(1); @@ -261,7 +280,7 @@ vtpci_attach(device_t dev) } if (sc->vtpci_msix_res == NULL) - sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX; + sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX; vtpci_reset(sc); @@ -372,6 +391,16 @@ vtpci_read_ivar(device_t dev, device_t c switch (index) { case VIRTIO_IVAR_DEVTYPE: + case VIRTIO_IVAR_SUBDEVICE: + *result = pci_get_subdevice(dev); + break; + case VIRTIO_IVAR_VENDOR: + *result = pci_get_vendor(dev); + break; + case VIRTIO_IVAR_DEVICE: + *result = pci_get_device(dev); + break; + case VIRTIO_IVAR_SUBVENDOR: *result = pci_get_subdevice(dev); break; default: @@ -442,102 +471,97 @@ vtpci_alloc_virtqueues(device_t dev, int struct vq_alloc_info *vq_info) { struct vtpci_softc *sc; + struct virtqueue *vq; struct vtpci_virtqueue *vqx; struct vq_alloc_info *info; - int queue, error; - uint16_t vq_size; + int idx, error; + uint16_t size; sc = device_get_softc(dev); + error = 0; - if (sc->vtpci_nvqs != 0 || nvqs <= 0 || - nvqs > VIRTIO_MAX_VIRTQUEUES) + if (sc->vtpci_nvqs != 0) + return (EALREADY); + if (nvqs <= 0 || nvqs > VIRTIO_MAX_VIRTQUEUES) return (EINVAL); - error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info); - if (error) { - device_printf(dev, "cannot allocate interrupts\n"); - return (error); - } - - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_CONFIG_VECTOR, 0); - if (error) - return (error); - } + if (flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX) + sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX; - for (queue = 0; queue < nvqs; queue++) { - vqx = &sc->vtpci_vqx[queue]; - info = &vq_info[queue]; - - vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue); - - vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); - error = virtqueue_alloc(dev, queue, vq_size, - VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq); - if (error) - return (error); - - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx); - if (error) - return (error); + for (idx = 0; idx < nvqs; idx++) { + vqx = &sc->vtpci_vqx[idx]; + info = &vq_info[idx]; + + vtpci_select_virtqueue(sc, idx); + size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); + + error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN, + 0xFFFFFFFFUL, info, &vq); + if (error) { + device_printf(dev, + "cannot allocate virtqueue %d: %d\n", idx, error); + break; } vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, - virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + + vqx->vq = *info->vqai_vq = vq; + vqx->no_intr = info->vqai_intr == NULL; - *info->vqai_vq = vqx->vq; sc->vtpci_nvqs++; } - return (0); + return (error); } static int vtpci_setup_intr(device_t dev, enum intr_type type) { struct vtpci_softc *sc; - struct vtpci_intr_resource *ires; - struct vtpci_virtqueue *vqx; - int i, flags, error; + int attempt, error; sc = device_get_softc(dev); - flags = type | INTR_MPSAFE; - ires = &sc->vtpci_intr_res[0]; - - if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) { - error = bus_setup_intr(dev, ires->irq, flags, - vtpci_legacy_intr, NULL, sc, &ires->intrhand); - return (error); - } - - error = bus_setup_intr(dev, ires->irq, flags, vtpci_config_intr, - NULL, sc, &ires->intrhand); - if (error) - return (error); + for (attempt = 0; attempt < 5; attempt++) { + /* + * Start with the most desirable interrupt configuration and + * fallback towards less desirable ones. + */ + switch (attempt) { + case 0: + error = vtpci_alloc_intr_msix_pervq(sc); + break; + case 1: + error = vtpci_alloc_intr_msix_shared(sc); + break; + case 2: + error = vtpci_alloc_intr_msi(sc); + break; + case 3: + error = vtpci_alloc_intr_legacy(sc); + break; + default: + device_printf(dev, + "exhausted all interrupt allocation attempts\n"); + return (ENXIO); + } - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) { - ires = &sc->vtpci_intr_res[1]; - error = bus_setup_intr(dev, ires->irq, flags, - vtpci_vq_shared_intr, NULL, sc, &ires->intrhand); + if (error == 0 && vtpci_setup_interrupts(sc, type) == 0) + break; - return (error); + vtpci_cleanup_setup_intr_attempt(sc); } - /* Setup an interrupt handler for each virtqueue. */ - for (i = 0; i < sc->vtpci_nvqs; i++) { - vqx = &sc->vtpci_vqx[i]; - if (vqx->ires_idx < 1) - continue; - - ires = &sc->vtpci_intr_res[vqx->ires_idx]; - error = bus_setup_intr(dev, ires->irq, flags, - vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand); - if (error) - return (error); + if (bootverbose) { + if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) + device_printf(dev, "using legacy interrupt\n"); + else if (sc->vtpci_flags & VTPCI_FLAG_MSI) + device_printf(dev, "using MSI interrupt\n"); + else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) + device_printf(dev, "using shared MSIX interrupts\n"); + else + device_printf(dev, "using per VQ MSIX interrupts\n"); } return (0); @@ -554,20 +578,19 @@ static int vtpci_reinit(device_t dev, uint64_t features) { struct vtpci_softc *sc; - struct vtpci_virtqueue *vqx; - struct virtqueue *vq; - int queue, error; - uint16_t vq_size; + int idx, error; sc = device_get_softc(dev); /* - * Redrive the device initialization. This is a bit of an abuse - * of the specification, but both VirtualBox and QEMU/KVM seem - * to play nice. We do not allow the host device to change from - * what was originally negotiated beyond what the guest driver - * changed (MSIX state should not change, number of virtqueues - * and their size remain the same, etc). + * Redrive the device initialization. This is a bit of an abuse of + * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to + * play nice. + * + * We do not allow the host device to change from what was originally + * negotiated beyond what the guest driver changed. MSIX state should + * not change, number of virtqueues and their size remain the same, etc. + * This will need to be rethought when we want to support migration. */ if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET) @@ -582,34 +605,16 @@ vtpci_reinit(device_t dev, uint64_t feat vtpci_negotiate_features(dev, features); - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_CONFIG_VECTOR, 0); + for (idx = 0; idx < sc->vtpci_nvqs; idx++) { + error = vtpci_reinit_virtqueue(sc, idx); if (error) return (error); } - for (queue = 0; queue < sc->vtpci_nvqs; queue++) { - vqx = &sc->vtpci_vqx[queue]; - vq = vqx->vq; - - KASSERT(vq != NULL, ("vq %d not allocated", queue)); - vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue); - - vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); - error = virtqueue_reinit(vq, vq_size); + if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { + error = vtpci_set_host_msix_vectors(sc); if (error) return (error); - - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - error = vtpci_register_msix_vector(sc, - VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx); - if (error) - return (error); - } - - vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, - virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); } return (0); @@ -744,7 +749,6 @@ vtpci_probe_and_attach_child(struct vtpc vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); vtpci_reset(sc); vtpci_release_child_resources(sc); - /* Reset status for future attempt. */ vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); } else @@ -752,42 +756,126 @@ vtpci_probe_and_attach_child(struct vtpc } static int -vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs, - struct vq_alloc_info *vq_info) +vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) +{ + device_t dev; + int nmsix, cnt, required; + + dev = sc->vtpci_dev; + + /* Allocate an additional vector for the config changes. */ + required = nvectors + 1; + + nmsix = pci_msix_count(dev); + if (nmsix < required) + return (1); + + cnt = required; + if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { + sc->vtpci_nintr_res = required; + return (0); + } + + pci_release_msi(dev); + + return (1); +} + +static int +vtpci_alloc_msi(struct vtpci_softc *sc) +{ + device_t dev; + int nmsi, cnt, required; + + dev = sc->vtpci_dev; + required = 1; + + nmsi = pci_msi_count(dev); + if (nmsi < required) + return (1); + + cnt = required; + if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) { + sc->vtpci_nintr_res = required; + return (0); + } + + pci_release_msi(dev); + + return (1); +} + +static int +vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc) { int i, nvectors, error; - /* - * Only allocate a vector for virtqueues that are actually - * expecting an interrupt. - */ - for (nvectors = 0, i = 0; i < nvqs; i++) - if (vq_info[i].vqai_intr != NULL) + if (vtpci_disable_msix != 0 || + sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) + return (ENOTSUP); + + for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) { + if (sc->vtpci_vqx[i].no_intr == 0) nvectors++; + } + + error = vtpci_alloc_msix(sc, nvectors); + if (error) + return (error); + + sc->vtpci_flags |= VTPCI_FLAG_MSIX; + + return (0); +} + +static int +vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc) +{ + int error; if (vtpci_disable_msix != 0 || - sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX || - flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX || - vtpci_alloc_msix(sc, nvectors) != 0) { - /* - * Use MSI interrupts if available. Otherwise, we fallback - * to legacy interrupts. - */ - if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 && - vtpci_alloc_msi(sc) == 0) - sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI; + sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) + return (ENOTSUP); - sc->vtpci_nintr_res = 1; - } + error = vtpci_alloc_msix(sc, 1); + if (error) + return (error); - error = vtpci_alloc_intr_resources(sc, nvqs, vq_info); + sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX; - return (error); + return (0); } static int -vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs, - struct vq_alloc_info *vq_info) +vtpci_alloc_intr_msi(struct vtpci_softc *sc) +{ + int error; + + /* Only BHyVe supports MSI. */ + if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI) + return (ENOTSUP); + + error = vtpci_alloc_msi(sc); + if (error) + return (error); + + sc->vtpci_flags |= VTPCI_FLAG_MSI; + + return (0); +} + +static int +vtpci_alloc_intr_legacy(struct vtpci_softc *sc) +{ + + sc->vtpci_flags |= VTPCI_FLAG_LEGACY; + sc->vtpci_nintr_res = 1; + + return (0); +} + +static int +vtpci_alloc_intr_resources(struct vtpci_softc *sc) { device_t dev; struct resource *irq; @@ -795,14 +883,14 @@ vtpci_alloc_intr_resources(struct vtpci_ int i, rid, flags, res_idx; dev = sc->vtpci_dev; - flags = RF_ACTIVE; - if ((sc->vtpci_flags & - (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) { + if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) { rid = 0; - flags |= RF_SHAREABLE; - } else + flags = RF_ACTIVE | RF_SHAREABLE; + } else { rid = 1; + flags = RF_ACTIVE; + } for (i = 0; i < sc->vtpci_nintr_res; i++) { irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags); @@ -814,16 +902,16 @@ vtpci_alloc_intr_resources(struct vtpci_ } /* - * Map the virtqueue into the correct index in vq_intr_res[]. Note the - * first index is reserved for configuration changes notifications. + * Map the virtqueue into the correct index in vq_intr_res[]. The + * first index is reserved for configuration changed notifications. */ - for (i = 0, res_idx = 1; i < nvqs; i++) { + for (i = 0, res_idx = 1; i < sc->vtpci_nvqs; i++) { vqx = &sc->vtpci_vqx[i]; - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) { - if (vq_info[i].vqai_intr == NULL) + if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { + if (vqx->no_intr != 0) vqx->ires_idx = -1; - else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) + else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) vqx->ires_idx = res_idx; else vqx->ires_idx = res_idx++; @@ -835,110 +923,180 @@ vtpci_alloc_intr_resources(struct vtpci_ } static int -vtpci_alloc_msi(struct vtpci_softc *sc) +vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type) { device_t dev; - int nmsi, cnt; + struct vtpci_intr_resource *ires; + int error; dev = sc->vtpci_dev; - nmsi = pci_msi_count(dev); - - if (nmsi < 1) - return (1); - cnt = 1; - if (pci_alloc_msi(dev, &cnt) == 0 && cnt == 1) - return (0); + ires = &sc->vtpci_intr_res[0]; + error = bus_setup_intr(dev, ires->irq, type, vtpci_legacy_intr, NULL, + sc, &ires->intrhand); - return (1); + return (error); } static int -vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) +vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type) { device_t dev; - int nmsix, cnt, required; + struct vtpci_intr_resource *ires; + struct vtpci_virtqueue *vqx; + int i, error; dev = sc->vtpci_dev; - nmsix = pci_msix_count(dev); - if (nmsix < 1) - return (1); + /* + * The first resource is used for configuration changed interrupts. + */ + ires = &sc->vtpci_intr_res[0]; + error = bus_setup_intr(dev, ires->irq, type, vtpci_config_intr, + NULL, sc, &ires->intrhand); + if (error) + return (error); - /* An additional vector is needed for the config changes. */ - required = nvectors + 1; - if (nmsix >= required) { - cnt = required; - if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) - goto out; + if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) { + ires = &sc->vtpci_intr_res[1]; - pci_release_msi(dev); + error = bus_setup_intr(dev, ires->irq, type, + vtpci_vq_shared_intr, NULL, sc, &ires->intrhand); + if (error) + return (error); + } else { + /* + * Each remaining resource is assigned to a specific virtqueue. + */ + for (i = 0; i < sc->vtpci_nvqs; i++) { + vqx = &sc->vtpci_vqx[i]; + if (vqx->ires_idx < 1) + continue; + + ires = &sc->vtpci_intr_res[vqx->ires_idx]; + error = bus_setup_intr(dev, ires->irq, type, + vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand); + if (error) + return (error); + } } - /* Attempt shared MSIX configuration. */ - required = 2; - if (nmsix >= required) { - cnt = required; - if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { - sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX; - goto out; - } + error = vtpci_set_host_msix_vectors(sc); + if (error) + return (error); - pci_release_msi(dev); - } + return (0); +} - return (1); +static int +vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type) +{ + int error; -out: - sc->vtpci_nintr_res = required; - sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX; + type |= INTR_MPSAFE; + KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK, + ("no interrupt type selected: %#x", sc->vtpci_flags)); - if (bootverbose) { - if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) - device_printf(dev, "using shared virtqueue MSIX\n"); - else - device_printf(dev, "using per virtqueue MSIX\n"); - } + error = vtpci_alloc_intr_resources(sc); + if (error) + return (error); - return (0); + if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) + error = vtpci_setup_legacy_interrupt(sc, type); + else if (sc->vtpci_flags & VTPCI_FLAG_MSI) + error = vtpci_setup_msi_interrupt(sc, type); + else + error = vtpci_setup_msix_interrupts(sc, type); + + return (error); } static int vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx) { device_t dev; - uint16_t vector; + uint16_t vector, rdvector; dev = sc->vtpci_dev; - if (offset != VIRTIO_MSI_CONFIG_VECTOR && - offset != VIRTIO_MSI_QUEUE_VECTOR) - return (EINVAL); - if (res_idx != -1) { - /* Map from rid to host vector. */ + /* Map from guest rid to host vector. */ vector = sc->vtpci_intr_res[res_idx].rid - 1; } else vector = VIRTIO_MSI_NO_VECTOR; - /* The first resource is special; make sure it is used correctly. */ + /* + * Assert the first resource is always used for the configuration + * changed interrupts. + */ if (res_idx == 0) { - KASSERT(vector == 0, ("unexpected config vector")); - KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR, - ("unexpected config offset")); - } + KASSERT(vector == 0 && offset == VIRTIO_MSI_CONFIG_VECTOR, + ("bad first res use vector:%d offset:%d", vector, offset)); + } else + KASSERT(offset == VIRTIO_MSI_QUEUE_VECTOR, ("bad offset")); vtpci_write_config_2(sc, offset, vector); - if (vtpci_read_config_2(sc, offset) != vector) { - device_printf(dev, "insufficient host resources for " - "MSIX interrupts\n"); + /* Read vector to determine if the host had sufficient resources. */ + rdvector = vtpci_read_config_2(sc, offset); + if (rdvector != vector) { + device_printf(dev, + "insufficient host resources for MSIX interrupts\n"); return (ENODEV); } return (0); } +static int +vtpci_set_host_msix_vectors(struct vtpci_softc *sc) +{ + struct vtpci_virtqueue *vqx; + int idx, error; + + error = vtpci_register_msix_vector(sc, VIRTIO_MSI_CONFIG_VECTOR, 0); + if (error) + return (error); + + for (idx = 0; idx < sc->vtpci_nvqs; idx++) { + vqx = &sc->vtpci_vqx[idx]; + + vtpci_select_virtqueue(sc, idx); + error = vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR, + vqx->ires_idx); + if (error) + return (error); + } + + return (0); +} + +static int *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***