From owner-svn-src-head@FreeBSD.ORG Wed Jul 17 23:37:34 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) by hub.freebsd.org (Postfix) with ESMTP id 4566518C; Wed, 17 Jul 2013 23:37:34 +0000 (UTC) (envelope-from grehan@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 37171372; Wed, 17 Jul 2013 23:37:34 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r6HNbY9Y079840; Wed, 17 Jul 2013 23:37:34 GMT (envelope-from grehan@svn.freebsd.org) Received: (from grehan@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r6HNbXud079836; Wed, 17 Jul 2013 23:37:33 GMT (envelope-from grehan@svn.freebsd.org) Message-Id: <201307172337.r6HNbXud079836@svn.freebsd.org> From: Peter Grehan Date: Wed, 17 Jul 2013 23:37:33 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r253440 - head/usr.sbin/bhyve X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 17 Jul 2013 23:37:34 -0000 Author: grehan Date: Wed Jul 17 23:37:33 2013 New Revision: 253440 URL: http://svnweb.freebsd.org/changeset/base/253440 Log: Major rework of the virtio code. Split out common parts, and modify the net/block devices accordingly. Submitted by: Chris Torek torek at torek dot net Reviewed by: grehan Added: head/usr.sbin/bhyve/virtio.c (contents, props changed) Modified: head/usr.sbin/bhyve/Makefile head/usr.sbin/bhyve/pci_virtio_block.c head/usr.sbin/bhyve/pci_virtio_net.c head/usr.sbin/bhyve/virtio.h Modified: head/usr.sbin/bhyve/Makefile ============================================================================== --- head/usr.sbin/bhyve/Makefile Wed Jul 17 23:29:56 2013 (r253439) +++ head/usr.sbin/bhyve/Makefile Wed Jul 17 23:37:33 2013 (r253440) @@ -10,7 +10,7 @@ SRCS= acpi.c atpic.c bhyverun.c consport SRCS+= ioapic.c mem.c mevent.c mptbl.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c pmtmr.c post.c rtc.c -SRCS+= xmsr.c spinup_ap.c +SRCS+= virtio.c xmsr.c spinup_ap.c .PATH: ${.CURDIR}/../../sys/amd64/vmm SRCS+= vmm_instruction_emul.c Modified: head/usr.sbin/bhyve/pci_virtio_block.c ============================================================================== --- head/usr.sbin/bhyve/pci_virtio_block.c Wed Jul 17 23:29:56 2013 (r253439) +++ head/usr.sbin/bhyve/pci_virtio_block.c Wed Jul 17 23:37:33 2013 (r253440) @@ -53,14 +53,6 @@ __FBSDID("$FreeBSD$"); #define VTBLK_RINGSZ 64 -#define VTBLK_CFGSZ 28 - -#define VTBLK_R_CFG VTCFG_R_CFG1 -#define VTBLK_R_CFG_END VTBLK_R_CFG + VTBLK_CFGSZ -1 -#define VTBLK_R_MAX VTBLK_R_CFG_END - -#define VTBLK_REGSZ VTBLK_R_MAX+1 - #define VTBLK_MAXSEGS 32 #define VTBLK_S_OK 0 @@ -71,28 +63,10 @@ __FBSDID("$FreeBSD$"); */ #define VTBLK_S_HOSTCAPS \ ( 0x00000004 | /* host maximum request segments */ \ - 0x10000000 ) /* supports indirect descriptors */ - -static int use_msix = 1; - -struct vring_hqueue { - /* Internal state */ - uint16_t hq_size; - uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ - - /* Host-context pointers to the queue */ - struct virtio_desc *hq_dtable; - uint16_t *hq_avail_flags; - uint16_t *hq_avail_idx; /* monotonically increasing */ - uint16_t *hq_avail_ring; - - uint16_t *hq_used_flags; - uint16_t *hq_used_idx; /* monotonically increasing */ - struct virtio_used *hq_used_ring; -}; + VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ /* - * Config space + * Config space "registers" */ struct vtblk_config { uint64_t vbc_capacity; @@ -104,7 +78,6 @@ struct vtblk_config { uint32_t vbc_blk_size; uint32_t vbc_sectors_max; } __packed; -CTASSERT(sizeof(struct vtblk_config) == VTBLK_CFGSZ); /* * Fixed-size block header @@ -129,113 +102,69 @@ static int pci_vtblk_debug; * Per-device softc */ struct pci_vtblk_softc { - struct pci_devinst *vbsc_pi; + struct virtio_softc vbsc_vs; + struct vqueue_info vbsc_vq; int vbsc_fd; - int vbsc_status; - int vbsc_isr; - int vbsc_lastq; - uint32_t vbsc_features; - uint64_t vbsc_pfn; - struct vring_hqueue vbsc_q; struct vtblk_config vbsc_cfg; - uint16_t msix_table_idx_req; - uint16_t msix_table_idx_cfg; }; -#define vtblk_ctx(sc) ((sc)->vbsc_pi->pi_vmctx) - -/* - * Return the size of IO BAR that maps virtio header and device specific - * region. The size would vary depending on whether MSI-X is enabled or - * not - */ -static uint64_t -pci_vtblk_iosize(struct pci_devinst *pi) -{ - - if (pci_msix_enabled(pi)) - return (VTBLK_REGSZ); - else - return (VTBLK_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); -} -/* - * Return the number of available descriptors in the vring taking care - * of the 16-bit index wraparound. - */ -static int -hq_num_avail(struct vring_hqueue *hq) -{ - uint16_t ndesc; - - /* - * We're just computing (a-b) in GF(216). - * - * The only glitch here is that in standard C, - * uint16_t promotes to (signed) int when int has - * more than 16 bits (pretty much always now), so - * we have to force it back to unsigned. - */ - ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; - - assert(ndesc <= hq->hq_size); - - return (ndesc); -} +static void pci_vtblk_reset(void *); +static void pci_vtblk_notify(void *, struct vqueue_info *); +static int pci_vtblk_cfgread(void *, int, int, uint32_t *); +static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); + +static struct virtio_consts vtblk_vi_consts = { + "vtblk", /* our name */ + 1, /* we support 1 virtqueue */ + sizeof(struct vtblk_config), /* config reg size */ + pci_vtblk_reset, /* reset */ + pci_vtblk_notify, /* device-wide qnotify */ + pci_vtblk_cfgread, /* read PCI config */ + pci_vtblk_cfgwrite, /* write PCI config */ + VTBLK_S_HOSTCAPS, /* our capabilities */ +}; static void -pci_vtblk_update_status(struct pci_vtblk_softc *sc, uint32_t value) +pci_vtblk_reset(void *vsc) { - if (value == 0) { - DPRINTF(("vtblk: device reset requested !\n")); - sc->vbsc_isr = 0; - sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR; - sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR; - sc->vbsc_features = 0; - sc->vbsc_pfn = 0; - sc->vbsc_lastq = 0; - memset(&sc->vbsc_q, 0, sizeof(struct vring_hqueue)); - } + struct pci_vtblk_softc *sc = vsc; - sc->vbsc_status = value; + DPRINTF(("vtblk: device reset requested !\n")); + vi_reset_dev(&sc->vbsc_vs); } static void -pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq) +pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) { - struct iovec iov[VTBLK_MAXSEGS]; struct virtio_blk_hdr *vbh; - struct virtio_desc *vd, *vid; - struct virtio_used *vu; uint8_t *status; - int i; + int i, n; int err; int iolen; - int uidx, aidx, didx; - int indirect, writeop, type; + int writeop, type; off_t offset; + struct iovec iov[VTBLK_MAXSEGS + 2]; + uint16_t flags[VTBLK_MAXSEGS + 2]; - uidx = *hq->hq_used_idx; - aidx = hq->hq_cur_aidx; - didx = hq->hq_avail_ring[aidx % hq->hq_size]; - assert(didx >= 0 && didx < hq->hq_size); - - vd = &hq->hq_dtable[didx]; - - indirect = ((vd->vd_flags & VRING_DESC_F_INDIRECT) != 0); - - if (indirect) { - vid = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, vd->vd_len); - vd = &vid[0]; - } + n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); /* - * The first descriptor will be the read-only fixed header + * The first descriptor will be the read-only fixed header, + * and the last is for status (hence +2 above and below). + * The remaining iov's are the actual data I/O vectors. + * + * XXX - note - this fails on crash dump, which does a + * VIRTIO_BLK_T_FLUSH with a zero transfer length */ - vbh = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, - sizeof(struct virtio_blk_hdr)); - assert(vd->vd_len == sizeof(struct virtio_blk_hdr)); - assert(vd->vd_flags & VRING_DESC_F_NEXT); - assert((vd->vd_flags & VRING_DESC_F_WRITE) == 0); + assert (n >= 3 && n < VTBLK_MAXSEGS + 2); + + assert((flags[0] & VRING_DESC_F_WRITE) == 0); + assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); + vbh = iov[0].iov_base; + + status = iov[--n].iov_base; + assert(iov[n].iov_len == 1); + assert(flags[n] & VRING_DESC_F_WRITE); /* * XXX @@ -247,120 +176,44 @@ pci_vtblk_proc(struct pci_vtblk_softc *s offset = vbh->vbh_sector * DEV_BSIZE; - /* - * Build up the iovec based on the guest's data descriptors - */ - i = iolen = 0; - while (1) { - if (indirect) - vd = &vid[i + 1]; /* skip first indirect desc */ - else - vd = &hq->hq_dtable[vd->vd_next]; - - if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) - break; - - if (i == VTBLK_MAXSEGS) - break; - + iolen = 0; + for (i = 1; i < n; i++) { /* * - write op implies read-only descriptor, * - read op implies write-only descriptor, * therefore test the inverse of the descriptor bit * to the op. */ - assert(((vd->vd_flags & VRING_DESC_F_WRITE) == 0) == - writeop); - - iov[i].iov_base = paddr_guest2host(vtblk_ctx(sc), - vd->vd_addr, - vd->vd_len); - iov[i].iov_len = vd->vd_len; - iolen += vd->vd_len; - i++; + assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); + iolen += iov[i].iov_len; } - /* Lastly, get the address of the status byte */ - status = paddr_guest2host(vtblk_ctx(sc), vd->vd_addr, 1); - assert(vd->vd_len == 1); - assert((vd->vd_flags & VRING_DESC_F_NEXT) == 0); - assert(vd->vd_flags & VRING_DESC_F_WRITE); - DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", - writeop ? "write" : "read", iolen, i, offset)); + writeop ? "write" : "read", iolen, i - 1, offset)); if (writeop) - err = pwritev(sc->vbsc_fd, iov, i, offset); + err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); else - err = preadv(sc->vbsc_fd, iov, i, offset); + err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); *status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK; /* - * Return the single descriptor back to the host + * Return the descriptor back to the host. + * We wrote 1 byte (our status) to host. */ - vu = &hq->hq_used_ring[uidx % hq->hq_size]; - vu->vu_idx = didx; - vu->vu_tlen = 1; - hq->hq_cur_aidx++; - *hq->hq_used_idx += 1; - - /* - * Generate an interrupt if able - */ - if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { - if (use_msix) { - pci_generate_msix(sc->vbsc_pi, sc->msix_table_idx_req); - } else if (sc->vbsc_isr == 0) { - sc->vbsc_isr = 1; - pci_generate_msi(sc->vbsc_pi, 0); - } - } + vq_relchain(vq, 1); } static void -pci_vtblk_qnotify(struct pci_vtblk_softc *sc) +pci_vtblk_notify(void *vsc, struct vqueue_info *vq) { - struct vring_hqueue *hq = &sc->vbsc_q; - int ndescs; + struct pci_vtblk_softc *sc = vsc; - while ((ndescs = hq_num_avail(hq)) != 0) { - /* - * Run through all the entries, placing them into iovecs and - * sending when an end-of-packet is found - */ - pci_vtblk_proc(sc, hq); - } -} - -static void -pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn) -{ - struct vring_hqueue *hq; - - sc->vbsc_pfn = pfn << VRING_PFN; - - /* - * Set up host pointers to the various parts of the - * queue - */ - hq = &sc->vbsc_q; - hq->hq_size = VTBLK_RINGSZ; - - hq->hq_dtable = paddr_guest2host(vtblk_ctx(sc), pfn << VRING_PFN, - vring_size(VTBLK_RINGSZ)); - hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); - hq->hq_avail_idx = hq->hq_avail_flags + 1; - hq->hq_avail_ring = hq->hq_avail_flags + 2; - hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, - VRING_ALIGN); - hq->hq_used_idx = hq->hq_used_flags + 1; - hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); - - /* - * Initialize queue indexes - */ - hq->hq_cur_aidx = 0; + vq_startchains(vq); + while (vq_has_descs(vq)) + pci_vtblk_proc(sc, vq); + vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ } static int @@ -371,6 +224,7 @@ pci_vtblk_init(struct vmctx *ctx, struct off_t size; int fd; int sectsz; + int use_msix; const char *env_msi; if (opts == NULL) { @@ -412,10 +266,14 @@ pci_vtblk_init(struct vmctx *ctx, struct sc = malloc(sizeof(struct pci_vtblk_softc)); memset(sc, 0, sizeof(struct pci_vtblk_softc)); - pi->pi_arg = sc; - sc->vbsc_pi = pi; + /* record fd of storage device/file */ sc->vbsc_fd = fd; + /* init virtio softc and virtqueues */ + vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); + sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; + /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ + /* setup virtio block config space */ sc->vbsc_cfg.vbc_capacity = size / sectsz; sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; @@ -426,206 +284,51 @@ pci_vtblk_init(struct vmctx *ctx, struct sc->vbsc_cfg.vbc_geom_s = 0; sc->vbsc_cfg.vbc_sectors_max = 0; - /* initialize config space */ + /* + * Should we move some of this into virtio.c? Could + * have the device, class, and subdev_0 as fields in + * the virtio constants structure. + */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); + use_msix = 1; if ((env_msi = getenv("BHYVE_USE_MSI"))) { if (strcasecmp(env_msi, "yes") == 0) use_msix = 0; } - - if (use_msix) { - /* MSI-X Support */ - sc->msix_table_idx_req = VIRTIO_MSI_NO_VECTOR; - sc->msix_table_idx_cfg = VIRTIO_MSI_NO_VECTOR; - - if (pci_emul_add_msixcap(pi, 2, 1)) - return (1); - } else { - /* MSI Support */ - pci_emul_add_msicap(pi, 1); - } - - pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ); - + if (vi_intr_init(&sc->vbsc_vs, 1, use_msix)) + return (1); + vi_set_io_bar(&sc->vbsc_vs, 0); return (0); } -static uint64_t -vtblk_adjust_offset(struct pci_devinst *pi, uint64_t offset) -{ - /* - * Device specific offsets used by guest would change - * based on whether MSI-X capability is enabled or not - */ - if (!pci_msix_enabled(pi)) { - if (offset >= VTCFG_R_MSIX) - return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX)); - } - - return (offset); -} - -static void -pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) +static int +pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) { - struct pci_vtblk_softc *sc = pi->pi_arg; - - if (use_msix) { - if (baridx == pci_msix_table_bar(pi) || - baridx == pci_msix_pba_bar(pi)) { - pci_emul_msix_twrite(pi, offset, size, value); - return; - } - } - - assert(baridx == 0); - if (offset + size > pci_vtblk_iosize(pi)) { - DPRINTF(("vtblk_write: 2big, offset %ld size %d\n", - offset, size)); - return; - } - - offset = vtblk_adjust_offset(pi, offset); - - switch (offset) { - case VTCFG_R_GUESTCAP: - assert(size == 4); - sc->vbsc_features = value & VTBLK_S_HOSTCAPS; - break; - case VTCFG_R_PFN: - assert(size == 4); - pci_vtblk_ring_init(sc, value); - break; - case VTCFG_R_QSEL: - assert(size == 2); - sc->vbsc_lastq = value; - break; - case VTCFG_R_QNOTIFY: - assert(size == 2); - assert(value == 0); - pci_vtblk_qnotify(sc); - break; - case VTCFG_R_STATUS: - assert(size == 1); - pci_vtblk_update_status(sc, value); - break; - case VTCFG_R_CFGVEC: - assert(size == 2); - sc->msix_table_idx_cfg = value; - break; - case VTCFG_R_QVEC: - assert(size == 2); - sc->msix_table_idx_req = value; - break; - case VTCFG_R_HOSTCAP: - case VTCFG_R_QNUM: - case VTCFG_R_ISR: - case VTBLK_R_CFG ... VTBLK_R_CFG_END: - DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset)); - break; - default: - DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset)); - value = 0; - break; - } + DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); + return (1); } -uint64_t -pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size) +static int +pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) { - struct pci_vtblk_softc *sc = pi->pi_arg; + struct pci_vtblk_softc *sc = vsc; void *ptr; - uint32_t value; - if (use_msix) { - if (baridx == pci_msix_table_bar(pi) || - baridx == pci_msix_pba_bar(pi)) { - return (pci_emul_msix_tread(pi, offset, size)); - } - } - - assert(baridx == 0); - - if (offset + size > pci_vtblk_iosize(pi)) { - DPRINTF(("vtblk_read: 2big, offset %ld size %d\n", - offset, size)); - return (0); - } - - offset = vtblk_adjust_offset(pi, offset); - - switch (offset) { - case VTCFG_R_HOSTCAP: - assert(size == 4); - value = VTBLK_S_HOSTCAPS; - break; - case VTCFG_R_GUESTCAP: - assert(size == 4); - value = sc->vbsc_features; /* XXX never read ? */ - break; - case VTCFG_R_PFN: - assert(size == 4); - value = sc->vbsc_pfn >> VRING_PFN; - break; - case VTCFG_R_QNUM: - value = (sc->vbsc_lastq == 0) ? VTBLK_RINGSZ: 0; - break; - case VTCFG_R_QSEL: - assert(size == 2); - value = sc->vbsc_lastq; /* XXX never read ? */ - break; - case VTCFG_R_QNOTIFY: - assert(size == 2); - value = 0; /* XXX never read ? */ - break; - case VTCFG_R_STATUS: - assert(size == 1); - value = sc->vbsc_status; - break; - case VTCFG_R_ISR: - assert(size == 1); - value = sc->vbsc_isr; - sc->vbsc_isr = 0; /* a read clears this flag */ - break; - case VTCFG_R_CFGVEC: - assert(size == 2); - value = sc->msix_table_idx_cfg; - break; - case VTCFG_R_QVEC: - assert(size == 2); - value = sc->msix_table_idx_req; - break; - case VTBLK_R_CFG ... VTBLK_R_CFG_END: - assert(size + offset <= (VTBLK_R_CFG_END + 1)); - ptr = (uint8_t *)&sc->vbsc_cfg + offset - VTBLK_R_CFG; - if (size == 1) { - value = *(uint8_t *) ptr; - } else if (size == 2) { - value = *(uint16_t *) ptr; - } else { - value = *(uint32_t *) ptr; - } - break; - default: - DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset)); - value = 0; - break; - } - - return (value); + /* our caller has already verified offset and size */ + ptr = (uint8_t *)&sc->vbsc_cfg + offset; + memcpy(retval, ptr, size); + return (0); } struct pci_devemu pci_de_vblk = { .pe_emu = "virtio-blk", .pe_init = pci_vtblk_init, - .pe_barwrite = pci_vtblk_write, - .pe_barread = pci_vtblk_read + .pe_barwrite = vi_pci_write, + .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vblk); Modified: head/usr.sbin/bhyve/pci_virtio_net.c ============================================================================== --- head/usr.sbin/bhyve/pci_virtio_net.c Wed Jul 17 23:29:56 2013 (r253439) +++ head/usr.sbin/bhyve/pci_virtio_net.c Wed Jul 17 23:37:33 2013 (r253440) @@ -59,56 +59,49 @@ __FBSDID("$FreeBSD$"); #define VTNET_MAXSEGS 32 /* - * PCI config-space register offsets + * Host capabilities. Note that we only offer a few of these. */ -#define VTNET_R_CFG0 24 -#define VTNET_R_CFG1 25 -#define VTNET_R_CFG2 26 -#define VTNET_R_CFG3 27 -#define VTNET_R_CFG4 28 -#define VTNET_R_CFG5 29 -#define VTNET_R_CFG6 30 -#define VTNET_R_CFG7 31 -#define VTNET_R_MAX 31 +#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ +#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ +#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ +#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ +#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ +#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ +#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ +#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ +#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ +#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ +#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ +#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ +#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ +#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ +#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ +#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE \ + (1 << 21) /* guest can send gratuitous pkts */ -#define VTNET_REGSZ VTNET_R_MAX+1 +#define VTNET_S_HOSTCAPS \ + ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ + VIRTIO_F_NOTIFY_ON_EMPTY) /* - * Host capabilities + * PCI config-space "registers" */ -#define VTNET_S_HOSTCAPS \ - ( 0x00000020 | /* host supplies MAC */ \ - 0x00008000 | /* host can merge Rx buffers */ \ - 0x00010000 | /* config status available */ \ - VIRTIO_F_NOTIFY_ON_EMPTY) +struct virtio_net_config { + uint8_t mac[6]; + uint16_t status; +} __packed; /* * Queue definitions. */ #define VTNET_RXQ 0 #define VTNET_TXQ 1 -#define VTNET_CTLQ 2 +#define VTNET_CTLQ 2 /* NB: not yet supported */ #define VTNET_MAXQ 3 -static int use_msix = 1; - -struct vring_hqueue { - /* Internal state */ - uint16_t hq_size; - uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ - - /* Host-context pointers to the queue */ - struct virtio_desc *hq_dtable; - uint16_t *hq_avail_flags; - uint16_t *hq_avail_idx; /* monotonically increasing */ - uint16_t *hq_avail_ring; - - uint16_t *hq_used_flags; - uint16_t *hq_used_idx; /* monotonically increasing */ - struct virtio_used *hq_used_ring; -}; - /* * Fixed network header size */ @@ -133,23 +126,17 @@ static int pci_vtnet_debug; * Per-device softc */ struct pci_vtnet_softc { - struct pci_devinst *vsc_pi; + struct virtio_softc vsc_vs; + struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; pthread_mutex_t vsc_mtx; struct mevent *vsc_mevp; - int vsc_curq; - int vsc_status; - int vsc_isr; int vsc_tapfd; int vsc_rx_ready; - int resetting; + volatile int resetting; /* set and checked outside lock */ uint32_t vsc_features; - uint8_t vsc_macaddr[6]; - - uint64_t vsc_pfn[VTNET_MAXQ]; - struct vring_hqueue vsc_hq[VTNET_MAXQ]; - uint16_t vsc_msix_table_idx[VTNET_MAXQ]; + struct virtio_net_config vsc_config; pthread_mutex_t rx_mtx; int rx_in_progress; @@ -159,73 +146,22 @@ struct pci_vtnet_softc { pthread_cond_t tx_cond; int tx_in_progress; }; -#define vtnet_ctx(sc) ((sc)->vsc_pi->pi_vmctx) -#define notify_on_empty(sc) ((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY) - -/* - * Return the size of IO BAR that maps virtio header and device specific - * region. The size would vary depending on whether MSI-X is enabled or - * not. - */ -static uint64_t -pci_vtnet_iosize(struct pci_devinst *pi) -{ - if (pci_msix_enabled(pi)) - return (VTNET_REGSZ); - else - return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); -} - -/* - * Return the number of available descriptors in the vring taking care - * of the 16-bit index wraparound. - */ -static int -hq_num_avail(struct vring_hqueue *hq) -{ - uint16_t ndesc; - /* - * We're just computing (a-b) mod 2^16 - * - * The only glitch here is that in standard C, - * uint16_t promotes to (signed) int when int has - * more than 16 bits (pretty much always now), so - * we have to force it back to unsigned. - */ - ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; - - assert(ndesc <= hq->hq_size); - - return (ndesc); -} - -static uint16_t -pci_vtnet_qsize(int qnum) -{ - /* XXX no ctl queue currently */ - if (qnum == VTNET_CTLQ) { - return (0); - } - - /* XXX fixed currently. Maybe different for tx/rx/ctl */ - return (VTNET_RINGSZ); -} - -static void -pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) -{ - struct vring_hqueue *hq; - - assert(ring < VTNET_MAXQ); - - hq = &sc->vsc_hq[ring]; - - /* - * Reset all soft state - */ - hq->hq_cur_aidx = 0; -} +static void pci_vtnet_reset(void *); +/* static void pci_vtnet_notify(void *, struct vqueue_info *); */ +static int pci_vtnet_cfgread(void *, int, int, uint32_t *); +static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); + +static struct virtio_consts vtnet_vi_consts = { + "vtnet", /* our name */ + VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ + sizeof(struct virtio_net_config), /* config reg size */ + pci_vtnet_reset, /* reset */ + NULL, /* device-wide qnotify -- not used */ + pci_vtnet_cfgread, /* read PCI config */ + pci_vtnet_cfgwrite, /* write PCI config */ + VTNET_S_HOSTCAPS, /* our capabilities */ +}; /* * If the transmit thread is active then stall until it is done. @@ -260,48 +196,27 @@ pci_vtnet_rxwait(struct pci_vtnet_softc } static void -pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) +pci_vtnet_reset(void *vsc) { - int i; - - if (value == 0) { - DPRINTF(("vtnet: device reset requested !\n")); - - sc->resetting = 1; - - /* - * Wait for the transmit and receive threads to finish their - * processing. - */ - pci_vtnet_txwait(sc); - pci_vtnet_rxwait(sc); + struct pci_vtnet_softc *sc = vsc; - sc->vsc_rx_ready = 0; - pci_vtnet_ring_reset(sc, VTNET_RXQ); - pci_vtnet_ring_reset(sc, VTNET_TXQ); + DPRINTF(("vtnet: device reset requested !\n")); - for (i = 0; i < VTNET_MAXQ; i++) - sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; + sc->resetting = 1; - sc->vsc_isr = 0; - sc->vsc_features = 0; + /* + * Wait for the transmit and receive threads to finish their + * processing. + */ + pci_vtnet_txwait(sc); + pci_vtnet_rxwait(sc); - sc->resetting = 0; - } + sc->vsc_rx_ready = 0; - sc->vsc_status = value; -} + /* now reset rings, MSI-X vectors, and negotiated capabilities */ + vi_reset_dev(&sc->vsc_vs); -static void -vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx) -{ - - if (use_msix) { - pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]); - } else { - sc->vsc_isr |= 1; - pci_generate_msi(sc->vsc_pi, 0); - } + sc->resetting = 0; } /* @@ -311,7 +226,7 @@ static void pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, int len) { - char pad[60]; + static char pad[60]; /* all zero bytes */ if (sc->vsc_tapfd == -1) return; @@ -322,7 +237,6 @@ pci_vtnet_tap_tx(struct pci_vtnet_softc * there is always an extra iov available by the caller. */ if (len < 60) { - memset(pad, 0, 60 - len); iov[iovcnt].iov_base = pad; iov[iovcnt].iov_len = 60 - len; iovcnt++; @@ -342,15 +256,11 @@ static uint8_t dummybuf[2048]; static void pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) { - struct virtio_desc *vd; - struct virtio_used *vu; - struct vring_hqueue *hq; + struct vqueue_info *vq; struct virtio_net_rxhdr *vrx; uint8_t *buf; - int i; int len; - int ndescs; - int didx, uidx, aidx; /* descriptor, avail and used index */ + struct iovec iov; /* * Should never be called without a valid tap fd @@ -370,47 +280,45 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc } /* - * Calculate the number of available rx buffers + * Check for available rx buffers */ - hq = &sc->vsc_hq[VTNET_RXQ]; - - ndescs = hq_num_avail(hq); - - if (ndescs == 0) { + vq = &sc->vsc_queues[VTNET_RXQ]; + vq_startchains(vq); + if (!vq_has_descs(vq)) { /* - * Drop the packet and try later + * Drop the packet and try later. Interrupt on + * empty, if that's negotiated. */ (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); - - if (notify_on_empty(sc)) - vtnet_generate_interrupt(sc, VTNET_RXQ); - + vq_endchains(vq, 1); return; } - aidx = hq->hq_cur_aidx; - uidx = *hq->hq_used_idx; - for (i = 0; i < ndescs; i++) { + do { /* - * 'aidx' indexes into the an array of descriptor indexes + * Get descriptor chain, which should have just + * one descriptor in it. + * ??? allow guests to use multiple descs? */ - didx = hq->hq_avail_ring[aidx % hq->hq_size]; - assert(didx >= 0 && didx < hq->hq_size); - - vd = &hq->hq_dtable[didx]; + assert(vq_getchain(vq, &iov, 1, NULL) == 1); /* * Get a pointer to the rx header, and use the * data immediately following it for the packet buffer. */ - vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len); + vrx = iov.iov_base; buf = (uint8_t *)(vrx + 1); len = read(sc->vsc_tapfd, buf, - vd->vd_len - sizeof(struct virtio_net_rxhdr)); + iov.iov_len - sizeof(struct virtio_net_rxhdr)); if (len < 0 && errno == EWOULDBLOCK) { - break; + /* + * No more packets, but still some avail ring + * entries. Interrupt if needed/appropriate. + */ + vq_endchains(vq, 0); + return; } /* @@ -422,23 +330,13 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc vrx->vrh_bufs = 1; /* - * Write this descriptor into the used ring + * Release this chain and handle more chains. */ - vu = &hq->hq_used_ring[uidx % hq->hq_size]; - vu->vu_idx = didx; - vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); - uidx++; - aidx++; - } + vq_relchain(vq, len + sizeof(struct virtio_net_rxhdr)); + } while (vq_has_descs(vq)); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***