Date: Fri, 8 Aug 2014 03:49:02 +0000 (UTC) From: Neel Natu <neel@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r269700 - in head: sys/amd64/vmm usr.sbin/bhyve Message-ID: <53e448ae.2b64.363ea56f@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: neel Date: Fri Aug 8 03:49:01 2014 New Revision: 269700 URL: http://svnweb.freebsd.org/changeset/base/269700 Log: Support PCI extended config space in bhyve. Add the ACPI MCFG table to advertise the extended config memory window. Introduce a new flag MEM_F_IMMUTABLE for memory ranges that cannot be deleted or moved in the guest's address space. The PCI extended config space is an example of an immutable memory range. Add emulation for the "movzw" instruction. This instruction is used by FreeBSD to read a 16-bit extended config space register. CR: https://phabric.freebsd.org/D505 Reviewed by: jhb, grehan Requested by: tychon Modified: head/sys/amd64/vmm/vmm_instruction_emul.c head/usr.sbin/bhyve/acpi.c head/usr.sbin/bhyve/mem.c head/usr.sbin/bhyve/mem.h head/usr.sbin/bhyve/pci_emul.c head/usr.sbin/bhyve/pci_emul.h Modified: head/sys/amd64/vmm/vmm_instruction_emul.c ============================================================================== --- head/sys/amd64/vmm/vmm_instruction_emul.c Fri Aug 8 01:57:15 2014 (r269699) +++ head/sys/amd64/vmm/vmm_instruction_emul.c Fri Aug 8 03:49:01 2014 (r269700) @@ -82,6 +82,10 @@ static const struct vie_op two_byte_opco .op_byte = 0xB6, .op_type = VIE_OP_TYPE_MOVZX, }, + [0xB7] = { + .op_byte = 0xB7, + .op_type = VIE_OP_TYPE_MOVZX, + }, [0xBE] = { .op_byte = 0xBE, .op_type = VIE_OP_TYPE_MOVSX, @@ -505,6 +509,25 @@ emulate_movx(void *vm, int vcpuid, uint6 /* write the result */ error = vie_update_register(vm, vcpuid, reg, val, size); break; + case 0xB7: + /* + * MOV and zero extend word from mem (ModRM:r/m) to + * reg (ModRM:reg). 
+ * + * 0F B7/r movzx r32, r/m16 + * REX.W + 0F B7/r movzx r64, r/m16 + */ + error = memread(vm, vcpuid, gpa, &val, 2, arg); + if (error) + return (error); + + reg = gpr_map[vie->reg]; + + /* zero-extend word */ + val = (uint16_t)val; + + error = vie_update_register(vm, vcpuid, reg, val, size); + break; case 0xBE: /* * MOV and sign extend byte from mem (ModRM:r/m) to Modified: head/usr.sbin/bhyve/acpi.c ============================================================================== --- head/usr.sbin/bhyve/acpi.c Fri Aug 8 01:57:15 2014 (r269699) +++ head/usr.sbin/bhyve/acpi.c Fri Aug 8 03:49:01 2014 (r269700) @@ -40,12 +40,13 @@ * Layout * ------ * RSDP -> 0xf2400 (36 bytes fixed) - * RSDT -> 0xf2440 (36 bytes + 4*N table addrs, 2 used) - * XSDT -> 0xf2480 (36 bytes + 8*N table addrs, 2 used) + * RSDT -> 0xf2440 (36 bytes + 4*7 table addrs, 4 used) + * XSDT -> 0xf2480 (36 bytes + 8*7 table addrs, 4 used) * MADT -> 0xf2500 (depends on #CPUs) * FADT -> 0xf2600 (268 bytes) * HPET -> 0xf2740 (56 bytes) - * FACS -> 0xf2780 (64 bytes) + * MCFG -> 0xf2780 (60 bytes) + * FACS -> 0xf27C0 (64 bytes) * DSDT -> 0xf2800 (variable - can go up to 0x100000) */ @@ -80,7 +81,8 @@ __FBSDID("$FreeBSD$"); #define MADT_OFFSET 0x100 #define FADT_OFFSET 0x200 #define HPET_OFFSET 0x340 -#define FACS_OFFSET 0x380 +#define MCFG_OFFSET 0x380 +#define FACS_OFFSET 0x3C0 #define DSDT_OFFSET 0x400 #define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX" @@ -178,6 +180,8 @@ basl_fwrite_rsdt(FILE *fp) basl_acpi_base + FADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : %08X\n", basl_acpi_base + HPET_OFFSET); + EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : %08X\n", + basl_acpi_base + MCFG_OFFSET); EFFLUSH(fp); @@ -216,6 +220,8 @@ basl_fwrite_xsdt(FILE *fp) basl_acpi_base + FADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : 00000000%08X\n", basl_acpi_base + HPET_OFFSET); + EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : 00000000%08X\n", + basl_acpi_base + MCFG_OFFSET); EFFLUSH(fp); @@ -583,6 
+589,39 @@ err_exit: } static int +basl_fwrite_mcfg(FILE *fp) +{ + int err = 0; + + EFPRINTF(fp, "/*\n"); + EFPRINTF(fp, " * bhyve MCFG template\n"); + EFPRINTF(fp, " */\n"); + EFPRINTF(fp, "[0004]\t\tSignature : \"MCFG\"\n"); + EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); + EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); + EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); + EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); + EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMCFG \"\n"); + EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); + + /* iasl will fill in the compiler ID/revision fields */ + EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); + EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); + EFPRINTF(fp, "[0008]\t\tReserved : 0\n"); + EFPRINTF(fp, "\n"); + + EFPRINTF(fp, "[0008]\t\tBase Address : %016lX\n", pci_ecfg_base()); + EFPRINTF(fp, "[0002]\t\tSegment Group: 0000\n"); + EFPRINTF(fp, "[0001]\t\tStart Bus: 00\n"); + EFPRINTF(fp, "[0001]\t\tEnd Bus: FF\n"); + EFPRINTF(fp, "[0004]\t\tReserved : 0\n"); + EFFLUSH(fp); + return (0); +err_exit: + return (errno); +} + +static int basl_fwrite_facs(FILE *fp) { int err; @@ -921,6 +960,7 @@ static struct { { basl_fwrite_madt, MADT_OFFSET }, { basl_fwrite_fadt, FADT_OFFSET }, { basl_fwrite_hpet, HPET_OFFSET }, + { basl_fwrite_mcfg, MCFG_OFFSET }, { basl_fwrite_facs, FACS_OFFSET }, { basl_fwrite_dsdt, DSDT_OFFSET }, { NULL } Modified: head/usr.sbin/bhyve/mem.c ============================================================================== --- head/usr.sbin/bhyve/mem.c Fri Aug 8 01:57:15 2014 (r269699) +++ head/usr.sbin/bhyve/mem.c Fri Aug 8 03:49:01 2014 (r269700) @@ -162,7 +162,7 @@ emulate_mem(struct vmctx *ctx, int vcpu, { struct mmio_rb_range *entry; - int err; + int err, immutable; pthread_rwlock_rdlock(&mmio_rwlock); /* @@ -186,9 +186,27 @@ emulate_mem(struct vmctx *ctx, int vcpu, } assert(entry != NULL); + + /* + * An 'immutable' memory range is guaranteed to be never removed + * so there is no 
need to hold 'mmio_rwlock' while calling the + * handler. + * + * XXX writes to the PCIR_COMMAND register can cause register_mem() + * to be called. If the guest is using PCI extended config space + * to modify the PCIR_COMMAND register then register_mem() can + * deadlock on 'mmio_rwlock'. However by registering the extended + * config space window as 'immutable' the deadlock can be avoided. + */ + immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE); + if (immutable) + pthread_rwlock_unlock(&mmio_rwlock); + err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging, mem_read, mem_write, &entry->mr_param); - pthread_rwlock_unlock(&mmio_rwlock); + + if (!immutable) + pthread_rwlock_unlock(&mmio_rwlock); return (err); } @@ -246,6 +264,7 @@ unregister_mem(struct mem_range *memp) mr = &entry->mr_param; assert(mr->name == memp->name); assert(mr->base == memp->base && mr->size == memp->size); + assert((mr->flags & MEM_F_IMMUTABLE) == 0); RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); /* flush Per-vCPU cache */ Modified: head/usr.sbin/bhyve/mem.h ============================================================================== --- head/usr.sbin/bhyve/mem.h Fri Aug 8 01:57:15 2014 (r269699) +++ head/usr.sbin/bhyve/mem.h Fri Aug 8 03:49:01 2014 (r269700) @@ -48,6 +48,7 @@ struct mem_range { #define MEM_F_READ 0x1 #define MEM_F_WRITE 0x2 #define MEM_F_RW 0x3 +#define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */ void init_mem(void); int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, Modified: head/usr.sbin/bhyve/pci_emul.c ============================================================================== --- head/usr.sbin/bhyve/pci_emul.c Fri Aug 8 01:57:15 2014 (r269699) +++ head/usr.sbin/bhyve/pci_emul.c Fri Aug 8 03:49:01 2014 (r269700) @@ -109,16 +109,20 @@ static uint64_t pci_emul_membase64; #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 -#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ +#define PCI_EMUL_ECFG_BASE 
0xE0000000 /* 3.5GB */ +#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ +SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); + +#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE #define PCI_EMUL_MEMBASE64 0xD000000000UL #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL static struct pci_devemu *pci_emul_finddev(char *name); -static void pci_lintr_route(struct pci_devinst *pi); -static void pci_lintr_update(struct pci_devinst *pi); - -static struct mem_range pci_mem_hole; +static void pci_lintr_route(struct pci_devinst *pi); +static void pci_lintr_update(struct pci_devinst *pi); +static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, + int func, int coff, int bytes, uint32_t *val); /* * I/O access @@ -1023,12 +1027,37 @@ pci_emul_fallback_handler(struct vmctx * return (0); } +static int +pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int bytes, uint64_t *val, void *arg1, long arg2) +{ + int bus, slot, func, coff, in; + + coff = addr & 0xfff; + func = (addr >> 12) & 0x7; + slot = (addr >> 15) & 0x1f; + bus = (addr >> 20) & 0xff; + in = (dir == MEM_F_READ); + if (in) + *val = ~0UL; + pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); + return (0); +} + +uint64_t +pci_ecfg_base(void) +{ + + return (PCI_EMUL_ECFG_BASE); +} + #define BUSIO_ROUNDUP 32 #define BUSMEM_ROUNDUP (1024 * 1024) int init_pci(struct vmctx *ctx) { + struct mem_range mr; struct pci_devemu *pde; struct businfo *bi; struct slotinfo *si; @@ -1112,22 +1141,34 @@ init_pci(struct vmctx *ctx) * The guest physical memory map looks like the following: * [0, lowmem) guest system memory * [lowmem, lowmem_limit) memory hole (may be absent) - * [lowmem_limit, 4GB) PCI hole (32-bit BAR allocation) + * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation) + * [0xE0000000, 0xF0000000) PCI extended config window + * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware * [4GB, 4GB + highmem) - * + */ + + /* * Accesses to memory 
addresses that are not allocated to system * memory or PCI devices return 0xff's. */ lowmem = vm_get_lowmem_size(ctx); + bzero(&mr, sizeof(struct mem_range)); + mr.name = "PCI hole"; + mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; + mr.base = lowmem; + mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; + mr.handler = pci_emul_fallback_handler; + error = register_mem_fallback(&mr); + assert(error == 0); - memset(&pci_mem_hole, 0, sizeof(struct mem_range)); - pci_mem_hole.name = "PCI hole"; - pci_mem_hole.flags = MEM_F_RW; - pci_mem_hole.base = lowmem; - pci_mem_hole.size = (4ULL * 1024 * 1024 * 1024) - lowmem; - pci_mem_hole.handler = pci_emul_fallback_handler; - - error = register_mem_fallback(&pci_mem_hole); + /* PCI extended config space */ + bzero(&mr, sizeof(struct mem_range)); + mr.name = "PCI ECFG"; + mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; + mr.base = PCI_EMUL_ECFG_BASE; + mr.size = PCI_EMUL_ECFG_SIZE; + mr.handler = pci_emul_ecfg_handler; + error = register_mem(&mr); assert(error == 0); return (0); @@ -1612,41 +1653,6 @@ pci_emul_hdrtype_fixup(int bus, int slot } } -static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; - -static int -pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - uint32_t x; - - if (bytes != 4) { - if (in) - *eax = (bytes == 2) ? 
0xffff : 0xff; - return (0); - } - - if (in) { - x = (cfgbus << 16) | - (cfgslot << 11) | - (cfgfunc << 8) | - cfgoff; - if (cfgenable) - x |= CONF1_ENABLE; - *eax = x; - } else { - x = *eax; - cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; - cfgoff = x & PCI_REGMAX; - cfgfunc = (x >> 8) & PCI_FUNCMAX; - cfgslot = (x >> 11) & PCI_SLOTMAX; - cfgbus = (x >> 16) & PCI_BUSMAX; - } - - return (0); -} -INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); - static uint32_t bits_changed(uint32_t old, uint32_t new, uint32_t mask) { @@ -1709,41 +1715,51 @@ pci_emul_cmdwrite(struct pci_devinst *pi pci_lintr_update(pi); } -static int -pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) +static void +pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, + int coff, int bytes, uint32_t *eax) { struct businfo *bi; struct slotinfo *si; struct pci_devinst *pi; struct pci_devemu *pe; - int coff, idx, needcfg; + int idx, needcfg; uint64_t addr, bar, mask; - assert(bytes == 1 || bytes == 2 || bytes == 4); - - if ((bi = pci_businfo[cfgbus]) != NULL) { - si = &bi->slotinfo[cfgslot]; - pi = si->si_funcs[cfgfunc].fi_devi; + if ((bi = pci_businfo[bus]) != NULL) { + si = &bi->slotinfo[slot]; + pi = si->si_funcs[func].fi_devi; } else pi = NULL; - coff = cfgoff + (port - CONF1_DATA_PORT); - -#if 0 - printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r", - in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc); -#endif - /* - * Just return if there is no device at this cfgslot:cfgfunc, - * if the guest is doing an un-aligned access, or if the config - * address word isn't enabled. + * Just return if there is no device at this slot:func or if the + * guest is doing an un-aligned access. 
*/ - if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) { + if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || + (coff & (bytes - 1)) != 0) { if (in) *eax = 0xffffffff; - return (0); + return; + } + + /* + * Ignore all writes beyond the standard config space and return all + * ones on reads. + */ + if (coff >= PCI_REGMAX + 1) { + if (in) { + *eax = 0xffffffff; + /* + * Extended capabilities begin at offset 256 in config + * space. Absence of extended capabilities is signaled + * with all 0s in the extended capability header at + * offset 256. + */ + if (coff <= PCI_REGMAX + 4) + *eax = 0x00000000; + } + return; } pe = pi->pi_d; @@ -1754,8 +1770,8 @@ pci_emul_cfgdata(struct vmctx *ctx, int if (in) { /* Let the device emulation override the default handler */ if (pe->pe_cfgread != NULL) { - needcfg = pe->pe_cfgread(ctx, vcpu, pi, - coff, bytes, eax); + needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, + eax); } else { needcfg = 1; } @@ -1769,12 +1785,12 @@ pci_emul_cfgdata(struct vmctx *ctx, int *eax = pci_get_cfgdata32(pi, coff); } - pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax); + pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); } else { /* Let the device emulation override the default handler */ if (pe->pe_cfgwrite != NULL && (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) - return (0); + return; /* * Special handling for write to BAR registers @@ -1785,7 +1801,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int * 4-byte aligned. 
*/ if (bytes != 4 || (coff & 0x3) != 0) - return (0); + return; idx = (coff - PCIR_BAR(0)) / 4; mask = ~(pi->pi_bar[idx].size - 1); switch (pi->pi_bar[idx].type) { @@ -1843,7 +1859,57 @@ pci_emul_cfgdata(struct vmctx *ctx, int CFGWRITE(pi, coff, *eax, bytes); } } +} + +static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; + +static int +pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + uint32_t x; + if (bytes != 4) { + if (in) + *eax = (bytes == 2) ? 0xffff : 0xff; + return (0); + } + + if (in) { + x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; + if (cfgenable) + x |= CONF1_ENABLE; + *eax = x; + } else { + x = *eax; + cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; + cfgoff = x & PCI_REGMAX; + cfgfunc = (x >> 8) & PCI_FUNCMAX; + cfgslot = (x >> 11) & PCI_SLOTMAX; + cfgbus = (x >> 16) & PCI_BUSMAX; + } + + return (0); +} +INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); + +static int +pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + int coff; + + assert(bytes == 1 || bytes == 2 || bytes == 4); + + coff = cfgoff + (port - CONF1_DATA_PORT); + if (cfgenable) { + pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, + eax); + } else { + /* Ignore accesses to cfgdata if not enabled by cfgaddr */ + if (in) + *eax = 0xffffffff; + } return (0); } Modified: head/usr.sbin/bhyve/pci_emul.h ============================================================================== --- head/usr.sbin/bhyve/pci_emul.h Fri Aug 8 01:57:15 2014 (r269699) +++ head/usr.sbin/bhyve/pci_emul.h Fri Aug 8 03:49:01 2014 (r269700) @@ -235,6 +235,7 @@ uint64_t pci_emul_msix_tread(struct pci_ int pci_count_lintr(int bus); void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg); void pci_write_dsdt(void); +uint64_t pci_ecfg_base(void); int pci_bus_configured(int bus); static __inline void
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?53e448ae.2b64.363ea56f>