Date: Fri, 19 Oct 2012 18:11:18 +0000 (UTC) From: Peter Grehan <grehan@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r241744 - projects/bhyve/usr.sbin/bhyve Message-ID: <201210191811.q9JIBIQu049356@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: grehan Date: Fri Oct 19 18:11:17 2012 New Revision: 241744 URL: http://svn.freebsd.org/changeset/base/241744 Log: Rework how guest MMIO regions are dealt with. - New memory region interface. An RB tree holds the regions, with a last-found per-vCPU cache to deal with the common case of repeated guest accesses to MMIO registers in the same page. - Support memory-mapped BARs in PCI emulation. mem.c/h - memory region interface instruction_emul.c/h - remove old region interface. Use gpa from EPT exit to avoid a tablewalk to determine operand address. Determine operand size and use when calling through to region handler. fbsdrun.c - call into region interface on paging exit. Distinguish between instruction emul error and region not found pci_emul.c/h - implement new BAR callback api. Split BAR alloc routine into routines that require/don't require the BAR phys address. ioapic.c pci_passthru.c pci_virtio_block.c pci_virtio_net.c pci_uart.c - update to new BAR callback i/f Reviewed by: neel Obtained from: NetApp Added: projects/bhyve/usr.sbin/bhyve/mem.c (contents, props changed) projects/bhyve/usr.sbin/bhyve/mem.h (contents, props changed) Modified: projects/bhyve/usr.sbin/bhyve/Makefile projects/bhyve/usr.sbin/bhyve/fbsdrun.c projects/bhyve/usr.sbin/bhyve/instruction_emul.c projects/bhyve/usr.sbin/bhyve/instruction_emul.h projects/bhyve/usr.sbin/bhyve/ioapic.c projects/bhyve/usr.sbin/bhyve/pci_emul.c projects/bhyve/usr.sbin/bhyve/pci_emul.h projects/bhyve/usr.sbin/bhyve/pci_passthru.c projects/bhyve/usr.sbin/bhyve/pci_uart.c projects/bhyve/usr.sbin/bhyve/pci_virtio_block.c projects/bhyve/usr.sbin/bhyve/pci_virtio_net.c Modified: projects/bhyve/usr.sbin/bhyve/Makefile ============================================================================== --- projects/bhyve/usr.sbin/bhyve/Makefile Fri Oct 19 17:45:56 2012 (r241743) +++ projects/bhyve/usr.sbin/bhyve/Makefile Fri Oct 19 18:11:17 2012 (r241744) @@ -5,7 +5,7 @@ PROG= bhyve SRCS= atpic.c consport.c dbgport.c 
elcr.c fbsdrun.c inout.c -SRCS+= instruction_emul.c ioapic.c mevent.c +SRCS+= instruction_emul.c ioapic.c mem.c mevent.c SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c SRCS+= spinup_ap.c Modified: projects/bhyve/usr.sbin/bhyve/fbsdrun.c ============================================================================== --- projects/bhyve/usr.sbin/bhyve/fbsdrun.c Fri Oct 19 17:45:56 2012 (r241743) +++ projects/bhyve/usr.sbin/bhyve/fbsdrun.c Fri Oct 19 18:11:17 2012 (r241744) @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" #include "dbgport.h" +#include "mem.h" #include "mevent.h" #include "pci_emul.h" #include "xmsr.h" @@ -446,11 +447,21 @@ vmexit_mtrap(struct vmctx *ctx, struct v static int vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { - + int err; stats.vmexit_paging++; - if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) { - printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip); + err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip, + vmexit->u.paging.cr3, vmexit->u.paging.rwx); + + if (err) { + if (err == EINVAL) { + printf("Failed to emulate instruction at 0x%lx\n", + vmexit->rip); + } else if (err == ESRCH) { + printf("Unhandled memory access to 0x%lx\n", + vmexit->u.paging.gpa); + } + return (VMEXIT_ABORT); } Modified: projects/bhyve/usr.sbin/bhyve/instruction_emul.c ============================================================================== --- projects/bhyve/usr.sbin/bhyve/instruction_emul.c Fri Oct 19 17:45:56 2012 (r241743) +++ projects/bhyve/usr.sbin/bhyve/instruction_emul.c Fri Oct 19 18:11:17 2012 (r241744) @@ -28,10 +28,12 @@ #include <strings.h> #include <unistd.h> +#include <assert.h> #include <machine/vmm.h> #include <vmmapi.h> #include "fbsdrun.h" +#include "mem.h" #include "instruction_emul.h" #define PREFIX_LOCK 0xF0 @@ -46,6 +48,7 @@ #define 
PREFIX_BRANCH_NOT_TAKEN 0x2E #define PREFIX_BRANCH_TAKEN 0x3E #define PREFIX_OPSIZE 0x66 +#define is_opsz_prefix(x) ((x) == PREFIX_OPSIZE) #define PREFIX_ADDRSIZE 0x67 #define OPCODE_2BYTE_ESCAPE 0x0F @@ -95,6 +98,11 @@ #define FROM_REG (1<<2) #define TO_RM (1<<3) #define TO_REG (1<<4) +#define ZEXT (1<<5) +#define FROM_8 (1<<6) +#define FROM_16 (1<<7) +#define TO_8 (1<<8) +#define TO_16 (1<<9) #define REX_MASK 0xF0 #define REX_PREFIX 0x40 @@ -118,16 +126,7 @@ #define PML4E_OFFSET_MASK 0x0000FF8000000000 #define PML4E_SHIFT 39 -#define MAX_EMULATED_REGIONS 8 -int registered_regions = 0; -struct memory_region -{ - uintptr_t start; - uintptr_t end; - emulated_read_func_t memread; - emulated_write_func_t memwrite; - void *arg; -} emulated_regions[MAX_EMULATED_REGIONS]; +#define INSTR_VERIFY struct decoded_instruction { @@ -138,11 +137,12 @@ struct decoded_instruction uint8_t *displacement; uint8_t *immediate; - uint8_t opcode_flags; + uint16_t opcode_flags; uint8_t addressing_mode; uint8_t rm; uint8_t reg; + uint8_t opsz; uint8_t rex_r; uint8_t rex_w; uint8_t rex_b; @@ -170,11 +170,17 @@ static enum vm_reg_name vm_reg_name_mapp [REG_R15] = VM_REG_GUEST_R15 }; -uint8_t one_byte_opcodes[256] = { - [0x89] = HAS_MODRM | FROM_REG | TO_RM, +uint16_t one_byte_opcodes[256] = { + [0x88] = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8, + [0x89] = HAS_MODRM | FROM_REG | TO_RM, [0x8B] = HAS_MODRM | FROM_RM | TO_REG, }; +uint16_t two_byte_opcodes[256] = { + [0xB6] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8, + [0xB7] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16, +}; + static uintptr_t gla2gpa(uint64_t gla, uint64_t guest_cr3) { @@ -211,7 +217,8 @@ gla2hla(uint64_t gla, uint64_t guest_cr3 uintptr_t gpa; gpa = gla2gpa(gla, guest_cr3); - return paddr_guest2host(gpa); + + return (paddr_guest2host(gpa)); } /* @@ -232,6 +239,9 @@ decode_prefixes(struct decoded_instructi decoded->rex_x = *current_prefix & REX_X_MASK; decoded->rex_b = *current_prefix & REX_B_MASK; current_prefix++; + 
} else if (is_opsz_prefix(*current_prefix)) { + decoded->opsz = 1; + current_prefix++; } else if (is_prefix(*current_prefix)) { return (-1); } @@ -248,16 +258,26 @@ decode_prefixes(struct decoded_instructi static int decode_opcode(struct decoded_instruction *decoded) { - uint8_t opcode, flags; + uint8_t opcode; + uint16_t flags; + int extra; opcode = *decoded->opcode; - flags = one_byte_opcodes[opcode]; + extra = 0; + if (opcode != 0xf) + flags = one_byte_opcodes[opcode]; + else { + opcode = *(decoded->opcode + 1); + flags = two_byte_opcodes[opcode]; + extra = 1; + } + if (!flags) return (-1); if (flags & HAS_MODRM) { - decoded->modrm = decoded->opcode + 1; + decoded->modrm = decoded->opcode + 1 + extra; } decoded->opcode_flags = flags; @@ -381,37 +401,70 @@ decode_instruction(void *instr, struct d return (0); } -static struct memory_region * -find_region(uintptr_t addr) +static enum vm_reg_name +get_vm_reg_name(uint8_t reg) { - int i; - for (i = 0; i < registered_regions; ++i) { - if (emulated_regions[i].start <= addr && - emulated_regions[i].end >= addr) { - return &emulated_regions[i]; - } - } - - return (0); + return (vm_reg_name_mappings[reg]); } -static enum vm_reg_name -get_vm_reg_name(uint8_t reg) +static uint64_t +adjust_operand(const struct decoded_instruction *instruction, uint64_t val, + int size) { - return vm_reg_name_mappings[reg]; + uint64_t ret; + + if (instruction->opcode_flags & ZEXT) { + switch (size) { + case 1: + ret = val & 0xff; + break; + case 2: + ret = val & 0xffff; + break; + case 4: + ret = val & 0xffffffff; + break; + case 8: + ret = val; + break; + default: + break; + } + } else { + /* + * Extend the sign + */ + switch (size) { + case 1: + ret = (int8_t)(val & 0xff); + break; + case 2: + ret = (int16_t)(val & 0xffff); + break; + case 4: + ret = (int32_t)(val & 0xffffffff); + break; + case 8: + ret = val; + break; + default: + break; + } + } + + return (ret); } static int -get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3, - 
const struct decoded_instruction *instruction, uint64_t *operand) +get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3, + const struct decoded_instruction *instruction, uint64_t *operand, + struct mem_range *mr) { enum vm_reg_name regname; uint64_t reg; - uintptr_t target; int error; - uint8_t rm, addressing_mode; - struct memory_region *emulated_memory; + uint8_t rm, addressing_mode, size; if (instruction->opcode_flags & FROM_RM) { rm = instruction->rm; @@ -422,6 +475,17 @@ get_operand(struct vmctx *vm, int vcpu, } else return (-1); + /* + * Determine size of operand + */ + size = 4; + if (instruction->opcode_flags & FROM_8) { + size = 1; + } else if (instruction->opcode_flags & FROM_16 || + instruction->opsz) { + size = 2; + } + regname = get_vm_reg_name(rm); error = vm_get_register(vm, vcpu, regname, &reg); if (error) @@ -430,33 +494,67 @@ get_operand(struct vmctx *vm, int vcpu, switch (addressing_mode) { case MOD_DIRECT: *operand = reg; - return (0); + error = 0; + break; case MOD_INDIRECT: case MOD_INDIRECT_DISP8: case MOD_INDIRECT_DISP32: +#ifdef INSTR_VERIFY + { + uintptr_t target; + target = gla2gpa(reg, guest_cr3); target += instruction->disp; - emulated_memory = find_region(target); - if (emulated_memory) { - return emulated_memory->memread(vm, vcpu, target, - 4, operand, - emulated_memory->arg); - } - return (-1); + assert(gpa == target); + } +#endif + error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size, + operand, mr->arg1, mr->arg2); + break; default: return (-1); } + + if (!error) + *operand = adjust_operand(instruction, *operand, size); + + return (error); +} + +static uint64_t +adjust_write(uint64_t reg, uint64_t operand, int size) +{ + uint64_t val; + + switch (size) { + case 1: + val = (reg & ~0xff) | (operand & 0xff); + break; + case 2: + val = (reg & ~0xffff) | (operand & 0xffff); + break; + case 4: + val = (reg & ~0xffffffff) | (operand & 0xffffffff); + break; + case 8: + val = operand; + default: + break; + } + + return 
(val); } static int -perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3, - const struct decoded_instruction *instruction, uint64_t operand) +perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3, + const struct decoded_instruction *instruction, uint64_t operand, + struct mem_range *mr) { enum vm_reg_name regname; uintptr_t target; int error; + int size; uint64_t reg; - struct memory_region *emulated_memory; uint8_t addressing_mode; if (instruction->opcode_flags & TO_RM) { @@ -467,83 +565,77 @@ perform_write(struct vmctx *vm, int vcpu addressing_mode = MOD_DIRECT; } else return (-1); - - regname = get_vm_reg_name(reg); - error = vm_get_register(vm, vcpu, regname, &reg); - if (error) - return (error); - + + /* + * Determine the operand size. rex.w has priority + */ + size = 4; + if (instruction->rex_w) { + size = 8; + } else if (instruction->opcode_flags & TO_8) { + size = 1; + } else if (instruction->opsz) { + size = 2; + }; + switch(addressing_mode) { case MOD_DIRECT: - return vm_set_register(vm, vcpu, regname, operand); + regname = get_vm_reg_name(reg); + error = vm_get_register(vm, vcpu, regname, &reg); + if (error) + return (error); + operand = adjust_write(reg, operand, size); + + return (vm_set_register(vm, vcpu, regname, operand)); case MOD_INDIRECT: case MOD_INDIRECT_DISP8: case MOD_INDIRECT_DISP32: +#ifdef INSTR_VERIFY + regname = get_vm_reg_name(reg); + error = vm_get_register(vm, vcpu, regname, &reg); + assert(!error); target = gla2gpa(reg, guest_cr3); target += instruction->disp; - emulated_memory = find_region(target); - if (emulated_memory) { - return emulated_memory->memwrite(vm, vcpu, target, - 4, operand, - emulated_memory->arg); - } - return (-1); + assert(gpa == target); +#endif + error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size, + &operand, mr->arg1, mr->arg2); + return (error); default: return (-1); } } static int -emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3, - const struct 
decoded_instruction *instruction) +emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa, + uint64_t cr3, + const struct decoded_instruction *instruction, + struct mem_range *mr) { uint64_t operand; int error; - error = get_operand(vm, vcpu, cr3, instruction, &operand); + error = get_operand(vm, vcpu, gpa, cr3, instruction, &operand, mr); if (error) return (error); - return perform_write(vm, vcpu, cr3, instruction, operand); + return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr); } -int -emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3) +int +emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3, + uint64_t gpa, int flags, struct mem_range *mr) { struct decoded_instruction instr; int error; - void *instruction = gla2hla(rip, cr3); - - if ((error = decode_instruction(instruction, &instr)) != 0) - return (error); - - return emulate_decoded_instruction(vm, vcpu, cr3, &instr); -} - -struct memory_region * -register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread, - emulated_write_func_t memwrite, void *arg) -{ - if (registered_regions >= MAX_EMULATED_REGIONS) - return (NULL); - - struct memory_region *region = &emulated_regions[registered_regions]; - region->start = start; - region->end = start + len; - region->memread = memread; - region->memwrite = memwrite; - region->arg = arg; + void *instruction; - registered_regions++; - return (region); -} + instruction = gla2hla(rip, cr3); -void -move_memory_region(struct memory_region *region, uintptr_t start) -{ - size_t len; + error = decode_instruction(instruction, &instr); + if (!error) + error = emulate_decoded_instruction(vm, vcpu, gpa, cr3, + &instr, mr); - len = region->end - region->start; - region->start = start; - region->end = start + len; + return (error); } Modified: projects/bhyve/usr.sbin/bhyve/instruction_emul.h ============================================================================== --- 
projects/bhyve/usr.sbin/bhyve/instruction_emul.h Fri Oct 19 17:45:56 2012 (r241743) +++ projects/bhyve/usr.sbin/bhyve/instruction_emul.h Fri Oct 19 18:11:17 2012 (r241744) @@ -29,19 +29,8 @@ #ifndef _INSTRUCTION_EMUL_H_ #define _INSTRUCTION_EMUL_H_ -struct memory_region; - -typedef int (*emulated_read_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, - int size, uint64_t *data, void *arg); -typedef int (*emulated_write_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr, - int size, uint64_t data, void *arg); - int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, - uint64_t cr3); -struct memory_region *register_emulated_memory(uintptr_t start, size_t len, - emulated_read_func_t memread, - emulated_write_func_t memwrite, - void *arg); -void move_memory_region(struct memory_region *memory_region, uintptr_t start); + uint64_t cr3, uint64_t gpa, int flags, + struct mem_range *mr); #endif Modified: projects/bhyve/usr.sbin/bhyve/ioapic.c ============================================================================== --- projects/bhyve/usr.sbin/bhyve/ioapic.c Fri Oct 19 17:45:56 2012 (r241743) +++ projects/bhyve/usr.sbin/bhyve/ioapic.c Fri Oct 19 18:11:17 2012 (r241744) @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #include "inout.h" +#include "mem.h" #include "instruction_emul.h" #include "fbsdrun.h" @@ -67,10 +68,13 @@ struct ioapic { static struct ioapic ioapics[1]; /* only a single ioapic for now */ -static int ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, - int size, uint64_t *data, void *arg); -static int ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, - int size, uint64_t data, void *arg); +static int ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, + int size, uint64_t *data); +static int ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, + int size, uint64_t data); +static int ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, + uintptr_t paddr, int size, uint64_t *val, + 
void *arg1, long arg2); static void ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate) @@ -139,8 +143,10 @@ ioapic_assert_pin(struct vmctx *ctx, int void ioapic_init(int which) { - int i; + struct mem_range memp; struct ioapic *ioapic; + int error; + int i; assert(which == 0); @@ -153,14 +159,19 @@ ioapic_init(int which) for (i = 0; i < REDIR_ENTRIES; i++) ioapic->redtbl[i] = 0x0001000000010000UL; - /* Register emulated memory region */ ioapic->paddr = IOAPIC_PADDR; - ioapic->region = register_emulated_memory(ioapic->paddr, - sizeof(struct IOAPIC), - ioapic_region_read, - ioapic_region_write, - (void *)(uintptr_t)which); - assert(ioapic->region != NULL); + + /* Register emulated memory region */ + memp.name = "ioapic"; + memp.flags = MEM_F_RW; + memp.handler = ioapic_region_handler; + memp.arg1 = ioapic; + memp.arg2 = which; + memp.base = ioapic->paddr; + memp.size = sizeof(struct IOAPIC); + error = register_mem(&memp); + + assert (error == 0); ioapic->inited = 1; } @@ -237,15 +248,11 @@ ioapic_write(struct ioapic *ioapic, uint } static int -ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, - uint64_t *data, void *arg) +ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, int size, + uint64_t *data) { - int which, offset; - struct ioapic *ioapic; - - which = (uintptr_t)arg; + int offset; - ioapic = &ioapics[which]; offset = paddr - ioapic->paddr; /* @@ -255,7 +262,7 @@ ioapic_region_read(struct vmctx *vm, int if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { #if 1 printf("invalid access to ioapic%d: size %d, offset %d\n", - which, size, offset); + (int)(ioapic - ioapics), size, offset); #endif *data = 0; return (0); @@ -270,15 +277,11 @@ ioapic_region_read(struct vmctx *vm, int } static int -ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size, - uint64_t data, void *arg) +ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, int size, + uint64_t data) { - int which, offset; - struct 
ioapic *ioapic; - - which = (uintptr_t)arg; + int offset; - ioapic = &ioapics[which]; offset = paddr - ioapic->paddr; /* @@ -288,7 +291,7 @@ ioapic_region_write(struct vmctx *vm, in if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { #if 1 printf("invalid access to ioapic%d: size %d, offset %d\n", - which, size, offset); + (int)(ioapic - ioapics), size, offset); #endif return (0); } @@ -300,3 +303,23 @@ ioapic_region_write(struct vmctx *vm, in return (0); } + +static int +ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, uintptr_t paddr, + int size, uint64_t *val, void *arg1, long arg2) +{ + struct ioapic *ioapic; + int which; + + ioapic = arg1; + which = arg2; + + assert(ioapic == &ioapics[which]); + + if (dir == MEM_F_READ) + ioapic_region_read(ioapic, paddr, size, val); + else + ioapic_region_write(ioapic, paddr, size, *val); + + return (0); +} Added: projects/bhyve/usr.sbin/bhyve/mem.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ projects/bhyve/usr.sbin/bhyve/mem.c Fri Oct 19 18:11:17 2012 (r241744) @@ -0,0 +1,196 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Memory ranges are represented with an RB tree. On insertion, the range + * is checked for overlaps. On lookup, the key has the same base and limit + * so it can be searched within the range. + * + * It is assumed that all setup of ranges takes place in single-threaded + * mode before vCPUs have been started. As such, no locks are used on the + * RB tree. If this is no longer the case, then a r/w lock could be used, + * with readers on the lookup and a writer if the tree needs to be changed + * (and per vCPU caches flushed) + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/tree.h> +#include <sys/errno.h> +#include <machine/vmm.h> + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "mem.h" +#include "instruction_emul.h" + +struct mmio_rb_range { + RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ + struct mem_range mr_param; + uint64_t mr_base; + uint64_t mr_end; +}; + +struct mmio_rb_tree; +RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rbroot; + +/* + * Per-vCPU cache. Since most accesses from a vCPU will be to + * consecutive addresses in a range, it makes sense to cache the + * result of a lookup. 
+ */ +static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; + +static int +mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) +{ + if (a->mr_end < b->mr_base) + return (-1); + else if (a->mr_base > b->mr_end) + return (1); + return (0); +} + +static int +mmio_rb_lookup(uint64_t addr, struct mmio_rb_range **entry) +{ + struct mmio_rb_range find, *res; + + find.mr_base = find.mr_end = addr; + + res = RB_FIND(mmio_rb_tree, &mmio_rbroot, &find); + + if (res != NULL) { + *entry = res; + return (0); + } + + return (ENOENT); +} + +static int +mmio_rb_add(struct mmio_rb_range *new) +{ + struct mmio_rb_range *overlap; + + overlap = RB_INSERT(mmio_rb_tree, &mmio_rbroot, new); + + if (overlap != NULL) { +#ifdef RB_DEBUG + printf("overlap detected: new %lx:%lx, tree %lx:%lx\n", + new->mr_base, new->mr_end, + overlap->mr_base, overlap->mr_end); +#endif + + return (EEXIST); + } + + return (0); +} + +#if 0 +static void +mmio_rb_dump(void) +{ + struct mmio_rb_range *np; + + RB_FOREACH(np, mmio_rb_tree, &mmio_rbroot) { + printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, + np->mr_param.name); + } +} +#endif + +RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +int +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip, + uint64_t cr3, int mode) +{ + struct mmio_rb_range *entry; + int err; + + err = 0; + + /* + * First check the per-vCPU cache + */ + if (mmio_hint[vcpu] && + paddr >= mmio_hint[vcpu]->mr_base && + paddr <= mmio_hint[vcpu]->mr_end) { + err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode, + &mmio_hint[vcpu]->mr_param); + } else { + if (mmio_rb_lookup(paddr, &entry)) { + err = ENOENT; + } else { + mmio_hint[vcpu] = entry; + err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, + mode, &entry->mr_param); + } + } + + return (err); +} + +int +register_mem(struct mem_range *memp) +{ + struct mmio_rb_range *mrp; + int err; + + err = 0; + + mrp = malloc(sizeof(struct mmio_rb_range)); + + if (mrp != NULL) { + 
mrp->mr_param = *memp; + mrp->mr_base = memp->base; + mrp->mr_end = memp->base + memp->size - 1; + + err = mmio_rb_add(mrp); + if (err) + free(mrp); + } else + err = ENOMEM; + + return (err); +} + +void +init_mem(void) +{ + + RB_INIT(&mmio_rbroot); +} Added: projects/bhyve/usr.sbin/bhyve/mem.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ projects/bhyve/usr.sbin/bhyve/mem.h Fri Oct 19 18:11:17 2012 (r241744) @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MEM_H_ +#define _MEM_H_ + +#include <sys/linker_set.h> + +struct vmctx; + +typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2); + +struct mem_range { + const char *name; + int flags; + mem_func_t handler; + void *arg1; + long arg2; + uint64_t base; + uint64_t size; +}; +#define MEM_F_READ 0x1 +#define MEM_F_WRITE 0x2 +#define MEM_F_RW 0x3 + +void init_mem(void); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip, + uint64_t cr3, int mode); + +int register_mem(struct mem_range *memp); + +#endif /* _MEM_H_ */ Modified: projects/bhyve/usr.sbin/bhyve/pci_emul.c ============================================================================== --- projects/bhyve/usr.sbin/bhyve/pci_emul.c Fri Oct 19 17:45:56 2012 (r241743) +++ projects/bhyve/usr.sbin/bhyve/pci_emul.c Fri Oct 19 18:11:17 2012 (r241744) @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include "fbsdrun.h" #include "inout.h" +#include "mem.h" #include "pci_emul.h" #include "ioapic.h" @@ -364,22 +365,26 @@ pci_finish_mptable_names(void) } static int -pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) +pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) { struct pci_devinst *pdi = arg; struct pci_devemu *pe = pdi->pi_d; - int offset, i; + uint64_t offset; + int i; for (i = 0; i <= PCI_BARMAX; i++) { if (pdi->pi_bar[i].type == PCIBAR_IO && port >= pdi->pi_bar[i].addr && - port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { + port + bytes <= + pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { offset = port - pdi->pi_bar[i].addr; if (in) - *eax = (*pe->pe_ior)(pdi, i, offset, bytes); + *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, + offset, bytes); else - (*pe->pe_iow)(pdi, i, offset, bytes, *eax); + (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, + bytes, *eax); return (0); } } @@ -387,6 
+392,32 @@ pci_emul_handler(struct vmctx *ctx, int } static int +pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2) +{ + struct pci_devinst *pdi = arg1; + struct pci_devemu *pe = pdi->pi_d; + uint64_t offset; + int bidx = (int) arg2; + + assert(bidx <= PCI_BARMAX); + assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || + pdi->pi_bar[bidx].type == PCIBAR_MEM64); + assert(addr >= pdi->pi_bar[bidx].addr && + addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); + + offset = addr - pdi->pi_bar[bidx].addr; + + if (dir == MEM_F_WRITE) + (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); + else + *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); + + return (0); +} *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201210191811.q9JIBIQu049356>