From owner-svn-src-stable@FreeBSD.ORG Tue Jul 22 04:39:18 2014
Delivered-To: svn-src-stable@freebsd.org
Message-Id: <201407220439.s6M4dHeV047899@svn.freebsd.org>
From: John Baldwin
Date: Tue, 22 Jul 2014 04:39:17 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject: svn commit: r268976 - in stable/10: lib/libvmmapi sys/amd64/include
	sys/amd64/vmm sys/amd64/vmm/intel usr.sbin/bhyve
X-SVN-Group: stable-10
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
List-Id: SVN commit messages for all the -stable branches of the src tree

Author: jhb
Date: Tue Jul 22 04:39:16 2014
New Revision: 268976
URL: http://svnweb.freebsd.org/changeset/base/268976

Log:
  MFC 266424,266476,266524,266573,266595,266626,266627,266633,266641,266642,
  266708,266724,266934,266935,268521:
  Emulation of the "ins" and "outs" instructions.

  Various fixes for translating guest linear addresses to guest physical
  addresses.
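[Editorial note: the following sketch is not part of the commit. It shows one
way a bhyve-style userspace handler for a VM_EXITCODE_INOUT_STR exit might
fetch the source bytes of a single "outs" iteration using the libvmmapi calls
added below. The helper name fetch_outs_bytes and the error-handling policy
are hypothetical; the vm_gla2gpa(), vm_copyin() and vie_calculate_gla()
signatures are the ones this commit introduces.]

	#include <sys/param.h>
	#include <sys/types.h>
	#include <sys/mman.h>
	#include <sys/uio.h>

	#include <machine/vmm.h>
	#include <machine/vmm_instruction_emul.h>

	#include <vmmapi.h>

	static int
	fetch_outs_bytes(struct vmctx *ctx, int vcpu, struct vm_inout_str *vis,
	    void *buf)
	{
		struct iovec iov[2];	/* a 1/2/4-byte access spans at most 2 pages */
		uint64_t gla;
		int bytes, error;

		bytes = vis->inout.bytes;	/* 1, 2 or 4 */

		/* %rsi holds an offset into the source segment, not a linear address */
		error = vie_calculate_gla(vis->paging.cpu_mode, vis->seg_name,
		    &vis->seg_desc, vis->index, bytes, vis->addrsize, PROT_READ,
		    &gla);
		if (error)
			return (-1);	/* the real handler would inject #GP(0) */

		/* 0 = translated, 1 = fault injected into the guest, -1 = error */
		error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes, PROT_READ,
		    iov, nitems(iov));
		if (error)
			return (error);

		vm_copyin(ctx, vcpu, iov, buf, bytes);
		return (0);
	}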
Modified:
  stable/10/lib/libvmmapi/vmmapi.c
  stable/10/lib/libvmmapi/vmmapi.h
  stable/10/sys/amd64/include/vmm.h
  stable/10/sys/amd64/include/vmm_dev.h
  stable/10/sys/amd64/include/vmm_instruction_emul.h
  stable/10/sys/amd64/vmm/intel/vmx.c
  stable/10/sys/amd64/vmm/vmm.c
  stable/10/sys/amd64/vmm/vmm_dev.c
  stable/10/sys/amd64/vmm/vmm_instruction_emul.c
  stable/10/sys/amd64/vmm/vmm_ioport.c
  stable/10/sys/amd64/vmm/vmm_ioport.h
  stable/10/sys/amd64/vmm/vmm_ktr.h
  stable/10/usr.sbin/bhyve/bhyverun.c
  stable/10/usr.sbin/bhyve/inout.c
  stable/10/usr.sbin/bhyve/inout.h
  stable/10/usr.sbin/bhyve/mem.c
  stable/10/usr.sbin/bhyve/pci_virtio_block.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/lib/libvmmapi/vmmapi.c
==============================================================================
--- stable/10/lib/libvmmapi/vmmapi.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/lib/libvmmapi/vmmapi.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -33,8 +33,10 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 
 #include
+#include
 
 #include
 #include
@@ -937,3 +939,107 @@ vm_get_hpet_capabilities(struct vmctx *c
 	*capabilities = cap.capabilities;
 	return (error);
 }
+
+static int
+gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, int *fault, uint64_t *gpa)
+{
+	struct vm_gla2gpa gg;
+	int error;
+
+	bzero(&gg, sizeof(struct vm_gla2gpa));
+	gg.vcpuid = vcpu;
+	gg.prot = prot;
+	gg.gla = gla;
+	gg.paging = *paging;
+
+	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
+	if (error == 0) {
+		*fault = gg.fault;
+		*gpa = gg.gpa;
+	}
+	return (error);
+}
+
+#ifndef min
+#define	min(a,b)	(((a) < (b)) ? (a) : (b))
+#endif
+
+int
+vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
+{
+	uint64_t gpa;
+	int error, fault, i, n, off;
+
+	for (i = 0; i < iovcnt; i++) {
+		iov[i].iov_base = 0;
+		iov[i].iov_len = 0;
+	}
+
+	while (len) {
+		assert(iovcnt > 0);
+		error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa);
+		if (error)
+			return (-1);
+		if (fault)
+			return (1);
+
+		off = gpa & PAGE_MASK;
+		n = min(len, PAGE_SIZE - off);
+
+		iov->iov_base = (void *)gpa;
+		iov->iov_len = n;
+		iov++;
+		iovcnt--;
+
+		gla += n;
+		len -= n;
+	}
+	return (0);
+}
+
+void
+vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
+{
+	const char *src;
+	char *dst;
+	uint64_t gpa;
+	size_t n;
+
+	dst = vp;
+	while (len) {
+		assert(iov->iov_len);
+		gpa = (uint64_t)iov->iov_base;
+		n = min(len, iov->iov_len);
+		src = vm_map_gpa(ctx, gpa, n);
+		bcopy(src, dst, n);
+
+		iov++;
+		dst += n;
+		len -= n;
+	}
+}
+
+void
+vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
+    size_t len)
+{
+	const char *src;
+	char *dst;
+	uint64_t gpa;
+	size_t n;
+
+	src = vp;
+	while (len) {
+		assert(iov->iov_len);
+		gpa = (uint64_t)iov->iov_base;
+		n = min(len, iov->iov_len);
+		dst = vm_map_gpa(ctx, gpa, n);
+		bcopy(src, dst, n);
+
+		iov++;
+		src += n;
+		len -= n;
+	}
+}

Modified: stable/10/lib/libvmmapi/vmmapi.h
==============================================================================
--- stable/10/lib/libvmmapi/vmmapi.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/lib/libvmmapi/vmmapi.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -29,6 +29,7 @@
 #ifndef _VMMAPI_H_
 #define	_VMMAPI_H_
 
+struct iovec;
 struct vmctx;
 enum x2apic_state;
 
@@ -109,6 +110,18 @@ int	vm_set_x2apic_state(struct vmctx *ct
 
 int	vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities);
 
+/*
+ * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
+ * The 'iovcnt' should be big enough to accommodate all GPA segments.
+ * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
+ */
+int	vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt);
+void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
+    void *host_dst, size_t len);
+void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
+    struct iovec *guest_iov, size_t len);
+
 /* Reset vcpu register state */
 int	vcpu_reset(struct vmctx *ctx, int vcpu);

Modified: stable/10/sys/amd64/include/vmm.h
==============================================================================
--- stable/10/sys/amd64/include/vmm.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/include/vmm.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -54,6 +54,7 @@ struct vmspace;
 struct vm_object;
 struct pmap;
 
+enum vm_reg_name;
 enum x2apic_state;
 
 typedef int	(*vmm_init_func_t)(int ipinum);
@@ -236,10 +237,11 @@ int vm_exception_pending(struct vm *vm,
 
 void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
 void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
+void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);
 
-#endif	/* KERNEL */
+enum vm_reg_name vm_segment_name(int seg_encoding);
 
-#include
+#endif	/* KERNEL */
 
 #define	VM_MAXCPU	16	/* maximum virtual cpus */
@@ -280,6 +282,7 @@ enum vm_reg_name {
 	VM_REG_GUEST_IDTR,
 	VM_REG_GUEST_GDTR,
 	VM_REG_GUEST_EFER,
+	VM_REG_GUEST_CR2,
 	VM_REG_LAST
 };
@@ -318,6 +321,76 @@ struct seg_desc {
 	uint32_t	limit;
 	uint32_t	access;
 };
+#define	SEG_DESC_TYPE(desc)		((desc)->access & 0x001f)
+#define	SEG_DESC_PRESENT(desc)		((desc)->access & 0x0080)
+#define	SEG_DESC_DEF32(desc)		((desc)->access & 0x4000)
+#define	SEG_DESC_GRANULARITY(desc)	((desc)->access & 0x8000)
+#define	SEG_DESC_UNUSABLE(desc)		((desc)->access & 0x10000)
+
+enum vm_cpu_mode {
+	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
+	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+	PAGING_MODE_FLAT,
+	PAGING_MODE_32,
+	PAGING_MODE_PAE,
+	PAGING_MODE_64,
+};
+
+struct vm_guest_paging {
+	uint64_t	cr3;
+	int		cpl;
+	enum vm_cpu_mode cpu_mode;
+	enum vm_paging_mode paging_mode;
+};
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason why their contents
+ * need to be exposed is because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+	uint8_t		op_byte;	/* actual opcode byte */
+	uint8_t		op_type;	/* type of operation (e.g. MOV) */
+	uint16_t	op_flags;
+};
+
+#define	VIE_INST_SIZE	15
+struct vie {
+	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
+	uint8_t		num_valid;		/* size of the instruction */
+	uint8_t		num_processed;
+
+	uint8_t		rex_w:1,		/* REX prefix */
+			rex_r:1,
+			rex_x:1,
+			rex_b:1,
+			rex_present:1;
+
+	uint8_t		mod:2,			/* ModRM byte */
+			reg:4,
+			rm:4;
+
+	uint8_t		ss:2,			/* SIB byte */
+			index:4,
+			base:4;
+
+	uint8_t		disp_bytes;
+	uint8_t		imm_bytes;
+
+	uint8_t		scale;
+	int		base_register;		/* VM_REG_GUEST_xyz */
+	int		index_register;		/* VM_REG_GUEST_xyz */
+
+	int64_t		displacement;		/* optional addr displacement */
+	int64_t		immediate;		/* optional immediate operand */
+
+	uint8_t		decoded;	/* set to 1 if successfully decoded */
+
+	struct vie_op	op;			/* opcode description */
+};
 
 enum vm_exitcode {
 	VM_EXITCODE_INOUT,
@@ -335,22 +408,38 @@ enum vm_exitcode {
 	VM_EXITCODE_RENDEZVOUS,
 	VM_EXITCODE_IOAPIC_EOI,
 	VM_EXITCODE_SUSPENDED,
+	VM_EXITCODE_INOUT_STR,
 	VM_EXITCODE_MAX
 };
 
+struct vm_inout {
+	uint16_t	bytes:3;	/* 1 or 2 or 4 */
+	uint16_t	in:1;
+	uint16_t	string:1;
+	uint16_t	rep:1;
+	uint16_t	port;
+	uint32_t	eax;		/* valid for out */
+};
+
+struct vm_inout_str {
+	struct vm_inout	inout;		/* must be the first element */
+	struct vm_guest_paging paging;
+	uint64_t	rflags;
+	uint64_t	cr0;
+	uint64_t	index;
+	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
+	int		addrsize;
+	enum vm_reg_name seg_name;
+	struct seg_desc seg_desc;
+};
+
 struct vm_exit {
 	enum vm_exitcode	exitcode;
 	int			inst_length;	/* 0 means unknown */
 	uint64_t		rip;
 	union {
-		struct {
-			uint16_t	bytes:3;	/* 1 or 2 or 4 */
-			uint16_t	in:1;		/* out is 0, in is 1 */
-			uint16_t	string:1;
-			uint16_t	rep:1;
-			uint16_t	port;
-			uint32_t	eax;		/* valid for out */
-		} inout;
+		struct vm_inout	inout;
+		struct vm_inout_str inout_str;
 		struct {
 			uint64_t	gpa;
 			int		fault_type;
@@ -358,9 +447,7 @@ struct vm_exit {
 		struct {
 			uint64_t	gpa;
 			uint64_t	gla;
-			uint64_t	cr3;
-			enum vie_cpu_mode cpu_mode;
-			enum vie_paging_mode paging_mode;
+			struct vm_guest_paging paging;
 			struct vie	vie;
 		} inst_emul;
 		/*

Modified: stable/10/sys/amd64/include/vmm_dev.h
==============================================================================
--- stable/10/sys/amd64/include/vmm_dev.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/include/vmm_dev.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -168,6 +168,15 @@ struct vm_suspend {
 	enum vm_suspend_how how;
 };
 
+struct vm_gla2gpa {
+	int		vcpuid;		/* inputs */
+	int		prot;		/* PROT_READ or PROT_WRITE */
+	uint64_t	gla;
+	struct vm_guest_paging paging;
+	int		fault;		/* outputs */
+	uint64_t	gpa;
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
@@ -180,6 +189,7 @@ enum {
 	IOCNUM_MAP_MEMORY = 10,
 	IOCNUM_GET_MEMORY_SEG = 11,
 	IOCNUM_GET_GPA_PMAP = 12,
+	IOCNUM_GLA2GPA = 13,
 
 	/* register/state accessors */
 	IOCNUM_SET_REGISTER = 20,
@@ -289,4 +299,6 @@ enum {
 	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
+#define	VM_GLA2GPA	\
+	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
 #endif

Modified: stable/10/sys/amd64/include/vmm_instruction_emul.h
==============================================================================
--- stable/10/sys/amd64/include/vmm_instruction_emul.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/include/vmm_instruction_emul.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -29,63 +29,7 @@
 #ifndef	_VMM_INSTRUCTION_EMUL_H_
 #define	_VMM_INSTRUCTION_EMUL_H_
 
-enum vie_cpu_mode {
-	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
-	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
-};
-
-enum vie_paging_mode {
-	PAGING_MODE_FLAT,
-	PAGING_MODE_32,
-	PAGING_MODE_PAE,
-	PAGING_MODE_64,
-};
-
-/*
- * The data structures 'vie' and 'vie_op' are meant to be opaque to the
- * consumers of instruction decoding. The only reason why their contents
- * need to be exposed is because they are part of the 'vm_exit' structure.
- */
-struct vie_op {
-	uint8_t		op_byte;	/* actual opcode byte */
-	uint8_t		op_type;	/* type of operation (e.g. MOV) */
-	uint16_t	op_flags;
-};
-
-#define	VIE_INST_SIZE	15
-struct vie {
-	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
-	uint8_t		num_valid;		/* size of the instruction */
-	uint8_t		num_processed;
-
-	uint8_t		rex_w:1,		/* REX prefix */
-			rex_r:1,
-			rex_x:1,
-			rex_b:1,
-			rex_present:1;
-
-	uint8_t		mod:2,			/* ModRM byte */
-			reg:4,
-			rm:4;
-
-	uint8_t		ss:2,			/* SIB byte */
-			index:4,
-			base:4;
-
-	uint8_t		disp_bytes;
-	uint8_t		imm_bytes;
-
-	uint8_t		scale;
-	int		base_register;		/* VM_REG_GUEST_xyz */
-	int		index_register;		/* VM_REG_GUEST_xyz */
-
-	int64_t		displacement;		/* optional addr displacement */
-	int64_t		immediate;		/* optional immediate operand */
-
-	uint8_t		decoded;	/* set to 1 if successfully decoded */
-
-	struct vie_op	op;			/* opcode description */
-};
+#include
 
 /*
  * Callback functions to read and write memory regions.
@@ -111,6 +55,24 @@ int vmm_emulate_instruction(void *vm, in
 	mem_region_read_t mrr, mem_region_write_t mrw,
 	void *mrarg);
 
+int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+    uint64_t val, int size);
+
+/*
+ * Returns 1 if an alignment check exception should be injected and 0
+ * otherwise.
+ */
+int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
+    uint64_t rflags, uint64_t gla);
+
+/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
+int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+
+uint64_t vie_size2mask(int size);
+
+int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
+    uint64_t *gla);
+
 #ifdef	_KERNEL
 /*
  * APIs to fetch and decode the instruction from nested page fault handler.
@@ -118,8 +80,18 @@ int vmm_emulate_instruction(void *vm, in
  * 'vie' must be initialized before calling 'vmm_fetch_instruction()'
  */
 int vmm_fetch_instruction(struct vm *vm, int cpuid,
-			  uint64_t rip, int inst_length, uint64_t cr3,
-			  enum vie_paging_mode paging_mode, struct vie *vie);
+			  struct vm_guest_paging *guest_paging,
+			  uint64_t rip, int inst_length, struct vie *vie);
+
+/*
+ * Translate the guest linear address 'gla' to a guest physical address.
+ *
+ * Returns 0 on success and '*gpa' contains the result of the translation.
+ * Returns 1 if an exception was injected into the guest.
+ * Returns -1 otherwise.
+ */
+int vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa);
 
 void vie_init(struct vie *vie);
 
@@ -136,7 +108,7 @@ void vie_init(struct vie *vie);
  */
 #define	VIE_INVALID_GLA		(1UL << 63)	/* a non-canonical address */
 int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
-			   enum vie_cpu_mode cpu_mode, struct vie *vie);
+			   enum vm_cpu_mode cpu_mode, struct vie *vie);
 #endif	/* _KERNEL */
 
 #endif	/* _VMM_INSTRUCTION_EMUL_H_ */

Modified: stable/10/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/10/sys/amd64/vmm/intel/vmx.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/intel/vmx.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
+#include
 
 #include "vmm_host.h"
 #include "vmm_ioport.h"
 #include "vmm_ipi.h"
@@ -185,6 +186,8 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_
  */
 #define	APIC_ACCESS_ADDRESS	0xFFFFF000
 
+static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
+static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
 static void vmx_inject_pir(struct vlapic *vlapic);
 
 #ifdef KTR
@@ -539,7 +542,7 @@ static int
 vmx_init(int ipinum)
 {
 	int error, use_tpr_shadow;
-	uint64_t fixed0, fixed1, feature_control;
+	uint64_t basic, fixed0, fixed1, feature_control;
 	uint32_t tmp, procbased2_vid_bits;
 
 	/* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
@@ -559,6 +562,17 @@ vmx_init(int ipinum)
 		return (ENXIO);
 	}
 
+	/*
+	 * Verify capabilities MSR_VMX_BASIC:
+	 * - bit 54 indicates support for INS/OUTS decoding
+	 */
+	basic = rdmsr(MSR_VMX_BASIC);
+	if ((basic & (1UL << 54)) == 0) {
+		printf("vmx_init: processor does not support desired basic "
+		    "capabilities\n");
+		return (EINVAL);
+	}
+
 	/* Check support for primary processor-based VM-execution controls */
 	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
 			       MSR_VMX_TRUE_PROCBASED_CTLS,
@@ -1539,7 +1553,19 @@ vmx_emulate_cr_access(struct vmx *vmx, i
 	return (HANDLED);
 }
 
-static enum vie_cpu_mode
+/*
+ * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL
+ */
+static int
+vmx_cpl(void)
+{
+	uint32_t ssar;
+
+	ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS);
+	return ((ssar >> 5) & 0x3);
+}
+
+static enum vm_cpu_mode
 vmx_cpu_mode(void)
 {
 
@@ -1549,7 +1575,7 @@ vmx_cpu_mode(void)
 		return (CPU_MODE_COMPATIBILITY);
 }
 
-static enum vie_paging_mode
+static enum vm_paging_mode
 vmx_paging_mode(void)
 {
 
@@ -1563,6 +1589,89 @@ vmx_paging_mode(void)
 		return (PAGING_MODE_PAE);
 }
 
+static uint64_t
+inout_str_index(struct vmx *vmx, int vcpuid, int in)
+{
+	uint64_t val;
+	int error;
+	enum vm_reg_name reg;
+
+	reg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+	error = vmx_getreg(vmx, vcpuid, reg, &val);
+	KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error));
+	return (val);
+}
+
+static uint64_t
+inout_str_count(struct vmx *vmx, int vcpuid, int rep)
+{
+	uint64_t val;
+	int error;
+
+	if (rep) {
+		error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val);
+		KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error));
+	} else {
+		val = 1;
+	}
+	return (val);
+}
+
+static int
+inout_str_addrsize(uint32_t inst_info)
+{
+	uint32_t size;
+
+	size = (inst_info >> 7) & 0x7;
+	switch (size) {
+	case 0:
+		return (2);	/* 16 bit */
+	case 1:
+		return (4);	/* 32 bit */
+	case 2:
+		return (8);	/* 64 bit */
+	default:
+		panic("%s: invalid size encoding %d", __func__, size);
+	}
+}
+
+static void
+inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
+    struct vm_inout_str *vis)
+{
+	int error, s;
+
+	if (in) {
+		vis->seg_name = VM_REG_GUEST_ES;
+	} else {
+		s = (inst_info >> 15) & 0x7;
+		vis->seg_name = vm_segment_name(s);
+	}
+
+	error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc);
+	KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error));
+
+	/* XXX modify svm.c to update bit 16 of seg_desc.access (unusable) */
+}
+
+static void
+vmx_paging_info(struct vm_guest_paging *paging)
+{
+	paging->cr3 = vmcs_guest_cr3();
+	paging->cpl = vmx_cpl();
+	paging->cpu_mode = vmx_cpu_mode();
+	paging->paging_mode = vmx_paging_mode();
+}
+
+static void
+vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
+{
+	vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+	vmexit->u.inst_emul.gpa = gpa;
+	vmexit->u.inst_emul.gla = gla;
+	vmx_paging_info(&vmexit->u.inst_emul.paging);
+}
+
 static int
 ept_fault_type(uint64_t ept_qual)
 {
@@ -1754,12 +1863,8 @@ vmx_handle_apic_access(struct vmx *vmx,
 	}
 
 	if (allowed) {
-		vmexit->exitcode = VM_EXITCODE_INST_EMUL;
-		vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset;
-		vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
-		vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
-		vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
-		vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
+		vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset,
+		    VIE_INVALID_GLA);
 	}
 
 	/*
@@ -1776,10 +1881,12 @@ vmx_handle_apic_access(struct vmx *vmx,
 static int
 vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 {
-	int error, handled;
+	int error, handled, in;
 	struct vmxctx *vmxctx;
 	struct vlapic *vlapic;
-	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, reason;
+	struct vm_inout_str *vis;
+	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
+	uint32_t reason;
 	uint64_t qual, gpa;
 	bool retu;
 
@@ -1936,15 +2043,22 @@ vmx_exit_process(struct vmx *vmx, int vc
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
 		vmexit->exitcode = VM_EXITCODE_INOUT;
 		vmexit->u.inout.bytes = (qual & 0x7) + 1;
-		vmexit->u.inout.in = (qual & 0x8) ? 1 : 0;
+		vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0;
 		vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
 		vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
 		vmexit->u.inout.port = (uint16_t)(qual >> 16);
 		vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
-		error = emulate_ioport(vmx->vm, vcpu, vmexit);
-		if (error == 0)  {
-			handled = 1;
-			vmxctx->guest_rax = vmexit->u.inout.eax;
+		if (vmexit->u.inout.string) {
+			inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
+			vmexit->exitcode = VM_EXITCODE_INOUT_STR;
+			vis = &vmexit->u.inout_str;
+			vmx_paging_info(&vis->paging);
+			vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
+			vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
+			vis->index = inout_str_index(vmx, vcpu, in);
+			vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
+			vis->addrsize = inout_str_addrsize(inst_info);
+			inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
 		}
 		break;
 	case EXIT_REASON_CPUID:
@@ -1990,12 +2104,7 @@ vmx_exit_process(struct vmx *vmx, int vc
 			vmexit->u.paging.fault_type = ept_fault_type(qual);
 			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
 		} else if (ept_emulation_fault(qual)) {
-			vmexit->exitcode = VM_EXITCODE_INST_EMUL;
-			vmexit->u.inst_emul.gpa = gpa;
-			vmexit->u.inst_emul.gla = vmcs_gla();
-			vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
-			vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
-			vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
+			vmexit_inst_emul(vmexit, gpa, vmcs_gla());
 			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
 		}
 		/*
@@ -2324,6 +2433,8 @@ vmxctx_regptr(struct vmxctx *vmxctx, int
 		return (&vmxctx->guest_r14);
 	case VM_REG_GUEST_R15:
 		return (&vmxctx->guest_r15);
+	case VM_REG_GUEST_CR2:
+		return (&vmxctx->guest_cr2);
 	default:
 		break;
 	}

Modified: stable/10/sys/amd64/vmm/vmm.c
==============================================================================
--- stable/10/sys/amd64/vmm/vmm.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/vmm.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
+#include
 
+#include "vmm_ioport.h"
 #include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
@@ -1131,34 +1133,33 @@ vm_handle_inst_emul(struct vm *vm, int v
 	struct vie *vie;
 	struct vcpu *vcpu;
 	struct vm_exit *vme;
-	int error, inst_length;
-	uint64_t rip, gla, gpa, cr3;
-	enum vie_cpu_mode cpu_mode;
-	enum vie_paging_mode paging_mode;
+	uint64_t gla, gpa;
+	struct vm_guest_paging *paging;
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
+	int error;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 
-	rip = vme->rip;
-	inst_length = vme->inst_length;
-
 	gla = vme->u.inst_emul.gla;
 	gpa = vme->u.inst_emul.gpa;
-	cr3 = vme->u.inst_emul.cr3;
-	cpu_mode = vme->u.inst_emul.cpu_mode;
-	paging_mode = vme->u.inst_emul.paging_mode;
 	vie = &vme->u.inst_emul.vie;
+	paging = &vme->u.inst_emul.paging;
 
 	vie_init(vie);
 
 	/* Fetch, decode and emulate the faulting instruction */
-	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
-	    paging_mode, vie) != 0)
+	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
+	    vme->inst_length, vie);
+	if (error == 1)
+		return (0);	/* Resume guest to handle page fault */
+	else if (error == -1)
 		return (EFAULT);
+	else if (error != 0)
+		panic("%s: vmm_fetch_instruction error %d", __func__, error);
 
-	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
+	if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
 		return (EFAULT);
 
 	/* return to userland unless this is an in-kernel emulated device */
@@ -1348,6 +1349,10 @@ restart:
 		case VM_EXITCODE_INST_EMUL:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
+		case VM_EXITCODE_INOUT:
+		case VM_EXITCODE_INOUT_STR:
+			error = vm_handle_inout(vm, vcpuid, vme, &retu);
+			break;
 		default:
 			retu = true;	/* handled in userland */
 			break;
@@ -1430,6 +1435,25 @@ vm_inject_fault(struct vm *vm, int vcpui
 }
 
 void
+vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
+{
+	struct vm_exception pf = {
+		.vector = IDT_PF,
+		.error_code_valid = 1,
+		.error_code = error_code
+	};
+	int error;
+
+	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
+	    error_code, cr2);
+
+	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
+	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
+
+	vm_inject_fault(vm, vcpuid, &pf);
+}
+
+void
 vm_inject_gp(struct vm *vm, int vcpuid)
 {
 	struct vm_exception gpf = {
@@ -1856,3 +1880,20 @@ vm_atpit(struct vm *vm)
 {
 	return (vm->vatpit);
 }
+
+enum vm_reg_name
+vm_segment_name(int seg)
+{
+	static enum vm_reg_name seg_names[] = {
+		VM_REG_GUEST_ES,
+		VM_REG_GUEST_CS,
+		VM_REG_GUEST_SS,
+		VM_REG_GUEST_DS,
+		VM_REG_GUEST_FS,
+		VM_REG_GUEST_GS
+	};
+
+	KASSERT(seg >= 0 && seg < nitems(seg_names),
+	    ("%s: invalid segment encoding %d", __func__, seg));
+	return (seg_names[seg]);
+}

Modified: stable/10/sys/amd64/vmm/vmm_dev.c
==============================================================================
--- stable/10/sys/amd64/vmm/vmm_dev.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/vmm_dev.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
+#include
 #include
 
 #include "vmm_lapic.h"
@@ -168,6 +169,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	struct vm_x2apic *x2apic;
 	struct vm_gpa_pte *gpapte;
 	struct vm_suspend *vmsuspend;
+	struct vm_gla2gpa *gg;
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
@@ -192,6 +194,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	case VM_PPTDEV_MSI:
 	case VM_PPTDEV_MSIX:
 	case VM_SET_X2APIC_STATE:
+	case VM_GLA2GPA:
 		/*
 		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
@@ -415,6 +418,27 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	case VM_GET_HPET_CAPABILITIES:
 		error = vhpet_getcap((struct vm_hpet_cap *)data);
 		break;
+	case VM_GLA2GPA: {
+		CTASSERT(PROT_READ == VM_PROT_READ);
+		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
+		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
+		gg = (struct vm_gla2gpa *)data;
+		error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
+		    gg->prot, &gg->gpa);
+		KASSERT(error == 0 || error == 1 || error == -1,
+		    ("%s: vmm_gla2gpa unknown error %d", __func__, error));
+		if (error >= 0) {
+			/*
+			 * error = 0: the translation was successful
+			 * error = 1: a fault was injected into the guest
+			 */
+			gg->fault = error;
+			error = 0;
+		} else {
+			error = EFAULT;
+		}
+		break;
+	}
 	default:
 		error = ENOTTY;
 		break;

Modified: stable/10/sys/amd64/vmm/vmm_instruction_emul.c
==============================================================================
--- stable/10/sys/amd64/vmm/vmm_instruction_emul.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/vmm_instruction_emul.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -46,9 +47,15 @@ __FBSDID("$FreeBSD$");
 
 #include
+#include
 #include
+
+#define	KASSERT(exp,msg)	assert((exp))
 #endif	/* _KERNEL */
 
+#include
+#include
+#include
+
 /* struct vie_op.op_type */
 enum {
 	VIE_OP_TYPE_NONE = 0,
@@ -205,7 +212,7 @@ vie_read_bytereg(void *vm, int vcpuid, s
 	return (error);
 }
 
-static int
+int
 vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
     uint64_t val, int size)
 {
@@ -560,6 +567,155 @@ vmm_emulate_instruction(void *vm, int vc
 	return (error);
 }
 
+int
+vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
+{
+	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+	    ("%s: invalid size %d", __func__, size));
+	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
+
+	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
+		return (0);
+
+	return ((gla & (size - 1)) ? 1 : 0);
+}
+
+int
+vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
+{
+	uint64_t mask;
+
+	if (cpu_mode != CPU_MODE_64BIT)
+		return (0);
+
+	/*
+	 * The value of the bit 47 in the 'gla' should be replicated in the
+	 * most significant 16 bits.
+	 */
+	mask = ~((1UL << 48) - 1);
+	if (gla & (1UL << 47))
+		return ((gla & mask) != mask);
+	else
+		return ((gla & mask) != 0);
+}
+
+uint64_t
+vie_size2mask(int size)
+{
+	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+	    ("vie_size2mask: invalid size %d", size));
+	return (size2mask[size]);
+}
+
+int
+vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+    int prot, uint64_t *gla)
+{
+	uint64_t firstoff, low_limit, high_limit, segbase;
+	int glasize, type;
+
+	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
+	    ("%s: invalid segment %d", __func__, seg));
+	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
+	    ("%s: invalid operand size %d", __func__, length));
+	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
+	    ("%s: invalid prot %#x", __func__, prot));
+
+	firstoff = offset;
+	if (cpu_mode == CPU_MODE_64BIT) {
+		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
+		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
+		glasize = 8;
+	} else {
+		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
+		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
+		glasize = 4;
+		/*
+		 * If the segment selector is loaded with a NULL selector
+		 * then the descriptor is unusable and attempting to use
+		 * it results in a #GP(0).
+		 */
+		if (SEG_DESC_UNUSABLE(desc))
+			return (-1);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
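[Editorial note: the following is an illustrative restatement, not part of the
commit. It extracts the canonical-address rule implemented by
vie_canonical_check() above into a standalone function with a few spot checks:
in 64-bit mode, bit 47 of a linear address must be sign-extended through bits
63:48, so the address space has a non-canonical "hole" between the two halves.]

	#include <assert.h>
	#include <stdint.h>

	/* Returns 1 if 'gla' is not canonical, mirroring vie_canonical_check() */
	static int
	is_noncanonical(uint64_t gla)
	{
		uint64_t mask;

		mask = ~((UINT64_C(1) << 48) - 1);	/* bits 63:48 */
		if (gla & (UINT64_C(1) << 47))
			return ((gla & mask) != mask);	/* must be all ones */
		else
			return ((gla & mask) != 0);	/* must be all zeroes */
	}

	int
	main(void)
	{
		/* top of the lower canonical half */
		assert(is_noncanonical(UINT64_C(0x00007fffffffffff)) == 0);
		/* bottom of the upper canonical half */
		assert(is_noncanonical(UINT64_C(0xffff800000000000)) == 0);
		/* first address inside the non-canonical hole */
		assert(is_noncanonical(UINT64_C(0x0000800000000000)) == 1);
		return (0);
	}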