Date: Sat, 27 Jun 2015 22:48:22 +0000 (UTC) From: Neel Natu <neel@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r284894 - in stable/10: lib/libvmmapi share/examples/bhyve sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel sys/amd64/vmm/io sys/modules/vmm usr.sbin/bhyve usr.sbin... Message-ID: <201506272248.t5RMmM6h043264@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: neel Date: Sat Jun 27 22:48:22 2015 New Revision: 284894 URL: https://svnweb.freebsd.org/changeset/base/284894 Log: MFC r276428: Replace bhyve's minimal RTC emulation with a fully featured one in vmm.ko. MFC r276432: Initialize all fields of 'struct vm_exception exception' before passing it to vm_inject_exception(). MFC r276763: Clear blocking due to STI or MOV SS in the hypervisor when an instruction is emulated or when the vcpu incurs an exception. MFC r277149: Clean up usage of 'struct vm_exception' to only communicate information from userspace to vmm.ko when injecting an exception. MFC r277168: Fix typo (missing comma). MFC r277309: Make the error message explicit instead of just printing the usage if the virtual machine name is not specified. MFC r277310: Simplify instruction restart logic in bhyve. MFC r277359: Fix a bug in libvmmapi 'vm_copy_setup()' where it would return success even if the 'gpa' was in the guest MMIO region. MFC r277360: MOVS instruction emulation. MFC r277626: Add macro to identify AVIC capability (advanced virtual interrupt controller) in AMD processors. MFC r279220: Don't close a block context if it couldn't be opened, avoiding a null deref. MFC r279225: Add "-u" option to bhyve(8) to indicate that the RTC should maintain UTC time. MFC r279227: Emulate MSR 0xC0011024 when running on AMD processors. MFC r279228: Always emulate MSR_PAT on Intel processors and don't rely on PAT save/restore capability of VT-x. This lets bhyve run nested in older VMware versions that don't support the PAT save/restore capability. MFC r279540: Fix warnings/errors when building vmm.ko with gcc. 
Added: stable/10/sys/amd64/vmm/io/vrtc.c - copied unchanged from r276428, head/sys/amd64/vmm/io/vrtc.c stable/10/sys/amd64/vmm/io/vrtc.h - copied unchanged from r276428, head/sys/amd64/vmm/io/vrtc.h Modified: stable/10/lib/libvmmapi/vmmapi.c stable/10/lib/libvmmapi/vmmapi.h stable/10/share/examples/bhyve/vmrun.sh stable/10/sys/amd64/include/vmm.h stable/10/sys/amd64/include/vmm_dev.h stable/10/sys/amd64/vmm/amd/svm.c stable/10/sys/amd64/vmm/amd/svm_softc.h stable/10/sys/amd64/vmm/amd/svm_support.S stable/10/sys/amd64/vmm/intel/vmcs.c stable/10/sys/amd64/vmm/intel/vmx.c stable/10/sys/amd64/vmm/intel/vmx.h stable/10/sys/amd64/vmm/intel/vmx_msr.c stable/10/sys/amd64/vmm/io/vhpet.c stable/10/sys/amd64/vmm/vmm.c stable/10/sys/amd64/vmm/vmm_dev.c stable/10/sys/amd64/vmm/vmm_instruction_emul.c stable/10/sys/amd64/vmm/vmm_ioport.c stable/10/sys/modules/vmm/Makefile stable/10/usr.sbin/bhyve/bhyve.8 stable/10/usr.sbin/bhyve/bhyverun.c stable/10/usr.sbin/bhyve/bhyverun.h stable/10/usr.sbin/bhyve/inout.c stable/10/usr.sbin/bhyve/pci_ahci.c stable/10/usr.sbin/bhyve/rtc.c stable/10/usr.sbin/bhyve/rtc.h stable/10/usr.sbin/bhyve/task_switch.c stable/10/usr.sbin/bhyve/xmsr.c stable/10/usr.sbin/bhyvectl/bhyvectl.c Directory Properties: stable/10/ (props changed) Modified: stable/10/lib/libvmmapi/vmmapi.c ============================================================================== --- stable/10/lib/libvmmapi/vmmapi.c Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/lib/libvmmapi/vmmapi.c Sat Jun 27 22:48:22 2015 (r284894) @@ -368,14 +368,13 @@ vm_get_register(struct vmctx *ctx, int v } int -vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) +vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit) { int error; struct vm_run vmrun; bzero(&vmrun, sizeof(vmrun)); vmrun.cpuid = vcpu; - vmrun.rip = rip; error = ioctl(ctx->fd, VM_RUN, &vmrun); bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); @@ -399,36 +398,22 @@ vm_reinit(struct vmctx *ctx) return 
(ioctl(ctx->fd, VM_REINIT, 0)); } -static int -vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector, - int error_code, int error_code_valid) +int +vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid, + uint32_t errcode, int restart_instruction) { struct vm_exception exc; - bzero(&exc, sizeof(exc)); exc.cpuid = vcpu; exc.vector = vector; - exc.error_code = error_code; - exc.error_code_valid = error_code_valid; + exc.error_code = errcode; + exc.error_code_valid = errcode_valid; + exc.restart_instruction = restart_instruction; return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc)); } int -vm_inject_exception(struct vmctx *ctx, int vcpu, int vector) -{ - - return (vm_inject_exception_real(ctx, vcpu, vector, 0, 0)); -} - -int -vm_inject_exception2(struct vmctx *ctx, int vcpu, int vector, int errcode) -{ - - return (vm_inject_exception_real(ctx, vcpu, vector, errcode, 1)); -} - -int vm_apicid2vcpu(struct vmctx *ctx, int apicid) { /* @@ -1002,6 +987,7 @@ int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt) { + void *va; uint64_t gpa; int error, fault, i, n, off; @@ -1021,7 +1007,11 @@ vm_copy_setup(struct vmctx *ctx, int vcp off = gpa & PAGE_MASK; n = min(len, PAGE_SIZE - off); - iov->iov_base = (void *)gpa; + va = vm_map_gpa(ctx, gpa, n); + if (va == NULL) + return (-1); + + iov->iov_base = va; iov->iov_len = n; iov++; iovcnt--; @@ -1033,19 +1023,24 @@ vm_copy_setup(struct vmctx *ctx, int vcp } void +vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt) +{ + + return; +} + +void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len) { const char *src; char *dst; - uint64_t gpa; size_t n; dst = vp; while (len) { assert(iov->iov_len); - gpa = (uint64_t)iov->iov_base; n = min(len, iov->iov_len); - src = vm_map_gpa(ctx, gpa, n); + src = iov->iov_base; bcopy(src, dst, n); iov++; @@ -1060,15 +1055,13 @@ 
vm_copyout(struct vmctx *ctx, int vcpu, { const char *src; char *dst; - uint64_t gpa; size_t n; src = vp; while (len) { assert(iov->iov_len); - gpa = (uint64_t)iov->iov_base; n = min(len, iov->iov_len); - dst = vm_map_gpa(ctx, gpa, n); + dst = iov->iov_base; bcopy(src, dst, n); iov++; @@ -1146,3 +1139,63 @@ vm_set_intinfo(struct vmctx *ctx, int vc error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); return (error); } + +int +vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value) +{ + struct vm_rtc_data rtcdata; + int error; + + bzero(&rtcdata, sizeof(struct vm_rtc_data)); + rtcdata.offset = offset; + rtcdata.value = value; + error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata); + return (error); +} + +int +vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval) +{ + struct vm_rtc_data rtcdata; + int error; + + bzero(&rtcdata, sizeof(struct vm_rtc_data)); + rtcdata.offset = offset; + error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata); + if (error == 0) + *retval = rtcdata.value; + return (error); +} + +int +vm_rtc_settime(struct vmctx *ctx, time_t secs) +{ + struct vm_rtc_time rtctime; + int error; + + bzero(&rtctime, sizeof(struct vm_rtc_time)); + rtctime.secs = secs; + error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime); + return (error); +} + +int +vm_rtc_gettime(struct vmctx *ctx, time_t *secs) +{ + struct vm_rtc_time rtctime; + int error; + + bzero(&rtctime, sizeof(struct vm_rtc_time)); + error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime); + if (error == 0) + *secs = rtctime.secs; + return (error); +} + +int +vm_restart_instruction(void *arg, int vcpu) +{ + struct vmctx *ctx = arg; + + return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu)); +} Modified: stable/10/lib/libvmmapi/vmmapi.h ============================================================================== --- stable/10/lib/libvmmapi/vmmapi.h Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/lib/libvmmapi/vmmapi.h Sat Jun 27 22:48:22 2015 (r284894) @@ -32,6 +32,12 @@ #include <sys/param.h> #include <sys/cpuset.h> +/* + 
* API version for out-of-tree consumers like grub-bhyve for making compile + * time decisions. + */ +#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */ + struct iovec; struct vmctx; enum x2apic_state; @@ -70,13 +76,12 @@ int vm_get_seg_desc(struct vmctx *ctx, i struct seg_desc *seg_desc); int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); -int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, - struct vm_exit *ret_vmexit); +int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit); int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how); int vm_reinit(struct vmctx *ctx); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); -int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec); -int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode); +int vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, + int errcode_valid, uint32_t errcode, int restart_instruction); int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector); int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector); int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg); @@ -132,6 +137,14 @@ void vm_copyin(struct vmctx *ctx, int vc void *host_dst, size_t len); void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, struct iovec *guest_iov, size_t len); +void vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, + int iovcnt); + +/* RTC */ +int vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value); +int vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval); +int vm_rtc_settime(struct vmctx *ctx, time_t secs); +int vm_rtc_gettime(struct vmctx *ctx, time_t *secs); /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); Modified: stable/10/share/examples/bhyve/vmrun.sh ============================================================================== --- 
stable/10/share/examples/bhyve/vmrun.sh Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/share/examples/bhyve/vmrun.sh Sat Jun 27 22:48:22 2015 (r284894) @@ -39,7 +39,13 @@ DEFAULT_CONSOLE=stdio DEFAULT_VIRTIO_DISK="./diskdev" DEFAULT_ISOFILE="./release.iso" +errmsg() { + echo "*** $1" +} + usage() { + local msg=$1 + echo "Usage: vmrun.sh [-ahi] [-c <CPUs>] [-C <console>] [-d <disk file>]" echo " [-e <name=value>] [-g <gdbport> ] [-H <directory>]" echo " [-I <location of installation iso>] [-m <memsize>]" @@ -58,18 +64,18 @@ usage() { echo " -m: memory size (default is ${DEFAULT_MEMSIZE})" echo " -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)" echo "" - echo " This script needs to be executed with superuser privileges" - echo "" + [ -n "$msg" ] && errmsg "$msg" exit 1 } if [ `id -u` -ne 0 ]; then - usage + errmsg "This script must be executed with superuser privileges" + exit 1 fi kldstat -n vmm > /dev/null 2>&1 if [ $? -ne 0 ]; then - echo "vmm.ko is not loaded!" + errmsg "vmm.ko is not loaded" exit 1 fi @@ -143,7 +149,7 @@ fi shift $((${OPTIND} - 1)) if [ $# -ne 1 ]; then - usage + usage "virtual machine name not specified" fi vmname="$1" Modified: stable/10/sys/amd64/include/vmm.h ============================================================================== --- stable/10/sys/amd64/include/vmm.h Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/include/vmm.h Sat Jun 27 22:48:22 2015 (r284894) @@ -286,9 +286,10 @@ int vm_unassign_pptdev(struct vm *vm, in struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); struct vpmtmr *vm_pmtmr(struct vm *vm); +struct vrtc *vm_rtc(struct vm *vm); /* - * Inject exception 'vme' into the guest vcpu. This function returns 0 on + * Inject exception 'vector' into the guest vcpu. This function returns 0 on * success and non-zero on failure. 
* * Wrapper functions like 'vm_inject_gp()' should be preferred to calling @@ -298,7 +299,8 @@ struct vpmtmr *vm_pmtmr(struct vm *vm); * This function should only be called in the context of the thread that is * executing this vcpu. */ -int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme); +int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid, + uint32_t errcode, int restart_instruction); /* * This function is called after a VM-exit that occurred during exception or @@ -444,8 +446,11 @@ struct vie { rex_x:1, rex_b:1, rex_present:1, + repz_present:1, /* REP/REPE/REPZ prefix */ + repnz_present:1, /* REPNE/REPNZ prefix */ opsize_override:1, /* Operand size override */ - addrsize_override:1; /* Address size override */ + addrsize_override:1, /* Address size override */ + segment_override:1; /* Segment override */ uint8_t mod:2, /* ModRM byte */ reg:4, @@ -461,6 +466,7 @@ struct vie { uint8_t scale; int base_register; /* VM_REG_GUEST_xyz */ int index_register; /* VM_REG_GUEST_xyz */ + int segment_register; /* VM_REG_GUEST_xyz */ int64_t displacement; /* optional addr displacement */ int64_t immediate; /* optional immediate operand */ @@ -627,4 +633,6 @@ vm_inject_ss(void *vm, int vcpuid, int e void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); +int vm_restart_instruction(void *vm, int vcpuid); + #endif /* _VMM_H_ */ Modified: stable/10/sys/amd64/include/vmm_dev.h ============================================================================== --- stable/10/sys/amd64/include/vmm_dev.h Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/include/vmm_dev.h Sat Jun 27 22:48:22 2015 (r284894) @@ -54,7 +54,6 @@ struct vm_seg_desc { /* data or code s struct vm_run { int cpuid; - uint64_t rip; /* start running here */ struct vm_exit vm_exit; }; @@ -63,6 +62,7 @@ struct vm_exception { int vector; uint32_t error_code; int error_code_valid; + int restart_instruction; }; struct vm_lapic_msi { @@ -195,6 +195,15 
@@ struct vm_intinfo { uint64_t info2; }; +struct vm_rtc_time { + time_t secs; +}; + +struct vm_rtc_data { + int offset; + uint8_t value; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -228,6 +237,7 @@ enum { IOCNUM_LAPIC_MSI = 36, IOCNUM_LAPIC_LOCAL_IRQ = 37, IOCNUM_IOAPIC_PINCOUNT = 38, + IOCNUM_RESTART_INSTRUCTION = 39, /* PCI pass-thru */ IOCNUM_BIND_PPTDEV = 40, @@ -254,6 +264,12 @@ enum { /* vm_cpuset */ IOCNUM_ACTIVATE_CPU = 90, IOCNUM_GET_CPUSET = 91, + + /* RTC */ + IOCNUM_RTC_READ = 100, + IOCNUM_RTC_WRITE = 101, + IOCNUM_RTC_SETTIME = 102, + IOCNUM_RTC_GETTIME = 103, }; #define VM_RUN \ @@ -336,4 +352,14 @@ enum { _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) #define VM_GET_INTINFO \ _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) +#define VM_RTC_WRITE \ + _IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data) +#define VM_RTC_READ \ + _IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data) +#define VM_RTC_SETTIME \ + _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time) +#define VM_RTC_GETTIME \ + _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time) +#define VM_RESTART_INSTRUCTION \ + _IOW('v', IOCNUM_RESTART_INSTRUCTION, int) #endif Modified: stable/10/sys/amd64/vmm/amd/svm.c ============================================================================== --- stable/10/sys/amd64/vmm/amd/svm.c Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/amd/svm.c Sat Jun 27 22:48:22 2015 (r284894) @@ -80,6 +80,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLF #define AMD_CPUID_SVM_DECODE_ASSIST BIT(7) /* Decode assist */ #define AMD_CPUID_SVM_PAUSE_INC BIT(10) /* Pause intercept filter. 
*/ #define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */ +#define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */ #define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \ VMCB_CACHE_IOPM | \ @@ -554,6 +555,7 @@ svm_vminit(struct vm *vm, pmap_t pmap) pml4_pa = svm_sc->nptp; for (i = 0; i < VM_MAXCPU; i++) { vcpu = svm_get_vcpu(svm_sc, i); + vcpu->nextrip = ~0; vcpu->lastcpu = NOCPU; vcpu->vmcb_pa = vtophys(&vcpu->vmcb); vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa); @@ -1200,7 +1202,6 @@ svm_vmexit(struct svm_softc *svm_sc, int struct vmcb_state *state; struct vmcb_ctrl *ctrl; struct svm_regctx *ctx; - struct vm_exception exception; uint64_t code, info1, info2, val; uint32_t eax, ecx, edx; int error, errcode_valid, handled, idtvec, reflect; @@ -1314,6 +1315,7 @@ svm_vmexit(struct svm_softc *svm_sc, int /* fallthru */ default: errcode_valid = 0; + info1 = 0; break; } KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) " @@ -1322,14 +1324,10 @@ svm_vmexit(struct svm_softc *svm_sc, int if (reflect) { /* Reflect the exception back into the guest */ - exception.vector = idtvec; - exception.error_code_valid = errcode_valid; - exception.error_code = errcode_valid ? 
info1 : 0; VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " - "%d/%#x into the guest", exception.vector, - exception.error_code); - error = vm_inject_exception(svm_sc->vm, vcpu, - &exception); + "%d/%#x into the guest", idtvec, (int)info1); + error = vm_inject_exception(svm_sc->vm, vcpu, idtvec, + errcode_valid, info1, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); } @@ -1476,15 +1474,24 @@ svm_inj_interrupts(struct svm_softc *sc, { struct vmcb_ctrl *ctrl; struct vmcb_state *state; + struct svm_vcpu *vcpustate; uint8_t v_tpr; int vector, need_intr_window, pending_apic_vector; state = svm_get_vmcb_state(sc, vcpu); ctrl = svm_get_vmcb_ctrl(sc, vcpu); + vcpustate = svm_get_vcpu(sc, vcpu); need_intr_window = 0; pending_apic_vector = 0; + if (vcpustate->nextrip != state->rip) { + ctrl->intr_shadow = 0; + VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking " + "cleared due to rip change: %#lx/%#lx", + vcpustate->nextrip, state->rip); + } + /* * Inject pending events or exceptions for this vcpu. * @@ -1634,7 +1641,7 @@ done: * VMRUN. */ v_tpr = vlapic_get_cr8(vlapic); - KASSERT(v_tpr >= 0 && v_tpr <= 15, ("invalid v_tpr %#x", v_tpr)); + KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr)); if (ctrl->v_tpr != v_tpr) { VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x", ctrl->v_tpr, v_tpr); @@ -1801,14 +1808,14 @@ static __inline void disable_gintr(void) { - __asm __volatile("clgi" : : :); + __asm __volatile("clgi"); } static __inline void enable_gintr(void) { - __asm __volatile("stgi" : : :); + __asm __volatile("stgi"); } /* @@ -1955,6 +1962,9 @@ svm_vmrun(void *arg, int vcpu, register_ /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); + /* Update 'nextrip' */ + vcpustate->nextrip = state->rip; + /* Handle #VMEXIT and if required return to user space. 
*/ handled = svm_vmexit(svm_sc, vcpu, vmexit); } while (handled); Modified: stable/10/sys/amd64/vmm/amd/svm_softc.h ============================================================================== --- stable/10/sys/amd64/vmm/amd/svm_softc.h Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/amd/svm_softc.h Sat Jun 27 22:48:22 2015 (r284894) @@ -45,6 +45,7 @@ struct svm_vcpu { struct vmcb vmcb; /* hardware saved vcpu context */ struct svm_regctx swctx; /* software saved vcpu context */ uint64_t vmcb_pa; /* VMCB physical address */ + uint64_t nextrip; /* next instruction to be executed by guest */ int lastcpu; /* host cpu that the vcpu last ran on */ uint32_t dirty; /* state cache bits that must be cleared */ long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ Modified: stable/10/sys/amd64/vmm/amd/svm_support.S ============================================================================== --- stable/10/sys/amd64/vmm/amd/svm_support.S Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/amd/svm_support.S Sat Jun 27 22:48:22 2015 (r284894) @@ -22,6 +22,8 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ */ #include <machine/asmacros.h> @@ -35,6 +37,10 @@ #define VENTER push %rbp ; mov %rsp,%rbp #define VLEAVE pop %rbp +#define VMLOAD .byte 0x0f, 0x01, 0xda +#define VMRUN .byte 0x0f, 0x01, 0xd8 +#define VMSAVE .byte 0x0f, 0x01, 0xdb + /* * svm_launch(uint64_t vmcb, struct svm_regctx *gctx) * %rdi: physical address of VMCB @@ -79,9 +85,9 @@ ENTRY(svm_launch) movq SCTX_RDI(%rsi), %rdi movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */ - vmload %rax - vmrun %rax - vmsave %rax + VMLOAD + VMRUN + VMSAVE pop %rax /* pop guest context pointer from the stack */ Modified: stable/10/sys/amd64/vmm/intel/vmcs.c ============================================================================== --- stable/10/sys/amd64/vmm/intel/vmcs.c Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/intel/vmcs.c Sat Jun 27 22:48:22 2015 (r284894) @@ -342,18 +342,6 @@ vmcs_init(struct vmcs *vmcs) */ VMPTRLD(vmcs); - /* Initialize guest IA32_PAT MSR with the default value */ - pat = PAT_VALUE(0, PAT_WRITE_BACK) | - PAT_VALUE(1, PAT_WRITE_THROUGH) | - PAT_VALUE(2, PAT_UNCACHED) | - PAT_VALUE(3, PAT_UNCACHEABLE) | - PAT_VALUE(4, PAT_WRITE_BACK) | - PAT_VALUE(5, PAT_WRITE_THROUGH) | - PAT_VALUE(6, PAT_UNCACHED) | - PAT_VALUE(7, PAT_UNCACHEABLE); - if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0) - goto done; - /* Host state */ /* Initialize host IA32_PAT MSR */ Modified: stable/10/sys/amd64/vmm/intel/vmx.c ============================================================================== --- stable/10/sys/amd64/vmm/intel/vmx.c Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/intel/vmx.c Sat Jun 27 22:48:22 2015 (r284894) @@ -100,13 +100,11 @@ __FBSDID("$FreeBSD$"); (VM_EXIT_HOST_LMA | \ VM_EXIT_SAVE_EFER | \ VM_EXIT_LOAD_EFER | \ - VM_EXIT_ACKNOWLEDGE_INTERRUPT | \ - VM_EXIT_SAVE_PAT | \ - VM_EXIT_LOAD_PAT) + VM_EXIT_ACKNOWLEDGE_INTERRUPT) #define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS -#define VM_ENTRY_CTLS_ONE_SETTING 
(VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT) +#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER) #define VM_ENTRY_CTLS_ZERO_SETTING \ (VM_ENTRY_LOAD_DEBUG_CONTROLS | \ @@ -859,10 +857,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap) * VM exit and entry respectively. It is also restored from the * host VMCS area on a VM exit. * - * MSR_PAT is saved and restored in the guest VMCS are on a VM exit - * and entry respectively. It is also restored from the host VMCS - * area on a VM exit. - * * The TSC MSR is exposed read-only. Writes are disallowed as that * will impact the host TSC. * XXX Writes would be implemented with a wrmsr trap, and @@ -874,7 +868,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap) guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || guest_msr_rw(vmx, MSR_EFER) || - guest_msr_rw(vmx, MSR_PAT) || guest_msr_ro(vmx, MSR_TSC)) panic("vmx_vminit: error setting guest msr access"); @@ -941,6 +934,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->cap[i].proc_ctls = procbased_ctls; vmx->cap[i].proc_ctls2 = procbased_ctls2; + vmx->state[i].nextrip = ~0; vmx->state[i].lastcpu = NOCPU; vmx->state[i].vpid = vpid[i]; @@ -1169,12 +1163,24 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu } static void -vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic) +vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic, + uint64_t guestrip) { int vector, need_nmi_exiting, extint_pending; uint64_t rflags, entryinfo; uint32_t gi, info; + if (vmx->state[vcpu].nextrip != guestrip) { + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + if (gi & HWINTR_BLOCKING) { + VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking " + "cleared due to rip change: %#lx/%#lx", + vmx->state[vcpu].nextrip, guestrip); + gi &= ~HWINTR_BLOCKING; + vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); + } + } + if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " "intinfo is not valid: %#lx", __func__, 
entryinfo)); @@ -1771,7 +1777,7 @@ vmexit_inst_emul(struct vm_exit *vmexit, { struct vm_guest_paging *paging; uint32_t csar; - + paging = &vmexit->u.inst_emul.paging; vmexit->exitcode = VM_EXITCODE_INST_EMUL; @@ -2060,12 +2066,11 @@ emulate_rdmsr(struct vmx *vmx, int vcpui static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { - int error, handled, in; + int error, errcode, errcode_valid, handled, in; struct vmxctx *vmxctx; struct vlapic *vlapic; struct vm_inout_str *vis; struct vm_task_switch *ts; - struct vm_exception vmexc; uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; uint32_t intr_type, intr_vec, reason; uint64_t exitintinfo, qual, gpa; @@ -2250,6 +2255,7 @@ vmx_exit_process(struct vmx *vmx, int vc case EXIT_REASON_MTF: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); vmexit->exitcode = VM_EXITCODE_MTRAP; + vmexit->inst_length = 0; break; case EXIT_REASON_PAUSE: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); @@ -2376,15 +2382,15 @@ vmx_exit_process(struct vmx *vmx, int vc vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); /* Reflect all other exceptions back into the guest */ - bzero(&vmexc, sizeof(struct vm_exception)); - vmexc.vector = intr_vec; + errcode_valid = errcode = 0; if (intr_info & VMCS_INTR_DEL_ERRCODE) { - vmexc.error_code_valid = 1; - vmexc.error_code = vmcs_read(VMCS_EXIT_INTR_ERRCODE); + errcode_valid = 1; + errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); } VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into " - "the guest", vmexc.vector, vmexc.error_code); - error = vm_inject_exception(vmx->vm, vcpu, &vmexc); + "the guest", intr_vec, errcode); + error = vm_inject_exception(vmx->vm, vcpu, intr_vec, + errcode_valid, errcode, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); return (1); @@ -2399,6 +2405,7 @@ vmx_exit_process(struct vmx *vmx, int vc if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(vmx, vcpu, gpa)) { vmexit->exitcode = 
VM_EXITCODE_PAGING; + vmexit->inst_length = 0; vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); @@ -2540,7 +2547,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int } static int -vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, +vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, void *rendezvous_cookie, void *suspend_cookie) { int rc, handled, launched; @@ -2550,7 +2557,6 @@ vmx_run(void *arg, int vcpu, register_t struct vmcs *vmcs; struct vm_exit *vmexit; struct vlapic *vlapic; - uint64_t rip; uint32_t exit_reason; vmx = arg; @@ -2578,11 +2584,13 @@ vmx_run(void *arg, int vcpu, register_t */ vmcs_write(VMCS_HOST_CR3, rcr3()); - vmcs_write(VMCS_GUEST_RIP, startrip); + vmcs_write(VMCS_GUEST_RIP, rip); vmx_set_pcpu_defaults(vmx, vcpu, pmap); do { - handled = UNHANDLED; + KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch " + "%#lx/%#lx", __func__, vmcs_guest_rip(), rip)); + handled = UNHANDLED; /* * Interrupts are disabled from this point on until the * guest starts executing. This is done for the following @@ -2602,7 +2610,7 @@ vmx_run(void *arg, int vcpu, register_t * pmap_invalidate_ept(). 
*/ disable_intr(); - vmx_inject_interrupts(vmx, vcpu, vlapic); + vmx_inject_interrupts(vmx, vcpu, vlapic, rip); /* * Check for vcpu suspension after injecting events because @@ -2611,20 +2619,20 @@ vmx_run(void *arg, int vcpu, register_t */ if (vcpu_suspended(suspend_cookie)) { enable_intr(); - vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip()); + vm_exit_suspended(vmx->vm, vcpu, rip); break; } if (vcpu_rendezvous_pending(rendezvous_cookie)) { enable_intr(); - vm_exit_rendezvous(vmx->vm, vcpu, vmcs_guest_rip()); + vm_exit_rendezvous(vmx->vm, vcpu, rip); break; } if (vcpu_should_yield(vm, vcpu)) { enable_intr(); - vm_exit_astpending(vmx->vm, vcpu, vmcs_guest_rip()); - vmx_astpending_trace(vmx, vcpu, vmexit->rip); + vm_exit_astpending(vmx->vm, vcpu, rip); + vmx_astpending_trace(vmx, vcpu, rip); handled = HANDLED; break; } @@ -2638,6 +2646,9 @@ vmx_run(void *arg, int vcpu, register_t vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason(); vmexit->u.vmx.exit_qualification = vmcs_exit_qualification(); + /* Update 'nextrip' */ + vmx->state[vcpu].nextrip = rip; + if (rc == VMX_GUEST_VMEXIT) { vmx_exit_handle_nmi(vmx, vcpu, vmexit); enable_intr(); @@ -2648,6 +2659,7 @@ vmx_run(void *arg, int vcpu, register_t } launched = 1; vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled); + rip = vmexit->rip; } while (handled); /* Modified: stable/10/sys/amd64/vmm/intel/vmx.h ============================================================================== --- stable/10/sys/amd64/vmm/intel/vmx.h Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/intel/vmx.h Sat Jun 27 22:48:22 2015 (r284894) @@ -78,6 +78,7 @@ struct vmxcap { }; struct vmxstate { + uint64_t nextrip; /* next instruction to be executed by guest */ int lastcpu; /* host cpu that this 'vcpu' last ran on */ uint16_t vpid; }; @@ -102,6 +103,7 @@ enum { IDX_MSR_STAR, IDX_MSR_SF_MASK, IDX_MSR_KGSBASE, + IDX_MSR_PAT, GUEST_MSR_NUM /* must be the last enumeration */ }; Modified: 
stable/10/sys/amd64/vmm/intel/vmx_msr.c ============================================================================== --- stable/10/sys/amd64/vmm/intel/vmx_msr.c Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/intel/vmx_msr.c Sat Jun 27 22:48:22 2015 (r284894) @@ -230,6 +230,25 @@ westmere_cpu(void) return (false); } +static bool +pat_valid(uint64_t val) +{ + int i, pa; + + /* + * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" + * + * Extract PA0 through PA7 and validate that each one encodes a + * valid memory type. + */ + for (i = 0; i < 8; i++) { + pa = (val >> (i * 8)) & 0xff; + if (pa == 2 || pa == 3 || pa >= 8) + return (false); + } + return (true); +} + void vmx_msr_init(void) { @@ -302,6 +321,10 @@ vmx_msr_init(void) void vmx_msr_guest_init(struct vmx *vmx, int vcpuid) { + uint64_t *guest_msrs; + + guest_msrs = vmx->guest_msrs[vcpuid]; + /* * The permissions bitmap is shared between all vcpus so initialize it * once when initializing the vBSP. @@ -313,6 +336,19 @@ vmx_msr_guest_init(struct vmx *vmx, int guest_msr_rw(vmx, MSR_SF_MASK); guest_msr_rw(vmx, MSR_KGSBASE); } + + /* + * Initialize guest IA32_PAT MSR with default value after reset. 
+ */ + guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | + PAT_VALUE(1, PAT_WRITE_THROUGH) | + PAT_VALUE(2, PAT_UNCACHED) | + PAT_VALUE(3, PAT_UNCACHEABLE) | + PAT_VALUE(4, PAT_WRITE_BACK) | + PAT_VALUE(5, PAT_WRITE_THROUGH) | + PAT_VALUE(6, PAT_UNCACHED) | + PAT_VALUE(7, PAT_UNCACHEABLE); + return; } @@ -353,7 +389,11 @@ vmx_msr_guest_exit(struct vmx *vmx, int int vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) { - int error = 0; + const uint64_t *guest_msrs; + int error; + + guest_msrs = vmx->guest_msrs[vcpuid]; + error = 0; switch (num) { case MSR_IA32_MISC_ENABLE: @@ -366,6 +406,9 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u case MSR_TURBO_RATIO_LIMIT1: *val = turbo_ratio_limit; break; + case MSR_PAT: + *val = guest_msrs[IDX_MSR_PAT]; + break; default: error = EINVAL; break; @@ -376,10 +419,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u int vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) { + uint64_t *guest_msrs; uint64_t changed; int error; + guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; + switch (num) { case MSR_IA32_MISC_ENABLE: changed = val ^ misc_enable; @@ -401,6 +447,12 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u error = EINVAL; break; + case MSR_PAT: + if (pat_valid(val)) + guest_msrs[IDX_MSR_PAT] = val; + else + vm_inject_gp(vmx->vm, vcpuid); + break; default: error = EINVAL; break; Modified: stable/10/sys/amd64/vmm/io/vhpet.c ============================================================================== --- stable/10/sys/amd64/vmm/io/vhpet.c Sat Jun 27 20:39:13 2015 (r284893) +++ stable/10/sys/amd64/vmm/io/vhpet.c Sat Jun 27 22:48:22 2015 (r284894) @@ -104,7 +104,6 @@ vhpet_capabilities(void) uint64_t cap = 0; cap |= 0x8086 << 16; /* vendor id */ - cap |= HPET_CAP_LEG_RT; /* legacy routing capable */ cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */ cap |= 1; /* revision */ cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */ @@ -127,15 +126,6 @@ vhpet_timer_msi_enabled(struct 
vhpet *vh { const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN; - /* - * LegacyReplacement Route configuration takes precedence over MSI - * for timers 0 and 1. - */ - if (n == 0 || n == 1) { - if (vhpet->config & HPET_CNF_LEG_RT) - return (false); - } - if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable) return (true); else @@ -152,41 +142,9 @@ vhpet_timer_ioapic_pin(struct vhpet *vhp if (vhpet_timer_msi_enabled(vhpet, n)) return (0); - if (vhpet->config & HPET_CNF_LEG_RT) { - /* - * In "legacy routing" timers 0 and 1 are connected to - * ioapic pins 2 and 8 respectively. - */ - switch (n) { - case 0: - return (2); - case 1: - return (8); - } - } - return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9); } -static __inline int -vhpet_timer_atpic_pin(struct vhpet *vhpet, int n) -{ - if (vhpet->config & HPET_CNF_LEG_RT) { - /* - * In "legacy routing" timers 0 and 1 are connected to - * 8259 master pin 0 and slave pin 0 respectively. - */ - switch (n) { - case 0: - return (0); - case 1: - return (8); - } - } - - return (-1); -} - static uint32_t vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr) { @@ -216,17 +174,12 @@ vhpet_counter(struct vhpet *vhpet, sbint *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201506272248.t5RMmM6h043264>