Date: Sun, 20 Jul 2014 00:48:41 -0700
From: Adrian Chadd <adrian@freebsd.org>
To: Neel Natu <neelnatu@gmail.com>
Cc: "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>, "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>, "src-committers@freebsd.org" <src-committers@freebsd.org>, Neel Natu <neel@freebsd.org>
Subject: Re: svn commit: r268889 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel usr.sbin/bhyve usr.sbin/bhyvectl
Message-ID: <CAJ-VmokkfCXSc1e0FOwp6vjHkrWCMh%2Bcjo2BDWExpzJpcbzXkw@mail.gmail.com>
In-Reply-To: <CAFgRE9FVXwKtxe9yAEHhx0dUck6huKtjngCPx=OvT4k922YbqQ@mail.gmail.com>
References: <201407192059.s6JKx8un072543@svn.freebsd.org> <CAJ-VmondeNMPhTFTUZxW-j5kLoV1XBjazUERWPTqOiV2tcQz_A@mail.gmail.com> <CAFgRE9FVXwKtxe9yAEHhx0dUck6huKtjngCPx=OvT4k922YbqQ@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
On 20 July 2014 00:47, Neel Natu <neelnatu@gmail.com> wrote:
> Hi Adrian,
>
> On Sat, Jul 19, 2014 at 11:30 PM, Adrian Chadd <adrian@freebsd.org> wrote:
>> Hi!
>>
>> This broke -HEAD. 'exc' in vmx_inject_interrupts() is no longer
>> initialised before use /and/ it's part of a KASSERT() output.
>>
>
> Are you building with an external toolchain?

Nope; just head with invariants disabled.

-a

> best
> Neel
>
>> Thanks!
>>
>>
>> -a
>>
>>
>> On 19 July 2014 13:59, Neel Natu <neel@freebsd.org> wrote:
>>> Author: neel
>>> Date: Sat Jul 19 20:59:08 2014
>>> New Revision: 268889
>>> URL: http://svnweb.freebsd.org/changeset/base/268889
>>>
>>> Log:
>>> Handle nested exceptions in bhyve.
>>>
>>> A nested exception condition arises when a second exception is triggered while
>>> delivering the first exception. Most nested exceptions can be handled serially
>>> but some are converted into a double fault. If an exception is generated during
>>> delivery of a double fault then the virtual machine shuts down as a result of
>>> a triple fault.
>>>
>>> vm_exit_intinfo() is used to record that a VM-exit happened while an event was
>>> being delivered through the IDT. If an exception is triggered while handling
>>> the VM-exit it will be treated like a nested exception.
>>>
>>> vm_entry_intinfo() is used by processor-specific code to get the event to be
>>> injected into the guest on the next VM-entry. This function is responsible for
>>> deciding the disposition of nested exceptions.
>>> >>> Modified: >>> head/lib/libvmmapi/vmmapi.c >>> head/lib/libvmmapi/vmmapi.h >>> head/sys/amd64/include/vmm.h >>> head/sys/amd64/include/vmm_dev.h >>> head/sys/amd64/vmm/intel/vmx.c >>> head/sys/amd64/vmm/vmm.c >>> head/sys/amd64/vmm/vmm_dev.c >>> head/usr.sbin/bhyve/bhyverun.c >>> head/usr.sbin/bhyve/task_switch.c >>> head/usr.sbin/bhyvectl/bhyvectl.c >>> >>> Modified: head/lib/libvmmapi/vmmapi.c >>> ============================================================================== >>> --- head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -1106,3 +1106,32 @@ vm_activate_cpu(struct vmctx *ctx, int v >>> error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); >>> return (error); >>> } >>> + >>> +int >>> +vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2) >>> +{ >>> + struct vm_intinfo vmii; >>> + int error; >>> + >>> + bzero(&vmii, sizeof(struct vm_intinfo)); >>> + vmii.vcpuid = vcpu; >>> + error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii); >>> + if (error == 0) { >>> + *info1 = vmii.info1; >>> + *info2 = vmii.info2; >>> + } >>> + return (error); >>> +} >>> + >>> +int >>> +vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) >>> +{ >>> + struct vm_intinfo vmii; >>> + int error; >>> + >>> + bzero(&vmii, sizeof(struct vm_intinfo)); >>> + vmii.vcpuid = vcpu; >>> + vmii.info1 = info1; >>> + error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); >>> + return (error); >>> +} >>> >>> Modified: head/lib/libvmmapi/vmmapi.h >>> ============================================================================== >>> --- head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -104,6 +104,9 @@ int vm_setup_pptdev_msix(struct vmctx *c >>> int func, int idx, uint64_t addr, uint64_t msg, >>> uint32_t vector_control); >>> >>> +int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2); 
>>> +int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo); >>> + >>> /* >>> * Return a pointer to the statistics buffer. Note that this is not MT-safe. >>> */ >>> >>> Modified: head/sys/amd64/include/vmm.h >>> ============================================================================== >>> --- head/sys/amd64/include/vmm.h Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/sys/amd64/include/vmm.h Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -34,6 +34,7 @@ enum vm_suspend_how { >>> VM_SUSPEND_RESET, >>> VM_SUSPEND_POWEROFF, >>> VM_SUSPEND_HALT, >>> + VM_SUSPEND_TRIPLEFAULT, >>> VM_SUSPEND_LAST >>> }; >>> >>> @@ -88,6 +89,16 @@ enum x2apic_state { >>> X2APIC_STATE_LAST >>> }; >>> >>> +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) >>> +#define VM_INTINFO_DEL_ERRCODE 0x800 >>> +#define VM_INTINFO_RSVD 0x7ffff000 >>> +#define VM_INTINFO_VALID 0x80000000 >>> +#define VM_INTINFO_TYPE 0x700 >>> +#define VM_INTINFO_HWINTR (0 << 8) >>> +#define VM_INTINFO_NMI (2 << 8) >>> +#define VM_INTINFO_HWEXCEPTION (3 << 8) >>> +#define VM_INTINFO_SWINTR (4 << 8) >>> + >>> #ifdef _KERNEL >>> >>> #define VM_MAX_NAMELEN 32 >>> @@ -278,14 +289,31 @@ struct vatpit *vm_atpit(struct vm *vm); >>> int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme); >>> >>> /* >>> - * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an >>> - * exception is pending and also updates 'vme'. The pending exception is >>> - * cleared when this function returns. >>> + * This function is called after a VM-exit that occurred during exception or >>> + * interrupt delivery through the IDT. The format of 'intinfo' is described >>> + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. >>> * >>> - * This function should only be called in the context of the thread that is >>> - * executing this vcpu. >>> + * If a VM-exit handler completes the event delivery successfully then it >>> + * should call vm_exit_intinfo() to extinguish the pending event. 
For e.g., >>> + * if the task switch emulation is triggered via a task gate then it should >>> + * call this function with 'intinfo=0' to indicate that the external event >>> + * is not pending anymore. >>> + * >>> + * Return value is 0 on success and non-zero on failure. >>> */ >>> -int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme); >>> +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); >>> + >>> +/* >>> + * This function is called before every VM-entry to retrieve a pending >>> + * event that should be injected into the guest. This function combines >>> + * nested events into a double or triple fault. >>> + * >>> + * Returns 0 if there are no events that need to be injected into the guest >>> + * and non-zero otherwise. >>> + */ >>> +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); >>> + >>> +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); >>> >>> void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */ >>> void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */ >>> >>> Modified: head/sys/amd64/include/vmm_dev.h >>> ============================================================================== >>> --- head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -189,6 +189,12 @@ struct vm_cpuset { >>> #define VM_ACTIVE_CPUS 0 >>> #define VM_SUSPENDED_CPUS 1 >>> >>> +struct vm_intinfo { >>> + int vcpuid; >>> + uint64_t info1; >>> + uint64_t info2; >>> +}; >>> + >>> enum { >>> /* general routines */ >>> IOCNUM_ABIVERS = 0, >>> @@ -211,6 +217,8 @@ enum { >>> IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, >>> >>> /* interrupt injection */ >>> + IOCNUM_GET_INTINFO = 28, >>> + IOCNUM_SET_INTINFO = 29, >>> IOCNUM_INJECT_EXCEPTION = 30, >>> IOCNUM_LAPIC_IRQ = 31, >>> IOCNUM_INJECT_NMI = 32, >>> @@ -324,4 +332,8 @@ enum { >>> _IOW('v', IOCNUM_ACTIVATE_CPU, 
struct vm_activate_cpu) >>> #define VM_GET_CPUS \ >>> _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) >>> +#define VM_SET_INTINFO \ >>> + _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) >>> +#define VM_GET_INTINFO \ >>> + _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) >>> #endif >>> >>> Modified: head/sys/amd64/vmm/intel/vmx.c >>> ============================================================================== >>> --- head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -1213,22 +1213,31 @@ vmx_inject_interrupts(struct vmx *vmx, i >>> { >>> struct vm_exception exc; >>> int vector, need_nmi_exiting, extint_pending; >>> - uint64_t rflags; >>> + uint64_t rflags, entryinfo; >>> uint32_t gi, info; >>> >>> - if (vm_exception_pending(vmx->vm, vcpu, &exc)) { >>> - KASSERT(exc.vector >= 0 && exc.vector < 32, >>> - ("%s: invalid exception vector %d", __func__, exc.vector)); >>> + if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { >>> + KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " >>> + "intinfo is not valid: %#lx", __func__, entryinfo)); >>> >>> info = vmcs_read(VMCS_ENTRY_INTR_INFO); >>> KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " >>> "pending exception %d: %#x", __func__, exc.vector, info)); >>> >>> - info = exc.vector | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID; >>> - if (exc.error_code_valid) { >>> - info |= VMCS_INTR_DEL_ERRCODE; >>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, exc.error_code); >>> + info = entryinfo; >>> + vector = info & 0xff; >>> + if (vector == IDT_BP || vector == IDT_OF) { >>> + /* >>> + * VT-x requires #BP and #OF to be injected as software >>> + * exceptions. 
>>> + */ >>> + info &= ~VMCS_INTR_T_MASK; >>> + info |= VMCS_INTR_T_SWEXCEPTION; >>> } >>> + >>> + if (info & VMCS_INTR_DEL_ERRCODE) >>> + vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32); >>> + >>> vmcs_write(VMCS_ENTRY_INTR_INFO, info); >>> } >>> >>> @@ -1407,6 +1416,16 @@ vmx_clear_nmi_blocking(struct vmx *vmx, >>> vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); >>> } >>> >>> +static void >>> +vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) >>> +{ >>> + uint32_t gi; >>> + >>> + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); >>> + KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, >>> + ("NMI blocking is not in effect %#x", gi)); >>> +} >>> + >>> static int >>> vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) >>> { >>> @@ -2050,7 +2069,7 @@ vmx_exit_process(struct vmx *vmx, int vc >>> struct vm_task_switch *ts; >>> uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; >>> uint32_t intr_type, reason; >>> - uint64_t qual, gpa; >>> + uint64_t exitintinfo, qual, gpa; >>> bool retu; >>> >>> CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); >>> @@ -2070,47 +2089,49 @@ vmx_exit_process(struct vmx *vmx, int vc >>> * be handled specially by re-injecting the event if the IDT >>> * vectoring information field's valid bit is set. >>> * >>> - * If the VM-exit is due to a task gate in the IDT then we don't >>> - * reinject the event because emulating the task switch also >>> - * completes the event delivery. >>> - * >>> * See "Information for VM Exits During Event Delivery" in Intel SDM >>> * for details. 
>>> */ >>> - switch (reason) { >>> - case EXIT_REASON_EPT_FAULT: >>> - case EXIT_REASON_EPT_MISCONFIG: >>> - case EXIT_REASON_APIC_ACCESS: >>> - case EXIT_REASON_TASK_SWITCH: >>> - case EXIT_REASON_EXCEPTION: >>> - idtvec_info = vmcs_idt_vectoring_info(); >>> - VCPU_CTR2(vmx->vm, vcpu, "vm exit %s: idtvec_info 0x%08x", >>> - exit_reason_to_str(reason), idtvec_info); >>> - if ((idtvec_info & VMCS_IDT_VEC_VALID) && >>> - (reason != EXIT_REASON_TASK_SWITCH)) { >>> - idtvec_info &= ~(1 << 12); /* clear undefined bit */ >>> - vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info); >>> - if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { >>> - idtvec_err = vmcs_idt_vectoring_err(); >>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, >>> - idtvec_err); >>> - } >>> - /* >>> - * If 'virtual NMIs' are being used and the VM-exit >>> - * happened while injecting an NMI during the previous >>> - * VM-entry, then clear "blocking by NMI" in the Guest >>> - * Interruptibility-state. >>> - */ >>> - if ((idtvec_info & VMCS_INTR_T_MASK) == >>> - VMCS_INTR_T_NMI) { >>> - vmx_clear_nmi_blocking(vmx, vcpu); >>> - } >>> + idtvec_info = vmcs_idt_vectoring_info(); >>> + if (idtvec_info & VMCS_IDT_VEC_VALID) { >>> + idtvec_info &= ~(1 << 12); /* clear undefined bit */ >>> + exitintinfo = idtvec_info; >>> + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { >>> + idtvec_err = vmcs_idt_vectoring_err(); >>> + exitintinfo |= (uint64_t)idtvec_err << 32; >>> + } >>> + error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo); >>> + KASSERT(error == 0, ("%s: vm_set_intinfo error %d", >>> + __func__, error)); >>> + >>> + /* >>> + * If 'virtual NMIs' are being used and the VM-exit >>> + * happened while injecting an NMI during the previous >>> + * VM-entry, then clear "blocking by NMI" in the >>> + * Guest Interruptibility-State so the NMI can be >>> + * reinjected on the subsequent VM-entry. 
>>> + * >>> + * However, if the NMI was being delivered through a task >>> + * gate, then the new task must start execution with NMIs >>> + * blocked so don't clear NMI blocking in this case. >>> + */ >>> + intr_type = idtvec_info & VMCS_INTR_T_MASK; >>> + if (intr_type == VMCS_INTR_T_NMI) { >>> + if (reason != EXIT_REASON_TASK_SWITCH) >>> + vmx_clear_nmi_blocking(vmx, vcpu); >>> + else >>> + vmx_assert_nmi_blocking(vmx, vcpu); >>> + } >>> + >>> + /* >>> + * Update VM-entry instruction length if the event being >>> + * delivered was a software interrupt or software exception. >>> + */ >>> + if (intr_type == VMCS_INTR_T_SWINTR || >>> + intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION || >>> + intr_type == VMCS_INTR_T_SWEXCEPTION) { >>> vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); >>> } >>> - break; >>> - default: >>> - idtvec_info = 0; >>> - break; >>> } >>> >>> switch (reason) { >>> @@ -2136,7 +2157,7 @@ vmx_exit_process(struct vmx *vmx, int vc >>> */ >>> if (ts->reason == TSR_IDT_GATE) { >>> KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, >>> - ("invalid idtvec_info %x for IDT task switch", >>> + ("invalid idtvec_info %#x for IDT task switch", >>> idtvec_info)); >>> intr_type = idtvec_info & VMCS_INTR_T_MASK; >>> if (intr_type != VMCS_INTR_T_SWINTR && >>> @@ -2302,6 +2323,7 @@ vmx_exit_process(struct vmx *vmx, int vc >>> * the guest. >>> * >>> * See "Resuming Guest Software after Handling an Exception". >>> + * See "Information for VM Exits Due to Vectored Events". >>> */ >>> if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && >>> (intr_info & 0xff) != IDT_DF && >>> @@ -2519,6 +2541,13 @@ vmx_run(void *arg, int vcpu, register_t >>> * pmap_invalidate_ept(). >>> */ >>> disable_intr(); >>> + vmx_inject_interrupts(vmx, vcpu, vlapic); >>> + >>> + /* >>> + * Check for vcpu suspension after injecting events because >>> + * vmx_inject_interrupts() can suspend the vcpu due to a >>> + * triple fault. 
>>> + */ >>> if (vcpu_suspended(suspend_cookie)) { >>> enable_intr(); >>> vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip()); >>> @@ -2539,7 +2568,6 @@ vmx_run(void *arg, int vcpu, register_t >>> break; >>> } >>> >>> - vmx_inject_interrupts(vmx, vcpu, vlapic); >>> vmx_run_trace(vmx, vcpu); >>> rc = vmx_enter_guest(vmxctx, vmx, launched); >>> >>> >>> Modified: head/sys/amd64/vmm/vmm.c >>> ============================================================================== >>> --- head/sys/amd64/vmm/vmm.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/sys/amd64/vmm/vmm.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -97,6 +97,7 @@ struct vcpu { >>> int hostcpu; /* (o) vcpu's host cpu */ >>> struct vlapic *vlapic; /* (i) APIC device model */ >>> enum x2apic_state x2apic_state; /* (i) APIC mode */ >>> + uint64_t exitintinfo; /* (i) events pending at VM exit */ >>> int nmi_pending; /* (i) NMI pending */ >>> int extint_pending; /* (i) INTR pending */ >>> struct vm_exception exception; /* (x) exception collateral */ >>> @@ -241,6 +242,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bo >>> >>> vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); >>> vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); >>> + vcpu->exitintinfo = 0; >>> vcpu->nmi_pending = 0; >>> vcpu->extint_pending = 0; >>> vcpu->exception_pending = 0; >>> @@ -1458,6 +1460,202 @@ restart: >>> } >>> >>> int >>> +vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) >>> +{ >>> + struct vcpu *vcpu; >>> + int type, vector; >>> + >>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) >>> + return (EINVAL); >>> + >>> + vcpu = &vm->vcpu[vcpuid]; >>> + >>> + if (info & VM_INTINFO_VALID) { >>> + type = info & VM_INTINFO_TYPE; >>> + vector = info & 0xff; >>> + if (type == VM_INTINFO_NMI && vector != IDT_NMI) >>> + return (EINVAL); >>> + if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) >>> + return (EINVAL); >>> + if (info & VM_INTINFO_RSVD) >>> + return (EINVAL); >>> + } else { >>> + info = 0; >>> + } >>> + VCPU_CTR2(vm, vcpuid, "%s: 
info1(%#lx)", __func__, info); >>> + vcpu->exitintinfo = info; >>> + return (0); >>> +} >>> + >>> +enum exc_class { >>> + EXC_BENIGN, >>> + EXC_CONTRIBUTORY, >>> + EXC_PAGEFAULT >>> +}; >>> + >>> +#define IDT_VE 20 /* Virtualization Exception (Intel specific) */ >>> + >>> +static enum exc_class >>> +exception_class(uint64_t info) >>> +{ >>> + int type, vector; >>> + >>> + KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); >>> + type = info & VM_INTINFO_TYPE; >>> + vector = info & 0xff; >>> + >>> + /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ >>> + switch (type) { >>> + case VM_INTINFO_HWINTR: >>> + case VM_INTINFO_SWINTR: >>> + case VM_INTINFO_NMI: >>> + return (EXC_BENIGN); >>> + default: >>> + /* >>> + * Hardware exception. >>> + * >>> + * SVM and VT-x use identical type values to represent NMI, >>> + * hardware interrupt and software interrupt. >>> + * >>> + * SVM uses type '3' for all exceptions. VT-x uses type '3' >>> + * for exceptions except #BP and #OF. #BP and #OF use a type >>> + * value of '5' or '6'. Therefore we don't check for explicit >>> + * values of 'type' to classify 'intinfo' into a hardware >>> + * exception. 
>>> + */ >>> + break; >>> + } >>> + >>> + switch (vector) { >>> + case IDT_PF: >>> + case IDT_VE: >>> + return (EXC_PAGEFAULT); >>> + case IDT_DE: >>> + case IDT_TS: >>> + case IDT_NP: >>> + case IDT_SS: >>> + case IDT_GP: >>> + return (EXC_CONTRIBUTORY); >>> + default: >>> + return (EXC_BENIGN); >>> + } >>> +} >>> + >>> +static int >>> +nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, >>> + uint64_t *retinfo) >>> +{ >>> + enum exc_class exc1, exc2; >>> + int type1, vector1; >>> + >>> + KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); >>> + KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); >>> + >>> + /* >>> + * If an exception occurs while attempting to call the double-fault >>> + * handler the processor enters shutdown mode (aka triple fault). >>> + */ >>> + type1 = info1 & VM_INTINFO_TYPE; >>> + vector1 = info1 & 0xff; >>> + if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { >>> + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", >>> + info1, info2); >>> + vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); >>> + *retinfo = 0; >>> + return (0); >>> + } >>> + >>> + /* >>> + * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 >>> + */ >>> + exc1 = exception_class(info1); >>> + exc2 = exception_class(info2); >>> + if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || >>> + (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { >>> + /* Convert nested fault into a double fault. 
*/ >>> + *retinfo = IDT_DF; >>> + *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; >>> + *retinfo |= VM_INTINFO_DEL_ERRCODE; >>> + } else { >>> + /* Handle exceptions serially */ >>> + *retinfo = info2; >>> + } >>> + return (1); >>> +} >>> + >>> +static uint64_t >>> +vcpu_exception_intinfo(struct vcpu *vcpu) >>> +{ >>> + uint64_t info = 0; >>> + >>> + if (vcpu->exception_pending) { >>> + info = vcpu->exception.vector & 0xff; >>> + info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; >>> + if (vcpu->exception.error_code_valid) { >>> + info |= VM_INTINFO_DEL_ERRCODE; >>> + info |= (uint64_t)vcpu->exception.error_code << 32; >>> + } >>> + } >>> + return (info); >>> +} >>> + >>> +int >>> +vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) >>> +{ >>> + struct vcpu *vcpu; >>> + uint64_t info1, info2; >>> + int valid; >>> + >>> + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); >>> + >>> + vcpu = &vm->vcpu[vcpuid]; >>> + >>> + info1 = vcpu->exitintinfo; >>> + vcpu->exitintinfo = 0; >>> + >>> + info2 = 0; >>> + if (vcpu->exception_pending) { >>> + info2 = vcpu_exception_intinfo(vcpu); >>> + vcpu->exception_pending = 0; >>> + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", >>> + vcpu->exception.vector, info2); >>> + } >>> + >>> + if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { >>> + valid = nested_fault(vm, vcpuid, info1, info2, retinfo); >>> + } else if (info1 & VM_INTINFO_VALID) { >>> + *retinfo = info1; >>> + valid = 1; >>> + } else if (info2 & VM_INTINFO_VALID) { >>> + *retinfo = info2; >>> + valid = 1; >>> + } else { >>> + valid = 0; >>> + } >>> + >>> + if (valid) { >>> + VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " >>> + "retinfo(%#lx)", __func__, info1, info2, *retinfo); >>> + } >>> + >>> + return (valid); >>> +} >>> + >>> +int >>> +vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) >>> +{ >>> + struct vcpu *vcpu; >>> + >>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) >>> + 
return (EINVAL); >>> + >>> + vcpu = &vm->vcpu[vcpuid]; >>> + *info1 = vcpu->exitintinfo; >>> + *info2 = vcpu_exception_intinfo(vcpu); >>> + return (0); >>> +} >>> + >>> +int >>> vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) >>> { >>> struct vcpu *vcpu; >>> @@ -1468,6 +1666,14 @@ vm_inject_exception(struct vm *vm, int v >>> if (exception->vector < 0 || exception->vector >= 32) >>> return (EINVAL); >>> >>> + /* >>> + * A double fault exception should never be injected directly into >>> + * the guest. It is a derived exception that results from specific >>> + * combinations of nested faults. >>> + */ >>> + if (exception->vector == IDT_DF) >>> + return (EINVAL); >>> + >>> vcpu = &vm->vcpu[vcpuid]; >>> >>> if (vcpu->exception_pending) { >>> @@ -1483,25 +1689,6 @@ vm_inject_exception(struct vm *vm, int v >>> return (0); >>> } >>> >>> -int >>> -vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception) >>> -{ >>> - struct vcpu *vcpu; >>> - int pending; >>> - >>> - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); >>> - >>> - vcpu = &vm->vcpu[vcpuid]; >>> - pending = vcpu->exception_pending; >>> - if (pending) { >>> - vcpu->exception_pending = 0; >>> - *exception = vcpu->exception; >>> - VCPU_CTR1(vm, vcpuid, "Exception %d delivered", >>> - exception->vector); >>> - } >>> - return (pending); >>> -} >>> - >>> static void >>> vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception) >>> { >>> >>> Modified: head/sys/amd64/vmm/vmm_dev.c >>> ============================================================================== >>> --- head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -173,6 +173,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c >>> struct vm_gla2gpa *gg; >>> struct vm_activate_cpu *vac; >>> struct vm_cpuset *vm_cpuset; >>> + struct vm_intinfo *vmii; >>> >>> sc = vmmdev_lookup2(cdev); >>> 
if (sc == NULL) >>> @@ -199,6 +200,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long c >>> case VM_SET_X2APIC_STATE: >>> case VM_GLA2GPA: >>> case VM_ACTIVATE_CPU: >>> + case VM_SET_INTINFO: >>> + case VM_GET_INTINFO: >>> /* >>> * XXX fragile, handle with care >>> * Assumes that the first field of the ioctl data is the vcpu. >>> @@ -470,6 +473,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long c >>> error = copyout(cpuset, vm_cpuset->cpus, size); >>> free(cpuset, M_TEMP); >>> break; >>> + case VM_SET_INTINFO: >>> + vmii = (struct vm_intinfo *)data; >>> + error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); >>> + break; >>> + case VM_GET_INTINFO: >>> + vmii = (struct vm_intinfo *)data; >>> + error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, >>> + &vmii->info2); >>> + break; >>> default: >>> error = ENOTTY; >>> break; >>> >>> Modified: head/usr.sbin/bhyve/bhyverun.c >>> ============================================================================== >>> --- head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -534,6 +534,8 @@ vmexit_suspend(struct vmctx *ctx, struct >>> exit(1); >>> case VM_SUSPEND_HALT: >>> exit(2); >>> + case VM_SUSPEND_TRIPLEFAULT: >>> + exit(3); >>> default: >>> fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); >>> exit(100); >>> >>> Modified: head/usr.sbin/bhyve/task_switch.c >>> ============================================================================== >>> --- head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -904,10 +904,14 @@ vmexit_task_switch(struct vmctx *ctx, st >>> */ >>> >>> /* >>> - * XXX is the original task switch was triggered by a hardware >>> - * exception then do we generate a double-fault if we encounter >>> - * an exception during the task switch? 
>>> + * If the task switch was triggered by an event delivered through >>> + * the IDT then extinguish the pending event from the vcpu's >>> + * exitintinfo. >>> */ >>> + if (task_switch->reason == TSR_IDT_GATE) { >>> + error = vm_set_intinfo(ctx, vcpu, 0); >>> + assert(error == 0); >>> + } >>> >>> /* >>> * XXX should inject debug exception if 'T' bit is 1 >>> >>> Modified: head/usr.sbin/bhyvectl/bhyvectl.c >>> ============================================================================== >>> --- head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:55:13 2014 (r268888) >>> +++ head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:59:08 2014 (r268889) >>> @@ -195,7 +195,8 @@ usage(void) >>> " [--force-reset]\n" >>> " [--force-poweroff]\n" >>> " [--get-active-cpus]\n" >>> - " [--get-suspended-cpus]\n", >>> + " [--get-suspended-cpus]\n" >>> + " [--get-intinfo]\n", >>> progname); >>> exit(1); >>> } >>> @@ -205,6 +206,7 @@ static int inject_nmi, assert_lapic_lvt; >>> static int force_reset, force_poweroff; >>> static const char *capname; >>> static int create, destroy, get_lowmem, get_highmem; >>> +static int get_intinfo; >>> static int get_active_cpus, get_suspended_cpus; >>> static uint64_t memsize; >>> static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; >>> @@ -412,6 +414,37 @@ print_cpus(const char *banner, const cpu >>> printf("\n"); >>> } >>> >>> +static void >>> +print_intinfo(const char *banner, uint64_t info) >>> +{ >>> + int type; >>> + >>> + printf("%s:\t", banner); >>> + if (info & VM_INTINFO_VALID) { >>> + type = info & VM_INTINFO_TYPE; >>> + switch (type) { >>> + case VM_INTINFO_HWINTR: >>> + printf("extint"); >>> + break; >>> + case VM_INTINFO_NMI: >>> + printf("nmi"); >>> + break; >>> + case VM_INTINFO_SWINTR: >>> + printf("swint"); >>> + break; >>> + default: >>> + printf("exception"); >>> + break; >>> + } >>> + printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); >>> + if (info & VM_INTINFO_DEL_ERRCODE) >>> + printf(" errcode %#x", (u_int)(info >> 
32)); >>> + } else { >>> + printf("n/a"); >>> + } >>> + printf("\n"); >>> +} >>> + >>> int >>> main(int argc, char *argv[]) >>> { >>> @@ -420,7 +453,7 @@ main(int argc, char *argv[]) >>> vm_paddr_t gpa, gpa_pmap; >>> size_t len; >>> struct vm_exit vmexit; >>> - uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte; >>> + uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte, info[2]; >>> struct vmctx *ctx; >>> int wired; >>> cpuset_t cpus; >>> @@ -595,6 +628,7 @@ main(int argc, char *argv[]) >>> { "force-poweroff", NO_ARG, &force_poweroff, 1 }, >>> { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, >>> { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, >>> + { "get-intinfo", NO_ARG, &get_intinfo, 1 }, >>> { NULL, 0, NULL, 0 } >>> }; >>> >>> @@ -1566,6 +1600,14 @@ main(int argc, char *argv[]) >>> print_cpus("suspended cpus", &cpus); >>> } >>> >>> + if (!error && (get_intinfo || get_all)) { >>> + error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]); >>> + if (!error) { >>> + print_intinfo("pending", info[0]); >>> + print_intinfo("current", info[1]); >>> + } >>> + } >>> + >>> if (!error && run) { >>> error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); >>> assert(error == 0); >>>
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-VmokkfCXSc1e0FOwp6vjHkrWCMh%2Bcjo2BDWExpzJpcbzXkw>