Date: Sun, 20 Jul 2014 00:54:51 -0700
From: Neel Natu <neelnatu@gmail.com>
To: Adrian Chadd <adrian@freebsd.org>
Cc: "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>, "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>, "src-committers@freebsd.org" <src-committers@freebsd.org>, Neel Natu <neel@freebsd.org>
Subject: Re: svn commit: r268889 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel usr.sbin/bhyve usr.sbin/bhyvectl
Message-ID: <CAFgRE9HGYMakq%2BfAvM9ZinwGVrgd_Gu8bbH0yDopFkmYGY5G%2Bw@mail.gmail.com>
In-Reply-To: <CAJ-VmokkfCXSc1e0FOwp6vjHkrWCMh%2Bcjo2BDWExpzJpcbzXkw@mail.gmail.com>
References: <201407192059.s6JKx8un072543@svn.freebsd.org> <CAJ-VmondeNMPhTFTUZxW-j5kLoV1XBjazUERWPTqOiV2tcQz_A@mail.gmail.com> <CAFgRE9FVXwKtxe9yAEHhx0dUck6huKtjngCPx=OvT4k922YbqQ@mail.gmail.com> <CAJ-VmokkfCXSc1e0FOwp6vjHkrWCMh%2Bcjo2BDWExpzJpcbzXkw@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
Hi Adrian, On Sun, Jul 20, 2014 at 12:48 AM, Adrian Chadd <adrian@freebsd.org> wrote: > On 20 July 2014 00:47, Neel Natu <neelnatu@gmail.com> wrote: >> Hi Adrian, >> >> On Sat, Jul 19, 2014 at 11:30 PM, Adrian Chadd <adrian@freebsd.org> wrote: >>> Hi! >>> >>> This broke -HEAD. 'exc' in vmx_inject_interrupts() is no longer >>> initialised before use /and/ it's part of a KASSERT() output. >>> >> >> Are you building with an external toolchain? > > Nope; just head with invariants disabled. > Ok, so it broke your custom kernel config as opposed to breaking HEAD. I'll fix it shortly. best Neel > > > -a > >> best >> Neel >> >>> Thanks! >>> >>> >>> -a >>> >>> >>> On 19 July 2014 13:59, Neel Natu <neel@freebsd.org> wrote: >>>> Author: neel >>>> Date: Sat Jul 19 20:59:08 2014 >>>> New Revision: 268889 >>>> URL: http://svnweb.freebsd.org/changeset/base/268889 >>>> >>>> Log: >>>> Handle nested exceptions in bhyve. >>>> >>>> A nested exception condition arises when a second exception is triggered while >>>> delivering the first exception. Most nested exceptions can be handled serially >>>> but some are converted into a double fault. If an exception is generated during >>>> delivery of a double fault then the virtual machine shuts down as a result of >>>> a triple fault. >>>> >>>> vm_exit_intinfo() is used to record that a VM-exit happened while an event was >>>> being delivered through the IDT. If an exception is triggered while handling >>>> the VM-exit it will be treated like a nested exception. >>>> >>>> vm_entry_intinfo() is used by processor-specific code to get the event to be >>>> injected into the guest on the next VM-entry. This function is responsible for >>>> deciding the disposition of nested exceptions. 
>>>> >>>> Modified: >>>> head/lib/libvmmapi/vmmapi.c >>>> head/lib/libvmmapi/vmmapi.h >>>> head/sys/amd64/include/vmm.h >>>> head/sys/amd64/include/vmm_dev.h >>>> head/sys/amd64/vmm/intel/vmx.c >>>> head/sys/amd64/vmm/vmm.c >>>> head/sys/amd64/vmm/vmm_dev.c >>>> head/usr.sbin/bhyve/bhyverun.c >>>> head/usr.sbin/bhyve/task_switch.c >>>> head/usr.sbin/bhyvectl/bhyvectl.c >>>> >>>> Modified: head/lib/libvmmapi/vmmapi.c >>>> ============================================================================== >>>> --- head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/lib/libvmmapi/vmmapi.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -1106,3 +1106,32 @@ vm_activate_cpu(struct vmctx *ctx, int v >>>> error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); >>>> return (error); >>>> } >>>> + >>>> +int >>>> +vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2) >>>> +{ >>>> + struct vm_intinfo vmii; >>>> + int error; >>>> + >>>> + bzero(&vmii, sizeof(struct vm_intinfo)); >>>> + vmii.vcpuid = vcpu; >>>> + error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii); >>>> + if (error == 0) { >>>> + *info1 = vmii.info1; >>>> + *info2 = vmii.info2; >>>> + } >>>> + return (error); >>>> +} >>>> + >>>> +int >>>> +vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) >>>> +{ >>>> + struct vm_intinfo vmii; >>>> + int error; >>>> + >>>> + bzero(&vmii, sizeof(struct vm_intinfo)); >>>> + vmii.vcpuid = vcpu; >>>> + vmii.info1 = info1; >>>> + error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); >>>> + return (error); >>>> +} >>>> >>>> Modified: head/lib/libvmmapi/vmmapi.h >>>> ============================================================================== >>>> --- head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/lib/libvmmapi/vmmapi.h Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -104,6 +104,9 @@ int vm_setup_pptdev_msix(struct vmctx *c >>>> int func, int idx, uint64_t addr, uint64_t msg, >>>> uint32_t vector_control); >>>> >>>> +int 
vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2); >>>> +int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo); >>>> + >>>> /* >>>> * Return a pointer to the statistics buffer. Note that this is not MT-safe. >>>> */ >>>> >>>> Modified: head/sys/amd64/include/vmm.h >>>> ============================================================================== >>>> --- head/sys/amd64/include/vmm.h Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/sys/amd64/include/vmm.h Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -34,6 +34,7 @@ enum vm_suspend_how { >>>> VM_SUSPEND_RESET, >>>> VM_SUSPEND_POWEROFF, >>>> VM_SUSPEND_HALT, >>>> + VM_SUSPEND_TRIPLEFAULT, >>>> VM_SUSPEND_LAST >>>> }; >>>> >>>> @@ -88,6 +89,16 @@ enum x2apic_state { >>>> X2APIC_STATE_LAST >>>> }; >>>> >>>> +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) >>>> +#define VM_INTINFO_DEL_ERRCODE 0x800 >>>> +#define VM_INTINFO_RSVD 0x7ffff000 >>>> +#define VM_INTINFO_VALID 0x80000000 >>>> +#define VM_INTINFO_TYPE 0x700 >>>> +#define VM_INTINFO_HWINTR (0 << 8) >>>> +#define VM_INTINFO_NMI (2 << 8) >>>> +#define VM_INTINFO_HWEXCEPTION (3 << 8) >>>> +#define VM_INTINFO_SWINTR (4 << 8) >>>> + >>>> #ifdef _KERNEL >>>> >>>> #define VM_MAX_NAMELEN 32 >>>> @@ -278,14 +289,31 @@ struct vatpit *vm_atpit(struct vm *vm); >>>> int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme); >>>> >>>> /* >>>> - * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an >>>> - * exception is pending and also updates 'vme'. The pending exception is >>>> - * cleared when this function returns. >>>> + * This function is called after a VM-exit that occurred during exception or >>>> + * interrupt delivery through the IDT. The format of 'intinfo' is described >>>> + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. >>>> * >>>> - * This function should only be called in the context of the thread that is >>>> - * executing this vcpu. 
>>>> + * If a VM-exit handler completes the event delivery successfully then it >>>> + * should call vm_exit_intinfo() to extinguish the pending event. For e.g., >>>> + * if the task switch emulation is triggered via a task gate then it should >>>> + * call this function with 'intinfo=0' to indicate that the external event >>>> + * is not pending anymore. >>>> + * >>>> + * Return value is 0 on success and non-zero on failure. >>>> */ >>>> -int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme); >>>> +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); >>>> + >>>> +/* >>>> + * This function is called before every VM-entry to retrieve a pending >>>> + * event that should be injected into the guest. This function combines >>>> + * nested events into a double or triple fault. >>>> + * >>>> + * Returns 0 if there are no events that need to be injected into the guest >>>> + * and non-zero otherwise. >>>> + */ >>>> +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); >>>> + >>>> +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); >>>> >>>> void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */ >>>> void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */ >>>> >>>> Modified: head/sys/amd64/include/vmm_dev.h >>>> ============================================================================== >>>> --- head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/sys/amd64/include/vmm_dev.h Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -189,6 +189,12 @@ struct vm_cpuset { >>>> #define VM_ACTIVE_CPUS 0 >>>> #define VM_SUSPENDED_CPUS 1 >>>> >>>> +struct vm_intinfo { >>>> + int vcpuid; >>>> + uint64_t info1; >>>> + uint64_t info2; >>>> +}; >>>> + >>>> enum { >>>> /* general routines */ >>>> IOCNUM_ABIVERS = 0, >>>> @@ -211,6 +217,8 @@ enum { >>>> IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, >>>> >>>> /* interrupt injection */ >>>> + 
IOCNUM_GET_INTINFO = 28, >>>> + IOCNUM_SET_INTINFO = 29, >>>> IOCNUM_INJECT_EXCEPTION = 30, >>>> IOCNUM_LAPIC_IRQ = 31, >>>> IOCNUM_INJECT_NMI = 32, >>>> @@ -324,4 +332,8 @@ enum { >>>> _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) >>>> #define VM_GET_CPUS \ >>>> _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) >>>> +#define VM_SET_INTINFO \ >>>> + _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) >>>> +#define VM_GET_INTINFO \ >>>> + _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) >>>> #endif >>>> >>>> Modified: head/sys/amd64/vmm/intel/vmx.c >>>> ============================================================================== >>>> --- head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/sys/amd64/vmm/intel/vmx.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -1213,22 +1213,31 @@ vmx_inject_interrupts(struct vmx *vmx, i >>>> { >>>> struct vm_exception exc; >>>> int vector, need_nmi_exiting, extint_pending; >>>> - uint64_t rflags; >>>> + uint64_t rflags, entryinfo; >>>> uint32_t gi, info; >>>> >>>> - if (vm_exception_pending(vmx->vm, vcpu, &exc)) { >>>> - KASSERT(exc.vector >= 0 && exc.vector < 32, >>>> - ("%s: invalid exception vector %d", __func__, exc.vector)); >>>> + if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { >>>> + KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " >>>> + "intinfo is not valid: %#lx", __func__, entryinfo)); >>>> >>>> info = vmcs_read(VMCS_ENTRY_INTR_INFO); >>>> KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " >>>> "pending exception %d: %#x", __func__, exc.vector, info)); >>>> >>>> - info = exc.vector | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID; >>>> - if (exc.error_code_valid) { >>>> - info |= VMCS_INTR_DEL_ERRCODE; >>>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, exc.error_code); >>>> + info = entryinfo; >>>> + vector = info & 0xff; >>>> + if (vector == IDT_BP || vector == IDT_OF) { >>>> + /* >>>> + * VT-x requires #BP and #OF to be injected as software >>>> + * exceptions. 
>>>> + */ >>>> + info &= ~VMCS_INTR_T_MASK; >>>> + info |= VMCS_INTR_T_SWEXCEPTION; >>>> } >>>> + >>>> + if (info & VMCS_INTR_DEL_ERRCODE) >>>> + vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32); >>>> + >>>> vmcs_write(VMCS_ENTRY_INTR_INFO, info); >>>> } >>>> >>>> @@ -1407,6 +1416,16 @@ vmx_clear_nmi_blocking(struct vmx *vmx, >>>> vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); >>>> } >>>> >>>> +static void >>>> +vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) >>>> +{ >>>> + uint32_t gi; >>>> + >>>> + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); >>>> + KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, >>>> + ("NMI blocking is not in effect %#x", gi)); >>>> +} >>>> + >>>> static int >>>> vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) >>>> { >>>> @@ -2050,7 +2069,7 @@ vmx_exit_process(struct vmx *vmx, int vc >>>> struct vm_task_switch *ts; >>>> uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; >>>> uint32_t intr_type, reason; >>>> - uint64_t qual, gpa; >>>> + uint64_t exitintinfo, qual, gpa; >>>> bool retu; >>>> >>>> CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); >>>> @@ -2070,47 +2089,49 @@ vmx_exit_process(struct vmx *vmx, int vc >>>> * be handled specially by re-injecting the event if the IDT >>>> * vectoring information field's valid bit is set. >>>> * >>>> - * If the VM-exit is due to a task gate in the IDT then we don't >>>> - * reinject the event because emulating the task switch also >>>> - * completes the event delivery. >>>> - * >>>> * See "Information for VM Exits During Event Delivery" in Intel SDM >>>> * for details. 
>>>> */ >>>> - switch (reason) { >>>> - case EXIT_REASON_EPT_FAULT: >>>> - case EXIT_REASON_EPT_MISCONFIG: >>>> - case EXIT_REASON_APIC_ACCESS: >>>> - case EXIT_REASON_TASK_SWITCH: >>>> - case EXIT_REASON_EXCEPTION: >>>> - idtvec_info = vmcs_idt_vectoring_info(); >>>> - VCPU_CTR2(vmx->vm, vcpu, "vm exit %s: idtvec_info 0x%08x", >>>> - exit_reason_to_str(reason), idtvec_info); >>>> - if ((idtvec_info & VMCS_IDT_VEC_VALID) && >>>> - (reason != EXIT_REASON_TASK_SWITCH)) { >>>> - idtvec_info &= ~(1 << 12); /* clear undefined bit */ >>>> - vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info); >>>> - if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { >>>> - idtvec_err = vmcs_idt_vectoring_err(); >>>> - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, >>>> - idtvec_err); >>>> - } >>>> - /* >>>> - * If 'virtual NMIs' are being used and the VM-exit >>>> - * happened while injecting an NMI during the previous >>>> - * VM-entry, then clear "blocking by NMI" in the Guest >>>> - * Interruptibility-state. >>>> - */ >>>> - if ((idtvec_info & VMCS_INTR_T_MASK) == >>>> - VMCS_INTR_T_NMI) { >>>> - vmx_clear_nmi_blocking(vmx, vcpu); >>>> - } >>>> + idtvec_info = vmcs_idt_vectoring_info(); >>>> + if (idtvec_info & VMCS_IDT_VEC_VALID) { >>>> + idtvec_info &= ~(1 << 12); /* clear undefined bit */ >>>> + exitintinfo = idtvec_info; >>>> + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { >>>> + idtvec_err = vmcs_idt_vectoring_err(); >>>> + exitintinfo |= (uint64_t)idtvec_err << 32; >>>> + } >>>> + error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo); >>>> + KASSERT(error == 0, ("%s: vm_set_intinfo error %d", >>>> + __func__, error)); >>>> + >>>> + /* >>>> + * If 'virtual NMIs' are being used and the VM-exit >>>> + * happened while injecting an NMI during the previous >>>> + * VM-entry, then clear "blocking by NMI" in the >>>> + * Guest Interruptibility-State so the NMI can be >>>> + * reinjected on the subsequent VM-entry. 
>>>> + * >>>> + * However, if the NMI was being delivered through a task >>>> + * gate, then the new task must start execution with NMIs >>>> + * blocked so don't clear NMI blocking in this case. >>>> + */ >>>> + intr_type = idtvec_info & VMCS_INTR_T_MASK; >>>> + if (intr_type == VMCS_INTR_T_NMI) { >>>> + if (reason != EXIT_REASON_TASK_SWITCH) >>>> + vmx_clear_nmi_blocking(vmx, vcpu); >>>> + else >>>> + vmx_assert_nmi_blocking(vmx, vcpu); >>>> + } >>>> + >>>> + /* >>>> + * Update VM-entry instruction length if the event being >>>> + * delivered was a software interrupt or software exception. >>>> + */ >>>> + if (intr_type == VMCS_INTR_T_SWINTR || >>>> + intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION || >>>> + intr_type == VMCS_INTR_T_SWEXCEPTION) { >>>> vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); >>>> } >>>> - break; >>>> - default: >>>> - idtvec_info = 0; >>>> - break; >>>> } >>>> >>>> switch (reason) { >>>> @@ -2136,7 +2157,7 @@ vmx_exit_process(struct vmx *vmx, int vc >>>> */ >>>> if (ts->reason == TSR_IDT_GATE) { >>>> KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, >>>> - ("invalid idtvec_info %x for IDT task switch", >>>> + ("invalid idtvec_info %#x for IDT task switch", >>>> idtvec_info)); >>>> intr_type = idtvec_info & VMCS_INTR_T_MASK; >>>> if (intr_type != VMCS_INTR_T_SWINTR && >>>> @@ -2302,6 +2323,7 @@ vmx_exit_process(struct vmx *vmx, int vc >>>> * the guest. >>>> * >>>> * See "Resuming Guest Software after Handling an Exception". >>>> + * See "Information for VM Exits Due to Vectored Events". >>>> */ >>>> if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && >>>> (intr_info & 0xff) != IDT_DF && >>>> @@ -2519,6 +2541,13 @@ vmx_run(void *arg, int vcpu, register_t >>>> * pmap_invalidate_ept(). >>>> */ >>>> disable_intr(); >>>> + vmx_inject_interrupts(vmx, vcpu, vlapic); >>>> + >>>> + /* >>>> + * Check for vcpu suspension after injecting events because >>>> + * vmx_inject_interrupts() can suspend the vcpu due to a >>>> + * triple fault. 
>>>> + */ >>>> if (vcpu_suspended(suspend_cookie)) { >>>> enable_intr(); >>>> vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip()); >>>> @@ -2539,7 +2568,6 @@ vmx_run(void *arg, int vcpu, register_t >>>> break; >>>> } >>>> >>>> - vmx_inject_interrupts(vmx, vcpu, vlapic); >>>> vmx_run_trace(vmx, vcpu); >>>> rc = vmx_enter_guest(vmxctx, vmx, launched); >>>> >>>> >>>> Modified: head/sys/amd64/vmm/vmm.c >>>> ============================================================================== >>>> --- head/sys/amd64/vmm/vmm.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/sys/amd64/vmm/vmm.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -97,6 +97,7 @@ struct vcpu { >>>> int hostcpu; /* (o) vcpu's host cpu */ >>>> struct vlapic *vlapic; /* (i) APIC device model */ >>>> enum x2apic_state x2apic_state; /* (i) APIC mode */ >>>> + uint64_t exitintinfo; /* (i) events pending at VM exit */ >>>> int nmi_pending; /* (i) NMI pending */ >>>> int extint_pending; /* (i) INTR pending */ >>>> struct vm_exception exception; /* (x) exception collateral */ >>>> @@ -241,6 +242,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bo >>>> >>>> vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); >>>> vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); >>>> + vcpu->exitintinfo = 0; >>>> vcpu->nmi_pending = 0; >>>> vcpu->extint_pending = 0; >>>> vcpu->exception_pending = 0; >>>> @@ -1458,6 +1460,202 @@ restart: >>>> } >>>> >>>> int >>>> +vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) >>>> +{ >>>> + struct vcpu *vcpu; >>>> + int type, vector; >>>> + >>>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) >>>> + return (EINVAL); >>>> + >>>> + vcpu = &vm->vcpu[vcpuid]; >>>> + >>>> + if (info & VM_INTINFO_VALID) { >>>> + type = info & VM_INTINFO_TYPE; >>>> + vector = info & 0xff; >>>> + if (type == VM_INTINFO_NMI && vector != IDT_NMI) >>>> + return (EINVAL); >>>> + if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) >>>> + return (EINVAL); >>>> + if (info & VM_INTINFO_RSVD) >>>> + return (EINVAL); >>>> + } else { >>>> 
+ info = 0; >>>> + } >>>> + VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); >>>> + vcpu->exitintinfo = info; >>>> + return (0); >>>> +} >>>> + >>>> +enum exc_class { >>>> + EXC_BENIGN, >>>> + EXC_CONTRIBUTORY, >>>> + EXC_PAGEFAULT >>>> +}; >>>> + >>>> +#define IDT_VE 20 /* Virtualization Exception (Intel specific) */ >>>> + >>>> +static enum exc_class >>>> +exception_class(uint64_t info) >>>> +{ >>>> + int type, vector; >>>> + >>>> + KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); >>>> + type = info & VM_INTINFO_TYPE; >>>> + vector = info & 0xff; >>>> + >>>> + /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ >>>> + switch (type) { >>>> + case VM_INTINFO_HWINTR: >>>> + case VM_INTINFO_SWINTR: >>>> + case VM_INTINFO_NMI: >>>> + return (EXC_BENIGN); >>>> + default: >>>> + /* >>>> + * Hardware exception. >>>> + * >>>> + * SVM and VT-x use identical type values to represent NMI, >>>> + * hardware interrupt and software interrupt. >>>> + * >>>> + * SVM uses type '3' for all exceptions. VT-x uses type '3' >>>> + * for exceptions except #BP and #OF. #BP and #OF use a type >>>> + * value of '5' or '6'. Therefore we don't check for explicit >>>> + * values of 'type' to classify 'intinfo' into a hardware >>>> + * exception. 
>>>> + */ >>>> + break; >>>> + } >>>> + >>>> + switch (vector) { >>>> + case IDT_PF: >>>> + case IDT_VE: >>>> + return (EXC_PAGEFAULT); >>>> + case IDT_DE: >>>> + case IDT_TS: >>>> + case IDT_NP: >>>> + case IDT_SS: >>>> + case IDT_GP: >>>> + return (EXC_CONTRIBUTORY); >>>> + default: >>>> + return (EXC_BENIGN); >>>> + } >>>> +} >>>> + >>>> +static int >>>> +nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, >>>> + uint64_t *retinfo) >>>> +{ >>>> + enum exc_class exc1, exc2; >>>> + int type1, vector1; >>>> + >>>> + KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); >>>> + KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); >>>> + >>>> + /* >>>> + * If an exception occurs while attempting to call the double-fault >>>> + * handler the processor enters shutdown mode (aka triple fault). >>>> + */ >>>> + type1 = info1 & VM_INTINFO_TYPE; >>>> + vector1 = info1 & 0xff; >>>> + if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { >>>> + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", >>>> + info1, info2); >>>> + vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); >>>> + *retinfo = 0; >>>> + return (0); >>>> + } >>>> + >>>> + /* >>>> + * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 >>>> + */ >>>> + exc1 = exception_class(info1); >>>> + exc2 = exception_class(info2); >>>> + if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || >>>> + (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { >>>> + /* Convert nested fault into a double fault. 
*/ >>>> + *retinfo = IDT_DF; >>>> + *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; >>>> + *retinfo |= VM_INTINFO_DEL_ERRCODE; >>>> + } else { >>>> + /* Handle exceptions serially */ >>>> + *retinfo = info2; >>>> + } >>>> + return (1); >>>> +} >>>> + >>>> +static uint64_t >>>> +vcpu_exception_intinfo(struct vcpu *vcpu) >>>> +{ >>>> + uint64_t info = 0; >>>> + >>>> + if (vcpu->exception_pending) { >>>> + info = vcpu->exception.vector & 0xff; >>>> + info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; >>>> + if (vcpu->exception.error_code_valid) { >>>> + info |= VM_INTINFO_DEL_ERRCODE; >>>> + info |= (uint64_t)vcpu->exception.error_code << 32; >>>> + } >>>> + } >>>> + return (info); >>>> +} >>>> + >>>> +int >>>> +vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) >>>> +{ >>>> + struct vcpu *vcpu; >>>> + uint64_t info1, info2; >>>> + int valid; >>>> + >>>> + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); >>>> + >>>> + vcpu = &vm->vcpu[vcpuid]; >>>> + >>>> + info1 = vcpu->exitintinfo; >>>> + vcpu->exitintinfo = 0; >>>> + >>>> + info2 = 0; >>>> + if (vcpu->exception_pending) { >>>> + info2 = vcpu_exception_intinfo(vcpu); >>>> + vcpu->exception_pending = 0; >>>> + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", >>>> + vcpu->exception.vector, info2); >>>> + } >>>> + >>>> + if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { >>>> + valid = nested_fault(vm, vcpuid, info1, info2, retinfo); >>>> + } else if (info1 & VM_INTINFO_VALID) { >>>> + *retinfo = info1; >>>> + valid = 1; >>>> + } else if (info2 & VM_INTINFO_VALID) { >>>> + *retinfo = info2; >>>> + valid = 1; >>>> + } else { >>>> + valid = 0; >>>> + } >>>> + >>>> + if (valid) { >>>> + VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " >>>> + "retinfo(%#lx)", __func__, info1, info2, *retinfo); >>>> + } >>>> + >>>> + return (valid); >>>> +} >>>> + >>>> +int >>>> +vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) >>>> +{ >>>> + 
struct vcpu *vcpu; >>>> + >>>> + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) >>>> + return (EINVAL); >>>> + >>>> + vcpu = &vm->vcpu[vcpuid]; >>>> + *info1 = vcpu->exitintinfo; >>>> + *info2 = vcpu_exception_intinfo(vcpu); >>>> + return (0); >>>> +} >>>> + >>>> +int >>>> vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) >>>> { >>>> struct vcpu *vcpu; >>>> @@ -1468,6 +1666,14 @@ vm_inject_exception(struct vm *vm, int v >>>> if (exception->vector < 0 || exception->vector >= 32) >>>> return (EINVAL); >>>> >>>> + /* >>>> + * A double fault exception should never be injected directly into >>>> + * the guest. It is a derived exception that results from specific >>>> + * combinations of nested faults. >>>> + */ >>>> + if (exception->vector == IDT_DF) >>>> + return (EINVAL); >>>> + >>>> vcpu = &vm->vcpu[vcpuid]; >>>> >>>> if (vcpu->exception_pending) { >>>> @@ -1483,25 +1689,6 @@ vm_inject_exception(struct vm *vm, int v >>>> return (0); >>>> } >>>> >>>> -int >>>> -vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception) >>>> -{ >>>> - struct vcpu *vcpu; >>>> - int pending; >>>> - >>>> - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); >>>> - >>>> - vcpu = &vm->vcpu[vcpuid]; >>>> - pending = vcpu->exception_pending; >>>> - if (pending) { >>>> - vcpu->exception_pending = 0; >>>> - *exception = vcpu->exception; >>>> - VCPU_CTR1(vm, vcpuid, "Exception %d delivered", >>>> - exception->vector); >>>> - } >>>> - return (pending); >>>> -} >>>> - >>>> static void >>>> vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception) >>>> { >>>> >>>> Modified: head/sys/amd64/vmm/vmm_dev.c >>>> ============================================================================== >>>> --- head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/sys/amd64/vmm/vmm_dev.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -173,6 +173,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c >>>> struct vm_gla2gpa *gg; 
>>>> struct vm_activate_cpu *vac; >>>> struct vm_cpuset *vm_cpuset; >>>> + struct vm_intinfo *vmii; >>>> >>>> sc = vmmdev_lookup2(cdev); >>>> if (sc == NULL) >>>> @@ -199,6 +200,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long c >>>> case VM_SET_X2APIC_STATE: >>>> case VM_GLA2GPA: >>>> case VM_ACTIVATE_CPU: >>>> + case VM_SET_INTINFO: >>>> + case VM_GET_INTINFO: >>>> /* >>>> * XXX fragile, handle with care >>>> * Assumes that the first field of the ioctl data is the vcpu. >>>> @@ -470,6 +473,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long c >>>> error = copyout(cpuset, vm_cpuset->cpus, size); >>>> free(cpuset, M_TEMP); >>>> break; >>>> + case VM_SET_INTINFO: >>>> + vmii = (struct vm_intinfo *)data; >>>> + error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); >>>> + break; >>>> + case VM_GET_INTINFO: >>>> + vmii = (struct vm_intinfo *)data; >>>> + error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, >>>> + &vmii->info2); >>>> + break; >>>> default: >>>> error = ENOTTY; >>>> break; >>>> >>>> Modified: head/usr.sbin/bhyve/bhyverun.c >>>> ============================================================================== >>>> --- head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/usr.sbin/bhyve/bhyverun.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -534,6 +534,8 @@ vmexit_suspend(struct vmctx *ctx, struct >>>> exit(1); >>>> case VM_SUSPEND_HALT: >>>> exit(2); >>>> + case VM_SUSPEND_TRIPLEFAULT: >>>> + exit(3); >>>> default: >>>> fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); >>>> exit(100); >>>> >>>> Modified: head/usr.sbin/bhyve/task_switch.c >>>> ============================================================================== >>>> --- head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/usr.sbin/bhyve/task_switch.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -904,10 +904,14 @@ vmexit_task_switch(struct vmctx *ctx, st >>>> */ >>>> >>>> /* >>>> - * XXX is the original task switch was triggered by a 
hardware >>>> - * exception then do we generate a double-fault if we encounter >>>> - * an exception during the task switch? >>>> + * If the task switch was triggered by an event delivered through >>>> + * the IDT then extinguish the pending event from the vcpu's >>>> + * exitintinfo. >>>> */ >>>> + if (task_switch->reason == TSR_IDT_GATE) { >>>> + error = vm_set_intinfo(ctx, vcpu, 0); >>>> + assert(error == 0); >>>> + } >>>> >>>> /* >>>> * XXX should inject debug exception if 'T' bit is 1 >>>> >>>> Modified: head/usr.sbin/bhyvectl/bhyvectl.c >>>> ============================================================================== >>>> --- head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:55:13 2014 (r268888) >>>> +++ head/usr.sbin/bhyvectl/bhyvectl.c Sat Jul 19 20:59:08 2014 (r268889) >>>> @@ -195,7 +195,8 @@ usage(void) >>>> " [--force-reset]\n" >>>> " [--force-poweroff]\n" >>>> " [--get-active-cpus]\n" >>>> - " [--get-suspended-cpus]\n", >>>> + " [--get-suspended-cpus]\n" >>>> + " [--get-intinfo]\n", >>>> progname); >>>> exit(1); >>>> } >>>> @@ -205,6 +206,7 @@ static int inject_nmi, assert_lapic_lvt; >>>> static int force_reset, force_poweroff; >>>> static const char *capname; >>>> static int create, destroy, get_lowmem, get_highmem; >>>> +static int get_intinfo; >>>> static int get_active_cpus, get_suspended_cpus; >>>> static uint64_t memsize; >>>> static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; >>>> @@ -412,6 +414,37 @@ print_cpus(const char *banner, const cpu >>>> printf("\n"); >>>> } >>>> >>>> +static void >>>> +print_intinfo(const char *banner, uint64_t info) >>>> +{ >>>> + int type; >>>> + >>>> + printf("%s:\t", banner); >>>> + if (info & VM_INTINFO_VALID) { >>>> + type = info & VM_INTINFO_TYPE; >>>> + switch (type) { >>>> + case VM_INTINFO_HWINTR: >>>> + printf("extint"); >>>> + break; >>>> + case VM_INTINFO_NMI: >>>> + printf("nmi"); >>>> + break; >>>> + case VM_INTINFO_SWINTR: >>>> + printf("swint"); >>>> + break; >>>> + default: >>>> + 
printf("exception"); >>>> + break; >>>> + } >>>> + printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); >>>> + if (info & VM_INTINFO_DEL_ERRCODE) >>>> + printf(" errcode %#x", (u_int)(info >> 32)); >>>> + } else { >>>> + printf("n/a"); >>>> + } >>>> + printf("\n"); >>>> +} >>>> + >>>> int >>>> main(int argc, char *argv[]) >>>> { >>>> @@ -420,7 +453,7 @@ main(int argc, char *argv[]) >>>> vm_paddr_t gpa, gpa_pmap; >>>> size_t len; >>>> struct vm_exit vmexit; >>>> - uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte; >>>> + uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte, info[2]; >>>> struct vmctx *ctx; >>>> int wired; >>>> cpuset_t cpus; >>>> @@ -595,6 +628,7 @@ main(int argc, char *argv[]) >>>> { "force-poweroff", NO_ARG, &force_poweroff, 1 }, >>>> { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, >>>> { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, >>>> + { "get-intinfo", NO_ARG, &get_intinfo, 1 }, >>>> { NULL, 0, NULL, 0 } >>>> }; >>>> >>>> @@ -1566,6 +1600,14 @@ main(int argc, char *argv[]) >>>> print_cpus("suspended cpus", &cpus); >>>> } >>>> >>>> + if (!error && (get_intinfo || get_all)) { >>>> + error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]); >>>> + if (!error) { >>>> + print_intinfo("pending", info[0]); >>>> + print_intinfo("current", info[1]); >>>> + } >>>> + } >>>> + >>>> if (!error && run) { >>>> error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); >>>> assert(error == 0); >>>>
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAFgRE9HGYMakq%2BfAvM9ZinwGVrgd_Gu8bbH0yDopFkmYGY5G%2Bw>