Date: Thu, 10 Sep 2020 10:49:59 +0000 (UTC)
From: Peter Grehan <grehan@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject: svn commit: r365560 - in stable/12/sys: amd64/amd64 amd64/include amd64/vmm amd64/vmm/intel i386/i386 x86/include
Message-ID: <202009101049.08AAnx2F077560@repo.freebsd.org>
Author: grehan
Date: Thu Sep 10 10:49:59 2020
New Revision: 365560
URL: https://svnweb.freebsd.org/changeset/base/365560

Log:
  MFC r364340, r364343, r364656

  r364340 Support guest rdtscp and rdpid instructions on Intel VT-x

  Follow-on commits:
  r364343 Export a routine to provide the TSC_AUX MSR value and use this in vmm
  r364656 assert caller is preventing CPU migration

  Submitted by:	adam_fenn.io
  Differential Revision:	https://reviews.freebsd.org/D26003

Modified:
  stable/12/sys/amd64/amd64/initcpu.c
  stable/12/sys/amd64/include/vmm.h
  stable/12/sys/amd64/vmm/intel/vmx.c
  stable/12/sys/amd64/vmm/intel/vmx.h
  stable/12/sys/amd64/vmm/intel/vmx_msr.c
  stable/12/sys/amd64/vmm/intel/vmx_msr.h
  stable/12/sys/amd64/vmm/x86.c
  stable/12/sys/i386/i386/initcpu.c
  stable/12/sys/x86/include/x86_var.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/12/sys/amd64/amd64/initcpu.c	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/amd64/initcpu.c	Thu Sep 10 10:49:59 2020	(r365560)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 
 #include <vm/vm.h>
@@ -218,6 +219,18 @@ init_via(void)
 }
 
 /*
+ * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU.
+ *
+ * Caller should prevent CPU migration.
+ */
+u_int
+cpu_auxmsr(void)
+{
+	KASSERT((read_rflags() & PSL_I) == 0, ("context switch possible"));
+	return (PCPU_GET(cpuid));
+}
+
+/*
  * Initialize CPU control registers
  */
 void
@@ -283,7 +296,7 @@ initializecpu(void)
 
 	if ((amd_feature & AMDID_RDTSCP) != 0 ||
 	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
-		wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
+		wrmsr(MSR_TSC_AUX, cpu_auxmsr());
 }
 
 void

Modified: stable/12/sys/amd64/include/vmm.h
==============================================================================
--- stable/12/sys/amd64/include/vmm.h	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/include/vmm.h	Thu Sep 10 10:49:59 2020	(r365560)
@@ -436,6 +436,8 @@ enum vm_cap_type {
 	VM_CAP_UNRESTRICTED_GUEST,
 	VM_CAP_ENABLE_INVPCID,
 	VM_CAP_BPT_EXIT,
+	VM_CAP_RDPID,
+	VM_CAP_RDTSCP,
 	VM_CAP_MAX
 };
 
Modified: stable/12/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx.c	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx.c	Thu Sep 10 10:49:59 2020	(r365560)
@@ -160,6 +160,14 @@ static int cap_pause_exit;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit,
     0, "PAUSE triggers a VM-exit");
 
+static int cap_rdpid;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0,
+    "Guests are allowed to use RDPID");
+
+static int cap_rdtscp;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdtscp, CTLFLAG_RD, &cap_rdtscp, 0,
+    "Guests are allowed to use RDTSCP");
+
 static int cap_unrestricted_guest;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD,
     &cap_unrestricted_guest, 0, "Unrestricted guests");
@@ -293,6 +301,18 @@ static int vmx_getreg(void *arg, int vcpu, int reg, ui
 static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
 static void vmx_inject_pir(struct vlapic *vlapic);
 
+static inline bool
+host_has_rdpid(void)
+{
+	return ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0);
+}
+
+static inline bool
+host_has_rdtscp(void)
+{
+	return ((amd_feature & AMDID_RDTSCP) != 0);
+}
+
 #ifdef KTR
 static const char *
 exit_reason_to_str(int reason)
@@ -745,6 +765,43 @@ vmx_init(int ipinum)
 	    PROCBASED_PAUSE_EXITING, 0,
 	    &tmp) == 0);
 
+	/*
+	 * Check support for RDPID and/or RDTSCP.
+	 *
+	 * Support a pass-through-based implementation of these via the
+	 * "enable RDTSCP" VM-execution control and the "RDTSC exiting"
+	 * VM-execution control.
+	 *
+	 * The "enable RDTSCP" VM-execution control applies to both RDPID
+	 * and RDTSCP (see SDM volume 3, section 25.3, "Changes to
+	 * Instruction Behavior in VMX Non-root operation"); this is why
+	 * only this VM-execution control needs to be enabled in order to
+	 * enable passing through whichever of RDPID and/or RDTSCP are
+	 * supported by the host.
+	 *
+	 * The "RDTSC exiting" VM-execution control applies to both RDTSC
+	 * and RDTSCP (again, per SDM volume 3, section 25.3), and is
+	 * already set up for RDTSC and RDTSCP pass-through by the current
+	 * implementation of RDTSC.
+	 *
+	 * Although RDPID and RDTSCP are optional capabilities, since there
+	 * does not currently seem to be a use case for enabling/disabling
+	 * these via libvmmapi, choose not to support this and, instead,
+	 * just statically always enable or always disable this support
+	 * across all vCPUs on all VMs. (Note that there may be some
+	 * complications to providing this functionality, e.g., the MSR
+	 * bitmap is currently per-VM rather than per-vCPU while the
+	 * capability API wants to be able to control capabilities on a
+	 * per-vCPU basis).
+	 */
+	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+	    MSR_VMX_PROCBASED_CTLS2,
+	    PROCBASED2_ENABLE_RDTSCP, 0, &tmp);
+	cap_rdpid = error == 0 && host_has_rdpid();
+	cap_rdtscp = error == 0 && host_has_rdtscp();
+	if (cap_rdpid || cap_rdtscp)
+		procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP;
+
 	cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
 	    MSR_VMX_PROCBASED_CTLS2,
 	    PROCBASED2_UNRESTRICTED_GUEST, 0,
@@ -997,6 +1054,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 	 * the "use TSC offsetting" execution control is enabled and the
 	 * difference between the host TSC and the guest TSC is written
 	 * into the TSC offset in the VMCS.
+	 *
+	 * Guest TSC_AUX support is enabled if any of guest RDPID and/or
+	 * guest RDTSCP support are enabled (since, as per Table 2-2 in SDM
+	 * volume 4, TSC_AUX is supported if any of RDPID and/or RDTSCP are
+	 * supported). If guest TSC_AUX support is enabled, TSC_AUX is
+	 * exposed read-only so that the VMM can do one fewer MSR read per
+	 * exit than if this register were exposed read-write; the guest
+	 * restore value can be updated during guest writes (expected to be
+	 * rare) instead of during all exits (common).
 	 */
 	if (guest_msr_rw(vmx, MSR_GSBASE) ||
 	    guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -1004,7 +1070,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 	    guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
 	    guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
 	    guest_msr_rw(vmx, MSR_EFER) ||
-	    guest_msr_ro(vmx, MSR_TSC))
+	    guest_msr_ro(vmx, MSR_TSC) ||
+	    ((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX)))
 		panic("vmx_vminit: error setting guest msr access");
 
 	vpid_alloc(vpid, VM_MAXCPU);
@@ -1083,6 +1150,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
 
 		vmx->cap[i].set = 0;
+		vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0;
+		vmx->cap[i].set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0;
 		vmx->cap[i].proc_ctls = procbased_ctls;
 		vmx->cap[i].proc_ctls2 = procbased_ctls2;
 		vmx->cap[i].exc_bitmap = exc_bitmap;
@@ -2989,11 +3058,30 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pm
 		sidt(&idtr);
 		ldt_sel = sldt();
 
+		/*
+		 * The TSC_AUX MSR must be saved/restored while interrupts
+		 * are disabled so that it is not possible for the guest
+		 * TSC_AUX MSR value to be overwritten by the resume
+		 * portion of the IPI_SUSPEND codepath. This is why the
+		 * transition of this MSR is handled separately from those
+		 * handled by vmx_msr_guest_{enter,exit}(), which are ok to
+		 * be transitioned with preemption disabled but interrupts
+		 * enabled.
+		 *
+		 * These vmx_msr_guest_{enter,exit}_tsc_aux() calls can be
+		 * anywhere in this loop so long as they happen with
+		 * interrupts disabled. This location is chosen for
+		 * simplicity.
+		 */
+		vmx_msr_guest_enter_tsc_aux(vmx, vcpu);
+
 		vmx_run_trace(vmx, vcpu);
 		vmx_dr_enter_guest(vmxctx);
 		rc = vmx_enter_guest(vmxctx, vmx, launched);
 		vmx_dr_leave_guest(vmxctx);
+
+		vmx_msr_guest_exit_tsc_aux(vmx, vcpu);
+
 		bare_lgdt(&gdtr);
 		lidt(&idtr);
 		lldt(ldt_sel);
@@ -3331,6 +3419,14 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
 		if (cap_monitor_trap)
 			ret = 0;
 		break;
+	case VM_CAP_RDPID:
+		if (cap_rdpid)
+			ret = 0;
+		break;
+	case VM_CAP_RDTSCP:
+		if (cap_rdtscp)
+			ret = 0;
+		break;
 	case VM_CAP_UNRESTRICTED_GUEST:
 		if (cap_unrestricted_guest)
 			ret = 0;
@@ -3394,6 +3490,17 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
 			flag = PROCBASED_PAUSE_EXITING;
 			reg = VMCS_PRI_PROC_BASED_CTLS;
 		}
+		break;
+	case VM_CAP_RDPID:
+	case VM_CAP_RDTSCP:
+		if (cap_rdpid || cap_rdtscp)
+			/*
+			 * Choose not to support enabling/disabling
+			 * RDPID/RDTSCP via libvmmapi since, as per the
+			 * discussion in vmx_init(), RDPID/RDTSCP are
+			 * either always enabled or always disabled.
+			 */
+			error = EOPNOTSUPP;
 		break;
 	case VM_CAP_UNRESTRICTED_GUEST:
 		if (cap_unrestricted_guest) {

Modified: stable/12/sys/amd64/vmm/intel/vmx.h
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx.h	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx.h	Thu Sep 10 10:49:59 2020	(r365560)
@@ -117,6 +117,7 @@ enum {
 	IDX_MSR_SF_MASK,
 	IDX_MSR_KGSBASE,
 	IDX_MSR_PAT,
+	IDX_MSR_TSC_AUX,
 	GUEST_MSR_NUM		/* must be the last enumeration */
 };
 
@@ -152,5 +153,19 @@ int	vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint
 
 extern char	vmx_exit_guest[];
 extern char	vmx_exit_guest_flush_rsb[];
+
+static inline bool
+vmx_have_msr_tsc_aux(struct vmx *vmx)
+{
+	int rdpid_rdtscp_bits = ((1 << VM_CAP_RDPID) | (1 << VM_CAP_RDTSCP));
+
+	/*
+	 * Since the values of these bits are uniform across all vCPUs
+	 * (see discussion in vmx_init() and initialization of these bits
+	 * in vmx_vminit()), just always use vCPU-zero's capability set and
+	 * remove the need to require a vcpuid argument.
+	 */
+	return ((vmx->cap[0].set & rdpid_rdtscp_bits) != 0);
+}
 
 #endif

Modified: stable/12/sys/amd64/vmm/intel/vmx_msr.c
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx_msr.c	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx_msr.c	Thu Sep 10 10:49:59 2020	(r365560)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 
 #include "vmx.h"
 #include "vmx_msr.h"
+#include "x86.h"
 
 static bool
 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
@@ -361,6 +362,16 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
 }
 
 void
+vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid)
+{
+	uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+	uint32_t host_aux = cpu_auxmsr();
+
+	if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux)
+		wrmsr(MSR_TSC_AUX, guest_tsc_aux);
+}
+
+void
 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
 {
 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
@@ -381,6 +392,23 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
 	/* MSR_KGSBASE will be restored on the way back to userspace */
 }
 
+void
+vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid)
+{
+	uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+	uint32_t host_aux = cpu_auxmsr();
+
+	if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux)
+		/*
+		 * Note that it is not necessary to save the guest value
+		 * here; vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] always
+		 * contains the current value since it is updated whenever
+		 * the guest writes to it (which is expected to be very
+		 * rare).
+		 */
+		wrmsr(MSR_TSC_AUX, host_aux);
+}
+
 int
 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
 {
@@ -472,6 +500,17 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint
 		break;
 	case MSR_TSC:
 		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
+		break;
+	case MSR_TSC_AUX:
+		if (vmx_have_msr_tsc_aux(vmx))
+			/*
+			 * vmx_msr_guest_enter_tsc_aux() will apply this
+			 * value when it is called immediately before guest
+			 * entry.
+			 */
+			guest_msrs[IDX_MSR_TSC_AUX] = val;
+		else
+			vm_inject_gp(vmx->vm, vcpuid);
 		break;
 	default:
 		error = EINVAL;

Modified: stable/12/sys/amd64/vmm/intel/vmx_msr.h
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx_msr.h	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx_msr.h	Thu Sep 10 10:49:59 2020	(r365560)
@@ -35,8 +35,10 @@ struct vmx;
 
 void vmx_msr_init(void);
 void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid);
 void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
 void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid);
 int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
 int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
 
Modified: stable/12/sys/amd64/vmm/x86.c
==============================================================================
--- stable/12/sys/amd64/vmm/x86.c	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/amd64/vmm/x86.c	Thu Sep 10 10:49:59 2020	(r365560)
@@ -91,7 +91,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 {
 	const struct xsave_limits *limits;
 	uint64_t cr4;
-	int error, enable_invpcid, level, width, x2apic_id;
+	int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
+	    width, x2apic_id;
 	unsigned int func, regs[4], logical_cpus;
 	enum x2apic_state x2apic_state;
 	uint16_t cores, maxcpus, sockets, threads;
@@ -194,11 +195,13 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			/* Hide mwaitx/monitorx capability from the guest */
 			regs[2] &= ~AMDID2_MWAITX;
 
-			/*
-			 * Hide rdtscp/ia32_tsc_aux until we know how
-			 * to deal with them.
-			 */
-			regs[3] &= ~AMDID_RDTSCP;
+			/* Advertise RDTSCP if it is enabled. */
+			error = vm_get_capability(vm, vcpu_id,
+			    VM_CAP_RDTSCP, &enable_rdtscp);
+			if (error == 0 && enable_rdtscp)
+				regs[3] |= AMDID_RDTSCP;
+			else
+				regs[3] &= ~AMDID_RDTSCP;
 			break;
 
 		case CPUID_8000_0007:
@@ -441,6 +444,12 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
 			regs[2] = 0;
 			regs[3] &= CPUID_STDEXT3_MD_CLEAR;
+
+			/* Advertise RDPID if it is enabled. */
+			error = vm_get_capability(vm, vcpu_id,
+			    VM_CAP_RDPID, &enable_rdpid);
+			if (error == 0 && enable_rdpid)
+				regs[2] |= CPUID_STDEXT2_RDPID;
 
 			/* Advertise INVPCID if it is enabled. */
 			error = vm_get_capability(vm, vcpu_id,

Modified: stable/12/sys/i386/i386/initcpu.c
==============================================================================
--- stable/12/sys/i386/i386/initcpu.c	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/i386/i386/initcpu.c	Thu Sep 10 10:49:59 2020	(r365560)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 
 #include <vm/vm.h>
@@ -627,6 +628,18 @@ init_transmeta(void)
 }
 #endif
 
+/*
+ * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU.
+ *
+ * Caller should prevent CPU migration.
+ */
+u_int
+cpu_auxmsr(void)
+{
+	KASSERT((read_eflags() & PSL_I) == 0, ("context switch possible"));
+	return (PCPU_GET(cpuid));
+}
+
 extern int elf32_nxstack;
 
 void
@@ -756,7 +769,7 @@ initializecpu(void)
 #endif
 	if ((amd_feature & AMDID_RDTSCP) != 0 ||
 	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
-		wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
+		wrmsr(MSR_TSC_AUX, cpu_auxmsr());
 }
 
 void

Modified: stable/12/sys/x86/include/x86_var.h
==============================================================================
--- stable/12/sys/x86/include/x86_var.h	Thu Sep 10 09:50:43 2020	(r365559)
+++ stable/12/sys/x86/include/x86_var.h	Thu Sep 10 10:49:59 2020	(r365560)
@@ -123,6 +123,7 @@ cpu_getmaxphyaddr(void)
 bool	acpi_get_fadt_bootflags(uint16_t *flagsp);
 void	*alloc_fpusave(int flags);
 void	busdma_swi(void);
+u_int	cpu_auxmsr(void);
 bool	cpu_mwait_usable(void);
 void	cpu_probe_amdc1e(void);
 void	cpu_setregs(void);
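For illustration only (not part of the committed diff above): once bhyve advertises
AMDID_RDTSCP and CPUID_STDEXT2_RDPID via x86_emulate_cpuid(), a guest whose kernel
stores the CPU id in TSC_AUX (as initializecpu() does on FreeBSD) can read that value
directly from userland. The sketch below assumes a compiler that accepts GNU-style
inline assembly and binaries built for amd64; the helper names read_tsc_and_aux() and
read_rdpid() are hypothetical. Production code should first check the CPUID feature
bits rather than assume both instructions are present.

#include <stdint.h>
#include <stdio.h>

/* RDTSCP: TSC is returned in EDX:EAX and IA32_TSC_AUX in ECX. */
static uint64_t
read_tsc_and_aux(uint32_t *aux)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtscp" : "=a" (lo), "=d" (hi), "=c" (*aux));
	return (((uint64_t)hi << 32) | lo);
}

/* RDPID: reads IA32_TSC_AUX into a general-purpose register. */
static uint64_t
read_rdpid(void)
{
	uint64_t aux;

	__asm__ __volatile__("rdpid %0" : "=r" (aux));
	return (aux);
}

int
main(void)
{
	uint32_t aux;
	uint64_t tsc;

	tsc = read_tsc_and_aux(&aux);
	printf("tsc %ju tsc_aux (cpu id) %u rdpid %ju\n",
	    (uintmax_t)tsc, aux, (uintmax_t)read_rdpid());
	return (0);
}

Because TSC_AUX holds the CPU id, rdtscp gives software a timestamp and the CPU it
was taken on in a single instruction; pass-through lets the guest do this without a
VM-exit.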