Date: Tue, 12 Nov 2019 18:01:34 +0000 (UTC)
From: Konstantin Belousov <kib@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r354649 - in head: share/man/man7 sys/amd64/amd64 sys/amd64/include sys/dev/cpuctl sys/x86/include
Message-ID: <201911121801.xACI1Y3j081660@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib
Date: Tue Nov 12 18:01:33 2019
New Revision: 354649
URL: https://svnweb.freebsd.org/changeset/base/354649

Log:
  Workaround for Intel SKL002/SKL012S errata.

  Disable the use of executable 2M page mappings in EPT-format page
  tables on affected CPUs. For bhyve virtual machines, this effectively
  disables all use of superpage mappings on affected CPUs. The
  vm.pmap.allow_2m_x_ept sysctl can be set to override the default and
  enable mappings on affected CPUs.

  Alternate approaches have been suggested, but at present we do not
  believe the complexity is warranted for typical bhyve's use cases.

  Reviewed by: alc, emaste, markj, scottl
  Security: CVE-2018-12207
  Sponsored by: The FreeBSD Foundation
  Differential revision: https://reviews.freebsd.org/D21884

Modified:
  head/share/man/man7/security.7
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/pmap.h
  head/sys/dev/cpuctl/cpuctl.c
  head/sys/x86/include/specialreg.h

Modified: head/share/man/man7/security.7
==============================================================================
--- head/share/man/man7/security.7	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/share/man/man7/security.7	Tue Nov 12 18:01:33 2019	(r354649)
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 13, 2019
+.Dd November 12, 2019
 .Dt SECURITY 7
 .Os
 .Sh NAME
@@ -1015,6 +1015,13 @@ hardware information leak.
 .It Dv hw.vmm.vmx.l1d_flush
 amd64.
 Controls the mitigation of L1 Terminal Fault in bhyve hypervisor.
+.It Dv vm.pmap.allow_2m_x_ept
+amd64.
+Allows the use of superpages for executable mappings under the EPT
+page table format used by hypervisors on Intel CPUs to map the guest
+physical address space to machine physical memory.
+May be disabled to work around a CPU Erratum called
+Machine Check Error Avoidance on Page Size Change.
 .It Dv kern.elf32.aslr.enable
 Controls system-global Address Space Layout Randomization (ASLR) for
 normal non-PIE (Position Independent Executable) 32bit binaries.

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/amd64/amd64/pmap.c	Tue Nov 12 18:01:33 2019	(r354649)
@@ -1894,6 +1894,51 @@ pmap_page_init(vm_page_t m)
 	m->md.pat_mode = PAT_WRITE_BACK;
 }
 
+static int pmap_allow_2m_x_ept;
+SYSCTL_INT(_vm_pmap, OID_AUTO, allow_2m_x_ept, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &pmap_allow_2m_x_ept, 0,
+    "Allow executable superpage mappings in EPT");
+
+void
+pmap_allow_2m_x_ept_recalculate(void)
+{
+	/*
+	 * SKL002, SKL012S. Since the EPT format is only used by
+	 * Intel CPUs, the vendor check is merely a formality.
+	 */
+	if (!(cpu_vendor_id != CPU_VENDOR_INTEL ||
+	    (cpu_ia32_arch_caps & IA32_ARCH_CAP_IF_PSCHANGE_MC_NO) != 0 ||
+	    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x26 ||	/* Atoms */
+	    CPUID_TO_MODEL(cpu_id) == 0x27 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x35 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x36 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x37 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x86 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5f ||
+	    CPUID_TO_MODEL(cpu_id) == 0x6e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x7a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x57 ||	/* Knights */
+	    CPUID_TO_MODEL(cpu_id) == 0x85))))
+		pmap_allow_2m_x_ept = 1;
+	TUNABLE_INT_FETCH("hw.allow_2m_x_ept", &pmap_allow_2m_x_ept);
+}
+
+static bool
+pmap_allow_2m_x_page(pmap_t pmap, bool executable)
+{
+
+	return (pmap->pm_type != PT_EPT || !executable ||
+	    !pmap_allow_2m_x_ept);
+}
+
 #ifdef NUMA
 static void
 pmap_init_pv_table(void)
@@ -2037,6 +2082,9 @@ pmap_init(void)
 		}
 	}
 
+	/* IFU */
+	pmap_allow_2m_x_ept_recalculate();
+
 	/*
 	 * Initialize the vm page array entries for the kernel pmap's
 	 * page table pages.
@@ -5718,6 +5766,15 @@ retry:
 }
 
 #if VM_NRESERVLEVEL > 0
+static bool
+pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
+{
+
+	if (pmap->pm_type != PT_EPT)
+		return (false);
+	return ((pde & EPT_PG_EXECUTE) != 0);
+}
+
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single page table page (PTP) to a single 2MB page mapping. For promotion
@@ -5753,7 +5810,9 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offs
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
 setpde:
 	newpde = *firstpte;
-	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
+	    !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+	    newpde))) {
 		atomic_add_long(&pmap_pde_p_failures, 1);
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
@@ -6185,6 +6244,12 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t
 	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
+	if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+	    newpde))) {
+		CTR2(KTR_PMAP, "pmap_enter_pde: 2m x blocked for va %#lx"
+		    " in pmap %p", va, pmap);
+		return (KERN_FAILURE);
+	}
 	if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
 	    NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
@@ -6331,6 +6396,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_o
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    m->psind == 1 && pmap_ps_enabled(pmap) &&
+		    pmap_allow_2m_x_page(pmap, (prot & VM_PROT_EXECUTE) != 0) &&
 		    pmap_enter_2mpage(pmap, va, m, prot, &lock))
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		else

Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/amd64/include/pmap.h	Tue Nov 12 18:01:33 2019	(r354649)
@@ -422,6 +422,7 @@ struct thread;
 
 void	pmap_activate_boot(pmap_t pmap);
 void	pmap_activate_sw(struct thread *);
+void	pmap_allow_2m_x_ept_recalculate(void);
 void	pmap_bootstrap(vm_paddr_t *);
 int	pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
 int	pmap_change_attr(vm_offset_t, vm_size_t, int);

Modified: head/sys/dev/cpuctl/cpuctl.c
==============================================================================
--- head/sys/dev/cpuctl/cpuctl.c	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/dev/cpuctl/cpuctl.c	Tue Nov 12 18:01:33 2019	(r354649)
@@ -50,6 +50,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/pmckern.h>
 #include <sys/cpuctl.h>
 
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
@@ -539,6 +543,7 @@ cpuctl_do_eval_cpu_features(int cpu, struct thread *td
 	hw_ssb_recalculate(true);
 #ifdef __amd64__
 	amd64_syscall_ret_flush_l1d_recalc();
+	pmap_allow_2m_x_ept_recalculate();
 #endif
 	hw_mds_recalculate();
 	printcpuinfo();

Modified: head/sys/x86/include/specialreg.h
==============================================================================
--- head/sys/x86/include/specialreg.h	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/x86/include/specialreg.h	Tue Nov 12 18:01:33 2019	(r354649)
@@ -490,6 +490,7 @@
 #define	IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY	0x00000008
 #define	IA32_ARCH_CAP_SSB_NO	0x00000010
 #define	IA32_ARCH_CAP_MDS_NO	0x00000020
+#define	IA32_ARCH_CAP_IF_PSCHANGE_MC_NO	0x00000040
 
 /*
  * CPUID manufacturers identifiers
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201911121801.xACI1Y3j081660>