Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 12 Nov 2019 18:01:34 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r354649 - in head: share/man/man7 sys/amd64/amd64 sys/amd64/include sys/dev/cpuctl sys/x86/include
Message-ID:  <201911121801.xACI1Y3j081660@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Tue Nov 12 18:01:33 2019
New Revision: 354649
URL: https://svnweb.freebsd.org/changeset/base/354649

Log:
  Workaround for Intel SKL002/SKL012S errata.
  
  Disable the use of executable 2M page mappings in EPT-format page
  tables on affected CPUs.  For bhyve virtual machines, this effectively
  disables all use of superpage mappings on affected CPUs.  The
  vm.pmap.allow_2m_x_ept sysctl can be set to override the default and
  enable mappings on affected CPUs.
  
  Alternate approaches have been suggested, but at present we do not
  believe the complexity is warranted for typical bhyve's use cases.
  
  Reviewed by:	alc, emaste, markj, scottl
  Security:	CVE-2018-12207
  Sponsored by:	The FreeBSD Foundation
  Differential revision:	https://reviews.freebsd.org/D21884

Modified:
  head/share/man/man7/security.7
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/pmap.h
  head/sys/dev/cpuctl/cpuctl.c
  head/sys/x86/include/specialreg.h

Modified: head/share/man/man7/security.7
==============================================================================
--- head/share/man/man7/security.7	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/share/man/man7/security.7	Tue Nov 12 18:01:33 2019	(r354649)
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 13, 2019
+.Dd November 12, 2019
 .Dt SECURITY 7
 .Os
 .Sh NAME
@@ -1015,6 +1015,13 @@ hardware information leak.
 .It Dv hw.vmm.vmx.l1d_flush
 amd64.
 Controls the mitigation of L1 Terminal Fault in bhyve hypervisor.
+.It Dv vm.pmap.allow_2m_x_ept
+amd64.
+Allows the use of superpages for executable mappings under the EPT
+page table format used by hypervisors on Intel CPUs to map the guest
+physical address space to machine physical memory.
+May be disabled to work around a CPU Erratum called
+Machine Check Error Avoidance on Page Size Change.
 .It Dv kern.elf32.aslr.enable
 Controls system-global Address Space Layout Randomization (ASLR) for
 normal non-PIE (Position Independent Executable) 32bit binaries.

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/amd64/amd64/pmap.c	Tue Nov 12 18:01:33 2019	(r354649)
@@ -1894,6 +1894,51 @@ pmap_page_init(vm_page_t m)
 	m->md.pat_mode = PAT_WRITE_BACK;
 }
 
+static int pmap_allow_2m_x_ept;
+SYSCTL_INT(_vm_pmap, OID_AUTO, allow_2m_x_ept, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &pmap_allow_2m_x_ept, 0,
+    "Allow executable superpage mappings in EPT");
+
+void
+pmap_allow_2m_x_ept_recalculate(void)
+{
+	/*
+	 * SKL002, SKL012S.  Since the EPT format is only used by
+	 * Intel CPUs, the vendor check is merely a formality.
+	 */
+	if (!(cpu_vendor_id != CPU_VENDOR_INTEL ||
+	    (cpu_ia32_arch_caps & IA32_ARCH_CAP_IF_PSCHANGE_MC_NO) != 0 ||
+	    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x26 ||	/* Atoms */
+	    CPUID_TO_MODEL(cpu_id) == 0x27 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x35 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x36 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x37 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x86 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5f ||
+	    CPUID_TO_MODEL(cpu_id) == 0x6e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x7a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x57 ||	/* Knights */
+	    CPUID_TO_MODEL(cpu_id) == 0x85))))
+		pmap_allow_2m_x_ept = 1;
+	TUNABLE_INT_FETCH("hw.allow_2m_x_ept", &pmap_allow_2m_x_ept);
+}
+
+static bool
+pmap_allow_2m_x_page(pmap_t pmap, bool executable)
+{
+
+	return (pmap->pm_type != PT_EPT || !executable ||
+	    !pmap_allow_2m_x_ept);
+}
+
 #ifdef NUMA
 static void
 pmap_init_pv_table(void)
@@ -2037,6 +2082,9 @@ pmap_init(void)
 		}
 	}
 
+	/* IFU */
+	pmap_allow_2m_x_ept_recalculate();
+
 	/*
 	 * Initialize the vm page array entries for the kernel pmap's
 	 * page table pages.
@@ -5718,6 +5766,15 @@ retry:
 }
 
 #if VM_NRESERVLEVEL > 0
+static bool
+pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
+{
+
+	if (pmap->pm_type != PT_EPT)
+		return (false);
+	return ((pde & EPT_PG_EXECUTE) != 0);
+}
+
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single page table page (PTP) to a single 2MB page mapping.  For promotion
@@ -5753,7 +5810,9 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offs
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
 setpde:
 	newpde = *firstpte;
-	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
+	    !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+	    newpde))) {
 		atomic_add_long(&pmap_pde_p_failures, 1);
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
@@ -6185,6 +6244,12 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t
 	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
+	if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+	    newpde))) {
+		CTR2(KTR_PMAP, "pmap_enter_pde: 2m x blocked for va %#lx"
+		    " in pmap %p", va, pmap);
+		return (KERN_FAILURE);
+	}
 	if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
 	    NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
@@ -6331,6 +6396,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_o
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    m->psind == 1 && pmap_ps_enabled(pmap) &&
+		    pmap_allow_2m_x_page(pmap, (prot & VM_PROT_EXECUTE) != 0) &&
 		    pmap_enter_2mpage(pmap, va, m, prot, &lock))
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		else

Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/amd64/include/pmap.h	Tue Nov 12 18:01:33 2019	(r354649)
@@ -422,6 +422,7 @@ struct thread;
 
 void	pmap_activate_boot(pmap_t pmap);
 void	pmap_activate_sw(struct thread *);
+void	pmap_allow_2m_x_ept_recalculate(void);
 void	pmap_bootstrap(vm_paddr_t *);
 int	pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
 int	pmap_change_attr(vm_offset_t, vm_size_t, int);

Modified: head/sys/dev/cpuctl/cpuctl.c
==============================================================================
--- head/sys/dev/cpuctl/cpuctl.c	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/dev/cpuctl/cpuctl.c	Tue Nov 12 18:01:33 2019	(r354649)
@@ -50,6 +50,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/pmckern.h>
 #include <sys/cpuctl.h>
 
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
@@ -539,6 +543,7 @@ cpuctl_do_eval_cpu_features(int cpu, struct thread *td
 	hw_ssb_recalculate(true);
 #ifdef __amd64__
 	amd64_syscall_ret_flush_l1d_recalc();
+	pmap_allow_2m_x_ept_recalculate();
 #endif
 	hw_mds_recalculate();
 	printcpuinfo();

Modified: head/sys/x86/include/specialreg.h
==============================================================================
--- head/sys/x86/include/specialreg.h	Tue Nov 12 16:24:37 2019	(r354648)
+++ head/sys/x86/include/specialreg.h	Tue Nov 12 18:01:33 2019	(r354649)
@@ -490,6 +490,7 @@
 #define	IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY	0x00000008
 #define	IA32_ARCH_CAP_SSB_NO	0x00000010
 #define	IA32_ARCH_CAP_MDS_NO	0x00000020
+#define	IA32_ARCH_CAP_IF_PSCHANGE_MC_NO	0x00000040
 
 /*
  * CPUID manufacturers identifiers



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201911121801.xACI1Y3j081660>