Date:      Sun, 2 Sep 2018 10:51:31 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r338427 - in stable/11/sys/amd64: amd64 include vmm/intel
Message-ID:  <201809021051.w82ApVE0012450@repo.freebsd.org>

Author: kib
Date: Sun Sep  2 10:51:31 2018
New Revision: 338427
URL: https://svnweb.freebsd.org/changeset/base/338427

Log:
  MFC r338068, r338113:
  Update L1TF workaround to sustain L1D pollution from NMI.

Modified:
  stable/11/sys/amd64/amd64/exception.S
  stable/11/sys/amd64/amd64/support.S
  stable/11/sys/amd64/amd64/trap.c
  stable/11/sys/amd64/include/md_var.h
  stable/11/sys/amd64/vmm/intel/vmx.c
  stable/11/sys/amd64/vmm/intel/vmx_support.S
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/amd64/amd64/exception.S
==============================================================================
--- stable/11/sys/amd64/amd64/exception.S	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/amd64/exception.S	Sun Sep  2 10:51:31 2018	(r338427)
@@ -848,7 +848,10 @@ nocallchain:
 	movl	%edx,%eax
 	shrq	$32,%rdx
 	wrmsr
-	movq	%r13,%cr3
+	cmpb	$0, nmi_flush_l1d_sw(%rip)
+	je	2f
+	call	flush_l1d_sw		/* bhyve L1TF assist */
+2:	movq	%r13,%cr3
 	RESTORE_REGS
 	addq	$TF_RIP,%rsp
 	jmp	doreti_iret
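
In C terms, the NMI exit path above now does the following (a minimal
sketch; nmi_exit_sketch() and load_cr3() are hypothetical stand-ins for
the raw assembly, and outer_cr3 is the value the handler keeps in %r13):

#include <stdint.h>

extern int nmi_flush_l1d_sw;	/* the machdep.nmi_flush_l1d_sw knob below */
void flush_l1d_sw(void);	/* the helper added to support.S below */
void load_cr3(uint64_t cr3);	/* hypothetical stand-in for movq %r13,%cr3 */

static void
nmi_exit_sketch(uint64_t outer_cr3)
{
	if (nmi_flush_l1d_sw != 0)
		flush_l1d_sw();		/* bhyve L1TF assist */
	load_cr3(outer_cr3);		/* return to the interrupted address space */
}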

Modified: stable/11/sys/amd64/amd64/support.S
==============================================================================
--- stable/11/sys/amd64/amd64/support.S	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/amd64/support.S	Sun Sep  2 10:51:31 2018	(r338427)
@@ -892,3 +892,36 @@ ENTRY(handle_ibrs_exit_rs)
 END(handle_ibrs_exit_rs)
 
 	.noaltmacro
+
+/*
+ * Flush L1D cache.  Load enough of the data from the kernel text
+ * to flush existing L1D content.
+ *
+ * N.B. The function does not follow ABI calling conventions; it corrupts %rbx.
+ * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
+ * registers are clobbered.  The NMI handler caller only needs %r13 preserved.
+ */
+ENTRY(flush_l1d_sw)
+#define	L1D_FLUSH_SIZE	(64 * 1024)
+	movq	$KERNBASE, %r9
+	movq	$-L1D_FLUSH_SIZE, %rcx
+	/*
+	 * pass 1: Preload TLB.
+	 * Kernel text is mapped using superpages.  TLB preload is
+	 * done for the benefit of older CPUs which split 2M page
+	 * into 4k TLB entries.
+	 */
+1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
+	addq	$PAGE_SIZE, %rcx
+	jne	1b
+	xorl	%eax, %eax
+	cpuid
+	movq	$-L1D_FLUSH_SIZE, %rcx
+	/* pass 2: Read each cache line. */
+2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
+	addq	$64, %rcx
+	jne	2b
+	lfence
+	ret
+#undef	L1D_FLUSH_SIZE
+END(flush_l1d_sw)
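
The same two-pass algorithm in C, for reference (a sketch under the
assembly's assumptions: a 64 KB read region starting at KERNBASE, 4 KB
base pages, and 64-byte cache lines; the function name and parameter
are hypothetical):

#include <stddef.h>
#include <stdint.h>

#define	L1D_FLUSH_SIZE	(64 * 1024)
#define	FLUSH_PAGE_SIZE	4096			/* assumed 4K base pages */

static void
flush_l1d_sw_sketch(const volatile uint8_t *base)	/* base = KERNBASE */
{
	size_t off;
	uint32_t a = 0, b, c = 0, d;

	/* Pass 1: touch one byte per page so the TLB entries are preloaded. */
	for (off = 0; off < L1D_FLUSH_SIZE; off += FLUSH_PAGE_SIZE)
		(void)base[off];

	/* Serialize with cpuid so pass 1 retires before pass 2 starts. */
	__asm__ volatile("cpuid" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));

	/* Pass 2: read one byte per 64-byte line to displace all of L1D. */
	for (off = 0; off < L1D_FLUSH_SIZE; off += 64)
		(void)base[off];

	/* Order the loads before returning, as the lfence does above. */
	__asm__ volatile("lfence" ::: "memory");
}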

Modified: stable/11/sys/amd64/amd64/trap.c
==============================================================================
--- stable/11/sys/amd64/amd64/trap.c	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/amd64/trap.c	Sun Sep  2 10:51:31 2018	(r338427)
@@ -158,6 +158,20 @@ SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG
     "Print debugging information on trap signal to ctty");
 
 /*
+ * Control L1D flush on return from NMI.
+ *
+ * The tunable can be set to the following values:
+ * 0 - only enable flush on return from NMI if required by vmm.ko (default)
+ * >1 - always flush on return from NMI.
+ *
+ * Post-boot, the sysctl indicates if flushing is currently enabled.
+ */
+int nmi_flush_l1d_sw;
+SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN,
+    &nmi_flush_l1d_sw, 0,
+    "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist");
+
+/*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
  * routines that prepare a suitable stack frame, and restore this
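
Because the OID is declared CTLFLAG_RWTUN, the knob is settable both as
a loader tunable and at runtime.  For example, to force the flush on
every NMI return independent of vmm.ko (the ">1" case above):

	# in /boot/loader.conf
	machdep.nmi_flush_l1d_sw="2"

	# or at runtime
	sysctl machdep.nmi_flush_l1d_sw=2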

Modified: stable/11/sys/amd64/include/md_var.h
==============================================================================
--- stable/11/sys/amd64/include/md_var.h	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/include/md_var.h	Sun Sep  2 10:51:31 2018	(r338427)
@@ -38,6 +38,7 @@ extern uint64_t	*vm_page_dump;
 extern int	hw_lower_amd64_sharedpage;
 extern int	hw_ibrs_disable;
 extern int	hw_ssb_disable;
+extern int	nmi_flush_l1d_sw;
 
 /*
  * The file "conf/ldscript.amd64" defines the symbol "kernphys".  Its

Modified: stable/11/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx.c	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/vmm/intel/vmx.c	Sun Sep  2 10:51:31 2018	(r338427)
@@ -188,8 +188,11 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, 
 static int guest_l1d_flush;
 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD,
     &guest_l1d_flush, 0, NULL);
+static int guest_l1d_flush_sw;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
+    &guest_l1d_flush_sw, 0, NULL);
 
-uint64_t vmx_msr_flush_cmd;
+static struct msr_entry msr_load_list[1] __aligned(16);
 
 /*
  * Use the last page below 4GB as the APIC access address. This address is
@@ -500,6 +503,9 @@ vmx_cleanup(void)
 		vpid_unr = NULL;
 	}
 
+	if (nmi_flush_l1d_sw == 1)
+		nmi_flush_l1d_sw = 0;
+
 	smp_rendezvous(NULL, vmx_disable, NULL, NULL);
 
 	return (0);
@@ -728,11 +734,30 @@ vmx_init(int ipinum)
 
 	guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
 	TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
-	if (guest_l1d_flush &&
-	    (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0)
-		vmx_msr_flush_cmd = IA32_FLUSH_CMD_L1D;
 
 	/*
+	 * L1D cache flush is enabled.  Use IA32_FLUSH_CMD MSR when
+	 * available.  Otherwise fall back to the software flush
+	 * method which loads enough data from the kernel text to
+	 * flush existing L1D content, both on VMX entry and on NMI
+	 * return.
+	 */
+	if (guest_l1d_flush) {
+		if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
+			guest_l1d_flush_sw = 1;
+			TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw",
+			    &guest_l1d_flush_sw);
+		}
+		if (guest_l1d_flush_sw) {
+			if (nmi_flush_l1d_sw <= 1)
+				nmi_flush_l1d_sw = 1;
+		} else {
+			msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
+			msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
+		}
+	}
+
+	/*
 	 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
 	 */
 	fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);
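
Once vmm.ko is loaded, the two read-only sysctls declared above report
which path vmx_init() chose, e.g.:

	sysctl hw.vmm.vmx.l1d_flush	# 1 if the guest L1D flush is enabled
	sysctl hw.vmm.vmx.l1d_flush_sw	# 1 if the software fallback is in use
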
@@ -920,6 +945,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
 		error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
 		error += vmwrite(VMCS_VPID, vpid[i]);
+
+		if (guest_l1d_flush && !guest_l1d_flush_sw) {
+			vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
+			    (vm_offset_t)&msr_load_list[0]));
+			vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
+			    nitems(msr_load_list));
+			vmcs_write(VMCS_EXIT_MSR_STORE, 0);
+			vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
+		}
 
 		/* exception bitmap */
 		if (vcpu_trace_exceptions(vm, i))
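
For context, programming the VM-entry MSR-load area this way makes the
processor itself perform the equivalent of the following on every VM
entry (an illustration of the VMX mechanism, not code from this change):

	wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);	/* hardware L1D flush */

which is why vmx_enter_guest() no longer needs an explicit wrmsr path,
as the vmx_support.S hunk below shows.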

Modified: stable/11/sys/amd64/vmm/intel/vmx_support.S
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx_support.S	Sat Sep  1 16:16:40 2018	(r338426)
+++ stable/11/sys/amd64/vmm/intel/vmx_support.S	Sun Sep  2 10:51:31 2018	(r338427)
@@ -176,44 +176,10 @@ ENTRY(vmx_enter_guest)
 	jbe	invept_error		/* Check invept instruction error */
 
 guest_restore:
-
-	/*
-	 * Flush L1D cache if requested.  Use IA32_FLUSH_CMD MSR if available,
-	 * otherwise load enough of the data from the zero_region to flush
-	 * existing L1D content.
-	 */
-#define	L1D_FLUSH_SIZE	(64 * 1024)
 	movl	%edx, %r8d
-	cmpb	$0, guest_l1d_flush(%rip)
+	cmpb	$0, guest_l1d_flush_sw(%rip)
 	je	after_l1d
-	movq	vmx_msr_flush_cmd(%rip), %rax
-	testq	%rax, %rax
-	jz	1f
-	movq	%rax, %rdx
-	shrq	$32, %rdx
-	movl	$MSR_IA32_FLUSH_CMD, %ecx
-	wrmsr
-	jmp	after_l1d
-1:	movq	$KERNBASE, %r9
-	movq	$-L1D_FLUSH_SIZE, %rcx
-	/*
-	 * pass 1: Preload TLB.
-	 * Kernel text is mapped using superpages.  TLB preload is
-	 * done for the benefit of older CPUs which split 2M page
-	 * into 4k TLB entries.
-	 */
-2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
-	addq	$PAGE_SIZE, %rcx
-	jne	2b
-	xorl	%eax, %eax
-	cpuid
-	movq	$-L1D_FLUSH_SIZE, %rcx
-	/* pass 2: Read each cache line */
-3:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
-	addq	$64, %rcx
-	jne	3b
-	lfence
-#undef	L1D_FLUSH_SIZE
+	call	flush_l1d_sw
 after_l1d:
 	cmpl	$0, %r8d
 	je	do_launch


