Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 12 Jul 2019 22:31:12 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r349958 - in stable: 11/sys/amd64/vmm 11/sys/amd64/vmm/amd 11/sys/x86/x86 11/usr.sbin/bhyve 12/sys/amd64/vmm 12/sys/amd64/vmm/amd 12/sys/x86/x86 12/usr.sbin/bhyve
Message-ID:  <201907122231.x6CMVCWU059296@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Fri Jul 12 22:31:12 2019
New Revision: 349958
URL: https://svnweb.freebsd.org/changeset/base/349958

Log:
  MFC 339911,339936,343075,343166,348592: Various AMD CPU-specific fixes.
  
  339911:
  Emulate machine check related MSR_EXTFEATURES to allow guest OSes to
  boot on AMD FX Series.
  
  339936:
  Merge cases with upper block.
  This is a cosmetic change only to simplify code.
  
  343075:
  vmm(4): Take steps towards multicore bhyve AMD support
  
  vmm's CPUID emulation presented Intel topology information to the guest, but
  disabled AMD topology information and in some cases passed through garbage.
  I.e., CPUID leaves 0x8000_001[de] were passed through to the guest, but
  guest CPUs can migrate between host threads, so the information presented
  was not consistent.  This could easily be observed with 'cpucontrol -i 0xfoo
  /dev/cpuctl0'.
  
  Slightly improve this situation by enabling the AMD topology feature flag
  and presenting at least the CPUID fields used by FreeBSD itself to probe
  topology on more modern AMD64 hardware (Family 15h+).  Older stuff is
  probably less interesting.  I have not been able to empirically confirm it
  is sufficient, but it should not regress anything either.
  
  343166:
  vmm(4): Mask Spectre feature bits on AMD hosts
  
  For parity with Intel hosts, which already mask out the CPUID feature
  bits that indicate the presence of the SPEC_CTRL MSR, do the same on
  AMD.
  
  Eventually we may want to have a better support story for guests, but
  for now, limit the damage of incorrectly indicating an MSR we do not yet
  support.
  
  Eventually, we may want a generic CPUID override system for
  administrators, or for minimum supported feature set in heterogeneous
  environments with failover.  That is a much larger scope effort than
  this bug fix.
  
  348592:
  Emulate the AMD MSR_LS_CFG MSR used for various Ryzen errata.
  
  Writes are ignored and reads always return zero.
  
  PR:		224476, 235010

Modified:
  stable/12/sys/amd64/vmm/amd/svm_msr.c
  stable/12/sys/amd64/vmm/x86.c
  stable/12/sys/amd64/vmm/x86.h
  stable/12/sys/x86/x86/mp_x86.c
  stable/12/usr.sbin/bhyve/xmsr.c
Directory Properties:
  stable/12/   (props changed)

Changes in other areas also in this revision:
Modified:
  stable/11/sys/amd64/vmm/amd/svm_msr.c
  stable/11/sys/amd64/vmm/x86.c
  stable/11/sys/amd64/vmm/x86.h
  stable/11/sys/x86/x86/mp_x86.c
  stable/11/usr.sbin/bhyve/xmsr.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/12/sys/amd64/vmm/amd/svm_msr.c
==============================================================================
--- stable/12/sys/amd64/vmm/amd/svm_msr.c	Fri Jul 12 21:19:47 2019	(r349957)
+++ stable/12/sys/amd64/vmm/amd/svm_msr.c	Fri Jul 12 22:31:12 2019	(r349958)
@@ -122,9 +122,8 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, u
 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
 	case MSR_MTRR64kBase:
 	case MSR_SYSCFG:
-		*result = 0;
-		break;
 	case MSR_AMDK8_IPM:
+	case MSR_EXTFEATURES:
 		*result = 0;
 		break;
 	default:
@@ -162,6 +161,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, u
 		/*
 		 * Ignore writes to microcode update register.
 		 */
+		break;
+	case MSR_EXTFEATURES:
 		break;
 	default:
 		error = EINVAL;

Modified: stable/12/sys/amd64/vmm/x86.c
==============================================================================
--- stable/12/sys/amd64/vmm/x86.c	Fri Jul 12 21:19:47 2019	(r349957)
+++ stable/12/sys/amd64/vmm/x86.c	Fri Jul 12 22:31:12 2019	(r349958)
@@ -137,17 +137,30 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			cpuid_count(*eax, *ecx, regs);
 			if (vmm_is_amd()) {
 				/*
-				 * XXX this might appear silly because AMD
-				 * cpus don't have threads.
-				 *
-				 * However this matches the logical cpus as
-				 * advertised by leaf 0x1 and will work even
-				 * if threads is set incorrectly on an AMD host.
+				 * As on Intel (0000_0007:0, EDX), mask out
+				 * unsupported or unsafe AMD extended features
+				 * (8000_0008 EBX).
 				 */
+				regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
+				    AMDFEID_XSAVEERPTR);
+
 				vm_get_topology(vm, &sockets, &cores, &threads,
 				    &maxcpus);
-				logical_cpus = threads * cores;
-				regs[2] = logical_cpus - 1;
+				/*
+				 * Here, width is ApicIdCoreIdSize, present on
+				 * at least Family 15h and newer.  It
+				 * represents the "number of bits in the
+				 * initial apicid that indicate thread id
+				 * within a package."
+				 *
+				 * Our topo_probe_amd() uses it for
+				 * pkg_id_shift and other OSes may rely on it.
+				 */
+				width = MIN(0xF, log2(threads * cores));
+				if (width < 0x4)
+					width = 0;
+				logical_cpus = MIN(0xFF, threads * cores - 1);
+				regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
 			}
 			break;
 
@@ -155,9 +168,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			cpuid_count(*eax, *ecx, regs);
 
 			/*
-			 * Hide SVM and Topology Extension features from guest.
+			 * Hide SVM from guest.
 			 */
-			regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY);
+			regs[2] &= ~AMDID2_SVM;
 
 			/*
 			 * Don't advertise extended performance counter MSRs
@@ -219,6 +232,68 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 				regs[3] |= AMDPM_TSC_INVARIANT;
 			break;
 
+		case CPUID_8000_001D:
+			/* AMD Cache topology, like 0000_0004 for Intel. */
+			if (!vmm_is_amd())
+				goto default_leaf;
+
+			/*
+			 * Similar to Intel, generate a fictitious cache
+			 * topology for the guest with L3 shared by the
+			 * package, and L1 and L2 local to a core.
+			 */
+			vm_get_topology(vm, &sockets, &cores, &threads,
+			    &maxcpus);
+			switch (*ecx) {
+			case 0:
+				logical_cpus = threads;
+				level = 1;
+				func = 1;	/* data cache */
+				break;
+			case 1:
+				logical_cpus = threads;
+				level = 2;
+				func = 3;	/* unified cache */
+				break;
+			case 2:
+				logical_cpus = threads * cores;
+				level = 3;
+				func = 3;	/* unified cache */
+				break;
+			default:
+				logical_cpus = 0;
+				level = 0;
+				func = 0;
+				break;
+			}
+
+			logical_cpus = MIN(0xfff, logical_cpus - 1);
+			regs[0] = (logical_cpus << 14) | (1 << 8) |
+			    (level << 5) | func;
+			regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
+			regs[2] = 0;
+			regs[3] = 0;
+			break;
+
+		case CPUID_8000_001E:
+			/* AMD Family 16h+ additional identifiers */
+			if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16)
+				goto default_leaf;
+
+			vm_get_topology(vm, &sockets, &cores, &threads,
+			    &maxcpus);
+			regs[0] = vcpu_id;
+			threads = MIN(0xFF, threads - 1);
+			regs[1] = (threads << 8) |
+			    (vcpu_id >> log2(threads + 1));
+			/*
+			 * XXX Bhyve topology cannot yet represent >1 node per
+			 * processor.
+			 */
+			regs[2] = 0;
+			regs[3] = 0;
+			break;
+
 		case CPUID_0000_0001:
 			do_cpuid(1, regs);
 
@@ -359,7 +434,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 				    CPUID_STDEXT_AVX512F |
 				    CPUID_STDEXT_AVX512PF |
 				    CPUID_STDEXT_AVX512ER |
-				    CPUID_STDEXT_AVX512CD);
+				    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
 				regs[2] = 0;
 				regs[3] &= CPUID_STDEXT3_MD_CLEAR;
 
@@ -391,35 +466,42 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 
 		case CPUID_0000_000B:
 			/*
-			 * Processor topology enumeration
+			 * Intel processor topology enumeration
 			 */
-			vm_get_topology(vm, &sockets, &cores, &threads,
-			    &maxcpus);
-			if (*ecx == 0) {
-				logical_cpus = threads;
-				width = log2(logical_cpus);
-				level = CPUID_TYPE_SMT;
-				x2apic_id = vcpu_id;
-			}
+			if (vmm_is_intel()) {
+				vm_get_topology(vm, &sockets, &cores, &threads,
+				    &maxcpus);
+				if (*ecx == 0) {
+					logical_cpus = threads;
+					width = log2(logical_cpus);
+					level = CPUID_TYPE_SMT;
+					x2apic_id = vcpu_id;
+				}
 
-			if (*ecx == 1) {
-				logical_cpus = threads * cores;
-				width = log2(logical_cpus);
-				level = CPUID_TYPE_CORE;
-				x2apic_id = vcpu_id;
-			}
+				if (*ecx == 1) {
+					logical_cpus = threads * cores;
+					width = log2(logical_cpus);
+					level = CPUID_TYPE_CORE;
+					x2apic_id = vcpu_id;
+				}
 
-			if (!cpuid_leaf_b || *ecx >= 2) {
-				width = 0;
-				logical_cpus = 0;
-				level = 0;
-				x2apic_id = 0;
-			}
+				if (!cpuid_leaf_b || *ecx >= 2) {
+					width = 0;
+					logical_cpus = 0;
+					level = 0;
+					x2apic_id = 0;
+				}
 
-			regs[0] = width & 0x1f;
-			regs[1] = logical_cpus & 0xffff;
-			regs[2] = (level << 8) | (*ecx & 0xff);
-			regs[3] = x2apic_id;
+				regs[0] = width & 0x1f;
+				regs[1] = logical_cpus & 0xffff;
+				regs[2] = (level << 8) | (*ecx & 0xff);
+				regs[3] = x2apic_id;
+			} else {
+				regs[0] = 0;
+				regs[1] = 0;
+				regs[2] = 0;
+				regs[3] = 0;
+			}
 			break;
 
 		case CPUID_0000_000D:
@@ -481,6 +563,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			break;
 
 		default:
+default_leaf:
 			/*
 			 * The leaf value has already been clamped so
 			 * simply pass this through, keeping count of

Modified: stable/12/sys/amd64/vmm/x86.h
==============================================================================
--- stable/12/sys/amd64/vmm/x86.h	Fri Jul 12 21:19:47 2019	(r349957)
+++ stable/12/sys/amd64/vmm/x86.h	Fri Jul 12 22:31:12 2019	(r349958)
@@ -49,6 +49,8 @@
 #define CPUID_8000_0006	(0x80000006)
 #define CPUID_8000_0007	(0x80000007)
 #define CPUID_8000_0008	(0x80000008)
+#define CPUID_8000_001D	(0x8000001D)
+#define CPUID_8000_001E	(0x8000001E)
 
 /*
  * CPUID instruction Fn0000_0001:

Modified: stable/12/sys/x86/x86/mp_x86.c
==============================================================================
--- stable/12/sys/x86/x86/mp_x86.c	Fri Jul 12 21:19:47 2019	(r349957)
+++ stable/12/sys/x86/x86/mp_x86.c	Fri Jul 12 22:31:12 2019	(r349958)
@@ -235,6 +235,7 @@ add_deterministic_cache(int type, int level, int share
  *  - BKDG For AMD Family 10h Processors (Publication # 31116)
  *  - BKDG For AMD Family 15h Models 00h-0Fh Processors (Publication # 42301)
  *  - BKDG For AMD Family 16h Models 00h-0Fh Processors (Publication # 48751)
+ *  - PPR For AMD Family 17h Models 00h-0Fh Processors (Publication # 54945)
  */
 static void
 topo_probe_amd(void)

Modified: stable/12/usr.sbin/bhyve/xmsr.c
==============================================================================
--- stable/12/usr.sbin/bhyve/xmsr.c	Fri Jul 12 21:19:47 2019	(r349957)
+++ stable/12/usr.sbin/bhyve/xmsr.c	Fri Jul 12 22:31:12 2019	(r349958)
@@ -72,6 +72,7 @@ emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t nu
 			return (0);
 
 		case MSR_NB_CFG1:
+		case MSR_LS_CFG:
 		case MSR_IC_CFG:
 			return (0);	/* Ignore writes */
 
@@ -141,6 +142,7 @@ emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t nu
 			break;
 
 		case MSR_NB_CFG1:
+		case MSR_LS_CFG:
 		case MSR_IC_CFG:
 			/*
 			 * The reset value is processor family dependent so



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201907122231.x6CMVCWU059296>