From owner-svn-src-stable-8@FreeBSD.ORG Mon Nov 1 08:20:14 2010 Return-Path: Delivered-To: svn-src-stable-8@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id E9387106566B; Mon, 1 Nov 2010 08:20:14 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id D67A98FC0A; Mon, 1 Nov 2010 08:20:14 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oA18KEo9050502; Mon, 1 Nov 2010 08:20:14 GMT (envelope-from avg@svn.freebsd.org) Received: (from avg@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oA18KEas050499; Mon, 1 Nov 2010 08:20:14 GMT (envelope-from avg@svn.freebsd.org) Message-Id: <201011010820.oA18KEas050499@svn.freebsd.org> From: Andriy Gapon Date: Mon, 1 Nov 2010 08:20:14 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r214621 - in stable/8/sys: amd64/amd64 i386/i386 X-BeenThere: svn-src-stable-8@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 8-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 01 Nov 2010 08:20:15 -0000 Author: avg Date: Mon Nov 1 08:20:14 2010 New Revision: 214621 URL: http://svn.freebsd.org/changeset/base/214621 Log: MFC r213323: i386 and amd64 mp_machdep: improve topology detection for Intel CPUs Modified: stable/8/sys/amd64/amd64/mp_machdep.c stable/8/sys/i386/i386/mp_machdep.c Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/xen/xenpci/ (props changed) Modified: stable/8/sys/amd64/amd64/mp_machdep.c ============================================================================== --- stable/8/sys/amd64/amd64/mp_machdep.c Mon Nov 1 08:12:28 2010 (r214620) +++ stable/8/sys/amd64/amd64/mp_machdep.c Mon Nov 1 08:20:14 2010 (r214621) @@ -118,7 +118,6 @@ extern inthand_t IDTVEC(fast_syscall), I * Local data and functions. */ -static u_int logical_cpus; static volatile cpumask_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ @@ -144,8 +143,8 @@ int apic_cpuids[MAX_APIC_ID + 1]; static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; -static int cpu_logical; -static int cpu_cores; +static int cpu_logical; /* logical cpus per core */ +static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void set_interrupt_apic_ids(void); @@ -154,7 +153,7 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static int hlt_logical_cpus; -static u_int hyperthreading_cpus; +static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; @@ -168,24 +167,105 @@ mem_range_AP_init(void) } static void +topo_probe_amd(void) +{ + + /* AMD processors do not support HTT. */ + cpu_cores = (amd_feature2 & AMDID2_CMP) != 0 ? + (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1; + cpu_logical = 1; +} + +/* + * Round up to the next power of two, if necessary, and then + * take log2. + * Returns -1 if argument is zero. + */ +static __inline int +mask_width(u_int x) +{ + + return (fls(x << (1 - powerof2(x))) - 1); +} + +static void +topo_probe_0x4(void) +{ + u_int p[4]; + int pkg_id_bits; + int core_id_bits; + int max_cores; + int max_logical; + int id; + + /* Both zero and one here mean one logical processor per package. */ + max_logical = (cpu_feature & CPUID_HTT) != 0 ? + (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; + if (max_logical <= 1) + return; + + /* + * Because of uniformity assumption we examine only + * those logical processors that belong to the same + * package as BSP. Further, we count number of + * logical processors that belong to the same core + * as BSP thus deducing number of threads per core. + */ + cpuid_count(0x04, 0, p); + max_cores = ((p[0] >> 26) & 0x3f) + 1; + core_id_bits = mask_width(max_logical/max_cores); + if (core_id_bits < 0) + return; + pkg_id_bits = core_id_bits + mask_width(max_cores); + + for (id = 0; id <= MAX_APIC_ID; id++) { + /* Check logical CPU availability. */ + if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) + continue; + /* Check if logical CPU has the same package ID. */ + if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) + continue; + cpu_cores++; + /* Check if logical CPU has the same package and core IDs. */ + if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) + cpu_logical++; + } + + cpu_cores /= cpu_logical; + hyperthreading_cpus = cpu_logical; +} + +static void topo_probe_0xb(void) { - int logical; - int p[4]; + u_int p[4]; int bits; - int type; int cnt; int i; + int logical; + int type; int x; - /* We only support two levels for now. */ + /* We only support three levels for now. */ for (i = 0; i < 3; i++) { - cpuid_count(0x0B, i, p); + cpuid_count(0x0b, i, p); + + /* Fall back if CPU leaf 11 doesn't really exist. */ + if (i == 0 && p[1] == 0) { + topo_probe_0x4(); + return; + } + bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; + /* + * Because of uniformity assumption we examine only + * those logical processors that belong to the same + * package as BSP. + */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) @@ -203,76 +283,16 @@ topo_probe_0xb(void) cpu_cores /= cpu_logical; } -static void -topo_probe_0x4(void) -{ - u_int threads_per_cache, p[4]; - u_int htt, cmp; - int i; - - htt = cmp = 1; - /* - * If this CPU supports HTT or CMP then mention the - * number of physical/logical cores it contains. - */ - if (cpu_feature & CPUID_HTT) - htt = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - if (cpu_vendor_id == CPU_VENDOR_AMD && (amd_feature2 & AMDID2_CMP)) - cmp = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; - else if (cpu_vendor_id == CPU_VENDOR_INTEL && (cpu_high >= 4)) { - cpuid_count(4, 0, p); - if ((p[0] & 0x1f) != 0) - cmp = ((p[0] >> 26) & 0x3f) + 1; - } - cpu_cores = cmp; - cpu_logical = htt / cmp; - - /* Setup the initial logical CPUs info. */ - if (cpu_feature & CPUID_HTT) - logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - - /* - * Work out if hyperthreading is *really* enabled. This - * is made really ugly by the fact that processors lie: Dual - * core processors claim to be hyperthreaded even when they're - * not, presumably because they want to be treated the same - * way as HTT with respect to per-cpu software licensing. - * At the time of writing (May 12, 2005) the only hyperthreaded - * cpus are from Intel, and Intel's dual-core processors can be - * identified via the "deterministic cache parameters" cpuid - * calls. - */ - /* - * First determine if this is an Intel processor which claims - * to have hyperthreading support. - */ - if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_INTEL) { - /* - * If the "deterministic cache parameters" cpuid calls - * are available, use them. - */ - if (cpu_high >= 4) { - /* Ask the processor about the L1 cache. */ - for (i = 0; i < 1; i++) { - cpuid_count(4, i, p); - threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1; - if (hyperthreading_cpus < threads_per_cache) - hyperthreading_cpus = threads_per_cache; - if ((p[0] & 0x1f) == 0) - break; - } - } - - /* - * If the deterministic cache parameters are not - * available, or if no caches were reported to exist, - * just accept what the HTT flag indicated. - */ - if (hyperthreading_cpus == 0) - hyperthreading_cpus = logical_cpus; - } -} - +/* + * Both topology discovery code and code that consumes topology + * information assume top-down uniformity of the topology. + * That is, all physical packages must be identical and each + * core in a package must have the same number of threads. + * Topology information is queried only on BSP, on which this + * code runs and for which it can query CPUID information. + * Then topology is extrapolated on all packages using the + * uniformity assumption. + */ static void topo_probe(void) { @@ -281,13 +301,31 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus = logical_cpus_mask = 0; - if (cpu_high >= 0xb) - topo_probe_0xb(); - else if (cpu_high) - topo_probe_0x4(); + logical_cpus_mask = 0; + if (cpu_vendor_id == CPU_VENDOR_AMD) + topo_probe_amd(); + else if (cpu_vendor_id == CPU_VENDOR_INTEL) { + /* + * See Intel(R) 64 Architecture Processor + * Topology Enumeration article for details. + * + * Note that 0x1 <= cpu_high < 4 case should be + * compatible with topo_probe_0x4() logic when + * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) + * or it should trigger the fallback otherwise. + */ + if (cpu_high >= 0xb) + topo_probe_0xb(); + else if (cpu_high >= 0x1) + topo_probe_0x4(); + } + + /* + * Fallback: assume each logical CPU is in separate + * physical package. That is, no multi-core, no SMT. + */ if (cpu_cores == 0) - cpu_cores = mp_ncpus > 0 ? mp_ncpus : 1; + cpu_cores = 1; if (cpu_logical == 0) cpu_logical = 1; cpu_topo_probed = 1; @@ -667,7 +705,8 @@ init_secondary(void) printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); /* Determine if we are a logical CPU. */ - if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) + /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ + if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) logical_cpus_mask |= PCPU_GET(cpumask); /* Determine if we are a hyperthread. */ Modified: stable/8/sys/i386/i386/mp_machdep.c ============================================================================== --- stable/8/sys/i386/i386/mp_machdep.c Mon Nov 1 08:12:28 2010 (r214620) +++ stable/8/sys/i386/i386/mp_machdep.c Mon Nov 1 08:20:14 2010 (r214621) @@ -172,7 +172,6 @@ u_long *ipi_lazypmap_counts[MAXCPU]; * Local data and functions. */ -static u_int logical_cpus; static volatile cpumask_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ @@ -198,8 +197,8 @@ int apic_cpuids[MAX_APIC_ID + 1]; static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; -static int cpu_logical; -static int cpu_cores; +static int cpu_logical; /* logical cpus per core */ +static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void install_ap_tramp(void); @@ -209,7 +208,7 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static int hlt_logical_cpus; -static u_int hyperthreading_cpus; +static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; @@ -222,24 +221,105 @@ mem_range_AP_init(void) } static void +topo_probe_amd(void) +{ + + /* AMD processors do not support HTT. */ + cpu_cores = (amd_feature2 & AMDID2_CMP) != 0 ? + (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1; + cpu_logical = 1; +} + +/* + * Round up to the next power of two, if necessary, and then + * take log2. + * Returns -1 if argument is zero. + */ +static __inline int +mask_width(u_int x) +{ + + return (fls(x << (1 - powerof2(x))) - 1); +} + +static void +topo_probe_0x4(void) +{ + u_int p[4]; + int pkg_id_bits; + int core_id_bits; + int max_cores; + int max_logical; + int id; + + /* Both zero and one here mean one logical processor per package. */ + max_logical = (cpu_feature & CPUID_HTT) != 0 ? + (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; + if (max_logical <= 1) + return; + + /* + * Because of uniformity assumption we examine only + * those logical processors that belong to the same + * package as BSP. Further, we count number of + * logical processors that belong to the same core + * as BSP thus deducing number of threads per core. + */ + cpuid_count(0x04, 0, p); + max_cores = ((p[0] >> 26) & 0x3f) + 1; + core_id_bits = mask_width(max_logical/max_cores); + if (core_id_bits < 0) + return; + pkg_id_bits = core_id_bits + mask_width(max_cores); + + for (id = 0; id <= MAX_APIC_ID; id++) { + /* Check logical CPU availability. */ + if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) + continue; + /* Check if logical CPU has the same package ID. */ + if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) + continue; + cpu_cores++; + /* Check if logical CPU has the same package and core IDs. */ + if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) + cpu_logical++; + } + + cpu_cores /= cpu_logical; + hyperthreading_cpus = cpu_logical; +} + +static void topo_probe_0xb(void) { - int logical; - int p[4]; + u_int p[4]; int bits; - int type; int cnt; int i; + int logical; + int type; int x; - /* We only support two levels for now. */ + /* We only support three levels for now. */ for (i = 0; i < 3; i++) { - cpuid_count(0x0B, i, p); + cpuid_count(0x0b, i, p); + + /* Fall back if CPU leaf 11 doesn't really exist. */ + if (i == 0 && p[1] == 0) { + topo_probe_0x4(); + return; + } + bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; + /* + * Because of uniformity assumption we examine only + * those logical processors that belong to the same + * package as BSP. + */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) @@ -257,76 +337,16 @@ topo_probe_0xb(void) cpu_cores /= cpu_logical; } -static void -topo_probe_0x4(void) -{ - u_int threads_per_cache, p[4]; - u_int htt, cmp; - int i; - - htt = cmp = 1; - /* - * If this CPU supports HTT or CMP then mention the - * number of physical/logical cores it contains. - */ - if (cpu_feature & CPUID_HTT) - htt = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - if (cpu_vendor_id == CPU_VENDOR_AMD && (amd_feature2 & AMDID2_CMP)) - cmp = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; - else if (cpu_vendor_id == CPU_VENDOR_INTEL && (cpu_high >= 4)) { - cpuid_count(4, 0, p); - if ((p[0] & 0x1f) != 0) - cmp = ((p[0] >> 26) & 0x3f) + 1; - } - cpu_cores = cmp; - cpu_logical = htt / cmp; - - /* Setup the initial logical CPUs info. */ - if (cpu_feature & CPUID_HTT) - logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - - /* - * Work out if hyperthreading is *really* enabled. This - * is made really ugly by the fact that processors lie: Dual - * core processors claim to be hyperthreaded even when they're - * not, presumably because they want to be treated the same - * way as HTT with respect to per-cpu software licensing. - * At the time of writing (May 12, 2005) the only hyperthreaded - * cpus are from Intel, and Intel's dual-core processors can be - * identified via the "deterministic cache parameters" cpuid - * calls. - */ - /* - * First determine if this is an Intel processor which claims - * to have hyperthreading support. - */ - if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_INTEL) { - /* - * If the "deterministic cache parameters" cpuid calls - * are available, use them. - */ - if (cpu_high >= 4) { - /* Ask the processor about the L1 cache. */ - for (i = 0; i < 1; i++) { - cpuid_count(4, i, p); - threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1; - if (hyperthreading_cpus < threads_per_cache) - hyperthreading_cpus = threads_per_cache; - if ((p[0] & 0x1f) == 0) - break; - } - } - - /* - * If the deterministic cache parameters are not - * available, or if no caches were reported to exist, - * just accept what the HTT flag indicated. - */ - if (hyperthreading_cpus == 0) - hyperthreading_cpus = logical_cpus; - } -} - +/* + * Both topology discovery code and code that consumes topology + * information assume top-down uniformity of the topology. + * That is, all physical packages must be identical and each + * core in a package must have the same number of threads. + * Topology information is queried only on BSP, on which this + * code runs and for which it can query CPUID information. + * Then topology is extrapolated on all packages using the + * uniformity assumption. + */ static void topo_probe(void) { @@ -335,13 +355,31 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus = logical_cpus_mask = 0; - if (cpu_high >= 0xb) - topo_probe_0xb(); - else if (cpu_high) - topo_probe_0x4(); + logical_cpus_mask = 0; + if (cpu_vendor_id == CPU_VENDOR_AMD) + topo_probe_amd(); + else if (cpu_vendor_id == CPU_VENDOR_INTEL) { + /* + * See Intel(R) 64 Architecture Processor + * Topology Enumeration article for details. + * + * Note that 0x1 <= cpu_high < 4 case should be + * compatible with topo_probe_0x4() logic when + * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) + * or it should trigger the fallback otherwise. + */ + if (cpu_high >= 0xb) + topo_probe_0xb(); + else if (cpu_high >= 0x1) + topo_probe_0x4(); + } + + /* + * Fallback: assume each logical CPU is in separate + * physical package. That is, no multi-core, no SMT. + */ if (cpu_cores == 0) - cpu_cores = mp_ncpus > 0 ? mp_ncpus : 1; + cpu_cores = 1; if (cpu_logical == 0) cpu_logical = 1; cpu_topo_probed = 1; @@ -705,7 +743,8 @@ init_secondary(void) printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); /* Determine if we are a logical CPU. */ - if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) + /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ + if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) logical_cpus_mask |= PCPU_GET(cpumask); /* Determine if we are a hyperthread. */