Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 22 Oct 2018 20:13:52 +0000 (UTC)
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r339616 - in head: share/man/man4 sys/arm64/arm64 sys/kern sys/vm sys/x86/acpica
Message-ID:  <201810222013.w9MKDqUX091544@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: markj
Date: Mon Oct 22 20:13:51 2018
New Revision: 339616
URL: https://svnweb.freebsd.org/changeset/base/339616

Log:
  Make it possible to disable NUMA support with a tunable.
  
  This provides a chicken switch for anyone negatively impacted by
  enabling NUMA in the amd64 GENERIC kernel configuration.  With
  NUMA disabled at boot-time, information about the NUMA topology
  is not exposed to the rest of the kernel, and all of physical
  memory is viewed as coming from a single domain.
  
  This method still has some performance overhead relative to disabling
  NUMA support at compile time.
  
  PR:		231460
  Reviewed by:	alc, gallatin, kib
  MFC after:	1 week
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D17439

Modified:
  head/share/man/man4/numa.4
  head/sys/arm64/arm64/mp_machdep.c
  head/sys/kern/kern_cpuset.c
  head/sys/vm/vm_phys.c
  head/sys/x86/acpica/srat.c

Modified: head/share/man/man4/numa.4
==============================================================================
--- head/share/man/man4/numa.4	Mon Oct 22 20:00:43 2018	(r339615)
+++ head/share/man/man4/numa.4	Mon Oct 22 20:13:51 2018	(r339616)
@@ -24,18 +24,16 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd July 10, 2018
+.Dd October 22, 2018
 .Dt NUMA 4
 .Os
 .Sh NAME
 .Nm NUMA
 .Nd Non-Uniform Memory Access
 .Sh SYNOPSIS
-.Cd options SMP
-.Cd options MAXMEMDOM=16
+.Cd options MAXMEMDOM
+.Cd options NUMA
 .Pp
-.In sys/cpuset.h
-.In sys/bus.h
 .Sh DESCRIPTION
 Non-Uniform Memory Access is a computer architecture design which
 involves unequal costs between processors, memory and IO devices
@@ -47,14 +45,26 @@ architecture, the latency to access specific memory or
 depends upon which processor the memory or device is attached to.
 Accessing memory local to a processor is faster than accessing memory
 that is connected to one of the other processors.
+.Fx
+implements NUMA-aware memory allocation policies.
+By default it attempts to ensure that allocations are balanced across
+each domain.
+Users may override the default domain selection policy using
+.Xr cpuset 1 .
 .Pp
 .Nm
-is enabled when the
+support is enabled when the
 .Cd NUMA
-option is used in a kernel configuration
-file and the
+option is specified in the kernel configuration file.
+Each platform defines the
 .Cd MAXMEMDOM
-option is set to a value greater than 1.
+constant, which specifies the maximum number of supported NUMA domains.
+This constant may be specified in the kernel configuration file.
+.Nm
+support can be disabled at boot time by setting the
+.Va vm.numa.disabled
+tunable to 1.
+Other values for this tunable are currently ignored.
 .Pp
 Thread and process
 .Nm
@@ -128,7 +138,7 @@ tool first appeared in
 .Fx 11.0
 and were removed in
 .Fx 12.0 .
-Current implementation appeared in
+The current implementation appeared in
 .Fx 12.0 .
 .Pp
 .Sh AUTHORS

Modified: head/sys/arm64/arm64/mp_machdep.c
==============================================================================
--- head/sys/arm64/arm64/mp_machdep.c	Mon Oct 22 20:00:43 2018	(r339615)
+++ head/sys/arm64/arm64/mp_machdep.c	Mon Oct 22 20:13:51 2018	(r339616)
@@ -576,11 +576,12 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size
 		return (FALSE);
 
 	/* Try to read the numa node of this cpu */
-	if (OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) > 0) {
-		__pcpu[id].pc_domain = domain;
-		if (domain < MAXMEMDOM)
-			CPU_SET(id, &cpuset_domain[domain]);
-	}
+	if (vm_ndomains == 1 ||
+	    OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
+		domain = 0;
+	__pcpu[id].pc_domain = domain;
+	if (domain < MAXMEMDOM)
+		CPU_SET(id, &cpuset_domain[domain]);
 
 	return (TRUE);
 }

Modified: head/sys/kern/kern_cpuset.c
==============================================================================
--- head/sys/kern/kern_cpuset.c	Mon Oct 22 20:00:43 2018	(r339615)
+++ head/sys/kern/kern_cpuset.c	Mon Oct 22 20:13:51 2018	(r339616)
@@ -458,6 +458,12 @@ _domainset_create(struct domainset *domain, struct dom
 	struct domainset *ndomain;
 	int i, j, max;
 
+	KASSERT(domain->ds_cnt <= vm_ndomains,
+	    ("invalid domain count in domainset %p", domain));
+	KASSERT(domain->ds_policy != DOMAINSET_POLICY_PREFER ||
+	    domain->ds_prefer < vm_ndomains,
+	    ("invalid preferred domain in domains %p", domain));
+
 	mtx_lock_spin(&cpuset_lock);
 	LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
 		if (domainset_equal(ndomain, domain))

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c	Mon Oct 22 20:00:43 2018	(r339615)
+++ head/sys/vm/vm_phys.c	Mon Oct 22 20:13:51 2018	(r339616)
@@ -597,11 +597,22 @@ vm_phys_register_domains(int ndomains, struct mem_affi
     int *locality)
 {
 #ifdef NUMA
-	int i;
+	int d, i;
 
-	vm_ndomains = ndomains;
-	mem_affinity = affinity;
-	mem_locality = locality;
+	/*
+	 * For now the only override value that we support is 1, which
+	 * effectively disables NUMA-awareness in the allocators.
+	 */
+	d = 0;
+	TUNABLE_INT_FETCH("vm.numa.disabled", &d);
+	if (d)
+		ndomains = 1;
+
+	if (ndomains > 1) {
+		vm_ndomains = ndomains;
+		mem_affinity = affinity;
+		mem_locality = locality;
+	}
 
 	for (i = 0; i < vm_ndomains; i++)
 		DOMAINSET_SET(i, &all_domains);

Modified: head/sys/x86/acpica/srat.c
==============================================================================
--- head/sys/x86/acpica/srat.c	Mon Oct 22 20:00:43 2018	(r339615)
+++ head/sys/x86/acpica/srat.c	Mon Oct 22 20:13:51 2018	(r339616)
@@ -535,11 +535,7 @@ srat_set_cpus(void *dummy)
 		if (!cpu->enabled)
 			panic("SRAT: CPU with APIC ID %u is not known",
 			    pc->pc_apic_id);
-#ifdef NUMA
-		pc->pc_domain = cpu->domain;
-#else
-		pc->pc_domain = 0;
-#endif
+		pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0;
 		CPU_SET(i, &cpuset_domain[pc->pc_domain]);
 		if (bootverbose)
 			printf("SRAT: CPU %u has memory domain %d\n", i,
@@ -564,7 +560,7 @@ acpi_map_pxm_to_vm_domainid(int pxm)
 
 	for (i = 0; i < ndomain; i++) {
 		if (domain_pxm[i] == pxm)
-			return (i);
+			return (vm_ndomains > 1 ? i : 0);
 	}
 
 	return (-1);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201810222013.w9MKDqUX091544>