Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 03 Sep 2019 14:06:49 -0000
From:      Justin Hibbits <jhibbits@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r346174 - in head/sys/powerpc: aim conf include ofw powernv powerpc
Message-ID:  <201904130403.x3D43IDd021224@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhibbits
Date: Sat Apr 13 04:03:18 2019
New Revision: 346174
URL: https://svnweb.freebsd.org/changeset/base/346174

Log:
  Add NUMA support to powerpc
  
  Summary:
  Initial NUMA support:
      - associate CPU with domain
      - associate memory ranges with domain
      - identify domain for devices
      - limit device interrupt binding to appropriate domain
  
  - Additionally fixes a bug in the setting of Maxmem which led to
    only memory attached to the first socket being enabled for DMA
  
  A pmap variant can opt in to NUMA support by calling `numa_mem_regions`
  at the end of pmap_bootstrap - registering the corresponding ranges with the
  VM.
  
  This yields a ~20% improvement in build times of llvm on dual socket POWER9
  over non-NUMA.
  
  Original patch by mmacy.
  
  Differential Revision: https://reviews.freebsd.org/D17933

Modified:
  head/sys/powerpc/aim/mmu_oea64.c
  head/sys/powerpc/conf/GENERIC64
  head/sys/powerpc/include/intr_machdep.h
  head/sys/powerpc/include/ofw_machdep.h
  head/sys/powerpc/include/param.h
  head/sys/powerpc/include/platform.h
  head/sys/powerpc/include/smp.h
  head/sys/powerpc/ofw/ofw_machdep.c
  head/sys/powerpc/ofw/ofw_pcibus.c
  head/sys/powerpc/powernv/opal_pci.c
  head/sys/powerpc/powernv/platform_powernv.c
  head/sys/powerpc/powerpc/intr_machdep.c
  head/sys/powerpc/powerpc/mp_machdep.c
  head/sys/powerpc/powerpc/nexus.c
  head/sys/powerpc/powerpc/platform.c
  head/sys/powerpc/powerpc/platform_if.m

Modified: head/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- head/sys/powerpc/aim/mmu_oea64.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/aim/mmu_oea64.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -146,8 +146,9 @@ extern void *slbtrap, *slbtrapend;
  */
 static struct	mem_region *regions;
 static struct	mem_region *pregions;
+static struct	numa_mem_region *numa_pregions;
 static u_int	phys_avail_count;
-static int	regions_sz, pregions_sz;
+static int	regions_sz, pregions_sz, numapregions_sz;
 
 extern void bs_remap_earlyboot(void);
 
@@ -1048,6 +1049,8 @@ moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelst
 			PMAP_UNLOCK(kernel_pmap);
 		}
 	}
+
+	numa_mem_regions(&numa_pregions, &numapregions_sz);
 }
 
 static void

Modified: head/sys/powerpc/conf/GENERIC64
==============================================================================
--- head/sys/powerpc/conf/GENERIC64	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/conf/GENERIC64	Sat Apr 13 04:03:18 2019	(r346174)
@@ -35,6 +35,7 @@ options 	POWERNV			#Non-virtualized OpenPOWER systems
 
 options		FDT			#Flattened Device Tree
 options 	SCHED_ULE		#ULE scheduler
+options 	NUMA			#Non-Uniform Memory Architecture support
 options 	PREEMPTION		#Enable kernel thread preemption
 options 	VIMAGE			# Subsystem virtualization, e.g. VNET
 options 	INET			#InterNETworking

Modified: head/sys/powerpc/include/intr_machdep.h
==============================================================================
--- head/sys/powerpc/include/intr_machdep.h	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/include/intr_machdep.h	Sat Apr 13 04:03:18 2019	(r346174)
@@ -54,7 +54,7 @@ u_int	powerpc_get_irq(uint32_t, u_int);
 void	powerpc_dispatch_intr(u_int, struct trapframe *);
 int	powerpc_enable_intr(void);
 int	powerpc_setup_intr(const char *, u_int, driver_filter_t, driver_intr_t,
-	    void *, enum intr_type, void **);
+	    void *, enum intr_type, void **, int);
 int	powerpc_teardown_intr(void *);
 int	powerpc_bind_intr(u_int irq, u_char cpu);
 int	powerpc_config_intr(int, enum intr_trigger, enum intr_polarity);

Modified: head/sys/powerpc/include/ofw_machdep.h
==============================================================================
--- head/sys/powerpc/include/ofw_machdep.h	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/include/ofw_machdep.h	Sat Apr 13 04:03:18 2019	(r346174)
@@ -47,7 +47,11 @@ boolean_t OF_bootstrap(void);
 void OF_reboot(void);
 
 void ofw_mem_regions(struct mem_region *, int *, struct mem_region *, int *);
+void ofw_numa_mem_regions(struct numa_mem_region *, int *);
 void ofw_quiesce(void); /* Must be called before VM is up! */
 void ofw_save_trap_vec(char *);
+int ofw_pcibus_get_domain(device_t dev, device_t child, int *domain);
+int ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op,
+		size_t setsize, cpuset_t *cpuset);
 
 #endif /* _MACHINE_OFW_MACHDEP_H_ */

Modified: head/sys/powerpc/include/param.h
==============================================================================
--- head/sys/powerpc/include/param.h	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/include/param.h	Sat Apr 13 04:03:18 2019	(r346174)
@@ -82,7 +82,7 @@
 #endif /* SMP || KLD_MODULE */
 
 #ifndef MAXMEMDOM
-#define	MAXMEMDOM	1
+#define	MAXMEMDOM	8
 #endif
 
 #define	ALIGNBYTES	_ALIGNBYTES

Modified: head/sys/powerpc/include/platform.h
==============================================================================
--- head/sys/powerpc/include/platform.h	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/include/platform.h	Sat Apr 13 04:03:18 2019	(r346174)
@@ -45,9 +45,16 @@ struct mem_region {
 	uint64_t	mr_size;
 };
 
+struct numa_mem_region {
+	uint64_t	mr_start;
+	uint64_t	mr_size;
+	uint64_t	mr_domain;
+};
+
 /* Documentation for these functions is in platform_if.m */
 
 void	mem_regions(struct mem_region **, int *, struct mem_region **, int *);
+void	numa_mem_regions(struct numa_mem_region **, int *);
 vm_offset_t platform_real_maxaddr(void);
 
 u_long	platform_timebase_freq(struct cpuref *);

Modified: head/sys/powerpc/include/smp.h
==============================================================================
--- head/sys/powerpc/include/smp.h	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/include/smp.h	Sat Apr 13 04:03:18 2019	(r346174)
@@ -52,6 +52,7 @@ void	ipi_selected(cpuset_t cpus, int ipi);
 struct cpuref {
 	uintptr_t	cr_hwref;
 	u_int		cr_cpuid;
+	u_int		cr_domain;
 };
 
 void	pmap_cpu_bootstrap(int);

Modified: head/sys/powerpc/ofw/ofw_machdep.c
==============================================================================
--- head/sys/powerpc/ofw/ofw_machdep.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/ofw/ofw_machdep.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
+#include <vm/vm_phys.h>
 
 #include <machine/bus.h>
 #include <machine/cpu.h>
@@ -222,11 +223,59 @@ parse_ofw_memory(phandle_t node, const char *prop, str
 
 		j++;
 	}
-	sz = j*sizeof(output[0]);
 
-	return (sz);
+	return (j);
 }
 
+static int
+parse_numa_ofw_memory(phandle_t node, const char *prop,
+    struct numa_mem_region *output)
+{
+	cell_t address_cells, size_cells;
+	cell_t OFmem[4 * PHYS_AVAIL_SZ];
+	int sz, i, j;
+	phandle_t phandle;
+
+	sz = 0;
+
+	/*
+	 * Get #address-cells and #size-cells from the root node, defaulting
+	 * each to 1 if it cannot be found.
+	 */
+	phandle = OF_finddevice("/");
+	if (OF_getencprop(phandle, "#address-cells", &address_cells,
+	    sizeof(address_cells)) < (ssize_t)sizeof(address_cells))
+		address_cells = 1;
+	if (OF_getencprop(phandle, "#size-cells", &size_cells,
+	    sizeof(size_cells)) < (ssize_t)sizeof(size_cells))
+		size_cells = 1;
+
+	/*
+	 * Get memory.
+	 */
+	if (node == -1 || (sz = OF_getencprop(node, prop,
+	    OFmem, sizeof(OFmem))) <= 0)
+		panic("Physical memory map not found");
+
+	i = 0;
+	j = 0;
+	while (i < sz/sizeof(cell_t)) {
+		output[j].mr_start = OFmem[i++];
+		if (address_cells == 2) {
+			output[j].mr_start <<= 32;
+			output[j].mr_start += OFmem[i++];
+		}
+		output[j].mr_size = OFmem[i++];
+		if (size_cells == 2) {
+			output[j].mr_size <<= 32;
+			output[j].mr_size += OFmem[i++];
+		}
+		j++;
+	}
+
+	return (j);
+}
+
 #ifdef FDT
 static int
 excise_reserved_regions(struct mem_region *avail, int asz,
@@ -408,6 +457,51 @@ excise_fdt_reserved(struct mem_region *avail, int asz)
  * The available regions need not take the kernel into account.
  */
 void
+ofw_numa_mem_regions(struct numa_mem_region *memp, int *memsz)
+{
+	phandle_t phandle;
+	int res, count, msz;
+	char name[31];
+	cell_t associativity[5];
+	struct numa_mem_region *curmemp;
+
+	msz = 0;
+	/*
+	 * Get memory from all the /memory nodes.
+	 */
+	for (phandle = OF_child(OF_peer(0)); phandle != 0;
+	    phandle = OF_peer(phandle)) {
+		if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0)
+			continue;
+		if (strncmp(name, "memory@", strlen("memory@")) != 0)
+			continue;
+
+		count = parse_numa_ofw_memory(phandle, "reg", &memp[msz]);
+		if (count == 0)
+			continue;
+		curmemp = &memp[msz];
+		res = OF_getproplen(phandle, "ibm,associativity");
+		if (res <= 0)
+			continue;
+		MPASS(count == 1);
+		OF_getencprop(phandle, "ibm,associativity",
+			associativity, res);
+		curmemp->mr_domain = associativity[3] - 1;
+		if (bootverbose)
+			printf("%s %#jx-%#jx domain(%ju)\n",
+			    name, (uintmax_t)curmemp->mr_start,
+			    (uintmax_t)curmemp->mr_start + curmemp->mr_size,
+			    (uintmax_t)curmemp->mr_domain);
+		msz += count;
+	}
+	*memsz = msz;
+}
+/*
+ * This is called during powerpc_init, before the system is really initialized.
+ * It shall provide the total and the available regions of RAM.
+ * The available regions need not take the kernel into account.
+ */
+void
 ofw_mem_regions(struct mem_region *memp, int *memsz,
 		struct mem_region *availp, int *availsz)
 {
@@ -430,7 +524,7 @@ ofw_mem_regions(struct mem_region *memp, int *memsz,
 			continue;
 
 		res = parse_ofw_memory(phandle, "reg", &memp[msz]);
-		msz += res/sizeof(struct mem_region);
+		msz += res;
 
 		/*
 		 * On POWER9 Systems we might have both linux,usable-memory and
@@ -446,7 +540,7 @@ ofw_mem_regions(struct mem_region *memp, int *memsz,
 			    &availp[asz]);
 		else
 			res = parse_ofw_memory(phandle, "reg", &availp[asz]);
-		asz += res/sizeof(struct mem_region);
+		asz += res;
 	}
 
 #ifdef FDT

Modified: head/sys/powerpc/ofw/ofw_pcibus.c
==============================================================================
--- head/sys/powerpc/ofw/ofw_pcibus.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/ofw/ofw_pcibus.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/libkern.h>
 #include <sys/module.h>
 #include <sys/pciio.h>
+#include <sys/smp.h>
 
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
@@ -80,6 +81,8 @@ static device_method_t ofw_pcibus_methods[] = {
 	DEVMETHOD(bus_child_deleted,	ofw_pcibus_child_deleted),
 	DEVMETHOD(bus_child_pnpinfo_str, ofw_pcibus_child_pnpinfo_str_method),
 	DEVMETHOD(bus_rescan,		bus_null_rescan),
+	DEVMETHOD(bus_get_cpus,		ofw_pcibus_get_cpus),
+	DEVMETHOD(bus_get_domain,	ofw_pcibus_get_domain),
 
 	/* PCI interface */
 	DEVMETHOD(pci_alloc_devinfo,	ofw_pcibus_alloc_devinfo),
@@ -382,3 +385,76 @@ ofw_pcibus_get_devinfo(device_t bus, device_t dev)
 	return (&dinfo->opd_obdinfo);
 }
 
+static int
+ofw_pcibus_parse_associativity(device_t dev, int *domain)
+{
+	phandle_t node;
+	cell_t associativity[5];
+	int res;
+
+	if ((node = ofw_bus_get_node(dev)) == -1) {
+		device_printf(dev, "no ofw node found\n");
+		return (ENXIO);
+	}
+	res = OF_getproplen(node, "ibm,associativity");
+	if (res <= 0)
+		return (ENXIO);
+	OF_getencprop(node, "ibm,associativity",
+		associativity, res);
+
+	*domain = associativity[3] - 1;
+	if (bootverbose)
+		device_printf(dev, "domain(%d)\n", *domain);
+	return (0);
+}
+
+int
+ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
+    cpuset_t *cpuset)
+{
+	int d, error;
+
+	error = ofw_pcibus_parse_associativity(child, &d);
+	if (error)
+		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
+
+	switch (op) {
+	case LOCAL_CPUS:
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		*cpuset = cpuset_domain[d];
+		return (0);
+	case INTR_CPUS:
+		error = bus_generic_get_cpus(dev, child, op, setsize, cpuset);
+		if (error != 0)
+			return (error);
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		CPU_AND(cpuset, &cpuset_domain[d]);
+		return (0);
+	default:
+		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
+	}
+	return (0);
+}
+
+/*
+ * Fetch the NUMA domain for the given device 'child'.
+ *
+ * If the child's OFW node has an "ibm,associativity" property, derive
+ * the NUMA domain from it.
+ * Otherwise, defer to bus_generic_get_domain() and return whatever
+ * result it produces.
+ */
+int
+ofw_pcibus_get_domain(device_t dev, device_t child, int *domain)
+{
+	int d, error;
+
+	error = ofw_pcibus_parse_associativity(child, &d);
+	/* No ofw node; go up a level */
+	if (error)
+		return (bus_generic_get_domain(dev, child, domain));
+	*domain = d;
+	return (0);
+}

Modified: head/sys/powerpc/powernv/opal_pci.c
==============================================================================
--- head/sys/powerpc/powernv/opal_pci.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powernv/opal_pci.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -149,6 +149,8 @@ static device_method_t	opalpci_methods[] = {
 
 	/* Bus interface */
 	DEVMETHOD(bus_get_dma_tag,	opalpci_get_dma_tag),
+	DEVMETHOD(bus_get_cpus,		ofw_pcibus_get_cpus),
+	DEVMETHOD(bus_get_domain,	ofw_pcibus_get_domain),
 
 	DEVMETHOD_END
 };
@@ -367,7 +369,7 @@ opalpci_attach(device_t dev)
 	tce_size = max_tce_size(dev);
 	maxmem = roundup2(powerpc_ptob(Maxmem), tce_size);
 	entries = round_pow2(maxmem / tce_size);
-	tce_tbl_size = max(entries * sizeof(uint64_t), 4096);
+	tce_tbl_size = MAX(entries * sizeof(uint64_t), 4096);
 	if (entries > OPAL_PCI_TCE_MAX_ENTRIES)
 		panic("POWERNV supports only %jdGB of memory space\n",
 		    (uintmax_t)((OPAL_PCI_TCE_MAX_ENTRIES * tce_size) >> 30));

Modified: head/sys/powerpc/powernv/platform_powernv.c
==============================================================================
--- head/sys/powerpc/powernv/platform_powernv.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powernv/platform_powernv.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -65,6 +65,7 @@ static int powernv_probe(platform_t);
 static int powernv_attach(platform_t);
 void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
     struct mem_region *avail, int *availsz);
+static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
 static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
 static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
 static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
@@ -83,6 +84,7 @@ static platform_method_t powernv_methods[] = {
 	PLATFORMMETHOD(platform_probe, 		powernv_probe),
 	PLATFORMMETHOD(platform_attach,		powernv_attach),
 	PLATFORMMETHOD(platform_mem_regions,	powernv_mem_regions),
+	PLATFORMMETHOD(platform_numa_mem_regions,	powernv_numa_mem_regions),
 	PLATFORMMETHOD(platform_timebase_freq,	powernv_timebase_freq),
 	
 	PLATFORMMETHOD(platform_smp_ap_init,	powernv_smp_ap_init),
@@ -250,6 +252,13 @@ powernv_mem_regions(platform_t plat, struct mem_region
 	ofw_mem_regions(phys, physsz, avail, availsz);
 }
 
+static void
+powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
+{
+
+	ofw_numa_mem_regions(phys, physsz);
+}
+
 static u_long
 powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
 {
@@ -313,15 +322,13 @@ powernv_cpuref_init(void)
 		if (res > 0 && strcmp(buf, "cpu") == 0) {
 			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
 			if (res > 0) {
-
-
 				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
 				    interrupt_servers, res);
 
 				for (a = 0; a < res/sizeof(cell_t); a++) {
 					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
 					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
-
+					tmp_cpuref[tmp_cpuref_cnt].cr_domain = interrupt_servers[a] >> 11;
 					if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
 						bsp = tmp_cpuref_cnt;
 
@@ -335,11 +342,13 @@ powernv_cpuref_init(void)
 	for (a = bsp; a < tmp_cpuref_cnt; a++) {
 		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
 		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
+		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
 		platform_cpuref_cnt++;
 	}
 	for (a = 0; a < bsp; a++) {
 		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
 		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
+		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
 		platform_cpuref_cnt++;
 	}
 
@@ -356,6 +365,7 @@ powernv_smp_first_cpu(platform_t plat, struct cpuref *
 
 	cpuref->cr_cpuid = 0;
 	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
+	cpuref->cr_domain = platform_cpuref[0].cr_domain;
 
 	return (0);
 }
@@ -374,6 +384,7 @@ powernv_smp_next_cpu(platform_t plat, struct cpuref *c
 
 	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
 	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
+	cpuref->cr_domain = platform_cpuref[id].cr_domain;
 
 	return (0);
 }
@@ -384,6 +395,7 @@ powernv_smp_get_bsp(platform_t plat, struct cpuref *cp
 
 	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
 	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
+	cpuref->cr_domain = platform_cpuref[0].cr_domain;
 	return (0);
 }
 

Modified: head/sys/powerpc/powerpc/intr_machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/intr_machdep.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powerpc/intr_machdep.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -97,16 +97,17 @@ struct powerpc_intr {
 	struct intr_event *event;
 	long	*cntp;
 	void	*priv;		/* PIC-private data */
-	u_int	irq;
 	device_t pic;
+	u_int	irq;
 	u_int	intline;
 	u_int	vector;
 	u_int	cntindex;
-	cpuset_t cpu;
-	enum intr_trigger trig;
-	enum intr_polarity pol;
 	int	fwcode;
 	int	ipi;
+	int	pi_domain;
+	enum intr_trigger trig;
+	enum intr_polarity pol;
+	cpuset_t pi_cpuset;
 };
 
 struct pic {
@@ -203,7 +204,7 @@ smp_intr_init(void *dummy __unused)
 	for (vector = 0; vector < nvectors; vector++) {
 		i = powerpc_intrs[vector];
 		if (i != NULL && i->event != NULL && i->pic == root_pic)
-			PIC_BIND(i->pic, i->intline, i->cpu, &i->priv);
+			PIC_BIND(i->pic, i->intline, i->pi_cpuset, &i->priv);
 	}
 }
 SYSINIT(smp_intr_init, SI_SUB_SMP, SI_ORDER_ANY, smp_intr_init, NULL);
@@ -256,9 +257,9 @@ intr_lookup(u_int irq)
 	i->ipi = 0;
 
 #ifdef SMP
-	i->cpu = all_cpus;
+	i->pi_cpuset = all_cpus;
 #else
-	CPU_SETOF(0, &i->cpu);
+	CPU_SETOF(0, &i->pi_cpuset);
 #endif
 
 	for (vector = 0; vector < num_io_irqs && vector <= nvectors;
@@ -347,12 +348,12 @@ powerpc_assign_intr_cpu(void *arg, int cpu)
 	struct powerpc_intr *i = arg;
 
 	if (cpu == NOCPU)
-		i->cpu = all_cpus;
+		i->pi_cpuset = all_cpus;
 	else
-		CPU_SETOF(cpu, &i->cpu);
+		CPU_SETOF(cpu, &i->pi_cpuset);
 
 	if (!cold && i->pic != NULL && i->pic == root_pic)
-		PIC_BIND(i->pic, i->intline, i->cpu, &i->priv);
+		PIC_BIND(i->pic, i->intline, i->pi_cpuset, &i->priv);
 
 	return (0);
 #else
@@ -469,7 +470,8 @@ powerpc_enable_intr(void)
 			error = powerpc_setup_intr("IPI",
 			    MAP_IRQ(piclist[n].node, piclist[n].irqs),
 			    powerpc_ipi_handler, NULL, NULL,
-			    INTR_TYPE_MISC | INTR_EXCL, &ipi_cookie);
+			    INTR_TYPE_MISC | INTR_EXCL, &ipi_cookie,
+			    0 /* domain XXX */);
 			if (error) {
 				printf("unable to setup IPI handler\n");
 				return (error);
@@ -512,7 +514,8 @@ powerpc_enable_intr(void)
 
 int
 powerpc_setup_intr(const char *name, u_int irq, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain)
 {
 	struct powerpc_intr *i;
 	int error, enable = 0;
@@ -533,7 +536,13 @@ powerpc_setup_intr(const char *name, u_int irq, driver
 
 	error = intr_event_add_handler(i->event, name, filter, handler, arg,
 	    intr_priority(flags), flags, cookiep);
-
+	if (error)
+		return (error);
+	i->pi_domain = domain;
+	if (strcmp(name, "IPI") != 0)  {
+		CPU_ZERO(&i->pi_cpuset);
+		CPU_COPY(&cpuset_domain[domain], &i->pi_cpuset);
+	}
 	mtx_lock(&intr_table_lock);
 	intrcnt_setname(i->event->ie_fullname, i->cntindex);
 	mtx_unlock(&intr_table_lock);
@@ -551,7 +560,7 @@ powerpc_setup_intr(const char *name, u_int irq, driver
 				PIC_CONFIG(i->pic, i->intline, i->trig, i->pol);
 
 			if (i->pic == root_pic)
-				PIC_BIND(i->pic, i->intline, i->cpu, &i->priv);
+				PIC_BIND(i->pic, i->intline, i->pi_cpuset, &i->priv);
 
 			if (enable)
 				PIC_ENABLE(i->pic, i->intline, i->vector,

Modified: head/sys/powerpc/powerpc/mp_machdep.c
==============================================================================
--- head/sys/powerpc/powerpc/mp_machdep.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powerpc/mp_machdep.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -182,6 +182,15 @@ cpu_mp_start(void)
 			pc->pc_bsp = 1;
 		}
 		pc->pc_hwref = cpu.cr_hwref;
+
+		if (vm_ndomains > 1)
+			pc->pc_domain = cpu.cr_domain;
+		else
+			pc->pc_domain = 0;
+
+		CPU_SET(pc->pc_cpuid, &cpuset_domain[pc->pc_domain]);
+		KASSERT(pc->pc_domain < MAXMEMDOM, ("bad domain value %d\n",
+		    pc->pc_domain));
 		CPU_SET(pc->pc_cpuid, &all_cpus);
 next:
 		error = platform_smp_next_cpu(&cpu);
@@ -205,7 +214,7 @@ cpu_mp_announce(void)
 		pc = pcpu_find(i);
 		if (pc == NULL)
 			continue;
-		printf("cpu%d: dev=%x", i, (int)pc->pc_hwref);
+		printf("cpu%d: dev=%x domain=%d ", i, (int)pc->pc_hwref, pc->pc_domain);
 		if (pc->pc_bsp)
 			printf(" (BSP)");
 		printf("\n");

Modified: head/sys/powerpc/powerpc/nexus.c
==============================================================================
--- head/sys/powerpc/powerpc/nexus.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powerpc/nexus.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -38,11 +38,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/pcpu.h>
 #include <sys/rman.h>
+#include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -67,6 +69,8 @@ static bus_teardown_intr_t nexus_teardown_intr;
 static bus_activate_resource_t nexus_activate_resource;
 static bus_deactivate_resource_t nexus_deactivate_resource;
 static bus_space_tag_t nexus_get_bus_tag(device_t, device_t);
+static int nexus_get_cpus(device_t, device_t, enum cpu_sets, size_t,
+    cpuset_t *);
 #ifdef SMP
 static bus_bind_intr_t nexus_bind_intr;
 #endif
@@ -89,6 +93,7 @@ static device_method_t nexus_methods[] = {
 #endif
 	DEVMETHOD(bus_config_intr,	nexus_config_intr),
 	DEVMETHOD(bus_get_bus_tag,	nexus_get_bus_tag),
+	DEVMETHOD(bus_get_cpus,		nexus_get_cpus),
 
 	/* ofw_bus interface */
 	DEVMETHOD(ofw_bus_map_intr,	nexus_ofw_map_intr),
@@ -127,11 +132,13 @@ nexus_setup_intr(device_t bus __unused, device_t child
     int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg,
     void **cookiep)
 {
-	int error;
+	int error, domain;
 
 	if (r == NULL)
 		panic("%s: NULL interrupt resource!", __func__);
 
+	if (cookiep != NULL)
+		*cookiep = NULL;
 	if ((rman_get_flags(r) & RF_SHAREABLE) == 0)
 		flags |= INTR_EXCL;
 
@@ -140,8 +147,13 @@ nexus_setup_intr(device_t bus __unused, device_t child
 	if (error)
 		return (error);
 
+	if (bus_get_domain(child, &domain) != 0) {
+		if(bootverbose)
+			device_printf(child, "no domain found\n");
+		domain = 0;
+	}
 	error = powerpc_setup_intr(device_get_nameunit(child),
-	    rman_get_start(r), filt, intr, arg, flags, cookiep);
+	    rman_get_start(r), filt, intr, arg, flags, cookiep, domain);
 
 	return (error);
 }
@@ -162,6 +174,24 @@ nexus_get_bus_tag(device_t bus __unused, device_t chil
 {
 
 	return(&bs_be_tag);
+}
+
+static int
+nexus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
+    cpuset_t *cpuset)
+{
+
+	switch (op) {
+#ifdef SMP
+	case INTR_CPUS:
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		*cpuset = all_cpus;
+		return (0);
+#endif
+	default:
+		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
+	}
 }
 
 #ifdef SMP

Modified: head/sys/powerpc/powerpc/platform.c
==============================================================================
--- head/sys/powerpc/powerpc/platform.c	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powerpc/platform.c	Sat Apr 13 04:03:18 2019	(r346174)
@@ -48,13 +48,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/types.h>
 
 #include <vm/vm.h>
+#include <vm/vm_param.h>
 #include <vm/vm_page.h>
+#include <vm/vm_phys.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/platform.h>
 #include <machine/platformvar.h>
 #include <machine/smp.h>
+#include <machine/vmparam.h>
 
 #include "platform_if.h"
 
@@ -67,9 +70,12 @@ static char plat_name[64] = "";
 SYSCTL_STRING(_hw, OID_AUTO, platform, CTLFLAG_RD | CTLFLAG_TUN,
     plat_name, 0, "Platform currently in use");
 
+static struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
+static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
 static struct mem_region pregions[PHYS_AVAIL_SZ];
+static struct numa_mem_region numa_pregions[PHYS_AVAIL_SZ];
 static struct mem_region aregions[PHYS_AVAIL_SZ];
-static int npregions, naregions;
+static int nnumapregions, npregions, naregions;
 
 /*
  * Memory region utilities: determine if two regions overlap,
@@ -113,6 +119,54 @@ mr_cmp(const void *a, const void *b)
 }
 
 void
+numa_mem_regions(struct numa_mem_region **phys, int *physsz)
+{
+	struct mem_affinity *mi;
+	int i, j, maxdom, ndomain, offset;
+
+	nnumapregions = 0;
+	PLATFORM_NUMA_MEM_REGIONS(plat_obj, numa_pregions, &nnumapregions);
+
+	if (physsz != NULL)
+		*physsz = nnumapregions;
+	if (phys != NULL)
+		*phys = numa_pregions;
+	if (physsz == NULL || phys == NULL) {
+		printf("unset value\n");
+		return;
+	}
+	maxdom = 0;
+	for (i = 0; i < nnumapregions; i++)
+		if (numa_pregions[i].mr_domain > maxdom)
+			maxdom = numa_pregions[i].mr_domain;
+
+	mi = mem_info;
+	for (i = 0; i < nnumapregions; i++, mi++) {
+		mi->start = numa_pregions[i].mr_start;
+		mi->end = numa_pregions[i].mr_start + numa_pregions[i].mr_size;
+		mi->domain = numa_pregions[i].mr_domain;
+	}
+	offset = 0;
+	vm_locality_table[offset] = 10;
+	ndomain = maxdom + 1;
+	if (ndomain > 1) {
+		for (i = 0; i < ndomain; i++) {
+			for (j = 0; j < ndomain; j++) {
+				/*
+				 * Not sure what these values should actually be
+				 */
+				if (i == j)
+					vm_locality_table[offset] = 10;
+				else
+					vm_locality_table[offset] = 21;
+				offset++;
+			}
+		}
+	}
+	vm_phys_register_domains(ndomain, mem_info, vm_locality_table);
+}
+
+void
 mem_regions(struct mem_region **phys, int *physsz, struct mem_region **avail,
     int *availsz)
 {
@@ -252,7 +306,7 @@ platform_smp_probe_threads(void)
 struct cpu_group *
 cpu_topo(void)
 {
-        return (PLATFORM_SMP_TOPO(plat_obj));
+	return (PLATFORM_SMP_TOPO(plat_obj));
 }
 #endif
 

Modified: head/sys/powerpc/powerpc/platform_if.m
==============================================================================
--- head/sys/powerpc/powerpc/platform_if.m	Sat Apr 13 03:32:21 2019	(r346173)
+++ head/sys/powerpc/powerpc/platform_if.m	Sat Apr 13 04:03:18 2019	(r346174)
@@ -130,6 +130,22 @@ METHOD void mem_regions {
 	int		   *_availsz;
 };
 
+
+/**
+ * @brief Return the system's physical memory map.
+ *
+ * It shall provide the total RAM with the corresponding domains.
+ *
+ * @param _memp		Array of physical memory chunks
+ * @param _memsz	Number of physical memory chunks
+ */
+
+METHOD void numa_mem_regions {
+	platform_t	    _plat;
+	struct numa_mem_region  *_memp;
+	int		   *_memsz;
+};
+
 /**
  * @brief Return the maximum address accessible in real mode
  *   (for use with hypervisors)





Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201904130403.x3D43IDd021224>