Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 28 Mar 2018 18:47:35 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r331698 - in head/sys: amd64/include i386/include kern x86/x86 x86/xen
Message-ID:  <201803281847.w2SIlZh9029824@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jeff
Date: Wed Mar 28 18:47:35 2018
New Revision: 331698
URL: https://svnweb.freebsd.org/changeset/base/331698

Log:
  Restore r331606 with a bugfix to setup cpuset_domain[] earlier on all
  platforms.  Original commit message as follows:
  
  Only use CPUs in the domain the device is attached to for default
  assignment.  Device drivers are able to override the default assignment
  if they bind directly.  There are severe performance penalties for
  handling interrupts on remote CPUs and this should only be done in
  very controlled circumstances.
  
  Reviewed by:    jhb, kib
  Tested by:      pho
  Sponsored by:   Netflix, Dell/EMC Isilon
  Differential Revision:  https://reviews.freebsd.org/D14838

Modified:
  head/sys/amd64/include/intr_machdep.h
  head/sys/i386/include/intr_machdep.h
  head/sys/kern/kern_cpuset.c
  head/sys/x86/x86/intr_machdep.c
  head/sys/x86/x86/io_apic.c
  head/sys/x86/x86/msi.c
  head/sys/x86/x86/nexus.c
  head/sys/x86/xen/xen_intr.c

Modified: head/sys/amd64/include/intr_machdep.h
==============================================================================
--- head/sys/amd64/include/intr_machdep.h	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/amd64/include/intr_machdep.h	Wed Mar 28 18:47:35 2018	(r331698)
@@ -132,6 +132,7 @@ struct intsrc {
 	u_long *is_straycount;
 	u_int is_index;
 	u_int is_handlers;
+	u_int is_domain;
 	u_int is_cpu;
 };
 
@@ -168,7 +169,7 @@ void	intr_add_cpu(u_int cpu);
 #endif
 int	intr_add_handler(const char *name, int vector, driver_filter_t filter, 
 			 driver_intr_t handler, void *arg, enum intr_type flags, 
-			 void **cookiep);    
+			 void **cookiep, int domain);    
 #ifdef SMP
 int	intr_bind(u_int vector, u_char cpu);
 #endif
@@ -176,7 +177,7 @@ int	intr_config_intr(int vector, enum intr_trigger tri
     enum intr_polarity pol);
 int	intr_describe(u_int vector, void *ih, const char *descr);
 void	intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int	intr_next_cpu(void);
+u_int	intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
 int	intr_register_pic(struct pic *pic);
 int	intr_register_source(struct intsrc *isrc);

Modified: head/sys/i386/include/intr_machdep.h
==============================================================================
--- head/sys/i386/include/intr_machdep.h	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/i386/include/intr_machdep.h	Wed Mar 28 18:47:35 2018	(r331698)
@@ -132,6 +132,7 @@ struct intsrc {
 	u_long *is_straycount;
 	u_int is_index;
 	u_int is_handlers;
+	u_int is_domain;
 	u_int is_cpu;
 };
 
@@ -158,7 +159,8 @@ void	elcr_write_trigger(u_int irq, enum intr_trigger t
 void	intr_add_cpu(u_int cpu);
 #endif
 int	intr_add_handler(const char *name, int vector, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain);
 #ifdef SMP
 int	intr_bind(u_int vector, u_char cpu);
 #endif
@@ -166,7 +168,7 @@ int	intr_config_intr(int vector, enum intr_trigger tri
     enum intr_polarity pol);
 int	intr_describe(u_int vector, void *ih, const char *descr);
 void	intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int	intr_next_cpu(void);
+u_int	intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
 int	intr_register_pic(struct pic *pic);
 int	intr_register_source(struct intsrc *isrc);

Modified: head/sys/kern/kern_cpuset.c
==============================================================================
--- head/sys/kern/kern_cpuset.c	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/kern/kern_cpuset.c	Wed Mar 28 18:47:35 2018	(r331698)
@@ -1363,6 +1363,7 @@ cpuset_thread0(void)
 {
 	struct cpuset *set;
 	int error;
+	int i;
 
 	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -1374,11 +1375,11 @@ cpuset_thread0(void)
 	 * cpuset_create() due to NULL parent.
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
-	CPU_FILL(&set->cs_mask);
+	CPU_COPY(&all_cpus, &set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
 	set->cs_ref = 1;
-	set->cs_flags = CPU_SET_ROOT;
+	set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY;
 	set->cs_domain = &domainset0;
 	cpuset_zero = set;
 	cpuset_root = &set->cs_mask;
@@ -1396,6 +1397,16 @@ cpuset_thread0(void)
 	 */
 	cpuset_unr = new_unrhdr(2, INT_MAX, NULL);
 
+	/*
+	 * If MD code has not initialized per-domain cpusets, place all
+	 * CPUs in domain 0.
+	 */
+	for (i = 0; i < MAXMEMDOM; i++)
+		if (!CPU_EMPTY(&cpuset_domain[i]))
+			goto domains_set;
+	CPU_COPY(&all_cpus, &cpuset_domain[0]);
+domains_set:
+
 	return (set);
 }
 
@@ -1446,34 +1457,6 @@ cpuset_setproc_update_set(struct proc *p, struct cpuse
 	cpuset_rel(set);
 	return (0);
 }
-
-/*
- * This is called once the final set of system cpus is known.  Modifies
- * the root set and all children and mark the root read-only.  
- */
-static void
-cpuset_init(void *arg)
-{
-	cpuset_t mask;
-	int i;
-
-	mask = all_cpus;
-	if (cpuset_modify(cpuset_zero, &mask))
-		panic("Can't set initial cpuset mask.\n");
-	cpuset_zero->cs_flags |= CPU_SET_RDONLY;
-
-	/*
-	 * If MD code has not initialized per-domain cpusets, place all
-	 * CPUs in domain 0.
-	 */
-	for (i = 0; i < MAXMEMDOM; i++)
-		if (!CPU_EMPTY(&cpuset_domain[i]))
-			goto domains_set;
-	CPU_COPY(&all_cpus, &cpuset_domain[0]);
-domains_set:
-	return;
-}
-SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL);
 
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_args {

Modified: head/sys/x86/x86/intr_machdep.c
==============================================================================
--- head/sys/x86/x86/intr_machdep.c	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/x86/x86/intr_machdep.c	Wed Mar 28 18:47:35 2018	(r331698)
@@ -71,6 +71,8 @@
 #include <isa/isareg.h>
 #endif
 
+#include <vm/vm.h>
+
 #define	MAX_STRAY_LOG	5
 
 typedef void (*mask_fn)(void *);
@@ -185,7 +187,8 @@ intr_lookup_source(int vector)
 
 int
 intr_add_handler(const char *name, int vector, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain)
 {
 	struct intsrc *isrc;
 	int error;
@@ -200,6 +203,7 @@ intr_add_handler(const char *name, int vector, driver_
 		intrcnt_updatename(isrc);
 		isrc->is_handlers++;
 		if (isrc->is_handlers == 1) {
+			isrc->is_domain = domain;
 			isrc->is_pic->pic_enable_intr(isrc);
 			isrc->is_pic->pic_enable_source(isrc);
 		}
@@ -507,14 +511,27 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
  */
 
 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
-static int current_cpu;
+static int current_cpu[MAXMEMDOM];
 
+static void
+intr_init_cpus(void)
+{
+	int i;
+
+	for (i = 0; i < vm_ndomains; i++) {
+		current_cpu[i] = 0;
+		if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
+		    !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
+			intr_next_cpu(i);
+	}
+}
+
 /*
  * Return the CPU that the next interrupt source should use.  For now
  * this just returns the next local APIC according to round-robin.
  */
 u_int
-intr_next_cpu(void)
+intr_next_cpu(int domain)
 {
 	u_int apic_id;
 
@@ -529,12 +546,13 @@ intr_next_cpu(void)
 #endif
 
 	mtx_lock_spin(&icu_lock);
-	apic_id = cpu_apic_ids[current_cpu];
+	apic_id = cpu_apic_ids[current_cpu[domain]];
 	do {
-		current_cpu++;
-		if (current_cpu > mp_maxid)
-			current_cpu = 0;
-	} while (!CPU_ISSET(current_cpu, &intr_cpus));
+		current_cpu[domain]++;
+		if (current_cpu[domain] > mp_maxid)
+			current_cpu[domain] = 0;
+	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
+	    !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
 	mtx_unlock_spin(&icu_lock);
 	return (apic_id);
 }
@@ -568,7 +586,18 @@ intr_add_cpu(u_int cpu)
 	CPU_SET(cpu, &intr_cpus);
 }
 
-#ifndef EARLY_AP_STARTUP
+#ifdef EARLY_AP_STARTUP
+static void
+intr_smp_startup(void *arg __unused)
+{
+
+	intr_init_cpus();
+	return;
+}
+SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
+    NULL);
+
+#else
 /*
  * Distribute all the interrupt sources among the available CPUs once the
  * AP's have been launched.
@@ -580,6 +609,7 @@ intr_shuffle_irqs(void *arg __unused)
 	u_int cpu;
 	int i;
 
+	intr_init_cpus();
 	/* Don't bother on UP. */
 	if (mp_ncpus == 1)
 		return;
@@ -599,12 +629,12 @@ intr_shuffle_irqs(void *arg __unused)
 			 */
 			cpu = isrc->is_event->ie_cpu;
 			if (cpu == NOCPU)
-				cpu = current_cpu;
+				cpu = current_cpu[isrc->is_domain];
 			if (isrc->is_pic->pic_assign_cpu(isrc,
 			    cpu_apic_ids[cpu]) == 0) {
 				isrc->is_cpu = cpu;
 				if (isrc->is_event->ie_cpu == NOCPU)
-					intr_next_cpu();
+					intr_next_cpu(isrc->is_domain);
 			}
 		}
 	}
@@ -635,10 +665,11 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
 		isrc = interrupt_sources[i];
 		if (isrc == NULL)
 			continue;
-		sbuf_printf(&sbuf, "%s:%d @%d: %ld\n",
+		sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
 		    isrc->is_event->ie_fullname,
 		    isrc->is_index,
 		    isrc->is_cpu,
+		    isrc->is_domain,
 		    *isrc->is_count);
 	}
 
@@ -697,7 +728,7 @@ intr_balance(void *dummy __unused, int pending __unuse
 	 * Restart the scan from the same location to avoid moving in the
 	 * common case.
 	 */
-	current_cpu = 0;
+	intr_init_cpus();
 
 	/*
 	 * Assign round-robin from most loaded to least.
@@ -706,8 +737,8 @@ intr_balance(void *dummy __unused, int pending __unuse
 		isrc = interrupt_sorted[i];
 		if (isrc == NULL  || isrc->is_event->ie_cpu != NOCPU)
 			continue;
-		cpu = current_cpu;
-		intr_next_cpu();
+		cpu = current_cpu[isrc->is_domain];
+		intr_next_cpu(isrc->is_domain);
 		if (isrc->is_cpu != cpu &&
 		    isrc->is_pic->pic_assign_cpu(isrc,
 		    cpu_apic_ids[cpu]) == 0)
@@ -735,7 +766,7 @@ SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, i
  * Always route interrupts to the current processor in the UP case.
  */
 u_int
-intr_next_cpu(void)
+intr_next_cpu(int domain)
 {
 
 	return (PCPU_GET(apic_id));

Modified: head/sys/x86/x86/io_apic.c
==============================================================================
--- head/sys/x86/x86/io_apic.c	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/x86/x86/io_apic.c	Wed Mar 28 18:47:35 2018	(r331698)
@@ -499,7 +499,7 @@ ioapic_enable_intr(struct intsrc *isrc)
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 
 	if (intpin->io_vector == 0)
-		if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
+		if (ioapic_assign_cpu(isrc, intr_next_cpu(isrc->is_domain)) != 0)
 			panic("Couldn't find an APIC vector for IRQ %d",
 			    intpin->io_irq);
 	apic_enable_vector(intpin->io_cpu, intpin->io_vector);

Modified: head/sys/x86/x86/msi.c
==============================================================================
--- head/sys/x86/x86/msi.c	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/x86/x86/msi.c	Wed Mar 28 18:47:35 2018	(r331698)
@@ -363,7 +363,7 @@ int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
-	u_int cpu;
+	u_int cpu, domain;
 	int cnt, i, *mirqs, vector;
 #ifdef ACPI_DMAR
 	u_int cookies[count];
@@ -373,6 +373,9 @@ msi_alloc(device_t dev, int count, int maxcount, int *
 	if (!msi_enabled)
 		return (ENXIO);
 
+	if (bus_get_domain(dev, &domain) != 0)
+		domain = 0;
+
 	if (count > 1)
 		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
 	else
@@ -420,7 +423,7 @@ again:
 	KASSERT(cnt == count, ("count mismatch"));
 
 	/* Allocate 'count' IDT vectors. */
-	cpu = intr_next_cpu();
+	cpu = intr_next_cpu(domain);
 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
@@ -610,7 +613,7 @@ int
 msix_alloc(device_t dev, int *irq)
 {
 	struct msi_intsrc *msi;
-	u_int cpu;
+	u_int cpu, domain;
 	int i, vector;
 #ifdef ACPI_DMAR
 	u_int cookie;
@@ -620,6 +623,9 @@ msix_alloc(device_t dev, int *irq)
 	if (!msi_enabled)
 		return (ENXIO);
 
+	if (bus_get_domain(dev, &domain) != 0)
+		domain = 0;
+
 again:
 	mtx_lock(&msi_lock);
 
@@ -651,7 +657,7 @@ again:
 	}
 
 	/* Allocate an IDT vector. */
-	cpu = intr_next_cpu();
+	cpu = intr_next_cpu(domain);
 	vector = apic_alloc_vector(cpu, i);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);

Modified: head/sys/x86/x86/nexus.c
==============================================================================
--- head/sys/x86/x86/nexus.c	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/x86/x86/nexus.c	Wed Mar 28 18:47:35 2018	(r331698)
@@ -573,7 +573,7 @@ nexus_setup_intr(device_t bus, device_t child, struct 
 		 int flags, driver_filter_t filter, void (*ihand)(void *),
 		 void *arg, void **cookiep)
 {
-	int		error;
+	int		error, domain;
 
 	/* somebody tried to setup an irq that failed to allocate! */
 	if (irq == NULL)
@@ -589,9 +589,11 @@ nexus_setup_intr(device_t bus, device_t child, struct 
 	error = rman_activate_resource(irq);
 	if (error)
 		return (error);
+	if (bus_get_domain(child, &domain) != 0)
+		domain = 0;
 
 	error = intr_add_handler(device_get_nameunit(child),
-	    rman_get_start(irq), filter, ihand, arg, flags, cookiep);
+	    rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain);
 
 	return (error);
 }

Modified: head/sys/x86/xen/xen_intr.c
==============================================================================
--- head/sys/x86/xen/xen_intr.c	Wed Mar 28 18:43:06 2018	(r331697)
+++ head/sys/x86/xen/xen_intr.c	Wed Mar 28 18:47:35 2018	(r331698)
@@ -430,7 +430,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port
 		 * unless specified otherwise, so shuffle them to balance
 		 * the interrupt load.
 		 */
-		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu());
+		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0));
 	}
 #endif
 
@@ -1562,7 +1562,7 @@ xen_intr_add_handler(const char *name, driver_filter_t
 		return (EINVAL);
 
 	error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg,
-	    flags|INTR_EXCL, &isrc->xi_cookie);
+	    flags|INTR_EXCL, &isrc->xi_cookie, 0);
 	if (error != 0) {
 		printf(
 		    "%s: xen_intr_add_handler: intr_add_handler failed: %d\n",



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803281847.w2SIlZh9029824>