Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 27 Mar 2018 08:15:35 +0200
From:      "O. Hartmann" <ohartmann@walstatt.org>
To:        Jeff Roberson <jeff@FreeBSD.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r331606 - in head/sys: amd64/include i386/include x86/x86 x86/xen
Message-ID:  <20180327081535.0dacffcb@freyja.zeit4.iv.bundesimmobilien.de>
In-Reply-To: <201803270337.w2R3b4iv035285@repo.freebsd.org>

index | next in thread | previous in thread | raw e-mail

On Tue, 27 Mar 2018 03:37:04 +0000 (UTC)
Jeff Roberson <jeff@FreeBSD.org> wrote:

> Author: jeff
> Date: Tue Mar 27 03:37:04 2018
> New Revision: 331606
> URL: https://svnweb.freebsd.org/changeset/base/331606
> 
> Log:
>   Only use CPUs in the domain the device is attached to for default
>   assignment.  Device drivers are able to override the default assignment
>   if they bind directly.  There are severe performance penalties for
>   handling interrupts on remote CPUs and this should only be done in
>   very controlled circumstances.
>   
>   Reviewed by:	jhb, kib
>   Tested by:	pho (earlier version)
>   Sponsored by:	Netflix, Dell/EMC Isilon
>   Differential Revision:	https://reviews.freebsd.org/D14838
> 
> Modified:
>   head/sys/amd64/include/intr_machdep.h
>   head/sys/i386/include/intr_machdep.h
>   head/sys/x86/x86/intr_machdep.c
>   head/sys/x86/x86/io_apic.c
>   head/sys/x86/x86/msi.c
>   head/sys/x86/x86/nexus.c
>   head/sys/x86/xen/xen_intr.c
> 
> Modified: head/sys/amd64/include/intr_machdep.h
> ==============================================================================
> --- head/sys/amd64/include/intr_machdep.h	Tue Mar 27 03:27:02
> 2018	(r331605) +++ head/sys/amd64/include/intr_machdep.h	Tue
> Mar 27 03:37:04 2018	(r331606) @@ -132,6 +132,7 @@ struct intsrc {
>  	u_long *is_straycount;
>  	u_int is_index;
>  	u_int is_handlers;
> +	u_int is_domain;
>  	u_int is_cpu;
>  };
>  
> @@ -168,7 +169,7 @@ void	intr_add_cpu(u_int cpu);
>  #endif
>  int	intr_add_handler(const char *name, int vector, driver_filter_t
> filter, driver_intr_t handler, void *arg, enum intr_type flags, 
> -			 void **cookiep);    
> +			 void **cookiep, int domain);    
>  #ifdef SMP
>  int	intr_bind(u_int vector, u_char cpu);
>  #endif
> @@ -176,7 +177,7 @@ int	intr_config_intr(int vector, enum intr_trigger
> tri enum intr_polarity pol);
>  int	intr_describe(u_int vector, void *ih, const char *descr);
>  void	intr_execute_handlers(struct intsrc *isrc, struct trapframe
> *frame); -u_int	intr_next_cpu(void);
> +u_int	intr_next_cpu(int domain);
>  struct intsrc *intr_lookup_source(int vector);
>  int	intr_register_pic(struct pic *pic);
>  int	intr_register_source(struct intsrc *isrc);
> 
> Modified: head/sys/i386/include/intr_machdep.h
> ==============================================================================
> --- head/sys/i386/include/intr_machdep.h	Tue Mar 27 03:27:02
> 2018	(r331605) +++ head/sys/i386/include/intr_machdep.h	Tue Mar
> 27 03:37:04 2018	(r331606) @@ -132,6 +132,7 @@ struct intsrc {
>  	u_long *is_straycount;
>  	u_int is_index;
>  	u_int is_handlers;
> +	u_int is_domain;
>  	u_int is_cpu;
>  };
>  
> @@ -158,7 +159,8 @@ void	elcr_write_trigger(u_int irq, enum
> intr_trigger t void	intr_add_cpu(u_int cpu);
>  #endif
>  int	intr_add_handler(const char *name, int vector, driver_filter_t
> filter,
> -    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
> +    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
> +    int domain);
>  #ifdef SMP
>  int	intr_bind(u_int vector, u_char cpu);
>  #endif
> @@ -166,7 +168,7 @@ int	intr_config_intr(int vector, enum intr_trigger
> tri enum intr_polarity pol);
>  int	intr_describe(u_int vector, void *ih, const char *descr);
>  void	intr_execute_handlers(struct intsrc *isrc, struct trapframe
> *frame); -u_int	intr_next_cpu(void);
> +u_int	intr_next_cpu(int domain);
>  struct intsrc *intr_lookup_source(int vector);
>  int	intr_register_pic(struct pic *pic);
>  int	intr_register_source(struct intsrc *isrc);
> 
> Modified: head/sys/x86/x86/intr_machdep.c
> ==============================================================================
> --- head/sys/x86/x86/intr_machdep.c	Tue Mar 27 03:27:02 2018
> (r331605) +++ head/sys/x86/x86/intr_machdep.c	Tue Mar 27 03:37:04
> 2018	(r331606) @@ -71,6 +71,8 @@
>  #include <isa/isareg.h>
>  #endif
>  
> +#include <vm/vm.h>
> +
>  #define	MAX_STRAY_LOG	5
>  
>  typedef void (*mask_fn)(void *);
> @@ -185,7 +187,8 @@ intr_lookup_source(int vector)
>  
>  int
>  intr_add_handler(const char *name, int vector, driver_filter_t filter,
> -    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
> +    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
> +    int domain)
>  {
>  	struct intsrc *isrc;
>  	int error;
> @@ -200,6 +203,7 @@ intr_add_handler(const char *name, int vector, driver_
>  		intrcnt_updatename(isrc);
>  		isrc->is_handlers++;
>  		if (isrc->is_handlers == 1) {
> +			isrc->is_domain = domain;
>  			isrc->is_pic->pic_enable_intr(isrc);
>  			isrc->is_pic->pic_enable_source(isrc);
>  		}
> @@ -507,14 +511,27 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
>   */
>  
>  cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
> -static int current_cpu;
> +static int current_cpu[MAXMEMDOM];
>  
> +static void
> +intr_init_cpus(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < vm_ndomains; i++) {
> +		current_cpu[i] = 0;
> +		if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
> +		    !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
> +			intr_next_cpu(i);
> +	}
> +}
> +
>  /*
>   * Return the CPU that the next interrupt source should use.  For now
>   * this just returns the next local APIC according to round-robin.
>   */
>  u_int
> -intr_next_cpu(void)
> +intr_next_cpu(int domain)
>  {
>  	u_int apic_id;
>  
> @@ -529,12 +546,13 @@ intr_next_cpu(void)
>  #endif
>  
>  	mtx_lock_spin(&icu_lock);
> -	apic_id = cpu_apic_ids[current_cpu];
> +	apic_id = cpu_apic_ids[current_cpu[domain]];
>  	do {
> -		current_cpu++;
> -		if (current_cpu > mp_maxid)
> -			current_cpu = 0;
> -	} while (!CPU_ISSET(current_cpu, &intr_cpus));
> +		current_cpu[domain]++;
> +		if (current_cpu[domain] > mp_maxid)
> +			current_cpu[domain] = 0;
> +	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
> +	    !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
>  	mtx_unlock_spin(&icu_lock);
>  	return (apic_id);
>  }
> @@ -568,7 +586,18 @@ intr_add_cpu(u_int cpu)
>  	CPU_SET(cpu, &intr_cpus);
>  }
>  
> -#ifndef EARLY_AP_STARTUP
> +#ifdef EARLY_AP_STARTUP
> +static void
> +intr_smp_startup(void *arg __unused)
> +{
> +
> +	intr_init_cpus();
> +	return;
> +}
> +SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
> +    NULL);
> +
> +#else
>  /*
>   * Distribute all the interrupt sources among the available CPUs once the
>   * AP's have been launched.
> @@ -580,6 +609,7 @@ intr_shuffle_irqs(void *arg __unused)
>  	u_int cpu;
>  	int i;
>  
> +	intr_init_cpus();
>  	/* Don't bother on UP. */
>  	if (mp_ncpus == 1)
>  		return;
> @@ -599,12 +629,12 @@ intr_shuffle_irqs(void *arg __unused)
>  			 */
>  			cpu = isrc->is_event->ie_cpu;
>  			if (cpu == NOCPU)
> -				cpu = current_cpu;
> +				cpu = current_cpu[isrc->is_domain];
>  			if (isrc->is_pic->pic_assign_cpu(isrc,
>  			    cpu_apic_ids[cpu]) == 0) {
>  				isrc->is_cpu = cpu;
>  				if (isrc->is_event->ie_cpu == NOCPU)
> -					intr_next_cpu();
> +					intr_next_cpu(isrc->is_domain);
>  			}
>  		}
>  	}
> @@ -635,10 +665,11 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
>  		isrc = interrupt_sources[i];
>  		if (isrc == NULL)
>  			continue;
> -		sbuf_printf(&sbuf, "%s:%d @%d: %ld\n",
> +		sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
>  		    isrc->is_event->ie_fullname,
>  		    isrc->is_index,
>  		    isrc->is_cpu,
> +		    isrc->is_domain,
>  		    *isrc->is_count);
>  	}
>  
> @@ -697,7 +728,7 @@ intr_balance(void *dummy __unused, int pending __unuse
>  	 * Restart the scan from the same location to avoid moving in the
>  	 * common case.
>  	 */
> -	current_cpu = 0;
> +	intr_init_cpus();
>  
>  	/*
>  	 * Assign round-robin from most loaded to least.
> @@ -706,8 +737,8 @@ intr_balance(void *dummy __unused, int pending __unuse
>  		isrc = interrupt_sorted[i];
>  		if (isrc == NULL  || isrc->is_event->ie_cpu != NOCPU)
>  			continue;
> -		cpu = current_cpu;
> -		intr_next_cpu();
> +		cpu = current_cpu[isrc->is_domain];
> +		intr_next_cpu(isrc->is_domain);
>  		if (isrc->is_cpu != cpu &&
>  		    isrc->is_pic->pic_assign_cpu(isrc,
>  		    cpu_apic_ids[cpu]) == 0)
> @@ -735,7 +766,7 @@ SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, i
>   * Always route interrupts to the current processor in the UP case.
>   */
>  u_int
> -intr_next_cpu(void)
> +intr_next_cpu(int domain)
>  {
>  
>  	return (PCPU_GET(apic_id));
> 
> Modified: head/sys/x86/x86/io_apic.c
> ==============================================================================
> --- head/sys/x86/x86/io_apic.c	Tue Mar 27 03:27:02 2018
> (r331605) +++ head/sys/x86/x86/io_apic.c	Tue Mar 27 03:37:04
> 2018	(r331606) @@ -499,7 +499,7 @@ ioapic_enable_intr(struct intsrc
> *isrc) struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
>  
>  	if (intpin->io_vector == 0)
> -		if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
> +		if (ioapic_assign_cpu(isrc,
> intr_next_cpu(isrc->is_domain)) != 0) panic("Couldn't find an APIC vector for
> IRQ %d", intpin->io_irq);
>  	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
> 
> Modified: head/sys/x86/x86/msi.c
> ==============================================================================
> --- head/sys/x86/x86/msi.c	Tue Mar 27 03:27:02 2018	(r331605)
> +++ head/sys/x86/x86/msi.c	Tue Mar 27 03:37:04 2018	(r331606)
> @@ -363,7 +363,7 @@ int
>  msi_alloc(device_t dev, int count, int maxcount, int *irqs)
>  {
>  	struct msi_intsrc *msi, *fsrc;
> -	u_int cpu;
> +	u_int cpu, domain;
>  	int cnt, i, *mirqs, vector;
>  #ifdef ACPI_DMAR
>  	u_int cookies[count];
> @@ -373,6 +373,9 @@ msi_alloc(device_t dev, int count, int maxcount, int *
>  	if (!msi_enabled)
>  		return (ENXIO);
>  
> +	if (bus_get_domain(dev, &domain) != 0)
> +		domain = 0;
> +
>  	if (count > 1)
>  		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
>  	else
> @@ -420,7 +423,7 @@ again:
>  	KASSERT(cnt == count, ("count mismatch"));
>  
>  	/* Allocate 'count' IDT vectors. */
> -	cpu = intr_next_cpu();
> +	cpu = intr_next_cpu(domain);
>  	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
>  	if (vector == 0) {
>  		mtx_unlock(&msi_lock);
> @@ -610,7 +613,7 @@ int
>  msix_alloc(device_t dev, int *irq)
>  {
>  	struct msi_intsrc *msi;
> -	u_int cpu;
> +	u_int cpu, domain;
>  	int i, vector;
>  #ifdef ACPI_DMAR
>  	u_int cookie;
> @@ -620,6 +623,9 @@ msix_alloc(device_t dev, int *irq)
>  	if (!msi_enabled)
>  		return (ENXIO);
>  
> +	if (bus_get_domain(dev, &domain) != 0)
> +		domain = 0;
> +
>  again:
>  	mtx_lock(&msi_lock);
>  
> @@ -651,7 +657,7 @@ again:
>  	}
>  
>  	/* Allocate an IDT vector. */
> -	cpu = intr_next_cpu();
> +	cpu = intr_next_cpu(domain);
>  	vector = apic_alloc_vector(cpu, i);
>  	if (vector == 0) {
>  		mtx_unlock(&msi_lock);
> 
> Modified: head/sys/x86/x86/nexus.c
> ==============================================================================
> --- head/sys/x86/x86/nexus.c	Tue Mar 27 03:27:02 2018	(r331605)
> +++ head/sys/x86/x86/nexus.c	Tue Mar 27 03:37:04 2018	(r331606)
> @@ -573,7 +573,7 @@ nexus_setup_intr(device_t bus, device_t child, struct 
>  		 int flags, driver_filter_t filter, void (*ihand)(void *),
>  		 void *arg, void **cookiep)
>  {
> -	int		error;
> +	int		error, domain;
>  
>  	/* somebody tried to setup an irq that failed to allocate! */
>  	if (irq == NULL)
> @@ -589,9 +589,11 @@ nexus_setup_intr(device_t bus, device_t child, struct 
>  	error = rman_activate_resource(irq);
>  	if (error)
>  		return (error);
> +	if (bus_get_domain(child, &domain) != 0)
> +		domain = 0;
>  
>  	error = intr_add_handler(device_get_nameunit(child),
> -	    rman_get_start(irq), filter, ihand, arg, flags, cookiep);
> +	    rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain);
>  
>  	return (error);
>  }
> 
> Modified: head/sys/x86/xen/xen_intr.c
> ==============================================================================
> --- head/sys/x86/xen/xen_intr.c	Tue Mar 27 03:27:02 2018
> (r331605) +++ head/sys/x86/xen/xen_intr.c	Tue Mar 27 03:37:04
> 2018	(r331606) @@ -430,7 +430,7 @@ xen_intr_bind_isrc(struct xenisrc
> **isrcp, evtchn_port
>  		 * unless specified otherwise, so shuffle them to balance
>  		 * the interrupt load.
>  		 */
> -		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu());
> +		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0));
>  	}
>  #endif
>  
> @@ -1562,7 +1562,7 @@ xen_intr_add_handler(const char *name, driver_filter_t
>  		return (EINVAL);
>  
>  	error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg,
> -	    flags|INTR_EXCL, &isrc->xi_cookie);
> +	    flags|INTR_EXCL, &isrc->xi_cookie, 0);
>  	if (error != 0) {
>  		printf(
>  		    "%s: xen_intr_add_handler: intr_add_handler failed:
> %d\n", _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Has this been ever tested?

This commit makes ALL multi CPU systems I tested it on getting stuck on
detecting and reporting the physical and virtual (SMP) CPUs on all boxes.

Boot process is then stuck forever.

Kind regards

oh


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20180327081535.0dacffcb>