Date: Tue, 27 Mar 2018 11:07:22 -1000 (HST) From: Jeff Roberson <jroberson@jroberson.net> To: Li-Wen Hsu <lwhsu@FreeBSD.org> Cc: "O. Hartmann" <o.hartmann@walstatt.org>, Jeff Roberson <jeff@FreeBSD.org>, pho@freebsd.org, svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org Subject: Re: svn commit: r331606 - in head/sys: amd64/include i386/include x86/x86 x86/xen Message-ID: <alpine.BSF.2.21.1803271105550.2307@desktop> In-Reply-To: <20180327101309.GA75720@freefall.freebsd.org> References: <201803270337.w2R3b4iv035285@repo.freebsd.org> <20180327081535.0dacffcb@freyja.zeit4.iv.bundesimmobilien.de> <20180327082651.28258a8c@freyja.zeit4.iv.bundesimmobilien.de> <alpine.BSF.2.21.1803262032260.2307@desktop> <20180327101309.GA75720@freefall.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --] On Tue, 27 Mar 2018, Li-Wen Hsu wrote: > On Mon, Mar 26, 2018 at 20:35:12 -1000, Jeff Roberson wrote: >> The patch has been on my branch for weeks and has been tested by a half >> dozen people. I'm sorry it does not work for you. If you reverted 331605 >> the change that followed should not have built properly. Did you build >> cleanly? Can you share your kernel config? >> >> I tried with and without EARLY_AP_STARTUP and with and without NUMA. I'm >> not having any trouble booting. Did you make cleandepend && make depend? > > It also hangs in our testing system: > > https://ci.freebsd.org/job/FreeBSD-head-amd64-test/6817/console > https://ci.freebsd.org/job/FreeBSD-head-i386-test/1000/console > > It is built cleanly and uses unmodified GENERIC config. > > The artifacts used are available here: > > https://artifact.ci.freebsd.org/snapshot/head/r331606/amd64/amd64/ > https://artifact.ci.freebsd.org/snapshot/head/r331606/i386/i386/ > > Hope these information help. Could someone who was experiencing the hang try the enclosed patch? I can only verify that it continues to boot for me but I believe this fixes the bug on other systems. I believe the issue was that cpuset_domain[0] was initialized too late on some systems. It depended on what kind of hardware was present and which sysinit with SI_ORDER_ANY ran first. Thanks, Jeff > > Li-Wen > > -- > Li-Wen Hsu <lwhsu@FreeBSD.org> > https://lwhsu.org > [-- Attachment #2 --] Index: amd64/include/intr_machdep.h =================================================================== --- amd64/include/intr_machdep.h (revision 331610) +++ amd64/include/intr_machdep.h (working copy) @@ -132,6 +132,7 @@ struct intsrc { u_long *is_straycount; u_int is_index; u_int is_handlers; + u_int is_domain; u_int is_cpu; }; @@ -168,7 +169,7 @@ void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, - void **cookiep); + void **cookiep, int domain); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif @@ -176,7 +177,7 @@ int intr_config_intr(int vector, enum intr_trigger enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); -u_int intr_next_cpu(void); +u_int intr_next_cpu(int domain); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); Index: i386/include/intr_machdep.h =================================================================== --- i386/include/intr_machdep.h (revision 331610) +++ i386/include/intr_machdep.h (working copy) @@ -132,6 +132,7 @@ struct intsrc { u_long *is_straycount; u_int is_index; u_int is_handlers; + u_int is_domain; u_int is_cpu; }; @@ -158,7 +159,8 @@ void elcr_write_trigger(u_int irq, enum intr_trigg void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, - driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); + driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, + int domain); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif @@ -166,7 +168,7 @@ int intr_config_intr(int vector, enum intr_trigger enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); -u_int intr_next_cpu(void); +u_int intr_next_cpu(int domain); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); Index: kern/kern_cpuset.c =================================================================== --- kern/kern_cpuset.c (revision 331604) +++ kern/kern_cpuset.c (working copy) @@ -1363,6 +1363,7 @@ cpuset_thread0(void) { struct cpuset *set; int error; + int i; cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); @@ -1374,11 +1375,11 @@ cpuset_thread0(void) * cpuset_create() due to NULL parent. */ set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); - CPU_FILL(&set->cs_mask); + CPU_COPY(&all_cpus, &set->cs_mask); LIST_INIT(&set->cs_children); LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); set->cs_ref = 1; - set->cs_flags = CPU_SET_ROOT; + set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY; set->cs_domain = &domainset0; cpuset_zero = set; cpuset_root = &set->cs_mask; @@ -1396,6 +1397,16 @@ cpuset_thread0(void) */ cpuset_unr = new_unrhdr(2, INT_MAX, NULL); + /* + * If MD code has not initialized per-domain cpusets, place all + * CPUs in domain 0. + */ + for (i = 0; i < MAXMEMDOM; i++) + if (!CPU_EMPTY(&cpuset_domain[i])) + goto domains_set; + CPU_COPY(&all_cpus, &cpuset_domain[0]); +domains_set: + return (set); } @@ -1447,34 +1458,6 @@ cpuset_setproc_update_set(struct proc *p, struct c return (0); } -/* - * This is called once the final set of system cpus is known. Modifies - * the root set and all children and mark the root read-only. - */ -static void -cpuset_init(void *arg) -{ - cpuset_t mask; - int i; - - mask = all_cpus; - if (cpuset_modify(cpuset_zero, &mask)) - panic("Can't set initial cpuset mask.\n"); - cpuset_zero->cs_flags |= CPU_SET_RDONLY; - - /* - * If MD code has not initialized per-domain cpusets, place all - * CPUs in domain 0. - */ - for (i = 0; i < MAXMEMDOM; i++) - if (!CPU_EMPTY(&cpuset_domain[i])) - goto domains_set; - CPU_COPY(&all_cpus, &cpuset_domain[0]); -domains_set: - return; -} -SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL); - #ifndef _SYS_SYSPROTO_H_ struct cpuset_args { cpusetid_t *setid; Index: x86/x86/intr_machdep.c =================================================================== --- x86/x86/intr_machdep.c (revision 331610) +++ x86/x86/intr_machdep.c (working copy) @@ -71,6 +71,8 @@ #include <isa/isareg.h> #endif +#include <vm/vm.h> + #define MAX_STRAY_LOG 5 typedef void (*mask_fn)(void *); @@ -185,7 +187,8 @@ intr_lookup_source(int vector) int intr_add_handler(const char *name, int vector, driver_filter_t filter, - driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) + driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep, + int domain) { struct intsrc *isrc; int error; @@ -200,6 +203,7 @@ intr_add_handler(const char *name, int vector, dri intrcnt_updatename(isrc); isrc->is_handlers++; if (isrc->is_handlers == 1) { + isrc->is_domain = domain; isrc->is_pic->pic_enable_intr(isrc); isrc->is_pic->pic_enable_source(isrc); } @@ -507,14 +511,27 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) */ cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); -static int current_cpu; +static int current_cpu[MAXMEMDOM]; +static void +intr_init_cpus(void) +{ + int i; + + for (i = 0; i < vm_ndomains; i++) { + current_cpu[i] = 0; + if (!CPU_ISSET(current_cpu[i], &intr_cpus) || + !CPU_ISSET(current_cpu[i], &cpuset_domain[i])) + intr_next_cpu(i); + } +} + /* * Return the CPU that the next interrupt source should use. For now * this just returns the next local APIC according to round-robin. */ u_int -intr_next_cpu(void) +intr_next_cpu(int domain) { u_int apic_id; @@ -529,12 +546,13 @@ u_int #endif mtx_lock_spin(&icu_lock); - apic_id = cpu_apic_ids[current_cpu]; + apic_id = cpu_apic_ids[current_cpu[domain]]; do { - current_cpu++; - if (current_cpu > mp_maxid) - current_cpu = 0; - } while (!CPU_ISSET(current_cpu, &intr_cpus)); + current_cpu[domain]++; + if (current_cpu[domain] > mp_maxid) + current_cpu[domain] = 0; + } while (!CPU_ISSET(current_cpu[domain], &intr_cpus) || + !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain])); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -568,7 +586,18 @@ intr_add_cpu(u_int cpu) CPU_SET(cpu, &intr_cpus); } -#ifndef EARLY_AP_STARTUP +#ifdef EARLY_AP_STARTUP +static void +intr_smp_startup(void *arg __unused) +{ + + intr_init_cpus(); + return; +} +SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup, + NULL); + +#else /* * Distribute all the interrupt sources among the available CPUs once the * AP's have been launched. @@ -580,6 +609,7 @@ intr_shuffle_irqs(void *arg __unused) u_int cpu; int i; + intr_init_cpus(); /* Don't bother on UP. */ if (mp_ncpus == 1) return; @@ -599,12 +629,12 @@ intr_shuffle_irqs(void *arg __unused) */ cpu = isrc->is_event->ie_cpu; if (cpu == NOCPU) - cpu = current_cpu; + cpu = current_cpu[isrc->is_domain]; if (isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) { isrc->is_cpu = cpu; if (isrc->is_event->ie_cpu == NOCPU) - intr_next_cpu(); + intr_next_cpu(isrc->is_domain); } } } @@ -635,10 +665,11 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) isrc = interrupt_sources[i]; if (isrc == NULL) continue; - sbuf_printf(&sbuf, "%s:%d @%d: %ld\n", + sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n", isrc->is_event->ie_fullname, isrc->is_index, isrc->is_cpu, + isrc->is_domain, *isrc->is_count); } @@ -697,7 +728,7 @@ intr_balance(void *dummy __unused, int pending __u * Restart the scan from the same location to avoid moving in the * common case. */ - current_cpu = 0; + intr_init_cpus(); /* * Assign round-robin from most loaded to least. @@ -706,8 +737,8 @@ intr_balance(void *dummy __unused, int pending __u isrc = interrupt_sorted[i]; if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) continue; - cpu = current_cpu; - intr_next_cpu(); + cpu = current_cpu[isrc->is_domain]; + intr_next_cpu(isrc->is_domain); if (isrc->is_cpu != cpu && isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]) == 0) @@ -735,7 +766,7 @@ SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_AN * Always route interrupts to the current processor in the UP case. */ u_int -intr_next_cpu(void) +intr_next_cpu(int domain) { return (PCPU_GET(apic_id)); Index: x86/x86/io_apic.c =================================================================== --- x86/x86/io_apic.c (revision 331610) +++ x86/x86/io_apic.c (working copy) @@ -499,7 +499,7 @@ ioapic_enable_intr(struct intsrc *isrc) struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) - if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0) + if (ioapic_assign_cpu(isrc, intr_next_cpu(isrc->is_domain)) != 0) panic("Couldn't find an APIC vector for IRQ %d", intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); Index: x86/x86/msi.c =================================================================== --- x86/x86/msi.c (revision 331610) +++ x86/x86/msi.c (working copy) @@ -363,7 +363,7 @@ int msi_alloc(device_t dev, int count, int maxcount, int *irqs) { struct msi_intsrc *msi, *fsrc; - u_int cpu; + u_int cpu, domain; int cnt, i, *mirqs, vector; #ifdef ACPI_DMAR u_int cookies[count]; @@ -373,6 +373,9 @@ msi_alloc(device_t dev, int count, int maxcount, i if (!msi_enabled) return (ENXIO); + if (bus_get_domain(dev, &domain) != 0) + domain = 0; + if (count > 1) mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK); else @@ -420,7 +423,7 @@ again: KASSERT(cnt == count, ("count mismatch")); /* Allocate 'count' IDT vectors. */ - cpu = intr_next_cpu(); + cpu = intr_next_cpu(domain); vector = apic_alloc_vectors(cpu, irqs, count, maxcount); if (vector == 0) { mtx_unlock(&msi_lock); @@ -610,7 +613,7 @@ int msix_alloc(device_t dev, int *irq) { struct msi_intsrc *msi; - u_int cpu; + u_int cpu, domain; int i, vector; #ifdef ACPI_DMAR u_int cookie; @@ -620,6 +623,9 @@ msix_alloc(device_t dev, int *irq) if (!msi_enabled) return (ENXIO); + if (bus_get_domain(dev, &domain) != 0) + domain = 0; + again: mtx_lock(&msi_lock); @@ -651,7 +657,7 @@ again: } /* Allocate an IDT vector. */ - cpu = intr_next_cpu(); + cpu = intr_next_cpu(domain); vector = apic_alloc_vector(cpu, i); if (vector == 0) { mtx_unlock(&msi_lock); Index: x86/x86/nexus.c =================================================================== --- x86/x86/nexus.c (revision 331610) +++ x86/x86/nexus.c (working copy) @@ -573,7 +573,7 @@ nexus_setup_intr(device_t bus, device_t child, str int flags, driver_filter_t filter, void (*ihand)(void *), void *arg, void **cookiep) { - int error; + int error, domain; /* somebody tried to setup an irq that failed to allocate! */ if (irq == NULL) @@ -589,9 +589,11 @@ nexus_setup_intr(device_t bus, device_t child, str error = rman_activate_resource(irq); if (error) return (error); + if (bus_get_domain(child, &domain) != 0) + domain = 0; error = intr_add_handler(device_get_nameunit(child), - rman_get_start(irq), filter, ihand, arg, flags, cookiep); + rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain); return (error); } Index: x86/xen/xen_intr.c =================================================================== --- x86/xen/xen_intr.c (revision 331610) +++ x86/xen/xen_intr.c (working copy) @@ -430,7 +430,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_ * unless specified otherwise, so shuffle them to balance * the interrupt load. */ - xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu()); + xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0)); } #endif @@ -1562,7 +1562,7 @@ xen_intr_add_handler(const char *name, driver_filt return (EINVAL); error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg, - flags|INTR_EXCL, &isrc->xi_cookie); + flags|INTR_EXCL, &isrc->xi_cookie, 0); if (error != 0) { printf( "%s: xen_intr_add_handler: intr_add_handler failed: %d\n",
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?alpine.BSF.2.21.1803271105550.2307>
