Date: Thu, 19 Nov 2020 15:12:57 -0300
From: luporl <luporl@freebsd.org>
To: Mateusz Guzik <mjg@freebsd.org>
Cc: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: Re: svn commit: r367842 - head/sys/kern
Message-ID: <CAC7XEcJ4aJFc4GJ0scz=tUo2V02iXx1M%2B%2BDcO4w_4fP=eB-Ahw@mail.gmail.com>
In-Reply-To: <202011191000.0AJA0mfp036191@repo.freebsd.org>
References: <202011191000.0AJA0mfp036191@repo.freebsd.org>
This causes a panic on PowerPC64:

panic: mtx_lock() by idle thread 0xc008000006437100 on sleep mutex kernelpmap @ /usr/home/luporl/git/master/sys/powerpc/aim/mmu_oea64.c:2237
cpuid = 31
time = 1605806644
KDB: stack backtrace:
0xc0080000014d91e0: at kdb_backtrace+0x60
0xc0080000014d92f0: at vpanic+0x1e0
0xc0080000014d93a0: at panic+0x40
0xc0080000014d93d0: at __mtx_lock_flags+0x23c
0xc0080000014d9480: at moea64_kextract+0x68
0xc0080000014d9560: at thread_stash+0x48
0xc0080000014d95a0: at mi_switch+0x1fc
0xc0080000014d9620: at critical_exit_preempt+0x88
0xc0080000014d9650: at cpu_idle+0xd4
0xc0080000014d96c0: at sched_idletd+0x440
0xc0080000014d9820: at fork_exit+0xc4
0xc0080000014d98c0: at fork_trampoline+0x18
0xc0080000014d98f0: at cpu_reset_handler+0x64

It seems the problem is the vtophys() call in thread_zombie(): PPC64's pmap_kextract() will try to acquire a pmap lock for addresses outside the DMAP range, and here it is reached from the idle thread (thread_stash() called from mi_switch()), which must not take a sleep mutex.

On Thu, Nov 19, 2020 at 7:01 AM Mateusz Guzik <mjg@freebsd.org> wrote:
> Author: mjg
> Date: Thu Nov 19 10:00:48 2020
> New Revision: 367842
> URL: https://svnweb.freebsd.org/changeset/base/367842
>
> Log:
>   thread: numa-aware zombie reaping
>
>   The current global list is a significant problem, in particular induces a lot
>   of cross-domain thread frees. When running poudriere on a 2 domain box about
>   half of all frees were of that nature.
>
>   Patch below introduces per-domain thread data containing zombie lists and
>   domain-aware reaping. By default it only reaps from the current domain, only
>   reaping from others if there is free TID shortage.
>
>   A dedicated callout is introduced to reap lingering threads if there happens
>   to be no activity.
>
>   Reviewed by:	kib, markj
>   Differential Revision:	https://reviews.freebsd.org/D27185
>
> Modified:
>   head/sys/kern/kern_thread.c
>
> Modified: head/sys/kern/kern_thread.c
> ==============================================================================
> --- head/sys/kern/kern_thread.c	Thu Nov 19 09:26:51 2020	(r367841)
> +++ head/sys/kern/kern_thread.c	Thu Nov 19 10:00:48 2020	(r367842)
> @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
>  #include <sys/syscallsubr.h>
>  #include <sys/sysent.h>
>  #include <sys/turnstile.h>
> +#include <sys/taskqueue.h>
>  #include <sys/ktr.h>
>  #include <sys/rwlock.h>
>  #include <sys/umtx.h>
> @@ -64,9 +65,11 @@ __FBSDID("$FreeBSD$");
>
>  #include <security/audit/audit.h>
>
> +#include <vm/pmap.h>
>  #include <vm/vm.h>
>  #include <vm/vm_extern.h>
>  #include <vm/uma.h>
> +#include <vm/vm_phys.h>
>  #include <sys/eventhandler.h>
>
>  /*
> @@ -128,9 +131,20 @@ SDT_PROBE_DEFINE(proc, , , lwp__exit);
>   */
>  static uma_zone_t thread_zone;
>
> -static __exclusive_cache_line struct thread *thread_zombies;
> +struct thread_domain_data {
> +	struct thread	*tdd_zombies;
> +	int		tdd_reapticks;
> +} __aligned(CACHE_LINE_SIZE);
>
> +static struct thread_domain_data thread_domain_data[MAXMEMDOM];
>
> +static struct task	thread_reap_task;
> +static struct callout	thread_reap_callout;
>
>  static void thread_zombie(struct thread *);
> +static void thread_reap_all(void);
> +static void thread_reap_task_cb(void *, int);
> +static void thread_reap_callout_cb(void *);
>  static int thread_unsuspend_one(struct thread *td, struct proc *p,
>      bool boundary);
>  static void thread_free_batched(struct thread *td);
> @@ -159,30 +173,45 @@ EVENTHANDLER_LIST_DEFINE(thread_init);
>  EVENTHANDLER_LIST_DEFINE(thread_fini);
>
>  static bool
> -thread_count_inc(void)
> +thread_count_inc_try(void)
>  {
> -	static struct timeval lastfail;
> -	static int curfail;
>  	int nthreads_new;
>
> -	thread_reap();
> -
>  	nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
>  	if (nthreads_new >= maxthread - 100) {
>  		if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
>  		    nthreads_new >= maxthread) {
>  			atomic_subtract_int(&nthreads, 1);
> -			if (ppsratecheck(&lastfail, &curfail, 1)) {
> -				printf("maxthread limit exceeded by uid %u "
> -				    "(pid %d); consider increasing kern.maxthread\n",
> -				    curthread->td_ucred->cr_ruid, curproc->p_pid);
> -			}
>  			return (false);
>  		}
>  	}
>  	return (true);
>  }
>
> +static bool
> +thread_count_inc(void)
> +{
> +	static struct timeval lastfail;
> +	static int curfail;
> +
> +	thread_reap();
> +	if (thread_count_inc_try()) {
> +		return (true);
> +	}
> +
> +	thread_reap_all();
> +	if (thread_count_inc_try()) {
> +		return (true);
> +	}
> +
> +	if (ppsratecheck(&lastfail, &curfail, 1)) {
> +		printf("maxthread limit exceeded by uid %u "
> +		    "(pid %d); consider increasing kern.maxthread\n",
> +		    curthread->td_ucred->cr_ruid, curproc->p_pid);
> +	}
> +	return (false);
> +}
> +
>  static void
>  thread_count_sub(int n)
>  {
> @@ -500,6 +529,10 @@ threadinit(void)
>  	    M_TIDHASH, M_WAITOK | M_ZERO);
>  	for (i = 0; i < tidhashlock + 1; i++)
>  		rw_init(&tidhashtbl_lock[i], "tidhash");
> +
> +	TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
> +	callout_init(&thread_reap_callout, 1);
> +	callout_reset(&thread_reap_callout, 5 * hz,
> +	    thread_reap_callout_cb, NULL);
>  }
>
>  /*
> @@ -508,12 +541,14 @@ threadinit(void)
>  void
>  thread_zombie(struct thread *td)
>  {
> +	struct thread_domain_data *tdd;
>  	struct thread *ztd;
>
> -	ztd = atomic_load_ptr(&thread_zombies);
> +	tdd = &thread_domain_data[vm_phys_domain(vtophys(td))];
> +	ztd = atomic_load_ptr(&tdd->tdd_zombies);
>  	for (;;) {
>  		td->td_zombie = ztd;
> -		if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
> +		if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
>  		    (uintptr_t *)&ztd, (uintptr_t)td))
>  			break;
>  		continue;
> @@ -531,10 +566,10 @@ thread_stash(struct thread *td)
>  }
>
>  /*
> - * Reap zombie threads.
> + * Reap zombies from passed domain.
>   */
> -void
> -thread_reap(void)
> +static void
> +thread_reap_domain(struct thread_domain_data *tdd)
>  {
>  	struct thread *itd, *ntd;
>  	struct tidbatch tidbatch;
> @@ -547,19 +582,26 @@ thread_reap(void)
>  	 * Reading upfront is pessimal if followed by concurrent atomic_swap,
>  	 * but most of the time the list is empty.
>  	 */
> -	if (thread_zombies == NULL)
> +	if (tdd->tdd_zombies == NULL)
>  		return;
>
> -	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies,
> +	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
>  	    (uintptr_t)NULL);
>  	if (itd == NULL)
>  		return;
>
> +	/*
> +	 * Multiple CPUs can get here, the race is fine as ticks is only
> +	 * advisory.
> +	 */
> +	tdd->tdd_reapticks = ticks;
> +
>  	tidbatch_prep(&tidbatch);
>  	credbatch_prep(&credbatch);
>  	tdcount = 0;
>  	lim = NULL;
>  	limcount = 0;
> +
>  	while (itd != NULL) {
>  		ntd = itd->td_zombie;
>  		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
> @@ -592,6 +634,68 @@ thread_reap(void)
>  	}
>  	MPASS(limcount != 0);
>  	lim_freen(lim, limcount);
> +}
> +
> +/*
> + * Reap zombies from all domains.
> + */
> +static void
> +thread_reap_all(void)
> +{
> +	struct thread_domain_data *tdd;
> +	int i, domain;
> +
> +	domain = PCPU_GET(domain);
> +	for (i = 0; i < vm_ndomains; i++) {
> +		tdd = &thread_domain_data[(i + domain) % vm_ndomains];
> +		thread_reap_domain(tdd);
> +	}
> +}
> +
> +/*
> + * Reap zombies from local domain.
> + */
> +void
> +thread_reap(void)
> +{
> +	struct thread_domain_data *tdd;
> +	int domain;
> +
> +	domain = PCPU_GET(domain);
> +	tdd = &thread_domain_data[domain];
> +
> +	thread_reap_domain(tdd);
> +}
> +
> +static void
> +thread_reap_task_cb(void *arg __unused, int pending __unused)
> +{
> +
> +	thread_reap_all();
> +}
> +
> +static void
> +thread_reap_callout_cb(void *arg __unused)
> +{
> +	struct thread_domain_data *tdd;
> +	int i, cticks, lticks;
> +	bool wantreap;
> +
> +	wantreap = false;
> +	cticks = atomic_load_int(&ticks);
> +	for (i = 0; i < vm_ndomains; i++) {
> +		tdd = &thread_domain_data[i];
> +		lticks = tdd->tdd_reapticks;
> +		if (tdd->tdd_zombies != NULL &&
> +		    (u_int)(cticks - lticks) > 5 * hz) {
> +			wantreap = true;
> +			break;
> +		}
> +	}
> +
> +	if (wantreap)
> +		taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
> +	callout_reset(&thread_reap_callout, 5 * hz,
> +	    thread_reap_callout_cb, NULL);
> +}
>
>  /*
>
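FWIW, one possible way around this (untested sketch only; the td_zombie_domain field below is purely illustrative and does not exist in struct thread today) would be to resolve the NUMA domain once, at thread allocation time, where blocking on the kernel pmap mutex is fine, and have thread_zombie() use the cached value instead of calling vtophys() from the idle thread:

/*
 * Untested sketch.  td_zombie_domain is a hypothetical new field in
 * struct thread, filled in while the thread is being set up, so that
 * thread_zombie() never has to do a kextract from a context that
 * cannot block on the pmap lock.
 */

/* E.g. in thread_alloc(), once the thread memory is known: */
td->td_zombie_domain = vm_phys_domain(vtophys(td));

/* thread_zombie() then becomes a plain array lookup: */
void
thread_zombie(struct thread *td)
{
	struct thread_domain_data *tdd;
	struct thread *ztd;

	tdd = &thread_domain_data[td->td_zombie_domain];
	ztd = atomic_load_ptr(&tdd->tdd_zombies);
	for (;;) {
		td->td_zombie = ztd;
		if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
		    (uintptr_t *)&ztd, (uintptr_t)td))
			break;
	}
}

This would keep the lock-free per-domain push from the patch as is, and only move the physical address lookup to a point where sleeping is allowed.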