Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 19 Nov 2020 15:12:57 -0300
From:      luporl <luporl@freebsd.org>
To:        Mateusz Guzik <mjg@freebsd.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org,  svn-src-head@freebsd.org
Subject:   Re: svn commit: r367842 - head/sys/kern
Message-ID:  <CAC7XEcJ4aJFc4GJ0scz=tUo2V02iXx1M%2B%2BDcO4w_4fP=eB-Ahw@mail.gmail.com>
In-Reply-To: <202011191000.0AJA0mfp036191@repo.freebsd.org>
References:  <202011191000.0AJA0mfp036191@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
This causes a panic on PowerPC64:

panic: mtx_lock() by idle thread 0xc008000006437100 on sleep mutex
kernel pmap @ /usr/home/luporl/git/master/sys/powerpc/aim/mmu_oea64.c:2237
cpuid = 31
time = 1605806644
KDB: stack backtrace:
0xc0080000014d91e0: at kdb_backtrace+0x60
0xc0080000014d92f0: at vpanic+0x1e0
0xc0080000014d93a0: at panic+0x40
0xc0080000014d93d0: at __mtx_lock_flags+0x23c
0xc0080000014d9480: at moea64_kextract+0x68
0xc0080000014d9560: at thread_stash+0x48
0xc0080000014d95a0: at mi_switch+0x1fc
0xc0080000014d9620: at critical_exit_preempt+0x88
0xc0080000014d9650: at cpu_idle+0xd4
0xc0080000014d96c0: at sched_idletd+0x440
0xc0080000014d9820: at fork_exit+0xc4
0xc0080000014d98c0: at fork_trampoline+0x18
0xc0080000014d98f0: at cpu_reset_handler+0x64

It seems the problem is the vtophys() call in thread_zombie(), as PPC64's
pmap_kextract() will try to acquire a pmap lock for addresses out of DMAP
range.

On Thu, Nov 19, 2020 at 7:01 AM Mateusz Guzik <mjg@freebsd.org> wrote:

> Author: mjg
> Date: Thu Nov 19 10:00:48 2020
> New Revision: 367842
> URL: https://svnweb.freebsd.org/changeset/base/367842
>
> Log:
>   thread: numa-aware zombie reaping
>
>   The current global list is a significant problem, in particular induces
> a lot
>   of cross-domain thread frees. When running poudriere on a 2 domain box
> about
>   half of all frees were of that nature.
>
>   Patch below introduces per-domain thread data containing zombie lists and
>   domain-aware reaping. By default it only reaps from the current domain,
> only
>   reaping from others if there is free TID shortage.
>
>   A dedicated callout is introduced to reap lingering threads if there
> happens
>   to be no activity.
>
>   Reviewed by:  kib, markj
>   Differential Revision:        https://reviews.freebsd.org/D27185
>
> Modified:
>   head/sys/kern/kern_thread.c
>
> Modified: head/sys/kern/kern_thread.c
>
> ==============================================================================
> --- head/sys/kern/kern_thread.c Thu Nov 19 09:26:51 2020        (r367841)
> +++ head/sys/kern/kern_thread.c Thu Nov 19 10:00:48 2020        (r367842)
> @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
>  #include <sys/syscallsubr.h>
>  #include <sys/sysent.h>
>  #include <sys/turnstile.h>
> +#include <sys/taskqueue.h>
>  #include <sys/ktr.h>
>  #include <sys/rwlock.h>
>  #include <sys/umtx.h>
> @@ -64,9 +65,11 @@ __FBSDID("$FreeBSD$");
>
>  #include <security/audit/audit.h>
>
> +#include <vm/pmap.h>
>  #include <vm/vm.h>
>  #include <vm/vm_extern.h>
>  #include <vm/uma.h>
> +#include <vm/vm_phys.h>
>  #include <sys/eventhandler.h>
>
>  /*
> @@ -128,9 +131,20 @@ SDT_PROBE_DEFINE(proc, , , lwp__exit);
>   */
>  static uma_zone_t thread_zone;
>
> -static __exclusive_cache_line struct thread *thread_zombies;
> +struct thread_domain_data {
> +       struct thread   *tdd_zombies;
> +       int             tdd_reapticks;
> +} __aligned(CACHE_LINE_SIZE);
>
> +static struct thread_domain_data thread_domain_data[MAXMEMDOM];
> +
> +static struct task     thread_reap_task;
> +static struct callout          thread_reap_callout;
> +
>  static void thread_zombie(struct thread *);
> +static void thread_reap_all(void);
> +static void thread_reap_task_cb(void *, int);
> +static void thread_reap_callout_cb(void *);
>  static int thread_unsuspend_one(struct thread *td, struct proc *p,
>      bool boundary);
>  static void thread_free_batched(struct thread *td);
> @@ -159,30 +173,45 @@ EVENTHANDLER_LIST_DEFINE(thread_init);
>  EVENTHANDLER_LIST_DEFINE(thread_fini);
>
>  static bool
> -thread_count_inc(void)
> +thread_count_inc_try(void)
>  {
> -       static struct timeval lastfail;
> -       static int curfail;
>         int nthreads_new;
>
> -       thread_reap();
> -
>         nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
>         if (nthreads_new >= maxthread - 100) {
>                 if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) !=
> 0 ||
>                     nthreads_new >= maxthread) {
>                         atomic_subtract_int(&nthreads, 1);
> -                       if (ppsratecheck(&lastfail, &curfail, 1)) {
> -                               printf("maxthread limit exceeded by uid %u
> "
> -                               "(pid %d); consider increasing
> kern.maxthread\n",
> -                               curthread->td_ucred->cr_ruid,
> curproc->p_pid);
> -                       }
>                         return (false);
>                 }
>         }
>         return (true);
>  }
>
> +static bool
> +thread_count_inc(void)
> +{
> +       static struct timeval lastfail;
> +       static int curfail;
> +
> +       thread_reap();
> +       if (thread_count_inc_try()) {
> +               return (true);
> +       }
> +
> +       thread_reap_all();
> +       if (thread_count_inc_try()) {
> +               return (true);
> +       }
> +
> +       if (ppsratecheck(&lastfail, &curfail, 1)) {
> +               printf("maxthread limit exceeded by uid %u "
> +                   "(pid %d); consider increasing kern.maxthread\n",
> +                   curthread->td_ucred->cr_ruid, curproc->p_pid);
> +       }
> +       return (false);
> +}
> +
>  static void
>  thread_count_sub(int n)
>  {
> @@ -500,6 +529,10 @@ threadinit(void)
>             M_TIDHASH, M_WAITOK | M_ZERO);
>         for (i = 0; i < tidhashlock + 1; i++)
>                 rw_init(&tidhashtbl_lock[i], "tidhash");
> +
> +       TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
> +       callout_init(&thread_reap_callout, 1);
> +       callout_reset(&thread_reap_callout, 5 * hz,
> thread_reap_callout_cb, NULL);
>  }
>
>  /*
> @@ -508,12 +541,14 @@ threadinit(void)
>  void
>  thread_zombie(struct thread *td)
>  {
> +       struct thread_domain_data *tdd;
>         struct thread *ztd;
>
> -       ztd = atomic_load_ptr(&thread_zombies);
> +       tdd = &thread_domain_data[vm_phys_domain(vtophys(td))];
> +       ztd = atomic_load_ptr(&tdd->tdd_zombies);
>         for (;;) {
>                 td->td_zombie = ztd;
> -               if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
> +               if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
>                     (uintptr_t *)&ztd, (uintptr_t)td))
>                         break;
>                 continue;
> @@ -531,10 +566,10 @@ thread_stash(struct thread *td)
>  }
>
>  /*
> - * Reap zombie threads.
> + * Reap zombies from passed domain.
>   */
> -void
> -thread_reap(void)
> +static void
> +thread_reap_domain(struct thread_domain_data *tdd)
>  {
>         struct thread *itd, *ntd;
>         struct tidbatch tidbatch;
> @@ -547,19 +582,26 @@ thread_reap(void)
>          * Reading upfront is pessimal if followed by concurrent
> atomic_swap,
>          * but most of the time the list is empty.
>          */
> -       if (thread_zombies == NULL)
> +       if (tdd->tdd_zombies == NULL)
>                 return;
>
> -       itd = (struct thread *)atomic_swap_ptr((uintptr_t
> *)&thread_zombies,
> +       itd = (struct thread *)atomic_swap_ptr((uintptr_t
> *)&tdd->tdd_zombies,
>             (uintptr_t)NULL);
>         if (itd == NULL)
>                 return;
>
> +       /*
> +        * Multiple CPUs can get here, the race is fine as ticks is only
> +        * advisory.
> +        */
> +       tdd->tdd_reapticks = ticks;
> +
>         tidbatch_prep(&tidbatch);
>         credbatch_prep(&credbatch);
>         tdcount = 0;
>         lim = NULL;
>         limcount = 0;
> +
>         while (itd != NULL) {
>                 ntd = itd->td_zombie;
>                 EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
> @@ -592,6 +634,68 @@ thread_reap(void)
>         }
>         MPASS(limcount != 0);
>         lim_freen(lim, limcount);
> +}
> +
> +/*
> + * Reap zombies from all domains.
> + */
> +static void
> +thread_reap_all(void)
> +{
> +       struct thread_domain_data *tdd;
> +       int i, domain;
> +
> +       domain = PCPU_GET(domain);
> +       for (i = 0; i < vm_ndomains; i++) {
> +               tdd = &thread_domain_data[(i + domain) % vm_ndomains];
> +               thread_reap_domain(tdd);
> +       }
> +}
> +
> +/*
> + * Reap zombies from local domain.
> + */
> +void
> +thread_reap(void)
> +{
> +       struct thread_domain_data *tdd;
> +       int domain;
> +
> +       domain = PCPU_GET(domain);
> +       tdd = &thread_domain_data[domain];
> +
> +       thread_reap_domain(tdd);
> +}
> +
> +static void
> +thread_reap_task_cb(void *arg __unused, int pending __unused)
> +{
> +
> +       thread_reap_all();
> +}
> +
> +static void
> +thread_reap_callout_cb(void *arg __unused)
> +{
> +       struct thread_domain_data *tdd;
> +       int i, cticks, lticks;
> +       bool wantreap;
> +
> +       wantreap = false;
> +       cticks = atomic_load_int(&ticks);
> +       for (i = 0; i < vm_ndomains; i++) {
> +               tdd = &thread_domain_data[i];
> +               lticks = tdd->tdd_reapticks;
> +               if (tdd->tdd_zombies != NULL &&
> +                   (u_int)(cticks - lticks) > 5 * hz) {
> +                       wantreap = true;
> +                       break;
> +               }
> +       }
> +
> +       if (wantreap)
> +               taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
> +       callout_reset(&thread_reap_callout, 5 * hz,
> thread_reap_callout_cb, NULL);
>  }
>
>  /*
>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAC7XEcJ4aJFc4GJ0scz=tUo2V02iXx1M%2B%2BDcO4w_4fP=eB-Ahw>