From owner-svn-src-head@freebsd.org Thu Nov 19 10:00:49 2020 Return-Path: Delivered-To: svn-src-head@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 26BB72ED4F8; Thu, 19 Nov 2020 10:00:49 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4CcFYX6V9Sz4Sp2; Thu, 19 Nov 2020 10:00:48 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id D1EA221EFF; Thu, 19 Nov 2020 10:00:48 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 0AJA0mqj036192; Thu, 19 Nov 2020 10:00:48 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 0AJA0mfp036191; Thu, 19 Nov 2020 10:00:48 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <202011191000.0AJA0mfp036191@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Thu, 19 Nov 2020 10:00:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r367842 - head/sys/kern X-SVN-Group: head X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: head/sys/kern X-SVN-Commit-Revision: 367842 X-SVN-Commit-Repository: base MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 19 Nov 2020 10:00:49 -0000 Author: mjg Date: Thu Nov 19 10:00:48 2020 New Revision: 367842 URL: https://svnweb.freebsd.org/changeset/base/367842 Log: thread: numa-aware zombie reaping The current global list is a significant problem; in particular, it induces a lot of cross-domain thread frees. When running poudriere on a 2-domain box about half of all frees were of that nature. The patch below introduces per-domain thread data containing zombie lists and domain-aware reaping. By default it only reaps from the current domain, only reaping from others if there is a shortage of free TIDs. A dedicated callout is introduced to reap lingering threads if there happens to be no activity. Reviewed by: kib, markj Differential Revision: https://reviews.freebsd.org/D27185 Modified: head/sys/kern/kern_thread.c Modified: head/sys/kern/kern_thread.c ============================================================================== --- head/sys/kern/kern_thread.c Thu Nov 19 09:26:51 2020 (r367841) +++ head/sys/kern/kern_thread.c Thu Nov 19 10:00:48 2020 (r367842) @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -64,9 +65,11 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include +#include #include /* @@ -128,9 +131,20 @@ SDT_PROBE_DEFINE(proc, , , lwp__exit); */ static uma_zone_t thread_zone; -static __exclusive_cache_line struct thread *thread_zombies; +struct thread_domain_data { + struct thread *tdd_zombies; + int tdd_reapticks; +} __aligned(CACHE_LINE_SIZE); +static struct thread_domain_data thread_domain_data[MAXMEMDOM]; + +static struct task thread_reap_task; +static struct callout thread_reap_callout; + 
static void thread_zombie(struct thread *); +static void thread_reap_all(void); +static void thread_reap_task_cb(void *, int); +static void thread_reap_callout_cb(void *); static int thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary); static void thread_free_batched(struct thread *td); @@ -159,30 +173,45 @@ EVENTHANDLER_LIST_DEFINE(thread_init); EVENTHANDLER_LIST_DEFINE(thread_fini); static bool -thread_count_inc(void) +thread_count_inc_try(void) { - static struct timeval lastfail; - static int curfail; int nthreads_new; - thread_reap(); - nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; if (nthreads_new >= maxthread - 100) { if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || nthreads_new >= maxthread) { atomic_subtract_int(&nthreads, 1); - if (ppsratecheck(&lastfail, &curfail, 1)) { - printf("maxthread limit exceeded by uid %u " - "(pid %d); consider increasing kern.maxthread\n", - curthread->td_ucred->cr_ruid, curproc->p_pid); - } return (false); } } return (true); } +static bool +thread_count_inc(void) +{ + static struct timeval lastfail; + static int curfail; + + thread_reap(); + if (thread_count_inc_try()) { + return (true); + } + + thread_reap_all(); + if (thread_count_inc_try()) { + return (true); + } + + if (ppsratecheck(&lastfail, &curfail, 1)) { + printf("maxthread limit exceeded by uid %u " + "(pid %d); consider increasing kern.maxthread\n", + curthread->td_ucred->cr_ruid, curproc->p_pid); + } + return (false); +} + static void thread_count_sub(int n) { @@ -500,6 +529,10 @@ threadinit(void) M_TIDHASH, M_WAITOK | M_ZERO); for (i = 0; i < tidhashlock + 1; i++) rw_init(&tidhashtbl_lock[i], "tidhash"); + + TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL); + callout_init(&thread_reap_callout, 1); + callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL); } /* @@ -508,12 +541,14 @@ threadinit(void) void thread_zombie(struct thread *td) { + struct thread_domain_data *tdd; struct thread *ztd; - ztd 
= atomic_load_ptr(&thread_zombies); + tdd = &thread_domain_data[vm_phys_domain(vtophys(td))]; + ztd = atomic_load_ptr(&tdd->tdd_zombies); for (;;) { td->td_zombie = ztd; - if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies, + if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies, (uintptr_t *)&ztd, (uintptr_t)td)) break; continue; @@ -531,10 +566,10 @@ thread_stash(struct thread *td) } /* - * Reap zombie threads. + * Reap zombies from passed domain. */ -void -thread_reap(void) +static void +thread_reap_domain(struct thread_domain_data *tdd) { struct thread *itd, *ntd; struct tidbatch tidbatch; @@ -547,19 +582,26 @@ thread_reap(void) * Reading upfront is pessimal if followed by concurrent atomic_swap, * but most of the time the list is empty. */ - if (thread_zombies == NULL) + if (tdd->tdd_zombies == NULL) return; - itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies, + itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies, (uintptr_t)NULL); if (itd == NULL) return; + /* + * Multiple CPUs can get here, the race is fine as ticks is only + * advisory. + */ + tdd->tdd_reapticks = ticks; + tidbatch_prep(&tidbatch); credbatch_prep(&credbatch); tdcount = 0; lim = NULL; limcount = 0; + while (itd != NULL) { ntd = itd->td_zombie; EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd); @@ -592,6 +634,68 @@ thread_reap(void) } MPASS(limcount != 0); lim_freen(lim, limcount); +} + +/* + * Reap zombies from all domains. + */ +static void +thread_reap_all(void) +{ + struct thread_domain_data *tdd; + int i, domain; + + domain = PCPU_GET(domain); + for (i = 0; i < vm_ndomains; i++) { + tdd = &thread_domain_data[(i + domain) % vm_ndomains]; + thread_reap_domain(tdd); + } +} + +/* + * Reap zombies from local domain. 
+ */ +void +thread_reap(void) +{ + struct thread_domain_data *tdd; + int domain; + + domain = PCPU_GET(domain); + tdd = &thread_domain_data[domain]; + + thread_reap_domain(tdd); +} + +static void +thread_reap_task_cb(void *arg __unused, int pending __unused) +{ + + thread_reap_all(); +} + +static void +thread_reap_callout_cb(void *arg __unused) +{ + struct thread_domain_data *tdd; + int i, cticks, lticks; + bool wantreap; + + wantreap = false; + cticks = atomic_load_int(&ticks); + for (i = 0; i < vm_ndomains; i++) { + tdd = &thread_domain_data[i]; + lticks = tdd->tdd_reapticks; + if (tdd->tdd_zombies != NULL && + (u_int)(cticks - lticks) > 5 * hz) { + wantreap = true; + break; + } + } + + if (wantreap) + taskqueue_enqueue(taskqueue_thread, &thread_reap_task); + callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL); } /*