Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 19 Nov 2020 10:00:48 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r367842 - head/sys/kern
Message-ID:  <202011191000.0AJA0mfp036191@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Thu Nov 19 10:00:48 2020
New Revision: 367842
URL: https://svnweb.freebsd.org/changeset/base/367842

Log:
  thread: numa-aware zombie reaping
  
  The current global list is a significant problem, in particular induces a lot
  of cross-domain thread frees. When running poudriere on a 2 domain box about
  half of all frees were of that nature.
  
  Patch below introduces per-domain thread data containing zombie lists and
  domain-aware reaping. By default it reaps only from the current domain,
  falling back to other domains if there is a free TID shortage.
  
  A dedicated callout is introduced to reap lingering threads if there happens
  to be no activity.
  
  Reviewed by:	kib, markj
  Differential Revision:	https://reviews.freebsd.org/D27185

Modified:
  head/sys/kern/kern_thread.c

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c	Thu Nov 19 09:26:51 2020	(r367841)
+++ head/sys/kern/kern_thread.c	Thu Nov 19 10:00:48 2020	(r367842)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/turnstile.h>
+#include <sys/taskqueue.h>
 #include <sys/ktr.h>
 #include <sys/rwlock.h>
 #include <sys/umtx.h>
@@ -64,9 +65,11 @@ __FBSDID("$FreeBSD$");
 
 #include <security/audit/audit.h>
 
+#include <vm/pmap.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
+#include <vm/vm_phys.h>
 #include <sys/eventhandler.h>
 
 /*
@@ -128,9 +131,20 @@ SDT_PROBE_DEFINE(proc, , , lwp__exit);
  */
 static uma_zone_t thread_zone;
 
-static __exclusive_cache_line struct thread *thread_zombies;
+struct thread_domain_data {
+	struct thread	*tdd_zombies;
+	int		tdd_reapticks;
+} __aligned(CACHE_LINE_SIZE);
 
+static struct thread_domain_data thread_domain_data[MAXMEMDOM];
+
+static struct task	thread_reap_task;
+static struct callout  	thread_reap_callout;
+
 static void thread_zombie(struct thread *);
+static void thread_reap_all(void);
+static void thread_reap_task_cb(void *, int);
+static void thread_reap_callout_cb(void *);
 static int thread_unsuspend_one(struct thread *td, struct proc *p,
     bool boundary);
 static void thread_free_batched(struct thread *td);
@@ -159,30 +173,45 @@ EVENTHANDLER_LIST_DEFINE(thread_init);
 EVENTHANDLER_LIST_DEFINE(thread_fini);
 
 static bool
-thread_count_inc(void)
+thread_count_inc_try(void)
 {
-	static struct timeval lastfail;
-	static int curfail;
 	int nthreads_new;
 
-	thread_reap();
-
 	nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
 	if (nthreads_new >= maxthread - 100) {
 		if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
 		    nthreads_new >= maxthread) {
 			atomic_subtract_int(&nthreads, 1);
-			if (ppsratecheck(&lastfail, &curfail, 1)) {
-				printf("maxthread limit exceeded by uid %u "
-				"(pid %d); consider increasing kern.maxthread\n",
-				curthread->td_ucred->cr_ruid, curproc->p_pid);
-			}
 			return (false);
 		}
 	}
 	return (true);
 }
 
+static bool
+thread_count_inc(void)
+{
+	static struct timeval lastfail;
+	static int curfail;
+
+	thread_reap();
+	if (thread_count_inc_try()) {
+		return (true);
+	}
+
+	thread_reap_all();
+	if (thread_count_inc_try()) {
+		return (true);
+	}
+
+	if (ppsratecheck(&lastfail, &curfail, 1)) {
+		printf("maxthread limit exceeded by uid %u "
+		    "(pid %d); consider increasing kern.maxthread\n",
+		    curthread->td_ucred->cr_ruid, curproc->p_pid);
+	}
+	return (false);
+}
+
 static void
 thread_count_sub(int n)
 {
@@ -500,6 +529,10 @@ threadinit(void)
 	    M_TIDHASH, M_WAITOK | M_ZERO);
 	for (i = 0; i < tidhashlock + 1; i++)
 		rw_init(&tidhashtbl_lock[i], "tidhash");
+
+	TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
+	callout_init(&thread_reap_callout, 1);
+	callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
 }
 
 /*
@@ -508,12 +541,14 @@ threadinit(void)
 void
 thread_zombie(struct thread *td)
 {
+	struct thread_domain_data *tdd;
 	struct thread *ztd;
 
-	ztd = atomic_load_ptr(&thread_zombies);
+	tdd = &thread_domain_data[vm_phys_domain(vtophys(td))];
+	ztd = atomic_load_ptr(&tdd->tdd_zombies);
 	for (;;) {
 		td->td_zombie = ztd;
-		if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
+		if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
 		    (uintptr_t *)&ztd, (uintptr_t)td))
 			break;
 		continue;
@@ -531,10 +566,10 @@ thread_stash(struct thread *td)
 }
 
 /*
- * Reap zombie threads.
+ * Reap zombies from passed domain.
  */
-void
-thread_reap(void)
+static void
+thread_reap_domain(struct thread_domain_data *tdd)
 {
 	struct thread *itd, *ntd;
 	struct tidbatch tidbatch;
@@ -547,19 +582,26 @@ thread_reap(void)
 	 * Reading upfront is pessimal if followed by concurrent atomic_swap,
 	 * but most of the time the list is empty.
 	 */
-	if (thread_zombies == NULL)
+	if (tdd->tdd_zombies == NULL)
 		return;
 
-	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies,
+	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
 	    (uintptr_t)NULL);
 	if (itd == NULL)
 		return;
 
+	/*
+	 * Multiple CPUs can get here, the race is fine as ticks is only
+	 * advisory.
+	 */
+	tdd->tdd_reapticks = ticks;
+
 	tidbatch_prep(&tidbatch);
 	credbatch_prep(&credbatch);
 	tdcount = 0;
 	lim = NULL;
 	limcount = 0;
+
 	while (itd != NULL) {
 		ntd = itd->td_zombie;
 		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
@@ -592,6 +634,68 @@ thread_reap(void)
 	}
 	MPASS(limcount != 0);
 	lim_freen(lim, limcount);
+}
+
+/*
+ * Reap zombies from all domains.
+ */
+static void
+thread_reap_all(void)
+{
+	struct thread_domain_data *tdd;
+	int i, domain;
+
+	domain = PCPU_GET(domain);
+	for (i = 0; i < vm_ndomains; i++) {
+		tdd = &thread_domain_data[(i + domain) % vm_ndomains];
+		thread_reap_domain(tdd);
+	}
+}
+
+/*
+ * Reap zombies from local domain.
+ */
+void
+thread_reap(void)
+{
+	struct thread_domain_data *tdd;
+	int domain;
+
+	domain = PCPU_GET(domain);
+	tdd = &thread_domain_data[domain];
+
+	thread_reap_domain(tdd);
+}
+
+static void
+thread_reap_task_cb(void *arg __unused, int pending __unused)
+{
+
+	thread_reap_all();
+}
+
+static void
+thread_reap_callout_cb(void *arg __unused)
+{
+	struct thread_domain_data *tdd;
+	int i, cticks, lticks;
+	bool wantreap;
+
+	wantreap = false;
+	cticks = atomic_load_int(&ticks);
+	for (i = 0; i < vm_ndomains; i++) {
+		tdd = &thread_domain_data[i];
+		lticks = tdd->tdd_reapticks;
+		if (tdd->tdd_zombies != NULL &&
+		    (u_int)(cticks - lticks) > 5 * hz) {
+			wantreap = true;
+			break;
+		}
+	}
+
+	if (wantreap)
+		taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
+	callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
 }
 
 /*



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202011191000.0AJA0mfp036191>