Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 14 Apr 2021 17:03:46 GMT
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: aabe13f1450b - main - uma: Introduce per-domain reclamation functions
Message-ID:  <202104141703.13EH3kpg038659@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=aabe13f1450bb4caba66ec2a7a41c0dfefff511d

commit aabe13f1450bb4caba66ec2a7a41c0dfefff511d
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2021-04-14 16:57:24 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2021-04-14 17:03:34 +0000

    uma: Introduce per-domain reclamation functions
    
    Make it possible to reclaim items from a specific NUMA domain.
    
    - Add uma_zone_reclaim_domain() and uma_reclaim_domain().
    - Permit parallel reclamations.  Use a counter instead of a flag to
      synchronize with zone_dtor().
    - Use the zone lock to protect cache_shrink() now that parallel reclaims
      can happen.
    - Add a sysctl that can be used to trigger reclamation from a specific
      domain.
    
    Currently the new KPIs are unused, so there should be no functional
    change.
    
    Reviewed by:    mav
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D29685
---
 share/man/man9/zone.9 |  14 ++++-
 sys/vm/uma.h          |   8 ++-
 sys/vm/uma_core.c     | 152 ++++++++++++++++++++++++++++++--------------------
 sys/vm/uma_int.h      |   5 +-
 sys/vm/vm_kern.c      |  29 +++++++++-
 5 files changed, 137 insertions(+), 71 deletions(-)

diff --git a/share/man/man9/zone.9 b/share/man/man9/zone.9
index 7da40b13469b..89d5f3e2640f 100644
--- a/share/man/man9/zone.9
+++ b/share/man/man9/zone.9
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 11, 2021
+.Dd April 14, 2021
 .Dt UMA 9
 .Os
 .Sh NAME
@@ -98,8 +98,12 @@ typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 .Ft void
 .Fn uma_reclaim "int req"
 .Ft void
+.Fn uma_reclaim_domain "int req" "int domain"
+.Ft void
 .Fn uma_zone_reclaim "uma_zone_t zone" "int req"
 .Ft void
+.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
+.Ft void
 .Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
 .Ft void
 .Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
@@ -471,6 +475,14 @@ Free items in the per-CPU caches are left alone.
 .It Dv UMA_RECLAIM_DRAIN_CPU
 Reclaim all cached items.
 .El
+The
+.Fn uma_reclaim_domain
+and
+.Fn uma_zone_reclaim_domain
+functions apply only to items allocated from the specified domain.
+In the case of zones using a round-robin NUMA policy, cached items from all
+domains are freed to the keg, but only slabs from the specific domain will
+be freed.
 .Pp
 The
 .Fn uma_zone_set_allocf
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 361c64900845..5d473ba909b6 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -446,10 +446,12 @@ typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 
 /*
- * Reclaims unused memory
+ * Reclaims unused memory.  If no NUMA domain is specified, memory from all
+ * domains is reclaimed.
  *
  * Arguments:
- *	req  Reclamation request type.
+ *	req    Reclamation request type.
+ *	domain The target NUMA domain.
  * Returns:
  *	None
  */
@@ -457,7 +459,9 @@ typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
 #define	UMA_RECLAIM_DRAIN_CPU	2	/* release bucket and per-CPU caches */
 #define	UMA_RECLAIM_TRIM	3	/* trim bucket cache to WSS */
 void uma_reclaim(int req);
+void uma_reclaim_domain(int req, int domain);
 void uma_zone_reclaim(uma_zone_t, int req);
+void uma_zone_reclaim_domain(uma_zone_t, int req, int domain);
 
 /*
  * Sets the alignment mask to be used for all zones requesting cache
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 0348d6468d74..6b0add6b6b07 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -168,17 +168,20 @@ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
 static LIST_HEAD(,uma_zone) uma_cachezones =
     LIST_HEAD_INITIALIZER(uma_cachezones);
 
-/* This RW lock protects the keg list */
+/*
+ * RW lock for global lists: uma_kegs, uma_cachezones, and the per-keg list
+ * of zones.
+ */
 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 
+static struct sx uma_reclaim_lock;
+
 /*
  * First available virual address for boot time allocations.
  */
 static vm_offset_t bootstart;
 static vm_offset_t bootmem;
 
-static struct sx uma_reclaim_lock;
-
 /*
  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
  * allocations don't trigger a wakeup of the reclaim thread.
@@ -289,7 +292,7 @@ static void pcpu_page_free(void *, vm_size_t, uint8_t);
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
-static void bucket_cache_reclaim(uma_zone_t zone, bool);
+static void bucket_cache_reclaim(uma_zone_t zone, bool, int);
 static int keg_ctor(void *, int, void *, int);
 static void keg_dtor(void *, int, void *);
 static int zone_ctor(void *, int, void *, int);
@@ -315,7 +318,7 @@ static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
-static void bucket_zone_drain(void);
+static void bucket_zone_drain(int domain);
 static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
@@ -525,12 +528,13 @@ bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 }
 
 static void
-bucket_zone_drain(void)
+bucket_zone_drain(int domain)
 {
 	struct uma_bucket_zone *ubz;
 
 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
-		uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
+		uma_zone_reclaim_domain(ubz->ubz_zone, UMA_RECLAIM_DRAIN,
+		    domain);
 }
 
 #ifdef KASAN
@@ -1308,7 +1312,7 @@ cache_drain(uma_zone_t zone)
 			bucket_free(zone, bucket, NULL);
 		}
 	}
-	bucket_cache_reclaim(zone, true);
+	bucket_cache_reclaim(zone, true, UMA_ANYDOMAIN);
 }
 
 static void
@@ -1318,8 +1322,10 @@ cache_shrink(uma_zone_t zone, void *unused)
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
 
+	ZONE_LOCK(zone);
 	zone->uz_bucket_size =
 	    (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
+	ZONE_UNLOCK(zone);
 }
 
 static void
@@ -1442,7 +1448,7 @@ bucket_cache_reclaim_domain(uma_zone_t zone, bool drain, int domain)
 }
 
 static void
-bucket_cache_reclaim(uma_zone_t zone, bool drain)
+bucket_cache_reclaim(uma_zone_t zone, bool drain, int domain)
 {
 	int i;
 
@@ -1453,8 +1459,13 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
 	if (zone->uz_bucket_size > zone->uz_bucket_size_min)
 		zone->uz_bucket_size--;
 
-	for (i = 0; i < vm_ndomains; i++)
-		bucket_cache_reclaim_domain(zone, drain, i);
+	if (domain != UMA_ANYDOMAIN &&
+	    (zone->uz_flags & UMA_ZONE_ROUNDROBIN) == 0) {
+		bucket_cache_reclaim_domain(zone, drain, domain);
+	} else {
+		for (i = 0; i < vm_ndomains; i++)
+			bucket_cache_reclaim_domain(zone, drain, i);
+	}
 }
 
 static void
@@ -1561,63 +1572,65 @@ keg_drain_domain(uma_keg_t keg, int domain)
  * Returns nothing.
  */
 static void
-keg_drain(uma_keg_t keg)
+keg_drain(uma_keg_t keg, int domain)
 {
 	int i;
 
 	if ((keg->uk_flags & UMA_ZONE_NOFREE) != 0)
 		return;
-	for (i = 0; i < vm_ndomains; i++)
-		keg_drain_domain(keg, i);
+	if (domain != UMA_ANYDOMAIN) {
+		keg_drain_domain(keg, domain);
+	} else {
+		for (i = 0; i < vm_ndomains; i++)
+			keg_drain_domain(keg, i);
+	}
 }
 
 static void
-zone_reclaim(uma_zone_t zone, int waitok, bool drain)
+zone_reclaim(uma_zone_t zone, int domain, int waitok, bool drain)
 {
-
 	/*
-	 * Set draining to interlock with zone_dtor() so we can release our
-	 * locks as we go.  Only dtor() should do a WAITOK call since it
-	 * is the only call that knows the structure will still be available
-	 * when it wakes up.
+	 * Count active reclaim operations in order to interlock with
+	 * zone_dtor(), which removes the zone from global lists before
+	 * attempting to reclaim items itself.
+	 *
+	 * The zone may be destroyed while sleeping, so only zone_dtor() should
+	 * specify M_WAITOK.
 	 */
 	ZONE_LOCK(zone);
-	while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
-		if (waitok == M_NOWAIT)
-			goto out;
-		msleep(zone, &ZDOM_GET(zone, 0)->uzd_lock, PVM, "zonedrain",
-		    1);
+	if (waitok == M_WAITOK) {
+		while (zone->uz_reclaimers > 0)
+			msleep(zone, ZONE_LOCKPTR(zone), PVM, "zonedrain", 1);
 	}
-	zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
+	zone->uz_reclaimers++;
 	ZONE_UNLOCK(zone);
-	bucket_cache_reclaim(zone, drain);
+	bucket_cache_reclaim(zone, drain, domain);
 
-	/*
-	 * The DRAINING flag protects us from being freed while
-	 * we're running.  Normally the uma_rwlock would protect us but we
-	 * must be able to release and acquire the right lock for each keg.
-	 */
 	if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
-		keg_drain(zone->uz_keg);
+		keg_drain(zone->uz_keg, domain);
 	ZONE_LOCK(zone);
-	zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
-	wakeup(zone);
-out:
+	zone->uz_reclaimers--;
+	if (zone->uz_reclaimers == 0)
+		wakeup(zone);
 	ZONE_UNLOCK(zone);
 }
 
 static void
-zone_drain(uma_zone_t zone, void *unused)
+zone_drain(uma_zone_t zone, void *arg)
 {
+	int domain;
 
-	zone_reclaim(zone, M_NOWAIT, true);
+	domain = (int)(uintptr_t)arg;
+	zone_reclaim(zone, domain, M_NOWAIT, true);
 }
 
 static void
-zone_trim(uma_zone_t zone, void *unused)
+zone_trim(uma_zone_t zone, void *arg)
 {
+	int domain;
 
-	zone_reclaim(zone, M_NOWAIT, false);
+	domain = (int)(uintptr_t)arg;
+	zone_reclaim(zone, domain, M_NOWAIT, false);
 }
 
 /*
@@ -2883,7 +2896,7 @@ zone_dtor(void *arg, int size, void *udata)
 		keg = zone->uz_keg;
 		keg->uk_reserve = 0;
 	}
-	zone_reclaim(zone, M_WAITOK, true);
+	zone_reclaim(zone, UMA_ANYDOMAIN, M_WAITOK, true);
 
 	/*
 	 * We only destroy kegs from non secondary/non cache zones.
@@ -3153,9 +3166,9 @@ uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 	args.flags = flags;
 	args.keg = NULL;
 
-	sx_slock(&uma_reclaim_lock);
+	sx_xlock(&uma_reclaim_lock);
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
-	sx_sunlock(&uma_reclaim_lock);
+	sx_xunlock(&uma_reclaim_lock);
 
 	return (res);
 }
@@ -3181,9 +3194,9 @@ uma_zsecond_create(const char *name, uma_ctor ctor, uma_dtor dtor,
 	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
 	args.keg = keg;
 
-	sx_slock(&uma_reclaim_lock);
+	sx_xlock(&uma_reclaim_lock);
 	res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
-	sx_sunlock(&uma_reclaim_lock);
+	sx_xunlock(&uma_reclaim_lock);
 
 	return (res);
 }
@@ -3224,9 +3237,9 @@ uma_zdestroy(uma_zone_t zone)
 	if (booted == BOOT_SHUTDOWN &&
 	    zone->uz_fini == NULL && zone->uz_release == zone_release)
 		return;
-	sx_slock(&uma_reclaim_lock);
+	sx_xlock(&uma_reclaim_lock);
 	zone_free_item(zones, zone, NULL, SKIP_NONE);
-	sx_sunlock(&uma_reclaim_lock);
+	sx_xunlock(&uma_reclaim_lock);
 }
 
 void
@@ -5035,22 +5048,29 @@ uma_zone_memory(uma_zone_t zone)
 void
 uma_reclaim(int req)
 {
+	uma_reclaim_domain(req, UMA_ANYDOMAIN);
+}
+
+void
+uma_reclaim_domain(int req, int domain)
+{
+	void *arg;
 
-	CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
-	sx_xlock(&uma_reclaim_lock);
 	bucket_enable();
 
+	arg = (void *)(uintptr_t)domain;
+	sx_slock(&uma_reclaim_lock);
 	switch (req) {
 	case UMA_RECLAIM_TRIM:
-		zone_foreach(zone_trim, NULL);
+		zone_foreach(zone_trim, arg);
 		break;
 	case UMA_RECLAIM_DRAIN:
+		zone_foreach(zone_drain, arg);
+		break;
 	case UMA_RECLAIM_DRAIN_CPU:
-		zone_foreach(zone_drain, NULL);
-		if (req == UMA_RECLAIM_DRAIN_CPU) {
-			pcpu_cache_drain_safe(NULL);
-			zone_foreach(zone_drain, NULL);
-		}
+		zone_foreach(zone_drain, arg);
+		pcpu_cache_drain_safe(NULL);
+		zone_foreach(zone_drain, arg);
 		break;
 	default:
 		panic("unhandled reclamation request %d", req);
@@ -5061,10 +5081,10 @@ uma_reclaim(int req)
 	 * we visit again so that we can free pages that are empty once other
 	 * zones are drained.  We have to do the same for buckets.
 	 */
-	zone_drain(slabzones[0], NULL);
-	zone_drain(slabzones[1], NULL);
-	bucket_zone_drain();
-	sx_xunlock(&uma_reclaim_lock);
+	zone_drain(slabzones[0], arg);
+	zone_drain(slabzones[1], arg);
+	bucket_zone_drain(domain);
+	sx_sunlock(&uma_reclaim_lock);
 }
 
 static volatile int uma_reclaim_needed;
@@ -5099,17 +5119,25 @@ uma_reclaim_worker(void *arg __unused)
 void
 uma_zone_reclaim(uma_zone_t zone, int req)
 {
+	uma_zone_reclaim_domain(zone, req, UMA_ANYDOMAIN);
+}
+
+void
+uma_zone_reclaim_domain(uma_zone_t zone, int req, int domain)
+{
+	void *arg;
 
+	arg = (void *)(uintptr_t)domain;
 	switch (req) {
 	case UMA_RECLAIM_TRIM:
-		zone_trim(zone, NULL);
+		zone_trim(zone, arg);
 		break;
 	case UMA_RECLAIM_DRAIN:
-		zone_drain(zone, NULL);
+		zone_drain(zone, arg);
 		break;
 	case UMA_RECLAIM_DRAIN_CPU:
 		pcpu_cache_drain_safe(zone);
-		zone_drain(zone, NULL);
+		zone_drain(zone, arg);
 		break;
 	default:
 		panic("unhandled reclamation request %d", req);
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index 9965e486ca53..93910e78165b 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -162,7 +162,6 @@
 #define	UMA_ZFLAG_CTORDTOR	0x01000000	/* Zone has ctor/dtor set. */
 #define	UMA_ZFLAG_LIMIT		0x02000000	/* Zone has limit set. */
 #define	UMA_ZFLAG_CACHE		0x04000000	/* uma_zcache_create()d it */
-#define	UMA_ZFLAG_RECLAIMING	0x08000000	/* Running zone_reclaim(). */
 #define	UMA_ZFLAG_BUCKET	0x10000000	/* Bucket zone. */
 #define	UMA_ZFLAG_INTERNAL	0x20000000	/* No offpage no PCPU. */
 #define	UMA_ZFLAG_TRASH		0x40000000	/* Add trash ctor/dtor. */
@@ -175,7 +174,6 @@
     "\37TRASH"				\
     "\36INTERNAL"			\
     "\35BUCKET"				\
-    "\34RECLAIMING"			\
     "\33CACHE"				\
     "\32LIMIT"				\
     "\31CTORDTOR"			\
@@ -490,7 +488,7 @@ struct uma_zone {
 	char		*uz_ctlname;	/* sysctl safe name string. */
 	int		uz_namecnt;	/* duplicate name count. */
 	uint16_t	uz_bucket_size_min; /* Min number of items in bucket */
-	uint16_t	uz_pad0;
+	uint16_t	uz_reclaimers;	/* pending reclaim operations. */
 
 	/* Offset 192, rare read-only. */
 	struct sysctl_oid *uz_oid;	/* sysctl oid pointer. */
@@ -582,6 +580,7 @@ static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
 
 #define	ZONE_LOCK(z)	ZDOM_LOCK(ZDOM_GET((z), 0))
 #define	ZONE_UNLOCK(z)	ZDOM_UNLOCK(ZDOM_GET((z), 0))
+#define	ZONE_LOCKPTR(z)	(&ZDOM_GET((z), 0)->uzd_lock)
 
 #define	ZONE_CROSS_LOCK_INIT(z)					\
 	mtx_init(&(z)->uz_cross_lock, "UMA Cross", NULL, MTX_DEF)
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index a69493d1323f..7ab1fdb8950e 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -907,7 +907,6 @@ debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
 		EVENTHANDLER_INVOKE(vm_lowmem, i);
 	return (0);
 }
-
 SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem,
     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0, debug_vm_lowmem, "I",
     "set to trigger vm_lowmem event with given flags");
@@ -919,7 +918,7 @@ debug_uma_reclaim(SYSCTL_HANDLER_ARGS)
 
 	i = 0;
 	error = sysctl_handle_int(oidp, &i, 0, req);
-	if (error != 0)
+	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (i != UMA_RECLAIM_TRIM && i != UMA_RECLAIM_DRAIN &&
 	    i != UMA_RECLAIM_DRAIN_CPU)
@@ -927,7 +926,31 @@ debug_uma_reclaim(SYSCTL_HANDLER_ARGS)
 	uma_reclaim(i);
 	return (0);
 }
-
 SYSCTL_PROC(_debug, OID_AUTO, uma_reclaim,
     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0, debug_uma_reclaim, "I",
     "set to generate request to reclaim uma caches");
+
+static int
+debug_uma_reclaim_domain(SYSCTL_HANDLER_ARGS)
+{
+	int domain, error, request;
+
+	request = 0;
+	error = sysctl_handle_int(oidp, &request, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	domain = request >> 4;
+	request &= 0xf;
+	if (request != UMA_RECLAIM_TRIM && request != UMA_RECLAIM_DRAIN &&
+	    request != UMA_RECLAIM_DRAIN_CPU)
+		return (EINVAL);
+	if (domain < 0 || domain >= vm_ndomains)
+		return (EINVAL);
+	uma_reclaim_domain(request, domain);
+	return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, uma_reclaim_domain,
+    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, 0,
+    debug_uma_reclaim_domain, "I",
+    "");



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202104141703.13EH3kpg038659>