Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 21 Feb 2018 23:15:06 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r329759 - in head: cddl/contrib/opensolaris/lib/libzpool/common/sys sys/cddl/compat/opensolaris/kern sys/cddl/compat/opensolaris/sys sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201802212315.w1LNF6xx071171@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Wed Feb 21 23:15:06 2018
New Revision: 329759
URL: https://svnweb.freebsd.org/changeset/base/329759

Log:
  9018 Replace kmem_cache_reap_now() with kmem_cache_reap_soon()
  
  illumos/illumos-gate@36a64e62848b51ac5a9a5216e894ec723cfef14e
  
  To prevent kmem_cache reaping from blocking other system resources, turn
  kmem_cache_reap_now() (which blocks) into kmem_cache_reap_soon(). Callers
  to kmem_cache_reap_soon() should use kmem_cache_reap_active(), which
  exploits #9017's new taskq_empty().
  
  Reviewed by: Bryan Cantrill <bryan@joyent.com>
  Reviewed by: Dan McDonald <danmcd@joyent.com>
  Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  Reviewed by: Yuri Pankov <yuripv@yuripv.net>
  Author: Tim Kordas <tim.kordas@joyent.com>
  
  FreeBSD does not use taskqueue for kmem caches reaping, so this change
  is less dramatic than it is on Illumos, just limiting reaping to 1 time
  per second.  It may possibly be improved later, if needed.

Modified:
  head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
  head/sys/cddl/compat/opensolaris/sys/kmem.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Wed Feb 21 22:24:49 2018	(r329758)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Wed Feb 21 23:15:06 2018	(r329759)
@@ -364,7 +364,8 @@ extern void cv_broadcast(kcondvar_t *cv);
 #define	kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
 #define	kmem_cache_free(_c, _b)	umem_cache_free(_c, _b)
 #define	kmem_debugging()	0
-#define	kmem_cache_reap_now(_c)		/* nothing */
+#define	kmem_cache_reap_active()	(B_FALSE)
+#define	kmem_cache_reap_soon(_c)	/* nothing */
 #define	kmem_cache_set_move(_c, _cb)	/* nothing */
 #define	POINTER_INVALIDATE(_pp)		/* nothing */
 #define	POINTER_IS_VALID(_p)	0

Modified: head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
==============================================================================
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	Wed Feb 21 22:24:49 2018	(r329758)
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	Wed Feb 21 23:15:06 2018	(r329759)
@@ -212,9 +212,30 @@ kmem_cache_free(kmem_cache_t *cache, void *buf)
 #endif
 }
 
+/*
+ * Allow our caller to determine if there are running reaps.
+ *
+ * This call is very conservative and may return B_TRUE even when
+ * reaping activity isn't active. If it returns B_FALSE, then reaping
+ * activity is definitely inactive.
+ */
+boolean_t
+kmem_cache_reap_active(void)
+{
+
+	return (B_FALSE);
+}
+
+/*
+ * Reap (almost) everything soon.
+ *
+ * Note: this does not wait for the reap-tasks to complete. Caller
+ * should use kmem_cache_reap_active() (above) and/or moderation to
+ * avoid scheduling too many reap-tasks.
+ */
 #ifdef _KERNEL
 void
-kmem_cache_reap_now(kmem_cache_t *cache)
+kmem_cache_reap_soon(kmem_cache_t *cache)
 {
 #ifndef KMEM_DEBUG
 	zone_drain(cache->kc_zone);
@@ -228,7 +249,7 @@ kmem_reap(void)
 }
 #else
 void
-kmem_cache_reap_now(kmem_cache_t *cache __unused)
+kmem_cache_reap_soon(kmem_cache_t *cache __unused)
 {
 }
 

Modified: head/sys/cddl/compat/opensolaris/sys/kmem.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/kmem.h	Wed Feb 21 22:24:49 2018	(r329758)
+++ head/sys/cddl/compat/opensolaris/sys/kmem.h	Wed Feb 21 23:15:06 2018	(r329759)
@@ -73,7 +73,8 @@ kmem_cache_t *kmem_cache_create(char *name, size_t buf
 void kmem_cache_destroy(kmem_cache_t *cache);
 void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
 void kmem_cache_free(kmem_cache_t *cache, void *buf);
-void kmem_cache_reap_now(kmem_cache_t *cache);
+boolean_t kmem_cache_reap_active(void);
+void kmem_cache_reap_soon(kmem_cache_t *);
 void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Feb 21 22:24:49 2018	(r329758)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Feb 21 23:15:06 2018	(r329759)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
@@ -309,6 +309,9 @@ int zfs_arc_evict_batch_limit = 10;
 /* number of seconds before growing cache again */
 static int		arc_grow_retry = 60;
 
+/* number of milliseconds before attempting a kmem-cache-reap */
+static int		arc_kmem_cache_reap_retry_ms = 1000;
+
 /* shift of arc_c for calculating overflow limit in arc_get_data_impl */
 int		zfs_arc_overflow_shift = 8;
 
@@ -4395,21 +4398,31 @@ arc_kmem_reap_now(void)
 #endif
 #endif
 
+	/*
+	 * If a kmem reap is already active, don't schedule more.  We must
+	 * check for this because kmem_cache_reap_soon() won't actually
+	 * block on the cache being reaped (this is to prevent callers from
+	 * becoming implicitly blocked by a system-wide kmem reap -- which,
+	 * on a system with many, many full magazines, can take minutes).
+	 */
+	if (kmem_cache_reap_active())
+		return;
+
 	for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
 		if (zio_buf_cache[i] != prev_cache) {
 			prev_cache = zio_buf_cache[i];
-			kmem_cache_reap_now(zio_buf_cache[i]);
+			kmem_cache_reap_soon(zio_buf_cache[i]);
 		}
 		if (zio_data_buf_cache[i] != prev_data_cache) {
 			prev_data_cache = zio_data_buf_cache[i];
-			kmem_cache_reap_now(zio_data_buf_cache[i]);
+			kmem_cache_reap_soon(zio_data_buf_cache[i]);
 		}
 	}
-	kmem_cache_reap_now(abd_chunk_cache);
-	kmem_cache_reap_now(buf_cache);
-	kmem_cache_reap_now(hdr_full_cache);
-	kmem_cache_reap_now(hdr_l2only_cache);
-	kmem_cache_reap_now(range_seg_cache);
+	kmem_cache_reap_soon(abd_chunk_cache);
+	kmem_cache_reap_soon(buf_cache);
+	kmem_cache_reap_soon(hdr_full_cache);
+	kmem_cache_reap_soon(hdr_l2only_cache);
+	kmem_cache_reap_soon(range_seg_cache);
 
 #ifdef illumos
 	if (zio_arena != NULL) {
@@ -4444,6 +4457,7 @@ static void
 arc_reclaim_thread(void *unused __unused)
 {
 	hrtime_t		growtime = 0;
+	hrtime_t		kmem_reap_time = 0;
 	callb_cpr_t		cpr;
 
 	CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
@@ -4477,7 +4491,7 @@ arc_reclaim_thread(void *unused __unused)
 
 		int64_t free_memory = arc_available_memory();
 		if (free_memory < 0) {
-
+			hrtime_t curtime = gethrtime();
 			arc_no_grow = B_TRUE;
 			arc_warm = B_TRUE;
 
@@ -4485,9 +4499,20 @@ arc_reclaim_thread(void *unused __unused)
 			 * Wait at least zfs_grow_retry (default 60) seconds
 			 * before considering growing.
 			 */
-			growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+			growtime = curtime + SEC2NSEC(arc_grow_retry);
 
-			arc_kmem_reap_now();
+			/*
+			 * Wait at least arc_kmem_cache_reap_retry_ms
+			 * between arc_kmem_reap_now() calls. Without
+			 * this check it is possible to end up in a
+			 * situation where we spend lots of time
+			 * reaping caches, while we're near arc_c_min.
+			 */
+			if (curtime >= kmem_reap_time) {
+				arc_kmem_reap_now();
+				kmem_reap_time = gethrtime() +
+				    MSEC2NSEC(arc_kmem_cache_reap_retry_ms);
+			}
 
 			/*
 			 * If we are still low on memory, shrink the ARC



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802212315.w1LNF6xx071171>