Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 22 Feb 2018 03:49:06 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r329802 - in head/sys: cddl/contrib/opensolaris/uts/common cddl/contrib/opensolaris/uts/common/fs/zfs cddl/contrib/opensolaris/uts/common/fs/zfs/sys conf
Message-ID:  <201802220349.w1M3n60q009478@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Thu Feb 22 03:49:06 2018
New Revision: 329802
URL: https://svnweb.freebsd.org/changeset/base/329802

Log:
  MFV r329799, r329800:
  9079 race condition in starting and ending condensing thread for indirect vdevs
  
  illumos/illumos-gate@667ec66f1b4f491d5e839644e0912cad1c9e7122
  
  The timeline of the race condition is the following:
  [1] Thread A is about to finish condensing the first vdev in spa_condense_indirect_thread(),
  so it calls the spa_condense_indirect_complete_sync() sync task which sets the
  spa_condensing_indirect field to NULL. Waiting for the sync task to finish, thread A
  sleeps until the txg is done. When this happens, thread A will acquire spa_async_lock
  and set spa_condense_thread to NULL.
  [2] While thread A waits for the txg to finish, thread B which is running spa_sync() checks
  whether it should condense the second vdev in vdev_indirect_should_condense() by checking
  the spa_condensing_indirect field which was set to NULL by spa_condense_indirect_thread()
  from thread A. So it goes on and tries to spawn a new condensing thread in
  spa_condense_indirect_start_sync() and the aforementioned assertion fails because thread A
  has not set spa_condense_thread to NULL (which is basically the last thing it does before
  returning).
  
  The main issue here is that we rely on both spa_condensing_indirect and spa_condense_thread to
  signify whether a condensing thread is running. Ideally we would only use one throughout the
  codebase. In addition, for managing spa_condense_thread we currently use spa_async_lock which
  basically ties condensing to scrubbing when it comes to pausing and resuming those actions
  during spa export.
  
  Reviewed by: Matt Ahrens <mahrens@delphix.com>
  Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
  Approved by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
  Author: Serapheim Dimitropoulos <serapheim@delphix.com>

Added:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zthr.h
     - copied unchanged from r329800, vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zthr.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zthr.c
     - copied unchanged from r329800, vendor-sys/illumos/dist/uts/common/fs/zfs/zthr.c
Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_removal.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c
  head/sys/conf/files
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files	Thu Feb 22 03:37:01 2018	(r329801)
+++ head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files	Thu Feb 22 03:49:06 2018	(r329802)
@@ -148,7 +148,8 @@ ZFS_COMMON_OBJS +=		\
 	zio_compress.o		\
 	zio_inject.o		\
 	zle.o			\
-	zrlock.o
+	zrlock.o		\
+	zthr.o
 
 ZFS_SHARED_OBJS +=		\
 	zfeature_common.o	\

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Thu Feb 22 03:37:01 2018	(r329801)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Thu Feb 22 03:49:06 2018	(r329802)
@@ -1411,6 +1411,12 @@ spa_unload(spa_t *spa)
 		spa->spa_vdev_removal = NULL;
 	}
 
+	if (spa->spa_condense_zthr != NULL) {
+		ASSERT(!zthr_isrunning(spa->spa_condense_zthr));
+		zthr_destroy(spa->spa_condense_zthr);
+		spa->spa_condense_zthr = NULL;
+	}
+
 	spa_condense_fini(spa);
 
 	bpobj_close(&spa->spa_deferred_bpobj);
@@ -2167,6 +2173,16 @@ spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err)
 	return (SET_ERROR(err));
 }
 
+static void
+spa_spawn_aux_threads(spa_t *spa)
+{
+	ASSERT(spa_writeable(spa));
+
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+	spa_start_indirect_condensing_thread(spa);
+}
+
 /*
  * Fix up config after a partly-completed split.  This is done with the
  * ZPOOL_CONFIG_SPLIT nvlist.  Both the splitting pool and the split-off
@@ -3574,18 +3590,6 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char
 		ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT);
 
 		/*
-		 * We must check this before we start the sync thread, because
-		 * we only want to start a condense thread for condense
-		 * operations that were in progress when the pool was
-		 * imported.  Once we start syncing, spa_sync() could
-		 * initiate a condense (and start a thread for it).  In
-		 * that case it would be wrong to start a second
-		 * condense thread.
-		 */
-		boolean_t condense_in_progress =
-		    (spa->spa_condensing_indirect != NULL);
-
-		/*
 		 * Traverse the ZIL and claim all blocks.
 		 */
 		spa_ld_claim_log_blocks(spa);
@@ -3637,15 +3641,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char
 		 */
 		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
 
-		/*
-		 * Note: unlike condensing, we don't need an analogous
-		 * "removal_in_progress" dance because no other thread
-		 * can start a removal while we hold the spa_namespace_lock.
-		 */
 		spa_restart_removal(spa);
 
-		if (condense_in_progress)
-			spa_condense_indirect_restart(spa);
+		spa_spawn_aux_threads(spa);
 	}
 
 	spa_load_note(spa, "LOADED");
@@ -4569,6 +4567,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_
 	 */
 	txg_wait_synced(spa->spa_dsl_pool, txg);
 
+	spa_spawn_aux_threads(spa);
+
 	spa_write_cachefile(spa, B_FALSE, B_TRUE);
 	spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE);
 
@@ -6753,12 +6753,15 @@ spa_async_suspend(spa_t *spa)
 	mutex_enter(&spa->spa_async_lock);
 	spa->spa_async_suspended++;
 	while (spa->spa_async_thread != NULL ||
-	    spa->spa_async_thread_vd != NULL ||
-	    spa->spa_condense_thread != NULL)
+	    spa->spa_async_thread_vd != NULL)
 		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
 	mutex_exit(&spa->spa_async_lock);
 
 	spa_vdev_remove_suspend(spa);
+
+	zthr_t *condense_thread = spa->spa_condense_zthr;
+	if (condense_thread != NULL && zthr_isrunning(condense_thread))
+		VERIFY0(zthr_cancel(condense_thread));
 }
 
 void
@@ -6769,6 +6772,10 @@ spa_async_resume(spa_t *spa)
 	spa->spa_async_suspended--;
 	mutex_exit(&spa->spa_async_lock);
 	spa_restart_removal(spa);
+
+	zthr_t *condense_thread = spa->spa_condense_zthr;
+	if (condense_thread != NULL && !zthr_isrunning(condense_thread))
+		zthr_resume(condense_thread);
 }
 
 static boolean_t

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Thu Feb 22 03:37:01 2018	(r329801)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Thu Feb 22 03:49:06 2018	(r329802)
@@ -44,6 +44,7 @@
 #include <sys/bplist.h>
 #include <sys/bpobj.h>
 #include <sys/zfeature.h>
+#include <sys/zthr.h>
 #include <zfeature_common.h>
 
 #ifdef	__cplusplus
@@ -280,7 +281,7 @@ struct spa {
 
 	spa_condensing_indirect_phys_t	spa_condensing_indirect_phys;
 	spa_condensing_indirect_t	*spa_condensing_indirect;
-	kthread_t	*spa_condense_thread;	/* thread doing condense. */
+	zthr_t		*spa_condense_zthr;	/* zthr doing condense. */
 
 	char		*spa_root;		/* alternate root directory */
 	uint64_t	spa_ena;		/* spa-wide ereport ENA */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_removal.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_removal.h	Thu Feb 22 03:37:01 2018	(r329801)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_removal.h	Thu Feb 22 03:49:06 2018	(r329802)
@@ -76,7 +76,7 @@ extern int spa_remove_init(spa_t *);
 extern void spa_restart_removal(spa_t *);
 extern int spa_condense_init(spa_t *);
 extern void spa_condense_fini(spa_t *);
-extern void spa_condense_indirect_restart(spa_t *);
+extern void spa_start_indirect_condensing_thread(spa_t *);
 extern void spa_vdev_condense_suspend(spa_t *);
 extern int spa_vdev_remove(spa_t *, uint64_t, boolean_t);
 extern void free_from_removing_vdev(vdev_t *, uint64_t, uint64_t, uint64_t);

Copied: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zthr.h (from r329800, vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zthr.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zthr.h	Thu Feb 22 03:49:06 2018	(r329802, copy of r329800, vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zthr.h)
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+
+/*
+ * Copyright (c) 2017 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_ZTHR_H
+#define	_SYS_ZTHR_H
+
+typedef struct zthr zthr_t;
+typedef int (zthr_func_t)(void *, zthr_t *);
+typedef boolean_t (zthr_checkfunc_t)(void *, zthr_t *);
+
+struct zthr {
+	kthread_t	*zthr_thread;
+	kmutex_t	zthr_lock;
+	kcondvar_t	zthr_cv;
+	boolean_t	zthr_cancel;
+
+	zthr_checkfunc_t	*zthr_checkfunc;
+	zthr_func_t	*zthr_func;
+	void		*zthr_arg;
+	int		zthr_rc;
+};
+
+extern zthr_t *zthr_create(zthr_checkfunc_t checkfunc,
+    zthr_func_t *func, void *arg);
+extern void zthr_exit(zthr_t *t, int rc);
+extern void zthr_destroy(zthr_t *t);
+
+extern void zthr_wakeup(zthr_t *t);
+extern int zthr_cancel(zthr_t *t);
+extern void zthr_resume(zthr_t *t);
+
+extern boolean_t zthr_iscancelled(zthr_t *t);
+extern boolean_t zthr_isrunning(zthr_t *t);
+
+#endif /* _SYS_ZTHR_H */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c	Thu Feb 22 03:37:01 2018	(r329801)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_indirect.c	Thu Feb 22 03:49:06 2018	(r329802)
@@ -30,6 +30,8 @@
 #include <sys/dmu_tx.h>
 #include <sys/dsl_synctask.h>
 #include <sys/zap.h>
+#include <sys/abd.h>
+#include <sys/zthr.h>
 
 /*
  * An indirect vdev corresponds to a vdev that has been removed.  Since
@@ -475,7 +477,7 @@ spa_condense_indirect_commit_entry(spa_t *spa,
 
 static void
 spa_condense_indirect_generate_new_mapping(vdev_t *vd,
-    uint32_t *obsolete_counts, uint64_t start_index)
+    uint32_t *obsolete_counts, uint64_t start_index, zthr_t *zthr)
 {
 	spa_t *spa = vd->vdev_spa;
 	uint64_t mapi = start_index;
@@ -490,7 +492,15 @@ spa_condense_indirect_generate_new_mapping(vdev_t *vd,
 	    (u_longlong_t)vd->vdev_id,
 	    (u_longlong_t)mapi);
 
-	while (mapi < old_num_entries && !spa_shutting_down(spa)) {
+	while (mapi < old_num_entries) {
+
+		if (zthr_iscancelled(zthr)) {
+			zfs_dbgmsg("pausing condense of vdev %llu "
+			    "at index %llu", (u_longlong_t)vd->vdev_id,
+			    (u_longlong_t)mapi);
+			break;
+		}
+
 		vdev_indirect_mapping_entry_phys_t *entry =
 		    &old_mapping->vim_entries[mapi];
 		uint64_t entry_size = DVA_GET_ASIZE(&entry->vimep_dst);
@@ -508,18 +518,30 @@ spa_condense_indirect_generate_new_mapping(vdev_t *vd,
 
 		mapi++;
 	}
-	if (spa_shutting_down(spa)) {
-		zfs_dbgmsg("pausing condense of vdev %llu at index %llu",
-		    (u_longlong_t)vd->vdev_id,
-		    (u_longlong_t)mapi);
-	}
 }
 
-static void
-spa_condense_indirect_thread(void *arg)
+/* ARGSUSED */
+static boolean_t
+spa_condense_indirect_thread_check(void *arg, zthr_t *zthr)
 {
-	vdev_t *vd = arg;
-	spa_t *spa = vd->vdev_spa;
+	spa_t *spa = arg;
+
+	return (spa->spa_condensing_indirect != NULL);
+}
+
+/* ARGSUSED */
+static int
+spa_condense_indirect_thread(void *arg, zthr_t *zthr)
+{
+	spa_t *spa = arg;
+	vdev_t *vd;
+
+	ASSERT3P(spa->spa_condensing_indirect, !=, NULL);
+	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+	vd = vdev_lookup_top(spa, spa->spa_condensing_indirect_phys.scip_vdev);
+	ASSERT3P(vd, !=, NULL);
+	spa_config_exit(spa, SCL_VDEV, FTAG);
+
 	spa_condensing_indirect_t *sci = spa->spa_condensing_indirect;
 	spa_condensing_indirect_phys_t *scip =
 	    &spa->spa_condensing_indirect_phys;
@@ -593,25 +615,24 @@ spa_condense_indirect_thread(void *arg)
 		}
 	}
 
-	spa_condense_indirect_generate_new_mapping(vd, counts, start_index);
+	spa_condense_indirect_generate_new_mapping(vd, counts,
+	    start_index, zthr);
 
 	vdev_indirect_mapping_free_obsolete_counts(old_mapping, counts);
 
 	/*
-	 * We may have bailed early from generate_new_mapping(), if
-	 * the spa is shutting down.  In this case, do not complete
-	 * the condense.
+	 * If the zthr has received a cancellation signal while running
+	 * in generate_new_mapping() or at any point after that, then bail
+	 * early. We don't want to complete the condense if the spa is
+	 * shutting down.
 	 */
-	if (!spa_shutting_down(spa)) {
-		VERIFY0(dsl_sync_task(spa_name(spa), NULL,
-		    spa_condense_indirect_complete_sync, sci, 0,
-		    ZFS_SPACE_CHECK_NONE));
-	}
+	if (zthr_iscancelled(zthr))
+		return (0);
 
-	mutex_enter(&spa->spa_async_lock);
-	spa->spa_condense_thread = NULL;
-	cv_broadcast(&spa->spa_async_cv);
-	mutex_exit(&spa->spa_async_lock);
+	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
+	    spa_condense_indirect_complete_sync, sci, 0, ZFS_SPACE_CHECK_NONE));
+
+	return (0);
 	thread_exit();
 }
 
@@ -665,9 +686,7 @@ spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t 
 	    (u_longlong_t)scip->scip_prev_obsolete_sm_object,
 	    (u_longlong_t)scip->scip_next_mapping_object);
 
-	ASSERT3P(spa->spa_condense_thread, ==, NULL);
-	spa->spa_condense_thread = thread_create(NULL, 0,
-	    spa_condense_indirect_thread, vd, 0, &p0, TS_RUN, minclsyspri);
+	zthr_wakeup(spa->spa_condense_zthr);
 }
 
 /*
@@ -744,24 +763,12 @@ spa_condense_fini(spa_t *spa)
 	}
 }
 
-/*
- * Restart the condense - called when the pool is opened.
- */
 void
-spa_condense_indirect_restart(spa_t *spa)
+spa_start_indirect_condensing_thread(spa_t *spa)
 {
-	vdev_t *vd;
-	ASSERT(spa->spa_condensing_indirect != NULL);
-	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-	vd = vdev_lookup_top(spa,
-	    spa->spa_condensing_indirect_phys.scip_vdev);
-	ASSERT(vd != NULL);
-	spa_config_exit(spa, SCL_VDEV, FTAG);
-
-	ASSERT3P(spa->spa_condense_thread, ==, NULL);
-	spa->spa_condense_thread = thread_create(NULL, 0,
-	    spa_condense_indirect_thread, vd, 0, &p0, TS_RUN,
-	    minclsyspri);
+	ASSERT3P(spa->spa_condense_zthr, ==, NULL);
+	spa->spa_condense_zthr = zthr_create(spa_condense_indirect_thread_check,
+	    spa_condense_indirect_thread, spa);
 }
 
 /*

Copied: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zthr.c (from r329800, vendor-sys/illumos/dist/uts/common/fs/zfs/zthr.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zthr.c	Thu Feb 22 03:49:06 2018	(r329802, copy of r329800, vendor-sys/illumos/dist/uts/common/fs/zfs/zthr.c)
@@ -0,0 +1,319 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017 by Delphix. All rights reserved.
+ */
+
+/*
+ * ZTHR Infrastructure
+ * ===================
+ *
+ * ZTHR threads are used for isolated operations that span multiple txgs
+ * within a SPA. They generally exist from SPA creation/loading and until
+ * the SPA is exported/destroyed. The ideal requirements for an operation
+ * to be modeled with a zthr are the following:
+ *
+ * 1] The operation needs to run over multiple txgs.
+ * 2] There is be a single point of reference in memory or on disk that
+ *    indicates whether the operation should run/is running or is
+ *    stopped.
+ *
+ * If the operation satisfies the above then the following rules guarantee
+ * a certain level of correctness:
+ *
+ * 1] Any thread EXCEPT the zthr changes the work indicator from stopped
+ *    to running but not the opposite.
+ * 2] Only the zthr can change the work indicator from running to stopped
+ *    (e.g. when it is done) but not the opposite.
+ *
+ * This way a normal zthr cycle should go like this:
+ *
+ * 1] An external thread changes the work indicator from stopped to
+ *    running and wakes up the zthr.
+ * 2] The zthr wakes up, checks the indicator and starts working.
+ * 3] When the zthr is done, it changes the indicator to stopped, allowing
+ *    a new cycle to start.
+ *
+ * == ZTHR creation
+ *
+ * Every zthr needs three inputs to start running:
+ *
+ * 1] A user-defined checker function (checkfunc) that decides whether
+ *    the zthr should start working or go to sleep. The function should
+ *    return TRUE when the zthr needs to work or FALSE to let it sleep,
+ *    and should adhere to the following signature:
+ *    boolean_t checkfunc_name(void *args, zthr_t *t);
+ *
+ * 2] A user-defined ZTHR function (func) which the zthr executes when
+ *    it is not sleeping. The function should adhere to the following
+ *    signature type:
+ *    int func_name(void *args, zthr_t *t);
+ *
+ * 3] A void args pointer that will be passed to checkfunc and func
+ *    implicitly by the infrastructure.
+ *
+ * The reason why the above API needs two different functions,
+ * instead of one that both checks and does the work, has to do with
+ * the zthr's internal lock (zthr_lock) and the allowed cancellation
+ * windows. We want to hold the zthr_lock while running checkfunc
+ * but not while running func. This way the zthr can be cancelled
+ * while doing work and not while checking for work.
+ *
+ * To start a zthr:
+ *     zthr_t *zthr_pointer = zthr_create(checkfunc, func, args);
+ *
+ * After that you should be able to wakeup, cancel, and resume the
+ * zthr from another thread using zthr_pointer.
+ *
+ * NOTE: ZTHR threads could potentially wake up spuriously and the
+ * user should take this into account when writing a checkfunc.
+ * [see ZTHR state transitions]
+ *
+ * == ZTHR cancellation
+ *
+ * ZTHR threads must be cancelled when their SPA is being exported
+ * or when they need to be paused so they don't interfere with other
+ * operations.
+ *
+ * To cancel a zthr:
+ *     zthr_cancel(zthr_pointer);
+ *
+ * To resume it:
+ *     zthr_resume(zthr_pointer);
+ *
+ * A zthr will implicitly check if it has received a cancellation
+ * signal every time func returns and everytime it wakes up [see ZTHR
+ * state transitions below].
+ *
+ * At times, waiting for the zthr's func to finish its job may take
+ * time. This may be very time-consuming for some operations that
+ * need to cancel the SPA's zthrs (e.g spa_export). For this scenario
+ * the user can explicitly make their ZTHR function aware of incoming
+ * cancellation signals using zthr_iscancelled(). A common pattern for
+ * that looks like this:
+ *
+ * int
+ * func_name(void *args, zthr_t *t)
+ * {
+ *     ... <unpack args> ...
+ *     while (!work_done && !zthr_iscancelled(t)) {
+ *         ... <do more work> ...
+ *     }
+ *     return (0);
+ * }
+ *
+ * == ZTHR exit
+ *
+ * For the rare cases where the zthr wants to stop running voluntarily
+ * while running its ZTHR function (func), we provide zthr_exit().
+ * When a zthr has voluntarily stopped running, it can be resumed with
+ * zthr_resume(), just like it would if it was cancelled by some other
+ * thread.
+ *
+ * == ZTHR cleanup
+ *
+ * Cancelling a zthr doesn't clean up its metadata (internal locks,
+ * function pointers to func and checkfunc, etc..). This is because
+ * we want to keep them around in case we want to resume the execution
+ * of the zthr later. Similarly for zthrs that exit themselves.
+ *
+ * To completely cleanup a zthr, cancel it first to ensure that it
+ * is not running and then use zthr_destroy().
+ *
+ * == ZTHR state transitions
+ *
+ *    zthr creation
+ *      +
+ *      |
+ *      |      woke up
+ *      |   +--------------+ sleep
+ *      |   |                  ^
+ *      |   |                  |
+ *      |   |                  | FALSE
+ *      |   |                  |
+ *      v   v     FALSE        +
+ *   cancelled? +---------> checkfunc?
+ *      +   ^                  +
+ *      |   |                  |
+ *      |   |                  | TRUE
+ *      |   |                  |
+ *      |   |  func returned   v
+ *      |   +---------------+ func
+ *      |
+ *      | TRUE
+ *      |
+ *      v
+ *   zthr stopped running
+ *
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zthr.h>
+
+void
+zthr_exit(zthr_t *t, int rc)
+{
+	ASSERT3P(t->zthr_thread, ==, curthread);
+	mutex_enter(&t->zthr_lock);
+	t->zthr_thread = NULL;
+	t->zthr_rc = rc;
+	cv_broadcast(&t->zthr_cv);
+	mutex_exit(&t->zthr_lock);
+	thread_exit();
+}
+
+static void
+zthr_procedure(void *arg)
+{
+	zthr_t *t = arg;
+	int rc = 0;
+
+	mutex_enter(&t->zthr_lock);
+	while (!t->zthr_cancel) {
+		if (t->zthr_checkfunc(t->zthr_arg, t)) {
+			mutex_exit(&t->zthr_lock);
+			rc = t->zthr_func(t->zthr_arg, t);
+			mutex_enter(&t->zthr_lock);
+		} else {
+			/* go to sleep */
+			cv_wait(&t->zthr_cv, &t->zthr_lock);
+		}
+	}
+	mutex_exit(&t->zthr_lock);
+
+	zthr_exit(t, rc);
+}
+
+zthr_t *
+zthr_create(zthr_checkfunc_t *checkfunc, zthr_func_t *func, void *arg)
+{
+	zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
+	mutex_init(&t->zthr_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL);
+
+	mutex_enter(&t->zthr_lock);
+	t->zthr_checkfunc = checkfunc;
+	t->zthr_func = func;
+	t->zthr_arg = arg;
+
+	t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
+	    0, &p0, TS_RUN, minclsyspri);
+	mutex_exit(&t->zthr_lock);
+
+	return (t);
+}
+
+void
+zthr_destroy(zthr_t *t)
+{
+	VERIFY3P(t->zthr_thread, ==, NULL);
+	mutex_destroy(&t->zthr_lock);
+	cv_destroy(&t->zthr_cv);
+	kmem_free(t, sizeof (*t));
+}
+
+/*
+ * Note: If the zthr is not sleeping and misses the wakeup
+ * (e.g it is running its ZTHR function), it will check if
+ * there is work to do before going to sleep using its checker
+ * function [see ZTHR state transition in ZTHR block comment].
+ * Thus, missing the wakeup still yields the expected behavior.
+ */
+void
+zthr_wakeup(zthr_t *t)
+{
+	ASSERT3P(t->zthr_thread, !=, NULL);
+
+	mutex_enter(&t->zthr_lock);
+	cv_broadcast(&t->zthr_cv);
+	mutex_exit(&t->zthr_lock);
+}
+
+/*
+ * Note: If the zthr is not running (e.g. has been cancelled
+ * already), this is a no-op.
+ */
+int
+zthr_cancel(zthr_t *t)
+{
+	int rc = 0;
+
+	mutex_enter(&t->zthr_lock);
+
+	/* broadcast in case the zthr is sleeping */
+	cv_broadcast(&t->zthr_cv);
+
+	t->zthr_cancel = B_TRUE;
+	while (t->zthr_thread != NULL)
+		cv_wait(&t->zthr_cv, &t->zthr_lock);
+	t->zthr_cancel = B_FALSE;
+	rc = t->zthr_rc;
+	mutex_exit(&t->zthr_lock);
+
+	return (rc);
+}
+
+void
+zthr_resume(zthr_t *t)
+{
+	ASSERT3P(t->zthr_thread, ==, NULL);
+
+	mutex_enter(&t->zthr_lock);
+
+	ASSERT3P(&t->zthr_checkfunc, !=, NULL);
+	ASSERT3P(&t->zthr_func, !=, NULL);
+	ASSERT(!t->zthr_cancel);
+
+	t->zthr_thread = thread_create(NULL, 0, zthr_procedure, t,
+	    0, &p0, TS_RUN, minclsyspri);
+
+	mutex_exit(&t->zthr_lock);
+}
+
+/*
+ * This function is intended to be used by the zthr itself
+ * to check if another thread has signal it to stop running.
+ *
+ * returns TRUE if we are in the middle of trying to cancel
+ *     this thread.
+ *
+ * returns FALSE otherwise.
+ */
+boolean_t
+zthr_iscancelled(zthr_t *t)
+{
+	boolean_t cancelled;
+
+	ASSERT3P(t->zthr_thread, ==, curthread);
+
+	mutex_enter(&t->zthr_lock);
+	cancelled = t->zthr_cancel;
+	mutex_exit(&t->zthr_lock);
+
+	return (cancelled);
+}
+
+boolean_t
+zthr_isrunning(zthr_t *t)
+{
+	boolean_t running;
+
+	mutex_enter(&t->zthr_lock);
+	running = (t->zthr_thread != NULL);
+	mutex_exit(&t->zthr_lock);
+
+	return (running);
+}

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Thu Feb 22 03:37:01 2018	(r329801)
+++ head/sys/conf/files	Thu Feb 22 03:49:06 2018	(r329802)
@@ -261,6 +261,7 @@ cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compres
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c			optional zfs compile-with "${ZFS_C}"
+cddl/contrib/opensolaris/uts/common/fs/zfs/zthr.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/callb.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/fm.c				optional zfs compile-with "${ZFS_C}"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802220349.w1M3n60q009478>