Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 26 Feb 2019 08:36:10 -0800
From:      Cy Schubert <Cy.Schubert@cschubert.com>
To:        Baptiste Daroussin <bapt@FreeBSD.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org, jack@gandi.net, fatih@gandi.net
Subject:   Re: svn commit: r344569 - in head/cddl/contrib/opensolaris: cmd/zfs lib/libzfs/common
Message-ID:  <44EBCB1E-4872-429A-A967-FA24CC299A5D@cschubert.com>
In-Reply-To: <20190226161131.6rb3slb5slhkomwz@ivaldir.net>
References:  <201902260818.x1Q8IZGO061175@repo.freebsd.org> <39F76FF0-B40A-43F9-AC77-5D535674A4B8@cschubert.com> <20190226160411.kutpzjltiqm4bfgb@ivaldir.net> <20190226161131.6rb3slb5slhkomwz@ivaldir.net>

next in thread | previous in thread | raw e-mail | index | archive | help
On February 26, 2019 8:11:31 AM PST, Baptiste Daroussin <bapt@FreeBSD=2Eorg=
> wrote:
>On Tue, Feb 26, 2019 at 05:04:11PM +0100, Baptiste Daroussin wrote:
>> On Tue, Feb 26, 2019 at 07:48:27AM -0800, Cy Schubert wrote:
>> > On February 26, 2019 12:18:35 AM PST, Baptiste Daroussin
><bapt@FreeBSD=2Eorg> wrote:
>> > >Author: bapt
>> > >Date: Tue Feb 26 08:18:34 2019
>> > >New Revision: 344569
>> > >URL: https://svnweb=2Efreebsd=2Eorg/changeset/base/344569
>> > >
>> > >Log:
>> > >  Implement parallel mounting for ZFS filesystem
>> > > =20
>> > >  It was first implemented on Illumos and then ported to ZoL=2E
>> > >  This patch is a port to FreeBSD of the ZoL version=2E
>> > >  This patch also includes a fix for a race condition that was
>amended
>> > > =20
>> > >With such patch Delphix has seen a huge decrease in latency of the
>> > >mount phase
>> > >  (https://github=2Ecom/openzfs/openzfs/commit/a3f0e2b569 for
>details)=2E
>> > >With that current change Gandi has measured improvments that are
>on par
>> > >with
>> > >  those reported by Delphix=2E
>> > > =20
>> > >  Zol commits incorporated:
>> >
>>https://github=2Ecom/zfsonlinux/zfs/commit/a10d50f999511d304f910852c7825=
c70c9c9e303
>> >
>>https://github=2Ecom/zfsonlinux/zfs/commit/e63ac16d25fbe991a356489c86d40=
77567dfea21
>> > > =20
>> > >  Reviewed by:	avg, sef
>> > >  Approved by:	avg, sef
>> > >  Obtained from:	ZoL
>> > >  MFC after:	1 month
>> > >  Relnotes:	yes
>> > >  Sponsored by:	Gandi=2Enet
>> > >  Differential Revision:	https://reviews=2Efreebsd=2Eorg/D19098
>> > >
>> > >Modified:
>> > >  head/cddl/contrib/opensolaris/cmd/zfs/zfs_main=2Ec
>> > >  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs=2Eh
>> > >  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset=2Ec
>> > >  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl=2Eh
>> > >  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount=2Ec
>> > >
>> > >Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs_main=2Ec
>> >
>>=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D
>> > >--- head/cddl/contrib/opensolaris/cmd/zfs/zfs_main=2Ec	Tue Feb 26
>> > >06:22:10 2019	(r344568)
>> > >+++ head/cddl/contrib/opensolaris/cmd/zfs/zfs_main=2Ec	Tue Feb 26
>> > >08:18:34 2019	(r344569)
>> > >@@ -5812,8 +5812,13 @@ zfs_do_holds(int argc, char **argv)
>> > >=20
>> > > #define	CHECK_SPINNER 30
>> > > #define	SPINNER_TIME 3		/* seconds */
>> > >-#define	MOUNT_TIME 5		/* seconds */
>> > >+#define	MOUNT_TIME 1		/* seconds */
>> > >=20
>> > >+typedef struct get_all_state {
>> > >+	boolean_t	ga_verbose;
>> > >+	get_all_cb_t	*ga_cbp;
>> > >+} get_all_state_t;
>> > >+
>> > > static int
>> > > get_one_dataset(zfs_handle_t *zhp, void *data)
>> > > {
>> > >@@ -5821,10 +5826,10 @@ get_one_dataset(zfs_handle_t *zhp, void
>*data)
>> > > 	static int spinval =3D 0;
>> > > 	static int spincheck =3D 0;
>> > > 	static time_t last_spin_time =3D (time_t)0;
>> > >-	get_all_cb_t *cbp =3D data;
>> > >+	get_all_state_t *state =3D data;
>> > > 	zfs_type_t type =3D zfs_get_type(zhp);
>> > >=20
>> > >-	if (cbp->cb_verbose) {
>> > >+	if (state->ga_verbose) {
>> > > 		if (--spincheck < 0) {
>> > > 			time_t now =3D time(NULL);
>> > > 			if (last_spin_time + SPINNER_TIME < now) {
>> > >@@ -5850,26 +5855,24 @@ get_one_dataset(zfs_handle_t *zhp, void
>*data)
>> > > 		zfs_close(zhp);
>> > > 		return (0);
>> > > 	}
>> > >-	libzfs_add_handle(cbp, zhp);
>> > >-	assert(cbp->cb_used <=3D cbp->cb_alloc);
>> > >+	libzfs_add_handle(state->ga_cbp, zhp);
>> > >+	assert(state->ga_cbp->cb_used <=3D state->ga_cbp->cb_alloc);
>> > >=20
>> > > 	return (0);
>> > > }
>> > >=20
>> > > static void
>> > >-get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t
>> > >verbose)
>> > >+get_all_datasets(get_all_cb_t *cbp, boolean_t verbose)
>> > > {
>> > >-	get_all_cb_t cb =3D { 0 };
>> > >-	cb=2Ecb_verbose =3D verbose;
>> > >-	cb=2Ecb_getone =3D get_one_dataset;
>> > >+	get_all_state_t state =3D {
>> > >+		=2Ega_verbose =3D verbose,
>> > >+		=2Ega_cbp =3D cbp
>> > >+	};
>> > >=20
>> > > 	if (verbose)
>> > > 		set_progress_header(gettext("Reading ZFS config"));
>> > >-	(void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
>> > >+	(void) zfs_iter_root(g_zfs, get_one_dataset, &state);
>> > >=20
>> > >-	*dslist =3D cb=2Ecb_handles;
>> > >-	*count =3D cb=2Ecb_used;
>> > >-
>> > > 	if (verbose)
>> > > 		finish_progress(gettext("done=2E"));
>> > > }
>> > >@@ -5879,9 +5882,20 @@ get_all_datasets(zfs_handle_t ***dslist,
>size_t
>> > >*count
>> > >* similar, we have a common function with an extra parameter to
>> > >determine which
>> > >  * mode we are using=2E
>> > >  */
>> > >-#define	OP_SHARE	0x1
>> > >-#define	OP_MOUNT	0x2
>> > >+typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t;
>> > >=20
>> > >+typedef struct share_mount_state {
>> > >+	share_mount_op_t	sm_op;
>> > >+	boolean_t	sm_verbose;
>> > >+	int	sm_flags;
>> > >+	char	*sm_options;
>> > >+	char	*sm_proto; /* only valid for OP_SHARE */
>> > >+	pthread_mutex_t	sm_lock; /* protects the remaining fields */
>> > >+	uint_t	sm_total; /* number of filesystems to process */
>> > >+	uint_t	sm_done; /* number of filesystems processed */
>> > >+	int	sm_status; /* -1 if any of the share/mount operations failed
>*/
>> > >+} share_mount_state_t;
>> > >+
>> > > /*
>> > >  * Share or mount a dataset=2E
>> > >  */
>> > >@@ -6122,6 +6136,29 @@ report_mount_progress(int current, int
>total)
>> > > 		update_progress(info);
>> > > }
>> > >=20
>> > >+/*
>> > >+ * zfs_foreach_mountpoint() callback that mounts or shares on
>> > >filesystem and
>> > >+ * updates the progress meter
>> > >+ */
>> > >+static int
>> > >+share_mount_one_cb(zfs_handle_t *zhp, void *arg)
>> > >+{
>> > >+	share_mount_state_t *sms =3D arg;
>> > >+	int ret;
>> > >+
>> > >+	ret =3D share_mount_one(zhp, sms->sm_op, sms->sm_flags,
>sms->sm_proto,
>> > >+	    B_FALSE, sms->sm_options);
>> > >+
>> > >+	pthread_mutex_lock(&sms->sm_lock);
>> > >+	if (ret !=3D 0)
>> > >+		sms->sm_status =3D ret;
>> > >+	sms->sm_done++;
>> > >+	if (sms->sm_verbose)
>> > >+		report_mount_progress(sms->sm_done, sms->sm_total);
>> > >+	pthread_mutex_unlock(&sms->sm_lock);
>> > >+	return (ret);
>> > >+}
>> > >+
>> > > static void
>> > > append_options(char *mntopts, char *newopts)
>> > > {
>> > >@@ -6194,8 +6231,6 @@ share_mount(int op, int argc, char **argv)
>> > >=20
>> > > 	/* check number of arguments */
>> > > 	if (do_all) {
>> > >-		zfs_handle_t **dslist =3D NULL;
>> > >-		size_t i, count =3D 0;
>> > > 		char *protocol =3D NULL;
>> > >=20
>> > > 		if (op =3D=3D OP_SHARE && argc > 0) {
>> > >@@ -6216,35 +6251,48 @@ share_mount(int op, int argc, char **argv)
>> > > 		}
>> > >=20
>> > > 		start_progress_timer();
>> > >-		get_all_datasets(&dslist, &count, verbose);
>> > >+		get_all_cb_t cb =3D { 0 };
>> > >+		get_all_datasets(&cb, verbose);
>> > >=20
>> > >-		if (count =3D=3D 0)
>> > >+		if (cb=2Ecb_used =3D=3D 0) {
>> > >+			if (options !=3D NULL)
>> > >+				free(options);
>> > > 			return (0);
>> > >+		}
>> > >=20
>> > >-		qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp);
>> > > #ifdef illumos
>> > >-		sa_init_selective_arg_t sharearg;
>> > >-		sharearg=2Ezhandle_arr =3D dslist;
>> > >-		sharearg=2Ezhandle_len =3D count;
>> > >-		if ((ret =3D zfs_init_libshare_arg(zfs_get_handle(dslist[0]),
>> > >-		    SA_INIT_SHARE_API_SELECTIVE, &sharearg)) !=3D SA_OK) {
>> > >-			(void) fprintf(stderr,
>> > >-			    gettext("Could not initialize libshare, %d"), ret);
>> > >-			return (ret);
>> > >+		if (op =3D=3D OP_SHARE) {
>> > >+			sa_init_selective_arg_t sharearg;
>> > >+			sharearg=2Ezhandle_arr =3D cb=2Ecb_handles;
>> > >+			sharearg=2Ezhandle_len =3D cb=2Ecb_used;
>> > >+			if ((ret =3D zfs_init_libshare_arg(g_zfs,
>> > >+			    SA_INIT_SHARE_API_SELECTIVE, &sharearg)) !=3D SA_OK) {
>> > >+				(void) fprintf(stderr, gettext(
>> > >+				    "Could not initialize libshare, %d"), ret);
>> > >+				return (ret);
>> > >+			}
>> > > 		}
>> > > #endif
>> > >+		share_mount_state_t share_mount_state =3D { 0 };
>> > >+		share_mount_state=2Esm_op =3D op;
>> > >+		share_mount_state=2Esm_verbose =3D verbose;
>> > >+		share_mount_state=2Esm_flags =3D flags;
>> > >+		share_mount_state=2Esm_options =3D options;
>> > >+		share_mount_state=2Esm_proto =3D protocol;
>> > >+		share_mount_state=2Esm_total =3D cb=2Ecb_used;
>> > >+		pthread_mutex_init(&share_mount_state=2Esm_lock, NULL);
>> > >=20
>> > >-		for (i =3D 0; i < count; i++) {
>> > >-			if (verbose)
>> > >-				report_mount_progress(i, count);
>> > >+		/*
>> > >+		 * libshare isn't mt-safe, so only do the operation in parallel
>> > >+		 * if we're mounting=2E
>> > >+		 */
>> > >+		zfs_foreach_mountpoint(g_zfs, cb=2Ecb_handles, cb=2Ecb_used,
>> > >+		    share_mount_one_cb, &share_mount_state, op =3D=3D OP_MOUNT);
>> > >+		ret =3D share_mount_state=2Esm_status;
>> > >=20
>> > >-			if (share_mount_one(dslist[i], op, flags, protocol,
>> > >-			    B_FALSE, options) !=3D 0)
>> > >-				ret =3D 1;
>> > >-			zfs_close(dslist[i]);
>> > >-		}
>> > >-
>> > >-		free(dslist);
>> > >+		for (int i =3D 0; i < cb=2Ecb_used; i++)
>> > >+			zfs_close(cb=2Ecb_handles[i]);
>> > >+		free(cb=2Ecb_handles);
>> > > 	} else if (argc =3D=3D 0) {
>> > > 		struct mnttab entry;
>> > >=20
>> > >
>> > >Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs=2Eh
>> >
>>=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D
>> > >--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs=2Eh	Tue
>Feb 26
>> > >06:22:10 2019	(r344568)
>> > >+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs=2Eh	Tue
>Feb 26
>> > >08:18:34 2019	(r344569)
>> > >@@ -579,12 +579,12 @@ typedef struct get_all_cb {
>> > > 	zfs_handle_t	**cb_handles;
>> > > 	size_t		cb_alloc;
>> > > 	size_t		cb_used;
>> > >-	boolean_t	cb_verbose;
>> > >-	int		(*cb_getone)(zfs_handle_t *, void *);
>> > > } get_all_cb_t;
>> > >=20
>> > >+void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **,
>> > >size_t,
>> > >+    zfs_iter_f, void*, boolean_t);
>> > >+
>> > > void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
>> > >-int libzfs_dataset_cmp(const void *, const void *);
>> > >=20
>> > > /*
>> > >  * Functions to create and destroy datasets=2E
>> > >
>> > >Modified:
>> > >head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset=2Ec
>> >
>>=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D
>> > >---
>> >
>>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset=2Ec	Tue
>> > >Feb 26 06:22:10 2019	(r344568)
>> > >+++
>> >
>>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset=2Ec	Tue
>> > >Feb 26 08:18:34 2019	(r344569)
>> > >@@ -799,6 +799,7 @@ libzfs_mnttab_cache_compare(const void *arg1,
>const
>> > >vo
>> > > void
>> > > libzfs_mnttab_init(libzfs_handle_t *hdl)
>> > > {
>> > >+	pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL);
>> > > 	assert(avl_numnodes(&hdl->libzfs_mnttab_cache) =3D=3D 0);
>> > > 	avl_create(&hdl->libzfs_mnttab_cache,
>libzfs_mnttab_cache_compare,
>> > > 	    sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
>> > >@@ -839,6 +840,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl)
>> > > 		free(mtn);
>> > > 	}
>> > > 	avl_destroy(&hdl->libzfs_mnttab_cache);
>> > >+	(void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock);
>> > > }
>> > >=20
>> > > void
>> > >@@ -853,6 +855,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const
>char
>> > >*f
>> > > {
>> > > 	mnttab_node_t find;
>> > > 	mnttab_node_t *mtn;
>> > >+	int ret =3D ENOENT;
>> > >=20
>> > > 	if (!hdl->libzfs_mnttab_enable) {
>> > > 		struct mnttab srch =3D { 0 };
>> > >@@ -868,6 +871,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const
>char
>> > >*f
>> > > 			return (ENOENT);
>> > > 	}
>> > >=20
>> > >+	pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
>> > > 	if (avl_numnodes(&hdl->libzfs_mnttab_cache) =3D=3D 0)
>> > > 		libzfs_mnttab_update(hdl);
>> > >=20
>> > >@@ -875,9 +879,10 @@ libzfs_mnttab_find(libzfs_handle_t *hdl,
>const
>> > >char *f
>> > > 	mtn =3D avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
>> > > 	if (mtn) {
>> > > 		*entry =3D mtn->mtn_mt;
>> > >-		return (0);
>> > >+		ret =3D 0;
>> > > 	}
>> > >-	return (ENOENT);
>> > >+	pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
>> > >+	return (ret);
>> > > }
>> > >=20
>> > > void
>> > >@@ -886,15 +891,17 @@ libzfs_mnttab_add(libzfs_handle_t *hdl,
>const
>> > >char *sp
>> > > {
>> > > 	mnttab_node_t *mtn;
>> > >=20
>> > >-	if (avl_numnodes(&hdl->libzfs_mnttab_cache) =3D=3D 0)
>> > >-		return;
>> > >-	mtn =3D zfs_alloc(hdl, sizeof (mnttab_node_t));
>> > >-	mtn->mtn_mt=2Emnt_special =3D zfs_strdup(hdl, special);
>> > >-	mtn->mtn_mt=2Emnt_mountp =3D zfs_strdup(hdl, mountp);
>> > >-	mtn->mtn_mt=2Emnt_fstype =3D zfs_strdup(hdl, MNTTYPE_ZFS);
>> > >-	mtn->mtn_mt=2Emnt_mntopts =3D zfs_strdup(hdl, mntopts);
>> > >-	avl_add(&hdl->libzfs_mnttab_cache, mtn);
>> > >-}
>> > >+	pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
>> > >+	if (avl_numnodes(&hdl->libzfs_mnttab_cache) =3D=3D 0) {
>> > >+		mtn =3D zfs_alloc(hdl, sizeof (mnttab_node_t));
>> > >+		mtn->mtn_mt=2Emnt_special =3D zfs_strdup(hdl, special);
>> > >+		mtn->mtn_mt=2Emnt_mountp =3D zfs_strdup(hdl, mountp);
>> > >+		mtn->mtn_mt=2Emnt_fstype =3D zfs_strdup(hdl, MNTTYPE_ZFS);
>> > >+		mtn->mtn_mt=2Emnt_mntopts =3D zfs_strdup(hdl, mntopts);
>> > >+		avl_add(&hdl->libzfs_mnttab_cache, mtn);
>> > >+	}
>> > >+	pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
>> > >+}	=09
>> > >=20
>> > > void
>> > > libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
>> > >@@ -902,6 +909,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl,
>const
>> > >char=20
>> > > 	mnttab_node_t find;
>> > > 	mnttab_node_t *ret;
>> > >=20
>> > >+	pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
>> > > 	find=2Emtn_mt=2Emnt_special =3D (char *)fsname;
>> > > 	if ((ret =3D avl_find(&hdl->libzfs_mnttab_cache, (void *)&find,
>NULL))
>> > > 	    !=3D NULL) {
>> > >@@ -912,6 +920,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl,
>const
>> > >char=20
>> > > 		free(ret->mtn_mt=2Emnt_mntopts);
>> > > 		free(ret);
>> > > 	}
>> > >+	pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
>> > > }
>> > >=20
>> > > int
>> > >
>> > >Modified:
>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl=2Eh
>> >
>>=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D
>> > >---
>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl=2Eh	Tue
>> > >Feb 26 06:22:10 2019	(r344568)
>> > >+++
>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl=2Eh	Tue
>> > >Feb 26 08:18:34 2019	(r344569)
>> > >@@ -22,7 +22,7 @@
>> > > /*
>> > >* Copyright (c) 2005, 2010, Oracle and/or its affiliates=2E All
>rights
>> > >reserved=2E
>> > >  * Copyright (c) 2011 Pawel Jakub Dawidek=2E All rights reserved=2E
>> > >- * Copyright (c) 2011, 2016 by Delphix=2E All rights reserved=2E
>> > >+ * Copyright (c) 2011, 2017 by Delphix=2E All rights reserved=2E
>> > >* Copyright (c) 2013 Martin Matuska <mm@FreeBSD=2Eorg>=2E All rights
>> > >reserved=2E
>> > >  */
>> > >=20
>> > >@@ -73,6 +73,13 @@ struct libzfs_handle {
>> > > 	int libzfs_storeerr; /* stuff error messages into buffer */
>> > > 	void *libzfs_sharehdl; /* libshare handle */
>> > > 	boolean_t libzfs_mnttab_enable;
>> > >+	/*
>> > >+	 * We need a lock to handle the case where parallel mount
>> > >+	 * threads are populating the mnttab cache simultaneously=2E The
>> > >+	 * lock only protects the integrity of the avl tree, and does
>> > >+	 * not protect the contents of the mnttab entries themselves=2E
>> > >+	 */
>> > >+	pthread_mutex_t libzfs_mnttab_cache_lock;
>> > > 	avl_tree_t libzfs_mnttab_cache;
>> > > 	int libzfs_pool_iter;
>> > > 	libzfs_fru_t **libzfs_fru_hash;
>> > >
>> > >Modified:
>> > >head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount=2Ec
>> >
>>=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D
>> > >---
>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount=2Ec	Tue
>> > >Feb 26 06:22:10 2019	(r344568)
>> > >+++
>head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount=2Ec	Tue
>> > >Feb 26 08:18:34 2019	(r344569)
>> > >@@ -26,6 +26,7 @@
>> > >  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail=2Ecom>
>> > >  * Copyright 2017 Joyent, Inc=2E
>> > >  * Copyright 2017 RackTop Systems=2E
>> > >+ * Copyright 2018 OmniOS Community Edition (OmniOSce)
>Association=2E
>> > >  */
>> > >=20
>> > > /*
>> > >@@ -34,25 +35,25 @@
>> > >  * they are used by mount and unmount and when changing a
>filesystem's
>> > >  * mountpoint=2E
>> > >  *
>> > >- * 	zfs_is_mounted()
>> > >- * 	zfs_mount()
>> > >- * 	zfs_unmount()
>> > >- * 	zfs_unmountall()
>> > >+ *	zfs_is_mounted()
>> > >+ *	zfs_mount()
>> > >+ *	zfs_unmount()
>> > >+ *	zfs_unmountall()
>> > >  *
>> > >* This file also contains the functions used to manage sharing
>> > >filesystems via
>> > >  * NFS and iSCSI:
>> > >  *
>> > >- * 	zfs_is_shared()
>> > >- * 	zfs_share()
>> > >- * 	zfs_unshare()
>> > >+ *	zfs_is_shared()
>> > >+ *	zfs_share()
>> > >+ *	zfs_unshare()
>> > >  *
>> > >- * 	zfs_is_shared_nfs()
>> > >- * 	zfs_is_shared_smb()
>> > >- * 	zfs_share_proto()
>> > >- * 	zfs_shareall();
>> > >- * 	zfs_unshare_nfs()
>> > >- * 	zfs_unshare_smb()
>> > >- * 	zfs_unshareall_nfs()
>> > >+ *	zfs_is_shared_nfs()
>> > >+ *	zfs_is_shared_smb()
>> > >+ *	zfs_share_proto()
>> > >+ *	zfs_shareall();
>> > >+ *	zfs_unshare_nfs()
>> > >+ *	zfs_unshare_smb()
>> > >+ *	zfs_unshareall_nfs()
>> > >  *	zfs_unshareall_smb()
>> > >  *	zfs_unshareall()
>> > >  *	zfs_unshareall_bypath()
>> > >@@ -60,8 +61,8 @@
>> > >  * The following functions are available for pool consumers, and
>will
>> > >  * mount/unmount and share/unshare all datasets within pool:
>> > >  *
>> > >- * 	zpool_enable_datasets()
>> > >- * 	zpool_disable_datasets()
>> > >+ *	zpool_enable_datasets()
>> > >+ *	zpool_disable_datasets()
>> > >  */
>> > >=20
>> > > #include <dirent=2Eh>
>> > >@@ -83,10 +84,14 @@
>> > > #include <libzfs=2Eh>
>> > >=20
>> > > #include "libzfs_impl=2Eh"
>> > >+#include <thread_pool=2Eh>
>> > >=20
>> > > #include <libshare=2Eh>
>> > > #define	MAXISALEN	257	/* based on sysinfo(2) man page */
>> > >=20
>> > >+static int mount_tp_nthr =3D 512; /* tpool threads for
>multi-threaded
>> > >mounting */
>> > >+
>> > >+static void zfs_mount_task(void *);
>> > > static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
>> > > zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
>> > >     zfs_share_proto_t);
>> > >@@ -1134,25 +1139,32 @@ remove_mountpoint(zfs_handle_t *zhp)
>> > > 	}
>> > > }
>> > >=20
>> > >+/*
>> > >+ * Add the given zfs handle to the cb_handles array, dynamically
>> > >reallocating
>> > >+ * the array if it is out of space
>> > >+ */
>> > > void
>> > > libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
>> > > {
>> > > 	if (cbp->cb_alloc =3D=3D cbp->cb_used) {
>> > > 		size_t newsz;
>> > >-		void *ptr;
>> > >+		zfs_handle_t **newhandles;
>> > >=20
>> > >-		newsz =3D cbp->cb_alloc ? cbp->cb_alloc * 2 : 64;
>> > >-		ptr =3D zfs_realloc(zhp->zfs_hdl,
>> > >-		    cbp->cb_handles, cbp->cb_alloc * sizeof (void *),
>> > >-		    newsz * sizeof (void *));
>> > >-		cbp->cb_handles =3D ptr;
>> > >+		newsz =3D cbp->cb_alloc !=3D 0 ? cbp->cb_alloc * 2 : 64;
>> > >+		newhandles =3D zfs_realloc(zhp->zfs_hdl,
>> > >+		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
>> > >+		    newsz * sizeof (zfs_handle_t *));
>> > >+		cbp->cb_handles =3D newhandles;
>> > > 		cbp->cb_alloc =3D newsz;
>> > > 	}
>> > > 	cbp->cb_handles[cbp->cb_used++] =3D zhp;
>> > > }
>> > >=20
>> > >+/*
>> > >+ * Recursive helper function used during file system enumeration
>> > >+ */
>> > > static int
>> > >-mount_cb(zfs_handle_t *zhp, void *data)
>> > >+zfs_iter_cb(zfs_handle_t *zhp, void *data)
>> > > {
>> > > 	get_all_cb_t *cbp =3D data;
>> > >=20
>> > >@@ -1178,104 +1190,362 @@ mount_cb(zfs_handle_t *zhp, void *data)
>> > > 	}
>> > >=20
>> > > 	libzfs_add_handle(cbp, zhp);
>> > >-	if (zfs_iter_filesystems(zhp, mount_cb, cbp) !=3D 0) {
>> > >+	if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) !=3D 0) {
>> > > 		zfs_close(zhp);
>> > > 		return (-1);
>> > > 	}
>> > > 	return (0);
>> > > }
>> > >=20
>> > >-int
>> > >-libzfs_dataset_cmp(const void *a, const void *b)
>> > >+/*
>> > >+ * Sort comparator that compares two mountpoint paths=2E We sort
>these
>> > >paths so
>> > >+ * that subdirectories immediately follow their parents=2E This
>means
>> > >that we
>> > >+ * effectively treat the '/' character as the lowest value
>non-nul
>> > >char=2E
>> > >+ * Since filesystems from non-global zones can have the same
>> > >mountpoint
>> > >+ * as other filesystems, the comparator sorts global zone
>filesystems
>> > >to
>> > >+ * the top of the list=2E This means that the global zone will
>traverse
>> > >the
>> > >+ * filesystem list in the correct order and can stop when it sees
>the
>> > >+ * first zoned filesystem=2E In a non-global zone, only the
>delegated
>> > >+ * filesystems are seen=2E
>> > >+ *
>> > >+ * An example sorted list using this comparator would look like:
>> > >+ *
>> > >+ * /foo
>> > >+ * /foo/bar
>> > >+ * /foo/bar/baz
>> > >+ * /foo/baz
>> > >+ * /foo=2Ebar
>> > >+ * /foo (NGZ1)
>> > >+ * /foo (NGZ2)
>> > >+ *
>> > >+ * The mount code depend on this ordering to deterministically
>iterate
>> > >+ * over filesystems in order to spawn parallel mount tasks=2E
>> > >+ */
>> > >+static int
>> > >+mountpoint_cmp(const void *arga, const void *argb)
>> > > {
>> > >-	zfs_handle_t **za =3D (zfs_handle_t **)a;
>> > >-	zfs_handle_t **zb =3D (zfs_handle_t **)b;
>> > >+	zfs_handle_t *const *zap =3D arga;
>> > >+	zfs_handle_t *za =3D *zap;
>> > >+	zfs_handle_t *const *zbp =3D argb;
>> > >+	zfs_handle_t *zb =3D *zbp;
>> > > 	char mounta[MAXPATHLEN];
>> > > 	char mountb[MAXPATHLEN];
>> > >+	const char *a =3D mounta;
>> > >+	const char *b =3D mountb;
>> > > 	boolean_t gota, gotb;
>> > >+	uint64_t zoneda, zonedb;
>> > >=20
>> > >-	if ((gota =3D (zfs_get_type(*za) =3D=3D ZFS_TYPE_FILESYSTEM)) !=3D=
 0)
>> > >-		verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
>> > >+	zoneda =3D zfs_prop_get_int(za, ZFS_PROP_ZONED);
>> > >+	zonedb =3D zfs_prop_get_int(zb, ZFS_PROP_ZONED);
>> > >+	if (zoneda && !zonedb)
>> > >+		return (1);
>> > >+	if (!zoneda && zonedb)
>> > >+		return (-1);
>> > >+	gota =3D (zfs_get_type(za) =3D=3D ZFS_TYPE_FILESYSTEM);
>> > >+	if (gota)
>> > >+		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
>> > > 		    sizeof (mounta), NULL, NULL, 0, B_FALSE) =3D=3D 0);
>> > >-	if ((gotb =3D (zfs_get_type(*zb) =3D=3D ZFS_TYPE_FILESYSTEM)) !=3D=
 0)
>> > >-		verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
>> > >+	gotb =3D (zfs_get_type(zb) =3D=3D ZFS_TYPE_FILESYSTEM);
>> > >+	if (gotb)
>> > >+		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
>> > > 		    sizeof (mountb), NULL, NULL, 0, B_FALSE) =3D=3D 0);
>> > >=20
>> > >-	if (gota && gotb)
>> > >-		return (strcmp(mounta, mountb));
>> > >+	if (gota && gotb) {
>> > >+		while (*a !=3D '\0' && (*a =3D=3D *b)) {
>> > >+			a++;
>> > >+			b++;
>> > >+		}
>> > >+		if (*a =3D=3D *b)
>> > >+			return (0);
>> > >+		if (*a =3D=3D '\0')
>> > >+			return (-1);
>> > >+		if (*b =3D=3D '\0')
>> > >+			return (-1);
>> > >+		if (*a =3D=3D '/')
>> > >+			return (-1);
>> > >+		if (*b =3D=3D '/')
>> > >+			return (-1);
>> > >+		return (*a < *b ? -1 : *a > *b);
>> > >+	}
>> > >=20
>> > > 	if (gota)
>> > > 		return (-1);
>> > > 	if (gotb)
>> > > 		return (1);
>> > >=20
>> > >-	return (strcmp(zfs_get_name(a), zfs_get_name(b)));
>> > >+	/*
>> > >+	 * If neither filesystem has a mountpoint, revert to sorting by
>> > >+	 * datset name=2E
>> > >+	 */
>> > >+	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
>> > > }
>> > >=20
>> > > /*
>> > >+ * Reutrn true if path2 is a child of path1
>> > >+ */
>> > >+static boolean_t
>> > >+libzfs_path_contains(const char *path1, const char *path2)
>> > >+{
>> > >+	return (strstr(path2, path1) =3D=3D path2 && path2[strlen(path1)] =
=3D=3D
>> > >'/');
>> > >+}
>> > >+
>> > >+
>> > >+static int
>> > >+non_descendant_idx(zfs_handle_t **handles, size_t num_handles,
>int
>> > >idx)
>> > >+{
>> > >+	char parent[ZFS_MAXPROPLEN];
>> > >+	char child[ZFS_MAXPROPLEN];
>> > >+	int i;
>> > >+
>> > >+	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
>> > >+	    sizeof (parent), NULL, NULL, 0, B_FALSE) =3D=3D 0);
>> > >+
>> > >+	for (i =3D idx + 1; i < num_handles; i++) {
>> > >+		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
>> > >+		    sizeof (child), NULL, NULL, 0, B_FALSE) =3D=3D 0);
>> > >+		if (!libzfs_path_contains(parent, child))
>> > >+			break;
>> > >+	}
>> > >+	return (i);
>> > >+}
>> > >+
>> > >+typedef struct mnt_param {
>> > >+	libzfs_handle_t	*mnt_hdl;
>> > >+	tpool_t		*mnt_tp;
>> > >+	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
>> > >+	size_t		mnt_num_handles;
>> > >+	int		mnt_idx;	/* Index of selected entry to mount */
>> > >+	zfs_iter_f	mnt_func;
>> > >+	void		*mnt_data;
>> > >+} mnt_param_t;
>> > >+
>> > >+/*
>> > >+ * Allocate and populate the parameter struct for mount function,
>and
>> > >+ * schedule mounting of the entry selected by idx=2E
>> > >+ */
>> > >+static void
>> > >+zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
>> > >+    size_t num_handles, int idx, zfs_iter_f func, void *data,
>tpool_t
>> > >*tp)
>> > >+{
>> > >+	mnt_param_t *mnt_param =3D zfs_alloc(hdl, sizeof (mnt_param_t));
>> > >+
>> > >+	mnt_param->mnt_hdl =3D hdl;
>> > >+	mnt_param->mnt_tp =3D tp;
>> > >+	mnt_param->mnt_zhps =3D handles;
>> > >+	mnt_param->mnt_num_handles =3D num_handles;
>> > >+	mnt_param->mnt_idx =3D idx;
>> > >+	mnt_param->mnt_func =3D func;
>> > >+	mnt_param->mnt_data =3D data;
>> > >+
>> > >+	(void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param);
>> > >+}
>> > >+
>> > >+/*
>> > >+ * This is the structure used to keep state of mounting or
>sharing
>> > >operations
>> > >+ * during a call to zpool_enable_datasets()=2E
>> > >+ */
>> > >+typedef struct mount_state {
>> > >+	/*
>> > >+	 * ms_mntstatus is set to -1 if any mount fails=2E While multiple
>> > >threads
>> > >+	 * could update this variable concurrently, no synchronization
>is
>> > >+	 * needed as it's only ever set to -1=2E
>> > >+	 */
>> > >+	int		ms_mntstatus;
>> > >+	int		ms_mntflags;
>> > >+	const char	*ms_mntopts;
>> > >+} mount_state_t;
>> > >+
>> > >+static int
>> > >+zfs_mount_one(zfs_handle_t *zhp, void *arg)
>> > >+{
>> > >+	mount_state_t *ms =3D arg;
>> > >+	int ret =3D 0;
>> > >+
>> > >+	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) !=3D 0)
>> > >+		ret =3D ms->ms_mntstatus =3D -1;
>> > >+	return (ret);
>> > >+}
>> > >+
>> > >+static int
>> > >+zfs_share_one(zfs_handle_t *zhp, void *arg)
>> > >+{
>> > >+	mount_state_t *ms =3D arg;
>> > >+	int ret =3D 0;
>> > >+
>> > >+	if (zfs_share(zhp) !=3D 0)
>> > >+		ret =3D ms->ms_mntstatus =3D -1;
>> > >+	return (ret);
>> > >+}
>> > >+
>> > >+/*
>> > >+ * Thread pool function to mount one file system=2E On completion,
>it
>> > >finds and
>> > >+ * schedules its children to be mounted=2E This depends on the
>sorting
>> > >done in
>> > >+ * zfs_foreach_mountpoint()=2E Note that the degenerate case (chain
>of
>> > >entries
>> > >+ * each descending from the previous) will have no parallelism
>since
>> > >we always
>> > >+ * have to wait for the parent to finish mounting before we can
>> > >schedule
>> > >+ * its children=2E
>> > >+ */
>> > >+static void
>> > >+zfs_mount_task(void *arg)
>> > >+{
>> > >+	mnt_param_t *mp =3D arg;
>> > >+	int idx =3D mp->mnt_idx;
>> > >+	zfs_handle_t **handles =3D mp->mnt_zhps;
>> > >+	size_t num_handles =3D mp->mnt_num_handles;
>> > >+	char mountpoint[ZFS_MAXPROPLEN];
>> > >+
>> > >+	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT,
>mountpoint,
>> > >+	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) =3D=3D 0);
>> > >+
>> > >+	if (mp->mnt_func(handles[idx], mp->mnt_data) !=3D 0)
>> > >+		return;
>> > >+
>> > >+	/*
>> > >+	 * We dispatch tasks to mount filesystems with mountpoints
>underneath
>> > >+	 * this one=2E We do this by dispatching the next filesystem with
>a
>> > >+	 * descendant mountpoint of the one we just mounted, then skip
>all of
>> > >+	 * its descendants, dispatch the next descendant mountpoint, and
>so
>> > >on=2E
>> > >+	 * The non_descendant_idx() function skips over filesystems that
>are
>> > >+	 * descendants of the filesystem we just dispatched=2E
>> > >+	 */
>> > >+	for (int i =3D idx + 1; i < num_handles;
>> > >+	    i =3D non_descendant_idx(handles, num_handles, i)) {
>> > >+		char child[ZFS_MAXPROPLEN];
>> > >+		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
>> > >+		    child, sizeof (child), NULL, NULL, 0, B_FALSE) =3D=3D 0);
>> > >+
>> > >+		if (!libzfs_path_contains(mountpoint, child))
>> > >+			break; /* not a descendant, return */
>> > >+		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
>> > >+		    mp->mnt_func, mp->mnt_data, mp->mnt_tp);
>> > >+	}
>> > >+	free(mp);
>> > >+}
>> > >+
>> > >+/*
>> > >+ * Issue the func callback for each ZFS handle contained in the
>> > >handles
>> > >+ * array=2E This function is used to mount all datasets, and so
>this
>> > >function
>> > >+ * guarantees that filesystems for parent mountpoints are called
>> > >before their
>> > >+ * children=2E As such, before issuing any callbacks, we first sort
>the
>> > >array
>> > >+ * of handles by mountpoint=2E
>> > >+ *
>> > >+ * Callbacks are issued in one of two ways:
>> > >+ *
>> > >+ * 1=2E Sequentially: If the parallel argument is B_FALSE or the
>> > >ZFS_SERIAL_MOUNT
>> > >+ *    environment variable is set, then we issue callbacks
>> > >sequentially=2E
>> > >+ *
>> > >+ * 2=2E In parallel: If the parallel argument is B_TRUE and the
>> > >ZFS_SERIAL_MOUNT
>> > >+ *    environment variable is not set, then we use a tpool to
>dispatch
>> > >threads
>> > >+ *    to mount filesystems in parallel=2E This function dispatches
>tasks
>> > >to mount
>> > >+ *    the filesystems at the top-level mountpoints, and these
>tasks in
>> > >turn
>> > >+ *    are responsible for recursively mounting filesystems in
>their
>> > >children
>> > >+ *    mountpoints=2E
>> > >+ */
>> > >+void
>> > >+zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t
>**handles,
>> > >+    size_t num_handles, zfs_iter_f func, void *data, boolean_t
>> > >parallel)
>> > >+{
>> > >+	zoneid_t zoneid =3D getzoneid();
>> > >+
>> > >+	/*
>> > >+	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
>> > >+	 * variable that can be used as a convenience to do a/b
>comparison
>> > >+	 * of serial vs=2E parallel mounting=2E
>> > >+	 */
>> > >+	boolean_t serial_mount =3D !parallel ||
>> > >+	    (getenv("ZFS_SERIAL_MOUNT") !=3D NULL);
>> > >+
>> > >+	/*
>> > >+	 * Sort the datasets by mountpoint=2E See mountpoint_cmp for
>details
>> > >+	 * of how these are sorted=2E
>> > >+	 */
>> > >+	qsort(handles, num_handles, sizeof (zfs_handle_t *),
>mountpoint_cmp);
>> > >+
>> > >+	if (serial_mount) {
>> > >+		for (int i =3D 0; i < num_handles; i++) {
>> > >+			func(handles[i], data);
>> > >+		}
>> > >+		return;
>> > >+	}
>> > >+
>> > >+	/*
>> > >+	 * Issue the callback function for each dataset using a parallel
>> > >+	 * algorithm that uses a thread pool to manage threads=2E
>> > >+	 */
>> > >+	tpool_t *tp =3D tpool_create(1, mount_tp_nthr, 0, NULL);
>> > >+
>> > >+	/*
>> > >+	 * There may be multiple "top level" mountpoints outside of the
>> > >pool's
>> > >+	 * root mountpoint, e=2Eg=2E: /foo /bar=2E Dispatch a mount task f=
or
>each
>> > >of
>> > >+	 * these=2E
>> > >+	 */
>> > >+	for (int i =3D 0; i < num_handles;
>> > >+	    i =3D non_descendant_idx(handles, num_handles, i)) {
>> > >+		/*
>> > >+		 * Since the mountpoints have been sorted so that the zoned
>> > >+		 * filesystems are at the end, a zoned filesystem seen from
>> > >+		 * the global zone means that we're done=2E
>> > >+		 */
>> > >+		if (zoneid =3D=3D GLOBAL_ZONEID &&
>> > >+		    zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
>> > >+			break;
>> > >+		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
>> > >+		    tp);
>> > >+	}
>> > >+
>> > >+	tpool_wait(tp);	/* wait for all scheduled mounts to complete */
>> > >+	tpool_destroy(tp);
>> > >+}
>> > >+
>> > >+/*
>> > >* Mount and share all datasets within the given pool=2E  This
>assumes
>> > >that no
>> > >- * datasets within the pool are currently mounted=2E  Because users
>can
>> > >create
>> > >- * complicated nested hierarchies of mountpoints, we first gather
>all
>> > >the
>> > >- * datasets and mountpoints within the pool, and sort them by
>> > >mountpoint=2E  Once
>> > >- * we have the list of all filesystems, we iterate over them in
>order
>> > >and mount
>> > >- * and/or share each one=2E
>> > >+ * datasets within the pool are currently mounted=2E
>> > >  */
>> > > #pragma weak zpool_mount_datasets =3D zpool_enable_datasets
>> > > int
>> > >zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts,
>int
>> > >flags)
>> > > {
>> > > 	get_all_cb_t cb =3D { 0 };
>> > >-	libzfs_handle_t *hdl =3D zhp->zpool_hdl;
>> > >+	mount_state_t ms =3D { 0 };
>> > > 	zfs_handle_t *zfsp;
>> > >-	int i, ret =3D -1;
>> > >-	int *good;
>> > >+	int ret =3D 0;
>> > >=20
>> > >-	/*
>> > >-	 * Gather all non-snap datasets within the pool=2E
>> > >-	 */
>> > >-	if ((zfsp =3D zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) =
=3D=3D
>> > >NULL)
>> > >+	if ((zfsp =3D zfs_open(zhp->zpool_hdl, zhp->zpool_name,
>> > >+	    ZFS_TYPE_DATASET)) =3D=3D NULL)
>> > > 		goto out;
>> > >=20
>> > >-	libzfs_add_handle(&cb, zfsp);
>> > >-	if (zfs_iter_filesystems(zfsp, mount_cb, &cb) !=3D 0)
>> > >-		goto out;
>> > > 	/*
>> > >-	 * Sort the datasets by mountpoint=2E
>> > >+	 * Gather all non-snapshot datasets within the pool=2E Start by
>adding
>> > >+	 * the root filesystem for this pool to the list, and then
>iterate
>> > >+	 * over all child filesystems=2E
>> > > 	 */
>> > >-	qsort(cb=2Ecb_handles, cb=2Ecb_used, sizeof (void *),
>> > >-	    libzfs_dataset_cmp);
>> > >+	libzfs_add_handle(&cb, zfsp);
>> > >+	if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) !=3D 0)
>> > >+		goto out;
>> > >=20
>> > > 	/*
>> > >-	 * And mount all the datasets, keeping track of which ones
>> > >-	 * succeeded or failed=2E
>> > >+	 * Mount all filesystems
>> > > 	 */
>> > >-	if ((good =3D zfs_alloc(zhp->zpool_hdl,
>> > >-	    cb=2Ecb_used * sizeof (int))) =3D=3D NULL)
>> > >-		goto out;
>> > >+	ms=2Ems_mntopts =3D mntopts;
>> > >+	ms=2Ems_mntflags =3D flags;
>> > >+	zfs_foreach_mountpoint(zhp->zpool_hdl, cb=2Ecb_handles,
>cb=2Ecb_used,
>> > >+	    zfs_mount_one, &ms, B_TRUE);
>> > >+	if (ms=2Ems_mntstatus !=3D 0)
>> > >+		ret =3D ms=2Ems_mntstatus;
>> > >=20
>> > >-	ret =3D 0;
>> > >-	for (i =3D 0; i < cb=2Ecb_used; i++) {
>> > >-		if (zfs_mount(cb=2Ecb_handles[i], mntopts, flags) !=3D 0)
>> > >-			ret =3D -1;
>> > >-		else
>> > >-			good[i] =3D 1;
>> > >-	}
>> > >-
>> > > 	/*
>> > >-	 * Then share all the ones that need to be shared=2E This needs
>> > >-	 * to be a separate pass in order to avoid excessive reloading
>> > >-	 * of the configuration=2E Good should never be NULL since
>> > >-	 * zfs_alloc is supposed to exit if memory isn't available=2E
>> > >+	 * Share all filesystems that need to be shared=2E This needs to
>be
>> > >+	 * a separate pass because libshare is not mt-safe, and so we
>need
>> > >+	 * to share serially=2E
>> > > 	 */
>> > >-	for (i =3D 0; i < cb=2Ecb_used; i++) {
>> > >-		if (good[i] && zfs_share(cb=2Ecb_handles[i]) !=3D 0)
>> > >-			ret =3D -1;
>> > >-	}
>> > >+	ms=2Ems_mntstatus =3D 0;
>> > >+	zfs_foreach_mountpoint(zhp->zpool_hdl, cb=2Ecb_handles,
>cb=2Ecb_used,
>> > >+	    zfs_share_one, &ms, B_FALSE);
>> > >+	if (ms=2Ems_mntstatus !=3D 0)
>> > >+		ret =3D ms=2Ems_mntstatus;
>> > >=20
>> > >-	free(good);
>> > >-
>> > > out:
>> > >-	for (i =3D 0; i < cb=2Ecb_used; i++)
>> > >+	for (int i =3D 0; i < cb=2Ecb_used; i++)
>> > > 		zfs_close(cb=2Ecb_handles[i]);
>> > > 	free(cb=2Ecb_handles);
>> > >=20
>> >=20
>> > This broke my systems, many filesystems fail to mount causing
>nullfs late mounts to fail=2E No details now until tonight=2E
>> >=20
>> > Suggest we back this out until it is properly tested=2E
>> >=20
>>=20
>> What fails to mount? what message? can you provide Gandi folks more
>informations
>> so they can fix?
>>=20
>> I will revert if we cannot have a quick fix but let s give them a
>chance to fix
>> first=2E
>>=20
>With the proper email in CC there is a better chance to reach at them
>:)
>
>Best regards,
>Bapt

Sorry about that=2E I'm terribly frustrated as this broke my mail gateway,=
 having to fix it using juiceSSH on my phone on the bus=2E Ssh on the phone=
 makes for a very grumpy Cy=2E

I did bring my personal laptop to work, so I'll try to help out testing th=
is at noon here and maybe look at it more=2E I'll help out any way I can=2E


--=20
Pardon the typos and autocorrect, small keyboard in use=2E
Cheers,
Cy Schubert <Cy=2ESchubert@cschubert=2Ecom>
FreeBSD UNIX: <cy@FreeBSD=2Eorg> Web: http://www=2EFreeBSD=2Eorg

	The need of the many outweighs the greed of the few=2E



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?44EBCB1E-4872-429A-A967-FA24CC299A5D>