Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 27 Sep 2018 17:22:40 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r338975 - in stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201809271722.w8RHMevF037784@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Thu Sep 27 17:22:40 2018
New Revision: 338975
URL: https://svnweb.freebsd.org/changeset/base/338975

Log:
  MFC r334810 (by benno), r338205, r338206:
  r334810:
  Break recursion involving getnewvnode and zfs_rmnode.
  
  When we're at our vnode limit, getnewvnode will call into the vnode LRU
  cache to free up vnodes. If the vnode we try to recycle is a ZFS vnode we
  end up, eventually, in zfs_rmnode. If the ZFS vnode we're recycling
  represents something with extended attributes, zfs_rmnode will call
  zfs_zget which will attempt to allocate another vnode. If the next vnode we
  try to recycle is also a ZFS vnode representing something with extended
  attributes we can recurse further. This ends up being unbounded and can end
  up overflowing the stack.
  
  In order to avoid this, restructure zfs_rmnode to simply add the extended
  attribute directory's object ID to the unlinked set, thus not requiring the
  allocation of a vnode. We then schedule a task that calls zfs_unlinked_drain
  which will do the work of properly marking the vnodes for unlinking.
  zfs_unlinked_drain is also called on mount so these will be cleaned up
  there.
  
  r338205:
  Create separate taskqueue to call zfs_unlinked_drain().
  
  r334810 introduced zfs_unlinked_drain() dispatch to taskqueue on every
  deletion of a file with extended attributes.  Using system_taskq for that
  with its multiple threads in case of multiple files deletion caused all
  available CPU threads to uselessly spin on busy locks, completely blocking
  the system.
  
  Use of a single dedicated taskqueue is the only easy solution I've found,
  while it would be great if we could specify that some task should be
  executed only one at a time, never in parallel, while many other tasks
  could still use different threads at the same time.
  
  r338206:
  Add dmu_tx_assign() error handling in zfs_unlinked_drain().
  
  The error handling got lost in r334810, while according to the report an
  error may happen there if the dataset is over quota.  In such a case,
  just leave the node in the unlinked list to be freed some time later.

Modified:
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h	Thu Sep 27 17:11:11 2018	(r338974)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h	Thu Sep 27 17:22:40 2018	(r338975)
@@ -85,6 +85,9 @@ struct zfsvfs {
 	sa_attr_type_t	*z_attr_table;	/* SA attr mapping->id */
 #define	ZFS_OBJ_MTX_SZ	64
 	kmutex_t	z_hold_mtx[ZFS_OBJ_MTX_SZ];	/* znode hold locks */
+#if defined(__FreeBSD__)
+	struct task	z_unlinked_drain_task;
+#endif
 };
 
 /*

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c	Thu Sep 27 17:11:11 2018	(r338974)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c	Thu Sep 27 17:22:40 2018	(r338975)
@@ -281,6 +281,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
 	zap_attribute_t zap;
 	dmu_object_info_t doi;
 	znode_t		*zp;
+	dmu_tx_t	*tx;
 	int		error;
 
 	/*
@@ -317,6 +318,26 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
 			continue;
 
 		vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
+#if defined(__FreeBSD__)
+		/*
+		 * Due to changes in zfs_rmnode we need to make sure the
+		 * link count is set to zero here.
+		 */
+		if (zp->z_links != 0) {
+			tx = dmu_tx_create(zfsvfs->z_os);
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+			error = dmu_tx_assign(tx, TXG_WAIT);
+			if (error != 0) {
+				dmu_tx_abort(tx);
+				vput(ZTOV(zp));
+				continue;
+			}
+			zp->z_links = 0;
+			VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+			    &zp->z_links, sizeof (zp->z_links), tx));
+			dmu_tx_commit(tx);
+		}
+#endif
 		zp->z_unlinked = B_TRUE;
 		vput(ZTOV(zp));
 	}
@@ -388,12 +409,15 @@ zfs_purgedir(znode_t *dzp)
 	return (skipped);
 }
 
+#if defined(__FreeBSD__)
+extern taskq_t *zfsvfs_taskq;
+#endif
+
 void
 zfs_rmnode(znode_t *zp)
 {
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	objset_t	*os = zfsvfs->z_os;
-	znode_t		*xzp = NULL;
 	dmu_tx_t	*tx;
 	uint64_t	acl_obj;
 	uint64_t	xattr_obj;
@@ -443,11 +467,8 @@ zfs_rmnode(znode_t *zp)
 	 */
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 	    &xattr_obj, sizeof (xattr_obj));
-	if (error == 0 && xattr_obj) {
-		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
-		ASSERT3S(error, ==, 0);
-		vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
-	}
+	if (error)
+		xattr_obj = 0;
 
 	acl_obj = zfs_external_acl(zp);
 
@@ -457,10 +478,8 @@ zfs_rmnode(znode_t *zp)
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
-	if (xzp) {
+	if (xattr_obj)
 		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
-		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
-	}
 	if (acl_obj)
 		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
 
@@ -475,9 +494,25 @@ zfs_rmnode(znode_t *zp)
 		dmu_tx_abort(tx);
 		zfs_znode_dmu_fini(zp);
 		zfs_znode_free(zp);
-		goto out;
+		return;
 	}
 
+#if defined(__FreeBSD__)
+	/*
+	 * FreeBSD's implemention of zfs_zget requires a vnode to back it.
+	 * This means that we could end up calling into getnewvnode while
+	 * calling zfs_rmnode as a result of a prior call to getnewvnode
+	 * trying to clear vnodes out of the cache. If this repeats we can
+	 * recurse enough that we overflow our stack. To avoid this, we
+	 * avoid calling zfs_zget on the xattr znode and instead simply add
+	 * it to the unlinked set and schedule a call to zfs_unlinked_drain.
+	 */
+	if (xattr_obj) {
+		/* Add extended attribute directory to the unlinked set. */
+		VERIFY3U(0, ==,
+		    zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx));
+	}
+#else
 	if (xzp) {
 		ASSERT(error == 0);
 		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
@@ -486,6 +521,7 @@ zfs_rmnode(znode_t *zp)
 		    &xzp->z_links, sizeof (xzp->z_links), tx));
 		zfs_unlinked_add(xzp, tx);
 	}
+#endif
 
 	/* Remove this znode from the unlinked set */
 	VERIFY3U(0, ==,
@@ -494,9 +530,18 @@ zfs_rmnode(znode_t *zp)
 	zfs_znode_delete(zp, tx);
 
 	dmu_tx_commit(tx);
-out:
-	if (xzp)
-		vput(ZTOV(xzp));
+
+#if defined(__FreeBSD__)
+	if (xattr_obj) {
+		/*
+		 * We're using the FreeBSD taskqueue API here instead of
+		 * the Solaris taskq API since the FreeBSD API allows for a
+		 * task to be enqueued multiple times but executed once.
+		 */
+		taskqueue_enqueue(zfsvfs_taskq->tq_queue,
+		    &zfsvfs->z_unlinked_drain_task);
+	}
+#endif
 }
 
 static uint64_t

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Thu Sep 27 17:11:11 2018	(r338974)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Thu Sep 27 17:22:40 2018	(r338975)
@@ -972,6 +972,17 @@ zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
 	return (0);
 }
 
+#if defined(__FreeBSD__)
+taskq_t *zfsvfs_taskq;
+
+static void
+zfsvfs_task_unlinked_drain(void *context, int pending __unused)
+{
+
+	zfs_unlinked_drain((zfsvfs_t *)context);
+}
+#endif
+
 int
 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
 {
@@ -1023,6 +1034,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, 
 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
+#if defined(__FreeBSD__)
+	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
+	    zfsvfs_task_unlinked_drain, zfsvfs);
+#endif
 #ifdef DIAGNOSTIC
 	rrm_init(&zfsvfs->z_teardown_lock, B_TRUE);
 #else
@@ -2018,6 +2033,11 @@ zfs_umount(vfs_t *vfsp, int fflag)
 	}
 #endif
 
+	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
+	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
+		taskqueue_drain(zfsvfs_taskq->tq_queue,
+		    &zfsvfs->z_unlinked_drain_task);
+
 	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
 	os = zfsvfs->z_os;
 
@@ -2382,11 +2402,17 @@ zfs_init(void)
 	zfs_vnodes_adjust();
 
 	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
+#if defined(__FreeBSD__)
+	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
+#endif
 }
 
 void
 zfs_fini(void)
 {
+#if defined(__FreeBSD__)
+	taskq_destroy(zfsvfs_taskq);
+#endif
 	zfsctl_fini();
 	zfs_znode_fini();
 	zfs_vnodes_adjust_back();



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809271722.w8RHMevF037784>