Date: Thu, 27 Sep 2018 17:22:40 +0000 (UTC) From: Alexander Motin <mav@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r338975 - in stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <201809271722.w8RHMevF037784@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mav Date: Thu Sep 27 17:22:40 2018 New Revision: 338975 URL: https://svnweb.freebsd.org/changeset/base/338975 Log: MFC r334810 (by benno), r338205, r338206: r334810: Break recursion involving getnewvnode and zfs_rmnode. When we're at our vnode limit, getnewvnode will call into the vnode LRU cache to free up vnodes. If the vnode we try to recycle is a ZFS vnode we end up, eventually, in zfs_rmnode. If the ZFS vnode we're recycling represents something with extended attributes, zfs_rmnode will call zfs_zget which will attempt to allocate another vnode. If the next vnode we try to recycle is also a ZFS vnode representing something with extended attributes we can recurse further. This ends up being unbounded and can end up overflowing the stack. In order to avoid this, restructure zfs_rmnode to simply add the extended attribute directory's object ID to the unlinked set, thus not requiring the allocation of a vnode. We then schedule a task that calls zfs_unlinked_drain which will do the work of properly marking the vnodes for unlinking. zfs_unlinked_drain is also called on mount so these will be cleaned up there. r338205: Create separate taskqueue to call zfs_unlinked_drain(). r334810 introduced zfs_unlinked_drain() dispatch to taskqueue on every deletion of a file with extended attributes. Using system_taskq for that with its multiple threads in case of multiple files deletion caused all available CPU threads to uselessly spin on busy locks, completely blocking the system. Use of single dedicated taskqueue is the only easy solution I've found, while in would be great if we could specify that some task should be executed only once at a time, but never in parallel, while many tasks could use different threads same time. r338206: Add dmu_tx_assign() error handling in zfs_unlinked_drain(). The error handling got lost during r334810, while according to the report error there may happen in case of dataset being over quota. In such case just leave the node in the unlinked list to be freed sometimes later. Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Thu Sep 27 17:11:11 2018 (r338974) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Thu Sep 27 17:22:40 2018 (r338975) @@ -85,6 +85,9 @@ struct zfsvfs { sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ #define ZFS_OBJ_MTX_SZ 64 kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */ +#if defined(__FreeBSD__) + struct task z_unlinked_drain_task; +#endif }; /* Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Thu Sep 27 17:11:11 2018 (r338974) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Thu Sep 27 17:22:40 2018 (r338975) @@ -281,6 +281,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) zap_attribute_t zap; dmu_object_info_t doi; znode_t *zp; + dmu_tx_t *tx; int error; /* @@ -317,6 +318,26 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) continue; vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY); +#if defined(__FreeBSD__) + /* + * Due to changes in zfs_rmnode we need to make sure the + * link count is set to zero here. + */ + if (zp->z_links != 0) { + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error != 0) { + dmu_tx_abort(tx); + vput(ZTOV(zp)); + continue; + } + zp->z_links = 0; + VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + &zp->z_links, sizeof (zp->z_links), tx)); + dmu_tx_commit(tx); + } +#endif zp->z_unlinked = B_TRUE; vput(ZTOV(zp)); } @@ -388,12 +409,15 @@ zfs_purgedir(znode_t *dzp) return (skipped); } +#if defined(__FreeBSD__) +extern taskq_t *zfsvfs_taskq; +#endif + void zfs_rmnode(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; - znode_t *xzp = NULL; dmu_tx_t *tx; uint64_t acl_obj; uint64_t xattr_obj; @@ -443,11 +467,8 @@ zfs_rmnode(znode_t *zp) */ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, sizeof (xattr_obj)); - if (error == 0 && xattr_obj) { - error = zfs_zget(zfsvfs, xattr_obj, &xzp); - ASSERT3S(error, ==, 0); - vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); - } + if (error) + xattr_obj = 0; acl_obj = zfs_external_acl(zp); @@ -457,10 +478,8 @@ zfs_rmnode(znode_t *zp) tx = dmu_tx_create(os); dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - if (xzp) { + if (xattr_obj) dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); - dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); - } if (acl_obj) dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); @@ -475,9 +494,25 @@ zfs_rmnode(znode_t *zp) dmu_tx_abort(tx); zfs_znode_dmu_fini(zp); zfs_znode_free(zp); - goto out; + return; } +#if defined(__FreeBSD__) + /* + * FreeBSD's implemention of zfs_zget requires a vnode to back it. + * This means that we could end up calling into getnewvnode while + * calling zfs_rmnode as a result of a prior call to getnewvnode + * trying to clear vnodes out of the cache. If this repeats we can + * recurse enough that we overflow our stack. To avoid this, we + * avoid calling zfs_zget on the xattr znode and instead simply add + * it to the unlinked set and schedule a call to zfs_unlinked_drain. + */ + if (xattr_obj) { + /* Add extended attribute directory to the unlinked set. */ + VERIFY3U(0, ==, + zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx)); + } +#else if (xzp) { ASSERT(error == 0); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ @@ -486,6 +521,7 @@ zfs_rmnode(znode_t *zp) &xzp->z_links, sizeof (xzp->z_links), tx)); zfs_unlinked_add(xzp, tx); } +#endif /* Remove this znode from the unlinked set */ VERIFY3U(0, ==, @@ -494,9 +530,18 @@ zfs_rmnode(znode_t *zp) zfs_znode_delete(zp, tx); dmu_tx_commit(tx); -out: - if (xzp) - vput(ZTOV(xzp)); + +#if defined(__FreeBSD__) + if (xattr_obj) { + /* + * We're using the FreeBSD taskqueue API here instead of + * the Solaris taskq API since the FreeBSD API allows for a + * task to be enqueued multiple times but executed once. + */ + taskqueue_enqueue(zfsvfs_taskq->tq_queue, + &zfsvfs->z_unlinked_drain_task); + } +#endif } static uint64_t Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Thu Sep 27 17:11:11 2018 (r338974) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Thu Sep 27 17:22:40 2018 (r338975) @@ -972,6 +972,17 @@ zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) return (0); } +#if defined(__FreeBSD__) +taskq_t *zfsvfs_taskq; + +static void +zfsvfs_task_unlinked_drain(void *context, int pending __unused) +{ + + zfs_unlinked_drain((zfsvfs_t *)context); +} +#endif + int zfsvfs_create(const char *osname, zfsvfs_t **zfvp) { @@ -1023,6 +1034,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); +#if defined(__FreeBSD__) + TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, + zfsvfs_task_unlinked_drain, zfsvfs); +#endif #ifdef DIAGNOSTIC rrm_init(&zfsvfs->z_teardown_lock, B_TRUE); #else @@ -2018,6 +2033,11 @@ zfs_umount(vfs_t *vfsp, int fflag) } #endif + while (taskqueue_cancel(zfsvfs_taskq->tq_queue, + &zfsvfs->z_unlinked_drain_task, NULL) != 0) + taskqueue_drain(zfsvfs_taskq->tq_queue, + &zfsvfs->z_unlinked_drain_task); + VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); os = zfsvfs->z_os; @@ -2382,11 +2402,17 @@ zfs_init(void) zfs_vnodes_adjust(); dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); +#if defined(__FreeBSD__) + zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); +#endif } void zfs_fini(void) { +#if defined(__FreeBSD__) + taskq_destroy(zfsvfs_taskq); +#endif zfsctl_fini(); zfs_znode_fini(); zfs_vnodes_adjust_back();
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809271722.w8RHMevF037784>