Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 Sep 2018 17:58:06 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r338927 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201809251758.w8PHw6F6075060@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Tue Sep 25 17:58:06 2018
New Revision: 338927
URL: https://svnweb.freebsd.org/changeset/base/338927

Log:
  zfs: depessimize zfs_root with rmlocks
  
  Currently vfs calls the root method on each absolute lookup and when
  crossing mount points.
  
  zfs_root ends up looking up the inode internally as if it was not
  instantianted which results in significant lock contention on systems
  like EPYC.
  
  Store the vnode in the mount point and protect the access with rmlocks.
  This is a temporary hack for 12.0.
  
  Sample result:
  
  before:
  make -s -j 128 buildkernel 2778.09s user 3319.45s system 8370% cpu 1:12.85 total
  
  after:
  make -s -j 128 buildkernel 3199.57s user 1772.78s system 8232% cpu 1:00.40 total
  
  Tested by:	pho (zfs mount/unmount tests)
  Reviewed by:	kib, mav, sef (different parts)
  Approved by:	re (gjb)
  Differential Revision:	https://reviews.freebsd.org/D17233

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h	Tue Sep 25 17:41:48 2018	(r338926)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h	Tue Sep 25 17:58:06 2018	(r338927)
@@ -46,6 +46,8 @@ struct zfsvfs {
 	zfsvfs_t	*z_parent;	/* parent fs */
 	objset_t	*z_os;		/* objset reference */
 	uint64_t	z_root;		/* id of root znode */
+	struct vnode	*z_rootvnode;	/* root vnode */
+	struct rmlock	z_rootvnodelock;/* protection for root vnode */
 	uint64_t	z_unlinkedobj;	/* id of unlinked zapobj */
 	uint64_t	z_max_blksz;	/* maximum block size for files */
 	uint64_t	z_fuid_obj;	/* fuid table object number */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Tue Sep 25 17:41:48 2018	(r338926)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Tue Sep 25 17:58:06 2018	(r338927)
@@ -65,6 +65,7 @@
 #include <sys/spa_boot.h>
 #include <sys/jail.h>
 #include <ufs/ufs/quota.h>
+#include <sys/rmlock.h>
 
 #include "zfs_comutil.h"
 
@@ -92,6 +93,9 @@ static int zfs_version_zpl = ZPL_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
     "ZPL_VERSION");
 
+static int zfs_root_setvnode(zfsvfs_t *zfsvfs);
+static void zfs_root_dropvnode(zfsvfs_t *zfsvfs);
+
 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
 static int zfs_mount(vfs_t *vfsp);
 static int zfs_umount(vfs_t *vfsp, int fflag);
@@ -1209,6 +1213,8 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, 
 	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
+	rm_init(&zfsvfs->z_rootvnodelock, "zfs root vnode lock");
+
 	error = zfsvfs_init(zfsvfs, os);
 	if (error != 0) {
 		*zfvp = NULL;
@@ -1315,6 +1321,8 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
 	rw_enter(&zfsvfs_lock, RW_READER);
 	rw_exit(&zfsvfs_lock);
 
+	rm_destroy(&zfsvfs->z_rootvnodelock);
+
 	zfs_fuid_destroy(zfsvfs);
 
 	mutex_destroy(&zfsvfs->z_znodes_lock);
@@ -1921,6 +1929,8 @@ zfs_mount(vfs_t *vfsp)
 	error = zfs_domount(vfsp, osname);
 	PICKUP_GIANT();
 
+	zfs_root_setvnode((zfsvfs_t *)vfsp->vfs_data);
+
 #ifdef illumos
 	/*
 	 * Add an extra VFS_HOLD on our parent vfs so that it can't
@@ -1993,14 +2003,65 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp)
 }
 
 static int
+zfs_root_setvnode(zfsvfs_t *zfsvfs)
+{
+	znode_t *rootzp;
+	int error;
+
+	ZFS_ENTER(zfsvfs);
+	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
+	if (error != 0)
+		panic("could not zfs_zget for root vnode");
+	ZFS_EXIT(zfsvfs);
+
+	rm_wlock(&zfsvfs->z_rootvnodelock);
+	if (zfsvfs->z_rootvnode != NULL)
+		panic("zfs mount point already has a root vnode: %p\n",
+		    zfsvfs->z_rootvnode);
+	zfsvfs->z_rootvnode = ZTOV(rootzp);
+	rm_wunlock(&zfsvfs->z_rootvnodelock);
+	return (0);
+}
+
+static void
+zfs_root_putvnode(zfsvfs_t *zfsvfs)
+{
+	struct vnode *vp;
+
+	rm_wlock(&zfsvfs->z_rootvnodelock);
+	vp = zfsvfs->z_rootvnode;
+	zfsvfs->z_rootvnode = NULL;
+	rm_wunlock(&zfsvfs->z_rootvnodelock);
+	if (vp != NULL)
+		vrele(vp);
+}
+
+static int
 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
 {
+	struct rm_priotracker tracker;
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	znode_t *rootzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	rm_rlock(&zfsvfs->z_rootvnodelock, &tracker);
+	*vpp = zfsvfs->z_rootvnode;
+	if (*vpp != NULL && (((*vpp)->v_iflag & VI_DOOMED) == 0)) {
+		vrefact(*vpp);
+		rm_runlock(&zfsvfs->z_rootvnodelock, &tracker);
+		goto lock;
+	}
+	rm_runlock(&zfsvfs->z_rootvnodelock, &tracker);
 
+	/*
+	 * We found the vnode but did not like it.
+	 */
+	if (*vpp != NULL) {
+		*vpp = NULL;
+		zfs_root_putvnode(zfsvfs);
+	}
+
+	ZFS_ENTER(zfsvfs);
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error == 0)
 		*vpp = ZTOV(rootzp);
@@ -2008,6 +2069,7 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
 	ZFS_EXIT(zfsvfs);
 
 	if (error == 0) {
+lock:
 		error = vn_lock(*vpp, flags);
 		if (error != 0) {
 			VN_RELE(*vpp);
@@ -2125,6 +2187,8 @@ zfs_umount(vfs_t *vfsp, int fflag)
 	objset_t *os;
 	cred_t *cr = td->td_ucred;
 	int ret;
+
+	zfs_root_putvnode(zfsvfs);
 
 	ret = secpolicy_fs_unmount(cr, vfsp);
 	if (ret) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809251758.w8PHw6F6075060>