Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 18 Jul 2014 18:09:20 +0000 (UTC)
From:      Xin LI <delphij@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r268852 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys
Message-ID:  <201407181809.s6II9KkB002810@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Fri Jul 18 18:09:20 2014
New Revision: 268852
URL: http://svnweb.freebsd.org/changeset/base/268852

Log:
  5008 lock contention (rrw_exit) while running a read only load
  Reviewed by: Matthew Ahrens <matthew.ahrens@delphix.com>
  Reviewed by: George Wilson <george.wilson@delphix.com>
  Reviewed by: Alex Reece <alex.reece@delphix.com>
  Reviewed by: Christopher Siden <christopher.siden@delphix.com>
  Reviewed by: Richard Yao <ryao@gentoo.org>
  Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com>
  Approved by: Garrett D'Amore <garrett@damore.org>
  
  illumos/illumos-gate@c9030f6c93613fe30ee0c16f92b96da7816ac052

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/rrwlock.c	Fri Jul 18 18:09:20 2014	(r268852)
@@ -286,3 +286,91 @@ rrw_tsd_destroy(void *arg)
 		    (void *)curthread, (void *)rn->rn_rrl);
 	}
 }
+
+/*
+ * A reader-mostly lock implementation, tuning above reader-writer locks
+ * for hightly parallel read acquisitions, while pessimizing writes.
+ *
+ * The idea is to split single busy lock into array of locks, so that
+ * each reader can lock only one of them for read, depending on result
+ * of simple hash function.  That proportionally reduces lock congestion.
+ * Writer same time has to sequentially aquire write on all the locks.
+ * That makes write aquisition proportionally slower, but in places where
+ * it is used (filesystem unmount) performance is not critical.
+ *
+ * All the functions below are direct wrappers around functions above.
+ */
+void
+rrm_init(rrmlock_t *rrl, boolean_t track_all)
+{
+	int i;
+
+	for (i = 0; i < RRM_NUM_LOCKS; i++)
+		rrw_init(&rrl->locks[i], track_all);
+}
+
+void
+rrm_destroy(rrmlock_t *rrl)
+{
+	int i;
+
+	for (i = 0; i < RRM_NUM_LOCKS; i++)
+		rrw_destroy(&rrl->locks[i]);
+}
+
+void
+rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag)
+{
+	if (rw == RW_READER)
+		rrm_enter_read(rrl, tag);
+	else
+		rrm_enter_write(rrl);
+}
+
+/*
+ * This maps the current thread to a specific lock.  Note that the lock
+ * must be released by the same thread that acquired it.  We do this
+ * mapping by taking the thread pointer mod a prime number.  We examine
+ * only the low 32 bits of the thread pointer, because 32-bit division
+ * is faster than 64-bit division, and the high 32 bits have little
+ * entropy anyway.
+ */
+#define	RRM_TD_LOCK()	(((uint32_t)(uintptr_t)(curthread)) % RRM_NUM_LOCKS)
+
+void
+rrm_enter_read(rrmlock_t *rrl, void *tag)
+{
+	rrw_enter_read(&rrl->locks[RRM_TD_LOCK()], tag);
+}
+
+void
+rrm_enter_write(rrmlock_t *rrl)
+{
+	int i;
+
+	for (i = 0; i < RRM_NUM_LOCKS; i++)
+		rrw_enter_write(&rrl->locks[i]);
+}
+
+void
+rrm_exit(rrmlock_t *rrl, void *tag)
+{
+	int i;
+
+	if (rrl->locks[0].rr_writer == curthread) {
+		for (i = 0; i < RRM_NUM_LOCKS; i++)
+			rrw_exit(&rrl->locks[i], tag);
+	} else {
+		rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag);
+	}
+}
+
+boolean_t
+rrm_held(rrmlock_t *rrl, krw_t rw)
+{
+	if (rw == RW_WRITER) {
+		return (rrw_held(&rrl->locks[0], rw));
+	} else {
+		return (rrw_held(&rrl->locks[RRM_TD_LOCK()], rw));
+	}
+}

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/rrwlock.h	Fri Jul 18 18:09:20 2014	(r268852)
@@ -80,6 +80,31 @@ void rrw_tsd_destroy(void *arg);
 #define	RRW_LOCK_HELD(x) \
 	(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
 
+/*
+ * A reader-mostly lock implementation, tuning above reader-writer locks
+ * for hightly parallel read acquisitions, pessimizing write acquisitions.
+ *
+ * This should be a prime number.  See comment in rrwlock.c near
+ * RRM_TD_LOCK() for details.
+ */
+#define	RRM_NUM_LOCKS		17
+typedef struct rrmlock {
+	rrwlock_t	locks[RRM_NUM_LOCKS];
+} rrmlock_t;
+
+void rrm_init(rrmlock_t *rrl, boolean_t track_all);
+void rrm_destroy(rrmlock_t *rrl);
+void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
+void rrm_enter_read(rrmlock_t *rrl, void *tag);
+void rrm_enter_write(rrmlock_t *rrl);
+void rrm_exit(rrmlock_t *rrl, void *tag);
+boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
+
+#define	RRM_READ_HELD(x)	rrm_held(x, RW_READER)
+#define	RRM_WRITE_HELD(x)	rrm_held(x, RW_WRITER)
+#define	RRM_LOCK_HELD(x) \
+	(rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
+
 #ifdef	__cplusplus
 }
 #endif

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_vfsops.h	Fri Jul 18 18:09:20 2014	(r268852)
@@ -64,7 +64,7 @@ struct zfsvfs {
 	int		z_norm;		/* normalization flags */
 	boolean_t	z_atime;	/* enable atimes mount option */
 	boolean_t	z_unmounted;	/* unmounted */
-	rrwlock_t	z_teardown_lock;
+	rrmlock_t	z_teardown_lock;
 	krwlock_t	z_teardown_inactive_lock;
 	list_t		z_all_znodes;	/* all vnodes in the fs */
 	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h	Fri Jul 18 18:09:20 2014	(r268852)
@@ -238,7 +238,7 @@ typedef struct znode {
 /* Called on entry to each ZFS vnode and vfs operation  */
 #define	ZFS_ENTER(zfsvfs) \
 	{ \
-		rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
+		rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
 		if ((zfsvfs)->z_unmounted) { \
 			ZFS_EXIT(zfsvfs); \
 			return (EIO); \
@@ -246,7 +246,7 @@ typedef struct znode {
 	}
 
 /* Must be called before exiting the vop */
-#define	ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
+#define	ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
 /* Verifies the znode is valid */
 #define	ZFS_VERIFY_ZP(zp) \

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c	Fri Jul 18 18:09:20 2014	(r268852)
@@ -1420,7 +1420,7 @@ zfsvfs_hold(const char *name, void *tag,
 	if (getzfsvfs(name, zfvp) != 0)
 		error = zfsvfs_create(name, zfvp);
 	if (error == 0) {
-		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
+		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
 		    RW_READER, tag);
 		if ((*zfvp)->z_unmounted) {
 			/*
@@ -1428,7 +1428,7 @@ zfsvfs_hold(const char *name, void *tag,
 			 * thread should be just about to disassociate the
 			 * objset from the zfsvfs.
 			 */
-			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
+			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
 			return (SET_ERROR(EBUSY));
 		}
 	}
@@ -1438,7 +1438,7 @@ zfsvfs_hold(const char *name, void *tag,
 static void
 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
 {
-	rrw_exit(&zfsvfs->z_teardown_lock, tag);
+	rrm_exit(&zfsvfs->z_teardown_lock, tag);
 
 	if (zfsvfs->z_vfs) {
 		VFS_RELE(zfsvfs->z_vfs);

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_vfsops.c	Fri Jul 18 18:09:20 2014	(r268852)
@@ -1004,7 +1004,7 @@ zfsvfs_create(const char *osname, zfsvfs
 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
-	rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
+	rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1119,7 +1119,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	mutex_destroy(&zfsvfs->z_lock);
 	list_destroy(&zfsvfs->z_all_znodes);
-	rrw_destroy(&zfsvfs->z_teardown_lock);
+	rrm_destroy(&zfsvfs->z_teardown_lock);
 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
 	rw_destroy(&zfsvfs->z_fuid_lock);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1784,7 +1784,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
 {
 	znode_t	*zp;
 
-	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
+	rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
 
 	if (!unmounting) {
 		/*
@@ -1814,7 +1814,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
 	 */
 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
-		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 		return (SET_ERROR(EIO));
 	}
 
@@ -1841,7 +1841,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
 	 */
 	if (unmounting) {
 		zfsvfs->z_unmounted = B_TRUE;
-		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 	}
 
@@ -2073,7 +2073,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const ch
 	znode_t *zp;
 	uint64_t sa_obj = 0;
 
-	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
+	ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
 
 	/*
@@ -2129,7 +2129,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const ch
 bail:
 	/* release the VOPs */
 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
-	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+	rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 
 	if (err) {
 		/*

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c	Fri Jul 18 18:05:09 2014	(r268851)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c	Fri Jul 18 18:09:20 2014	(r268852)
@@ -276,7 +276,7 @@ zfs_znode_move(void *buf, void *newbuf, 
 	 * can safely ensure that the filesystem is not and will not be
 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
 	 */
-	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
+	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
 	if (zfsvfs->z_unmounted) {
 		ZFS_EXIT(zfsvfs);
 		rw_exit(&zfsvfs_lock);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201407181809.s6II9KkB002810>