Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 24 May 2017 22:21:25 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r318828 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201705242221.v4OMLPFi052453@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Wed May 24 22:21:24 2017
New Revision: 318828
URL: https://svnweb.freebsd.org/changeset/base/318828

Log:
  MFV r316917: 7968 multi-threaded spa_sync()
  
  illumos/illumos-gate@94c2d0eb22e9624151ee84a7edbf7178e1bf4087
  https://github.com/illumos/illumos-gate/commit/94c2d0eb22e9624151ee84a7edbf7178e1bf4087
  
  https://www.illumos.org/issues/7968
    spa_sync() iterates over all the dirty dnodes and processes each of them by
    calling dnode_sync(). If there are many dirty dnodes (e.g. because we created
    or removed a lot of files), the single thread of spa_sync() calling
    dnode_sync() can become a bottleneck. Additionally, if many dnodes are dirtied
    concurrently in open context (e.g. due to concurrent file creation), the
    os_lock will experience lock contention via dnode_setdirty().
    The solution is to track dirty dnodes on a multilist_t, and for spa_sync() to
    use separate threads to process each of the sublists in the multilist.
    On the concurrent file creation microbenchmark, the performance improvement
    from dnode_setdirty() is up to 7%. Additionally, the wall clock time spent in
    spa_sync() is reduced to 15%-40% of the single-threaded case. In terms of
    cost/reward, once the other bottlenecks are addressed, fixing this bug will
    provide a medium-large performance gain and require a medium amount of
    effort to implement.
  
  Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
  Reviewed by: Brad Lewis <brad.lewis@delphix.com>
  Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
  Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
  Approved by: Dan McDonald <danmcd@omniti.com>
  Author: Matthew Ahrens <mahrens@delphix.com>
  MFC after:	3 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed May 24 22:21:24 2017	(r318828)
@@ -473,7 +473,7 @@ typedef struct arc_state {
 	/*
 	 * list of evictable buffers
 	 */
-	multilist_t arcs_list[ARC_BUFC_NUMTYPES];
+	multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
 	/*
 	 * total amount of evictable data in this state
 	 */
@@ -2359,7 +2359,7 @@ add_reference(arc_buf_hdr_t *hdr, void *
 	    (state != arc_anon)) {
 		/* We don't use the L2-only state list. */
 		if (state != arc_l2c_only) {
-			multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
+			multilist_remove(state->arcs_list[arc_buf_type(hdr)],
 			    hdr);
 			arc_evictable_space_decrement(hdr, state);
 		}
@@ -2389,7 +2389,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmu
 	 */
 	if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
 	    (state != arc_anon)) {
-		multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
+		multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
 		ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
 		arc_evictable_space_increment(hdr, state);
 	}
@@ -2442,7 +2442,7 @@ arc_change_state(arc_state_t *new_state,
 	if (refcnt == 0) {
 		if (old_state != arc_anon && old_state != arc_l2c_only) {
 			ASSERT(HDR_HAS_L1HDR(hdr));
-			multilist_remove(&old_state->arcs_list[buftype], hdr);
+			multilist_remove(old_state->arcs_list[buftype], hdr);
 
 			if (GHOST_STATE(old_state)) {
 				ASSERT0(bufcnt);
@@ -2460,7 +2460,7 @@ arc_change_state(arc_state_t *new_state,
 			 * beforehand.
 			 */
 			ASSERT(HDR_HAS_L1HDR(hdr));
-			multilist_insert(&new_state->arcs_list[buftype], hdr);
+			multilist_insert(new_state->arcs_list[buftype], hdr);
 
 			if (GHOST_STATE(new_state)) {
 				ASSERT0(bufcnt);
@@ -2586,8 +2586,8 @@ arc_change_state(arc_state_t *new_state,
 	 * L2 headers should never be on the L2 state list since they don't
 	 * have L1 headers allocated.
 	 */
-	ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
-	    multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
+	ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
+	    multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
 }
 
 void
@@ -3671,7 +3671,7 @@ arc_evict_state(arc_state_t *state, uint
     arc_buf_contents_t type)
 {
 	uint64_t total_evicted = 0;
-	multilist_t *ml = &state->arcs_list[type];
+	multilist_t *ml = state->arcs_list[type];
 	int num_sublists;
 	arc_buf_hdr_t **markers;
 
@@ -3875,8 +3875,8 @@ arc_adjust_meta(void)
 static arc_buf_contents_t
 arc_adjust_type(arc_state_t *state)
 {
-	multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
-	multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
+	multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
+	multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
 	int data_idx = multilist_get_random_index(data_ml);
 	int meta_idx = multilist_get_random_index(meta_ml);
 	multilist_sublist_t *data_mls;
@@ -6209,44 +6209,44 @@ arc_state_init(void)
 	arc_mfu_ghost = &ARC_mfu_ghost;
 	arc_l2c_only = &ARC_l2c_only;
 
-	multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mru->arcs_list[ARC_BUFC_METADATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mru->arcs_list[ARC_BUFC_DATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mfu->arcs_list[ARC_BUFC_METADATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mfu->arcs_list[ARC_BUFC_DATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
-	multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
-	    sizeof (arc_buf_hdr_t),
+	arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
+	    multilist_create(sizeof (arc_buf_hdr_t),
 	    offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
 	    arc_state_multilist_index_func);
 
@@ -6294,14 +6294,14 @@ arc_state_fini(void)
 	refcount_destroy(&arc_mfu_ghost->arcs_size);
 	refcount_destroy(&arc_l2c_only->arcs_size);
 
-	multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
-	multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
-	multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
 }
 
 uint64_t
@@ -7098,16 +7098,16 @@ l2arc_sublist_lock(int list_num)
 
 	switch (list_num) {
 	case 0:
-		ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
+		ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
 		break;
 	case 1:
-		ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
+		ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
 		break;
 	case 2:
-		ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
+		ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
 		break;
 	case 3:
-		ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
+		ml = arc_mru->arcs_list[ARC_BUFC_DATA];
 		break;
 	}
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Wed May 24 22:21:24 2017	(r318828)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -80,7 +80,7 @@ static boolean_t dbuf_evict_thread_exit;
  * Dbufs that are aged out of the cache will be immediately destroyed and
  * become eligible for arc eviction.
  */
-static multilist_t dbuf_cache;
+static multilist_t *dbuf_cache;
 static refcount_t dbuf_cache_size;
 uint64_t dbuf_cache_max_bytes = 100 * 1024 * 1024;
 
@@ -454,8 +454,8 @@ dbuf_cache_above_lowater(void)
 static void
 dbuf_evict_one(void)
 {
-	int idx = multilist_get_random_index(&dbuf_cache);
-	multilist_sublist_t *mls = multilist_sublist_lock(&dbuf_cache, idx);
+	int idx = multilist_get_random_index(dbuf_cache);
+	multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
 
 	ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
 
@@ -621,7 +621,7 @@ retry:
 	 */
 	dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
 
-	multilist_create(&dbuf_cache, sizeof (dmu_buf_impl_t),
+	dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_cache_link),
 	    dbuf_cache_multilist_index_func);
 	refcount_create(&dbuf_cache_size);
@@ -659,7 +659,7 @@ dbuf_fini(void)
 	cv_destroy(&dbuf_evict_cv);
 
 	refcount_destroy(&dbuf_cache_size);
-	multilist_destroy(&dbuf_cache);
+	multilist_destroy(dbuf_cache);
 }
 
 /*
@@ -2029,7 +2029,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	dbuf_clear_data(db);
 
 	if (multilist_link_active(&db->db_cache_link)) {
-		multilist_remove(&dbuf_cache, db);
+		multilist_remove(dbuf_cache, db);
 		(void) refcount_remove_many(&dbuf_cache_size,
 		    db->db.db_size, db);
 	}
@@ -2577,7 +2577,7 @@ top:
 
 	if (multilist_link_active(&db->db_cache_link)) {
 		ASSERT(refcount_is_zero(&db->db_holds));
-		multilist_remove(&dbuf_cache, db);
+		multilist_remove(dbuf_cache, db);
 		(void) refcount_remove_many(&dbuf_cache_size,
 		    db->db.db_size, db);
 	}
@@ -2796,7 +2796,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db,
 			    db->db_pending_evict) {
 				dbuf_destroy(db);
 			} else if (!multilist_link_active(&db->db_cache_link)) {
-				multilist_insert(&dbuf_cache, db);
+				multilist_insert(dbuf_cache, db);
 				(void) refcount_add_many(&dbuf_cache_size,
 				    db->db.db_size, db);
 				mutex_exit(&db->db_mtx);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Wed May 24 22:21:24 2017	(r318828)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -303,6 +303,42 @@ dmu_objset_byteswap(void *buf, size_t si
 	}
 }
 
+/*
+ * The hash is a CRC-based hash of the objset_t pointer and the object number.
+ */
+static uint64_t
+dnode_hash(const objset_t *os, uint64_t obj)
+{
+	uintptr_t osv = (uintptr_t)os;
+	uint64_t crc = -1ULL;
+
+	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
+	/*
+	 * The low 6 bits of the pointer don't have much entropy, because
+	 * the objset_t is larger than 2^6 bytes long.
+	 */
+	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
+	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
+	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
+	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
+
+	crc ^= (osv>>14) ^ (obj>>24);
+
+	return (crc);
+}
+
+unsigned int
+dnode_multilist_index_func(multilist_t *ml, void *obj)
+{
+	dnode_t *dn = obj;
+	return (dnode_hash(dn->dn_objset, dn->dn_object) %
+	    multilist_get_num_sublists(ml));
+}
+
+/*
+ * Instantiates the objset_t in-memory structure corresponding to the
+ * objset_phys_t that's pointed to by the specified blkptr_t.
+ */
 int
 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
     objset_t **osp)
@@ -454,10 +490,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
 	os->os_zil = zil_alloc(os, &os->os_zil_header);
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
-		    offsetof(dnode_t, dn_dirty_link[i]));
-		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
-		    offsetof(dnode_t, dn_dirty_link[i]));
+		os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
+		    offsetof(dnode_t, dn_dirty_link[i]),
+		    dnode_multilist_index_func);
 	}
 	list_create(&os->os_dnodes, sizeof (dnode_t),
 	    offsetof(dnode_t, dn_link));
@@ -465,6 +500,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
 	    offsetof(dmu_buf_impl_t, db_link));
 
 	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 
@@ -748,8 +784,12 @@ dmu_objset_evict_done(objset_t *os)
 	rw_exit(&os_lock);
 
 	mutex_destroy(&os->os_lock);
+	mutex_destroy(&os->os_userused_lock);
 	mutex_destroy(&os->os_obj_lock);
 	mutex_destroy(&os->os_user_ptr_lock);
+	for (int i = 0; i < TXG_SIZE; i++) {
+		multilist_destroy(os->os_dirty_dnodes[i]);
+	}
 	spa_evicting_os_deregister(os->os_spa, os);
 	kmem_free(os, sizeof (objset_t));
 }
@@ -1027,11 +1067,11 @@ dmu_objset_snapshot_one(const char *fsna
 }
 
 static void
-dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
+dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 
-	while (dn = list_head(list)) {
+	while ((dn = multilist_sublist_head(list)) != NULL) {
 		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
 		ASSERT(dn->dn_dbuf->db_data_pending);
 		/*
@@ -1042,11 +1082,12 @@ dmu_objset_sync_dnodes(list_t *list, lis
 		ASSERT(dn->dn_zio);
 
 		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
-		list_remove(list, dn);
+		multilist_sublist_remove(list, dn);
 
-		if (newlist) {
+		multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
+		if (newlist != NULL) {
 			(void) dnode_add_ref(dn, newlist);
-			list_insert_tail(newlist, dn);
+			multilist_insert(newlist, dn);
 		}
 
 		dnode_sync(dn, tx);
@@ -1101,6 +1142,29 @@ dmu_objset_write_done(zio_t *zio, arc_bu
 	kmem_free(bp, sizeof (*bp));
 }
 
+typedef struct sync_dnodes_arg {
+	multilist_t *sda_list;
+	int sda_sublist_idx;
+	multilist_t *sda_newlist;
+	dmu_tx_t *sda_tx;
+} sync_dnodes_arg_t;
+
+static void
+sync_dnodes_task(void *arg)
+{
+	sync_dnodes_arg_t *sda = arg;
+
+	multilist_sublist_t *ms =
+	    multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx);
+
+	dmu_objset_sync_dnodes(ms, sda->sda_tx);
+
+	multilist_sublist_unlock(ms);
+
+	kmem_free(sda, sizeof (*sda));
+}
+
+
 /* called from dsl */
 void
 dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
@@ -1110,7 +1174,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio
 	zio_prop_t zp;
 	zio_t *zio;
 	list_t *list;
-	list_t *newlist = NULL;
 	dbuf_dirty_record_t *dr;
 	blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
 	*blkptr_copy = *os->os_rootbp;
@@ -1164,20 +1227,36 @@ dmu_objset_sync(objset_t *os, zio_t *pio
 	txgoff = tx->tx_txg & TXG_MASK;
 
 	if (dmu_objset_userused_enabled(os)) {
-		newlist = &os->os_synced_dnodes;
 		/*
 		 * We must create the list here because it uses the
-		 * dn_dirty_link[] of this txg.
+		 * dn_dirty_link[] of this txg.  But it may already
+		 * exist because we call dsl_dataset_sync() twice per txg.
 		 */
-		list_create(newlist, sizeof (dnode_t),
-		    offsetof(dnode_t, dn_dirty_link[txgoff]));
+		if (os->os_synced_dnodes == NULL) {
+			os->os_synced_dnodes =
+			    multilist_create(sizeof (dnode_t),
+			    offsetof(dnode_t, dn_dirty_link[txgoff]),
+			    dnode_multilist_index_func);
+		} else {
+			ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
+			    offsetof(dnode_t, dn_dirty_link[txgoff]));
+		}
 	}
 
-	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
-	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
+	for (int i = 0;
+	    i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) {
+		sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP);
+		sda->sda_list = os->os_dirty_dnodes[txgoff];
+		sda->sda_sublist_idx = i;
+		sda->sda_tx = tx;
+		(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
+		    sync_dnodes_task, sda, 0);
+		/* callback frees sda */
+	}
+	taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
 
 	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
-	while (dr = list_head(list)) {
+	while ((dr = list_head(list)) != NULL) {
 		ASSERT0(dr->dr_dbuf->db_level);
 		list_remove(list, dr);
 		if (dr->dr_zio)
@@ -1201,8 +1280,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio
 boolean_t
 dmu_objset_is_dirty(objset_t *os, uint64_t txg)
 {
-	return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
-	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
+	return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
 }
 
 static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
@@ -1256,8 +1334,15 @@ do_userquota_cacheflush(objset_t *os, us
 	cookie = NULL;
 	while ((uqn = avl_destroy_nodes(&cache->uqc_user_deltas,
 	    &cookie)) != NULL) {
+		/*
+		 * os_userused_lock protects against concurrent calls to
+		 * zap_increment_int().  It's needed because zap_increment_int()
+		 * is not thread-safe (i.e. not atomic).
+		 */
+		mutex_enter(&os->os_userused_lock);
 		VERIFY0(zap_increment_int(os, DMU_USERUSED_OBJECT,
 		    uqn->uqn_id, uqn->uqn_delta, tx));
+		mutex_exit(&os->os_userused_lock);
 		kmem_free(uqn, sizeof (*uqn));
 	}
 	avl_destroy(&cache->uqc_user_deltas);
@@ -1265,8 +1350,10 @@ do_userquota_cacheflush(objset_t *os, us
 	cookie = NULL;
 	while ((uqn = avl_destroy_nodes(&cache->uqc_group_deltas,
 	    &cookie)) != NULL) {
+		mutex_enter(&os->os_userused_lock);
 		VERIFY0(zap_increment_int(os, DMU_GROUPUSED_OBJECT,
 		    uqn->uqn_id, uqn->uqn_delta, tx));
+		mutex_exit(&os->os_userused_lock);
 		kmem_free(uqn, sizeof (*uqn));
 	}
 	avl_destroy(&cache->uqc_group_deltas);
@@ -1301,37 +1388,38 @@ do_userquota_update(userquota_cache_t *c
 	}
 }
 
-void
-dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+typedef struct userquota_updates_arg {
+	objset_t *uua_os;
+	int uua_sublist_idx;
+	dmu_tx_t *uua_tx;
+} userquota_updates_arg_t;
+
+static void
+userquota_updates_task(void *arg)
 {
+	userquota_updates_arg_t *uua = arg;
+	objset_t *os = uua->uua_os;
+	dmu_tx_t *tx = uua->uua_tx;
 	dnode_t *dn;
-	list_t *list = &os->os_synced_dnodes;
 	userquota_cache_t cache = { 0 };
 
-	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+	multilist_sublist_t *list =
+	    multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx);
 
+	ASSERT(multilist_sublist_head(list) == NULL ||
+	    dmu_objset_userused_enabled(os));
 	avl_create(&cache.uqc_user_deltas, userquota_compare,
 	    sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
 	avl_create(&cache.uqc_group_deltas, userquota_compare,
 	    sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
 
-	while (dn = list_head(list)) {
+	while ((dn = multilist_sublist_head(list)) != NULL) {
 		int flags;
 		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
 		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
 		    dn->dn_phys->dn_flags &
 		    DNODE_FLAG_USERUSED_ACCOUNTED);
 
-		/* Allocate the user/groupused objects if necessary. */
-		if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
-			VERIFY0(zap_create_claim(os,
-			    DMU_USERUSED_OBJECT,
-			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
-			VERIFY0(zap_create_claim(os,
-			    DMU_GROUPUSED_OBJECT,
-			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
-		}
-
 		flags = dn->dn_id_flags;
 		ASSERT(flags);
 		if (flags & DN_ID_OLD_EXIST)  {
@@ -1361,10 +1449,42 @@ dmu_objset_do_userquota_updates(objset_t
 		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
 		mutex_exit(&dn->dn_mtx);
 
-		list_remove(list, dn);
-		dnode_rele(dn, list);
+		multilist_sublist_remove(list, dn);
+		dnode_rele(dn, os->os_synced_dnodes);
 	}
 	do_userquota_cacheflush(os, &cache, tx);
+	multilist_sublist_unlock(list);
+	kmem_free(uua, sizeof (*uua));
+}
+
+void
+dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+{
+	if (!dmu_objset_userused_enabled(os))
+		return;
+
+	/* Allocate the user/groupused objects if necessary. */
+	if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
+		VERIFY0(zap_create_claim(os,
+		    DMU_USERUSED_OBJECT,
+		    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+		VERIFY0(zap_create_claim(os,
+		    DMU_GROUPUSED_OBJECT,
+		    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+	}
+
+	for (int i = 0;
+	    i < multilist_get_num_sublists(os->os_synced_dnodes); i++) {
+		userquota_updates_arg_t *uua =
+		    kmem_alloc(sizeof (*uua), KM_SLEEP);
+		uua->uua_os = os;
+		uua->uua_sublist_idx = i;
+		uua->uua_tx = tx;
+		/* note: caller does taskq_wait() */
+		(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
+		    userquota_updates_task, uua, 0);
+		/* callback frees uua */
+	}
 }
 
 /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Wed May 24 22:21:24 2017	(r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
@@ -1287,13 +1287,14 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx
 	 */
 	dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
 
-	mutex_enter(&os->os_lock);
+	multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK];
+	multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn);
 
 	/*
 	 * If we are already marked dirty, we're done.
 	 */
 	if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
-		mutex_exit(&os->os_lock);
+		multilist_sublist_unlock(mls);
 		return;
 	}
 
@@ -1307,13 +1308,9 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx
 	dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
 	    dn->dn_object, txg);
 
-	if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) {
-		list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn);
-	} else {
-		list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn);
-	}
+	multilist_sublist_insert_head(mls, dn);
 
-	mutex_exit(&os->os_lock);
+	multilist_sublist_unlock(mls);
 
 	/*
 	 * The dnode maintains a hold on its containing dbuf as
@@ -1334,13 +1331,6 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx
 void
 dnode_free(dnode_t *dn, dmu_tx_t *tx)
 {
-	int txgoff = tx->tx_txg & TXG_MASK;
-
-	dprintf("dn=%p txg=%llu\n", dn, tx->tx_txg);
-
-	/* we should be the only holder... hopefully */
-	/* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */
-
 	mutex_enter(&dn->dn_mtx);
 	if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) {
 		mutex_exit(&dn->dn_mtx);
@@ -1349,19 +1339,7 @@ dnode_free(dnode_t *dn, dmu_tx_t *tx)
 	dn->dn_free_txg = tx->tx_txg;
 	mutex_exit(&dn->dn_mtx);
 
-	/*
-	 * If the dnode is already dirty, it needs to be moved from
-	 * the dirty list to the free list.
-	 */
-	mutex_enter(&dn->dn_objset->os_lock);
-	if (list_link_active(&dn->dn_dirty_link[txgoff])) {
-		list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn);
-		list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn);
-		mutex_exit(&dn->dn_objset->os_lock);
-	} else {
-		mutex_exit(&dn->dn_objset->os_lock);
-		dnode_setdirty(dn, tx);
-	}
+	dnode_setdirty(dn, tx);
 }
 
 /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c	Wed May 24 22:21:24 2017	(r318828)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	Wed May 24 22:21:24 2017	(r318828)
@@ -1740,6 +1740,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds,
 	bplist_iterate(&ds->ds_pending_deadlist,
 	    deadlist_enqueue_cb, &ds->ds_deadlist, tx);
 
+	if (os->os_synced_dnodes != NULL) {
+		multilist_destroy(os->os_synced_dnodes);
+		os->os_synced_dnodes = NULL;
+	}
+
 	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
 
 	dmu_buf_rele(ds->ds_dbuf, ds);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	Wed May 24 22:21:24 2017	(r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
@@ -132,6 +132,10 @@ int zfs_delay_min_dirty_percent = 60;
  */
 uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
 
+/*
+ * This determines the number of threads used by the dp_sync_taskq.
+ */
+int zfs_sync_taskq_batch_pct = 75;
 
 #if defined(__FreeBSD__) && defined(_KERNEL)
 
@@ -267,6 +271,10 @@ dsl_pool_open_impl(spa_t *spa, uint64_t 
 	txg_list_create(&dp->dp_sync_tasks,
 	    offsetof(dsl_sync_task_t, dst_node));
 
+	dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
+	    zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
+	    TASKQ_THREADS_CPU_PCT);
+
 	mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
 
@@ -417,6 +425,8 @@ dsl_pool_close(dsl_pool_t *dp)
 	txg_list_destroy(&dp->dp_sync_tasks);
 	txg_list_destroy(&dp->dp_dirty_dirs);
 
+	taskq_destroy(dp->dp_sync_taskq);
+
 	/*
 	 * We can't set retry to TRUE since we're explicitly specifying
 	 * a spa to flush. This is good enough; any missed buffers for
@@ -605,12 +615,15 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t
 
 	/*
 	 * After the data blocks have been written (ensured by the zio_wait()
-	 * above), update the user/group space accounting.
+	 * above), update the user/group space accounting.  This happens
+	 * in tasks dispatched to dp_sync_taskq, so wait for them before
+	 * continuing.
 	 */
 	for (ds = list_head(&synced_datasets); ds != NULL;
 	    ds = list_next(&synced_datasets, ds)) {
 		dmu_objset_do_userquota_updates(ds->ds_objset, tx);
 	}
+	taskq_wait(dp->dp_sync_taskq);
 
 	/*
 	 * Sync the datasets again to push out the changes due to
@@ -657,8 +670,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t
 		dp->dp_mos_uncompressed_delta = 0;
 	}
 
-	if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
-	    list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
+	if (!multilist_is_empty(mos->os_dirty_dnodes[txg & TXG_MASK])) {
 		dsl_pool_sync_mos(dp, tx);
 	}
 
@@ -716,7 +728,8 @@ int
 dsl_pool_sync_context(dsl_pool_t *dp)
 {
 	return (curthread == dp->dp_tx.tx_sync_thread ||
-	    spa_is_initializing(dp->dp_spa));
+	    spa_is_initializing(dp->dp_spa) ||
+	    taskq_member(dp->dp_sync_taskq, curthread));
 }
 
 uint64_t

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c	Wed May 24 22:21:24 2017	(r318828)
@@ -65,16 +65,16 @@ multilist_d2l(multilist_t *ml, void *obj
  *     requirement, but a general rule of thumb in order to garner the
  *     best multi-threaded performance out of the data structure.
  */
-static void
-multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
+static multilist_t *
+multilist_create_impl(size_t size, size_t offset,
     unsigned int num, multilist_sublist_index_func_t *index_func)
 {
-	ASSERT3P(ml, !=, NULL);
 	ASSERT3U(size, >, 0);
 	ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
 	ASSERT3U(num, >, 0);
 	ASSERT3P(index_func, !=, NULL);
 
+	multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
 	ml->ml_offset = offset;
 	ml->ml_num_sublists = num;
 	ml->ml_index_func = index_func;
@@ -89,15 +89,16 @@ multilist_create_impl(multilist_t *ml, s
 		mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
 		list_create(&mls->mls_list, size, offset);
 	}
+	return (ml);
 }
 
 /*
- * Initialize a new sublist, using the default number of sublists
+ * Allocate a new multilist, using the default number of sublists
  * (the number of CPUs, or at least 4, or the tunable
  * zfs_multilist_num_sublists).
  */
-void
-multilist_create(multilist_t *ml, size_t size, size_t offset,
+multilist_t *
+multilist_create(size_t size, size_t offset,
     multilist_sublist_index_func_t *index_func)
 {
 	int num_sublists;
@@ -108,7 +109,7 @@ multilist_create(multilist_t *ml, size_t
 		num_sublists = MAX(max_ncpus, 4);
 	}
 
-	multilist_create_impl(ml, size, offset, num_sublists, index_func);
+	return (multilist_create_impl(size, offset, num_sublists, index_func));
 }
 
 /*
@@ -134,6 +135,7 @@ multilist_destroy(multilist_t *ml)
 
 	ml->ml_num_sublists = 0;
 	ml->ml_offset = 0;
+	kmem_free(ml, sizeof (multilist_t));
 }
 
 /*
@@ -285,6 +287,13 @@ multilist_sublist_lock(multilist_t *ml, 
 	return (mls);
 }
 
+/* Lock and return the sublist that would be used to store the specified obj */
+multilist_sublist_t *
+multilist_sublist_lock_obj(multilist_t *ml, void *obj)
+{
+	return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
+}
+
 void
 multilist_sublist_unlock(multilist_sublist_t *mls)
 {

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Wed May 24 22:21:24 2017	(r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -740,7 +740,7 @@ spa_add(const char *name, nvlist_t *conf
 		spa_active_count++;
 	}
 
-	avl_create(&spa->spa_alloc_tree, zio_timestamp_compare,
+	avl_create(&spa->spa_alloc_tree, zio_bookmark_compare,
 	    sizeof (zio_t), offsetof(zio_t, io_alloc_node));
 
 	/*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h	Wed May 24 22:21:24 2017	(r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
@@ -110,7 +110,7 @@ struct objset {
 	/* no lock needed: */
 	struct dmu_tx *os_synctx; /* XXX sketchy */
 	zil_header_t os_zil_header;
-	list_t os_synced_dnodes;
+	multilist_t *os_synced_dnodes;
 	uint64_t os_flags;
 	uint64_t os_freed_dnodes;
 	boolean_t os_rescan_dnodes;
@@ -121,11 +121,13 @@ struct objset {
 
 	/* Protected by os_lock */
 	kmutex_t os_lock;
-	list_t os_dirty_dnodes[TXG_SIZE];
-	list_t os_free_dnodes[TXG_SIZE];
+	multilist_t *os_dirty_dnodes[TXG_SIZE];
 	list_t os_dnodes;
 	list_t os_downgraded_dbufs;
 
+	/* Protects changes to DMU_{USER,GROUP}USED_OBJECT */
+	kmutex_t os_userused_lock;
+
 	/* stuff we store for the user */
 	kmutex_t os_user_ptr_lock;
 	void *os_user_ptr;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed May 24 22:21:24 2017	(r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
@@ -35,6 +35,7 @@
 #include <sys/refcount.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/zrlock.h>
+#include <sys/multilist.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -203,7 +204,7 @@ struct dnode {
 	uint32_t dn_dbufs_count;	/* count of dn_dbufs */
 
 	/* protected by os_lock: */
-	list_node_t dn_dirty_link[TXG_SIZE];	/* next on dataset's dirty */
+	multilist_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
 
 	/* protected by dn_mtx: */
 	kmutex_t dn_mtx;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h	Wed May 24 22:21:24 2017	(r318828)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -121,6 +121,7 @@ typedef struct dsl_pool {
 	txg_list_t dp_dirty_zilogs;
 	txg_list_t dp_dirty_dirs;
 	txg_list_t dp_sync_tasks;
+	taskq_t *dp_sync_taskq;
 
 	/*
 	 * Protects administrative changes (properties, namespace)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/multilist.h	Wed May 24 22:21:24 2017	(r318828)
@@ -73,8 +73,7 @@ struct multilist {
 };
 
 void multilist_destroy(multilist_t *);
-void multilist_create(multilist_t *, size_t, size_t,
-    multilist_sublist_index_func_t *);
+multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);
 
 void multilist_insert(multilist_t *, void *);
 void multilist_remove(multilist_t *, void *);
@@ -84,6 +83,7 @@ unsigned int multilist_get_num_sublists(
 unsigned int multilist_get_random_index(multilist_t *);
 
 multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
+multilist_sublist_t *multilist_sublist_lock_obj(multilist_t *, void *);
 void multilist_sublist_unlock(multilist_sublist_t *);
 
 void multilist_sublist_insert_head(multilist_sublist_t *, void *);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Wed May 24 22:15:16 2017	(r318827)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Wed May 24 22:21:24 2017	(r318828)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright 2016 Toomas Soome <tsoome@me.com>
  */
@@ -485,7 +485,7 @@ struct zio {
 	list_node_t	io_trim_link;
 };
 
-extern int zio_timestamp_compare(const void *, const void *);
+extern int zio_bookmark_compare(const void *, const void *);
 
 extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
     zio_done_func_t *done, void *priv, enum zio_flag flags);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Wed May 24 22:15:16 2017	(r318827)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201705242221.v4OMLPFi052453>