Date:      Thu, 21 Nov 2019 14:02:54 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r354957 - vendor-sys/illumos/dist/uts/common/fs/zfs
Message-ID:  <201911211402.xALE2sCR047278@repo.freebsd.org>

Author: avg
Date: Thu Nov 21 14:02:54 2019
New Revision: 354957
URL: https://svnweb.freebsd.org/changeset/base/354957

Log:
  10853 spa_sync, vs_alloc can underflow and checkpoint test fixes
  
  illumos/illumos-gate@9740f25f0360eb7d9131fa15fabebf958bf19126
  https://github.com/illumos/illumos-gate/commit/9740f25f0360eb7d9131fa15fabebf958bf19126
  
  https://www.illumos.org/issues/10853
    From ZoL
    7558997 vs_alloc can underflow in L2ARC vdevs
    8dc2197 Simplify spa_sync by breaking it up to smaller functions
    db58794 Make zdb results for checkpoint tests consistent
    Here are the commit message summaries from ZoL:
    vs_alloc can underflow in L2ARC vdevs
    The current L2 ARC device code consistently uses psize to
        increment vs_alloc but varies between psize and lsize when
        decrementing it. As a result, vs_alloc can be decremented by
        more than it was incremented and underflow. This patch changes
        the code so that asize is used everywhere. (A small userland
        model of this unit mismatch follows the log message.)
    In addition, it ensures that vs_alloc gets incremented by
        the L2 ARC device code as each buffer is written, rather than
        once at the end of the l2arc_write_buffers() routine. The old
        approach could temporarily underflow vs_alloc, because buffers
        that had just been written could be destroyed while
        l2arc_write_buffers() was still looping.
    Simplify spa_sync by breaking it up to smaller functions
    The point of this refactoring is to break the high-level conceptual
        steps of spa_sync() out into their own helper functions. Large
        functions can remain readable when they are structured well, but
        in this case the number of conceptual steps is better served by
        helper functions.
    Make zdb results for checkpoint tests consistent
    This patch exports and re-imports the pool before analyzing it
        with zdb in these tests, so that the results are consistent.
  
  Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
  Author: Serapheim Dimitropoulos <serapheim@delphix.com>
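
To make the vs_alloc unit mismatch concrete, here is a minimal, hypothetical
userland model; it is illustration only, not ZFS code. An unsigned allocation
counter incremented with the compressed psize but decremented with the larger
logical lsize wraps below zero, while using the rounded-up on-disk asize on
both sides keeps it balanced. MODEL_ASHIFT, model_psize_to_asize(), and the
sample sizes are invented for this sketch.

#include <stdio.h>
#include <stdint.h>

#define	MODEL_ASHIFT	12	/* invented 4K allocation unit */

/* Round a physical size up to the allocation unit, like an asize. */
static uint64_t
model_psize_to_asize(uint64_t psize)
{
	uint64_t align = 1ULL << MODEL_ASHIFT;

	return ((psize + align - 1) & ~(align - 1));
}

int
main(void)
{
	uint64_t vs_alloc = 0;
	uint64_t lsize = 8192;	/* logical (uncompressed) size */
	uint64_t psize = 2560;	/* physical (compressed) size */
	uint64_t asize = model_psize_to_asize(psize);	/* 4096 */

	/* Old behavior: increment with psize, decrement with lsize. */
	vs_alloc += psize;
	vs_alloc -= lsize;
	printf("mismatched units: vs_alloc = %llu (wrapped)\n",
	    (unsigned long long)vs_alloc);

	/* Fixed behavior: use asize consistently on both sides. */
	vs_alloc = 0;
	vs_alloc += asize;
	vs_alloc -= asize;
	printf("asize everywhere: vs_alloc = %llu\n",
	    (unsigned long long)vs_alloc);

	return (0);
}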

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Thu Nov 21 14:01:44 2019	(r354956)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Thu Nov 21 14:02:54 2019	(r354957)
@@ -3124,7 +3124,8 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
 {
 	l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
 	l2arc_dev_t *dev = l2hdr->b_dev;
-	uint64_t psize = arc_hdr_size(hdr);
+	uint64_t psize = HDR_GET_PSIZE(hdr);
+	uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
 
 	ASSERT(MUTEX_HELD(&dev->l2ad_mtx));
 	ASSERT(HDR_HAS_L2HDR(hdr));
@@ -3134,9 +3135,10 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
 	ARCSTAT_INCR(arcstat_l2_psize, -psize);
 	ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
 
-	vdev_space_update(dev->l2ad_vdev, -psize, 0, 0);
+	vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
 
-	(void) zfs_refcount_remove_many(&dev->l2ad_alloc, psize, hdr);
+	(void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr),
+	    hdr);
 	arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
 }
 
@@ -6759,10 +6761,12 @@ top:
 			list_remove(buflist, hdr);
 			arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
 
-			ARCSTAT_INCR(arcstat_l2_psize, -arc_hdr_size(hdr));
+			uint64_t psize = HDR_GET_PSIZE(hdr);
+			ARCSTAT_INCR(arcstat_l2_psize, -psize);
 			ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
 
-			bytes_dropped += arc_hdr_size(hdr);
+			bytes_dropped +=
+			    vdev_psize_to_asize(dev->l2ad_vdev, psize);
 			(void) zfs_refcount_remove_many(&dev->l2ad_alloc,
 			    arc_hdr_size(hdr), hdr);
 		}
@@ -7213,6 +7217,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint
 			write_psize += psize;
 			write_asize += asize;
 			dev->l2ad_hand += asize;
+			vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
 
 			mutex_exit(hash_lock);
 
@@ -7238,7 +7243,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint
 	ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
 	ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
 	ARCSTAT_INCR(arcstat_l2_psize, write_psize);
-	vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0);
 
 	/*
 	 * Bump device hand to the device start if it is approaching the end.

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c	Thu Nov 21 14:01:44 2019	(r354956)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c	Thu Nov 21 14:02:54 2019	(r354957)
@@ -7379,6 +7379,9 @@ spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx
 static void
 spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
 {
+	if (spa_sync_pass(spa) != 1)
+		return;
+
 	zio_t *zio = zio_root(spa, NULL, NULL, 0);
 	VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
 	    spa_free_sync_cb, zio, tx), ==, 0);
@@ -7774,10 +7777,10 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
 static void
 spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
 {
-	dsl_pool_t *dp = spa->spa_dsl_pool;
+	if (spa_sync_pass(spa) != 1)
+		return;
 
-	ASSERT(spa->spa_sync_pass == 1);
-
+	dsl_pool_t *dp = spa->spa_dsl_pool;
 	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
 
 	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
@@ -7872,116 +7875,31 @@ vdev_indirect_state_sync_verify(vdev_t *vd)
 }
 
 /*
- * Sync the specified transaction group.  New blocks may be dirtied as
- * part of the process, so we iterate until it converges.
+ * Set the top-level vdev's max queue depth. Evaluate each top-level's
+ * async write queue depth in case it changed. The max queue depth will
+ * not change in the middle of syncing out this txg.
  */
-void
-spa_sync(spa_t *spa, uint64_t txg)
+static void
+spa_sync_adjust_vdev_max_queue_depth(spa_t *spa)
 {
-	dsl_pool_t *dp = spa->spa_dsl_pool;
-	objset_t *mos = spa->spa_meta_objset;
-	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
-	metaslab_class_t *normal = spa_normal_class(spa);
-	metaslab_class_t *special = spa_special_class(spa);
-	metaslab_class_t *dedup = spa_dedup_class(spa);
+	ASSERT(spa_writeable(spa));
+
 	vdev_t *rvd = spa->spa_root_vdev;
-	vdev_t *vd;
-	dmu_tx_t *tx;
-	int error;
 	uint32_t max_queue_depth = zfs_vdev_async_write_max_active *
 	    zfs_vdev_queue_depth_pct / 100;
+	metaslab_class_t *normal = spa_normal_class(spa);
+	metaslab_class_t *special = spa_special_class(spa);
+	metaslab_class_t *dedup = spa_dedup_class(spa);
 
-	VERIFY(spa_writeable(spa));
-
-	/*
-	 * Wait for i/os issued in open context that need to complete
-	 * before this txg syncs.
-	 */
-	(void) zio_wait(spa->spa_txg_zio[txg & TXG_MASK]);
-	spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
-	    ZIO_FLAG_CANFAIL);
-
-	/*
-	 * Lock out configuration changes.
-	 */
-	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
-
-	spa->spa_syncing_txg = txg;
-	spa->spa_sync_pass = 0;
-
-	for (int i = 0; i < spa->spa_alloc_count; i++) {
-		mutex_enter(&spa->spa_alloc_locks[i]);
-		VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
-		mutex_exit(&spa->spa_alloc_locks[i]);
-	}
-
-	/*
-	 * If there are any pending vdev state changes, convert them
-	 * into config changes that go out with this transaction group.
-	 */
-	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
-	while (list_head(&spa->spa_state_dirty_list) != NULL) {
-		/*
-		 * We need the write lock here because, for aux vdevs,
-		 * calling vdev_config_dirty() modifies sav_config.
-		 * This is ugly and will become unnecessary when we
-		 * eliminate the aux vdev wart by integrating all vdevs
-		 * into the root vdev tree.
-		 */
-		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
-		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
-		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
-			vdev_state_clean(vd);
-			vdev_config_dirty(vd);
-		}
-		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
-		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
-	}
-	spa_config_exit(spa, SCL_STATE, FTAG);
-
-	tx = dmu_tx_create_assigned(dp, txg);
-
-	spa->spa_sync_starttime = gethrtime();
-	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid,
-	    spa->spa_sync_starttime + spa->spa_deadman_synctime));
-
-	/*
-	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
-	 * set spa_deflate if we have no raid-z vdevs.
-	 */
-	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
-	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
-		int i;
-
-		for (i = 0; i < rvd->vdev_children; i++) {
-			vd = rvd->vdev_child[i];
-			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
-				break;
-		}
-		if (i == rvd->vdev_children) {
-			spa->spa_deflate = TRUE;
-			VERIFY(0 == zap_add(spa->spa_meta_objset,
-			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
-			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
-		}
-	}
-
-	/*
-	 * Set the top-level vdev's max queue depth. Evaluate each
-	 * top-level's async write queue depth in case it changed.
-	 * The max queue depth will not change in the middle of syncing
-	 * out this txg.
-	 */
 	uint64_t slots_per_allocator = 0;
 	for (int c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
-		metaslab_group_t *mg = tvd->vdev_mg;
-		metaslab_class_t *mc;
 
+		metaslab_group_t *mg = tvd->vdev_mg;
 		if (mg == NULL || !metaslab_group_initialized(mg))
 			continue;
 
-		mc = mg->mg_class;
+		metaslab_class_t *mc = mg->mg_class;
 		if (mc != normal && mc != special && mc != dedup)
 			continue;
 
@@ -8013,7 +7931,14 @@ spa_sync(spa_t *spa, uint64_t txg)
 	normal->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
 	special->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
 	dedup->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
+}
 
+static void
+spa_sync_condense_indirect(spa_t *spa, dmu_tx_t *tx)
+{
+	ASSERT(spa_writeable(spa));
+
+	vdev_t *rvd = spa->spa_root_vdev;
 	for (int c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
 		vdev_indirect_state_sync_verify(vd);
@@ -8023,10 +7948,16 @@ spa_sync(spa_t *spa, uint64_t txg)
 			break;
 		}
 	}
+}
 
-	/*
-	 * Iterate to convergence.
-	 */
+static void
+spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
+{
+	objset_t *mos = spa->spa_meta_objset;
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	uint64_t txg = tx->tx_txg;
+	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
+
 	do {
 		int pass = ++spa->spa_sync_pass;
 
@@ -8052,79 +7983,60 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 		ddt_sync(spa, txg);
 		dsl_scan_sync(dp, tx);
+		svr_sync(spa, tx);
+		spa_sync_upgrades(spa, tx);
 
-		if (spa->spa_vdev_removal != NULL)
-			svr_sync(spa, tx);
-
+		vdev_t *vd = NULL;
 		while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
 		    != NULL)
 			vdev_sync(vd, txg);
 
-		if (pass == 1) {
-			spa_sync_upgrades(spa, tx);
-			ASSERT3U(txg, >=,
-			    spa->spa_uberblock.ub_rootbp.blk_birth);
+		/*
+		 * Note: We need to check if the MOS is dirty because we could
+		 * have marked the MOS dirty without updating the uberblock
+		 * (e.g. if we have sync tasks but no dirty user data). We need
+		 * to check the uberblock's rootbp because it is updated if we
+		 * have synced out dirty data (though in this case the MOS will
+		 * most likely also be dirty due to second order effects, we
+		 * don't want to rely on that here).
+		 */
+		if (pass == 1 &&
+		    spa->spa_uberblock.ub_rootbp.blk_birth < txg &&
+		    !dmu_objset_is_dirty(mos, txg)) {
 			/*
-			 * Note: We need to check if the MOS is dirty
-			 * because we could have marked the MOS dirty
-			 * without updating the uberblock (e.g. if we
-			 * have sync tasks but no dirty user data).  We
-			 * need to check the uberblock's rootbp because
-			 * it is updated if we have synced out dirty
-			 * data (though in this case the MOS will most
-			 * likely also be dirty due to second order
-			 * effects, we don't want to rely on that here).
+			 * Nothing changed on the first pass, therefore this
+			 * TXG is a no-op. Avoid syncing deferred frees, so
+			 * that we can keep this TXG as a no-op.
 			 */
-			if (spa->spa_uberblock.ub_rootbp.blk_birth < txg &&
-			    !dmu_objset_is_dirty(mos, txg)) {
-				/*
-				 * Nothing changed on the first pass,
-				 * therefore this TXG is a no-op.  Avoid
-				 * syncing deferred frees, so that we
-				 * can keep this TXG as a no-op.
-				 */
-				ASSERT(txg_list_empty(&dp->dp_dirty_datasets,
-				    txg));
-				ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
-				ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg));
-				ASSERT(txg_list_empty(&dp->dp_early_sync_tasks,
-				    txg));
-				break;
-			}
-			spa_sync_deferred_frees(spa, tx);
+			ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
+			ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
+			ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg));
+			ASSERT(txg_list_empty(&dp->dp_early_sync_tasks, txg));
+			break;
 		}
 
+		spa_sync_deferred_frees(spa, tx);
 	} while (dmu_objset_is_dirty(mos, txg));
+}
 
-	if (!list_is_empty(&spa->spa_config_dirty_list)) {
-		/*
-		 * Make sure that the number of ZAPs for all the vdevs matches
-		 * the number of ZAPs in the per-vdev ZAP list. This only gets
-		 * called if the config is dirty; otherwise there may be
-		 * outstanding AVZ operations that weren't completed in
-		 * spa_sync_config_object.
-		 */
-		uint64_t all_vdev_zap_entry_count;
-		ASSERT0(zap_count(spa->spa_meta_objset,
-		    spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count));
-		ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==,
-		    all_vdev_zap_entry_count);
-	}
+/*
+ * Rewrite the vdev configuration (which includes the uberblock) to
+ * commit the transaction group.
+ *
+ * If there are no dirty vdevs, we sync the uberblock to a few random
+ * top-level vdevs that are known to be visible in the config cache
+ * (see spa_vdev_add() for a complete description). If there *are* dirty
+ * vdevs, sync the uberblock to all vdevs.
+ */
+static void
+spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	uint64_t txg = tx->tx_txg;
 
-	if (spa->spa_vdev_removal != NULL) {
-		ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
-	}
-
-	/*
-	 * Rewrite the vdev configuration (which includes the uberblock)
-	 * to commit the transaction group.
-	 *
-	 * If there are no dirty vdevs, we sync the uberblock to a few
-	 * random top-level vdevs that are known to be visible in the
-	 * config cache (see spa_vdev_add() for a complete description).
-	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
-	 */
 	for (;;) {
+		int error = 0;
+
 		/*
 		 * We hold SCL_STATE to prevent vdev open/close/etc.
 		 * while we're attempting to write the vdev labels.
@@ -8138,13 +8050,15 @@ spa_sync(spa_t *spa, uint64_t txg)
 			int c0 = spa_get_random(children);
 
 			for (int c = 0; c < children; c++) {
-				vd = rvd->vdev_child[(c0 + c) % children];
+				vdev_t *vd =
+				    rvd->vdev_child[(c0 + c) % children];
 
 				/* Stop when revisiting the first vdev */
 				if (c > 0 && svd[0] == vd)
 					break;
 
-				if (vd->vdev_ms_array == 0 || vd->vdev_islog ||
+				if (vd->vdev_ms_array == 0 ||
+				    vd->vdev_islog ||
 				    !vdev_is_concrete(vd))
 					continue;
 
@@ -8168,6 +8082,122 @@ spa_sync(spa_t *spa, uint64_t txg)
 		zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
 		zio_resume_wait(spa);
 	}
+}
+
+/*
+ * Sync the specified transaction group.  New blocks may be dirtied as
+ * part of the process, so we iterate until it converges.
+ */
+void
+spa_sync(spa_t *spa, uint64_t txg)
+{
+	vdev_t *vd = NULL;
+
+	VERIFY(spa_writeable(spa));
+
+	/*
+	 * Wait for i/os issued in open context that need to complete
+	 * before this txg syncs.
+	 */
+	(void) zio_wait(spa->spa_txg_zio[txg & TXG_MASK]);
+	spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
+	    ZIO_FLAG_CANFAIL);
+
+	/*
+	 * Lock out configuration changes.
+	 */
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+	spa->spa_syncing_txg = txg;
+	spa->spa_sync_pass = 0;
+
+	for (int i = 0; i < spa->spa_alloc_count; i++) {
+		mutex_enter(&spa->spa_alloc_locks[i]);
+		VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i]));
+		mutex_exit(&spa->spa_alloc_locks[i]);
+	}
+
+	/*
+	 * If there are any pending vdev state changes, convert them
+	 * into config changes that go out with this transaction group.
+	 */
+	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
+	while (list_head(&spa->spa_state_dirty_list) != NULL) {
+		/*
+		 * We need the write lock here because, for aux vdevs,
+		 * calling vdev_config_dirty() modifies sav_config.
+		 * This is ugly and will become unnecessary when we
+		 * eliminate the aux vdev wart by integrating all vdevs
+		 * into the root vdev tree.
+		 */
+		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
+		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
+			vdev_state_clean(vd);
+			vdev_config_dirty(vd);
+		}
+		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
+	}
+	spa_config_exit(spa, SCL_STATE, FTAG);
+
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
+
+	spa->spa_sync_starttime = gethrtime();
+	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid,
+	    spa->spa_sync_starttime + spa->spa_deadman_synctime));
+
+	/*
+	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
+	 * set spa_deflate if we have no raid-z vdevs.
+	 */
+	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
+	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
+		vdev_t *rvd = spa->spa_root_vdev;
+
+		int i;
+		for (i = 0; i < rvd->vdev_children; i++) {
+			vd = rvd->vdev_child[i];
+			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
+				break;
+		}
+		if (i == rvd->vdev_children) {
+			spa->spa_deflate = TRUE;
+			VERIFY0(zap_add(spa->spa_meta_objset,
+			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
+		}
+	}
+
+	spa_sync_adjust_vdev_max_queue_depth(spa);
+
+	spa_sync_condense_indirect(spa, tx);
+
+	spa_sync_iterate_to_convergence(spa, tx);
+
+#ifdef ZFS_DEBUG
+	if (!list_is_empty(&spa->spa_config_dirty_list)) {
+		/*
+		 * Make sure that the number of ZAPs for all the vdevs matches
+		 * the number of ZAPs in the per-vdev ZAP list. This only gets
+		 * called if the config is dirty; otherwise there may be
+		 * outstanding AVZ operations that weren't completed in
+		 * spa_sync_config_object.
+		 */
+		uint64_t all_vdev_zap_entry_count;
+		ASSERT0(zap_count(spa->spa_meta_objset,
+		    spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count));
+		ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==,
+		    all_vdev_zap_entry_count);
+	}
+#endif
+
+	if (spa->spa_vdev_removal != NULL) {
+		ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
+	}
+
+	spa_sync_rewrite_vdev_config(spa, tx);
 	dmu_tx_commit(tx);
 
 	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY));

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c	Thu Nov 21 14:01:44 2019	(r354956)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c	Thu Nov 21 14:02:54 2019	(r354957)
@@ -3833,6 +3833,11 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int
 	dspace_delta = vdev_deflated_space(vd, space_delta);
 
 	mutex_enter(&vd->vdev_stat_lock);
+	/* ensure we won't underflow */
+	if (alloc_delta < 0) {
+		ASSERT3U(vd->vdev_stat.vs_alloc, >=, -alloc_delta);
+	}
+
 	vd->vdev_stat.vs_alloc += alloc_delta;
 	vd->vdev_stat.vs_space += space_delta;
 	vd->vdev_stat.vs_dspace += dspace_delta;
@@ -3840,6 +3845,7 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int
 
 	/* every class but log contributes to root space stats */
 	if (vd->vdev_mg != NULL && !vd->vdev_islog) {
+		ASSERT(!vd->vdev_isl2cache);
 		mutex_enter(&rvd->vdev_stat_lock);
 		rvd->vdev_stat.vs_alloc += alloc_delta;
 		rvd->vdev_stat.vs_space += space_delta;

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c	Thu Nov 21 14:01:44 2019	(r354956)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c	Thu Nov 21 14:02:54 2019	(r354957)
@@ -1688,6 +1688,9 @@ svr_sync(spa_t *spa, dmu_tx_t *tx)
 	spa_vdev_removal_t *svr = spa->spa_vdev_removal;
 	int txgoff = dmu_tx_get_txg(tx) & TXG_MASK;
 
+	if (svr == NULL)
+		return;
+
 	/*
 	 * This check is necessary so that we do not dirty the
 	 * DIRECTORY_OBJECT via spa_sync_removing_state() when there


