Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 7 Aug 2013 21:16:15 +0000 (UTC)
From:      Xin LI <delphij@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r254074 - in head: cddl/contrib/opensolaris/cmd/ztest sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201308072116.r77LGFpZ001567@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Wed Aug  7 21:16:14 2013
New Revision: 254074
URL: http://svnweb.freebsd.org/changeset/base/254074

Log:
  MFV r254070:
  
  Merge vendor bugfix for ZFS test suite that triggers false positives.
  
  Illumos ZFS issues:
    3949 ztest fault injection should avoid resilvering devices
    3950 ztest: deadman fires when we're doing a scan
    3951 ztest hang when running dedup test
    3952 ztest: ztest_reguid test and ztest_fault_inject don't place nice together

Modified:
  head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Wed Aug  7 20:35:26 2013	(r254073)
+++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Wed Aug  7 21:16:14 2013	(r254074)
@@ -186,6 +186,7 @@ static const ztest_shared_opts_t ztest_o
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
+extern uint64_t zfs_deadman_synctime;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -365,7 +366,7 @@ ztest_info_t ztest_info[] = {
 	{ ztest_fault_inject,			1,	&zopt_sometimes	},
 	{ ztest_ddt_repair,			1,	&zopt_sometimes	},
 	{ ztest_dmu_snapshot_hold,		1,	&zopt_sometimes	},
-	{ ztest_reguid,				1,	&zopt_sometimes },
+	{ ztest_reguid,				1,	&zopt_rarely	},
 	{ ztest_spa_rename,			1,	&zopt_rarely	},
 	{ ztest_scrub,				1,	&zopt_rarely	},
 	{ ztest_spa_upgrade,			1,	&zopt_rarely	},
@@ -4756,6 +4757,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 	ASSERT(leaves >= 1);
 
 	/*
+	 * Grab the name lock as reader. There are some operations
+	 * which don't like to have their vdevs changed while
+	 * they are in progress (i.e. spa_change_guid). Those
+	 * operations will have grabbed the name lock as writer.
+	 */
+	(void) rw_rdlock(&ztest_name_lock);
+
+	/*
 	 * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
 	 */
 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
@@ -4784,7 +4793,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 		if (vd0 != NULL && vd0->vdev_top->vdev_islog)
 			islog = B_TRUE;
 
-		if (vd0 != NULL && maxfaults != 1) {
+		/*
+		 * If the top-level vdev needs to be resilvered
+		 * then we only allow faults on the device that is
+		 * resilvering.
+		 */
+		if (vd0 != NULL && maxfaults != 1 &&
+		    (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
+		    vd0->vdev_resilvering)) {
 			/*
 			 * Make vd0 explicitly claim to be unreadable,
 			 * or unwriteable, or reach behind its back
@@ -4815,6 +4831,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 
 		if (sav->sav_count == 0) {
 			spa_config_exit(spa, SCL_STATE, FTAG);
+			(void) rw_unlock(&ztest_name_lock);
 			return;
 		}
 		vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
@@ -4828,6 +4845,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
 	}
 
 	spa_config_exit(spa, SCL_STATE, FTAG);
+	(void) rw_unlock(&ztest_name_lock);
 
 	/*
 	 * If we can tolerate two or more faults, or we're dealing
@@ -5293,16 +5311,33 @@ static void *
 ztest_deadman_thread(void *arg)
 {
 	ztest_shared_t *zs = arg;
-	int grace = 300;
-	hrtime_t delta;
-
-	delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
+	spa_t *spa = ztest_spa;
+	hrtime_t delta, total = 0;
 
-	(void) poll(NULL, 0, (int)(1000 * delta));
+	for (;;) {
+		delta = (zs->zs_thread_stop - zs->zs_thread_start) /
+		    NANOSEC + zfs_deadman_synctime;
 
-	fatal(0, "failed to complete within %d seconds of deadline", grace);
+		(void) poll(NULL, 0, (int)(1000 * delta));
 
-	return (NULL);
+		/*
+		 * If the pool is suspended then fail immediately. Otherwise,
+		 * check to see if the pool is making any progress. If
+		 * vdev_deadman() discovers that there hasn't been any recent
+		 * I/Os then it will end up aborting the tests.
+		 */
+		if (spa_suspended(spa)) {
+			fatal(0, "aborting test after %llu seconds because "
+			    "pool has transitioned to a suspended state.",
+			    zfs_deadman_synctime);
+			return (NULL);
+		}
+		vdev_deadman(spa->spa_root_vdev);
+
+		total += zfs_deadman_synctime;
+		(void) printf("ztest has been running for %lld seconds\n",
+		    total);
+	}
 }
 
 static void
@@ -6031,6 +6066,7 @@ main(int argc, char **argv)
 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
 
 	dprintf_setup(&argc, argv);
+	zfs_deadman_synctime = 300;
 
 	ztest_fd_rand = open("/dev/urandom", O_RDONLY);
 	ASSERT3S(ztest_fd_rand, >=, 0);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Wed Aug  7 20:35:26 2013	(r254073)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Wed Aug  7 21:16:14 2013	(r254074)
@@ -759,6 +759,7 @@ spa_change_guid(spa_t *spa)
 	int error;
 	uint64_t guid;
 
+	mutex_enter(&spa->spa_vdev_top_lock);
 	mutex_enter(&spa_namespace_lock);
 	guid = spa_generate_guid(NULL);
 
@@ -771,6 +772,7 @@ spa_change_guid(spa_t *spa)
 	}
 
 	mutex_exit(&spa_namespace_lock);
+	mutex_exit(&spa->spa_vdev_top_lock);
 
 	return (error);
 }
@@ -4861,7 +4863,6 @@ spa_vdev_detach(spa_t *spa, uint64_t gui
 		if (pvd->vdev_ops == &vdev_spare_ops)
 			cvd->vdev_unspare = B_FALSE;
 		vdev_remove_parent(cvd);
-		cvd->vdev_resilvering = B_FALSE;
 	}
 
 
@@ -5496,6 +5497,13 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
 			return (oldvd);
 	}
 
+	if (vd->vdev_resilvering && vdev_dtl_empty(vd, DTL_MISSING) &&
+	    vdev_dtl_empty(vd, DTL_OUTAGE)) {
+		ASSERT(vd->vdev_ops->vdev_op_leaf);
+		vd->vdev_resilvering = B_FALSE;
+		vdev_config_dirty(vd->vdev_top);
+	}
+
 	/*
 	 * Check for a completed replacement.  We always consider the first
 	 * vdev in the list to be the oldest vdev, and the last one to be



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308072116.r77LGFpZ001567>