Date: Tue, 31 Jul 2018 00:13:07 +0000 (UTC) From: Alexander Motin <mav@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r336950 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zdb vendor/illumos/dist/cmd/ztest vendor/illumos/dist/lib/libzf... Message-ID: <201807310013.w6V0D7dV088745@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mav Date: Tue Jul 31 00:13:04 2018 New Revision: 336950 URL: https://svnweb.freebsd.org/changeset/base/336950 Log: 9290 device removal reduces redundancy of mirrors Mirrors are supposed to provide redundancy in the face of whole-disk failure and silent damage (e.g. some data on disk is not right, but ZFS hasn't detected the whole device as being broken). However, the current device removal implementation bypasses some of the mirror's redundancy. illumos/illumos-gate@3a4b1be953ee5601bab748afa07c26ed4996cde6 Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Prashanth Sreenivasa <pks@delphix.com> Reviewed by: Sara Hartse <sara.hartse@delphix.com> Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Tim Chase <tim@chase2k.com> Approved by: Richard Lowe <richlowe@richlowe.net> Author: Matthew Ahrens <mahrens@delphix.com> Modified: vendor/illumos/dist/cmd/zdb/zdb.c vendor/illumos/dist/cmd/ztest/ztest.c vendor/illumos/dist/lib/libzfs/common/libzfs_pool.c Changes in other areas also in this revision: Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c vendor-sys/illumos/dist/uts/common/fs/zfs/metaslab.c vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev_removal.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zio.h vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_indirect.c vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_mirror.c vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_removal.c vendor-sys/illumos/dist/uts/common/fs/zfs/zio.c Modified: vendor/illumos/dist/cmd/zdb/zdb.c ============================================================================== --- vendor/illumos/dist/cmd/zdb/zdb.c Tue Jul 31 00:02:42 2018 (r336949) +++ vendor/illumos/dist/cmd/zdb/zdb.c Tue Jul 31 00:13:04 2018 (r336950) @@ -3004,7 +3004,7 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_vdev_removal_t *svr = spa->spa_vdev_removal; - vdev_t *vd = svr->svr_vdev; + vdev_t *vd = vdev_lookup_top(spa, svr->svr_vdev_id); vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; for (uint64_t msi = 0; msi < vd->vdev_ms_count; msi++) { @@ -3020,13 +3020,17 @@ zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb) svr->svr_allocd_segs, SM_ALLOC)); /* - * Clear everything past what has been synced, - * because we have not allocated mappings for it yet. + * Clear everything past what has been synced unless + * it's past the spacemap, because we have not allocated + * mappings for it yet. */ - range_tree_clear(svr->svr_allocd_segs, - vdev_indirect_mapping_max_offset(vim), - msp->ms_sm->sm_start + msp->ms_sm->sm_size - - vdev_indirect_mapping_max_offset(vim)); + uint64_t vim_max_offset = + vdev_indirect_mapping_max_offset(vim); + uint64_t sm_end = msp->ms_sm->sm_start + + msp->ms_sm->sm_size; + if (sm_end > vim_max_offset) + range_tree_clear(svr->svr_allocd_segs, + vim_max_offset, sm_end - vim_max_offset); } zcb->zcb_removing_size += Modified: vendor/illumos/dist/cmd/ztest/ztest.c ============================================================================== --- vendor/illumos/dist/cmd/ztest/ztest.c Tue Jul 31 00:02:42 2018 (r336949) +++ vendor/illumos/dist/cmd/ztest/ztest.c Tue Jul 31 00:13:04 2018 (r336950) @@ -436,6 +436,7 @@ static ztest_ds_t *ztest_ds; static kmutex_t ztest_vdev_lock; static kmutex_t ztest_checkpoint_lock; +static boolean_t ztest_device_removal_active = B_FALSE; /* * The ztest_name_lock protects the pool and dataset namespace used by @@ -2880,7 +2881,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * value. Don't bother trying to attach while we are in the middle * of removal. */ - if (spa->spa_vdev_removal != NULL) { + if (ztest_device_removal_active) { spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&ztest_vdev_lock); return; @@ -3055,16 +3056,49 @@ ztest_device_removal(ztest_ds_t *zd, uint64_t id) spa_t *spa = ztest_spa; vdev_t *vd; uint64_t guid; + int error; mutex_enter(&ztest_vdev_lock); + if (ztest_device_removal_active) { + mutex_exit(&ztest_vdev_lock); + return; + } + + /* + * Remove a random top-level vdev and wait for removal to finish. + */ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE)); guid = vd->vdev_guid; spa_config_exit(spa, SCL_VDEV, FTAG); - (void) spa_vdev_remove(spa, guid, B_FALSE); + error = spa_vdev_remove(spa, guid, B_FALSE); + if (error == 0) { + ztest_device_removal_active = B_TRUE; + mutex_exit(&ztest_vdev_lock); + while (spa->spa_vdev_removal != NULL) + txg_wait_synced(spa_get_dsl(spa), 0); + } else { + mutex_exit(&ztest_vdev_lock); + return; + } + + /* + * The pool needs to be scrubbed after completing device removal. + * Failure to do so may result in checksum errors due to the + * strategy employed by ztest_fault_inject() when selecting which + * offset are redundant and can be damaged. + */ + error = spa_scan(spa, POOL_SCAN_SCRUB); + if (error == 0) { + while (dsl_scan_scrubbing(spa_get_dsl(spa))) + txg_wait_synced(spa_get_dsl(spa), 0); + } + + mutex_enter(&ztest_vdev_lock); + ztest_device_removal_active = B_FALSE; mutex_exit(&ztest_vdev_lock); } @@ -3203,7 +3237,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) * that the metaslab_class space increased (because it decreases * when the device removal completes). */ - if (spa->spa_vdev_removal != NULL) { + if (ztest_device_removal_active) { spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); mutex_exit(&ztest_checkpoint_lock); @@ -4985,6 +5019,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) boolean_t islog = B_FALSE; mutex_enter(&ztest_vdev_lock); + + /* + * Device removal is in progress, fault injection must be disabled + * until it completes and the pool is scrubbed. The fault injection + * strategy for damaging blocks does not take in to account evacuated + * blocks which may have already been damaged. + */ + if (ztest_device_removal_active) { + mutex_exit(&ztest_vdev_lock); + return; + } + maxfaults = MAXFAULTS(); leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; mirror_save = zs->zs_mirrors; @@ -5329,6 +5375,12 @@ void ztest_scrub(ztest_ds_t *zd, uint64_t id) { spa_t *spa = ztest_spa; + + /* + * Scrub in progress by device removal. + */ + if (ztest_device_removal_active) + return; (void) spa_scan(spa, POOL_SCAN_SCRUB); (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ Modified: vendor/illumos/dist/lib/libzfs/common/libzfs_pool.c ============================================================================== --- vendor/illumos/dist/lib/libzfs/common/libzfs_pool.c Tue Jul 31 00:02:42 2018 (r336949) +++ vendor/illumos/dist/lib/libzfs/common/libzfs_pool.c Tue Jul 31 00:13:04 2018 (r336950) @@ -2807,7 +2807,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, " - "or pool has removing/removed vdevs"), + "or device removal is in progress"), new_disk); (void) zfs_error(hdl, EZFS_BADDEV, msg); break;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201807310013.w6V0D7dV088745>