Date: Thu, 24 Nov 2016 10:29:21 +0000 (UTC) From: Andriy Gapon <avg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r309098 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <201611241029.uAOATL9W073641@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: avg Date: Thu Nov 24 10:29:21 2016 New Revision: 309098 URL: https://svnweb.freebsd.org/changeset/base/309098 Log: MFV r308988: 7199, 7200 dsl_dataset_rollback_sync may try to free already free blocks 7199 dsl_dataset_rollback_sync may try to free already free blocks 7200 no blocks must be born in a txg after a snaphot is created illumos/illumos-gate@bfaed0b91e57062c38bc16b4f89db3c8f0052a9b https://github.com/illumos/illumos-gate/commit/bfaed0b91e57062c38bc16b4f89db3c8f0052a9b https://www.illumos.org/issues/7199 dsl_dataset_rollback_sync may try to free already freed blocks when it calls dsl_destroy_head_sync_impl to destroy a temporary clone. That happens if a snapshot to which we are rolling back and from which the clone is created has some ZIL records. https://www.illumos.org/issues/7200 No new blocks must be born in a dataset in the same TXG after a snapshot of the dataset is taken. Those blocks would have the same blk_birth as the dataset's ds_prev_snap_txg and as such they would be presumed to belong o the snapshot while in fact they do not. All the datasets must be clean before sync tasks are run, so the described scenario may happen only if one of the sync tasks dirties the dataset and another sync task takes its snapshot. Then, there will be another sync pass because of the dirty data and the new blocks will be born in the same TXG when the data is written out. It seems that almost all of the existing sync tasks modify only MOS and do not dirty any objsets. The only exception that I've been able to identify so far is the rollback which can modify an objset when it zeroes out the objset's ZIL. Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Approved by: Gordon Ross <gordon.w.ross@gmail.com> Author: Andriy Gapon <andriy.gapon@clusterhq.com> MFC after: 3 weeks Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h Directory Properties: head/sys/cddl/contrib/opensolaris/ (props changed) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c Thu Nov 24 10:21:22 2016 (r309097) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c Thu Nov 24 10:29:21 2016 (r309098) @@ -88,6 +88,8 @@ extern inline dsl_dataset_phys_t *dsl_da extern int spa_asize_inflation; +static zil_header_t zero_zil; + /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been @@ -132,6 +134,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds return; } + ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_lock); delta = parent_delta(ds, used); @@ -902,8 +905,20 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, objset_t *os; VERIFY0(dmu_objset_from_ds(ds, &os)); - bzero(&os->os_zil_header, sizeof (os->os_zil_header)); - dsl_dataset_dirty(ds, tx); + if (bcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + zio_t *zio; + + bzero(&os->os_zil_header, sizeof (os->os_zil_header)); + + zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + dsl_dataset_sync(ds, zio, tx); + VERIFY0(zio_wait(zio)); + + /* dsl_dataset_sync_done will drop this reference. */ + dmu_buf_add_ref(ds->ds_dbuf, ds); + dsl_dataset_sync_done(ds, tx); + } } uint64_t @@ -1083,8 +1098,10 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) panic("dirtying snapshot!"); - dp = ds->ds_dir->dd_pool; + /* Must not dirty a dataset in the same txg where it got snapshotted. */ + ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg); + dp = ds->ds_dir->dd_pool; if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, ds); @@ -1339,8 +1356,6 @@ void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dmu_tx_t *tx) { - static zil_header_t zero_zil; - dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; @@ -1359,6 +1374,10 @@ dsl_dataset_snapshot_sync_impl(dsl_datas bcmp(&os->os_phys->os_zil_header, &zero_zil, sizeof (zero_zil)) == 0); + /* Should not snapshot a dirty dataset. */ + ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, + ds, tx->tx_txg)); + dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); /* @@ -1718,6 +1737,27 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_ } } +static int +deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + dsl_deadlist_t *dl = arg; + dsl_deadlist_insert(dl, bp, tx); + return (0); +} + +void +dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) +{ + objset_t *os = ds->ds_objset; + + bplist_iterate(&ds->ds_pending_deadlist, + deadlist_enqueue_cb, &ds->ds_deadlist, tx); + + ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); + + dmu_buf_rele(ds->ds_dbuf, ds); +} + static void get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) { @@ -2237,6 +2277,18 @@ dsl_dataset_rollback_check(void *arg, dm return (SET_ERROR(EINVAL)); } + /* + * No rollback to a snapshot created in the current txg, because + * the rollback may dirty the dataset and create blocks that are + * not reachable from the rootbp while having a birth txg that + * falls into the snapshot's range. + */ + if (dmu_tx_is_syncing(tx) && + dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EAGAIN)); + } + /* must not have any bookmarks after the most recent snapshot */ nvlist_t *proprequest = fnvlist_alloc(); fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c Thu Nov 24 10:21:22 2016 (r309097) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c Thu Nov 24 10:29:21 2016 (r309098) @@ -524,14 +524,6 @@ dsl_pool_mos_diduse_space(dsl_pool_t *dp mutex_exit(&dp->dp_lock); } -static int -deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) -{ - dsl_deadlist_t *dl = arg; - dsl_deadlist_insert(dl, bp, tx); - return (0); -} - static void dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx) { @@ -632,11 +624,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t * - release hold from dsl_dataset_dirty() */ while ((ds = list_remove_head(&synced_datasets)) != NULL) { - objset_t *os = ds->ds_objset; - bplist_iterate(&ds->ds_pending_deadlist, - deadlist_enqueue_cb, &ds->ds_deadlist, tx); - ASSERT(!dmu_objset_is_dirty(os, txg)); - dmu_buf_rele(ds->ds_dbuf, ds); + dsl_dataset_sync_done(ds, tx); } while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { dsl_dir_sync(dd, tx); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h Thu Nov 24 10:21:22 2016 (r309097) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h Thu Nov 24 10:29:21 2016 (r309098) @@ -274,6 +274,7 @@ boolean_t dsl_dataset_modified_since_sna dsl_dataset_t *snap); void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx); +void dsl_dataset_sync_done(dsl_dataset_t *os, dmu_tx_t *tx); void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201611241029.uAOATL9W073641>