Date: Thu, 23 Aug 2012 09:18:53 +0000 (UTC) From: Martin Matuska <mm@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r239610 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/ztest vendor/illumos/dist/lib/libzfs/common Message-ID: <201208230918.q7N9IrZ8095013@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mm Date: Thu Aug 23 09:18:52 2012 New Revision: 239610 URL: http://svn.freebsd.org/changeset/base/239610 Log: Update vendor/illumos and vendor-sys/illumos to revision 13777:b1e53580146d Obtained from: ssh://anonhg@hg.illumos.org/illumos-gate Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_pool.c vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_pool.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/txg.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil_impl.h vendor-sys/illumos/dist/uts/common/fs/zfs/txg.c vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c Changes in other areas also in this revision: Modified: vendor/illumos/dist/cmd/ztest/ztest.c vendor/illumos/dist/lib/libzfs/common/libzfs_import.c Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c Thu Aug 23 09:18:52 2012 (r239610) @@ -1759,15 +1759,15 @@ dmu_init(void) dnode_init(); dbuf_init(); zfetch_init(); - arc_init(); l2arc_init(); + arc_init(); } void dmu_fini(void) { - l2arc_fini(); arc_fini(); + l2arc_fini(); zfetch_fini(); dbuf_fini(); dnode_fini(); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c Thu Aug 23 09:18:52 2012 (r239610) @@ -1608,13 +1608,6 @@ dmu_recv_existing_end(dmu_recv_cookie_t dsl_dataset_t *ds = drc->drc_logical_ds; int err, myerr; - /* - * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() - * expects it to have a ds_user_ptr (and zil), but clone_swap() - * can close it. - */ - txg_wait_synced(ds->ds_dir->dd_pool, 0); - if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, drc->drc_force); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c Thu Aug 23 09:18:52 2012 (r239610) @@ -103,14 +103,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); if (ds == NULL) { - /* - * Account for the meta-objset space in its placeholder - * dsl_dir. - */ - ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ - dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, - used, compressed, uncompressed, tx); - dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); + dsl_pool_mos_diduse_space(tx->tx_pool, + used, compressed, uncompressed); return; } dmu_buf_will_dirty(ds->ds_dbuf, tx); @@ -146,15 +140,9 @@ dsl_dataset_block_kill(dsl_dataset_t *ds ASSERT(used > 0); if (ds == NULL) { - /* - * Account for the meta-objset space in its placeholder - * dataset. - */ dsl_free(tx->tx_pool, tx->tx_txg, bp); - - dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, - -used, -compressed, -uncompressed, tx); - dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); + dsl_pool_mos_diduse_space(tx->tx_pool, + -used, -compressed, -uncompressed); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); @@ -1068,26 +1056,26 @@ dsl_dataset_destroy(dsl_dataset_t *ds, v dd = ds->ds_dir; - /* - * Check for errors and mark this ds as inconsistent, in - * case we crash while freeing the objects. - */ - err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, - dsl_dataset_destroy_begin_sync, ds, NULL, 0); - if (err) - goto out; - - err = dmu_objset_from_ds(ds, &os); - if (err) - goto out; - - /* - * If async destruction is not enabled try to remove all objects - * while in the open context so that there is less work to do in - * the syncing context. - */ if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { + /* + * Check for errors and mark this ds as inconsistent, in + * case we crash while freeing the objects. + */ + err = dsl_sync_task_do(dd->dd_pool, + dsl_dataset_destroy_begin_check, + dsl_dataset_destroy_begin_sync, ds, NULL, 0); + if (err) + goto out; + + err = dmu_objset_from_ds(ds, &os); + if (err) + goto out; + + /* + * Remove all objects while in the open context so that + * there is less work to do in the syncing context. + */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, ds->ds_phys->ds_prev_snap_txg)) { /* @@ -1098,30 +1086,25 @@ dsl_dataset_destroy(dsl_dataset_t *ds, v } if (err != ESRCH) goto out; - } - /* - * Only the ZIL knows how to free log blocks. - */ - zil_destroy(dmu_objset_zil(os), B_FALSE); - - /* - * Sync out all in-flight IO. - */ - txg_wait_synced(dd->dd_pool, 0); - - /* - * If we managed to free all the objects in open - * context, the user space accounting should be zero. - */ - if (ds->ds_phys->ds_bp.blk_fill == 0 && - dmu_objset_userused_enabled(os)) { - uint64_t count; + /* + * Sync out all in-flight IO. + */ + txg_wait_synced(dd->dd_pool, 0); - ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || - count == 0); - ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || - count == 0); + /* + * If we managed to free all the objects in open + * context, the user space accounting should be zero. + */ + if (ds->ds_phys->ds_bp.blk_fill == 0 && + dmu_objset_userused_enabled(os)) { + uint64_t count; + + ASSERT(zap_count(os, DMU_USERUSED_OBJECT, + &count) != 0 || count == 0); + ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, + &count) != 0 || count == 0); + } } rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); @@ -1859,6 +1842,7 @@ dsl_dataset_destroy_sync(void *arg1, voi } else { zfeature_info_t *async_destroy = &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; + objset_t *os; /* * There's no next snapshot, so this is a head dataset. @@ -1870,6 +1854,8 @@ dsl_dataset_destroy_sync(void *arg1, voi dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = 0; + VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); + if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { err = old_synchronous_dataset_destroy(ds, tx); } else { @@ -1879,12 +1865,12 @@ dsl_dataset_destroy_sync(void *arg1, voi */ uint64_t used, comp, uncomp; - ASSERT(err == 0 || err == EBUSY); + zil_destroy_sync(dmu_objset_zil(os), tx); + if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { spa_feature_incr(dp->dp_spa, async_destroy, tx); - dp->dp_bptree_obj = bptree_alloc( - dp->dp_meta_objset, tx); - VERIFY(zap_add(dp->dp_meta_objset, + dp->dp_bptree_obj = bptree_alloc(mos, tx); + VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj, tx) == 0); @@ -1897,7 +1883,7 @@ dsl_dataset_destroy_sync(void *arg1, voi ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == used); - bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj, + bptree_add(mos, dp->dp_bptree_obj, &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, used, comp, uncomp, tx); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, @@ -2179,7 +2165,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; - dsl_dir_dirty(ds->ds_dir, tx); dmu_objset_sync(ds->ds_objset, zio, tx); } Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c Thu Aug 23 09:18:52 2012 (r239610) @@ -190,7 +190,6 @@ errout: kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); return (err); - } void @@ -224,7 +223,7 @@ dsl_dir_name(dsl_dir_t *dd, char *buf) } } -/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */ +/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ int dsl_dir_namelen(dsl_dir_t *dd) { @@ -582,8 +581,6 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx { ASSERT(dmu_tx_is_syncing(tx)); - dmu_buf_will_dirty(dd->dd_dbuf, tx); - mutex_enter(&dd->dd_lock); ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, @@ -940,8 +937,6 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_u ASSERT(dmu_tx_is_syncing(tx)); ASSERT(type < DD_USED_NUM); - dsl_dir_dirty(dd, tx); - if (needlock) mutex_enter(&dd->dd_lock); accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); @@ -950,6 +945,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_u dd->dd_phys->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); + dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_used_bytes += used; dd->dd_phys->dd_uncompressed_bytes += uncompressed; dd->dd_phys->dd_compressed_bytes += compressed; @@ -991,13 +987,13 @@ dsl_dir_transfer_space(dsl_dir_t *dd, in if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) return; - dsl_dir_dirty(dd, tx); if (needlock) mutex_enter(&dd->dd_lock); ASSERT(delta > 0 ? dd->dd_phys->dd_used_breakdown[oldtype] >= delta : dd->dd_phys->dd_used_breakdown[newtype] >= -delta); ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); + dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_used_breakdown[oldtype] -= delta; dd->dd_phys->dd_used_breakdown[newtype] += delta; if (needlock) Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_pool.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_pool.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_pool.c Thu Aug 23 09:18:52 2012 (r239610) @@ -42,6 +42,7 @@ #include <sys/dsl_deadlist.h> #include <sys/bptree.h> #include <sys/zfeature.h> +#include <sys/zil_impl.h> int zfs_no_write_throttle = 0; int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ @@ -86,12 +87,12 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg_list_create(&dp->dp_dirty_datasets, offsetof(dsl_dataset_t, ds_dirty_link)); + txg_list_create(&dp->dp_dirty_zilogs, + offsetof(zilog_t, zl_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_group_t, dstg_node)); - list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), - offsetof(dsl_dataset_t, ds_synced_link)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); @@ -224,9 +225,9 @@ dsl_pool_close(dsl_pool_t *dp) dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); + txg_list_destroy(&dp->dp_dirty_zilogs); txg_list_destroy(&dp->dp_sync_tasks); txg_list_destroy(&dp->dp_dirty_dirs); - list_destroy(&dp->dp_synced_datasets); arc_flush(dp->dp_spa); txg_fini(dp); @@ -306,6 +307,21 @@ dsl_pool_create(spa_t *spa, nvlist_t *zp return (dp); } +/* + * Account for the meta-objset space in its placeholder dsl_dir. + */ +void +dsl_pool_mos_diduse_space(dsl_pool_t *dp, + int64_t used, int64_t comp, int64_t uncomp) +{ + ASSERT3U(comp, ==, uncomp); /* it's all metadata */ + mutex_enter(&dp->dp_lock); + dp->dp_mos_used_delta += used; + dp->dp_mos_compressed_delta += comp; + dp->dp_mos_uncompressed_delta += uncomp; + mutex_exit(&dp->dp_lock); +} + static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { @@ -324,11 +340,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t dmu_tx_t *tx; dsl_dir_t *dd; dsl_dataset_t *ds; - dsl_sync_task_group_t *dstg; objset_t *mos = dp->dp_meta_objset; hrtime_t start, write_time; uint64_t data_written; int err; + list_t synced_datasets; + + list_create(&synced_datasets, sizeof (dsl_dataset_t), + offsetof(dsl_dataset_t, ds_synced_link)); /* * We need to copy dp_space_towrite() before doing @@ -351,7 +370,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t * may sync newly-created datasets on pass 2. */ ASSERT(!list_link_active(&ds->ds_synced_link)); - list_insert_tail(&dp->dp_synced_datasets, ds); + list_insert_tail(&synced_datasets, ds); dsl_dataset_sync(ds, zio, tx); } DTRACE_PROBE(pool_sync__1setup); @@ -361,15 +380,20 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t ASSERT(err == 0); DTRACE_PROBE(pool_sync__2rootzio); - for (ds = list_head(&dp->dp_synced_datasets); ds; - ds = list_next(&dp->dp_synced_datasets, ds)) + /* + * After the data blocks have been written (ensured by the zio_wait() + * above), update the user/group space accounting. + */ + for (ds = list_head(&synced_datasets); ds; + ds = list_next(&synced_datasets, ds)) dmu_objset_do_userquota_updates(ds->ds_objset, tx); /* * Sync the datasets again to push out the changes due to * userspace updates. This must be done before we process the - * sync tasks, because that could cause a snapshot of a dataset - * whose ds_bp will be rewritten when we do this 2nd sync. + * sync tasks, so that any snapshots will have the correct + * user accounting information (and we won't get confused + * about which blocks are part of the snapshot). */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { @@ -380,30 +404,42 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t err = zio_wait(zio); /* - * Move dead blocks from the pending deadlist to the on-disk - * deadlist. + * Now that the datasets have been completely synced, we can + * clean up our in-memory structures accumulated while syncing: + * + * - move dead blocks from the pending deadlist to the on-disk deadlist + * - clean up zil records + * - release hold from dsl_dataset_dirty() */ - for (ds = list_head(&dp->dp_synced_datasets); ds; - ds = list_next(&dp->dp_synced_datasets, ds)) { + while (ds = list_remove_head(&synced_datasets)) { + objset_t *os = ds->ds_objset; bplist_iterate(&ds->ds_pending_deadlist, deadlist_enqueue_cb, &ds->ds_deadlist, tx); + ASSERT(!dmu_objset_is_dirty(os, txg)); + dmu_buf_rele(ds->ds_dbuf, ds); } - while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) { - /* - * No more sync tasks should have been added while we - * were syncing. - */ - ASSERT(spa_sync_pass(dp->dp_spa) == 1); - dsl_sync_task_group_sync(dstg, tx); - } - DTRACE_PROBE(pool_sync__3task); - start = gethrtime(); while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) dsl_dir_sync(dd, tx); write_time += gethrtime() - start; + /* + * The MOS's space is accounted for in the pool/$MOS + * (dp_mos_dir). We can't modify the mos while we're syncing + * it, so we remember the deltas and apply them here. + */ + if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 || + dp->dp_mos_uncompressed_delta != 0) { + dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD, + dp->dp_mos_used_delta, + dp->dp_mos_compressed_delta, + dp->dp_mos_uncompressed_delta, tx); + dp->dp_mos_used_delta = 0; + dp->dp_mos_compressed_delta = 0; + dp->dp_mos_uncompressed_delta = 0; + } + start = gethrtime(); if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { @@ -419,6 +455,27 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t hrtime_t, dp->dp_read_overhead); write_time -= dp->dp_read_overhead; + /* + * If we modify a dataset in the same txg that we want to destroy it, + * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. + * dsl_dir_destroy_check() will fail if there are unexpected holds. + * Therefore, we want to sync the MOS (thus syncing the dd_dbuf + * and clearing the hold on it) before we process the sync_tasks. + * The MOS data dirtied by the sync_tasks will be synced on the next + * pass. + */ + DTRACE_PROBE(pool_sync__3task); + if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { + dsl_sync_task_group_t *dstg; + /* + * No more sync tasks should have been added while we + * were syncing. + */ + ASSERT(spa_sync_pass(dp->dp_spa) == 1); + while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) + dsl_sync_task_group_sync(dstg, tx); + } + dmu_tx_commit(tx); dp->dp_space_towrite[txg & TXG_MASK] = 0; @@ -467,15 +524,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) { + zilog_t *zilog; dsl_dataset_t *ds; - objset_t *os; - while (ds = list_head(&dp->dp_synced_datasets)) { - list_remove(&dp->dp_synced_datasets, ds); - os = ds->ds_objset; - zil_clean(os->os_zil, txg); - ASSERT(!dmu_objset_is_dirty(os, txg)); - dmu_buf_rele(ds->ds_dbuf, ds); + while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) { + ds = dmu_objset_ds(zilog->zl_os); + zil_clean(zilog, txg); + ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg)); + dmu_buf_rele(ds->ds_dbuf, zilog); } ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); } Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c Thu Aug 23 09:18:52 2012 (r239610) @@ -116,6 +116,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TY static dsl_syncfunc_t spa_sync_version; static dsl_syncfunc_t spa_sync_props; +static dsl_checkfunc_t spa_change_guid_check; +static dsl_syncfunc_t spa_change_guid_sync; static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, @@ -675,6 +677,47 @@ spa_prop_clear_bootfs(spa_t *spa, uint64 } } +/*ARGSUSED*/ +static int +spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + spa_t *spa = arg1; + uint64_t *newguid = arg2; + vdev_t *rvd = spa->spa_root_vdev; + uint64_t vdev_state; + + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + vdev_state = rvd->vdev_state; + spa_config_exit(spa, SCL_STATE, FTAG); + + if (vdev_state != VDEV_STATE_HEALTHY) + return (ENXIO); + + ASSERT3U(spa_guid(spa), !=, *newguid); + + return (0); +} + +static void +spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + spa_t *spa = arg1; + uint64_t *newguid = arg2; + uint64_t oldguid; + vdev_t *rvd = spa->spa_root_vdev; + + oldguid = spa_guid(spa); + + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + rvd->vdev_guid = *newguid; + rvd->vdev_guid_sum += (*newguid - oldguid); + vdev_config_dirty(rvd); + spa_config_exit(spa, SCL_STATE, FTAG); + + spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld", + oldguid, *newguid); +} + /* * Change the GUID for the pool. This is done so that we can later * re-import a pool built from a clone of our own vdevs. We will modify @@ -687,29 +730,23 @@ spa_prop_clear_bootfs(spa_t *spa, uint64 int spa_change_guid(spa_t *spa) { - uint64_t oldguid, newguid; - uint64_t txg; - - if (!(spa_mode_global & FWRITE)) - return (EROFS); - - txg = spa_vdev_enter(spa); - - if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY) - return (spa_vdev_exit(spa, NULL, txg, ENXIO)); + int error; + uint64_t guid; - oldguid = spa_guid(spa); - newguid = spa_generate_guid(NULL); - ASSERT3U(oldguid, !=, newguid); + mutex_enter(&spa_namespace_lock); + guid = spa_generate_guid(NULL); - spa->spa_root_vdev->vdev_guid = newguid; - spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid); + error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check, + spa_change_guid_sync, spa, &guid, 5); - vdev_config_dirty(spa->spa_root_vdev); + if (error == 0) { + spa_config_sync(spa, B_FALSE, B_TRUE); + spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); + } - spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); + mutex_exit(&spa_namespace_lock); - return (spa_vdev_exit(spa, NULL, txg, 0)); + return (error); } /* @@ -6062,6 +6099,9 @@ spa_sync(spa_t *spa, uint64_t txg) rvd->vdev_children, txg, B_TRUE); } + if (error == 0) + spa->spa_last_synced_guid = rvd->vdev_guid; + spa_config_exit(spa, SCL_STATE, FTAG); if (error == 0) Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c Thu Aug 23 09:18:52 2012 (r239610) @@ -1348,16 +1348,29 @@ spa_name(spa_t *spa) uint64_t spa_guid(spa_t *spa) { + dsl_pool_t *dp = spa_get_dsl(spa); + uint64_t guid; + /* * If we fail to parse the config during spa_load(), we can go through * the error path (which posts an ereport) and end up here with no root * vdev. We stash the original pool guid in 'spa_config_guid' to handle * this case. */ - if (spa->spa_root_vdev != NULL) + if (spa->spa_root_vdev == NULL) + return (spa->spa_config_guid); + + guid = spa->spa_last_synced_guid != 0 ? + spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid; + + /* + * Return the most recently synced out guid unless we're + * in syncing context. + */ + if (dp && dsl_pool_sync_context(dp)) return (spa->spa_root_vdev->vdev_guid); else - return (spa->spa_config_guid); + return (guid); } uint64_t Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_pool.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_pool.h Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_pool.h Thu Aug 23 09:18:52 2012 (r239610) @@ -82,7 +82,6 @@ typedef struct dsl_pool { /* No lock needed - sync context only */ blkptr_t dp_meta_rootbp; - list_t dp_synced_datasets; hrtime_t dp_read_overhead; uint64_t dp_throughput; /* bytes per millisec */ uint64_t dp_write_limit; @@ -96,10 +95,14 @@ typedef struct dsl_pool { kmutex_t dp_lock; uint64_t dp_space_towrite[TXG_SIZE]; uint64_t dp_tempreserved[TXG_SIZE]; + uint64_t dp_mos_used_delta; + uint64_t dp_mos_compressed_delta; + uint64_t dp_mos_uncompressed_delta; /* Has its own locking */ tx_state_t dp_tx; txg_list_t dp_dirty_datasets; + txg_list_t dp_dirty_zilogs; txg_list_t dp_dirty_dirs; txg_list_t dp_sync_tasks; @@ -139,6 +142,8 @@ int dsl_read_nolock(zio_t *pio, spa_t *s void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx); +void dsl_pool_mos_diduse_space(dsl_pool_t *dp, + int64_t used, int64_t comp, int64_t uncomp); taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h Thu Aug 23 09:18:52 2012 (r239610) @@ -141,6 +141,7 @@ struct spa { vdev_t *spa_root_vdev; /* top-level vdev container */ uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_load_guid; /* spa_load initialized guid */ + uint64_t spa_last_synced_guid; /* last synced guid */ list_t spa_config_dirty_list; /* vdevs with dirty config */ list_t spa_state_dirty_list; /* vdevs with dirty state */ spa_aux_vdev_t spa_spares; /* hot spares */ Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/txg.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/txg.h Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/txg.h Thu Aug 23 09:18:52 2012 (r239610) @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_TXG_H #define _SYS_TXG_H @@ -115,7 +118,7 @@ extern boolean_t txg_sync_waiting(struct extern void txg_list_create(txg_list_t *tl, size_t offset); extern void txg_list_destroy(txg_list_t *tl); -extern int txg_list_empty(txg_list_t *tl, uint64_t txg); +extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg); extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg); extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); extern void *txg_list_remove(txg_list_t *tl, uint64_t txg); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h Thu Aug 23 09:18:52 2012 (r239610) @@ -142,7 +142,7 @@ extern nvlist_t *vdev_config_generate(sp struct uberblock; extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset); extern int vdev_label_number(uint64_t psise, uint64_t offset); -extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label); +extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg); extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **); typedef enum { Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil.h Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil.h Thu Aug 23 09:18:52 2012 (r239610) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -395,6 +396,7 @@ extern void zil_replay(objset_t *os, voi zil_replay_func_t *replay_func[TX_MAX_TYPE]); extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx); extern void zil_destroy(zilog_t *zilog, boolean_t keep_first); +extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx); extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx); extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil_impl.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil_impl.h Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil_impl.h Thu Aug 23 09:18:52 2012 (r239610) @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -130,6 +131,7 @@ struct zilog { zil_header_t zl_old_header; /* debugging aid */ uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */ uint_t zl_prev_rotor; /* rotor for zl_prev[] */ + txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */ }; typedef struct zil_bp_node { Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/txg.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/txg.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/txg.c Thu Aug 23 09:18:52 2012 (r239610) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -587,7 +588,7 @@ txg_list_destroy(txg_list_t *tl) mutex_destroy(&tl->tl_lock); } -int +boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg) { return (tl->tl_head[txg & TXG_MASK] == NULL); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev.c Thu Aug 23 09:18:52 2012 (r239610) @@ -1327,9 +1327,9 @@ vdev_validate(vdev_t *vd, boolean_t stri if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { uint64_t aux_guid = 0; nvlist_t *nvl; + uint64_t txg = strict ? spa->spa_config_txg : -1ULL; - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == - NULL) { + if ((label = vdev_label_read_config(vd, txg)) == NULL) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_BAD_LABEL); return (0); @@ -1511,7 +1511,7 @@ vdev_reopen(vdev_t *vd) !l2arc_vdev_present(vd)) l2arc_add_vdev(spa, vd); } else { - (void) vdev_validate(vd, B_TRUE); + (void) vdev_validate(vd, spa_last_synced_txg(spa)); } /* @@ -1970,7 +1970,7 @@ vdev_validate_aux(vdev_t *vd) if (!vdev_readable(vd)) return (0); - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) { + if ((label = vdev_label_read_config(vd, -1ULL)) == NULL) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); return (-1); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_label.c Thu Aug 23 09:18:52 2012 (r239610) @@ -433,17 +433,22 @@ vdev_top_config_generate(spa_t *spa, nvl } /* - * Returns the configuration from the label of the given vdev. If 'label' is - * VDEV_BEST_LABEL, each label of the vdev will be read until a valid - * configuration is found; otherwise, only the specified label will be read. + * Returns the configuration from the label of the given vdev. For vdevs + * which don't have a txg value stored on their label (i.e. spares/cache) + * or have not been completely initialized (txg = 0) just return + * the configuration from the first valid label we find. Otherwise, + * find the most up-to-date label that does not exceed the specified + * 'txg' value. */ nvlist_t * -vdev_label_read_config(vdev_t *vd, int label) +vdev_label_read_config(vdev_t *vd, uint64_t txg) { spa_t *spa = vd->vdev_spa; nvlist_t *config = NULL; vdev_phys_t *vp; zio_t *zio; + uint64_t best_txg = 0; + int error = 0; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; @@ -456,8 +461,7 @@ vdev_label_read_config(vdev_t *vd, int l retry: for (int l = 0; l < VDEV_LABELS; l++) { - if (label >= 0 && label < VDEV_LABELS && label != l) - continue; + nvlist_t *label = NULL; zio = zio_root(spa, NULL, NULL, flags); @@ -467,12 +471,31 @@ retry: if (zio_wait(zio) == 0 && nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), - &config, 0) == 0) - break; + &label, 0) == 0) { + uint64_t label_txg = 0; - if (config != NULL) { - nvlist_free(config); - config = NULL; + /* + * Auxiliary vdevs won't have txg values in their + * labels and newly added vdevs may not have been + * completely initialized so just return the + * configuration from the first valid label we + * encounter. + */ + error = nvlist_lookup_uint64(label, + ZPOOL_CONFIG_POOL_TXG, &label_txg); + if ((error || label_txg == 0) && !config) { + config = label; + break; + } else if (label_txg <= txg && label_txg > best_txg) { + best_txg = label_txg; + nvlist_free(config); + config = fnvlist_dup(label); + } + } + + if (label != NULL) { + nvlist_free(label); + label = NULL; } } @@ -507,7 +530,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, v /* * Read the label, if any, and perform some basic sanity checks. */ - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) + if ((label = vdev_label_read_config(vd, -1ULL)) == NULL) return (B_FALSE); (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, @@ -867,7 +890,6 @@ vdev_uberblock_compare(uberblock_t *ub1, struct ubl_cbdata { uberblock_t *ubl_ubbest; /* Best uberblock */ vdev_t *ubl_vd; /* vdev associated with the above */ - int ubl_label; /* Label associated with the above */ }; static void @@ -886,15 +908,13 @@ vdev_uberblock_load_done(zio_t *zio) if (ub->ub_txg <= spa->spa_load_max_txg && vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) { /* - * Keep track of the vdev and label in which this - * uberblock was found. We will use this information - * later to obtain the config nvlist associated with + * Keep track of the vdev in which this uberblock + * was found. We will use this information later + * to obtain the config nvlist associated with * this uberblock. */ *cbp->ubl_ubbest = *ub; cbp->ubl_vd = vd; - cbp->ubl_label = vdev_label_number(vd->vdev_psize, - zio->io_offset); } mutex_exit(&rio->io_lock); } @@ -926,12 +946,11 @@ vdev_uberblock_load_impl(zio_t *zio, vde * Reads the 'best' uberblock from disk along with its associated * configuration. First, we read the uberblock array of each label of each * vdev, keeping track of the uberblock with the highest txg in each array. - * Then, we read the configuration from the same label as the best uberblock. + * Then, we read the configuration from the same vdev as the best uberblock. */ void vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config) { - int i; zio_t *zio; spa_t *spa = rvd->vdev_spa; struct ubl_cbdata cb; @@ -951,13 +970,15 @@ vdev_uberblock_load(vdev_t *rvd, uberblo zio = zio_root(spa, NULL, &cb, flags); vdev_uberblock_load_impl(zio, rvd, flags, &cb); (void) zio_wait(zio); - if (cb.ubl_vd != NULL) { - for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) { - *config = vdev_label_read_config(cb.ubl_vd, i); - if (*config != NULL) - break; - } - } + + /* + * It's possible that the best uberblock was discovered on a label + * that has a configuration which was written in a future txg. + * Search all labels on this vdev to find the configuration that + * matches the txg for our uberblock. + */ + if (cb.ubl_vd != NULL) + *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg); spa_config_exit(spa, SCL_ALL, FTAG); } Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c Thu Aug 23 05:15:15 2012 (r239609) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c Thu Aug 23 09:18:52 2012 (r239610) @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -455,6 +455,37 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t * } /* + * Called when we create in-memory log transactions so that we know + * to cleanup the itxs at the end of spa_sync(). + */ +void +zilog_dirty(zilog_t *zilog, uint64_t txg) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); + + if (dsl_dataset_is_snapshot(ds)) + panic("dirtying snapshot!"); + + if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) { + /* up the hold count until we can be written out */ + dmu_buf_add_ref(ds->ds_dbuf, zilog); + } +} + +boolean_t +zilog_is_dirty(zilog_t *zilog) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + + for (int t = 0; t < TXG_SIZE; t++) { + if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t)) + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Create an on-disk intent log. */ static lwb_t * @@ -570,14 +601,21 @@ zil_destroy(zilog_t *zilog, boolean_t ke kmem_cache_free(zil_lwb_cache, lwb); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201208230918.q7N9IrZ8095013>