Date: Fri, 25 Oct 2013 15:43:59 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r257119 - in stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys Message-ID: <201310251543.r9PFhxMr018989@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Fri Oct 25 15:43:59 2013 New Revision: 257119 URL: http://svnweb.freebsd.org/changeset/base/257119 Log: MFC r253816: MFV r253780: To quote Illumos #3875: The problem here is that if we ever end up in the error path, we drop the locks protecting access to the zfsvfs_t prior to forcibly unmounting the filesystem. Because z_os is NULL, any thread that had already picked up the zfsvfs_t and was sitting in ZFS_ENTER() when we dropped our locks in zfs_resume_fs() will now acquire the lock, attempt to use z_os, and panic. Illumos ZFS issues: 3875 panic in zfs_root() after failed rollback Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Directory Properties: stable/9/sys/ (props changed) stable/9/sys/cddl/contrib/opensolaris/ (props changed) Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c Fri Oct 25 15:43:59 2013 (r257119) @@ -517,6 +517,38 @@ dmu_objset_rele(objset_t *os, void *tag) dsl_pool_rele(dp, tag); } +/* + * When we are called, os MUST refer to an objset associated with a dataset + * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner + * == tag. We will then release and reacquire ownership of the dataset while + * holding the pool config_rwlock to avoid intervening namespace or ownership + * changes may occur. + * + * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to + * release the hold on its dataset and acquire a new one on the dataset of the + * same name so that it can be partially torn down and reconstructed. + */ +void +dmu_objset_refresh_ownership(objset_t *os, void *tag) +{ + dsl_pool_t *dp; + dsl_dataset_t *ds, *newds; + char name[MAXNAMELEN]; + + ds = os->os_dsl_dataset; + VERIFY3P(ds, !=, NULL); + VERIFY3P(ds->ds_owner, ==, tag); + VERIFY(dsl_dataset_long_held(ds)); + + dsl_dataset_name(ds, name); + dp = dmu_objset_pool(os); + dsl_pool_config_enter(dp, FTAG); + dmu_objset_disown(os, tag); + VERIFY0(dsl_dataset_own(dp, name, tag, &newds)); + VERIFY3P(newds, ==, os->os_dsl_dataset); + dsl_pool_config_exit(dp, FTAG); +} + void dmu_objset_disown(objset_t *os, void *tag) { Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Fri Oct 25 15:43:59 2013 (r257119) @@ -1602,7 +1602,7 @@ dmu_recv_end_check(void *arg, dmu_tx_t * if (error != 0) return (error); error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, - origin_head, drc->drc_force); + origin_head, drc->drc_force, drc->drc_owner, tx); if (error != 0) { dsl_dataset_rele(origin_head, FTAG); return (error); @@ -1654,6 +1654,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *t dsl_dataset_rele(origin_head, FTAG); dsl_destroy_head_sync_impl(drc->drc_ds, tx); + + if (drc->drc_owner != NULL) + VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner); } else { dsl_dataset_t *ds = drc->drc_ds; @@ -1752,8 +1755,10 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc) } int -dmu_recv_end(dmu_recv_cookie_t *drc) +dmu_recv_end(dmu_recv_cookie_t *drc, void *owner) { + drc->drc_owner = owner; + if (drc->drc_newfs) return (dmu_recv_new_end(drc)); else Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c Fri Oct 25 15:43:59 2013 (r257119) @@ -1755,16 +1755,52 @@ dsl_dataset_rename_snapshot(const char * dsl_dataset_rename_snapshot_sync, &ddrsa, 1)); } +/* + * If we're doing an ownership handoff, we need to make sure that there is + * only one long hold on the dataset. We're not allowed to change anything here + * so we don't permanently release the long hold or regular hold here. We want + * to do this only when syncing to avoid the dataset unexpectedly going away + * when we release the long hold. + */ +static int +dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) +{ + boolean_t held; + + if (!dmu_tx_is_syncing(tx)) + return (0); + + if (owner != NULL) { + VERIFY3P(ds->ds_owner, ==, owner); + dsl_dataset_long_rele(ds, owner); + } + + held = dsl_dataset_long_held(ds); + + if (owner != NULL) + dsl_dataset_long_hold(ds, owner); + + if (held) + return (SET_ERROR(EBUSY)); + + return (0); +} + +typedef struct dsl_dataset_rollback_arg { + const char *ddra_fsname; + void *ddra_owner; +} dsl_dataset_rollback_arg_t; + static int dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) { - const char *fsname = arg; + dsl_dataset_rollback_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int64_t unused_refres_delta; int error; - error = dsl_dataset_hold(dp, fsname, FTAG, &ds); + error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); if (error != 0) return (error); @@ -1780,9 +1816,10 @@ dsl_dataset_rollback_check(void *arg, dm return (SET_ERROR(EINVAL)); } - if (dsl_dataset_long_held(ds)) { + error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); + if (error != 0) { dsl_dataset_rele(ds, FTAG); - return (SET_ERROR(EBUSY)); + return (error); } /* @@ -1819,12 +1856,12 @@ dsl_dataset_rollback_check(void *arg, dm static void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) { - const char *fsname = arg; + dsl_dataset_rollback_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds, *clone; uint64_t cloneobj; - VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds)); + VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); @@ -1840,11 +1877,26 @@ dsl_dataset_rollback_sync(void *arg, dmu dsl_dataset_rele(ds, FTAG); } +/* + * If owner != NULL: + * + * - The existing dataset MUST be owned by the specified owner at entry + * - Upon return, dataset will still be held by the same owner, whether we + * succeed or not. + * + * This mode is required any time the existing filesystem is mounted. See + * notes above zfs_suspend_fs() for further details. + */ int -dsl_dataset_rollback(const char *fsname) +dsl_dataset_rollback(const char *fsname, void *owner) { + dsl_dataset_rollback_arg_t ddra; + + ddra.ddra_fsname = fsname; + ddra.ddra_owner = owner; + return (dsl_sync_task(fsname, dsl_dataset_rollback_check, - dsl_dataset_rollback_sync, (void *)fsname, 1)); + dsl_dataset_rollback_sync, (void *)&ddra, 1)); } struct promotenode { @@ -2362,7 +2414,7 @@ dsl_dataset_promote(const char *name, ch int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, - dsl_dataset_t *origin_head, boolean_t force) + dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) { int64_t unused_refres_delta; @@ -2391,7 +2443,7 @@ dsl_dataset_clone_swap_check_impl(dsl_da return (SET_ERROR(ETXTBSY)); /* origin_head should have no long holds (e.g. is not mounted) */ - if (dsl_dataset_long_held(origin_head)) + if (dsl_dataset_handoff_check(origin_head, owner, tx)) return (SET_ERROR(EBUSY)); /* check amount of any unconsumed refreservation */ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h Fri Oct 25 15:43:59 2013 (r257119) @@ -136,6 +136,7 @@ struct objset { int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); +void dmu_objset_refresh_ownership(objset_t *os, void *tag); void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h Fri Oct 25 15:43:59 2013 (r257119) @@ -62,6 +62,7 @@ typedef struct dmu_recv_cookie { struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; + void *drc_owner; } dmu_recv_cookie_t; int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, @@ -72,6 +73,6 @@ int dmu_recv_stream(dmu_recv_cookie_t *d int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, #endif int cleanup_fd, uint64_t *action_handlep); -int dmu_recv_end(dmu_recv_cookie_t *drc); +int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner); #endif /* _DMU_SEND_H */ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h Fri Oct 25 15:43:59 2013 (r257119) @@ -247,7 +247,7 @@ void dsl_dataset_long_rele(dsl_dataset_t boolean_t dsl_dataset_long_held(dsl_dataset_t *ds); int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, - dsl_dataset_t *origin_head, boolean_t force); + dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx); void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, dsl_dataset_t *origin_head, dmu_tx_t *tx); int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, @@ -264,7 +264,7 @@ int dsl_dataset_snap_lookup(dsl_dataset_ int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx); void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, zprop_source_t source, uint64_t value, dmu_tx_t *tx); -int dsl_dataset_rollback(const char *fsname); +int dsl_dataset_rollback(const char *fsname, void *owner); #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c Fri Oct 25 15:43:59 2013 (r257119) @@ -3568,13 +3568,13 @@ zfs_ioc_rollback(zfs_cmd_t *zc) if (error == 0) { int resume_err; - error = dsl_dataset_rollback(zc->zc_name); + error = dsl_dataset_rollback(zc->zc_name, zfsvfs); resume_err = zfs_resume_fs(zfsvfs, zc->zc_name); error = error ? error : resume_err; } VFS_RELE(zfsvfs->z_vfs); } else { - error = dsl_dataset_rollback(zc->zc_name); + error = dsl_dataset_rollback(zc->zc_name, NULL); } return (error); } @@ -4110,13 +4110,13 @@ zfs_ioc_recv(zfs_cmd_t *zc) * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. */ - end_err = dmu_recv_end(&drc); + end_err = dmu_recv_end(&drc, zfsvfs); if (error == 0) error = zfs_resume_fs(zfsvfs, tofs); error = error ? error : end_err; VFS_RELE(zfsvfs->z_vfs); } else { - error = dmu_recv_end(&drc); + error = dmu_recv_end(&drc, NULL); } } @@ -4607,8 +4607,11 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) * objset_phys_t). Suspend/resume the fs will do that. */ error = zfs_suspend_fs(zfsvfs); - if (error == 0) + if (error == 0) { + dmu_objset_refresh_ownership(zfsvfs->z_os, + zfsvfs); error = zfs_resume_fs(zfsvfs, zc->zc_name); + } } if (error == 0) error = dmu_objset_userspace_upgrade(zfsvfs->z_os); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Fri Oct 25 15:37:58 2013 (r257118) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Fri Oct 25 15:43:59 2013 (r257119) @@ -2210,7 +2210,9 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int * Block out VOPs and close zfsvfs_t::z_os * * Note, if successful, then we return with the 'z_teardown_lock' and - * 'z_teardown_inactive_lock' write held. + * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying + * dataset and objset intact so that they can be atomically handed off during + * a subsequent rollback or recv operation and the resume thereafter. */ int zfs_suspend_fs(zfsvfs_t *zfsvfs) @@ -2219,71 +2221,76 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs) if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) return (error); - dmu_objset_disown(zfsvfs->z_os, zfsvfs); return (0); } /* - * Reopen zfsvfs_t::z_os and release VOPs. + * Rebuild SA and release VOPs. Note that ownership of the underlying dataset + * is an invariant across any of the operations that can be performed while the + * filesystem was suspended. Whether it succeeded or failed, the preconditions + * are the same: the relevant objset and associated dataset are owned by + * zfsvfs, held, and long held on entry. */ int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) { int err; + znode_t *zp; + uint64_t sa_obj = 0; ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); - err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, - &zfsvfs->z_os); - if (err) { - zfsvfs->z_os = NULL; - } else { - znode_t *zp; - uint64_t sa_obj = 0; + /* + * We already own this, so just hold and rele it to update the + * objset_t, as the one we had before may have been evicted. + */ + VERIFY0(dmu_objset_hold(osname, zfsvfs, &zfsvfs->z_os)); + VERIFY3P(zfsvfs->z_os->os_dsl_dataset->ds_owner, ==, zfsvfs); + VERIFY(dsl_dataset_long_held(zfsvfs->z_os->os_dsl_dataset)); + dmu_objset_rele(zfsvfs->z_os, zfsvfs); - /* - * Make sure version hasn't changed - */ + /* + * Make sure version hasn't changed + */ - err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION, - &zfsvfs->z_version); + err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION, + &zfsvfs->z_version); - if (err) - goto bail; + if (err) + goto bail; - err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, - ZFS_SA_ATTRS, 8, 1, &sa_obj); + err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj); - if (err && zfsvfs->z_version >= ZPL_VERSION_SA) - goto bail; + if (err && zfsvfs->z_version >= ZPL_VERSION_SA) + goto bail; - if ((err = sa_setup(zfsvfs->z_os, sa_obj, - zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) - goto bail; + if ((err = sa_setup(zfsvfs->z_os, sa_obj, + zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) + goto bail; - if (zfsvfs->z_version >= ZPL_VERSION_SA) - sa_register_update_callback(zfsvfs->z_os, - zfs_sa_upgrade); + if (zfsvfs->z_version >= ZPL_VERSION_SA) + sa_register_update_callback(zfsvfs->z_os, + zfs_sa_upgrade); - VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); + VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); - zfs_set_fuid_feature(zfsvfs); + zfs_set_fuid_feature(zfsvfs); - /* - * Attempt to re-establish all the active znodes with - * their dbufs. If a zfs_rezget() fails, then we'll let - * any potential callers discover that via ZFS_ENTER_VERIFY_VP - * when they try to use their znode. - */ - mutex_enter(&zfsvfs->z_znodes_lock); - for (zp = list_head(&zfsvfs->z_all_znodes); zp; - zp = list_next(&zfsvfs->z_all_znodes, zp)) { - (void) zfs_rezget(zp); - } - mutex_exit(&zfsvfs->z_znodes_lock); + /* + * Attempt to re-establish all the active znodes with + * their dbufs. If a zfs_rezget() fails, then we'll let + * any potential callers discover that via ZFS_ENTER_VERIFY_VP + * when they try to use their znode. + */ + mutex_enter(&zfsvfs->z_znodes_lock); + for (zp = list_head(&zfsvfs->z_all_znodes); zp; + zp = list_next(&zfsvfs->z_all_znodes, zp)) { + (void) zfs_rezget(zp); } + mutex_exit(&zfsvfs->z_znodes_lock); bail: /* release the VOPs */ @@ -2292,8 +2299,8 @@ bail: if (err) { /* - * Since we couldn't reopen zfsvfs::z_os, or - * setup the sa framework force unmount this file system. + * Since we couldn't setup the sa framework, try to force + * unmount this file system. */ if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201310251543.r9PFhxMr018989>