Date: Wed, 12 Jun 2013 07:07:07 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r251646 - in head: cddl/contrib/opensolaris/cmd/zfs cddl/contrib/opensolaris/cmd/zhack cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/libzfs/common cddl/contrib/opensol... Message-ID: <201306120707.r5C777G1083875@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Wed Jun 12 07:07:06 2013 New Revision: 251646 URL: http://svnweb.freebsd.org/changeset/base/251646 Log: MFV r251644: Poor ZFS send / receive performance due to snapshot hold / release processing (by smh@) Illumos ZFS issues: 3740 Poor ZFS send / receive performance due to snapshot hold / release processing MFC after: 2 weeks Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c head/cddl/contrib/opensolaris/cmd/zhack/zhack.c head/cddl/contrib/opensolaris/cmd/ztest/ztest.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c Directory Properties: head/cddl/contrib/opensolaris/ (props changed) head/cddl/contrib/opensolaris/cmd/zfs/ (props changed) head/cddl/contrib/opensolaris/lib/libzfs/ (props changed) head/sys/cddl/contrib/opensolaris/ (props changed) Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c Wed Jun 12 07:07:06 2013 (r251646) @@ -28,6 +28,7 @@ * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>. * All rights reserved. * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <assert.h> @@ -5233,8 +5234,7 @@ zfs_do_hold_rele_impl(int argc, char **a continue; } if (holding) { - if (zfs_hold(zhp, delim+1, tag, recursive, - B_FALSE, -1) != 0) + if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) Modified: head/cddl/contrib/opensolaris/cmd/zhack/zhack.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zhack/zhack.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/cmd/zhack/zhack.c Wed Jun 12 07:07:06 2013 (r251646) @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -153,7 +154,7 @@ import_pool(const char *target, boolean_ g_importargs.poolname = g_pool; pools = zpool_search_import(g_zfs, &g_importargs); - if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) { + if (nvlist_empty(pools)) { if (!g_importargs.can_be_active) { g_importargs.can_be_active = B_TRUE; if (zpool_search_import(g_zfs, &g_importargs) != NULL || Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c Wed Jun 12 07:07:06 2013 (r251646) @@ -23,6 +23,7 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -4713,7 +4714,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, error = user_release_one(fullname, tag); if (error) - fatal(0, "user_release_one(%s)", fullname, tag); + fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h ============================================================================== --- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h Wed Jun 12 07:07:06 2013 (r251646) @@ -27,6 +27,7 @@ * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #ifndef _LIBZFS_H @@ -611,7 +612,8 @@ extern int zfs_send(zfs_handle_t *, cons extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, - boolean_t, boolean_t, int); + boolean_t, int); +extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c ============================================================================== --- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c Wed Jun 12 07:07:06 2013 (r251646) @@ -27,6 +27,7 @@ * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>. * All rights reserved. * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <ctype.h> @@ -3158,18 +3159,14 @@ static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) verify(nvlist_add_boolean(dd->nvl, name) == 0); - zfs_close(szhp); - } rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); zfs_close(zhp); @@ -3189,7 +3186,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, cha verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); - if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { + if (nvlist_empty(dd.nvl)) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); @@ -3214,7 +3211,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *h if (ret == 0) return (0); - if (nvlist_next_nvpair(errlist, NULL) == NULL) { + if (nvlist_empty(errlist)) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); @@ -4168,18 +4165,14 @@ static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) fnvlist_add_string(ha->nvl, name, ha->tag); - zfs_close(szhp); - } if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); @@ -4189,14 +4182,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *ar int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive, boolean_t enoent_ok, int cleanup_fd) + boolean_t recursive, int cleanup_fd) { int ret; struct holdarg ha; - nvlist_t *errors; - libzfs_handle_t *hdl = zhp->zfs_hdl; - char errbuf[1024]; - nvpair_t *elem; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; @@ -4204,26 +4193,44 @@ zfs_hold(zfs_handle_t *zhp, const char * ha.recursive = recursive; (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { + char errbuf[1024]; + fnvlist_free(ha.nvl); ret = ENOENT; - if (!enoent_ok) { - (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, - "cannot hold snapshot '%s@%s'"), - zhp->zfs_name, snapname); - (void) zfs_standard_error(hdl, ret, errbuf); - } + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot hold snapshot '%s@%s'"), + zhp->zfs_name, snapname); + (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); return (ret); } - ret = lzc_hold(ha.nvl, cleanup_fd, &errors); + ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); fnvlist_free(ha.nvl); - if (ret == 0) + return (ret); +} + +int +zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) +{ + int ret; + nvlist_t *errors; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char errbuf[1024]; + nvpair_t *elem; + + errors = NULL; + ret = lzc_hold(holds, cleanup_fd, &errors); + + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold")); @@ -4263,10 +4270,6 @@ zfs_hold(zfs_handle_t *zhp, const char * case EEXIST: (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); break; - case ENOENT: - if (enoent_ok) - return (ENOENT); - /* FALLTHROUGH */ default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); @@ -4277,30 +4280,21 @@ zfs_hold(zfs_handle_t *zhp, const char * return (ret); } -struct releasearg { - nvlist_t *nvl; - const char *snapname; - const char *tag; - boolean_t recursive; -}; - static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname); - szhp = make_dataset_handle(zhp->zfs_hdl, name); - if (szhp) { + if (lzc_exists(name)) { nvlist_t *holds = fnvlist_alloc(); fnvlist_add_boolean(holds, ha->tag); fnvlist_add_nvlist(ha->nvl, name, holds); - zfs_close(szhp); + fnvlist_free(holds); } if (ha->recursive) @@ -4315,7 +4309,7 @@ zfs_release(zfs_handle_t *zhp, const cha { int ret; struct holdarg ha; - nvlist_t *errors; + nvlist_t *errors = NULL; nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4326,7 +4320,7 @@ zfs_release(zfs_handle_t *zhp, const cha ha.recursive = recursive; (void) zfs_release_one(zfs_handle_dup(zhp), &ha); - if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) { + if (nvlist_empty(ha.nvl)) { fnvlist_free(ha.nvl); ret = ENOENT; (void) snprintf(errbuf, sizeof (errbuf), @@ -4340,10 +4334,13 @@ zfs_release(zfs_handle_t *zhp, const cha ret = lzc_release(ha.nvl, &errors); fnvlist_free(ha.nvl); - if (ret == 0) + if (ret == 0) { + /* There may be errors even in the success case. */ + fnvlist_free(errors); return (0); + } - if (nvlist_next_nvpair(errors, NULL) == NULL) { + if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release")); Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c ============================================================================== --- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Wed Jun 12 07:07:06 2013 (r251646) @@ -25,6 +25,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>. * All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <assert.h> @@ -802,6 +803,7 @@ typedef struct send_dump_data { int outfd; boolean_t err; nvlist_t *fss; + nvlist_t *snapholds; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; @@ -952,41 +954,19 @@ dump_ioctl(zfs_handle_t *zhp, const char return (0); } -static int -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) +static void +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) { - zfs_handle_t *pzhp; - int error = 0; - char *thissnap; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - if (sdd->dryrun) - return (0); - /* - * zfs_send() only opens a cleanup_fd for sends that need it, + * zfs_send() only sets snapholds for sends that need them, * e.g. replication and doall. */ - if (sdd->cleanup_fd == -1) - return (0); - - thissnap = strchr(zhp->zfs_name, '@') + 1; - *(thissnap - 1) = '\0'; - pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); - *(thissnap - 1) = '@'; - - /* - * It's OK if the parent no longer exists. The send code will - * handle that error. - */ - if (pzhp) { - error = zfs_hold(pzhp, thissnap, sdd->holdtag, - B_FALSE, B_TRUE, sdd->cleanup_fd); - zfs_close(pzhp); - } + if (sdd->snapholds == NULL) + return; - return (error); + fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } static void * @@ -1042,28 +1022,23 @@ dump_snapshot(zfs_handle_t *zhp, void *a send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; - char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; + err = 0; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { - err = hold_for_send(zhp, sdd); - if (err == 0) { - sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->prevsnap, thissnap); - sdd->prevsnap_obj = zfs_prop_get_int(zhp, - ZFS_PROP_OBJSETID); - } else if (err == ENOENT) { - err = 0; - } + gather_holds(zhp, sdd); + sdd->seenfrom = B_TRUE; + (void) strcpy(sdd->prevsnap, thissnap); + sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); - return (err); + return (0); } if (sdd->seento || !sdd->seenfrom) { @@ -1114,14 +1089,7 @@ dump_snapshot(zfs_handle_t *zhp, void *a return (0); } - err = hold_for_send(zhp, sdd); - if (err) { - if (err == ENOENT) - err = 0; - zfs_close(zhp); - return (err); - } - + gather_holds(zhp, sdd); fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); @@ -1389,7 +1357,7 @@ zfs_send(zfs_handle_t *zhp, const char * avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; - pthread_t tid; + pthread_t tid = 0; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; @@ -1462,11 +1430,8 @@ zfs_send(zfs_handle_t *zhp, const char * *debugnvp = hdrnv; else nvlist_free(hdrnv); - if (err) { - fsavl_destroy(fsavl); - nvlist_free(fss); + if (err) goto stderr_out; - } } if (!flags->dryrun) { @@ -1490,8 +1455,6 @@ zfs_send(zfs_handle_t *zhp, const char * } free(packbuf); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1502,8 +1465,6 @@ zfs_send(zfs_handle_t *zhp, const char * drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } @@ -1515,7 +1476,7 @@ zfs_send(zfs_handle_t *zhp, const char * /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - if (flags->dedup) + if (tid != 0) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; @@ -1552,36 +1513,71 @@ zfs_send(zfs_handle_t *zhp, const char * err = errno; goto stderr_out; } + sdd.snapholds = fnvlist_alloc(); } else { sdd.cleanup_fd = -1; + sdd.snapholds = NULL; } - if (flags->verbose) { + if (flags->verbose || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output - * before generating any data. Then do a non-verbose real - * run to generate the streams. + * or to gather snapshot hold's before generating any data, + * then do a non-verbose real run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); - sdd.dryrun = flags->dryrun; - sdd.verbose = B_FALSE; - if (flags->parsable) { - (void) fprintf(stderr, "size\t%llu\n", - (longlong_t)sdd.size); - } else { - char buf[16]; - zfs_nicenum(sdd.size, buf, sizeof (buf)); - (void) fprintf(stderr, dgettext(TEXT_DOMAIN, - "total estimated size is %s\n"), buf); + + if (err != 0) + goto stderr_out; + + if (flags->verbose) { + if (flags->parsable) { + (void) fprintf(stderr, "size\t%llu\n", + (longlong_t)sdd.size); + } else { + char buf[16]; + zfs_nicenum(sdd.size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "total estimated size is %s\n"), buf); + } + } + + /* Ensure no snaps found is treated as an error. */ + if (!sdd.seento) { + err = ENOENT; + goto err_out; } + + /* Skip the second run if dryrun was requested. */ + if (flags->dryrun) + goto err_out; + + if (sdd.snapholds != NULL) { + err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); + if (err != 0) + goto stderr_out; + + fnvlist_free(sdd.snapholds); + sdd.snapholds = NULL; + } + + sdd.dryrun = B_FALSE; + sdd.verbose = B_FALSE; } + err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (flags->dedup) { - (void) close(pipefd[0]); + /* Ensure no snaps found is treated as an error. */ + if (err == 0 && !sdd.seento) + err = ENOENT; + + if (tid != 0) { + if (err != 0) + (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); + (void) close(pipefd[0]); } if (sdd.cleanup_fd != -1) { @@ -1609,9 +1605,13 @@ zfs_send(zfs_handle_t *zhp, const char * stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: + fsavl_destroy(fsavl); + nvlist_free(fss); + fnvlist_free(sdd.snapholds); + if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); - if (flags->dedup) { + if (tid != 0) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); Modified: head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c ============================================================================== --- head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c Wed Jun 12 07:07:06 2013 (r251646) @@ -21,6 +21,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ /* @@ -301,8 +302,11 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t * * marked for deferred destruction, and will be destroyed when the last hold * or clone is removed/destroyed. * + * The return value will be ENOENT if none of the snapshots existed. + * * The return value will be 0 if all snapshots were destroyed (or marked for - * later destruction if 'defer' is set) or didn't exist to begin with. + * later destruction if 'defer' is set) or didn't exist to begin with and + * at least one snapshot was destroyed. * * Otherwise the return value will be the errno of a (unspecified) snapshot * that failed, no snapshots will be destroyed, and the errlist will have an @@ -333,7 +337,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boole nvlist_free(args); return (error); - } int @@ -393,11 +396,22 @@ lzc_exists(const char *dataset) * uncleanly, the holds will be released when the pool is next opened * or imported. * - * The return value will be 0 if all holds were created. Otherwise the return - * value will be the errno of a (unspecified) hold that failed, no holds will - * be created, and the errlist will have an entry for each hold that - * failed (name = snapshot). The value in the errlist will be the error - * code (int32). + * Holds for snapshots which don't exist will be skipped and have an entry + * added to errlist, but will not cause an overall failure, except in the + * case that all holds where skipped. + * + * The return value will be ENOENT if none of the snapshots for the requested + * holds existed. + * + * The return value will be 0 if the nvl holds was empty or all holds, for + * snapshots that existed, were succesfully created and at least one hold + * was created. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed and no holds will be created. + * + * In all cases the errlist will have an entry for each hold that failed + * (name = snapshot), with its value being the error code (int32). */ int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) @@ -434,11 +448,20 @@ lzc_hold(nvlist_t *holds, int cleanup_fd * The snapshots must all be in the same pool. * The value is a nvlist whose keys are the holds to remove. * - * The return value will be 0 if all holds were removed. - * Otherwise the return value will be the errno of a (unspecified) release - * that failed, no holds will be released, and the errlist will have an - * entry for each snapshot that has failed releases (name = snapshot). - * The value in the errlist will be the error code (int32) of a failed release. + * Holds which failed to release because they didn't exist will have an entry + * added to errlist, but will not cause an overall failure, except in the + * case that all releases where skipped. + * + * The return value will be ENOENT if none of the specified holds existed. + * + * The return value will be 0 if the nvl holds was empty or all holds that + * existed, were successfully removed and at least one hold was removed. + * + * Otherwise the return value will be the errno of a (unspecified) hold that + * failed to release and no holds will be released. + * + * In all cases the errlist will have an entry for each hold that failed to + * to release. */ int lzc_release(nvlist_t *holds, nvlist_t **errlist) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c Wed Jun 12 07:07:06 2013 (r251646) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <sys/zfs_context.h> @@ -127,6 +128,10 @@ dsl_destroy_snapshot_check(void *arg, dm pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); if (pair != NULL) return (fnvpair_value_int32(pair)); + + if (nvlist_empty(dsda->dsda_successful_snaps)) + return (SET_ERROR(ENOENT)); + return (0); } Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c Wed Jun 12 07:07:06 2013 (r251646) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <sys/dsl_pool.h> @@ -856,23 +857,34 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t * zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; + nvlist_t *holds; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); + holds = fnvlist_alloc(); + for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; - uint64_t dsobj; + nvlist_t *tags; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; - dsobj = strtonum(za.za_name, NULL); - dsl_dataset_user_release_tmp(dp, dsobj, htag); + if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { + tags = fnvlist_alloc(); + fnvlist_add_boolean(tags, htag); + fnvlist_add_nvlist(holds, za.za_name, tags); + fnvlist_free(tags); + } else { + fnvlist_add_boolean(tags, htag); + } } + dsl_dataset_user_release_tmp(dp, holds); + fnvlist_free(holds); zap_cursor_fini(&zc); } Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c Wed Jun 12 07:04:27 2013 (r251645) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c Wed Jun 12 07:07:06 2013 (r251646) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <sys/zfs_context.h> @@ -37,6 +38,7 @@ typedef struct dsl_dataset_user_hold_arg { nvlist_t *dduha_holds; + nvlist_t *dduha_chkholds; nvlist_t *dduha_errlist; minor_t dduha_minor; } dsl_dataset_user_hold_arg_t; @@ -53,25 +55,24 @@ dsl_dataset_user_hold_check_one(dsl_data objset_t *mos = dp->dp_meta_objset; int error = 0; + ASSERT(dsl_pool_config_held(dp)); + if (strlen(htag) > MAXNAMELEN) - return (E2BIG); + return (SET_ERROR(E2BIG)); /* Tempholds have a more restricted length */ if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) - return (E2BIG); + return (SET_ERROR(E2BIG)); /* tags must be unique (if ds already exists) */ - if (ds != NULL) { - mutex_enter(&ds->ds_lock); - if (ds->ds_phys->ds_userrefs_obj != 0) { - uint64_t value; - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, - htag, 8, 1, &value); - if (error == 0) - error = SET_ERROR(EEXIST); - else if (error == ENOENT) - error = 0; - } - mutex_exit(&ds->ds_lock); + if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) { + uint64_t value; + + error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, + htag, 8, 1, &value); + if (error == 0) + error = SET_ERROR(EEXIST); + else if (error == ENOENT) + error = 0; } return (error); @@ -82,52 +83,67 @@ dsl_dataset_user_hold_check(void *arg, d { dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; - int rv = 0; if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) return (SET_ERROR(ENOTSUP)); - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { - int error = 0; + if (!dmu_tx_is_syncing(tx)) + return (0); + + for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); + pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { dsl_dataset_t *ds; - char *htag; + int error = 0; + char *htag, *name; /* must be a snapshot */ - if (strchr(nvpair_name(pair), '@') == NULL) + name = nvpair_name(pair); + if (strchr(name, '@') == NULL) error = SET_ERROR(EINVAL); if (error == 0) error = nvpair_value_string(pair, &htag); - if (error == 0) { - error = dsl_dataset_hold(dp, - nvpair_name(pair), FTAG, &ds); - } + + if (error == 0) + error = dsl_dataset_hold(dp, name, FTAG, &ds); + if (error == 0) { error = dsl_dataset_user_hold_check_one(ds, htag, dduha->dduha_minor != 0, tx); dsl_dataset_rele(ds, FTAG); } - if (error != 0) { - rv = error; - fnvlist_add_int32(dduha->dduha_errlist, - nvpair_name(pair), error); + if (error == 0) { + fnvlist_add_string(dduha->dduha_chkholds, name, htag); + } else { + /* + * We register ENOENT errors so they can be correctly + * reported if needed, such as when all holds fail. + */ + fnvlist_add_int32(dduha->dduha_errlist, name, error); + if (error != ENOENT) + return (error); } } - return (rv); + + /* Return ENOENT if no holds would be created. */ + if (nvlist_empty(dduha->dduha_chkholds)) + return (SET_ERROR(ENOENT)); + + return (0); } -void -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, - minor_t minor, uint64_t now, dmu_tx_t *tx) + +static void +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds, + const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; - mutex_enter(&ds->ds_lock); + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); + if (ds->ds_phys->ds_userrefs_obj == 0) { /* * This is the first user hold for this dataset. Create @@ -140,14 +156,26 @@ dsl_dataset_user_hold_sync_one(dsl_datas zapobj = ds->ds_phys->ds_userrefs_obj; } ds->ds_userrefs++; - mutex_exit(&ds->ds_lock); VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx)); if (minor != 0) { + char name[MAXNAMELEN]; + nvlist_t *tags; + VERIFY0(dsl_pool_user_hold(dp, ds->ds_object, htag, now, tx)); - dsl_register_onexit_hold_cleanup(ds, htag, minor); + (void) snprintf(name, sizeof (name), "%llx", + (u_longlong_t)ds->ds_object); + + if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) { + tags = fnvlist_alloc(); + fnvlist_add_boolean(tags, htag); + fnvlist_add_nvlist(tmpholds, name, tags); + fnvlist_free(tags); + } else { + fnvlist_add_boolean(tags, htag); + } } spa_history_log_internal_ds(ds, "hold", tx, @@ -155,140 +183,296 @@ dsl_dataset_user_hold_sync_one(dsl_datas htag, minor != 0, ds->ds_userrefs); } +typedef struct zfs_hold_cleanup_arg { + char zhca_spaname[MAXNAMELEN]; + uint64_t zhca_spa_load_guid; + nvlist_t *zhca_holds; +} zfs_hold_cleanup_arg_t; + +static void +dsl_dataset_user_release_onexit(void *arg) +{ + zfs_hold_cleanup_arg_t *ca = arg; + spa_t *spa; + int error; + + error = spa_open(ca->zhca_spaname, &spa, FTAG); + if (error != 0) { + zfs_dbgmsg("couldn't release holds on pool=%s " + "because pool is no longer loaded", + ca->zhca_spaname); + return; + } + if (spa_load_guid(spa) != ca->zhca_spa_load_guid) { + zfs_dbgmsg("couldn't release holds on pool=%s " + "because pool is no longer loaded (guid doesn't match)", + ca->zhca_spaname); + spa_close(spa, FTAG); + return; + } + + (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds); + fnvlist_free(ca->zhca_holds); + kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); + spa_close(spa, FTAG); +} + +static void +dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor) +{ + zfs_hold_cleanup_arg_t *ca; + + if (minor == 0 || nvlist_empty(holds)) { + fnvlist_free(holds); + return; + } + + ASSERT(spa != NULL); + ca = kmem_alloc(sizeof (*ca), KM_SLEEP); + + (void) strlcpy(ca->zhca_spaname, spa_name(spa), + sizeof (ca->zhca_spaname)); + ca->zhca_spa_load_guid = spa_load_guid(spa); + ca->zhca_holds = holds; + VERIFY0(zfs_onexit_add_cb(minor, + dsl_dataset_user_release_onexit, ca, NULL)); +} + +void +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, + minor_t minor, uint64_t now, dmu_tx_t *tx) +{ + nvlist_t *tmpholds; + + if (minor != 0) + tmpholds = fnvlist_alloc(); + else + tmpholds = NULL; + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx); + dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor); +} + static void dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) { dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); - nvpair_t *pair; + nvlist_t *tmpholds; uint64_t now = gethrestime_sec(); - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { + if (dduha->dduha_minor != 0) + tmpholds = fnvlist_alloc(); + else + tmpholds = NULL; + for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL); + pair != NULL; + pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) { dsl_dataset_t *ds; + VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); - dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair), - dduha->dduha_minor, now, tx); + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, + fnvpair_value_string(pair), dduha->dduha_minor, now, tx); dsl_dataset_rele(ds, FTAG); } + dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor); } /* + * The full semantics of this function are described in the comment above + * lzc_hold(). + * + * To summarize: *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201306120707.r5C777G1083875>