From owner-svn-src-vendor@freebsd.org Mon Aug 10 19:32:32 2015 Return-Path: Delivered-To: svn-src-vendor@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 874E999E27E; Mon, 10 Aug 2015 19:32:32 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 75A2D1C0; Mon, 10 Aug 2015 19:32:32 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.70]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id t7AJWW7n098550; Mon, 10 Aug 2015 19:32:32 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id t7AJWV9a098548; Mon, 10 Aug 2015 19:32:31 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201508101932.t7AJWV9a098548@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Mon, 10 Aug 2015 19:32:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r286586 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zstreamdump vendor/illumos/dist/lib/libzfs/common vendor/illumo... X-SVN-Group: vendor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-vendor@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: SVN commit messages for the vendor work area tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 10 Aug 2015 19:32:32 -0000 Author: mav Date: Mon Aug 10 19:32:30 2015 New Revision: 286586 URL: https://svnweb.freebsd.org/changeset/base/286586 Log: 5746 more checksumming in zfs send Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Bayard Bell Approved by: Albert Lee Author: Matthew Ahrens illumos/illumos-gate@98110f08fa182032082d98be2ddb9391fcd62bf1 Modified: vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c Changes in other areas also in this revision: Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_ioctl.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zio_checksum.h Modified: vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c ============================================================================== --- vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c Mon Aug 10 18:27:54 2015 (r286585) +++ vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c Mon Aug 10 19:32:30 2015 (r286586) @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ #include @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -83,7 +84,6 @@ safe_malloc(size_t size) * * Read while computing incremental checksum */ - static size_t ssread(void *buf, size_t len, zio_cksum_t *cksum) { @@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_ if ((outlen = fread(buf, len, 1, send_stream)) == 0) return (0); - if (do_cksum && cksum) { + if (do_cksum) { if (do_byteswap) fletcher_4_incremental_byteswap(buf, len, cksum); else @@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_ return (outlen); } +static size_t +read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) +{ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + zio_cksum_t saved_cksum = *cksum; + r = ssread(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) && + !ZIO_CHECKSUM_EQUAL(saved_cksum, + drr->drr_u.drr_checksum.drr_checksum)) { + fprintf(stderr, "invalid checksum\n"); + (void) printf("Incorrect checksum in record header.\n"); + (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n", + saved_cksum.zc_word[0], + saved_cksum.zc_word[1], + saved_cksum.zc_word[2], + saved_cksum.zc_word[3]); + exit(1); + } + return (sizeof (*drr)); +} + /* * Print part of a block in ASCII characters */ @@ -183,8 +211,10 @@ main(int argc, char *argv[]) struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; char c; boolean_t verbose = B_FALSE; + boolean_t very_verbose = B_FALSE; boolean_t first = B_TRUE; /* * dump flag controls whether the contents of any modified data blocks @@ -202,11 +232,14 @@ main(int argc, char *argv[]) do_cksum = B_FALSE; break; case 'v': + if (verbose) + very_verbose = B_TRUE; verbose = B_TRUE; break; case 'd': dump = B_TRUE; verbose = B_TRUE; + very_verbose = B_TRUE; break; case ':': (void) fprintf(stderr, @@ -230,7 +263,7 @@ main(int argc, char *argv[]) send_stream = stdin; pcksum = zc; - while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) { + while (read_hdr(drr, &zc)) { /* * If this is the first DMU record being processed, check for @@ -432,7 +465,7 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE object = %llu type = %u " "checksum type = %u\n" - "offset = %llu length = %llu " + " offset = %llu length = %llu " "props = %llx\n", (u_longlong_t)drrw->drr_object, drrw->drr_type, @@ -476,9 +509,9 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE_BYREF object = %llu " "checksum type = %u props = %llx\n" - "offset = %llu length = %llu\n" + " offset = %llu length = %llu\n" "toguid = %llx refguid = %llx\n" - "refobject = %llu refoffset = %llu\n", + " refobject = %llu refoffset = %llu\n", (u_longlong_t)drrwbr->drr_object, drrwbr->drr_checksumtype, (u_longlong_t)drrwbr->drr_key.ddk_prop, @@ -538,7 +571,7 @@ main(int argc, char *argv[]) if (verbose) { (void) printf("WRITE_EMBEDDED object = %llu " "offset = %llu length = %llu\n" - "toguid = %llx comp = %u etype = %u " + " toguid = %llx comp = %u etype = %u " "lsize = %u psize = %u\n", (u_longlong_t)drrwe->drr_object, (u_longlong_t)drrwe->drr_offset, @@ -553,6 +586,13 @@ main(int argc, char *argv[]) P2ROUNDUP(drrwe->drr_psize, 8), &zc); break; } + if (drr->drr_type != DRR_BEGIN && very_verbose) { + (void) printf(" checksum = %llx/%llx/%llx/%llx\n", + (longlong_t)drrc->drr_checksum.zc_word[0], + (longlong_t)drrc->drr_checksum.zc_word[1], + (longlong_t)drrc->drr_checksum.zc_word[2], + (longlong_t)drrc->drr_checksum.zc_word[3]); + } pcksum = zc; } free(buf); Modified: vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c ============================================================================== --- vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c Mon Aug 10 18:27:54 2015 (r286585) +++ vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c Mon Aug 10 19:32:30 2015 (r286586) @@ -179,10 +179,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_t } static int -cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) +dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, + zio_cksum_t *zc, int outfd) { - fletcher_4_incremental_native(buf, len, zc); - return (write(outfd, buf, len)); + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + fletcher_4_incremental_native(drr, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); + if (drr->drr_type != DRR_BEGIN) { + ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. + drr_checksum.drr_checksum)); + drr->drr_u.drr_checksum.drr_checksum = *zc; + } + fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), zc); + if (write(outfd, drr, sizeof (*drr)) == -1) + return (errno); + if (payload_len != 0) { + fletcher_4_incremental_native(payload, payload_len, zc); + if (write(outfd, payload, payload_len) == -1) + return (errno); + } + return (0); } /* @@ -209,26 +227,18 @@ cksummer(void *arg) char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; - struct drr_begin *drrb = &thedrr.drr_u.drr_begin; - struct drr_end *drre = &thedrr.drr_u.drr_end; - struct drr_object *drro = &thedrr.drr_u.drr_object; - struct drr_write *drrw = &thedrr.drr_u.drr_write; - struct drr_spill *drrs = &thedrr.drr_u.drr_spill; - struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; FILE *ofp; int outfd; - dmu_replay_record_t wbr_drr = {0}; - struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; dedup_table_t ddt; zio_cksum_t stream_cksum; uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t numbuckets; ddt.max_ddt_size = - MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, - SMALLEST_POSSIBLE_MAX_DDT_MB<<20); + MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100, + SMALLEST_POSSIBLE_MAX_DDT_MB << 20); - numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); + numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t)); /* * numbuckets must be a power of 2. Increase number to @@ -244,32 +254,29 @@ cksummer(void *arg) ddt.numhashbits = high_order_bit(numbuckets) - 1; ddt.ddt_full = B_FALSE; - /* Initialize the write-by-reference block. */ - wbr_drr.drr_type = DRR_WRITE_BYREF; - wbr_drr.drr_payloadlen = 0; - outfd = dda->outputfd; ofp = fdopen(dda->inputfd, "r"); - while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { + while (ssread(drr, sizeof (*drr), ofp) != 0) { switch (drr->drr_type) { case DRR_BEGIN: { - int fflags; + struct drr_begin *drrb = &drr->drr_u.drr_begin; + int fflags; + int sz = 0; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + /* set the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { - int sz = drr->drr_payloadlen; + sz = drr->drr_payloadlen; if (sz > SPA_MAXBLOCKSIZE) { buf = zfs_realloc(dda->dedup_hdl, buf, @@ -278,64 +285,60 @@ cksummer(void *arg) (void) ssread(buf, sz, ofp); if (ferror(stdin)) perror("fread"); - if (cksum_and_write(buf, sz, &stream_cksum, - outfd) == -1) - goto out; } + if (dump_record(drr, buf, sz, &stream_cksum, + outfd) != 0) + goto out; break; } case DRR_END: { + struct drr_end *drre = &drr->drr_u.drr_end; /* use the recalculated checksum */ - ZIO_SET_CHECKSUM(&drre->drr_checksum, - stream_cksum.zc_word[0], stream_cksum.zc_word[1], - stream_cksum.zc_word[2], stream_cksum.zc_word[3]); - if ((write(outfd, drr, - sizeof (dmu_replay_record_t))) == -1) + drre->drr_checksum = stream_cksum; + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) goto out; break; } case DRR_OBJECT: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; + struct drr_object *drro = &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { (void) ssread(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), ofp); - if (cksum_and_write(buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - &stream_cksum, outfd) == -1) - goto out; } + if (dump_record(drr, buf, + P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + &stream_cksum, outfd) != 0) + goto out; break; } case DRR_SPILL: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; + struct drr_spill *drrs = &drr->drr_u.drr_spill; (void) ssread(buf, drrs->drr_length, ofp); - if (cksum_and_write(buf, drrs->drr_length, - &stream_cksum, outfd) == -1) + if (dump_record(drr, buf, drrs->drr_length, + &stream_cksum, outfd) != 0) goto out; break; } case DRR_FREEOBJECTS: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) goto out; break; } case DRR_WRITE: { + struct drr_write *drrw = &drr->drr_u.drr_write; dataref_t dataref; (void) ssread(buf, drrw->drr_length, ofp); @@ -373,7 +376,13 @@ cksummer(void *arg) if (ddt_update(dda->dedup_hdl, &ddt, &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, &dataref)) { + dmu_replay_record_t wbr_drr = {0}; + struct drr_write_byref *wbr_drrr = + &wbr_drr.drr_u.drr_write_byref; + /* block already present in stream */ + wbr_drr.drr_type = DRR_WRITE_BYREF; + wbr_drrr->drr_object = drrw->drr_object; wbr_drrr->drr_offset = drrw->drr_offset; wbr_drrr->drr_length = drrw->drr_length; @@ -393,19 +402,13 @@ cksummer(void *arg) wbr_drrr->drr_key.ddk_prop = drrw->drr_key.ddk_prop; - if (cksum_and_write(&wbr_drr, - sizeof (dmu_replay_record_t), &stream_cksum, - outfd) == -1) + if (dump_record(&wbr_drr, NULL, 0, + &stream_cksum, outfd) != 0) goto out; } else { /* block not previously seen */ - if (cksum_and_write(drr, - sizeof (dmu_replay_record_t), &stream_cksum, - outfd) == -1) - goto out; - if (cksum_and_write(buf, - drrw->drr_length, - &stream_cksum, outfd) == -1) + if (dump_record(drr, buf, drrw->drr_length, + &stream_cksum, outfd) != 0) goto out; } break; @@ -413,28 +416,27 @@ cksummer(void *arg) case DRR_WRITE_EMBEDDED: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) - goto out; + struct drr_write_embedded *drrwe = + &drr->drr_u.drr_write_embedded; (void) ssread(buf, P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp); - if (cksum_and_write(buf, + if (dump_record(drr, buf, P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), - &stream_cksum, outfd) == -1) + &stream_cksum, outfd) != 0) goto out; break; } case DRR_FREE: { - if (cksum_and_write(drr, sizeof (dmu_replay_record_t), - &stream_cksum, outfd) == -1) + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) goto out; break; } default: - (void) printf("INVALID record type 0x%x\n", + (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); @@ -1460,18 +1462,11 @@ zfs_send(zfs_handle_t *zhp, const char * sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", zhp->zfs_name, tosnap); drr.drr_payloadlen = buflen; - err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); - /* write header nvlist */ - if (err != -1 && packbuf != NULL) { - err = cksum_and_write(packbuf, buflen, &zc, - outfd); - } + err = dump_record(&drr, packbuf, buflen, &zc, outfd); free(packbuf); - if (err == -1) { - err = errno; + if (err != 0) goto stderr_out; - } /* write end record */ bzero(&drr, sizeof (drr)); @@ -1702,6 +1697,8 @@ recv_read(libzfs_handle_t *hdl, int fd, int rv; int len = ilen; + assert(ilen <= SPA_MAXBLOCKSIZE); + do { rv = read(fd, cp, len); cp += rv; Modified: vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c ============================================================================== --- vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c Mon Aug 10 18:27:54 2015 (r286585) +++ vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c Mon Aug 10 19:32:30 2015 (r286586) @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. */ @@ -485,18 +485,30 @@ lzc_send(const char *snapname, const cha } /* - * If fromsnap is NULL, a full (non-incremental) stream will be estimated. + * "from" can be NULL, a snapshot, or a bookmark. + * + * If from is NULL, a full (non-incremental) stream will be estimated. This + * is calculated very efficiently. + * + * If from is a snapshot, lzc_send_space uses the deadlists attached to + * each snapshot to efficiently estimate the stream size. + * + * If from is a bookmark, the indirect blocks in the destination snapshot + * are traversed, looking for blocks with a birth time since the creation TXG of + * the snapshot this bookmark was created from. This will result in + * significantly more I/O and be less efficient than a send space estimation on + * an equivalent snapshot. */ int -lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) +lzc_send_space(const char *snapname, const char *from, uint64_t *spacep) { nvlist_t *args; nvlist_t *result; int err; args = fnvlist_alloc(); - if (fromsnap != NULL) - fnvlist_add_string(args, "fromsnap", fromsnap); + if (from != NULL) + fnvlist_add_string(args, "from", from); err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); nvlist_free(args); if (err == 0)