Date: Mon, 10 Aug 2015 19:32:30 +0000 (UTC)
From: Alexander Motin <mav@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject: svn commit: r286586 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zstreamdump vendor/illumos/dist/lib/libzfs/common vendor/illumo...
Message-ID: <201508101932.t7AJWUXO098537@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mav Date: Mon Aug 10 19:32:30 2015 New Revision: 286586 URL: https://svnweb.freebsd.org/changeset/base/286586 Log: 5746 more checksumming in zfs send Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com> Approved by: Albert Lee <trisk@omniti.com> Author: Matthew Ahrens <mahrens@delphix.com> illumos/illumos-gate@98110f08fa182032082d98be2ddb9391fcd62bf1 Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_ioctl.h vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zio_checksum.h Changes in other areas also in this revision: Modified: vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c Mon Aug 10 18:27:54 2015 (r286585) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c Mon Aug 10 19:32:30 2015 (r286586) @@ -66,7 +66,6 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf ssize_t resid; /* have to get resid to get detailed errno */ ASSERT0(len % 8); - fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, (caddr_t)buf, len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); @@ -78,6 +77,38 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf return (dsp->dsa_err); } +/* + * For all record types except BEGIN, fill in the checksum (overlaid in + * drr_u.drr_checksum.drr_checksum). The checksum verifies everything + * up to the start of the checksum itself. 
+ */ +static int +dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) +{ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + fletcher_4_incremental_native(dsp->dsa_drr, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + &dsp->dsa_zc); + if (dsp->dsa_drr->drr_type != DRR_BEGIN) { + ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u. + drr_checksum.drr_checksum)); + dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc; + } + fletcher_4_incremental_native(&dsp->dsa_drr-> + drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), &dsp->dsa_zc); + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) + return (SET_ERROR(EINTR)); + if (payload_len != 0) { + fletcher_4_incremental_native(payload, payload_len, + &dsp->dsa_zc); + if (dump_bytes(dsp, payload, payload_len) != 0) + return (SET_ERROR(EINTR)); + } + return (0); +} + static int dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) @@ -122,8 +153,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o */ if (dsp->dsa_pending_op != PENDING_NONE && dsp->dsa_pending_op != PENDING_FREE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -146,8 +176,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o return (0); } else { /* not a continuation. 
Push out pending record */ - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -160,8 +189,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o drrf->drr_length = length; drrf->drr_toguid = dsp->dsa_toguid; if (length == -1ULL) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); } else { dsp->dsa_pending_op = PENDING_FREE; @@ -193,12 +221,11 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec * of different types. */ if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } - /* write a DATA record */ + /* write a WRITE record */ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); dsp->dsa_drr->drr_type = DRR_WRITE; drrw->drr_object = object; @@ -224,9 +251,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec drrw->drr_key.ddk_cksum = bp->blk_cksum; } - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (SET_ERROR(EINTR)); - if (dump_bytes(dsp, data, blksz) != 0) + if (dump_record(dsp, data, blksz) != 0) return (SET_ERROR(EINTR)); return (0); } @@ -240,8 +265,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, &(dsp->dsa_drr->drr_u.drr_write_embedded); if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (EINTR); dsp->dsa_pending_op = PENDING_NONE; } @@ -261,9 +285,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, decode_embedded_bp_compressed(bp, buf); - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (EINTR); - if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) + if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) return (EINTR); return 
(0); } @@ -274,8 +296,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -287,9 +308,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) - return (SET_ERROR(EINTR)); - if (dump_bytes(dsp, data, blksz)) + if (dump_record(dsp, data, blksz) != 0) return (SET_ERROR(EINTR)); return (0); } @@ -312,8 +331,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin */ if (dsp->dsa_pending_op != PENDING_NONE && dsp->dsa_pending_op != PENDING_FREEOBJECTS) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -327,8 +345,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin return (0); } else { /* can't be aggregated. 
Push out pending record */ - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -355,8 +372,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t return (dump_freeobjects(dsp, object, 1)); if (dsp->dsa_pending_op != PENDING_NONE) { - if (dump_bytes(dsp, dsp->dsa_drr, - sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); dsp->dsa_pending_op = PENDING_NONE; } @@ -377,11 +393,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; - if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) - return (SET_ERROR(EINTR)); - - if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) + if (dump_record(dsp, DN_BONUS(dnp), + P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) { return (SET_ERROR(EINTR)); + } /* Free anything past the end of the file. */ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * @@ -625,7 +640,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid; - ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_incremental = (fromzb != NULL); dsp->dsa_featureflags = featureflags; @@ -637,7 +651,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_long_hold(ds, FTAG); dsl_pool_rele(dp, tag); - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + if (dump_record(dsp, NULL, 0) != 0) { err = dsp->dsa_err; goto out; } @@ -646,7 +660,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, backup_cb, dsp); if (dsp->dsa_pending_op != PENDING_NONE) - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_record(dsp, NULL, 0) != 0) err = SET_ERROR(EINTR); if (err != 0) { @@ -660,7 +674,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 
drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; - if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + if (dump_record(dsp, NULL, 0) != 0) { err = dsp->dsa_err; goto out; } @@ -1217,13 +1231,19 @@ dmu_recv_begin(char *tofs, char *tosnap, } struct restorearg { + objset_t *os; int err; boolean_t byteswap; vnode_t *vp; - char *buf; uint64_t voff; int bufsize; /* amount of memory allocated for buf */ + + dmu_replay_record_t *drr; + dmu_replay_record_t *next_drr; + char *buf; zio_cksum_t cksum; + zio_cksum_t prev_cksum; + avl_tree_t *guid_to_ds_map; }; @@ -1262,14 +1282,11 @@ free_guid_map_onexit(void *arg) kmem_free(ca, sizeof (avl_tree_t)); } -static void * -restore_read(struct restorearg *ra, int len, char *buf) +static int +restore_read(struct restorearg *ra, int len, void *buf) { int done = 0; - if (buf == NULL) - buf = ra->buf; - /* some things will require 8-byte alignment, so everything must */ ASSERT0(len % 8); ASSERT3U(len, <=, ra->bufsize); @@ -1278,7 +1295,7 @@ restore_read(struct restorearg *ra, int ssize_t resid; ra->err = vn_rdwr(UIO_READ, ra->vp, - buf + done, len - done, + (char *)buf + done, len - done, ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); @@ -1287,24 +1304,21 @@ restore_read(struct restorearg *ra, int ra->voff += len - done - resid; done = len - resid; if (ra->err != 0) - return (NULL); + return (ra->err); } ASSERT3U(done, ==, len); - if (ra->byteswap) - fletcher_4_incremental_byteswap(buf, len, &ra->cksum); - else - fletcher_4_incremental_native(buf, len, &ra->cksum); - return (buf); + return (0); } static void -backup_byteswap(dmu_replay_record_t *drr) +byteswap_record(dmu_replay_record_t *drr) { #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) drr->drr_type = BSWAP_32(drr->drr_type); drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); + switch (drr->drr_type) { case DRR_BEGIN: DO64(drr_begin.drr_magic); @@ -1334,10 +1348,7 @@ 
backup_byteswap(dmu_replay_record_t *drr DO64(drr_write.drr_offset); DO64(drr_write.drr_length); DO64(drr_write.drr_toguid); - DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); - DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); - DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); - DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum); DO64(drr_write.drr_key.ddk_prop); break; case DRR_WRITE_BYREF: @@ -1348,10 +1359,8 @@ backup_byteswap(dmu_replay_record_t *drr DO64(drr_write_byref.drr_refguid); DO64(drr_write_byref.drr_refobject); DO64(drr_write_byref.drr_refoffset); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); - DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref. + drr_key.ddk_cksum); DO64(drr_write_byref.drr_key.ddk_prop); break; case DRR_WRITE_EMBEDDED: @@ -1374,13 +1383,15 @@ backup_byteswap(dmu_replay_record_t *drr DO64(drr_spill.drr_toguid); break; case DRR_END: - DO64(drr_end.drr_checksum.zc_word[0]); - DO64(drr_end.drr_checksum.zc_word[1]); - DO64(drr_end.drr_checksum.zc_word[2]); - DO64(drr_end.drr_checksum.zc_word[3]); DO64(drr_end.drr_toguid); + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum); break; } + + if (drr->drr_type != DRR_BEGIN) { + ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum); + } + #undef DO64 #undef DO32 } @@ -1397,11 +1408,10 @@ deduce_nblkptr(dmu_object_type_t bonus_t } static int -restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) +restore_object(struct restorearg *ra, struct drr_object *drro, void *data) { dmu_object_info_t doi; dmu_tx_t *tx; - void *data = NULL; uint64_t object; int err; @@ -1412,23 +1422,17 @@ restore_object(struct restorearg *ra, ob drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || drro->drr_blksz < SPA_MINBLOCKSIZE || - 
drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) || + drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(ra->os)) || drro->drr_bonuslen > DN_MAX_BONUSLEN) { return (SET_ERROR(EINVAL)); } - err = dmu_object_info(os, drro->drr_object, &doi); + err = dmu_object_info(ra->os, drro->drr_object, &doi); if (err != 0 && err != ENOENT) return (SET_ERROR(EINVAL)); object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; - if (drro->drr_bonuslen) { - data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); - if (ra->err != 0) - return (ra->err); - } - /* * If we are losing blkptrs or changing the block size this must * be a new file instance. We must clear out the previous file @@ -1442,14 +1446,14 @@ restore_object(struct restorearg *ra, ob if (drro->drr_blksz != doi.doi_data_block_size || nblkptr < doi.doi_nblkptr) { - err = dmu_free_long_range(os, drro->drr_object, + err = dmu_free_long_range(ra->os, drro->drr_object, 0, DMU_OBJECT_END); if (err != 0) return (SET_ERROR(EINVAL)); } } - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_bonus(tx, object); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { @@ -1459,7 +1463,7 @@ restore_object(struct restorearg *ra, ob if (object == DMU_NEW_OBJECT) { /* currently free, want to be allocated */ - err = dmu_object_claim(os, drro->drr_object, + err = dmu_object_claim(ra->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); } else if (drro->drr_type != doi.doi_type || @@ -1467,7 +1471,7 @@ restore_object(struct restorearg *ra, ob drro->drr_bonustype != doi.doi_bonus_type || drro->drr_bonuslen != doi.doi_bonus_size) { /* currently allocated, but with different properties */ - err = dmu_object_reclaim(os, drro->drr_object, + err = dmu_object_reclaim(ra->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, tx); } @@ -1476,14 +1480,15 @@ restore_object(struct restorearg *ra, ob return (SET_ERROR(EINVAL)); } - 
dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, - tx); - dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); + dmu_object_set_checksum(ra->os, drro->drr_object, + drro->drr_checksumtype, tx); + dmu_object_set_compress(ra->os, drro->drr_object, + drro->drr_compress, tx); if (data != NULL) { dmu_buf_t *db; - VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); + VERIFY0(dmu_bonus_hold(ra->os, drro->drr_object, FTAG, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); @@ -1502,7 +1507,7 @@ restore_object(struct restorearg *ra, ob /* ARGSUSED */ static int -restore_freeobjects(struct restorearg *ra, objset_t *os, +restore_freeobjects(struct restorearg *ra, struct drr_freeobjects *drrfo) { uint64_t obj; @@ -1512,13 +1517,13 @@ restore_freeobjects(struct restorearg *r for (obj = drrfo->drr_firstobj; obj < drrfo->drr_firstobj + drrfo->drr_numobjs; - (void) dmu_object_next(os, &obj, FALSE, 0)) { + (void) dmu_object_next(ra->os, &obj, FALSE, 0)) { int err; - if (dmu_object_info(os, obj, NULL) != 0) + if (dmu_object_info(ra->os, obj, NULL) != 0) continue; - err = dmu_free_long_object(os, obj); + err = dmu_free_long_object(ra->os, obj); if (err != 0) return (err); } @@ -1526,49 +1531,37 @@ restore_freeobjects(struct restorearg *r } static int -restore_write(struct restorearg *ra, objset_t *os, - struct drr_write *drrw) +restore_write(struct restorearg *ra, struct drr_write *drrw, arc_buf_t *abuf) { dmu_tx_t *tx; - void *data; int err; if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); - if (dmu_object_info(os, drrw->drr_object, NULL) != 0) + if (dmu_object_info(ra->os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); - dmu_buf_t *bonus; - if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) - return (SET_ERROR(EINVAL)); - - arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length); - - data = 
restore_read(ra, drrw->drr_length, abuf->b_data); - if (data == NULL) { - dmu_return_arcbuf(abuf); - dmu_buf_rele(bonus, FTAG); - return (ra->err); - } - - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { - dmu_return_arcbuf(abuf); - dmu_buf_rele(bonus, FTAG); dmu_tx_abort(tx); return (err); } if (ra->byteswap) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drrw->drr_type); - dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); + dmu_ot_byteswap[byteswap].ob_func(abuf->b_data, + drrw->drr_length); } + + dmu_buf_t *bonus; + if (dmu_bonus_hold(ra->os, drrw->drr_object, FTAG, &bonus) != 0) + return (SET_ERROR(EINVAL)); dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); dmu_tx_commit(tx); dmu_buf_rele(bonus, FTAG); @@ -1583,8 +1576,7 @@ restore_write(struct restorearg *ra, obj * data from the stream to fulfill this write. */ static int -restore_write_byref(struct restorearg *ra, objset_t *os, - struct drr_write_byref *drrwbr) +restore_write_byref(struct restorearg *ra, struct drr_write_byref *drrwbr) { dmu_tx_t *tx; int err; @@ -1610,7 +1602,7 @@ restore_write_byref(struct restorearg *r if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) return (SET_ERROR(EINVAL)); } else { - ref_os = os; + ref_os = ra->os; } err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, @@ -1618,7 +1610,7 @@ restore_write_byref(struct restorearg *r if (err != 0) return (err); - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_write(tx, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length); @@ -1627,7 +1619,7 @@ restore_write_byref(struct restorearg *r dmu_tx_abort(tx); return (err); } - dmu_write(os, drrwbr->drr_object, + dmu_write(ra->os, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); dmu_buf_rele(dbp, FTAG); dmu_tx_commit(tx); @@ -1635,12 +1627,11 @@ restore_write_byref(struct restorearg *r } static int 
-restore_write_embedded(struct restorearg *ra, objset_t *os, - struct drr_write_embedded *drrwnp) +restore_write_embedded(struct restorearg *ra, + struct drr_write_embedded *drrwnp, void *data) { dmu_tx_t *tx; int err; - void *data; if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) return (EINVAL); @@ -1653,11 +1644,7 @@ restore_write_embedded(struct restorearg if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); - data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL); - if (data == NULL) - return (ra->err); - - tx = dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_write(tx, drrwnp->drr_object, drrwnp->drr_offset, drrwnp->drr_length); @@ -1667,7 +1654,7 @@ restore_write_embedded(struct restorearg return (err); } - dmu_write_embedded(os, drrwnp->drr_object, + dmu_write_embedded(ra->os, drrwnp->drr_object, drrwnp->drr_offset, data, drrwnp->drr_etype, drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, ra->byteswap ^ ZFS_HOST_BYTEORDER, tx); @@ -1677,31 +1664,26 @@ restore_write_embedded(struct restorearg } static int -restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) +restore_spill(struct restorearg *ra, struct drr_spill *drrs, void *data) { dmu_tx_t *tx; - void *data; dmu_buf_t *db, *db_spill; int err; if (drrs->drr_length < SPA_MINBLOCKSIZE || - drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os))) + drrs->drr_length > spa_maxblocksize(dmu_objset_spa(ra->os))) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrs->drr_length, NULL); - if (data == NULL) - return (ra->err); - - if (dmu_object_info(os, drrs->drr_object, NULL) != 0) + if (dmu_object_info(ra->os, drrs->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); - VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); + VERIFY0(dmu_bonus_hold(ra->os, drrs->drr_object, FTAG, &db)); if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { dmu_buf_rele(db, FTAG); return (err); } - tx = 
dmu_tx_create(os); + tx = dmu_tx_create(ra->os); dmu_tx_hold_spill(tx, db->db_object); @@ -1728,8 +1710,7 @@ restore_spill(struct restorearg *ra, obj /* ARGSUSED */ static int -restore_free(struct restorearg *ra, objset_t *os, - struct drr_free *drrf) +restore_free(struct restorearg *ra, struct drr_free *drrf) { int err; @@ -1737,10 +1718,10 @@ restore_free(struct restorearg *ra, objs drrf->drr_offset + drrf->drr_length < drrf->drr_offset) return (SET_ERROR(EINVAL)); - if (dmu_object_info(os, drrf->drr_object, NULL) != 0) + if (dmu_object_info(ra->os, drrf->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); - err = dmu_free_long_range(os, drrf->drr_object, + err = dmu_free_long_range(ra->os, drrf->drr_object, drrf->drr_offset, drrf->drr_length); return (err); } @@ -1755,6 +1736,155 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *d (void) dsl_destroy_head(name); } +static void +restore_cksum(struct restorearg *ra, int len, void *buf) +{ + if (ra->byteswap) { + fletcher_4_incremental_byteswap(buf, len, &ra->cksum); + } else { + fletcher_4_incremental_native(buf, len, &ra->cksum); + } +} + +/* + * If len != 0, read payload into buf. + * Read next record's header into ra->next_drr. + * Verify checksum of payload and next record. + */ +static int +restore_read_payload_and_next_header(struct restorearg *ra, int len, void *buf) +{ + int err; + + if (len != 0) { + ASSERT3U(len, <=, ra->bufsize); + err = restore_read(ra, len, buf); + if (err != 0) + return (err); + restore_cksum(ra, len, buf); + } + + ra->prev_cksum = ra->cksum; + + err = restore_read(ra, sizeof (*ra->next_drr), ra->next_drr); + if (err != 0) + return (err); + if (ra->next_drr->drr_type == DRR_BEGIN) + return (SET_ERROR(EINVAL)); + + /* + * Note: checksum is of everything up to but not including the + * checksum itself. 
+ */ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + restore_cksum(ra, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ra->next_drr); + + zio_cksum_t cksum_orig = ra->next_drr->drr_u.drr_checksum.drr_checksum; + zio_cksum_t *cksump = &ra->next_drr->drr_u.drr_checksum.drr_checksum; + + if (ra->byteswap) + byteswap_record(ra->next_drr); + + if ((!ZIO_CHECKSUM_IS_ZERO(cksump)) && + !ZIO_CHECKSUM_EQUAL(ra->cksum, *cksump)) + return (SET_ERROR(ECKSUM)); + + restore_cksum(ra, sizeof (cksum_orig), &cksum_orig); + + return (0); +} + +static int +restore_process_record(struct restorearg *ra) +{ + int err; + + switch (ra->drr->drr_type) { + case DRR_OBJECT: + { + struct drr_object *drro = &ra->drr->drr_u.drr_object; + err = restore_read_payload_and_next_header(ra, + P2ROUNDUP(drro->drr_bonuslen, 8), ra->buf); + if (err != 0) + return (err); + return (restore_object(ra, drro, ra->buf)); + } + case DRR_FREEOBJECTS: + { + struct drr_freeobjects *drrfo = + &ra->drr->drr_u.drr_freeobjects; + err = restore_read_payload_and_next_header(ra, 0, NULL); + if (err != 0) + return (err); + return (restore_freeobjects(ra, drrfo)); + } + case DRR_WRITE: + { + struct drr_write *drrw = &ra->drr->drr_u.drr_write; + arc_buf_t *abuf = arc_loan_buf(dmu_objset_spa(ra->os), + drrw->drr_length); + + err = restore_read_payload_and_next_header(ra, + drrw->drr_length, abuf->b_data); + if (err != 0) + return (err); + err = restore_write(ra, drrw, abuf); + /* if restore_write() is successful, it consumes the arc_buf */ + if (err != 0) + dmu_return_arcbuf(abuf); + return (err); + } + case DRR_WRITE_BYREF: + { + struct drr_write_byref *drrwbr = + &ra->drr->drr_u.drr_write_byref; + err = restore_read_payload_and_next_header(ra, 0, NULL); + if (err != 0) + return (err); + return (restore_write_byref(ra, drrwbr)); + } + case DRR_WRITE_EMBEDDED: + { + struct drr_write_embedded *drrwe = + 
&ra->drr->drr_u.drr_write_embedded; + err = restore_read_payload_and_next_header(ra, + P2ROUNDUP(drrwe->drr_psize, 8), ra->buf); + if (err != 0) + return (err); + return (restore_write_embedded(ra, drrwe, ra->buf)); + } + case DRR_FREE: + { + struct drr_free *drrf = &ra->drr->drr_u.drr_free; + err = restore_read_payload_and_next_header(ra, 0, NULL); + if (err != 0) + return (err); + return (restore_free(ra, drrf)); + } + case DRR_END: + { + struct drr_end *drre = &ra->drr->drr_u.drr_end; + if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum)) + return (SET_ERROR(EINVAL)); + return (0); + } + case DRR_SPILL: + { + struct drr_spill *drrs = &ra->drr->drr_u.drr_spill; + err = restore_read_payload_and_next_header(ra, + drrs->drr_length, ra->buf); + if (err != 0) + return (err); + return (restore_spill(ra, drrs, ra->buf)); + } + default: + return (SET_ERROR(EINVAL)); + } +} + /* * NB: callers *must* call dmu_recv_end() if this succeeds. */ @@ -1762,10 +1892,8 @@ int dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, int cleanup_fd, uint64_t *action_handlep) { + int err = 0; struct restorearg ra = { 0 }; - dmu_replay_record_t *drr; - objset_t *os; - zio_cksum_t pcksum; int featureflags; ra.byteswap = drc->drc_byteswap; @@ -1773,7 +1901,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, ra.vp = vp; ra.voff = *voffp; ra.bufsize = SPA_MAXBLOCKSIZE; + ra.drr = kmem_alloc(sizeof (*ra.drr), KM_SLEEP); ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); + ra.next_drr = kmem_alloc(sizeof (*ra.next_drr), KM_SLEEP); /* these were verified in dmu_recv_begin */ ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, @@ -1783,7 +1913,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, /* * Open the objset we are modifying. 
*/ - VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); + VERIFY0(dmu_objset_from_ds(drc->drc_ds, &ra.os)); ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT); @@ -1809,13 +1939,13 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, avl_create(ra.guid_to_ds_map, guid_compare, sizeof (guid_map_entry_t), offsetof(guid_map_entry_t, avlnode)); - ra.err = zfs_onexit_add_cb(minor, + err = zfs_onexit_add_cb(minor, free_guid_map_onexit, ra.guid_to_ds_map, action_handlep); if (ra.err != 0) goto out; } else { - ra.err = zfs_onexit_cb_data(minor, *action_handlep, + err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&ra.guid_to_ds_map); if (ra.err != 0) goto out; @@ -1824,96 +1954,34 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, drc->drc_guid_to_ds_map = ra.guid_to_ds_map; } - /* - * Read records and process them. - */ - pcksum = ra.cksum; - while (ra.err == 0 && - NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) { + err = restore_read_payload_and_next_header(&ra, 0, NULL); + if (err != 0) + goto out; + for (;;) { + void *tmp; + if (issig(JUSTLOOKING) && issig(FORREAL)) { - ra.err = SET_ERROR(EINTR); - goto out; + err = SET_ERROR(EINTR); + break; } - if (ra.byteswap) - backup_byteswap(drr); + tmp = ra.next_drr; + ra.next_drr = ra.drr; + ra.drr = tmp; - switch (drr->drr_type) { - case DRR_OBJECT: - { - /* - * We need to make a copy of the record header, - * because restore_{object,write} may need to - * restore_read(), which will invalidate drr. 
- */ - struct drr_object drro = drr->drr_u.drr_object; - ra.err = restore_object(&ra, os, &drro); - break; - } - case DRR_FREEOBJECTS: - { - struct drr_freeobjects drrfo = - drr->drr_u.drr_freeobjects; - ra.err = restore_freeobjects(&ra, os, &drrfo); - break; - } - case DRR_WRITE: - { - struct drr_write drrw = drr->drr_u.drr_write; - ra.err = restore_write(&ra, os, &drrw); - break; - } - case DRR_WRITE_BYREF: - { - struct drr_write_byref drrwbr = - drr->drr_u.drr_write_byref; - ra.err = restore_write_byref(&ra, os, &drrwbr); - break; - } - case DRR_WRITE_EMBEDDED: - { - struct drr_write_embedded drrwe = - drr->drr_u.drr_write_embedded; - ra.err = restore_write_embedded(&ra, os, &drrwe); - break; - } - case DRR_FREE: - { - struct drr_free drrf = drr->drr_u.drr_free; - ra.err = restore_free(&ra, os, &drrf); + /* process ra.drr, read in ra.next_drr */ + err = restore_process_record(&ra); + if (err != 0) break; - } - case DRR_END: - { - struct drr_end drre = drr->drr_u.drr_end; - /* - * We compare against the *previous* checksum - * value, because the stored checksum is of - * everything before the DRR_END record. - */ - if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) - ra.err = SET_ERROR(ECKSUM); - goto out; - } - case DRR_SPILL: - { - struct drr_spill drrs = drr->drr_u.drr_spill; - ra.err = restore_spill(&ra, os, &drrs); + if (ra.drr->drr_type == DRR_END) break; - } - default: - ra.err = SET_ERROR(EINVAL); - goto out; - } - pcksum = ra.cksum; } - ASSERT(ra.err != 0); out: if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) zfs_onexit_fd_rele(cleanup_fd); - if (ra.err != 0) { + if (err != 0) { /* * destroy what we created, so we don't leave it in the * inconsistent restoring state. 
@@ -1921,9 +1989,11 @@ out: dmu_recv_cleanup_ds(drc); } + kmem_free(ra.drr, sizeof (*ra.drr)); kmem_free(ra.buf, ra.bufsize); + kmem_free(ra.next_drr, sizeof (*ra.next_drr)); *voffp = ra.voff; - return (ra.err); + return (err); } static int Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h Mon Aug 10 18:27:54 2015 (r286585) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h Mon Aug 10 19:32:30 2015 (r286586) @@ -443,6 +443,19 @@ _NOTE(CONSTCOND) } while (0) ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) +#define ZIO_CHECKSUM_IS_ZERO(zc) \ + (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \ + (zc)->zc_word[2] | (zc)->zc_word[3])) + *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201508101932.t7AJWUXO098537>