Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 10 Aug 2015 19:32:30 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r286586 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zstreamdump vendor/illumos/dist/lib/libzfs/common vendor/illumo...
Message-ID:  <201508101932.t7AJWUXO098537@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Mon Aug 10 19:32:30 2015
New Revision: 286586
URL: https://svnweb.freebsd.org/changeset/base/286586

Log:
  5746 more checksumming in zfs send
  
  Reviewed by: Christopher Siden <christopher.siden@delphix.com>
  Reviewed by: George Wilson <george.wilson@delphix.com>
  Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
  Approved by: Albert Lee <trisk@omniti.com>
  Author: Matthew Ahrens <mahrens@delphix.com>
  
  illumos/illumos-gate@98110f08fa182032082d98be2ddb9391fcd62bf1

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_ioctl.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zio_checksum.h

Changes in other areas also in this revision:
Modified:
  vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
  vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c
  vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c	Mon Aug 10 18:27:54 2015	(r286585)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c	Mon Aug 10 19:32:30 2015	(r286586)
@@ -66,7 +66,6 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf
 	ssize_t resid; /* have to get resid to get detailed errno */
 	ASSERT0(len % 8);
 
-	fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
 	dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
 	    (caddr_t)buf, len,
 	    0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
@@ -78,6 +77,38 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf
 	return (dsp->dsa_err);
 }
 
+/*
+ * For all record types except BEGIN, fill in the checksum (overlaid in
+ * drr_u.drr_checksum.drr_checksum).  The checksum verifies everything
+ * up to the start of the checksum itself.
+ */
+static int
+dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
+{
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(dsp->dsa_drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    &dsp->dsa_zc);
+	if (dsp->dsa_drr->drr_type != DRR_BEGIN) {
+		ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
+		    drr_checksum.drr_checksum));
+		dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
+	}
+	fletcher_4_incremental_native(&dsp->dsa_drr->
+	    drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), &dsp->dsa_zc);
+	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+		return (SET_ERROR(EINTR));
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len,
+		    &dsp->dsa_zc);
+		if (dump_bytes(dsp, payload, payload_len) != 0)
+			return (SET_ERROR(EINTR));
+	}
+	return (0);
+}
+
 static int
 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
     uint64_t length)
@@ -122,8 +153,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
 	 */
 	if (dsp->dsa_pending_op != PENDING_NONE &&
 	    dsp->dsa_pending_op != PENDING_FREE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -146,8 +176,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
 			return (0);
 		} else {
 			/* not a continuation.  Push out pending record */
-			if (dump_bytes(dsp, dsp->dsa_drr,
-			    sizeof (dmu_replay_record_t)) != 0)
+			if (dump_record(dsp, NULL, 0) != 0)
 				return (SET_ERROR(EINTR));
 			dsp->dsa_pending_op = PENDING_NONE;
 		}
@@ -160,8 +189,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
 	drrf->drr_length = length;
 	drrf->drr_toguid = dsp->dsa_toguid;
 	if (length == -1ULL) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 	} else {
 		dsp->dsa_pending_op = PENDING_FREE;
@@ -193,12 +221,11 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec
 	 * of different types.
 	 */
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
-	/* write a DATA record */
+	/* write a WRITE record */
 	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
 	dsp->dsa_drr->drr_type = DRR_WRITE;
 	drrw->drr_object = object;
@@ -224,9 +251,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec
 		drrw->drr_key.ddk_cksum = bp->blk_cksum;
 	}
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (SET_ERROR(EINTR));
-	if (dump_bytes(dsp, data, blksz) != 0)
+	if (dump_record(dsp, data, blksz) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
@@ -240,8 +265,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, 
 	    &(dsp->dsa_drr->drr_u.drr_write_embedded);
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (EINTR);
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -261,9 +285,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, 
 
 	decode_embedded_bp_compressed(bp, buf);
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (EINTR);
-	if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
+	if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
 		return (EINTR);
 	return (0);
 }
@@ -274,8 +296,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t 
 	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -287,9 +308,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t 
 	drrs->drr_length = blksz;
 	drrs->drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
-		return (SET_ERROR(EINTR));
-	if (dump_bytes(dsp, data, blksz))
+	if (dump_record(dsp, data, blksz) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
@@ -312,8 +331,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin
 	 */
 	if (dsp->dsa_pending_op != PENDING_NONE &&
 	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -327,8 +345,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin
 			return (0);
 		} else {
 			/* can't be aggregated.  Push out pending record */
-			if (dump_bytes(dsp, dsp->dsa_drr,
-			    sizeof (dmu_replay_record_t)) != 0)
+			if (dump_record(dsp, NULL, 0) != 0)
 				return (SET_ERROR(EINTR));
 			dsp->dsa_pending_op = PENDING_NONE;
 		}
@@ -355,8 +372,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t 
 		return (dump_freeobjects(dsp, object, 1));
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -377,11 +393,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t 
 	    drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
 		drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (SET_ERROR(EINTR));
-
-	if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+	if (dump_record(dsp, DN_BONUS(dnp),
+	    P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
 		return (SET_ERROR(EINTR));
+	}
 
 	/* Free anything past the end of the file. */
 	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
@@ -625,7 +640,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	dsp->dsa_os = os;
 	dsp->dsa_off = off;
 	dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid;
-	ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
 	dsp->dsa_pending_op = PENDING_NONE;
 	dsp->dsa_incremental = (fromzb != NULL);
 	dsp->dsa_featureflags = featureflags;
@@ -637,7 +651,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	dsl_dataset_long_hold(ds, FTAG);
 	dsl_pool_rele(dp, tag);
 
-	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+	if (dump_record(dsp, NULL, 0) != 0) {
 		err = dsp->dsa_err;
 		goto out;
 	}
@@ -646,7 +660,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	    backup_cb, dsp);
 
 	if (dsp->dsa_pending_op != PENDING_NONE)
-		if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			err = SET_ERROR(EINTR);
 
 	if (err != 0) {
@@ -660,7 +674,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
 	drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+	if (dump_record(dsp, NULL, 0) != 0) {
 		err = dsp->dsa_err;
 		goto out;
 	}
@@ -1217,13 +1231,19 @@ dmu_recv_begin(char *tofs, char *tosnap,
 }
 
 struct restorearg {
+	objset_t *os;
 	int err;
 	boolean_t byteswap;
 	vnode_t *vp;
-	char *buf;
 	uint64_t voff;
 	int bufsize; /* amount of memory allocated for buf */
+
+	dmu_replay_record_t *drr;
+	dmu_replay_record_t *next_drr;
+	char *buf;
 	zio_cksum_t cksum;
+	zio_cksum_t prev_cksum;
+
 	avl_tree_t *guid_to_ds_map;
 };
 
@@ -1262,14 +1282,11 @@ free_guid_map_onexit(void *arg)
 	kmem_free(ca, sizeof (avl_tree_t));
 }
 
-static void *
-restore_read(struct restorearg *ra, int len, char *buf)
+static int
+restore_read(struct restorearg *ra, int len, void *buf)
 {
 	int done = 0;
 
-	if (buf == NULL)
-		buf = ra->buf;
-
 	/* some things will require 8-byte alignment, so everything must */
 	ASSERT0(len % 8);
 	ASSERT3U(len, <=, ra->bufsize);
@@ -1278,7 +1295,7 @@ restore_read(struct restorearg *ra, int 
 		ssize_t resid;
 
 		ra->err = vn_rdwr(UIO_READ, ra->vp,
-		    buf + done, len - done,
+		    (char *)buf + done, len - done,
 		    ra->voff, UIO_SYSSPACE, FAPPEND,
 		    RLIM64_INFINITY, CRED(), &resid);
 
@@ -1287,24 +1304,21 @@ restore_read(struct restorearg *ra, int 
 		ra->voff += len - done - resid;
 		done = len - resid;
 		if (ra->err != 0)
-			return (NULL);
+			return (ra->err);
 	}
 
 	ASSERT3U(done, ==, len);
-	if (ra->byteswap)
-		fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
-	else
-		fletcher_4_incremental_native(buf, len, &ra->cksum);
-	return (buf);
+	return (0);
 }
 
 static void
-backup_byteswap(dmu_replay_record_t *drr)
+byteswap_record(dmu_replay_record_t *drr)
 {
 #define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
 #define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
 	drr->drr_type = BSWAP_32(drr->drr_type);
 	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
+
 	switch (drr->drr_type) {
 	case DRR_BEGIN:
 		DO64(drr_begin.drr_magic);
@@ -1334,10 +1348,7 @@ backup_byteswap(dmu_replay_record_t *drr
 		DO64(drr_write.drr_offset);
 		DO64(drr_write.drr_length);
 		DO64(drr_write.drr_toguid);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
 		DO64(drr_write.drr_key.ddk_prop);
 		break;
 	case DRR_WRITE_BYREF:
@@ -1348,10 +1359,8 @@ backup_byteswap(dmu_replay_record_t *drr
 		DO64(drr_write_byref.drr_refguid);
 		DO64(drr_write_byref.drr_refobject);
 		DO64(drr_write_byref.drr_refoffset);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
+		    drr_key.ddk_cksum);
 		DO64(drr_write_byref.drr_key.ddk_prop);
 		break;
 	case DRR_WRITE_EMBEDDED:
@@ -1374,13 +1383,15 @@ backup_byteswap(dmu_replay_record_t *drr
 		DO64(drr_spill.drr_toguid);
 		break;
 	case DRR_END:
-		DO64(drr_end.drr_checksum.zc_word[0]);
-		DO64(drr_end.drr_checksum.zc_word[1]);
-		DO64(drr_end.drr_checksum.zc_word[2]);
-		DO64(drr_end.drr_checksum.zc_word[3]);
 		DO64(drr_end.drr_toguid);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
 		break;
 	}
+
+	if (drr->drr_type != DRR_BEGIN) {
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
+	}
+
 #undef DO64
 #undef DO32
 }
@@ -1397,11 +1408,10 @@ deduce_nblkptr(dmu_object_type_t bonus_t
 }
 
 static int
-restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
+restore_object(struct restorearg *ra, struct drr_object *drro, void *data)
 {
 	dmu_object_info_t doi;
 	dmu_tx_t *tx;
-	void *data = NULL;
 	uint64_t object;
 	int err;
 
@@ -1412,23 +1422,17 @@ restore_object(struct restorearg *ra, ob
 	    drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
 	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
 	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
-	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
+	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(ra->os)) ||
 	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
 		return (SET_ERROR(EINVAL));
 	}
 
-	err = dmu_object_info(os, drro->drr_object, &doi);
+	err = dmu_object_info(ra->os, drro->drr_object, &doi);
 
 	if (err != 0 && err != ENOENT)
 		return (SET_ERROR(EINVAL));
 	object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
 
-	if (drro->drr_bonuslen) {
-		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
-		if (ra->err != 0)
-			return (ra->err);
-	}
-
 	/*
 	 * If we are losing blkptrs or changing the block size this must
 	 * be a new file instance.  We must clear out the previous file
@@ -1442,14 +1446,14 @@ restore_object(struct restorearg *ra, ob
 
 		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr) {
-			err = dmu_free_long_range(os, drro->drr_object,
+			err = dmu_free_long_range(ra->os, drro->drr_object,
 			    0, DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
 		}
 	}
 
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 	dmu_tx_hold_bonus(tx, object);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
@@ -1459,7 +1463,7 @@ restore_object(struct restorearg *ra, ob
 
 	if (object == DMU_NEW_OBJECT) {
 		/* currently free, want to be allocated */
-		err = dmu_object_claim(os, drro->drr_object,
+		err = dmu_object_claim(ra->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 	} else if (drro->drr_type != doi.doi_type ||
@@ -1467,7 +1471,7 @@ restore_object(struct restorearg *ra, ob
 	    drro->drr_bonustype != doi.doi_bonus_type ||
 	    drro->drr_bonuslen != doi.doi_bonus_size) {
 		/* currently allocated, but with different properties */
-		err = dmu_object_reclaim(os, drro->drr_object,
+		err = dmu_object_reclaim(ra->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 	}
@@ -1476,14 +1480,15 @@ restore_object(struct restorearg *ra, ob
 		return (SET_ERROR(EINVAL));
 	}
 
-	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
-	    tx);
-	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
+	dmu_object_set_checksum(ra->os, drro->drr_object,
+	    drro->drr_checksumtype, tx);
+	dmu_object_set_compress(ra->os, drro->drr_object,
+	    drro->drr_compress, tx);
 
 	if (data != NULL) {
 		dmu_buf_t *db;
 
-		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
+		VERIFY0(dmu_bonus_hold(ra->os, drro->drr_object, FTAG, &db));
 		dmu_buf_will_dirty(db, tx);
 
 		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
@@ -1502,7 +1507,7 @@ restore_object(struct restorearg *ra, ob
 
 /* ARGSUSED */
 static int
-restore_freeobjects(struct restorearg *ra, objset_t *os,
+restore_freeobjects(struct restorearg *ra,
     struct drr_freeobjects *drrfo)
 {
 	uint64_t obj;
@@ -1512,13 +1517,13 @@ restore_freeobjects(struct restorearg *r
 
 	for (obj = drrfo->drr_firstobj;
 	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
-	    (void) dmu_object_next(os, &obj, FALSE, 0)) {
+	    (void) dmu_object_next(ra->os, &obj, FALSE, 0)) {
 		int err;
 
-		if (dmu_object_info(os, obj, NULL) != 0)
+		if (dmu_object_info(ra->os, obj, NULL) != 0)
 			continue;
 
-		err = dmu_free_long_object(os, obj);
+		err = dmu_free_long_object(ra->os, obj);
 		if (err != 0)
 			return (err);
 	}
@@ -1526,49 +1531,37 @@ restore_freeobjects(struct restorearg *r
 }
 
 static int
-restore_write(struct restorearg *ra, objset_t *os,
-    struct drr_write *drrw)
+restore_write(struct restorearg *ra, struct drr_write *drrw, arc_buf_t *abuf)
 {
 	dmu_tx_t *tx;
-	void *data;
 	int err;
 
 	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
 	    !DMU_OT_IS_VALID(drrw->drr_type))
 		return (SET_ERROR(EINVAL));
 
-	if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
+	if (dmu_object_info(ra->os, drrw->drr_object, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
-	dmu_buf_t *bonus;
-	if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
-		return (SET_ERROR(EINVAL));
-
-	arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
-
-	data = restore_read(ra, drrw->drr_length, abuf->b_data);
-	if (data == NULL) {
-		dmu_return_arcbuf(abuf);
-		dmu_buf_rele(bonus, FTAG);
-		return (ra->err);
-	}
-
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 
 	dmu_tx_hold_write(tx, drrw->drr_object,
 	    drrw->drr_offset, drrw->drr_length);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
-		dmu_return_arcbuf(abuf);
-		dmu_buf_rele(bonus, FTAG);
 		dmu_tx_abort(tx);
 		return (err);
 	}
 	if (ra->byteswap) {
 		dmu_object_byteswap_t byteswap =
 		    DMU_OT_BYTESWAP(drrw->drr_type);
-		dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+		dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
+		    drrw->drr_length);
 	}
+
+	dmu_buf_t *bonus;
+	if (dmu_bonus_hold(ra->os, drrw->drr_object, FTAG, &bonus) != 0)
+		return (SET_ERROR(EINVAL));
 	dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
 	dmu_tx_commit(tx);
 	dmu_buf_rele(bonus, FTAG);
@@ -1583,8 +1576,7 @@ restore_write(struct restorearg *ra, obj
  * data from the stream to fulfill this write.
  */
 static int
-restore_write_byref(struct restorearg *ra, objset_t *os,
-    struct drr_write_byref *drrwbr)
+restore_write_byref(struct restorearg *ra, struct drr_write_byref *drrwbr)
 {
 	dmu_tx_t *tx;
 	int err;
@@ -1610,7 +1602,7 @@ restore_write_byref(struct restorearg *r
 		if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
 			return (SET_ERROR(EINVAL));
 	} else {
-		ref_os = os;
+		ref_os = ra->os;
 	}
 
 	err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
@@ -1618,7 +1610,7 @@ restore_write_byref(struct restorearg *r
 	if (err != 0)
 		return (err);
 
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 
 	dmu_tx_hold_write(tx, drrwbr->drr_object,
 	    drrwbr->drr_offset, drrwbr->drr_length);
@@ -1627,7 +1619,7 @@ restore_write_byref(struct restorearg *r
 		dmu_tx_abort(tx);
 		return (err);
 	}
-	dmu_write(os, drrwbr->drr_object,
+	dmu_write(ra->os, drrwbr->drr_object,
 	    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
 	dmu_buf_rele(dbp, FTAG);
 	dmu_tx_commit(tx);
@@ -1635,12 +1627,11 @@ restore_write_byref(struct restorearg *r
 }
 
 static int
-restore_write_embedded(struct restorearg *ra, objset_t *os,
-    struct drr_write_embedded *drrwnp)
+restore_write_embedded(struct restorearg *ra,
+    struct drr_write_embedded *drrwnp, void *data)
 {
 	dmu_tx_t *tx;
 	int err;
-	void *data;
 
 	if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
 		return (EINVAL);
@@ -1653,11 +1644,7 @@ restore_write_embedded(struct restorearg
 	if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
 		return (EINVAL);
 
-	data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL);
-	if (data == NULL)
-		return (ra->err);
-
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 
 	dmu_tx_hold_write(tx, drrwnp->drr_object,
 	    drrwnp->drr_offset, drrwnp->drr_length);
@@ -1667,7 +1654,7 @@ restore_write_embedded(struct restorearg
 		return (err);
 	}
 
-	dmu_write_embedded(os, drrwnp->drr_object,
+	dmu_write_embedded(ra->os, drrwnp->drr_object,
 	    drrwnp->drr_offset, data, drrwnp->drr_etype,
 	    drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
 	    ra->byteswap ^ ZFS_HOST_BYTEORDER, tx);
@@ -1677,31 +1664,26 @@ restore_write_embedded(struct restorearg
 }
 
 static int
-restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
+restore_spill(struct restorearg *ra, struct drr_spill *drrs, void *data)
 {
 	dmu_tx_t *tx;
-	void *data;
 	dmu_buf_t *db, *db_spill;
 	int err;
 
 	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
-	    drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os)))
+	    drrs->drr_length > spa_maxblocksize(dmu_objset_spa(ra->os)))
 		return (SET_ERROR(EINVAL));
 
-	data = restore_read(ra, drrs->drr_length, NULL);
-	if (data == NULL)
-		return (ra->err);
-
-	if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
+	if (dmu_object_info(ra->os, drrs->drr_object, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
-	VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
+	VERIFY0(dmu_bonus_hold(ra->os, drrs->drr_object, FTAG, &db));
 	if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
 		dmu_buf_rele(db, FTAG);
 		return (err);
 	}
 
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 
 	dmu_tx_hold_spill(tx, db->db_object);
 
@@ -1728,8 +1710,7 @@ restore_spill(struct restorearg *ra, obj
 
 /* ARGSUSED */
 static int
-restore_free(struct restorearg *ra, objset_t *os,
-    struct drr_free *drrf)
+restore_free(struct restorearg *ra, struct drr_free *drrf)
 {
 	int err;
 
@@ -1737,10 +1718,10 @@ restore_free(struct restorearg *ra, objs
 	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
 		return (SET_ERROR(EINVAL));
 
-	if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
+	if (dmu_object_info(ra->os, drrf->drr_object, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
-	err = dmu_free_long_range(os, drrf->drr_object,
+	err = dmu_free_long_range(ra->os, drrf->drr_object,
 	    drrf->drr_offset, drrf->drr_length);
 	return (err);
 }
@@ -1755,6 +1736,155 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *d
 	(void) dsl_destroy_head(name);
 }
 
+static void
+restore_cksum(struct restorearg *ra, int len, void *buf)
+{
+	if (ra->byteswap) {
+		fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
+	} else {
+		fletcher_4_incremental_native(buf, len, &ra->cksum);
+	}
+}
+
+/*
+ * If len != 0, read payload into buf.
+ * Read next record's header into ra->next_drr.
+ * Verify checksum of payload and next record.
+ */
+static int
+restore_read_payload_and_next_header(struct restorearg *ra, int len, void *buf)
+{
+	int err;
+
+	if (len != 0) {
+		ASSERT3U(len, <=, ra->bufsize);
+		err = restore_read(ra, len, buf);
+		if (err != 0)
+			return (err);
+		restore_cksum(ra, len, buf);
+	}
+
+	ra->prev_cksum = ra->cksum;
+
+	err = restore_read(ra, sizeof (*ra->next_drr), ra->next_drr);
+	if (err != 0)
+		return (err);
+	if (ra->next_drr->drr_type == DRR_BEGIN)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Note: checksum is of everything up to but not including the
+	 * checksum itself.
+	 */
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	restore_cksum(ra,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ra->next_drr);
+
+	zio_cksum_t cksum_orig = ra->next_drr->drr_u.drr_checksum.drr_checksum;
+	zio_cksum_t *cksump = &ra->next_drr->drr_u.drr_checksum.drr_checksum;
+
+	if (ra->byteswap)
+		byteswap_record(ra->next_drr);
+
+	if ((!ZIO_CHECKSUM_IS_ZERO(cksump)) &&
+	    !ZIO_CHECKSUM_EQUAL(ra->cksum, *cksump))
+		return (SET_ERROR(ECKSUM));
+
+	restore_cksum(ra, sizeof (cksum_orig), &cksum_orig);
+
+	return (0);
+}
+
+static int
+restore_process_record(struct restorearg *ra)
+{
+	int err;
+
+	switch (ra->drr->drr_type) {
+	case DRR_OBJECT:
+	{
+		struct drr_object *drro = &ra->drr->drr_u.drr_object;
+		err = restore_read_payload_and_next_header(ra,
+		    P2ROUNDUP(drro->drr_bonuslen, 8), ra->buf);
+		if (err != 0)
+			return (err);
+		return (restore_object(ra, drro, ra->buf));
+	}
+	case DRR_FREEOBJECTS:
+	{
+		struct drr_freeobjects *drrfo =
+		    &ra->drr->drr_u.drr_freeobjects;
+		err = restore_read_payload_and_next_header(ra, 0, NULL);
+		if (err != 0)
+			return (err);
+		return (restore_freeobjects(ra, drrfo));
+	}
+	case DRR_WRITE:
+	{
+		struct drr_write *drrw = &ra->drr->drr_u.drr_write;
+		arc_buf_t *abuf = arc_loan_buf(dmu_objset_spa(ra->os),
+		    drrw->drr_length);
+
+		err = restore_read_payload_and_next_header(ra,
+		    drrw->drr_length, abuf->b_data);
+		if (err != 0)
+			return (err);
+		err = restore_write(ra, drrw, abuf);
+		/* if restore_write() is successful, it consumes the arc_buf */
+		if (err != 0)
+			dmu_return_arcbuf(abuf);
+		return (err);
+	}
+	case DRR_WRITE_BYREF:
+	{
+		struct drr_write_byref *drrwbr =
+		    &ra->drr->drr_u.drr_write_byref;
+		err = restore_read_payload_and_next_header(ra, 0, NULL);
+		if (err != 0)
+			return (err);
+		return (restore_write_byref(ra, drrwbr));
+	}
+	case DRR_WRITE_EMBEDDED:
+	{
+		struct drr_write_embedded *drrwe =
+		    &ra->drr->drr_u.drr_write_embedded;
+		err = restore_read_payload_and_next_header(ra,
+		    P2ROUNDUP(drrwe->drr_psize, 8), ra->buf);
+		if (err != 0)
+			return (err);
+		return (restore_write_embedded(ra, drrwe, ra->buf));
+	}
+	case DRR_FREE:
+	{
+		struct drr_free *drrf = &ra->drr->drr_u.drr_free;
+		err = restore_read_payload_and_next_header(ra, 0, NULL);
+		if (err != 0)
+			return (err);
+		return (restore_free(ra, drrf));
+	}
+	case DRR_END:
+	{
+		struct drr_end *drre = &ra->drr->drr_u.drr_end;
+		if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum))
+			return (SET_ERROR(EINVAL));
+		return (0);
+	}
+	case DRR_SPILL:
+	{
+		struct drr_spill *drrs = &ra->drr->drr_u.drr_spill;
+		err = restore_read_payload_and_next_header(ra,
+		    drrs->drr_length, ra->buf);
+		if (err != 0)
+			return (err);
+		return (restore_spill(ra, drrs, ra->buf));
+	}
+	default:
+		return (SET_ERROR(EINVAL));
+	}
+}
+
 /*
  * NB: callers *must* call dmu_recv_end() if this succeeds.
  */
@@ -1762,10 +1892,8 @@ int
 dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
     int cleanup_fd, uint64_t *action_handlep)
 {
+	int err = 0;
 	struct restorearg ra = { 0 };
-	dmu_replay_record_t *drr;
-	objset_t *os;
-	zio_cksum_t pcksum;
 	int featureflags;
 
 	ra.byteswap = drc->drc_byteswap;
@@ -1773,7 +1901,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, 
 	ra.vp = vp;
 	ra.voff = *voffp;
 	ra.bufsize = SPA_MAXBLOCKSIZE;
+	ra.drr = kmem_alloc(sizeof (*ra.drr), KM_SLEEP);
 	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
+	ra.next_drr = kmem_alloc(sizeof (*ra.next_drr), KM_SLEEP);
 
 	/* these were verified in dmu_recv_begin */
 	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -1783,7 +1913,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, 
 	/*
 	 * Open the objset we are modifying.
 	 */
-	VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
+	VERIFY0(dmu_objset_from_ds(drc->drc_ds, &ra.os));
 
 	ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
 
@@ -1809,13 +1939,13 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, 
 			avl_create(ra.guid_to_ds_map, guid_compare,
 			    sizeof (guid_map_entry_t),
 			    offsetof(guid_map_entry_t, avlnode));
-			ra.err = zfs_onexit_add_cb(minor,
+			err = zfs_onexit_add_cb(minor,
 			    free_guid_map_onexit, ra.guid_to_ds_map,
 			    action_handlep);
 			if (ra.err != 0)
 				goto out;
 		} else {
-			ra.err = zfs_onexit_cb_data(minor, *action_handlep,
+			err = zfs_onexit_cb_data(minor, *action_handlep,
 			    (void **)&ra.guid_to_ds_map);
 			if (ra.err != 0)
 				goto out;
@@ -1824,96 +1954,34 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, 
 		drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
 	}
 
-	/*
-	 * Read records and process them.
-	 */
-	pcksum = ra.cksum;
-	while (ra.err == 0 &&
-	    NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) {
+	err = restore_read_payload_and_next_header(&ra, 0, NULL);
+	if (err != 0)
+		goto out;
+	for (;;) {
+		void *tmp;
+
 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
-			ra.err = SET_ERROR(EINTR);
-			goto out;
+			err = SET_ERROR(EINTR);
+			break;
 		}
 
-		if (ra.byteswap)
-			backup_byteswap(drr);
+		tmp = ra.next_drr;
+		ra.next_drr = ra.drr;
+		ra.drr = tmp;
 
-		switch (drr->drr_type) {
-		case DRR_OBJECT:
-		{
-			/*
-			 * We need to make a copy of the record header,
-			 * because restore_{object,write} may need to
-			 * restore_read(), which will invalidate drr.
-			 */
-			struct drr_object drro = drr->drr_u.drr_object;
-			ra.err = restore_object(&ra, os, &drro);
-			break;
-		}
-		case DRR_FREEOBJECTS:
-		{
-			struct drr_freeobjects drrfo =
-			    drr->drr_u.drr_freeobjects;
-			ra.err = restore_freeobjects(&ra, os, &drrfo);
-			break;
-		}
-		case DRR_WRITE:
-		{
-			struct drr_write drrw = drr->drr_u.drr_write;
-			ra.err = restore_write(&ra, os, &drrw);
-			break;
-		}
-		case DRR_WRITE_BYREF:
-		{
-			struct drr_write_byref drrwbr =
-			    drr->drr_u.drr_write_byref;
-			ra.err = restore_write_byref(&ra, os, &drrwbr);
-			break;
-		}
-		case DRR_WRITE_EMBEDDED:
-		{
-			struct drr_write_embedded drrwe =
-			    drr->drr_u.drr_write_embedded;
-			ra.err = restore_write_embedded(&ra, os, &drrwe);
-			break;
-		}
-		case DRR_FREE:
-		{
-			struct drr_free drrf = drr->drr_u.drr_free;
-			ra.err = restore_free(&ra, os, &drrf);
+		/* process ra.drr, read in ra.next_drr */
+		err = restore_process_record(&ra);
+		if (err != 0)
 			break;
-		}
-		case DRR_END:
-		{
-			struct drr_end drre = drr->drr_u.drr_end;
-			/*
-			 * We compare against the *previous* checksum
-			 * value, because the stored checksum is of
-			 * everything before the DRR_END record.
-			 */
-			if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
-				ra.err = SET_ERROR(ECKSUM);
-			goto out;
-		}
-		case DRR_SPILL:
-		{
-			struct drr_spill drrs = drr->drr_u.drr_spill;
-			ra.err = restore_spill(&ra, os, &drrs);
+		if (ra.drr->drr_type == DRR_END)
 			break;
-		}
-		default:
-			ra.err = SET_ERROR(EINVAL);
-			goto out;
-		}
-		pcksum = ra.cksum;
 	}
-	ASSERT(ra.err != 0);
 
 out:
 	if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
 		zfs_onexit_fd_rele(cleanup_fd);
 
-	if (ra.err != 0) {
+	if (err != 0) {
 		/*
 		 * destroy what we created, so we don't leave it in the
 		 * inconsistent restoring state.
@@ -1921,9 +1989,11 @@ out:
 		dmu_recv_cleanup_ds(drc);
 	}
 
+	kmem_free(ra.drr, sizeof (*ra.drr));
 	kmem_free(ra.buf, ra.bufsize);
+	kmem_free(ra.next_drr, sizeof (*ra.next_drr));
 	*voffp = ra.voff;
-	return (ra.err);
+	return (err);
 }
 
 static int

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h	Mon Aug 10 18:27:54 2015	(r286585)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h	Mon Aug 10 19:32:30 2015	(r286586)
@@ -443,6 +443,19 @@ _NOTE(CONSTCOND) } while (0)
 	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
 	((zc1).zc_word[3] - (zc2).zc_word[3])))
 
+#define	ZIO_CHECKSUM_IS_ZERO(zc) \
+	(0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \
+	(zc)->zc_word[2] | (zc)->zc_word[3]))
+

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201508101932.t7AJWUXO098537>