Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 Apr 2017 17:57:43 +0000 (UTC)
From:      Josh Paetzel <jpaetzel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r317414 - in head: cddl/contrib/opensolaris/cmd/zfs cddl/contrib/opensolaris/cmd/zstreamdump cddl/contrib/opensolaris/lib/libzfs/common cddl/contrib/opensolaris/lib/libzfs_core/common s...
Message-ID:  <201704251757.v3PHvh8i002900@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jpaetzel
Date: Tue Apr 25 17:57:43 2017
New Revision: 317414
URL: https://svnweb.freebsd.org/changeset/base/317414

Log:
  MFV 316894
  
  7252 7628 compressed zfs send / receive
  
  illumos/illumos-gate@5602294fda888d923d57a78bafdaf48ae6223dea
  https://github.com/illumos/illumos-gate/commit/5602294fda888d923d57a78bafdaf48ae6223dea
  
  https://www.illumos.org/issues/7252
    This feature includes code to allow a system with compressed ARC enabled to
    send data in its compressed form straight out of the ARC, and receive data in
    its compressed form directly into the ARC.
  
  https://www.illumos.org/issues/7628
    We should have longer, more readable versions of the ZFS send / recv options.
  
  7628 create long versions of ZFS send / receive options
  
  Reviewed by: George Wilson <george.wilson@delphix.com>
  Reviewed by: John Kennedy <john.kennedy@delphix.com>
  Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  Reviewed by: Paul Dagnelie <pcd@delphix.com>
  Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
  Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
  Reviewed by: David Quigley <dpquigl@davequigley.com>
  Reviewed by: Thomas Caputi <tcaputi@datto.com>
  Approved by: Dan McDonald <danmcd@omniti.com>
  Author: Dan Kimmel <dan.kimmel@delphix.com>

Modified:
  head/cddl/contrib/opensolaris/cmd/zfs/zfs.8
  head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
  head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
  head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
  head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/refcount.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/cddl/contrib/opensolaris/cmd/zfs/   (props changed)
  head/cddl/contrib/opensolaris/lib/libzfs/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs.8
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zfs/zfs.8	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/cmd/zfs/zfs.8	Tue Apr 25 17:57:43 2017	(r317414)
@@ -180,12 +180,12 @@
 .Ar bookmark
 .Nm
 .Cm send
-.Op Fl DnPpRveL
+.Op Fl DLPRcenpv
 .Op Fl i Ar snapshot | Fl I Ar snapshot
 .Ar snapshot
 .Nm
 .Cm send
-.Op Fl eL
+.Op Fl Lce
 .Op Fl i Ar snapshot Ns | Ns bookmark
 .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
 .Nm
@@ -2535,7 +2535,7 @@ feature.
 .It Xo
 .Nm
 .Cm send
-.Op Fl DnPpRveL
+.Op Fl DLPRcenpv
 .Op Fl i Ar snapshot | Fl I Ar snapshot
 .Ar snapshot
 .Xc
@@ -2580,7 +2580,7 @@ The incremental
 source may be specified as with the
 .Fl i
 option.
-.It Fl R
+.It Fl R, -replicate
 Generate a replication stream package, which will replicate the specified
 filesystem, and all descendent file systems, up to the named snapshot. When
 received, all properties, snapshots, descendent file systems, and clones are
@@ -2598,7 +2598,7 @@ is received. If the
 .Fl F
 flag is specified when this stream is received, snapshots and file systems that
 do not exist on the sending side are destroyed.
-.It Fl D
+.It Fl D, -dedup
 Generate a deduplicated stream. Blocks which would have been sent multiple
 times in the send stream will only be sent once.  The receiving system must
 also support this feature to receive a deduplicated stream.  This flag can
@@ -2607,7 +2607,7 @@ be used regardless of the dataset's
 property, but performance will be much better if the filesystem uses a
 dedup-capable checksum (eg.
 .Sy sha256 ) .
-.It Fl L
+.It Fl L, -large-block
 Generate a stream which may contain blocks larger than 128KB.
 This flag
 has no effect if the
@@ -2623,7 +2623,7 @@ See
 for details on ZFS feature flags and the
 .Sy large_blocks
 feature.
-.It Fl e
+.It Fl e, -embed
 Generate a more compact stream by using WRITE_EMBEDDED records for blocks
 which are stored more compactly on disk by the
 .Sy embedded_data
@@ -2646,11 +2646,25 @@ See
 for details on ZFS feature flags and the
 .Sy embedded_data
 feature.
-.It Fl p
+.It Fl c, -compressed
+Generate a more compact stream by using compressed WRITE records for blocks
+which are compressed on disk and in memory (see the
+.Sy compression property for details).  If the
+.Sy lz4_compress
+feature is active on the sending system, then the receiving system must have that
+feature enabled as well. If the
+.Sy large_blocks
+feature is enabled on the sending system but the
+.Fl L
+option is not supplied in conjunction with
+.Fl c
+then the data will be decompressed before sending so it can be split
+into smaller block sizes.
+.It Fl p, -props
 Include the dataset's properties in the stream. This flag is implicit when
 .Fl R
 is specified. The receiving system must also support this feature.
-.It Fl n
+.It Fl n, -dryrun
 Do a dry-run ("No-op") send.  Do not generate any actual send data.  This is
 useful in conjunction with the
 .Fl v
@@ -2660,9 +2674,9 @@ flags to determine what data will be sen
 In this case, the verbose output will be written to
 standard output (contrast with a non-dry-run, where the stream is written
 to standard output and the verbose output goes to standard error).
-.It Fl P
+.It Fl P, -parsable
 Print machine-parsable verbose information about the stream package generated.
-.It Fl v
+.It Fl v, -verbose
 Print verbose information about the stream package generated.
 This information includes a per-second report of how much data has been sent.
 .El
@@ -2673,7 +2687,7 @@ on future versions of
 .It Xo
 .Nm
 .Cm send
-.Op Fl eL
+.Op Fl Lce
 .Op Fl i Ar snapshot Ns | Ns Ar bookmark
 .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
 .Xc
@@ -2699,7 +2713,7 @@ specified as the last component of the n
 If the incremental target is a clone, the incremental source can
 be the origin snapshot, or an earlier snapshot in the origin's filesystem,
 or the origin's origin, etc.
-.It Fl L
+.It Fl L, -large-block
 Generate a stream which may contain blocks larger than 128KB.
 This flag
 has no effect if the
@@ -2715,7 +2729,22 @@ See
 for details on ZFS feature flags and the
 .Sy large_blocks
 feature.
-.It Fl e
+.It Fl c, -compressed
+Generate a more compact stream by using compressed WRITE records for blocks
+which are compressed on disk and in memory (see the
+.Sy compression
+property for details).  If the
+.Sy lz4_compress
+feature is active on the sending system, then the receiving system must have
+that feature enabled as well. If the
+.Sy large_blocks
+feature is enabled on the sending system but the
+.Fl L
+option is not supplied in conjunction with
+.Fl c
+then the data will be decompressed before sending so it can be split
+into smaller block sizes.
+.It Fl e, -embed
 Generate a more compact stream by using WRITE_EMBEDDED records for blocks
 which are stored more compactly on disk by the
 .Sy embedded_data

Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c	Tue Apr 25 17:57:43 2017	(r317414)
@@ -35,6 +35,7 @@
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
+#include <getopt.h>
 #include <libgen.h>
 #include <libintl.h>
 #include <libuutil.h>
@@ -278,7 +279,7 @@ get_usage(zfs_help_t idx)
 	case HELP_ROLLBACK:
 		return (gettext("\trollback [-rRf] <snapshot>\n"));
 	case HELP_SEND:
-		return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] "
+		return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] "
 		    "<snapshot>\n"
 		    "\tsend [-Le] [-i snapshot|bookmark] "
 		    "<filesystem|volume|snapshot>\n"
@@ -3771,8 +3772,23 @@ zfs_do_send(int argc, char **argv)
 	nvlist_t *dbgnv = NULL;
 	boolean_t extraverbose = B_FALSE;
 
+	struct option long_options[] = {
+		{"replicate",	no_argument,		NULL, 'R'},
+		{"props",	no_argument,		NULL, 'p'},
+		{"parsable",	no_argument,		NULL, 'P'},
+		{"dedup",	no_argument,		NULL, 'D'},
+		{"verbose",	no_argument,		NULL, 'v'},
+		{"dryrun",	no_argument,		NULL, 'n'},
+		{"large-block",	no_argument,		NULL, 'L'},
+		{"embed",	no_argument,		NULL, 'e'},
+		{"resume",	required_argument,	NULL, 't'},
+		{"compressed",	no_argument,		NULL, 'c'},
+		{0, 0, 0, 0}
+	};
+
 	/* check options */
-	while ((c = getopt(argc, argv, ":i:I:RDpvnPLet:")) != -1) {
+	while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:c", long_options,
+	    NULL)) != -1) {
 		switch (c) {
 		case 'i':
 			if (fromname)
@@ -3816,12 +3832,17 @@ zfs_do_send(int argc, char **argv)
 		case 't':
 			resume_token = optarg;
 			break;
+		case 'c':
+			flags.compress = B_TRUE;
+			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
 			usage(B_FALSE);
 			break;
 		case '?':
+			/*FALLTHROUGH*/
+		default:
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
 			usage(B_FALSE);
@@ -3892,6 +3913,8 @@ zfs_do_send(int argc, char **argv)
 			lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
 		if (flags.embed_data)
 			lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
+		if (flags.compress)
+			lzc_flags |= LZC_SEND_FLAG_COMPRESS;
 
 		if (fromname != NULL &&
 		    (fromname[0] == '#' || fromname[0] == '@')) {

Modified: head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c	Tue Apr 25 17:57:43 2017	(r317414)
@@ -25,8 +25,8 @@
  */
 
 /*
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
  */
 
 #include <ctype.h>
@@ -39,6 +39,7 @@
 
 #include <sys/dmu.h>
 #include <sys/zfs_ioctl.h>
+#include <sys/zio.h>
 #include <zfs_fletcher.h>
 
 /*
@@ -251,6 +252,7 @@ main(int argc, char *argv[])
 			(void) fprintf(stderr, "invalid option '%c'\n",
 			    optopt);
 			usage();
+			break;
 		}
 	}
 
@@ -453,38 +455,50 @@ main(int argc, char *argv[])
 				drrw->drr_object = BSWAP_64(drrw->drr_object);
 				drrw->drr_type = BSWAP_32(drrw->drr_type);
 				drrw->drr_offset = BSWAP_64(drrw->drr_offset);
-				drrw->drr_length = BSWAP_64(drrw->drr_length);
+				drrw->drr_logical_size =
+				    BSWAP_64(drrw->drr_logical_size);
 				drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
 				drrw->drr_key.ddk_prop =
 				    BSWAP_64(drrw->drr_key.ddk_prop);
+				drrw->drr_compressed_size =
+				    BSWAP_64(drrw->drr_compressed_size);
 			}
+
+			uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+
 			/*
 			 * If this is verbose and/or dump output,
 			 * print info on the modified block
 			 */
 			if (verbose) {
 				(void) printf("WRITE object = %llu type = %u "
-				    "checksum type = %u\n"
-				    "    offset = %llu length = %llu "
+				    "checksum type = %u compression type = %u\n"
+				    "    offset = %llu logical_size = %llu "
+				    "compressed_size = %llu "
+				    "payload_size = %llu "
 				    "props = %llx\n",
 				    (u_longlong_t)drrw->drr_object,
 				    drrw->drr_type,
 				    drrw->drr_checksumtype,
+				    drrw->drr_compressiontype,
 				    (u_longlong_t)drrw->drr_offset,
-				    (u_longlong_t)drrw->drr_length,
+				    (u_longlong_t)drrw->drr_logical_size,
+				    (u_longlong_t)drrw->drr_compressed_size,
+				    (u_longlong_t)payload_size,
 				    (u_longlong_t)drrw->drr_key.ddk_prop);
 			}
+
 			/*
 			 * Read the contents of the block in from STDIN to buf
 			 */
-			(void) ssread(buf, drrw->drr_length, &zc);
+			(void) ssread(buf, payload_size, &zc);
 			/*
 			 * If in dump mode
 			 */
 			if (dump) {
-				print_block(buf, drrw->drr_length);
+				print_block(buf, payload_size);
 			}
-			total_write_size += drrw->drr_length;
+			total_write_size += payload_size;
 			break;
 
 		case DRR_WRITE_BYREF:

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Tue Apr 25 17:57:43 2017	(r317414)
@@ -616,6 +616,9 @@ typedef struct sendflags {
 
 	/* WRITE_EMBEDDED records of type DATA are permitted */
 	boolean_t embed_data;
+
+	/* compressed WRITE records are permitted */
+	boolean_t compress;
 } sendflags_t;
 
 typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	Tue Apr 25 17:57:43 2017	(r317414)
@@ -354,8 +354,10 @@ cksummer(void *arg)
 		{
 			struct drr_write *drrw = &drr->drr_u.drr_write;
 			dataref_t	dataref;
+			uint64_t	payload_size;
 
-			(void) ssread(buf, drrw->drr_length, ofp);
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			(void) ssread(buf, payload_size, ofp);
 
 			/*
 			 * Use the existing checksum if it's dedup-capable,
@@ -369,7 +371,7 @@ cksummer(void *arg)
 				zio_cksum_t	tmpsha256;
 
 				SHA256Init(&ctx);
-				SHA256Update(&ctx, buf, drrw->drr_length);
+				SHA256Update(&ctx, buf, payload_size);
 				SHA256Final(&tmpsha256, &ctx);
 				drrw->drr_key.ddk_cksum.zc_word[0] =
 				    BE_64(tmpsha256.zc_word[0]);
@@ -399,7 +401,7 @@ cksummer(void *arg)
 
 				wbr_drrr->drr_object = drrw->drr_object;
 				wbr_drrr->drr_offset = drrw->drr_offset;
-				wbr_drrr->drr_length = drrw->drr_length;
+				wbr_drrr->drr_length = drrw->drr_logical_size;
 				wbr_drrr->drr_toguid = drrw->drr_toguid;
 				wbr_drrr->drr_refguid = dataref.ref_guid;
 				wbr_drrr->drr_refobject =
@@ -421,7 +423,7 @@ cksummer(void *arg)
 					goto out;
 			} else {
 				/* block not previously seen */
-				if (dump_record(drr, buf, drrw->drr_length,
+				if (dump_record(drr, buf, payload_size,
 				    &stream_cksum, outfd) != 0)
 					goto out;
 			}
@@ -924,7 +926,7 @@ typedef struct send_dump_data {
 	uint64_t prevsnap_obj;
 	boolean_t seenfrom, seento, replicate, doall, fromorigin;
 	boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
-	boolean_t large_block;
+	boolean_t large_block, compress;
 	int outfd;
 	boolean_t err;
 	nvlist_t *fss;
@@ -940,7 +942,7 @@ typedef struct send_dump_data {
 
 static int
 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
-    boolean_t fromorigin, uint64_t *sizep)
+    boolean_t fromorigin, enum lzc_send_flags flags, uint64_t *sizep)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
@@ -953,6 +955,7 @@ estimate_ioctl(zfs_handle_t *zhp, uint64
 	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 	zc.zc_fromobj = fromsnap_obj;
 	zc.zc_guid = 1;  /* estimate flag */
+	zc.zc_flags = flags;
 
 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
 		char errbuf[1024];
@@ -1192,6 +1195,7 @@ dump_snapshot(zfs_handle_t *zhp, void *a
 	progress_arg_t pa = { 0 };
 	pthread_t tid;
 	char *thissnap;
+	enum lzc_send_flags flags = 0;
 	int err;
 	boolean_t isfromsnap, istosnap, fromorigin;
 	boolean_t exclude = B_FALSE;
@@ -1220,6 +1224,13 @@ dump_snapshot(zfs_handle_t *zhp, void *a
 	if (istosnap)
 		sdd->seento = B_TRUE;
 
+	if (sdd->large_block)
+		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
+	if (sdd->embed_data)
+		flags |= LZC_SEND_FLAG_EMBED_DATA;
+	if (sdd->compress)
+		flags |= LZC_SEND_FLAG_COMPRESS;
+
 	if (!sdd->doall && !isfromsnap && !istosnap) {
 		if (sdd->replicate) {
 			char *snapname;
@@ -1266,7 +1277,7 @@ dump_snapshot(zfs_handle_t *zhp, void *a
 	if (sdd->verbose) {
 		uint64_t size = 0;
 		(void) estimate_ioctl(zhp, sdd->prevsnap_obj,
-		    fromorigin, &size);
+		    fromorigin, flags, &size);
 
 		send_print_verbose(fout, zhp->zfs_name,
 		    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
@@ -1291,12 +1302,6 @@ dump_snapshot(zfs_handle_t *zhp, void *a
 			}
 		}
 
-		enum lzc_send_flags flags = 0;
-		if (sdd->large_block)
-			flags |= LZC_SEND_FLAG_LARGE_BLOCK;
-		if (sdd->embed_data)
-			flags |= LZC_SEND_FLAG_EMBED_DATA;
-
 		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
 		    fromorigin, sdd->outfd, flags, sdd->debugnv);
 
@@ -1602,8 +1607,12 @@ zfs_send_resume(libzfs_handle_t *hdl, se
 	fromguid = 0;
 	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
 
+	if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
+		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
 	if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
 		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
+	if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
+		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
 
 	if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
 		if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
@@ -1636,7 +1645,8 @@ zfs_send_resume(libzfs_handle_t *hdl, se
 
 	if (flags->verbose) {
 		uint64_t size = 0;
-		error = lzc_send_space(zhp->zfs_name, fromname, &size);
+		error = lzc_send_space(zhp->zfs_name, fromname,
+		    lzc_flags, &size);
 		if (error == 0)
 			size = MAX(0, (int64_t)(size - bytes));
 		send_print_verbose(stderr, zhp->zfs_name, fromname,
@@ -1866,6 +1876,7 @@ zfs_send(zfs_handle_t *zhp, const char *
 	sdd.dryrun = flags->dryrun;
 	sdd.large_block = flags->largeblock;
 	sdd.embed_data = flags->embed_data;
+	sdd.compress = flags->compress;
 	sdd.filter_cb = filter_func;
 	sdd.filter_cb_arg = cb_arg;
 	if (debugnvp)
@@ -2960,11 +2971,17 @@ recv_skip(libzfs_handle_t *hdl, int fd, 
 
 		case DRR_WRITE:
 			if (byteswap) {
-				drr->drr_u.drr_write.drr_length =
-				    BSWAP_64(drr->drr_u.drr_write.drr_length);
+				drr->drr_u.drr_write.drr_logical_size =
+				    BSWAP_64(
+				    drr->drr_u.drr_write.drr_logical_size);
+				drr->drr_u.drr_write.drr_compressed_size =
+				    BSWAP_64(
+				    drr->drr_u.drr_write.drr_compressed_size);
 			}
+			uint64_t payload_size =
+			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
 			(void) recv_read(hdl, fd, buf,
-			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
+			    payload_size, B_FALSE, NULL);
 			break;
 		case DRR_SPILL:
 			if (byteswap) {

Modified: head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c	Tue Apr 25 17:57:43 2017	(r317414)
@@ -534,6 +534,8 @@ lzc_send_resume(const char *snapname, co
 		fnvlist_add_boolean(args, "largeblockok");
 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
 		fnvlist_add_boolean(args, "embedok");
+	if (flags & LZC_SEND_FLAG_COMPRESS)
+		fnvlist_add_boolean(args, "compressok");
 	if (resumeobj != 0 || resumeoff != 0) {
 		fnvlist_add_uint64(args, "resume_object", resumeobj);
 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
@@ -559,7 +561,8 @@ lzc_send_resume(const char *snapname, co
  * an equivalent snapshot.
  */
 int
-lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
+lzc_send_space(const char *snapname, const char *from,
+    enum lzc_send_flags flags, uint64_t *spacep)
 {
 	nvlist_t *args;
 	nvlist_t *result;
@@ -568,6 +571,12 @@ lzc_send_space(const char *snapname, con
 	args = fnvlist_alloc();
 	if (from != NULL)
 		fnvlist_add_string(args, "from", from);
+	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
+		fnvlist_add_boolean(args, "largeblockok");
+	if (flags & LZC_SEND_FLAG_EMBED_DATA)
+		fnvlist_add_boolean(args, "embedok");
+	if (flags & LZC_SEND_FLAG_COMPRESS)
+		fnvlist_add_boolean(args, "compressok");
 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
 	nvlist_free(args);
 	if (err == 0)

Modified: head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h	Tue Apr 25 17:57:43 2017	(r317414)
@@ -62,13 +62,14 @@ int lzc_get_holds(const char *, nvlist_t
 
 enum lzc_send_flags {
 	LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
-	LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1
+	LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
+	LZC_SEND_FLAG_COMPRESS = 1 << 2
 };
 
 int lzc_send(const char *, const char *, int, enum lzc_send_flags);
 int lzc_send_resume(const char *, const char *, int,
     enum lzc_send_flags, uint64_t, uint64_t);
-int lzc_send_space(const char *, const char *, uint64_t *);
+int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
 
 struct dmu_replay_record;
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Tue Apr 25 17:46:44 2017	(r317413)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Tue Apr 25 17:57:43 2017	(r317414)
@@ -77,10 +77,10 @@
  * A new reference to a cache buffer can be obtained in two
  * ways: 1) via a hash table lookup using the DVA as a key,
  * or 2) via one of the ARC lists.  The arc_read() interface
- * uses method 1, while the internal arc algorithms for
+ * uses method 1, while the internal ARC algorithms for
  * adjusting the cache use method 2.  We therefore provide two
  * types of locks: 1) the hash table lock array, and 2) the
- * arc list locks.
+ * ARC list locks.
  *
  * Buffers do not have their own mutexes, rather they rely on the
  * hash table mutexes for the bulk of their protection (i.e. most
@@ -93,21 +93,12 @@
  * buf_hash_remove() expects the appropriate hash mutex to be
  * already held before it is invoked.
  *
- * Each arc state also has a mutex which is used to protect the
+ * Each ARC state also has a mutex which is used to protect the
  * buffer list associated with the state.  When attempting to
- * obtain a hash table lock while holding an arc list lock you
+ * obtain a hash table lock while holding an ARC list lock you
  * must use: mutex_tryenter() to avoid deadlock.  Also note that
  * the active state mutex must be held before the ghost state mutex.
  *
- * Arc buffers may have an associated eviction callback function.
- * This function will be invoked prior to removing the buffer (e.g.
- * in arc_do_user_evicts()).  Note however that the data associated
- * with the buffer may be evicted prior to the callback.  The callback
- * must be made with *no locks held* (to prevent deadlock).  Additionally,
- * the users of callbacks must ensure that their private data is
- * protected from simultaneous callbacks from arc_clear_callback()
- * and arc_do_user_evicts().
- *
  * Note that the majority of the performance stats are manipulated
  * with atomic operations.
  *
@@ -136,67 +127,81 @@
  * are cached in the L1ARC. The L1ARC (l1arc_buf_hdr_t) is a structure within
  * the arc_buf_hdr_t that will point to the data block in memory. A block can
  * only be read by a consumer if it has an l1arc_buf_hdr_t. The L1ARC
- * caches data in two ways -- in a list of arc buffers (arc_buf_t) and
+ * caches data in two ways -- in a list of ARC buffers (arc_buf_t) and
  * also in the arc_buf_hdr_t's private physical data block pointer (b_pdata).
- * Each arc buffer (arc_buf_t) is being actively accessed by a specific ARC
- * consumer, and always contains uncompressed data. The ARC will provide
- * references to this data and will keep it cached until it is no longer in
- * use. Typically, the arc will try to cache only the L1ARC's physical data
- * block and will aggressively evict any arc_buf_t that is no longer referenced.
- * The amount of memory consumed by the arc_buf_t's can be seen via the
- * "overhead_size" kstat.
- *
  *
- *                arc_buf_hdr_t
- *                +-----------+
- *                |           |
- *                |           |
- *                |           |
- *                +-----------+
- * l2arc_buf_hdr_t|           |
- *                |           |
- *                +-----------+
- * l1arc_buf_hdr_t|           |
- *                |           |                 arc_buf_t
- *                |    b_buf  +------------>+---------+      arc_buf_t
- *                |           |             |b_next   +---->+---------+
- *                |  b_pdata  +-+           |---------|     |b_next   +-->NULL
- *                +-----------+ |           |         |     +---------+
- *                              |           |b_data   +-+   |         |
- *                              |           +---------+ |   |b_data   +-+
- *                              +->+------+             |   +---------+ |
- *                   (potentially) |      |             |               |
- *                     compressed  |      |             |               |
- *                        data     +------+             |               v
- *                                                      +->+------+     +------+
- *                                            uncompressed |      |     |      |
- *                                                data     |      |     |      |
- *                                                         +------+     +------+
- *
- * The L1ARC's data pointer, however, may or may not be uncompressed. The
- * ARC has the ability to store the physical data (b_pdata) associated with
- * the DVA of the arc_buf_hdr_t. Since the b_pdata is a copy of the on-disk
- * physical block, it will match its on-disk compression characteristics.
- * If the block on-disk is compressed, then the physical data block
- * in the cache will also be compressed and vice-versa. This behavior
- * can be disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the
+ * The L1ARC's data pointer may or may not be uncompressed. The ARC has the
+ * ability to store the physical data (b_pdata) associated with the DVA of the
+ * arc_buf_hdr_t. Since the b_pdata is a copy of the on-disk physical block,
+ * it will match its on-disk compression characteristics. This behavior can be
+ * disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the
  * compressed ARC functionality is disabled, the b_pdata will point to an
  * uncompressed version of the on-disk data.
  *
+ * Data in the L1ARC is not accessed by consumers of the ARC directly. Each
+ * arc_buf_hdr_t can have multiple ARC buffers (arc_buf_t) which reference it.
+ * Each ARC buffer (arc_buf_t) is being actively accessed by a specific ARC
+ * consumer. The ARC will provide references to this data and will keep it
+ * cached until it is no longer in use. The ARC caches only the L1ARC's physical
+ * data block and will evict any arc_buf_t that is no longer referenced. The
+ * amount of memory consumed by the arc_buf_ts' data buffers can be seen via the
+ * "overhead_size" kstat.
+ *
+ * Depending on the consumer, an arc_buf_t can be requested in uncompressed or
+ * compressed form. The typical case is that consumers will want uncompressed
+ * data, and when that happens a new data buffer is allocated where the data is
+ * decompressed for them to use. Currently the only consumer who wants
+ * compressed arc_buf_t's is "zfs send", when it streams data exactly as it
+ * exists on disk. When this happens, the arc_buf_t's data buffer is shared
+ * with the arc_buf_hdr_t.
+ *
+ * Here is a diagram showing an arc_buf_hdr_t referenced by two arc_buf_t's. The
+ * first one is owned by a compressed send consumer (and therefore references
+ * the same compressed data buffer as the arc_buf_hdr_t) and the second could be
+ * used by any other consumer (and has its own uncompressed copy of the data
+ * buffer).
+ *
+ *   arc_buf_hdr_t
+ *   +-----------+
+ *   | fields    |
+ *   | common to |
+ *   | L1- and   |
+ *   | L2ARC     |
+ *   +-----------+
+ *   | l2arc_buf_hdr_t
+ *   |           |
+ *   +-----------+
+ *   | l1arc_buf_hdr_t
+ *   |           |              arc_buf_t
+ *   | b_buf     +------------>+-----------+      arc_buf_t
+ *   | b_pdata   +-+           |b_next     +---->+-----------+
+ *   +-----------+ |           |-----------|     |b_next     +-->NULL
+ *                 |           |b_comp = T |     +-----------+
+ *                 |           |b_data     +-+   |b_comp = F |
+ *                 |           +-----------+ |   |b_data     +-+
+ *                 +->+------+               |   +-----------+ |
+ *        compressed  |      |               |                 |
+ *           data     |      |<--------------+                 | uncompressed
+ *                    +------+          compressed,            |     data
+ *                                        shared               +-->+------+
+ *                                         data                    |      |
+ *                                                                 |      |
+ *                                                                 +------+
+ *
  * When a consumer reads a block, the ARC must first look to see if the
- * arc_buf_hdr_t is cached. If the hdr is cached and already has an arc_buf_t,
- * then an additional arc_buf_t is allocated and the uncompressed data is
- * bcopied from the existing arc_buf_t. If the hdr is cached but does not
- * have an arc_buf_t, then the ARC allocates a new arc_buf_t and decompresses
- * the b_pdata contents into the arc_buf_t's b_data. If the arc_buf_hdr_t's
- * b_pdata is not compressed, then the block is shared with the newly
- * allocated arc_buf_t. This block sharing only occurs with one arc_buf_t
- * in the arc buffer chain. Sharing the block reduces the memory overhead
- * required when the hdr is caching uncompressed blocks or the compressed
- * arc functionality has been disabled via 'zfs_compressed_arc_enabled'.
+ * arc_buf_hdr_t is cached. If the hdr is cached then the ARC allocates a new
+ * arc_buf_t and either copies uncompressed data into a new data buffer from an
+ * existing uncompressed arc_buf_t, decompresses the hdr's b_pdata buffer into a
+ * new data buffer, or shares the hdr's b_pdata buffer, depending on whether the
+ * hdr is compressed and the desired compression characteristics of the
+ * arc_buf_t consumer. If the arc_buf_t ends up sharing data with the
+ * arc_buf_hdr_t and both of them are uncompressed then the arc_buf_t must be
+ * the last buffer in the hdr's b_buf list, however a shared compressed buf can
+ * be anywhere in the hdr's list.
  *
  * The diagram below shows an example of an uncompressed ARC hdr that is
- * sharing its data with an arc_buf_t:
+ * sharing its data with an arc_buf_t (note that the shared uncompressed buf is
+ * the last element in the buf list):
  *
  *                arc_buf_hdr_t
  *                +-----------+
@@ -225,20 +230,24 @@
  *                                    |                    +------+     |
  *                                    +---------------------------------+
  *
- * Writing to the arc requires that the ARC first discard the b_pdata
+ * Writing to the ARC requires that the ARC first discard the hdr's b_pdata
  * since the physical block is about to be rewritten. The new data contents
- * will be contained in the arc_buf_t (uncompressed). As the I/O pipeline
- * performs the write, it may compress the data before writing it to disk.
- * The ARC will be called with the transformed data and will bcopy the
- * transformed on-disk block into a newly allocated b_pdata.
+ * will be contained in the arc_buf_t. As the I/O pipeline performs the write,
+ * it may compress the data before writing it to disk. The ARC will be called
+ * with the transformed data and will bcopy the transformed on-disk block into
+ * a newly allocated b_pdata. Writes are always done into buffers which have
+ * either been loaned (and hence are new and don't have other readers) or
+ * buffers which have been released (and hence have their own hdr, if there
+ * were originally other readers of the buf's original hdr). This ensures that
+ * the ARC only needs to update a single buf and its hdr after a write occurs.
  *
  * When the L2ARC is in use, it will also take advantage of the b_pdata. The
  * L2ARC will always write the contents of b_pdata to the L2ARC. This means
- * that when compressed arc is enabled that the L2ARC blocks are identical
+ * that when compressed ARC is enabled that the L2ARC blocks are identical
  * to the on-disk block in the main data pool. This provides a significant
  * advantage since the ARC can leverage the bp's checksum when reading from the
  * L2ARC to determine if the contents are valid. However, if the compressed
- * arc is disabled, then the L2ARC's block must be transformed to look
+ * ARC is disabled, then the L2ARC's block must be transformed to look
  * like the physical block in the main data pool before comparing the
  * checksum and determining its validity.
  */
@@ -910,6 +919,7 @@ struct arc_callback {
 	void			*acb_private;
 	arc_done_func_t		*acb_done;
 	arc_buf_t		*acb_buf;
+	boolean_t		acb_compressed;
 	zio_t			*acb_zio_dummy;
 	arc_callback_t		*acb_next;
 };
@@ -961,7 +971,7 @@ typedef struct l1arc_buf_hdr {
 	zio_cksum_t		*b_freeze_cksum;
 #ifdef ZFS_DEBUG
 	/*
-	 * used for debugging wtih kmem_flags - by allocating and freeing
+	 * Used for debugging with kmem_flags - by allocating and freeing
 	 * b_thawed when the buffer is thawed, we get a record of the stack
 	 * trace that thawed it.
 	 */
@@ -1172,6 +1182,8 @@ sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_AR
 	HDR_COMPRESS_OFFSET, SPA_COMPRESSBITS, (cmp));
 
 #define	ARC_BUF_LAST(buf)	((buf)->b_next == NULL)
+#define	ARC_BUF_SHARED(buf)	((buf)->b_flags & ARC_BUF_FLAG_SHARED)
+#define	ARC_BUF_COMPRESSED(buf)	((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED)
 
 /*
  * Other sizes
@@ -1332,7 +1344,7 @@ static kmutex_t l2arc_free_on_write_mtx;
 static uint64_t l2arc_ndev;			/* number of devices */
 
 typedef struct l2arc_read_callback {
-	arc_buf_hdr_t		*l2rcb_hdr;		/* read buffer */
+	arc_buf_hdr_t		*l2rcb_hdr;		/* read header */
 	blkptr_t		l2rcb_bp;		/* original blkptr */
 	zbookmark_phys_t	l2rcb_zb;		/* original bookmark */
 	int			l2rcb_flags;		/* original flags */
@@ -1682,6 +1694,31 @@ retry:
 	}
 }
 
+/*
+ * This is the size that the buf occupies in memory. If the buf is compressed,
+ * it will correspond to the compressed size. You should use this method of
+ * getting the buf size unless you explicitly need the logical size.
+ */
+int32_t
+arc_buf_size(arc_buf_t *buf)
+{
+	return (ARC_BUF_COMPRESSED(buf) ?
+	    HDR_GET_PSIZE(buf->b_hdr) : HDR_GET_LSIZE(buf->b_hdr));
+}
+
+int32_t
+arc_buf_lsize(arc_buf_t *buf)
+{
+	return (HDR_GET_LSIZE(buf->b_hdr));
+}
+
+enum zio_compress
+arc_get_compression(arc_buf_t *buf)
+{
+	return (ARC_BUF_COMPRESSED(buf) ?
+	    HDR_GET_COMPRESS(buf->b_hdr) : ZIO_COMPRESS_OFF);
+}
+
 #define	ARC_MINTIME	(hz>>4) /* 62 ms */
 
 static inline boolean_t
@@ -1690,9 +1727,21 @@ arc_buf_is_shared(arc_buf_t *buf)
 	boolean_t shared = (buf->b_data != NULL &&
 	    buf->b_data == buf->b_hdr->b_l1hdr.b_pdata);
 	IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr));
+	IMPLY(shared, ARC_BUF_SHARED(buf));
+	IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf));
+
+	/*
+	 * It would be nice to assert arc_can_share() too, but the "hdr isn't
+	 * already being shared" requirement prevents us from doing that.
+	 */
+
 	return (shared);
 }
 
+/*
+ * Free the checksum associated with this header. If there is no checksum, this
+ * is a no-op.
+ */
 static inline void
 arc_cksum_free(arc_buf_hdr_t *hdr)
 {
@@ -1705,6 +1754,25 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
 	mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
 }
 
+/*
+ * Return true iff at least one of the bufs on hdr is not compressed.
+ */
+static boolean_t
+arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
+{
+	for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) {
+		if (!ARC_BUF_COMPRESSED(b)) {
+			return (B_TRUE);
+		}
+	}
+	return (B_FALSE);
+}
+
+/*
+ * If we've turned on the ZFS_DEBUG_MODIFY flag, verify that the buf's data
+ * matches the checksum that is stored in the hdr. If there is no checksum,
+ * or if the buf is compressed, this is a no-op.
+ */
 static void
 arc_cksum_verify(arc_buf_t *buf)
 {
@@ -1714,6 +1782,12 @@ arc_cksum_verify(arc_buf_t *buf)
 	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
 		return;
 
+	if (ARC_BUF_COMPRESSED(buf)) {
+		ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
+		    arc_hdr_has_uncompressed_buf(hdr));
+		return;
+	}
+
 	ASSERT(HDR_HAS_L1HDR(hdr));
 
 	mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
@@ -1721,7 +1795,8 @@ arc_cksum_verify(arc_buf_t *buf)
 		mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
 		return;
 	}
-	fletcher_2_native(buf->b_data, HDR_GET_LSIZE(hdr), NULL, &zc);
+
+	fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, &zc);
 	if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc))
 		panic("buffer modified while frozen!");
 	mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
@@ -1795,6 +1870,12 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, z
 	return (valid_cksum);
 }
 
+/*
+ * Given a buf full of data, if ZFS_DEBUG_MODIFY is enabled this computes a
+ * checksum and attaches it to the buf's hdr so that we can ensure that the buf
+ * isn't modified later on. If buf is compressed or there is already a checksum
+ * on the hdr, this is a no-op (we only checksum uncompressed bufs).
+ */
 static void
 arc_cksum_compute(arc_buf_t *buf)
 {
@@ -1804,14 +1885,21 @@ arc_cksum_compute(arc_buf_t *buf)
 		return;
 
 	ASSERT(HDR_HAS_L1HDR(hdr));
+
 	mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 	if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
+		ASSERT(arc_hdr_has_uncompressed_buf(hdr));
+		mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
+		return;
+	} else if (ARC_BUF_COMPRESSED(buf)) {
 		mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
 		return;
 	}
+
+	ASSERT(!ARC_BUF_COMPRESSED(buf));
 	hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
 	    KM_SLEEP);
-	fletcher_2_native(buf->b_data, HDR_GET_LSIZE(hdr), NULL,
+	fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL,
 	    hdr->b_l1hdr.b_freeze_cksum);
 	mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
 #ifdef illumos
@@ -1855,7 +1943,7 @@ arc_buf_watch(arc_buf_t *buf)
 		procctl_t ctl;
 		ctl.cmd = PCWATCH;
 		ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
-		ctl.prwatch.pr_size = HDR_GET_LSIZE(buf->b_hdr);
+		ctl.prwatch.pr_size = arc_buf_size(buf);
 		ctl.prwatch.pr_wflags = WA_WRITE;
 		result = write(arc_procfd, &ctl, sizeof (ctl));
 		ASSERT3U(result, ==, sizeof (ctl));
@@ -1877,6 +1965,12 @@ arc_buf_type(arc_buf_hdr_t *hdr)
 	return (type);
 }
 
+boolean_t
+arc_is_metadata(arc_buf_t *buf)
+{
+	return (HDR_ISTYPE_METADATA(buf->b_hdr) != 0);
+}
+
 static uint32_t
 arc_bufc_to_flags(arc_buf_contents_t type)
 {
@@ -1898,12 +1992,19 @@ arc_buf_thaw(arc_buf_t *buf)
 {
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 
-	if (zfs_flags & ZFS_DEBUG_MODIFY) {
-		if (hdr->b_l1hdr.b_state != arc_anon)
-			panic("modifying non-anon buffer!");
-		if (HDR_IO_IN_PROGRESS(hdr))
-			panic("modifying buffer while i/o in progress!");
-		arc_cksum_verify(buf);
+	ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+	ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+
+	arc_cksum_verify(buf);
+
+	/*
+	 * Compressed buffers do not manipulate the b_freeze_cksum or
+	 * allocate b_thawed.
+	 */
+	if (ARC_BUF_COMPRESSED(buf)) {
+		ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
+		    arc_hdr_has_uncompressed_buf(hdr));
+		return;
 	}
 
 	ASSERT(HDR_HAS_L1HDR(hdr));
@@ -1934,6 +2035,12 @@ arc_buf_freeze(arc_buf_t *buf)
 	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
 		return;
 
+	if (ARC_BUF_COMPRESSED(buf)) {
+		ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
+		    arc_hdr_has_uncompressed_buf(hdr));
+		return;
+	}
+
 	hash_lock = HDR_LOCK(hdr);
 	mutex_enter(hash_lock);
 
@@ -1942,7 +2049,6 @@ arc_buf_freeze(arc_buf_t *buf)
 	    hdr->b_l1hdr.b_state == arc_anon);
 	arc_cksum_compute(buf);
 	mutex_exit(hash_lock);
-
 }
 
 /*
@@ -1999,47 +2105,157 @@ arc_hdr_set_compress(arc_buf_hdr_t *hdr,
 	}
 }
 
+/*
+ * Looks for another buf on the same hdr which has the data decompressed, copies
+ * from it, and returns true. If no such buf exists, returns false.
+ */
+static boolean_t
+arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
+{
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+	boolean_t copied = B_FALSE;
+
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	ASSERT3P(buf->b_data, !=, NULL);
+	ASSERT(!ARC_BUF_COMPRESSED(buf));
+
+	for (arc_buf_t *from = hdr->b_l1hdr.b_buf; from != NULL;
+	    from = from->b_next) {
+		/* can't use our own data buffer */
+		if (from == buf) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201704251757.v3PHvh8i002900>