Date: Tue, 15 Jul 2014 04:53:34 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r268649 - in stable/10: cddl/contrib/opensolaris/cmd/zdb cddl/contrib/opensolaris/cmd/zfs cddl/contrib/opensolaris/cmd/zpool cddl/contrib/opensolaris/cmd/zstreamdump cddl/contrib/openso... Message-ID: <201407150453.s6F4rYxA020667@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Tue Jul 15 04:53:34 2014 New Revision: 268649 URL: http://svnweb.freebsd.org/changeset/base/268649 Log: MFC r268075: MFV r267565: 4757 ZFS embedded-data block pointers ("zero block compression") 4913 zfs release should not be subject to space checks Added: stable/10/sys/cddl/boot/zfs/blkptr.c - copied unchanged from r268075, head/sys/cddl/boot/zfs/blkptr.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c - copied unchanged from r268075, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/blkptr.h - copied unchanged from r268075, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/blkptr.h Modified: stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8 stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c stable/10/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h stable/10/cddl/sbin/zpool/Makefile stable/10/cddl/usr.bin/zinject/Makefile stable/10/cddl/usr.sbin/zdb/Makefile stable/10/cddl/usr.sbin/zhack/Makefile stable/10/sys/boot/zfs/zfsimpl.c stable/10/sys/cddl/boot/zfs/README stable/10/sys/cddl/boot/zfs/zfsimpl.h stable/10/sys/cddl/boot/zfs/zfssubr.c stable/10/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.c stable/10/sys/cddl/contrib/opensolaris/common/zfs/zfeature_common.h stable/10/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c stable/10/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h stable/10/sys/cddl/contrib/opensolaris/uts/common/Makefile.files stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c stable/10/sys/conf/files Directory Properties: stable/10/ (props changed) Modified: stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c Tue Jul 15 04:53:34 2014 (r268649) @@ -1059,8 +1059,17 @@ snprintf_blkptr_compact(char *blkbuf, si return; } - blkbuf[0] = '\0'; + if (BP_IS_EMBEDDED(bp)) { + (void) sprintf(blkbuf, + "EMBEDDED et=%u %llxL/%llxP B=%llu", + (int)BPE_GET_ETYPE(bp), + (u_longlong_t)BPE_GET_LSIZE(bp), + (u_longlong_t)BPE_GET_PSIZE(bp), + (u_longlong_t)bp->blk_birth); + return; + } + blkbuf[0] = '\0'; for (int i = 0; i < ndvas; i++) (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), "%llu:%llx:%llx ", @@ -1078,7 +1087,7 @@ snprintf_blkptr_compact(char *blkbuf, si "%llxL/%llxP F=%llu B=%llu/%llu", (u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); } @@ -1091,8 +1100,10 @@ print_indirect(blkptr_t *bp, const zbook char blkbuf[BP_SPRINTF_LEN]; int l; - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + if (!BP_IS_EMBEDDED(bp)) { + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + } (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); @@ -1146,10 +1157,10 @@ visit_indirect(spa_t *spa, const dnode_p err = visit_indirect(spa, dnp, cbp, &czb); if (err) break; - fill += cbp->blk_fill; + fill += BP_GET_FILL(cbp); } if (!err) - ASSERT3U(fill, ==, bp->blk_fill); + ASSERT3U(fill, ==, BP_GET_FILL(bp)); (void) arc_buf_remove_ref(buf, &buf); } @@ -1816,14 +1827,14 @@ dump_dir(objset_t *os) if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; - usedobjs = os->os_rootbp->blk_fill; + usedobjs = BP_GET_FILL(os->os_rootbp); refdbytes = os->os_spa->spa_dsl_pool-> dp_mos_dir->dd_phys->dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } - ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill); + ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); zdb_nicenum(refdbytes, numbuf); @@ -2134,6 +2145,9 @@ typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; + uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] + [BPE_PAYLOAD_SIZE]; uint64_t zcb_start; uint64_t zcb_lastprint; uint64_t zcb_totalasize; @@ -2188,6 +2202,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t * } + if (BP_IS_EMBEDDED(bp)) { + zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; + zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] + [BPE_GET_PSIZE(bp)]++; + return; + } + if (dump_opt['L']) return; @@ -2287,7 +2308,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); - if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { + if (!BP_IS_EMBEDDED(bp) && + (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { size_t size = BP_GET_PSIZE(bp); void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; @@ -2479,7 +2501,7 @@ dump_block_stats(spa_t *spa) zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; - int leaks = 0; + boolean_t leaks = B_FALSE; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", @@ -2567,7 +2589,7 @@ dump_block_stats(spa_t *spa) (u_longlong_t)total_alloc, (dump_opt['L']) ? "unreachable" : "leaked", (longlong_t)(total_alloc - total_found)); - leaks = 1; + leaks = B_TRUE; } if (tzb->zb_count == 0) @@ -2599,6 +2621,23 @@ dump_block_stats(spa_t *spa) (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n", (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); + for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { + if (zcb.zcb_embedded_blocks[i] == 0) + continue; + (void) printf("\n"); + (void) printf("\tadditional, non-pointer bps of type %u: " + "%10llu\n", + i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); + + if (dump_opt['b'] >= 3) { + (void) printf("\t number of (compressed) bytes: " + "number of bps\n"); + dump_histogram(zcb.zcb_embedded_histogram[i], + sizeof (zcb.zcb_embedded_histogram[i]) / + sizeof (zcb.zcb_embedded_histogram[i][0]), 0); + } + } + if (tzb->zb_ditto_samevdev != 0) { (void) printf("\tDittoed blocks on same vdev: %llu\n", (longlong_t)tzb->zb_ditto_samevdev); @@ -2711,14 +2750,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilo avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (BP_IS_HOLE(bp)) + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { (void) printf("traversing objset %llu, %llu objects, " "%lu blocks so far\n", (u_longlong_t)zb->zb_objset, - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), avl_numnodes(t)); } Modified: stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8 ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8 Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8 Tue Jul 15 04:53:34 2014 (r268649) @@ -30,7 +30,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 27, 2014 +.Dd June 30, 2014 .Dt ZFS 8 .Os .Sh NAME @@ -179,11 +179,12 @@ .Ar bookmark .Nm .Cm send -.Op Fl DnPpRv +.Op Fl DnPpRve .Op Fl i Ar snapshot | Fl I Ar snapshot .Ar snapshot .Nm .Cm send +.Op Fl e .Op Fl i Ar snapshot Ns | Ns bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm @@ -2476,7 +2477,7 @@ feature. .It Xo .Nm .Cm send -.Op Fl DnPpRv +.Op Fl DnPpRve .Op Fl i Ar snapshot | Fl I Ar snapshot .Ar snapshot .Xc @@ -2548,6 +2549,29 @@ be used regardless of the dataset's property, but performance will be much better if the filesystem uses a dedup-capable checksum (eg. .Sy sha256 ) . +.It Fl e +Generate a more compact stream by using WRITE_EMBEDDED records for blocks +which are stored more compactly on disk by the +.Sy embedded_data +pool +feature. +This flag has no effect if the +.Sy embedded_data +feature is +disabled. +The receiving system must have the +.Sy embedded_data +feature +enabled. +If the +.Sy lz4_compress +feature is active on the sending system, +then the receiving system must have that feature enabled as well. +See +.Xr zpool-features 7 +for details on ZFS feature flags and the +.Sy embedded_data +feature. .It Fl p Include the dataset's properties in the stream. This flag is implicit when .Fl R @@ -2572,6 +2596,7 @@ on future versions of .It Xo .Nm .Cm send +.Op Fl e .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc @@ -2597,6 +2622,29 @@ specified as the last component of the n If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. +.It Fl e +Generate a more compact stream by using WRITE_EMBEDDED records for blocks +which are stored more compactly on disk by the +.Sy embedded_data +pool +feature. +This flag has no effect if the +.Sy embedded_data +feature is +disabled. +The receiving system must have the +.Sy embedded_data +feature +enabled. +If the +.Sy lz4_compress +feature is active on the sending system, +then the receiving system must have that feature enabled as well. +See +.Xr zpool-features 7 +for details on ZFS feature flags and the +.Sy embedded_data +feature. .El .It Xo .Nm Modified: stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c Tue Jul 15 04:53:34 2014 (r268649) @@ -274,9 +274,9 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] <snapshot>\n")); case HELP_SEND: - return (gettext("\tsend [-DnPpRv] [-[iI] snapshot] " + return (gettext("\tsend [-DnPpRve] [-[iI] snapshot] " "<snapshot>\n" - "\tsend [-i snapshot|bookmark] " + "\tsend [-e] [-i snapshot|bookmark] " "<filesystem|volume|snapshot>\n")); case HELP_SET: return (gettext("\tset <property=value> " @@ -590,6 +590,7 @@ finish_progress(char *done) free(pt_header); pt_header = NULL; } + /* * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol> * @@ -3368,6 +3369,7 @@ rollback_check_dependent(zfs_handle_t *z zfs_close(zhp); return (0); } + /* * Report any snapshots more recent than the one specified. Used when '-r' is * not specified. We reuse this same callback for the snapshot dependents - if @@ -3707,7 +3709,7 @@ zfs_do_send(int argc, char **argv) boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) { switch (c) { case 'i': if (fromname) @@ -3742,6 +3744,9 @@ zfs_do_send(int argc, char **argv) case 'n': flags.dryrun = B_TRUE; break; + case 'e': + flags.embed_data = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3780,6 +3785,7 @@ zfs_do_send(int argc, char **argv) if (strchr(argv[0], '@') == NULL || (fromname && strchr(fromname, '#') != NULL)) { char frombuf[ZFS_MAXNAMELEN]; + enum lzc_send_flags lzc_flags = 0; if (flags.replicate || flags.doall || flags.props || flags.dedup || flags.dryrun || flags.verbose || @@ -3794,6 +3800,9 @@ zfs_do_send(int argc, char **argv) if (zhp == NULL) return (1); + if (flags.embed_data) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { /* @@ -3807,7 +3816,7 @@ zfs_do_send(int argc, char **argv) (void) strlcat(frombuf, fromname, sizeof (frombuf)); fromname = frombuf; } - err = zfs_send_one(zhp, fromname, STDOUT_FILENO); + err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags); zfs_close(zhp); return (err != 0); } Modified: stable/10/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 Tue Jul 15 04:53:34 2014 (r268649) @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 23, 2014 +.Dd June 30, 2014 .Dt ZPOOL-FEATURES 7 .Os .Sh NAME @@ -396,6 +396,34 @@ This feature becomes as soon as it is enabled and will never return to being .Sy enabled . +.It Sy embedded_data +.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:embedded_data" +.It GUID Ta com.delphix:embedded_data +.It READ\-ONLY COMPATIBLE Ta no +.It DEPENDENCIES Ta none +.El +.Pp +This feature improves the performance and compression ratio of +highly-compressible blocks. +Blocks whose contents can compress to 112 bytes +or smaller can take advantage of this feature. +.Pp +When this feature is enabled, the contents of highly-compressible blocks are +stored in the block "pointer" itself +.Po a misnomer in this case, as it contains +the compresseed data, rather than a pointer to its location on disk +.Pc . +Thus +the space of the block +.Pq one sector, typically 512 bytes or 4KB +is saved, +and no additional i/o is needed to read and write the data block. +.Pp +This feature becomes +.Sy active +as soon as it is enabled and will +never return to being +.Sy enabled . .El .Sh SEE ALSO .Xr zpool 8 Modified: stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Tue Jul 15 04:53:34 2014 (r268649) @@ -49,7 +49,6 @@ */ #define DUMP_GROUPING 4 -uint64_t drr_record_count[DRR_NUMTYPES]; uint64_t total_write_size = 0; uint64_t total_stream_len = 0; FILE *send_stream = 0; @@ -123,7 +122,7 @@ print_block(char *buf, int length) * Start printing ASCII characters at a constant offset, after * the hex prints. Leave 3 characters per byte on a line (2 digit * hex number plus 1 space) plus spaces between characters and - * groupings + * groupings. */ int ascii_start = BYTES_PER_LINE * 3 + BYTES_PER_LINE / DUMP_GROUPING + 2; @@ -160,6 +159,8 @@ int main(int argc, char *argv[]) { char *buf = malloc(INITIAL_BUFLEN); + uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + uint64_t total_records = 0; dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -170,6 +171,7 @@ main(int argc, char *argv[]) struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref; struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; + struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; char c; boolean_t verbose = B_FALSE; boolean_t first = B_TRUE; @@ -264,6 +266,7 @@ main(int argc, char *argv[]) } drr_record_count[drr->drr_type]++; + total_records++; switch (drr->drr_type) { case DRR_BEGIN: @@ -376,8 +379,8 @@ main(int argc, char *argv[]) drro->drr_bonuslen); } if (drro->drr_bonuslen > 0) { - (void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen, - 8), &zc); + (void) ssread(buf, + P2ROUNDUP(drro->drr_bonuslen, 8), &zc); if (dump) { print_block(buf, P2ROUNDUP(drro->drr_bonuslen, 8)); @@ -506,6 +509,38 @@ main(int argc, char *argv[]) print_block(buf, drrs->drr_length); } break; + case DRR_WRITE_EMBEDDED: + if (do_byteswap) { + drrwe->drr_object = + BSWAP_64(drrwe->drr_object); + drrwe->drr_offset = + BSWAP_64(drrwe->drr_offset); + drrwe->drr_length = + BSWAP_64(drrwe->drr_length); + drrwe->drr_toguid = + BSWAP_64(drrwe->drr_toguid); + drrwe->drr_lsize = + BSWAP_32(drrwe->drr_lsize); + drrwe->drr_psize = + BSWAP_32(drrwe->drr_psize); + } + if (verbose) { + (void) printf("WRITE_EMBEDDED object = %llu " + "offset = %llu length = %llu\n" + "toguid = %llx comp = %u etype = %u " + "lsize = %u psize = %u\n", + (u_longlong_t)drrwe->drr_object, + (u_longlong_t)drrwe->drr_offset, + (u_longlong_t)drrwe->drr_length, + (u_longlong_t)drrwe->drr_toguid, + drrwe->drr_compression, + drrwe->drr_etype, + drrwe->drr_lsize, + drrwe->drr_psize); + } + (void) ssread(buf, + P2ROUNDUP(drrwe->drr_psize, 8), &zc); + break; } pcksum = zc; } @@ -524,18 +559,16 @@ main(int argc, char *argv[]) (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); (void) printf("\tTotal DRR_WRITE records = %lld\n", (u_longlong_t)drr_record_count[DRR_WRITE]); + (void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n", + (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]); + (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n", + (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]); (void) printf("\tTotal DRR_FREE records = %lld\n", (u_longlong_t)drr_record_count[DRR_FREE]); (void) printf("\tTotal DRR_SPILL records = %lld\n", (u_longlong_t)drr_record_count[DRR_SPILL]); (void) printf("\tTotal records = %lld\n", - (u_longlong_t)(drr_record_count[DRR_BEGIN] + - drr_record_count[DRR_OBJECT] + - drr_record_count[DRR_FREEOBJECTS] + - drr_record_count[DRR_WRITE] + - drr_record_count[DRR_FREE] + - drr_record_count[DRR_SPILL] + - drr_record_count[DRR_END])); + (u_longlong_t)total_records); (void) printf("\tTotal write size = %lld (0x%llx)\n", (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); (void) printf("\tTotal stream length = %lld (0x%llx)\n", Modified: stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c Tue Jul 15 04:53:34 2014 (r268649) @@ -53,7 +53,7 @@ * At random times, the child self-immolates with a SIGKILL. * This is the software equivalent of pulling the power cord. * The parent then runs the test again, using the existing - * storage pool, as many times as desired. If backwards compatability + * storage pool, as many times as desired. If backwards compatibility * testing is enabled ztest will sometimes run the "older" version * of ztest after a SIGKILL. * @@ -1267,13 +1267,13 @@ static void ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) { - ASSERT(bt->bt_magic == BT_MAGIC); - ASSERT(bt->bt_objset == dmu_objset_id(os)); - ASSERT(bt->bt_object == object); - ASSERT(bt->bt_offset == offset); - ASSERT(bt->bt_gen <= gen); - ASSERT(bt->bt_txg <= txg); - ASSERT(bt->bt_crtxg == crtxg); + ASSERT3U(bt->bt_magic, ==, BT_MAGIC); + ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); + ASSERT3U(bt->bt_object, ==, object); + ASSERT3U(bt->bt_offset, ==, offset); + ASSERT3U(bt->bt_gen, <=, gen); + ASSERT3U(bt->bt_txg, <=, txg); + ASSERT3U(bt->bt_crtxg, ==, crtxg); } static ztest_block_tag_t * @@ -3472,6 +3472,11 @@ ztest_dsl_dataset_promote_busy(ztest_ds_ if (error) fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); + if (error == ENOSPC) { + dmu_objset_disown(os, FTAG); + ztest_record_enospc(FTAG); + goto out; + } if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); @@ -3627,11 +3632,19 @@ ztest_dmu_read_write(ztest_ds_t *zd, uin return; } - dmu_object_set_checksum(os, bigobj, - (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); + enum zio_checksum cksum; + do { + cksum = (enum zio_checksum) + ztest_random_dsl_prop(ZFS_PROP_CHECKSUM); + } while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS); + dmu_object_set_checksum(os, bigobj, cksum, tx); - dmu_object_set_compress(os, bigobj, - (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); + enum zio_compress comp; + do { + comp = (enum zio_compress) + ztest_random_dsl_prop(ZFS_PROP_COMPRESSION); + } while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS); + dmu_object_set_compress(os, bigobj, comp, tx); /* * For each index from n to n + s, verify that the existing bufwad @@ -4711,8 +4724,13 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, error = dsl_dataset_user_hold(holds, 0, NULL); fnvlist_free(holds); - if (error) - fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); + if (error == ENOSPC) { + ztest_record_enospc("dsl_dataset_user_hold"); + goto out; + } else if (error) { + fatal(0, "dsl_dataset_user_hold(%s, %s) = %u", + fullname, tag, error); + } error = dsl_destroy_snapshot(fullname, B_FALSE); if (error != EBUSY) { @@ -5165,7 +5183,7 @@ ztest_run_zdb(char *pool) isa = strdup(isa); /* LINTED */ (void) sprintf(bin, - "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s", + "/usr/sbin%.*s/zdb -bcc%s%s -d -U %s %s", isalen, isa, ztest_opts.zo_verbose >= 3 ? "s" : "", Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h ============================================================================== --- stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h Tue Jul 15 04:53:34 2014 (r268649) @@ -42,6 +42,7 @@ #include <sys/fs/zfs.h> #include <sys/avl.h> #include <sys/zfs_ioctl.h> +#include <libzfs_core.h> #ifdef __cplusplus extern "C" { @@ -607,13 +608,16 @@ typedef struct sendflags { /* show progress (ie. -v) */ boolean_t progress; + + /* WRITE_EMBEDDED records of type DATA are permitted */ + boolean_t embed_data; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); -extern int zfs_send_one(zfs_handle_t *, const char *, int); +extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Tue Jul 15 04:53:34 2014 (r268649) @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>. * All rights reserved. @@ -45,6 +45,7 @@ #include <time.h> #include <libzfs.h> +#include <libzfs_core.h> #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -222,6 +223,7 @@ cksummer(void *arg) struct drr_object *drro = &thedrr.drr_u.drr_object; struct drr_write *drrw = &thedrr.drr_u.drr_write; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; + struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; FILE *ofp; int outfd; dmu_replay_record_t wbr_drr = {0}; @@ -418,6 +420,20 @@ cksummer(void *arg) break; } + case DRR_WRITE_EMBEDDED: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + (void) ssread(buf, + P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp); + if (cksum_and_write(buf, + P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), + &stream_cksum, outfd) == -1) + goto out; + break; + } + case DRR_FREE: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), @@ -799,7 +815,7 @@ typedef struct send_dump_data { char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable, progress; + boolean_t verbose, dryrun, parsable, progress, embed_data; int outfd; boolean_t err; nvlist_t *fss; @@ -878,7 +894,8 @@ estimate_ioctl(zfs_handle_t *zhp, uint64 */ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, - boolean_t fromorigin, int outfd, nvlist_t *debugnv) + boolean_t fromorigin, int outfd, enum lzc_send_flags flags, + nvlist_t *debugnv) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; @@ -892,6 +909,7 @@ dump_ioctl(zfs_handle_t *zhp, const char zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; + zc.zc_flags = flags; VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); if (fromsnap && fromsnap[0] != '\0') { @@ -1144,8 +1162,12 @@ dump_snapshot(zfs_handle_t *zhp, void *a } } + enum lzc_send_flags flags = 0; + if (sdd->embed_data) + flags |= LZC_SEND_FLAG_EMBED_DATA; + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, - fromorigin, sdd->outfd, sdd->debugnv); + fromorigin, sdd->outfd, flags, sdd->debugnv); if (sdd->progress) { (void) pthread_cancel(tid); @@ -1489,6 +1511,7 @@ zfs_send(zfs_handle_t *zhp, const char * sdd.parsable = flags->parsable; sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; + sdd.embed_data = flags->embed_data; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) @@ -1620,7 +1643,8 @@ err_out: } int -zfs_send_one(zfs_handle_t *zhp, const char *from, int fd) +zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, + enum lzc_send_flags flags) { int err; libzfs_handle_t *hdl = zhp->zfs_hdl; @@ -1629,7 +1653,7 @@ zfs_send_one(zfs_handle_t *zhp, const ch (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); - err = lzc_send(zhp->zfs_name, from, fd); + err = lzc_send(zhp->zfs_name, from, fd, flags); if (err != 0) { switch (errno) { case EXDEV: @@ -2576,6 +2600,16 @@ recv_skip(libzfs_handle_t *hdl, int fd, (void) recv_read(hdl, fd, buf, drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); break; + case DRR_WRITE_EMBEDDED: + if (byteswap) { + drr->drr_u.drr_write_embedded.drr_psize = + BSWAP_32(drr->drr_u.drr_write_embedded. + drr_psize); + } + (void) recv_read(hdl, fd, buf, + P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize, + 8), B_FALSE, NULL); + break; case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c ============================================================================== --- stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c Tue Jul 15 04:53:34 2014 (r268649) @@ -486,6 +486,8 @@ lzc_get_holds(const char *snapname, nvli } /* + * Generate a zfs send stream for the specified snapshot and write it to + * the specified file descriptor. * * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") * @@ -499,9 +501,15 @@ lzc_get_holds(const char *snapname, nvli * snapshot in the origin, etc. * * "fd" is the file descriptor to write the send stream to. + * + * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted + * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, + * which the receiving system must support (as indicated by support + * for the "embedded_data" feature). */ int -lzc_send(const char *snapname, const char *from, int fd) +lzc_send(const char *snapname, const char *from, int fd, + enum lzc_send_flags flags) { nvlist_t *args; int err; @@ -510,6 +518,8 @@ lzc_send(const char *snapname, const cha fnvlist_add_int32(args, "fd", fd); if (from != NULL) fnvlist_add_string(args, "fromsnap", from); + if (flags & LZC_SEND_FLAG_EMBED_DATA) + fnvlist_add_boolean(args, "embedok"); err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); nvlist_free(args); return (err); Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h ============================================================================== --- stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.h Tue Jul 15 04:53:34 2014 (r268649) @@ -53,7 +53,11 @@ int lzc_hold(nvlist_t *, int, nvlist_t * int lzc_release(nvlist_t *, nvlist_t **); int lzc_get_holds(const char *, nvlist_t **); -int lzc_send(const char *, const char *, int); +enum lzc_send_flags { + LZC_SEND_FLAG_EMBED_DATA = 1 << 0 +}; + +int lzc_send(const char *, const char *, int, enum lzc_send_flags); int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); int lzc_send_space(const char *, const char *, uint64_t *); Modified: stable/10/cddl/sbin/zpool/Makefile ============================================================================== --- stable/10/cddl/sbin/zpool/Makefile Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/sbin/zpool/Makefile Tue Jul 15 04:53:34 2014 (r268649) @@ -18,6 +18,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/cont CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common Modified: stable/10/cddl/usr.bin/zinject/Makefile ============================================================================== --- stable/10/cddl/usr.bin/zinject/Makefile Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/usr.bin/zinject/Makefile Tue Jul 15 04:53:34 2014 (r268649) @@ -11,6 +11,7 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/ CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzfs_core/common CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs Modified: stable/10/cddl/usr.sbin/zdb/Makefile ============================================================================== --- stable/10/cddl/usr.sbin/zdb/Makefile Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/usr.sbin/zdb/Makefile Tue Jul 15 04:53:34 2014 (r268649) @@ -15,6 +15,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/comp CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common Modified: stable/10/cddl/usr.sbin/zhack/Makefile ============================================================================== --- stable/10/cddl/usr.sbin/zhack/Makefile Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/cddl/usr.sbin/zhack/Makefile Tue Jul 15 04:53:34 2014 (r268649) @@ -14,6 +14,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/comp CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common +CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common Modified: stable/10/sys/boot/zfs/zfsimpl.c ============================================================================== --- stable/10/sys/boot/zfs/zfsimpl.c Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/sys/boot/zfs/zfsimpl.c Tue Jul 15 04:53:34 2014 (r268649) @@ -56,6 +56,7 @@ static const char *features_for_read[] = "org.illumos:lz4_compress", "com.delphix:hole_birth", "com.delphix:extensible_dataset", + "com.delphix:embedded_data", NULL }; @@ -1133,6 +1134,34 @@ zio_read(const spa_t *spa, const blkptr_ void *pbuf; int i, error; + /* + * Process data embedded in block pointer + */ + if (BP_IS_EMBEDDED(bp)) { + ASSERT(BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); + + size = BPE_GET_PSIZE(bp); + ASSERT(size <= BPE_PAYLOAD_SIZE); + + if (cpfunc != ZIO_COMPRESS_OFF) + pbuf = zfs_alloc(size); + else + pbuf = buf; + + decode_embedded_bp_compressed(bp, pbuf); + error = 0; + + if (cpfunc != ZIO_COMPRESS_OFF) { + error = zio_decompress_data(cpfunc, pbuf, + size, buf, BP_GET_LSIZE(bp)); + zfs_free(pbuf, size); + } + if (error != 0) + printf("ZFS: i/o error - unable to decompress block pointer data, error %d\n", + error); + return (error); + } + error = EIO; for (i = 0; i < SPA_DVAS_PER_BP; i++) { Modified: stable/10/sys/cddl/boot/zfs/README ============================================================================== --- stable/10/sys/cddl/boot/zfs/README Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/sys/cddl/boot/zfs/README Tue Jul 15 04:53:34 2014 (r268649) @@ -7,9 +7,10 @@ are used by the ZFS bootstrap: sha256.c checksum support lz4.c compression support lzjb.c compression support + blkptr.c ZFS embedded-data block pointers support zfssubr.c checksum, compression and raidz support zfsimpl.h mostly describing the physical layout -The files fletcher.c, lzjb.c and sha256.c are largely identical to the -ZFS base code (with write support removed) and could be shared but -that might complicate future imports from OpenSolaris. +The files fletcher.c, lzjb.c, lz4.c, sha256.c and blkptr.c are largely identical +to the ZFS base code (with write support removed) and could be shared but that +might complicate future imports from Illumos. Copied: stable/10/sys/cddl/boot/zfs/blkptr.c (from r268075, head/sys/cddl/boot/zfs/blkptr.c) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ stable/10/sys/cddl/boot/zfs/blkptr.c Tue Jul 15 04:53:34 2014 (r268649, copy of r268075, head/sys/cddl/boot/zfs/blkptr.c) @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +/* + * Embedded-data Block Pointers + * + * Normally, block pointers point (via their DVAs) to a block which holds data. + * If the data that we need to store is very small, this is an inefficient + * use of space, because a block must be at minimum 1 sector (typically 512 + * bytes or 4KB). Additionally, reading these small blocks tends to generate + * more random reads. + * + * Embedded-data Block Pointers allow small pieces of data (the "payload", + * up to 112 bytes) to be stored in the block pointer itself, instead of + * being pointed to. The "Pointer" part of this name is a bit of a + * misnomer, as nothing is pointed to. + * + * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to + * be embedded in the block pointer. The logic for this is handled in + * the SPA, by the zio pipeline. Therefore most code outside the zio + * pipeline doesn't need special-cases to handle these block pointers. + * + * See spa.h for details on the exact layout of embedded block pointers. + */ + +/* + * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be + * more than BPE_PAYLOAD_SIZE bytes). + */ +void +decode_embedded_bp_compressed(const blkptr_t *bp, void *buf) +{ + int psize; + uint8_t *buf8 = buf; + uint64_t w = 0; + const uint64_t *bp64 = (const uint64_t *)bp; + + ASSERT(BP_IS_EMBEDDED(bp)); + + psize = BPE_GET_PSIZE(bp); + + /* + * Decode the words of the block pointer into the byte array. + * Low bits of first word are the first byte (little endian). + */ + for (int i = 0; i < psize; i++) { + if (i % sizeof (w) == 0) { + /* beginning of a word */ + ASSERT3P(bp64, <, bp + 1); + w = *bp64; + bp64++; + if (!BPE_IS_PAYLOADWORD(bp, bp64)) + bp64++; + } + buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY); + } +} Modified: stable/10/sys/cddl/boot/zfs/zfsimpl.h ============================================================================== --- stable/10/sys/cddl/boot/zfs/zfsimpl.h Tue Jul 15 04:44:06 2014 (r268648) +++ stable/10/sys/cddl/boot/zfs/zfsimpl.h Tue Jul 15 04:53:34 2014 (r268649) @@ -55,9 +55,14 @@ /* * Copyright 2013 by Saso Kiselkov. All rights reserved. */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + */ *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201407150453.s6F4rYxA020667>