Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 10 Dec 2013 13:33:56 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r259170 - vendor-sys/illumos/dist/common/zfs vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zdb vendor/illumos/dist/cmd/...
Message-ID:  <201312101333.rBADXuI5052602@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Tue Dec 10 13:33:56 2013
New Revision: 259170
URL: http://svnweb.freebsd.org/changeset/base/259170

Log:
  4370 avoid transmitting holes during zfs send
  
  4371 DMU code clean up
  
  illumos/illumos-gate@43466aae47bfcd2ad9bf501faec8e75c08095e4f

Modified:
  vendor-sys/illumos/dist/common/zfs/zfeature_common.c
  vendor-sys/illumos/dist/common/zfs/zfeature_common.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/bptree.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/ddt.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_diff.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_traverse.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dnode_sync.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_destroy.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dbuf.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dmu.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa_impl.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/vdev.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfeature.h
  vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_cache.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/vdev_disk.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zfeature.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zio.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/zvol.c

Changes in other areas also in this revision:
Modified:
  vendor/illumos/dist/cmd/zdb/zdb.c
  vendor/illumos/dist/cmd/zdb/zdb_il.c
  vendor/illumos/dist/cmd/zhack/zhack.c
  vendor/illumos/dist/man/man5/zpool-features.5

Modified: vendor-sys/illumos/dist/common/zfs/zfeature_common.c
==============================================================================
--- vendor-sys/illumos/dist/common/zfs/zfeature_common.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/common/zfs/zfeature_common.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -115,10 +115,21 @@ zfeature_lookup_name(const char *name, s
 	return (ENOENT);
 }
 
+boolean_t
+zfeature_depends_on(spa_feature_t fid, spa_feature_t check) {
+	zfeature_info_t *feature = &spa_feature_table[fid];
+
+	for (int i = 0; feature->fi_depends[i] != SPA_FEATURE_NONE; i++) {
+		if (feature->fi_depends[i] == check)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
 static void
 zfeature_register(spa_feature_t fid, const char *guid, const char *name,
     const char *desc, boolean_t readonly, boolean_t mos,
-    const spa_feature_t *deps)
+    boolean_t activate_on_enable, const spa_feature_t *deps)
 {
 	zfeature_info_t *feature = &spa_feature_table[fid];
 	static spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
@@ -138,6 +149,7 @@ zfeature_register(spa_feature_t fid, con
 	feature->fi_desc = desc;
 	feature->fi_can_readonly = readonly;
 	feature->fi_mos = mos;
+	feature->fi_activate_on_enable = activate_on_enable;
 	feature->fi_depends = deps;
 }
 
@@ -146,21 +158,43 @@ zpool_feature_init(void)
 {
 	zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
 	    "com.delphix:async_destroy", "async_destroy",
-	    "Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
+	    "Destroy filesystems asynchronously.", B_TRUE, B_FALSE,
+	    B_FALSE, NULL);
+
 	zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
 	    "com.delphix:empty_bpobj", "empty_bpobj",
-	    "Snapshots use less space.", B_TRUE, B_FALSE, NULL);
+	    "Snapshots use less space.", B_TRUE, B_FALSE,
+	    B_FALSE, NULL);
+
 	zfeature_register(SPA_FEATURE_LZ4_COMPRESS,
 	    "org.illumos:lz4_compress", "lz4_compress",
-	    "LZ4 compression algorithm support.", B_FALSE, B_FALSE, NULL);
+	    "LZ4 compression algorithm support.", B_FALSE, B_FALSE,
+	    B_FALSE, NULL);
+
 	zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
 	    "com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump",
-	    "Crash dumps to multiple vdev pools.", B_FALSE, B_FALSE, NULL);
+	    "Crash dumps to multiple vdev pools.", B_FALSE, B_FALSE,
+	    B_FALSE, NULL);
+
 	zfeature_register(SPA_FEATURE_SPACEMAP_HISTOGRAM,
 	    "com.delphix:spacemap_histogram", "spacemap_histogram",
-	    "Spacemaps maintain space histograms.", B_TRUE, B_FALSE, NULL);
+	    "Spacemaps maintain space histograms.", B_TRUE, B_FALSE,
+	    B_FALSE, NULL);
+
+	zfeature_register(SPA_FEATURE_ENABLED_TXG,
+	    "com.delphix:enabled_txg", "enabled_txg",
+	    "Record txg at which a feature is enabled", B_TRUE, B_FALSE,
+	    B_FALSE, NULL);
+
+	static spa_feature_t hole_birth_deps[] = { SPA_FEATURE_ENABLED_TXG,
+	    SPA_FEATURE_NONE };
+	zfeature_register(SPA_FEATURE_HOLE_BIRTH,
+	    "com.delphix:hole_birth", "hole_birth",
+	    "Retain hole birth txg for more precise zfs send",
+	    B_FALSE, B_TRUE, B_TRUE, hole_birth_deps);
+
 	zfeature_register(SPA_FEATURE_EXTENSIBLE_DATASET,
 	    "com.delphix:extensible_dataset", "extensible_dataset",
 	    "Enhanced dataset functionality, used by other features.",
-	    B_FALSE, B_FALSE, NULL);
+	    B_FALSE, B_FALSE, B_FALSE, NULL);
 }

Modified: vendor-sys/illumos/dist/common/zfs/zfeature_common.h
==============================================================================
--- vendor-sys/illumos/dist/common/zfs/zfeature_common.h	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/common/zfs/zfeature_common.h	Tue Dec 10 13:33:56 2013	(r259170)
@@ -45,10 +45,14 @@ typedef enum spa_feature {
 	SPA_FEATURE_LZ4_COMPRESS,
 	SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
 	SPA_FEATURE_SPACEMAP_HISTOGRAM,
+	SPA_FEATURE_ENABLED_TXG,
+	SPA_FEATURE_HOLE_BIRTH,
 	SPA_FEATURE_EXTENSIBLE_DATASET,
 	SPA_FEATURES
 } spa_feature_t;
 
+#define	SPA_FEATURE_DISABLED	(-1ULL)
+
 typedef struct zfeature_info {
 	spa_feature_t fi_feature;
 	const char *fi_uname;	/* User-facing feature name */
@@ -56,6 +60,8 @@ typedef struct zfeature_info {
 	const char *fi_desc;	/* Feature description */
 	boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
 	boolean_t fi_mos;	/* Is the feature necessary to read the MOS? */
+	/* Activate this feature at the same time it is enabled */
+	boolean_t fi_activate_on_enable;
 	/* array of dependencies, terminated by SPA_FEATURE_NONE */
 	const spa_feature_t *fi_depends;
 } zfeature_info_t;
@@ -70,6 +76,7 @@ extern boolean_t zfeature_is_valid_guid(
 
 extern boolean_t zfeature_is_supported(const char *);
 extern int zfeature_lookup_name(const char *name, spa_feature_t *res);
+extern boolean_t zfeature_depends_on(spa_feature_t fid, spa_feature_t check);
 
 extern void zpool_feature_init(void);
 

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/arc.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -727,7 +727,7 @@ buf_hash(uint64_t spa, const dva_t *dva,
 #define	BUF_EMPTY(buf)						\
 	((buf)->b_dva.dva_word[0] == 0 &&			\
 	(buf)->b_dva.dva_word[1] == 0 &&			\
-	(buf)->b_birth == 0)
+	(buf)->b_cksum0 == 0)
 
 #define	BUF_EQUAL(spa, dva, birth, buf)				\
 	((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) &&	\
@@ -3509,9 +3509,13 @@ arc_write_done(zio_t *zio)
 	ASSERT(hdr->b_acb == NULL);
 
 	if (zio->io_error == 0) {
-		hdr->b_dva = *BP_IDENTITY(zio->io_bp);
-		hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
-		hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
+		if (BP_IS_HOLE(zio->io_bp)) {
+			buf_discard_identity(hdr);
+		} else {
+			hdr->b_dva = *BP_IDENTITY(zio->io_bp);
+			hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
+			hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
+		}
 	} else {
 		ASSERT(BUF_EMPTY(hdr));
 	}

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/bptree.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/bptree.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/bptree.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/arc.h>
@@ -141,7 +141,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zil
 	int err;
 	struct bptree_args *ba = arg;
 
-	if (bp == NULL)
+	if (BP_IS_HOLE(bp))
 		return (0);
 
 	err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -455,10 +455,9 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
 	mutex_enter(&db->db_mtx);
 	if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
 		int blksz = db->db.db_size;
-		spa_t *spa;
+		spa_t *spa = db->db_objset->os_spa;
 
 		mutex_exit(&db->db_mtx);
-		DB_GET_SPA(&spa, db);
 		abuf = arc_loan_buf(spa, blksz);
 		bcopy(db->db.db_data, abuf->b_data, blksz);
 	} else {
@@ -519,7 +518,6 @@ static void
 dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
 {
 	dnode_t *dn;
-	spa_t *spa;
 	zbookmark_t zb;
 	uint32_t aflags = ARC_NOWAIT;
 
@@ -559,9 +557,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
 	    BP_IS_HOLE(db->db_blkptr)))) {
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 
-		dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa,
-		    db->db.db_size, db, type));
 		DB_DNODE_EXIT(db);
+		dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
+		    db->db.db_size, db, type));
 		bzero(db->db.db_data, db->db.db_size);
 		db->db_state = DB_CACHED;
 		*flags |= DB_RF_CACHED;
@@ -569,7 +567,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
 		return;
 	}
 
-	spa = dn->dn_objset->os_spa;
 	DB_DNODE_EXIT(db);
 
 	db->db_state = DB_READ;
@@ -586,7 +583,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t
 
 	dbuf_add_ref(db, NULL);
 
-	(void) arc_read(zio, spa, db->db_blkptr,
+	(void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
 	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
 	    (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
 	    &aflags, &zb);
@@ -598,8 +595,8 @@ int
 dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 {
 	int err = 0;
-	int havepzio = (zio != NULL);
-	int prefetch;
+	boolean_t havepzio = (zio != NULL);
+	boolean_t prefetch;
 	dnode_t *dn;
 
 	/*
@@ -694,11 +691,10 @@ dbuf_noread(dmu_buf_impl_t *db)
 		cv_wait(&db->db_changed, &db->db_mtx);
 	if (db->db_state == DB_UNCACHED) {
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-		spa_t *spa;
+		spa_t *spa = db->db_objset->os_spa;
 
 		ASSERT(db->db_buf == NULL);
 		ASSERT(db->db.db_data == NULL);
-		DB_GET_SPA(&spa, db);
 		dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
 		db->db_state = DB_FILL;
 	} else if (db->db_state == DB_NOFILL) {
@@ -753,9 +749,8 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, ui
 	} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
 		int size = db->db.db_size;
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-		spa_t *spa;
+		spa_t *spa = db->db_objset->os_spa;
 
-		DB_GET_SPA(&spa, db);
 		dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
 		bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
 	} else {
@@ -781,12 +776,9 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
 	ASSERT(db->db_data_pending != dr);
 
 	/* free this block */
-	if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) {
-		spa_t *spa;
+	if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
+		zio_free(db->db_objset->os_spa, txg, bp);
 
-		DB_GET_SPA(&spa, db);
-		zio_free(spa, txg, bp);
-	}
 	dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
 	dr->dt.dl.dr_nopwrite = B_FALSE;
 
@@ -804,9 +796,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
 /*
  * Evict (if its unreferenced) or clear (if its referenced) any level-0
  * data blocks in the free range, so that any future readers will find
- * empty blocks.  Also, if we happen across any level-1 dbufs in the
- * range that have not already been marked dirty, mark them dirty so
- * they stay in memory.
+ * empty blocks.
  *
  * This is a no-op if the dataset is in the middle of an incremental
  * receive; see comment below for details.
@@ -816,14 +806,9 @@ dbuf_free_range(dnode_t *dn, uint64_t st
 {
 	dmu_buf_impl_t *db, *db_next;
 	uint64_t txg = tx->tx_txg;
-	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
-	uint64_t first_l1 = start >> epbs;
-	uint64_t last_l1 = end >> epbs;
 
-	if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
+	if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID))
 		end = dn->dn_maxblkid;
-		last_l1 = end >> epbs;
-	}
 	dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
 
 	mutex_enter(&dn->dn_dbufs_mtx);
@@ -846,23 +831,8 @@ dbuf_free_range(dnode_t *dn, uint64_t st
 		db_next = list_next(&dn->dn_dbufs, db);
 		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 
-		if (db->db_level == 1 &&
-		    db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
-			mutex_enter(&db->db_mtx);
-			if (db->db_last_dirty &&
-			    db->db_last_dirty->dr_txg < txg) {
-				dbuf_add_ref(db, FTAG);
-				mutex_exit(&db->db_mtx);
-				dbuf_will_dirty(db, tx);
-				dbuf_rele(db, FTAG);
-			} else {
-				mutex_exit(&db->db_mtx);
-			}
-		}
-
 		if (db->db_level != 0)
 			continue;
-		dprintf_dbuf(db, "found buf %s\n", "");
 		if (db->db_blkid < start || db->db_blkid > end)
 			continue;
 
@@ -939,24 +909,29 @@ dbuf_block_freeable(dmu_buf_impl_t *db)
 	 * We don't need any locking to protect db_blkptr:
 	 * If it's syncing, then db_last_dirty will be set
 	 * so we'll ignore db_blkptr.
+	 *
+	 * This logic ensures that only block births for
+	 * filled blocks are considered.
 	 */
 	ASSERT(MUTEX_HELD(&db->db_mtx));
-	if (db->db_last_dirty)
+	if (db->db_last_dirty && (db->db_blkptr == NULL ||
+	    !BP_IS_HOLE(db->db_blkptr))) {
 		birth_txg = db->db_last_dirty->dr_txg;
-	else if (db->db_blkptr)
+	} else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
 		birth_txg = db->db_blkptr->blk_birth;
+	}
 
 	/*
-	 * If we don't exist or are in a snapshot, we can't be freed.
+	 * If this block doesn't exist or is in a snapshot, it can't be freed.
 	 * Don't pass the bp to dsl_dataset_block_freeable() since we
 	 * are holding the db_mtx lock and might deadlock if we are
 	 * prefetching a dedup-ed block.
 	 */
-	if (birth_txg)
+	if (birth_txg != 0)
 		return (ds == NULL ||
 		    dsl_dataset_block_freeable(ds, NULL, birth_txg));
 	else
-		return (FALSE);
+		return (B_FALSE);
 }
 
 void
@@ -976,7 +951,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int si
 	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
 
 	/*
-	 * This call to dbuf_will_dirty() with the dn_struct_rwlock held
+	 * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
 	 * is OK, because there can be no other references to the db
 	 * when we are changing its size, so no concurrent DB_FILL can
 	 * be happening.
@@ -985,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int si
 	 * XXX we should be doing a dbuf_read, checking the return
 	 * value and returning that up to our callers
 	 */
-	dbuf_will_dirty(db, tx);
+	dmu_buf_will_dirty(&db->db, tx);
 
 	/* create the data buffer for the new block */
 	buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type);
@@ -1015,9 +990,8 @@ dbuf_new_size(dmu_buf_impl_t *db, int si
 void
 dbuf_release_bp(dmu_buf_impl_t *db)
 {
-	objset_t *os;
+	objset_t *os = db->db_objset;
 
-	DB_GET_OBJSET(&os, db);
 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
 	ASSERT(arc_released(os->os_phys_buf) ||
 	    list_link_active(&os->os_dsl_dataset->ds_synced_link));
@@ -1391,10 +1365,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_
 	return (B_FALSE);
 }
 
-#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
 void
-dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
 {
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 	int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
 
 	ASSERT(tx->tx_txg != 0);
@@ -1517,7 +1491,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, a
 	db->db_state = DB_FILL;
 	mutex_exit(&db->db_mtx);
 	(void) dbuf_dirty(db, tx);
-	dbuf_fill_done(db, tx);
+	dmu_buf_fill_done(&db->db, tx);
 }
 
 /*
@@ -2022,7 +1996,6 @@ dbuf_add_ref(dmu_buf_impl_t *db, void *t
  * Without that, the dbuf_rele() could lead to a dnode_rele() followed by the
  * dnode's parent dbuf evicting its dnode handles.
  */
-#pragma weak dmu_buf_rele = dbuf_rele
 void
 dbuf_rele(dmu_buf_impl_t *db, void *tag)
 {
@@ -2030,6 +2003,12 @@ dbuf_rele(dmu_buf_impl_t *db, void *tag)
 	dbuf_rele_and_unlock(db, tag);
 }
 
+void
+dmu_buf_rele(dmu_buf_t *db, void *tag)
+{
+	dbuf_rele((dmu_buf_impl_t *)db, tag);
+}
+
 /*
  * dbuf_rele() for an already-locked dbuf.  This is necessary to allow
  * db_dirtycnt and db_holds to be updated atomically.
@@ -2476,18 +2455,14 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
 	dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
 	zio->io_prev_space_delta = delta;
 
-	if (BP_IS_HOLE(bp)) {
-		ASSERT(bp->blk_fill == 0);
-		DB_DNODE_EXIT(db);
-		return;
+	if (bp->blk_birth != 0) {
+		ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
+		    BP_GET_TYPE(bp) == dn->dn_type) ||
+		    (db->db_blkid == DMU_SPILL_BLKID &&
+		    BP_GET_TYPE(bp) == dn->dn_bonustype));
+		ASSERT(BP_GET_LEVEL(bp) == db->db_level);
 	}
 
-	ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
-	    BP_GET_TYPE(bp) == dn->dn_type) ||
-	    (db->db_blkid == DMU_SPILL_BLKID &&
-	    BP_GET_TYPE(bp) == dn->dn_bonustype));
-	ASSERT(BP_GET_LEVEL(bp) == db->db_level);
-
 	mutex_enter(&db->db_mtx);
 
 #ifdef ZFS_DEBUG
@@ -2513,7 +2488,11 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *
 					fill++;
 			}
 		} else {
-			fill = 1;
+			if (BP_IS_HOLE(bp)) {
+				fill = 0;
+			} else {
+				fill = 1;
+			}
 		}
 	} else {
 		blkptr_t *ibp = db->db.db_data;
@@ -2568,9 +2547,10 @@ static void
 dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 {
 	dmu_buf_impl_t *db = vdb;
-	blkptr_t *bp = zio->io_bp;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
-	uint64_t txg = zio->io_txg;
+	blkptr_t *bp = db->db_blkptr;
+	objset_t *os = db->db_objset;
+	dmu_tx_t *tx = os->os_synctx;
 	dbuf_dirty_record_t **drp, *dr;
 
 	ASSERT0(zio->io_error);
@@ -2583,14 +2563,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *b
 	if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
 		ASSERT(BP_EQUAL(bp, bp_orig));
 	} else {
-		objset_t *os;
-		dsl_dataset_t *ds;
-		dmu_tx_t *tx;
-
-		DB_GET_OBJSET(&os, db);
-		ds = os->os_dsl_dataset;
-		tx = os->os_synctx;
-
+		dsl_dataset_t *ds = os->os_dsl_dataset;
 		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
 		dsl_dataset_block_born(ds, bp, tx);
 	}
@@ -2603,7 +2576,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *b
 	while ((dr = *drp) != db->db_data_pending)
 		drp = &dr->dr_next;
 	ASSERT(!list_link_active(&dr->dr_dirty_node));
-	ASSERT(dr->dr_txg == txg);
 	ASSERT(dr->dr_dbuf == db);
 	ASSERT(dr->dr_next == NULL);
 	*drp = dr->dr_next;
@@ -2637,14 +2609,14 @@ dbuf_write_done(zio_t *zio, arc_buf_t *b
 		DB_DNODE_ENTER(db);
 		dn = DB_DNODE(db);
 		ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
-		ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
+		ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
 		if (!BP_IS_HOLE(db->db_blkptr)) {
 			int epbs =
 			    dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+			ASSERT3U(db->db_blkid, <=,
+			    dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
 			ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
 			    db->db.db_size);
-			ASSERT3U(dn->dn_phys->dn_maxblkid
-			    >> (db->db_level * epbs), >=, db->db_blkid);
 			arc_set_callback(db->db_buf, dbuf_do_evict, db);
 		}
 		DB_DNODE_EXIT(db);
@@ -2657,8 +2629,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *b
 	ASSERT(db->db_dirtycnt > 0);
 	db->db_dirtycnt -= 1;
 	db->db_data_pending = NULL;
-
-	dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
+	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
 }
 
 static void

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/ddt.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/ddt.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/ddt.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -111,12 +111,12 @@ ddt_object_load(ddt_t *ddt, enum ddt_typ
 	error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name,
 	    sizeof (uint64_t), 1, &ddt->ddt_object[type][class]);
 
-	if (error)
+	if (error != 0)
 		return (error);
 
-	error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
+	VERIFY0(zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
 	    sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
-	    &ddt->ddt_histogram[type][class]);
+	    &ddt->ddt_histogram[type][class]));
 
 	/*
 	 * Seed the cached statistics.
@@ -127,8 +127,7 @@ ddt_object_load(ddt_t *ddt, enum ddt_typ
 	ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
 	ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
 
-	ASSERT(error == 0);
-	return (error);
+	return (0);
 }
 
 static void
@@ -581,7 +580,10 @@ ddt_compress(void *src, uchar_t *dst, si
 		bcopy(src, dst, s_len);
 	}
 
-	*version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc;
+	*version = cpfunc;
+	/* CONSTCOND */
+	if (ZFS_HOST_BYTEORDER)
+		*version |= DDT_COMPRESS_BYTEORDER_MASK;
 
 	return (c_len + 1);
 }
@@ -598,7 +600,8 @@ ddt_decompress(uchar_t *src, void *dst, 
 	else
 		bcopy(src, dst, d_len);
 
-	if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK)
+	if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) !=
+	    (ZFS_HOST_BYTEORDER != 0))
 		byteswap_uint64_array(dst, d_len);
 }
 

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -680,7 +680,7 @@ dmu_free_long_range(objset_t *os, uint64
 	 * will take the fast path, and (b) dnode_reallocate() can verify
 	 * that the entire file has been freed.
 	 */
-	if (offset == 0 && length == DMU_OBJECT_END)
+	if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
 		dn->dn_maxblkid = 0;
 
 	dnode_rele(dn, FTAG);
@@ -1184,10 +1184,8 @@ arc_buf_t *
 dmu_request_arcbuf(dmu_buf_t *handle, int size)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
-	spa_t *spa;
 
-	DB_GET_SPA(&spa, db);
-	return (arc_loan_buf(spa, size));
+	return (arc_loan_buf(db->db_objset->os_spa, size));
 }
 
 /*

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_diff.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_diff.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_diff.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -118,7 +118,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, cons
 	if (zb->zb_object != DMU_META_DNODE_OBJECT)
 		return (0);
 
-	if (bp == NULL) {
+	if (BP_IS_HOLE(bp)) {
 		uint64_t span = DBP_SPAN(dnp, zb->zb_level);
 		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
 

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -356,11 +356,12 @@ backup_cb(spa_t *spa, zilog_t *zilog, co
 	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
 	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
 		return (0);
-	} else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
+	} else if (BP_IS_HOLE(bp) &&
+	    zb->zb_object == DMU_META_DNODE_OBJECT) {
 		uint64_t span = BP_SPAN(dnp, zb->zb_level);
 		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
 		err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
-	} else if (bp == NULL) {
+	} else if (BP_IS_HOLE(bp)) {
 		uint64_t span = BP_SPAN(dnp, zb->zb_level);
 		err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
 	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_traverse.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_traverse.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_traverse.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -36,6 +36,7 @@
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
 #include <sys/callb.h>
+#include <sys/zfeature.h>
 
 int zfs_pd_blks_max = 100;
 
@@ -72,7 +73,7 @@ traverse_zil_block(zilog_t *zilog, blkpt
 	traverse_data_t *td = arg;
 	zbookmark_t zb;
 
-	if (bp->blk_birth == 0)
+	if (BP_IS_HOLE(bp))
 		return (0);
 
 	if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
@@ -96,7 +97,7 @@ traverse_zil_record(zilog_t *zilog, lr_t
 		blkptr_t *bp = &lr->lr_blkptr;
 		zbookmark_t zb;
 
-		if (bp->blk_birth == 0)
+		if (BP_IS_HOLE(bp))
 			return (0);
 
 		if (claim_txg == 0 || bp->blk_birth < claim_txg)
@@ -226,14 +227,35 @@ traverse_visitbp(traverse_data_t *td, co
 		ASSERT(0);
 	}
 
+	if (bp->blk_birth == 0) {
+		if (spa_feature_is_active(td->td_spa, SPA_FEATURE_HOLE_BIRTH)) {
+			/*
+			 * Since this block has a birth time of 0 it must be a
+			 * hole created before the SPA_FEATURE_HOLE_BIRTH
+			 * feature was enabled.  If SPA_FEATURE_HOLE_BIRTH
+			 * was enabled before the min_txg for this traversal we
+			 * know the hole must have been created before the
+			 * min_txg for this traversal, so we can skip it. If
+			 * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
+			 * for this traversal we cannot tell if the hole was
+			 * created before or after the min_txg for this
+			 * traversal, so we cannot skip it.
+			 */
+			uint64_t hole_birth_enabled_txg;
+			VERIFY(spa_feature_enabled_txg(td->td_spa,
+			    SPA_FEATURE_HOLE_BIRTH, &hole_birth_enabled_txg));
+			if (hole_birth_enabled_txg < td->td_min_txg)
+				return (0);
+		}
+	} else if (bp->blk_birth <= td->td_min_txg) {
+		return (0);
+	}
+
 	if (BP_IS_HOLE(bp)) {
-		err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
+		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
 		return (err);
 	}
 
-	if (bp->blk_birth <= td->td_min_txg)
-		return (0);
-
 	if (pd && !pd->pd_exited &&
 	    ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
 	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
@@ -436,7 +458,8 @@ traverse_prefetcher(spa_t *spa, zilog_t 
 	if (pfd->pd_cancel)
 		return (SET_ERROR(EINTR));
 
-	if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
+	if (BP_IS_HOLE(bp) ||
+	    !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
 	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
 	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
 		return (0);

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -1544,7 +1544,13 @@ dnode_free_range(dnode_t *dn, uint64_t o
 	} else {
 		ASSERT(dn->dn_maxblkid == 0);
 		if (off == 0 && len >= blksz) {
-			/* Freeing the whole block; fast-track this request */
+			/*
+			 * Freeing the whole block; fast-track this request.
+			 * Note that we won't dirty any indirect blocks,
+			 * which is fine because we will be freeing the entire
+			 * file and thus all indirect blocks will be freed
+			 * by free_children().
+			 */
 			blkid = 0;
 			nblks = 1;
 			goto done;
@@ -1571,7 +1577,7 @@ dnode_free_range(dnode_t *dn, uint64_t o
 			if (db->db_last_dirty ||
 			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
 				rw_exit(&dn->dn_struct_rwlock);
-				dbuf_will_dirty(db, tx);
+				dmu_buf_will_dirty(&db->db, tx);
 				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				data = db->db.db_data;
 				bzero(data + blkoff, head);
@@ -1607,7 +1613,7 @@ dnode_free_range(dnode_t *dn, uint64_t o
 			if (db->db_last_dirty ||
 			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
 				rw_exit(&dn->dn_struct_rwlock);
-				dbuf_will_dirty(db, tx);
+				dmu_buf_will_dirty(&db->db, tx);
 				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				bzero(db->db.db_data, tail);
 			}
@@ -1628,18 +1634,18 @@ dnode_free_range(dnode_t *dn, uint64_t o
 		nblks += 1;
 
 	/*
-	 * Read in and mark all the level-1 indirects dirty,
-	 * so that they will stay in memory until syncing phase.
-	 * Always dirty the first and last indirect to make sure
-	 * we dirty all the partial indirects.
+	 * Dirty the first and last indirect blocks, as they (and/or their
+	 * parents) will need to be written out if they were only
+	 * partially freed.  Interior indirect blocks will be themselves freed,
+	 * by free_children(), so they need not be dirtied.  Note that these
+	 * interior blocks have already been prefetched by dmu_tx_hold_free().
 	 */
 	if (dn->dn_nlevels > 1) {
-		uint64_t i, first, last;
-		int shift = epbs + dn->dn_datablkshift;
+		uint64_t first, last;
 
 		first = blkid >> epbs;
 		if (db = dbuf_hold_level(dn, 1, first, FTAG)) {
-			dbuf_will_dirty(db, tx);
+			dmu_buf_will_dirty(&db->db, tx);
 			dbuf_rele(db, FTAG);
 		}
 		if (trunc)
@@ -1647,26 +1653,11 @@ dnode_free_range(dnode_t *dn, uint64_t o
 		else
 			last = (blkid + nblks - 1) >> epbs;
 		if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) {
-			dbuf_will_dirty(db, tx);
+			dmu_buf_will_dirty(&db->db, tx);
 			dbuf_rele(db, FTAG);
 		}
-		for (i = first + 1; i < last; i++) {
-			uint64_t ibyte = i << shift;
-			int err;
-
-			err = dnode_next_offset(dn,
-			    DNODE_FIND_HAVELOCK, &ibyte, 1, 1, 0);
-			i = ibyte >> shift;
-			if (err == ESRCH || i >= last)
-				break;
-			ASSERT(err == 0);
-			db = dbuf_hold_level(dn, 1, i, FTAG);
-			if (db) {
-				dbuf_will_dirty(db, tx);
-				dbuf_rele(db, FTAG);
-			}
-		}
 	}
+
 done:
 	/*
 	 * Add this range to the dnode range list.
@@ -1694,8 +1685,6 @@ done:
 	dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
 	dnode_setdirty(dn, tx);
 out:
-	if (trunc && dn->dn_maxblkid >= (off >> blkshift))
-		dn->dn_maxblkid = (off >> blkshift ? (off >> blkshift) - 1 : 0);
 
 	rw_exit(&dn->dn_struct_rwlock);
 }
@@ -1872,8 +1861,10 @@ dnode_next_offset_level(dnode_t *dn, int
 		data = db->db.db_data;
 	}
 
-	if (db && txg &&
-	    (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) {
+
+	if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
+	    db->db_blkptr->blk_birth <= txg ||
+	    BP_IS_HOLE(db->db_blkptr))) {
 		/*
 		 * This can only happen when we are searching up the tree
 		 * and these conditions mean that we need to keep climbing.

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dnode_sync.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dnode_sync.c	Tue Dec 10 13:14:54 2013	(r259169)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dnode_sync.c	Tue Dec 10 13:33:56 2013	(r259170)
@@ -32,6 +32,7 @@
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dataset.h>
 #include <sys/spa.h>
+#include <sys/zfeature.h>
 
 static void
 dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
@@ -112,26 +113,44 @@ dnode_increase_indirection(dnode_t *dn, 
 	rw_exit(&dn->dn_struct_rwlock);
 }
 
-static int
+static void
 free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 	uint64_t bytesfreed = 0;
-	int i, blocks_freed = 0;
 
 	dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
 
-	for (i = 0; i < num; i++, bp++) {
+	for (int i = 0; i < num; i++, bp++) {
 		if (BP_IS_HOLE(bp))
 			continue;
 
 		bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
 		ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
+
+		/*
+		 * Save the logical size, type, and indirection level of
+		 * each hole being punched, and stamp it with this txg as
+		 * its birth time. Knowing when a hole was punched reduces
+		 * the number of free records transmitted during a zfs
+		 * send.
+		 */
+
+		uint64_t lsize = BP_GET_LSIZE(bp);
+		dmu_object_type_t type = BP_GET_TYPE(bp);
+		uint64_t lvl = BP_GET_LEVEL(bp);
+
 		bzero(bp, sizeof (blkptr_t));
-		blocks_freed += 1;
+
+		if (spa_feature_is_active(dn->dn_objset->os_spa,
+		    SPA_FEATURE_HOLE_BIRTH)) {
+			BP_SET_LSIZE(bp, lsize);
+			BP_SET_TYPE(bp, type);
+			BP_SET_LEVEL(bp, lvl);
+			BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0);
+		}
 	}
 	dnode_diduse_space(dn, -bytesfreed);
-	return (blocks_freed);
 }
 
 #ifdef ZFS_DEBUG
@@ -215,30 +234,27 @@ free_verify(dmu_buf_impl_t *db, uint64_t
 
 #define	ALL -1
 
-static int
-free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
+static void
+free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
     dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	blkptr_t *bp;
 	dmu_buf_impl_t *subdb;
 	uint64_t start, end, dbstart, dbend, i;
-	int epbs, shift, err;
-	int all = TRUE;
-	int blocks_freed = 0;
+	int epbs, shift;
 
 	/*
 	 * There is a small possibility that this block will not be cached:
 	 *   1 - if level > 1 and there are no children with level <= 1
-	 *   2 - if we didn't get a dirty hold (because this block had just
-	 *	 finished being written -- and so had no holds), and then this
-	 *	 block got evicted before we got here.
+	 *   2 - if this block was evicted after dmu_tx_hold_free()
+	 *	 read it in.
 	 */
 	if (db->db_state != DB_CACHED)
 		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
 
 	dbuf_release_bp(db);
-	bp = (blkptr_t *)db->db.db_data;
+	bp = db->db.db_data;
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
@@ -248,7 +264,6 @@ free_children(dmu_buf_impl_t *db, uint64
 	start = blkid >> shift;
 	if (dbstart < start) {
 		bp += start - dbstart;
-		all = FALSE;
 	} else {
 		start = dbstart;
 	}
@@ -256,49 +271,46 @@ free_children(dmu_buf_impl_t *db, uint64
 	end = (blkid + nblks - 1) >> shift;
 	if (dbend <= end)
 		end = dbend;
-	else if (all)
-		all = trunc;
+
 	ASSERT3U(start, <=, end);
 
 	if (db->db_level == 1) {
 		FREE_VERIFY(db, start, end, tx);
-		blocks_freed = free_blocks(dn, bp, end-start+1, tx);
-		arc_buf_freeze(db->db_buf);
-		ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
-		DB_DNODE_EXIT(db);
-		return (all ? ALL : blocks_freed);
+		free_blocks(dn, bp, end-start+1, tx);
+	} else {
+		for (i = start; i <= end; i++, bp++) {
+			if (BP_IS_HOLE(bp))
+				continue;
+			rw_enter(&dn->dn_struct_rwlock, RW_READER);
+			VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
+			    i, B_TRUE, FTAG, &subdb));
+			rw_exit(&dn->dn_struct_rwlock);
+			ASSERT3P(bp, ==, subdb->db_blkptr);
+
+			free_children(subdb, blkid, nblks, tx);
+			dbuf_rele(subdb, FTAG);
+		}
 	}
 
-	for (i = start; i <= end; i++, bp++) {
-		if (BP_IS_HOLE(bp))
-			continue;
-		rw_enter(&dn->dn_struct_rwlock, RW_READER);
-		err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
-		ASSERT0(err);
-		rw_exit(&dn->dn_struct_rwlock);
-
-		if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
-			ASSERT3P(subdb->db_blkptr, ==, bp);
-			blocks_freed += free_blocks(dn, bp, 1, tx);
-		} else {
-			all = FALSE;
-		}
-		dbuf_rele(subdb, FTAG);
+	/* If this whole block is free, free ourself too. */
+	for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
+		if (!BP_IS_HOLE(bp))
+			break;
 	}
+	if (i == 1 << epbs) {
+		/* didn't find any non-holes */
+		bzero(db->db.db_data, db->db.db_size);
+		free_blocks(dn, db->db_blkptr, 1, tx);
+	} else {
+		/*
+		 * Partial block free; must be marked dirty so that it
+		 * will be written out.
+		 */
+		ASSERT(db->db_dirtycnt > 0);
+	}
+
 	DB_DNODE_EXIT(db);
 	arc_buf_freeze(db->db_buf);
-#ifdef ZFS_DEBUG
-	bp -= (end-start)+1;
-	for (i = start; i <= end; i++, bp++) {
-		if (i == start && blkid != 0)
-			continue;
-		else if (i == end && !trunc)
-			continue;
-		ASSERT0(bp->blk_birth);
-	}
-#endif
-	ASSERT(all || blocks_freed == 0 || db->db_last_dirty);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201312101333.rBADXuI5052602>