Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 5 Sep 2012 12:02:09 +0000 (UTC)
From:      Martin Matuska <mm@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r240133 - in head: cddl/contrib/opensolaris/cmd/ztest sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys
Message-ID:  <201209051202.q85C29Wj033805@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mm
Date: Wed Sep  5 12:02:09 2012
New Revision: 240133
URL: http://svn.freebsd.org/changeset/base/240133

Log:
  Merge recent vendor changes and sync code:
  1862 incremental zfs receive fails for sparse file > 8PB
  3112 ztest does not honor ZFS_DEBUG
  3122 zfs destroy filesystem should prefetch blocks
  3129 'zpool reopen' restarts resilvers
  3130 ztest failure: Assertion failed:
         0 == dmu_objset_destroy(name, B_FALSE) (0x0 == 0x10)
  
  References:
    https://www.illumos.org/issues/1862
    https://www.illumos.org/issues/3112
    https://www.illumos.org/issues/3122
    https://www.illumos.org/issues/3129
    https://www.illumos.org/issues/3130
  
  Obtained from:	illumos (vendor/illumos, vendor/illumos-sys)
  MFC after:	2 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -5862,6 +5862,8 @@ main(int argc, char **argv)
 
 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
 
+	dprintf_setup(&argc, argv);
+
 	if (!ischild) {
 		process_options(argc, argv);
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -135,6 +135,14 @@
 
 #include <vm/vm_pageout.h>
 
+#ifdef illumos
+#ifndef _KERNEL
+/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
+boolean_t arc_watch = B_FALSE;
+int arc_procfd;
+#endif
+#endif /* illumos */
+
 static kmutex_t		arc_reclaim_thr_lock;
 static kcondvar_t	arc_reclaim_thr_cv;	/* used to signal reclaim thr */
 static uint8_t		arc_thread_exit;
@@ -534,6 +542,9 @@ static void arc_get_data_buf(arc_buf_t *
 static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
 static int arc_evict_needed(arc_buf_contents_t type);
 static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes);
+#ifdef illumos
+static void arc_buf_watch(arc_buf_t *buf);
+#endif /* illumos */
 
 static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
 
@@ -1069,8 +1080,56 @@ arc_cksum_compute(arc_buf_t *buf, boolea
 	fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
 	    buf->b_hdr->b_freeze_cksum);
 	mutex_exit(&buf->b_hdr->b_freeze_lock);
+#ifdef illumos
+	arc_buf_watch(buf);
+#endif /* illumos */
+}
+
+#ifdef illumos
+#ifndef _KERNEL
+typedef struct procctl {
+	long cmd;
+	prwatch_t prwatch;
+} procctl_t;
+#endif
+
+/* ARGSUSED */
+static void
+arc_buf_unwatch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+	if (arc_watch) {
+		int result;
+		procctl_t ctl;
+		ctl.cmd = PCWATCH;
+		ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
+		ctl.prwatch.pr_size = 0;
+		ctl.prwatch.pr_wflags = 0;
+		result = write(arc_procfd, &ctl, sizeof (ctl));
+		ASSERT3U(result, ==, sizeof (ctl));
+	}
+#endif
 }
 
+/* ARGSUSED */
+static void
+arc_buf_watch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+	if (arc_watch) {
+		int result;
+		procctl_t ctl;
+		ctl.cmd = PCWATCH;
+		ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
+		ctl.prwatch.pr_size = buf->b_hdr->b_size;
+		ctl.prwatch.pr_wflags = WA_WRITE;
+		result = write(arc_procfd, &ctl, sizeof (ctl));
+		ASSERT3U(result, ==, sizeof (ctl));
+	}
+#endif
+}
+#endif /* illumos */
+
 void
 arc_buf_thaw(arc_buf_t *buf)
 {
@@ -1095,6 +1154,10 @@ arc_buf_thaw(arc_buf_t *buf)
 	}
 
 	mutex_exit(&buf->b_hdr->b_freeze_lock);
+
+#ifdef illumos
+	arc_buf_unwatch(buf);
+#endif /* illumos */
 }
 
 void
@@ -1112,6 +1175,7 @@ arc_buf_freeze(arc_buf_t *buf)
 	    buf->b_hdr->b_state == arc_anon);
 	arc_cksum_compute(buf, B_FALSE);
 	mutex_exit(hash_lock);
+
 }
 
 static void
@@ -1496,21 +1560,22 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
  * the buffer is placed on l2arc_free_on_write to be freed later.
  */
 static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
-    void *data, size_t size)
+arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
 {
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+
 	if (HDR_L2_WRITING(hdr)) {
 		l2arc_data_free_t *df;
 		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
-		df->l2df_data = data;
-		df->l2df_size = size;
+		df->l2df_data = buf->b_data;
+		df->l2df_size = hdr->b_size;
 		df->l2df_func = free_func;
 		mutex_enter(&l2arc_free_on_write_mtx);
 		list_insert_head(l2arc_free_on_write, df);
 		mutex_exit(&l2arc_free_on_write_mtx);
 		ARCSTAT_BUMP(arcstat_l2_free_on_write);
 	} else {
-		free_func(data, size);
+		free_func(buf->b_data, hdr->b_size);
 	}
 }
 
@@ -1526,16 +1591,17 @@ arc_buf_destroy(arc_buf_t *buf, boolean_
 		arc_buf_contents_t type = buf->b_hdr->b_type;
 
 		arc_cksum_verify(buf);
+#ifdef illumos
+		arc_buf_unwatch(buf);
+#endif /* illumos */
 
 		if (!recycle) {
 			if (type == ARC_BUFC_METADATA) {
-				arc_buf_data_free(buf->b_hdr, zio_buf_free,
-				    buf->b_data, size);
+				arc_buf_data_free(buf, zio_buf_free);
 				arc_space_return(size, ARC_SPACE_DATA);
 			} else {
 				ASSERT(type == ARC_BUFC_DATA);
-				arc_buf_data_free(buf->b_hdr,
-				    zio_data_buf_free, buf->b_data, size);
+				arc_buf_data_free(buf, zio_data_buf_free);
 				ARCSTAT_INCR(arcstat_data_size, -size);
 				atomic_add_64(&arc_size, -size);
 			}
@@ -2803,6 +2869,9 @@ arc_read_done(zio_t *zio)
 	}
 
 	arc_cksum_compute(buf, B_FALSE);
+#ifdef illumos
+	arc_buf_watch(buf);
+#endif /* illumos */
 
 	if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
 		/*
@@ -3367,6 +3436,9 @@ arc_release(arc_buf_t *buf, void *tag)
 		}
 		hdr->b_datacnt -= 1;
 		arc_cksum_verify(buf);
+#ifdef illumos
+		arc_buf_unwatch(buf);
+#endif /* illumos */
 
 		mutex_exit(hash_lock);
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -189,7 +189,8 @@ bptree_iterate(objset_t *os, uint64_t ob
 			break;
 
 		err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
-		    bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
+		    bte.be_birth_txg, &bte.be_zb,
+		    TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
 		    bptree_visit_cb, &ba);
 		if (free) {
 			ASSERT(err == 0 || err == ERESTART);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -63,6 +63,8 @@ typedef struct traverse_data {
 
 static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
     arc_buf_t *buf, uint64_t objset, uint64_t object);
+static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
+    arc_buf_t *buf, uint64_t objset, uint64_t object);
 
 static int
 traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
@@ -178,9 +180,34 @@ traverse_pause(traverse_data_t *td, cons
 	bcopy(zb, td->td_resume, sizeof (*td->td_resume));
 }
 
+static void
+traverse_prefetch_metadata(traverse_data_t *td,
+    arc_buf_t *pbuf, const blkptr_t *bp, const zbookmark_t *zb)
+{
+	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
+
+	if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
+		return;
+	/*
+	 * If we are in the process of resuming, don't prefetch, because
+	 * some children will not be needed (and in fact may have already
+	 * been freed).
+	 */
+	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
+		return;
+	if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
+		return;
+	if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
+		return;
+
+	(void) arc_read(NULL, td->td_spa, bp,
+	    pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+	    ZIO_FLAG_CANFAIL, &flags, zb);
+}
+
 static int
 traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
-    arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
+    arc_buf_t *pbuf, const blkptr_t *bp, const zbookmark_t *zb)
 {
 	zbookmark_t czb;
 	int err = 0, lasterr = 0;
@@ -243,14 +270,21 @@ traverse_visitbp(traverse_data_t *td, co
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err)
 			return (err);
+		cbp = buf->b_data;
+
+		for (i = 0; i < epb; i++) {
+			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
+			    zb->zb_level - 1,
+			    zb->zb_blkid * epb + i);
+			traverse_prefetch_metadata(td, buf, &cbp[i], &czb);
+		}
 
 		/* recursively visitbp() blocks below this */
-		cbp = buf->b_data;
-		for (i = 0; i < epb; i++, cbp++) {
+		for (i = 0; i < epb; i++) {
 			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
 			    zb->zb_level - 1,
 			    zb->zb_blkid * epb + i);
-			err = traverse_visitbp(td, dnp, buf, cbp, &czb);
+			err = traverse_visitbp(td, dnp, buf, &cbp[i], &czb);
 			if (err) {
 				if (!hard)
 					break;
@@ -267,11 +301,16 @@ traverse_visitbp(traverse_data_t *td, co
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err)
 			return (err);
+		dnp = buf->b_data;
+
+		for (i = 0; i < epb; i++) {
+			prefetch_dnode_metadata(td, &dnp[i], buf, zb->zb_objset,
+			    zb->zb_blkid * epb + i);
+		}
 
 		/* recursively visitbp() blocks below this */
-		dnp = buf->b_data;
-		for (i = 0; i < epb; i++, dnp++) {
-			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+		for (i = 0; i < epb; i++) {
+			err = traverse_dnode(td, &dnp[i], buf, zb->zb_objset,
 			    zb->zb_blkid * epb + i);
 			if (err) {
 				if (!hard)
@@ -292,6 +331,15 @@ traverse_visitbp(traverse_data_t *td, co
 
 		osp = buf->b_data;
 		dnp = &osp->os_meta_dnode;
+		prefetch_dnode_metadata(td, dnp, buf, zb->zb_objset,
+		    DMU_META_DNODE_OBJECT);
+		if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+			prefetch_dnode_metadata(td, &osp->os_userused_dnode,
+			    buf, zb->zb_objset, DMU_USERUSED_OBJECT);
+			prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
+			    buf, zb->zb_objset, DMU_GROUPUSED_OBJECT);
+		}
+
 		err = traverse_dnode(td, dnp, buf, zb->zb_objset,
 		    DMU_META_DNODE_OBJECT);
 		if (err && hard) {
@@ -334,6 +382,24 @@ post:
 	return (err != 0 ? err : lasterr);
 }
 
+static void
+prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
+    arc_buf_t *buf, uint64_t objset, uint64_t object)
+{
+	int j;
+	zbookmark_t czb;
+
+	for (j = 0; j < dnp->dn_nblkptr; j++) {
+		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+		traverse_prefetch_metadata(td, buf, &dnp->dn_blkptr[j], &czb);
+	}
+
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
+		traverse_prefetch_metadata(td, buf, &dnp->dn_spill, &czb);
+	}
+}
+
 static int
 traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
     arc_buf_t *buf, uint64_t objset, uint64_t object)
@@ -344,8 +410,7 @@ traverse_dnode(traverse_data_t *td, cons
 
 	for (j = 0; j < dnp->dn_nblkptr; j++) {
 		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
-		err = traverse_visitbp(td, dnp, buf,
-		    (blkptr_t *)&dnp->dn_blkptr[j], &czb);
+		err = traverse_visitbp(td, dnp, buf, &dnp->dn_blkptr[j], &czb);
 		if (err) {
 			if (!hard)
 				break;
@@ -354,10 +419,8 @@ traverse_dnode(traverse_data_t *td, cons
 	}
 
 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
-		SET_BOOKMARK(&czb, objset,
-		    object, 0, DMU_SPILL_BLKID);
-		err = traverse_visitbp(td, dnp, buf,
-		    (blkptr_t *)&dnp->dn_spill, &czb);
+		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
+		err = traverse_visitbp(td, dnp, buf, &dnp->dn_spill, &czb);
 		if (err) {
 			if (!hard)
 				return (err);
@@ -438,6 +501,12 @@ traverse_impl(spa_t *spa, dsl_dataset_t 
 	ASSERT(ds == NULL || objset == ds->ds_object);
 	ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
 
+	/*
+	 * The data prefetching mechanism (the prefetch thread) is incompatible
+	 * with resuming from a bookmark.
+	 */
+	ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));
+
 	td.td_spa = spa;
 	td.td_objset = objset;
 	td.td_rootbp = rootbp;
@@ -464,7 +533,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t 
 		traverse_zil(&td, &os->os_zil_header);
 	}
 
-	if (!(flags & TRAVERSE_PREFETCH) ||
+	if (!(flags & TRAVERSE_PREFETCH_DATA) ||
 	    0 == taskq_dispatch(system_taskq, traverse_prefetch_thread,
 	    &td, TQ_NOQUEUE))
 		pd.pd_exited = B_TRUE;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -429,6 +429,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, ui
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
 	int epbs;
+	uint64_t l0span = 0, nl1blks = 0;
 
 	if (dn->dn_nlevels == 0)
 		return;
@@ -461,6 +462,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, ui
 			nblks = dn->dn_maxblkid - blkid;
 
 	}
+	l0span = nblks;    /* save for later use to calc level > 1 overhead */
 	if (dn->dn_nlevels == 1) {
 		int i;
 		for (i = 0; i < nblks; i++) {
@@ -473,24 +475,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, ui
 			}
 			unref += BP_GET_ASIZE(bp);
 		}
+		nl1blks = 1;
 		nblks = 0;
 	}
 
-	/*
-	 * Add in memory requirements of higher-level indirects.
-	 * This assumes a worst-possible scenario for dn_nlevels.
-	 */
-	{
-		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
-		int level = (dn->dn_nlevels > 1) ? 2 : 1;
-
-		while (level++ < DN_MAX_LEVELS) {
-			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
-			blkcnt = 1 + (blkcnt >> epbs);
-		}
-		ASSERT(blkcnt <= dn->dn_nblkptr);
-	}
-
 	lastblk = blkid + nblks - 1;
 	while (nblks) {
 		dmu_buf_impl_t *dbuf;
@@ -561,11 +549,35 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, ui
 		}
 		dbuf_rele(dbuf, FTAG);
 
+		++nl1blks;
 		blkid += tochk;
 		nblks -= tochk;
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
+	/*
+	 * Add in memory requirements of higher-level indirects.
+	 * This assumes a worst-possible scenario for dn_nlevels and a
+	 * worst-possible distribution of l1-blocks over the region to free.
+	 */
+	{
+		uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
+		int level = 2;
+		/*
+		 * Here we don't use DN_MAX_LEVELS, but calculate it with the
+		 * given datablkshift and indblkshift. This makes the
+		 * difference between 19 and 8 on large files.
+		 */
+		int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
+		    (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
+
+		while (level++ < maxlevel) {
+			txh->txh_memory_tohold += MIN(blkcnt, (nl1blks >> epbs))
+			    << dn->dn_indblkshift;
+			blkcnt = 1 + (blkcnt >> epbs);
+		}
+	}
+
 	/* account for new level 1 indirect blocks that might show up */
 	if (skipped > 0) {
 		txh->txh_fudge += skipped << dn->dn_indblkshift;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -2343,7 +2343,6 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvl
 			}
 		}
 	}
-
 	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
 	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
 	    ds->ds_phys->ds_compressed_bytes);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -462,12 +462,14 @@ dsl_dir_destroy_check(void *arg1, void *
 	/*
 	 * There should be exactly two holds, both from
 	 * dsl_dataset_destroy: one on the dd directory, and one on its
-	 * head ds.  Otherwise, someone is trying to lookup something
-	 * inside this dir while we want to destroy it.  The
-	 * config_rwlock ensures that nobody else opens it after we
-	 * check.
+	 * head ds.  If there are more holds, then a concurrent thread is
+	 * performing a lookup inside this dir while we're trying to destroy
+	 * it.  To minimize this possibility, we perform this check only
+	 * in syncing context and fail the operation if we encounter
+	 * additional holds.  The dp_config_rwlock ensures that nobody else
+	 * opens it after we check.
 	 */
-	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
+	if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
 		return (EBUSY);
 
 	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -228,12 +228,7 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp,
     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
     void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
 {
-	dsl_sync_task_group_t *dstg;
-
-	if (!spa_writeable(dp->dp_spa))
-		return;
-
-	dstg = dsl_sync_task_group_create(dp);
+	dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
 	dsl_sync_task_create(dstg, checkfunc, syncfunc,
 	    arg1, arg2, blocks_modified);
 	dsl_sync_task_group_nowait(dstg, tx);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -306,6 +306,9 @@ spa_history_log(spa_t *spa, const char *
 
 	ASSERT(what != LOG_INTERNAL);
 
+	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
+		return (EINVAL);
+
 	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err) {
@@ -435,8 +438,9 @@ log_internal(history_internal_events_t e
 	/*
 	 * If this is part of creating a pool, not everything is
 	 * initialized yet, so don't bother logging the internal events.
+	 * Likewise if the pool is not writeable.
 	 */
-	if (tx->tx_txg == TXG_INITIAL)
+	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa))
 		return;
 
 	va_copy(adx2, adx);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -1604,6 +1604,19 @@ spa_init(int mode)
 
 	spa_mode_global = mode;
 
+#ifdef illumos
+#ifndef _KERNEL
+	if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
+		arc_procfd = open("/proc/self/ctl", O_WRONLY);
+		if (arc_procfd == -1) {
+			perror("could not enable watchpoints: "
+			    "opening /proc/self/ctl failed");
+		} else {
+			arc_watch = B_TRUE;
+		}
+	}
+#endif
+#endif /* illumos */
 	refcount_sysinit();
 	unique_init();
 	zio_init();

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h	Wed Sep  5 12:02:09 2012	(r240133)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_ARC_H
@@ -135,6 +136,13 @@ void l2arc_fini(void);
 void l2arc_start(void);
 void l2arc_stop(void);
 
+#ifdef illumos
+#ifndef _KERNEL
+extern boolean_t arc_watch;
+extern int arc_procfd;
+#endif
+#endif /* illumos */
+
 #ifdef	__cplusplus
 }
 #endif

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed Sep  5 12:02:09 2012	(r240133)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DNODE_H
@@ -276,7 +277,6 @@ void dnode_byteswap(dnode_phys_t *dnp);
 void dnode_buf_byteswap(void *buf, size_t size);
 void dnode_verify(dnode_t *dn);
 int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
-uint64_t dnode_current_max_length(dnode_t *dn);
 void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
 void dnode_clear_range(dnode_t *dn, uint64_t blkid,
     uint64_t nblks, dmu_tx_t *tx);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	Wed Sep  5 12:02:09 2012	(r240133)
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_DEBUG_H
@@ -75,6 +76,12 @@ extern void zfs_dbgmsg_init(void);
 extern void zfs_dbgmsg_fini(void);
 extern void zfs_dbgmsg(const char *fmt, ...);
 
+#ifdef illumos
+#ifndef _KERNEL
+extern int dprintf_find_string(const char *string);
+#endif
+#endif /* illumos */
+
 #ifdef	__cplusplus
 }
 #endif

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -4149,7 +4149,17 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc)
 		return (error);
 
 	spa_vdev_state_enter(spa, SCL_NONE);
+
+	/*
+	 * If a resilver is already in progress then set the
+	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
+	 * the scan as a side effect of the reopen. Otherwise, let
+	 * vdev_open() decide if a resilver is required.
+	 */
+	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
 	vdev_reopen(spa->spa_root_vdev);
+	spa->spa_scrub_reopen = B_FALSE;
+
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 	spa_close(spa, FTAG);
 	return (0);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Wed Sep  5 11:55:53 2012	(r240132)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Wed Sep  5 12:02:09 2012	(r240133)
@@ -130,11 +130,23 @@ zio_init(void)
 		while (p2 & (p2 - 1))
 			p2 &= p2 - 1;
 
+#ifdef illumos
+#ifndef _KERNEL
+		/*
+		 * If we are using watchpoints, put each buffer on its own page,
+		 * to eliminate the performance overhead of trapping to the
+		 * kernel when modifying a non-watched buffer that shares the
+		 * page with a watched buffer.
+		 */
+		if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
+			continue;
+#endif
+#endif /* illumos */
 		if (size <= 4 * SPA_MINBLOCKSIZE) {
 			align = SPA_MINBLOCKSIZE;
-		} else if (P2PHASE(size, PAGESIZE) == 0) {
+		} else if (IS_P2ALIGNED(size, PAGESIZE)) {
 			align = PAGESIZE;
-		} else if (P2PHASE(size, p2 >> 2) == 0) {
+		} else if (IS_P2ALIGNED(size, p2 >> 2)) {
 			align = p2 >> 2;
 		}
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201209051202.q85C29Wj033805>