Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 26 Jul 2017 16:42:32 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r321548 - stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201707261642.v6QGgWg3076540@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Wed Jul 26 16:42:32 2017
New Revision: 321548
URL: https://svnweb.freebsd.org/changeset/base/321548

Log:
  MFC r318822: MFC r316913: 7869 panic in bpobj_space(): null pointer dereference
  
  illumos/illumos-gate@a3905a45920de250d181b66ac0b6b71bd200d9ef
  https://github.com/illumos/illumos-gate/commit/a3905a45920de250d181b66ac0b6b71bd200d9ef
  
  https://www.illumos.org/issues/7869
    The issue fixed by this patch is a race condition in the deadlist code.
    A thread executing an administrative command that uses
    `dsl_deadlist_space_range()` holds the lock of the whole `deadlist_t` to
    protect the access of all its entries that the deadlist contains in an
    avl tree.
    Sync threads trying to insert a new entry in the deadlist
    (through `dsl_deadlist_insert()` -> `dle_enqueue()`) do not hold the
    deadlist lock at that moment. If the `dle_bpobj` is the empty bpobj (our
    sentinel value), we close and reopen it. Between these two operations,
    it is possible for the `dsl_deadlist_space_range()` thread to dereference
    that bpobj which is `NULL` during that window.
    Threads should hold the a deadlist's `dl_lock` when they manipulate its
    internal data so scenarios like the one above are avoided. In addition,
    threads should also hold the bpobj lock whenever they are allocating the
    subobj list of a bpobj, and not just when they actually insert the subobj
    to the list. This way we can avoid potential memory leaks.
  
  Reviewed by: Matt Ahrens <mahrens@delphix.com>
  Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
  Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
  Reviewed by: John Kennedy <john.kennedy@delphix.com>
  Reviewed by: George Melikov <mail@gmelikov.ru>
  Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
  Approved by: Dan McDonald <danmcd@omniti.com>
  Author: Serapheim Dimitropoulos <serapheim@delphix.com>

Modified:
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c	Wed Jul 26 16:35:17 2017	(r321547)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c	Wed Jul 26 16:42:32 2017	(r321548)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
 
@@ -395,6 +395,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dm
 		return;
 	}
 
+	mutex_enter(&bpo->bpo_lock);
 	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
 	if (bpo->bpo_phys->bpo_subobjs == 0) {
 		bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
@@ -406,7 +407,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dm
 	ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
 	ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
 
-	mutex_enter(&bpo->bpo_lock);
 	dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
 	    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
 	    sizeof (subobj), &subobj, tx);

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c	Wed Jul 26 16:35:17 2017	(r321547)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c	Wed Jul 26 16:42:32 2017	(r321548)
@@ -72,6 +72,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
 	zap_cursor_t zc;
 	zap_attribute_t za;
 
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
+
 	ASSERT(!dl->dl_oldfmt);
 	if (dl->dl_havetree)
 		return;
@@ -182,6 +184,7 @@ static void
 dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
     const blkptr_t *bp, dmu_tx_t *tx)
 {
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
 	if (dle->dle_bpobj.bpo_object ==
 	    dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
 		uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
@@ -198,6 +201,7 @@ static void
 dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
     uint64_t obj, dmu_tx_t *tx)
 {
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
 	if (dle->dle_bpobj.bpo_object !=
 	    dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
 		bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
@@ -222,15 +226,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t
 		return;
 	}
 
+	mutex_enter(&dl->dl_lock);
 	dsl_deadlist_load_tree(dl);
 
 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
-	mutex_enter(&dl->dl_lock);
 	dl->dl_phys->dl_used +=
 	    bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
 	dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
 	dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
-	mutex_exit(&dl->dl_lock);
 
 	dle_tofind.dle_mintxg = bp->blk_birth;
 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -239,6 +242,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t
 	else
 		dle = AVL_PREV(&dl->dl_tree, dle);
 	dle_enqueue(dl, dle, bp, tx);
+	mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -254,16 +258,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mint
 	if (dl->dl_oldfmt)
 		return;
 
-	dsl_deadlist_load_tree(dl);
-
 	dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
 	dle->dle_mintxg = mintxg;
+
+	mutex_enter(&dl->dl_lock);
+	dsl_deadlist_load_tree(dl);
+
 	obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
 	VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
 	avl_add(&dl->dl_tree, dle);
 
 	VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
 	    mintxg, obj, tx));
+	mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -278,6 +285,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t m
 	if (dl->dl_oldfmt)
 		return;
 
+	mutex_enter(&dl->dl_lock);
 	dsl_deadlist_load_tree(dl);
 
 	dle_tofind.dle_mintxg = mintxg;
@@ -291,6 +299,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t m
 	kmem_free(dle, sizeof (*dle));
 
 	VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
+	mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -334,6 +343,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg
 		return (newobj);
 	}
 
+	mutex_enter(&dl->dl_lock);
 	dsl_deadlist_load_tree(dl);
 
 	for (dle = avl_first(&dl->dl_tree); dle;
@@ -347,6 +357,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg
 		VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
 		    dle->dle_mintxg, obj, tx));
 	}
+	mutex_exit(&dl->dl_lock);
 	return (newobj);
 }
 
@@ -424,6 +435,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t
 	uint64_t used, comp, uncomp;
 	bpobj_t bpo;
 
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
+
 	VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
 	VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
 	bpobj_close(&bpo);
@@ -431,11 +444,9 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t
 	dsl_deadlist_load_tree(dl);
 
 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
-	mutex_enter(&dl->dl_lock);
 	dl->dl_phys->dl_used += used;
 	dl->dl_phys->dl_comp += comp;
 	dl->dl_phys->dl_uncomp += uncomp;
-	mutex_exit(&dl->dl_lock);
 
 	dle_tofind.dle_mintxg = birth;
 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -475,6 +486,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, d
 		return;
 	}
 
+	mutex_enter(&dl->dl_lock);
 	for (zap_cursor_init(&zc, dl->dl_os, obj);
 	    zap_cursor_retrieve(&zc, &za) == 0;
 	    zap_cursor_advance(&zc)) {
@@ -489,6 +501,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, d
 	dmu_buf_will_dirty(bonus, tx);
 	bzero(dlp, sizeof (*dlp));
 	dmu_buf_rele(bonus, FTAG);
+	mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -503,6 +516,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *b
 	avl_index_t where;
 
 	ASSERT(!dl->dl_oldfmt);
+
+	mutex_enter(&dl->dl_lock);
 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
 	dsl_deadlist_load_tree(dl);
 
@@ -518,14 +533,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *b
 
 		VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
 		    &used, &comp, &uncomp));
-		mutex_enter(&dl->dl_lock);
 		ASSERT3U(dl->dl_phys->dl_used, >=, used);
 		ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
 		ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
 		dl->dl_phys->dl_used -= used;
 		dl->dl_phys->dl_comp -= comp;
 		dl->dl_phys->dl_uncomp -= uncomp;
-		mutex_exit(&dl->dl_lock);
 
 		VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
 		    dle->dle_mintxg, tx));
@@ -536,4 +549,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *b
 		kmem_free(dle, sizeof (*dle));
 		dle = dle_next;
 	}
+	mutex_exit(&dl->dl_lock);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201707261642.v6QGgWg3076540>