From owner-svn-src-vendor@freebsd.org Fri Apr 14 18:24:39 2017 Return-Path: Delivered-To: svn-src-vendor@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id A0B29D3E0A0; Fri, 14 Apr 2017 18:24:39 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 5EBA1BE4; Fri, 14 Apr 2017 18:24:39 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v3EIOcUR070624; Fri, 14 Apr 2017 18:24:38 GMT (envelope-from avg@FreeBSD.org) Received: (from avg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v3EIOc5T070622; Fri, 14 Apr 2017 18:24:38 GMT (envelope-from avg@FreeBSD.org) Message-Id: <201704141824.v3EIOc5T070622@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: avg set sender to avg@FreeBSD.org using -f From: Andriy Gapon Date: Fri, 14 Apr 2017 18:24:38 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org Subject: svn commit: r316913 - vendor-sys/illumos/dist/uts/common/fs/zfs X-SVN-Group: vendor-sys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-vendor@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for the vendor work area tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 14 Apr 2017 18:24:39 -0000 Author: avg Date: Fri Apr 14 18:24:38 2017 New Revision: 316913 URL: https://svnweb.freebsd.org/changeset/base/316913 Log: 7869 panic in bpobj_space(): null pointer dereference illumos/illumos-gate@a3905a45920de250d181b66ac0b6b71bd200d9ef https://github.com/illumos/illumos-gate/commit/a3905a45920de250d181b66ac0b6b71bd200d9ef https://www.illumos.org/issues/7869 The issue fixed by this patch is a race condition in the deadlist code. A thread executing an administrative command that uses `dsl_deadlist_space_range()` holds the lock of the whole `deadlist_t` to protect the access of all its entries that the deadlist contains in an avl tree. Sync threads trying to insert a new entry in the deadlist (through `dsl_deadlist_insert()` -> `dle_enqueue()`) do not hold the deadlist lock at that moment. If the `dle_bpobj` is the empty bpobj (our sentinel value), we close and reopen it. Between these two operations, it is possible for the `dsl_deadlist_space_range()` thread to dereference that bpobj which is `NULL` during that window. Threads should hold the a deadlist's `dl_lock` when they manipulate its internal data so scenarios like the one above are avoided. In addition, threads should also hold the bpobj lock whenever they are allocating the subobj list of a bpobj, and not just when they actually insert the subobj to the list. This way we can avoid potential memory leaks. Reviewed by: Matt Ahrens Reviewed by: Dan Kimmel Reviewed by: Steve Gonczi Reviewed by: John Kennedy Reviewed by: George Melikov Reviewed by: Brian Behlendorf Approved by: Dan McDonald Author: Serapheim Dimitropoulos Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_deadlist.c Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c Fri Apr 14 18:24:03 2017 (r316912) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/bpobj.c Fri Apr 14 18:24:38 2017 (r316913) @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ @@ -393,6 +393,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint6 return; } + mutex_enter(&bpo->bpo_lock); dmu_buf_will_dirty(bpo->bpo_dbuf, tx); if (bpo->bpo_phys->bpo_subobjs == 0) { bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, @@ -404,7 +405,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint6 ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi)); ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ); - mutex_enter(&bpo->bpo_lock); dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), sizeof (subobj), &subobj, tx); Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_deadlist.c ============================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_deadlist.c Fri Apr 14 18:24:03 2017 (r316912) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_deadlist.c Fri Apr 14 18:24:38 2017 (r316913) @@ -72,6 +72,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *d zap_cursor_t zc; zap_attribute_t za; + ASSERT(MUTEX_HELD(&dl->dl_lock)); + ASSERT(!dl->dl_oldfmt); if (dl->dl_havetree) return; @@ -182,6 +184,7 @@ static void dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, const blkptr_t *bp, dmu_tx_t *tx) { + ASSERT(MUTEX_HELD(&dl->dl_lock)); if (dle->dle_bpobj.bpo_object == dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) { uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx); @@ -198,6 +201,7 @@ static void dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, uint64_t obj, dmu_tx_t *tx) { + ASSERT(MUTEX_HELD(&dl->dl_lock)); if (dle->dle_bpobj.bpo_object != dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) { bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx); @@ -222,15 +226,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, return; } + mutex_enter(&dl->dl_lock); dsl_deadlist_load_tree(dl); dmu_buf_will_dirty(dl->dl_dbuf, tx); - mutex_enter(&dl->dl_lock); dl->dl_phys->dl_used += bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp); dl->dl_phys->dl_comp += BP_GET_PSIZE(bp); dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp); - mutex_exit(&dl->dl_lock); dle_tofind.dle_mintxg = bp->blk_birth; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); @@ -239,6 +242,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, else dle = AVL_PREV(&dl->dl_tree, dle); dle_enqueue(dl, dle, bp, tx); + mutex_exit(&dl->dl_lock); } /* @@ -254,16 +258,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, if (dl->dl_oldfmt) return; - dsl_deadlist_load_tree(dl); - dle = kmem_alloc(sizeof (*dle), KM_SLEEP); dle->dle_mintxg = mintxg; + + mutex_enter(&dl->dl_lock); + dsl_deadlist_load_tree(dl); + obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); avl_add(&dl->dl_tree, dle); VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object, mintxg, obj, tx)); + mutex_exit(&dl->dl_lock); } /* @@ -278,6 +285,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t * if (dl->dl_oldfmt) return; + mutex_enter(&dl->dl_lock); dsl_deadlist_load_tree(dl); dle_tofind.dle_mintxg = mintxg; @@ -291,6 +299,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t * kmem_free(dle, sizeof (*dle)); VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx)); + mutex_exit(&dl->dl_lock); } /* @@ -334,6 +343,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, u return (newobj); } + mutex_enter(&dl->dl_lock); dsl_deadlist_load_tree(dl); for (dle = avl_first(&dl->dl_tree); dle; @@ -347,6 +357,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, u VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj, dle->dle_mintxg, obj, tx)); } + mutex_exit(&dl->dl_lock); return (newobj); } @@ -424,6 +435,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t uint64_t used, comp, uncomp; bpobj_t bpo; + ASSERT(MUTEX_HELD(&dl->dl_lock)); + VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp)); bpobj_close(&bpo); @@ -431,11 +444,9 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t dsl_deadlist_load_tree(dl); dmu_buf_will_dirty(dl->dl_dbuf, tx); - mutex_enter(&dl->dl_lock); dl->dl_phys->dl_used += used; dl->dl_phys->dl_comp += comp; dl->dl_phys->dl_uncomp += uncomp; - mutex_exit(&dl->dl_lock); dle_tofind.dle_mintxg = birth; dle = avl_find(&dl->dl_tree, &dle_tofind, &where); @@ -475,6 +486,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, u return; } + mutex_enter(&dl->dl_lock); for (zap_cursor_init(&zc, dl->dl_os, obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { @@ -489,6 +501,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, u dmu_buf_will_dirty(bonus, tx); bzero(dlp, sizeof (*dlp)); dmu_buf_rele(bonus, FTAG); + mutex_exit(&dl->dl_lock); } /* @@ -503,6 +516,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t * avl_index_t where; ASSERT(!dl->dl_oldfmt); + + mutex_enter(&dl->dl_lock); dmu_buf_will_dirty(dl->dl_dbuf, tx); dsl_deadlist_load_tree(dl); @@ -518,14 +533,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t * VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, &used, &comp, &uncomp)); - mutex_enter(&dl->dl_lock); ASSERT3U(dl->dl_phys->dl_used, >=, used); ASSERT3U(dl->dl_phys->dl_comp, >=, comp); ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp); dl->dl_phys->dl_used -= used; dl->dl_phys->dl_comp -= comp; dl->dl_phys->dl_uncomp -= uncomp; - mutex_exit(&dl->dl_lock); VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, dle->dle_mintxg, tx)); @@ -536,4 +549,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t * kmem_free(dle, sizeof (*dle)); dle = dle_next; } + mutex_exit(&dl->dl_lock); }