Date: Thu, 2 Jan 2014 08:10:35 +0000 (UTC)
From: Xin LI <delphij@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r260185 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID: <201401020810.s028AZBp086769@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Thu Jan 2 08:10:35 2014
New Revision: 260185
URL: http://svnweb.freebsd.org/changeset/base/260185

Log:
  MFV r260155:

  When we encounter an I/O error on a piece of metadata while deleting
  a file system or zvol, we don't update the bptree_entry_phys_t's
  bookmark. This would lead to double free of bp's which will lead to
  space map corruption.

  Instead of tolerating and allowing the corruption, panic immediately.

  See Illumos #4390 for more details.

  4391 panic system rather than corrupting pool if we hit bug 4390

  Illumos/illumos-gate@8b36997aa24d9817807faa4efa301ac9c07a2b78

  MFC after: 2 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
Directory Properties:
  head/sys/cddl/contrib/opensolaris/ (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c	Thu Jan 2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c	Thu Jan 2 08:10:35 2014	(r260185)
@@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t ob
 	err = 0;
 	for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
 		bptree_entry_phys_t bte;
+		int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
 
 		ASSERT(!free || i == ba.ba_phys->bt_begin);
 
@@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t ob
 		if (err != 0)
 			break;
 
+		if (zfs_recover)
+			flags |= TRAVERSE_HARD;
 		err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
-		    bte.be_birth_txg, &bte.be_zb,
-		    TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
+		    bte.be_birth_txg, &bte.be_zb, flags,
 		    bptree_visit_cb, &ba);
 		if (free) {
-			ASSERT(err == 0 || err == ERESTART);
-			if (err != 0) {
+			if (err == ERESTART) {
 				/* save bookmark for future resume */
 				ASSERT3U(bte.be_zb.zb_objset, ==,
 				    ZB_DESTROYED_OBJSET);
@@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t ob
 				dmu_write(os, obj, i * sizeof (bte),
 				    sizeof (bte), &bte, tx);
 				break;
-			} else {
-				ba.ba_phys->bt_begin++;
-				(void) dmu_free_range(os, obj,
-				    i * sizeof (bte), sizeof (bte), tx);
 			}
+			if (err != 0) {
+				/*
+				 * We can not properly handle an i/o
+				 * error, because the traversal code
+				 * does not know how to resume from an
+				 * arbitrary bookmark.
+				 */
+				zfs_panic_recover("error %u from "
+				    "traverse_dataset_destroyed()", err);
+			}
+
+			ba.ba_phys->bt_begin++;
+			(void) dmu_free_range(os, obj,
+			    i * sizeof (bte), sizeof (bte), tx);
 		}
 	}
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Thu Jan 2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Thu Jan 2 08:10:35 2014	(r260185)
@@ -383,7 +383,7 @@ traverse_visitbp(traverse_data_t *td, co
 		(void) arc_buf_remove_ref(buf, &buf);
 
 post:
-	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
+	if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
 		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
 		if (err == ERESTART)
 			pause = B_TRUE;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Thu Jan 2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Thu Jan 2 08:10:35 2014	(r260185)
@@ -1348,6 +1348,9 @@ dsl_scan_free_should_pause(dsl_scan_t *s
 {
 	uint64_t elapsed_nanosecs;
 
+	if (zfs_recover)
+		return (B_FALSE);
+
 	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
 	return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
 	    (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Thu Jan 2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Thu Jan 2 08:10:35 2014	(r260185)
@@ -252,6 +252,8 @@ SYSCTL_INT(_debug, OID_AUTO, zfs_flags,
  * zfs_recover can be set to nonzero to attempt to recover from
  * otherwise-fatal errors, typically caused by on-disk corruption. When
  * set, calls to zfs_panic_recover() will turn into warning messages.
+ * This should only be used as a last resort, as it typically results
+ * in leaked space, or worse.
  */
 int zfs_recover = 0;
 SYSCTL_DECL(_vfs_zfs);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	Thu Jan 2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	Thu Jan 2 08:10:35 2014	(r260185)
@@ -49,6 +49,7 @@ extern "C" {
 #endif
 
 extern int zfs_flags;
+extern int zfs_recover;
 
 #define ZFS_DEBUG_DPRINTF (1<<0)
 #define ZFS_DEBUG_DBUF_VERIFY (1<<1)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201401020810.s028AZBp086769>