From owner-svn-src-stable@freebsd.org Sat Nov 12 23:37:27 2016 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id A3BBEC3D885; Sat, 12 Nov 2016 23:37:27 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 7DEA31215; Sat, 12 Nov 2016 23:37:27 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id uACNbQQx079263; Sat, 12 Nov 2016 23:37:26 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id uACNbQMc079262; Sat, 12 Nov 2016 23:37:26 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201611122337.uACNbQMc079262@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Sat, 12 Nov 2016 23:37:26 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r308589 - stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs X-SVN-Group: stable-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 12 Nov 2016 23:37:27 -0000 Author: mav Date: Sat Nov 12 23:37:26 2016 New Revision: 308589 URL: https://svnweb.freebsd.org/changeset/base/308589 Log: MFC r308051: Matching GUIDs, handle possible race on vdev detach. In case of vdev detach, causing top level mirror vdev destruction, leaf vdev changes its GUID to one of the destroyed mirror, that creates race condition when GUID in vdev label may not match one in the pool config. This change replicates logic nuance of vdev_validate() by adding special exception, matching the vdev GUID against the top level vdev GUID. Since this exception is not completely reliable (may give false positives if we fail to erase label on detached vdev), use it only as last resort. Quick way to reproduce this scenario now is detach vdev from a pool with enabled autoextend. During vdev detach autoextend logic tries to reopen remaining vdev, that always fails now since in-memory configuration is already updated, while on-disk labels are not yet. Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Sat Nov 12 23:36:27 2016 (r308588) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Sat Nov 12 23:37:26 2016 (r308589) @@ -338,14 +338,6 @@ vdev_geom_close_locked(vdev_t *vd) vdev_geom_detach(cp, B_TRUE); } -static void -nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) -{ - - (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); - (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); -} - /* * Issue one or more bios to the vdev in parallel * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO @@ -606,58 +598,69 @@ vdev_geom_read_pool_label(const char *na return (*count > 0 ? 0 : ENOENT); } -static void -vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) -{ - nvlist_t *config; - - g_topology_assert_not(); - - *pguid = 0; - *vguid = 0; - if (vdev_geom_read_config(cp, &config) == 0) { - nvlist_get_guids(config, pguid, vguid); - nvlist_free(config); - } -} +enum match { + NO_MATCH, + TOP_MATCH, + FULL_MATCH +}; -static boolean_t +static enum match vdev_attach_ok(vdev_t *vd, struct g_provider *pp) { - uint64_t pool_guid; - uint64_t vdev_guid; - struct g_consumer *zcp; - boolean_t pool_ok; - boolean_t vdev_ok; + nvlist_t *config; + uint64_t pool_guid, top_guid, vdev_guid; + struct g_consumer *cp; - zcp = vdev_geom_attach(pp, NULL); - if (zcp == NULL) { + cp = vdev_geom_attach(pp, NULL); + if (cp == NULL) { ZFS_LOG(1, "Unable to attach tasting instance to %s.", pp->name); - return (B_FALSE); + return (NO_MATCH); } g_topology_unlock(); - vdev_geom_read_guids(zcp, &pool_guid, &vdev_guid); + if (vdev_geom_read_config(cp, &config) != 0) { + g_topology_lock(); + vdev_geom_detach(cp, B_TRUE); + ZFS_LOG(1, "Unable to read config from %s.", pp->name); + return (NO_MATCH); + } g_topology_lock(); - vdev_geom_detach(zcp, B_TRUE); + vdev_geom_detach(cp, B_TRUE); - /* - * Check that the label's vdev guid matches the desired guid. If the - * label has a pool guid, check that it matches too. (Inactive spares - * and L2ARCs do not have any pool guid in the label.) + pool_guid = 0; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); + top_guid = 0; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); + vdev_guid = 0; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); + nvlist_free(config); + + /* + * Check that the label's pool guid matches the desired guid. + * Inactive spares and L2ARCs do not have any pool guid in the label. */ - if ((pool_guid == 0 || pool_guid == spa_guid(vd->vdev_spa)) && - vdev_guid == vd->vdev_guid) { - ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path); - return (B_TRUE); - } else { - ZFS_LOG(1, "guid mismatch for provider %s: " - "%ju:%ju != %ju:%ju.", vd->vdev_path, - (uintmax_t)spa_guid(vd->vdev_spa), - (uintmax_t)vd->vdev_guid, - (uintmax_t)pool_guid, (uintmax_t)vdev_guid); - return (B_FALSE); + if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { + ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", + pp->name, + (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); + return (NO_MATCH); } + + /* + * Check that the label's vdev guid matches the desired guid. + * The second condition handles possible race on vdev detach, when + * remaining vdev receives GUID of destroyed top level mirror vdev. + */ + if (vdev_guid == vd->vdev_guid) { + ZFS_LOG(1, "guids match for provider %s.", pp->name); + return (FULL_MATCH); + } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { + ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); + return (TOP_MATCH); + } + ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", + pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); + return (NO_MATCH); } static struct g_consumer * @@ -667,6 +670,7 @@ vdev_geom_attach_by_guids(vdev_t *vd) struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; + enum match m; g_topology_assert(); @@ -678,23 +682,26 @@ vdev_geom_attach_by_guids(vdev_t *vd) if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { - if (!vdev_attach_ok(vd, pp)) + m = vdev_attach_ok(vd, pp); + if (m == NO_MATCH) continue; + if (cp != NULL) { + if (m == FULL_MATCH) + vdev_geom_detach(cp, B_TRUE); + else + continue; + } cp = vdev_geom_attach(pp, vd); if (cp == NULL) { printf("ZFS WARNING: Unable to " "attach to %s.\n", pp->name); continue; } - break; + if (m == FULL_MATCH) + return (cp); } - if (cp != NULL) - break; } - if (cp != NULL) - break; } -end: return (cp); } @@ -742,7 +749,7 @@ vdev_geom_open_by_path(vdev_t *vd, int c pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); - if (!check_guid || vdev_attach_ok(vd, pp)) + if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) cp = vdev_geom_attach(pp, vd); }