From owner-svn-src-all@freebsd.org Fri Oct 28 16:21:32 2016 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 3EF13C2414B; Fri, 28 Oct 2016 16:21:32 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 16157A08; Fri, 28 Oct 2016 16:21:32 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u9SGLVxi017261; Fri, 28 Oct 2016 16:21:31 GMT (envelope-from mav@FreeBSD.org) Received: (from mav@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u9SGLVed017260; Fri, 28 Oct 2016 16:21:31 GMT (envelope-from mav@FreeBSD.org) Message-Id: <201610281621.u9SGLVed017260@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mav set sender to mav@FreeBSD.org using -f From: Alexander Motin Date: Fri, 28 Oct 2016 16:21:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r308051 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 28 Oct 2016 16:21:32 -0000 Author: mav Date: Fri Oct 28 16:21:31 2016 New Revision: 308051 URL: https://svnweb.freebsd.org/changeset/base/308051 Log: Matching GUIDs, handle possible race on vdev detach. In case of vdev detach, causing top level mirror vdev destruction, leaf vdev changes its GUID to one of the destroyed mirror, that creates race condition when GUID in vdev label may not match one in the pool config. This change replicates logic nuance of vdev_validate() by adding special exception, matching the vdev GUID against the top level vdev GUID. Since this exception is not completely reliable (may give false positives if we fail to erase label on detached vdev), use it only as last resort. Quick way to reproduce this scenario now is detach vdev from a pool with enabled autoextend. During vdev detach autoextend logic tries to reopen remaining vdev, that always fails now since in-memory configuration is already updated, while on-disk labels are not yet. MFC after: 2 weeks Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Fri Oct 28 15:57:55 2016 (r308050) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Fri Oct 28 16:21:31 2016 (r308051) @@ -338,14 +338,6 @@ vdev_geom_close_locked(vdev_t *vd) vdev_geom_detach(cp, B_TRUE); } -static void -nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid) -{ - - (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid); - (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid); -} - /* * Issue one or more bios to the vdev in parallel * cmds, datas, offsets, errors, and sizes are arrays of length ncmds. Each IO @@ -606,58 +598,69 @@ vdev_geom_read_pool_label(const char *na return (*count > 0 ? 0 : ENOENT); } -static void -vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid) -{ - nvlist_t *config; - - g_topology_assert_not(); - - *pguid = 0; - *vguid = 0; - if (vdev_geom_read_config(cp, &config) == 0) { - nvlist_get_guids(config, pguid, vguid); - nvlist_free(config); - } -} +enum match { + NO_MATCH, + TOP_MATCH, + FULL_MATCH +}; -static boolean_t +static enum match vdev_attach_ok(vdev_t *vd, struct g_provider *pp) { - uint64_t pool_guid; - uint64_t vdev_guid; - struct g_consumer *zcp; - boolean_t pool_ok; - boolean_t vdev_ok; + nvlist_t *config; + uint64_t pool_guid, top_guid, vdev_guid; + struct g_consumer *cp; - zcp = vdev_geom_attach(pp, NULL); - if (zcp == NULL) { + cp = vdev_geom_attach(pp, NULL); + if (cp == NULL) { ZFS_LOG(1, "Unable to attach tasting instance to %s.", pp->name); - return (B_FALSE); + return (NO_MATCH); } g_topology_unlock(); - vdev_geom_read_guids(zcp, &pool_guid, &vdev_guid); + if (vdev_geom_read_config(cp, &config) != 0) { + g_topology_lock(); + vdev_geom_detach(cp, B_TRUE); + ZFS_LOG(1, "Unable to read config from %s.", pp->name); + return (NO_MATCH); + } g_topology_lock(); - vdev_geom_detach(zcp, B_TRUE); + vdev_geom_detach(cp, B_TRUE); - /* - * Check that the label's vdev guid matches the desired guid. If the - * label has a pool guid, check that it matches too. (Inactive spares - * and L2ARCs do not have any pool guid in the label.) + pool_guid = 0; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid); + top_guid = 0; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid); + vdev_guid = 0; + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid); + nvlist_free(config); + + /* + * Check that the label's pool guid matches the desired guid. + * Inactive spares and L2ARCs do not have any pool guid in the label. */ - if ((pool_guid == 0 || pool_guid == spa_guid(vd->vdev_spa)) && - vdev_guid == vd->vdev_guid) { - ZFS_LOG(1, "guids match for provider %s.", vd->vdev_path); - return (B_TRUE); - } else { - ZFS_LOG(1, "guid mismatch for provider %s: " - "%ju:%ju != %ju:%ju.", vd->vdev_path, - (uintmax_t)spa_guid(vd->vdev_spa), - (uintmax_t)vd->vdev_guid, - (uintmax_t)pool_guid, (uintmax_t)vdev_guid); - return (B_FALSE); + if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) { + ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.", + pp->name, + (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid); + return (NO_MATCH); } + + /* + * Check that the label's vdev guid matches the desired guid. + * The second condition handles possible race on vdev detach, when + * remaining vdev receives GUID of destroyed top level mirror vdev. + */ + if (vdev_guid == vd->vdev_guid) { + ZFS_LOG(1, "guids match for provider %s.", pp->name); + return (FULL_MATCH); + } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { + ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); + return (TOP_MATCH); + } + ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", + pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); + return (NO_MATCH); } static struct g_consumer * @@ -667,6 +670,7 @@ vdev_geom_attach_by_guids(vdev_t *vd) struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; + enum match m; g_topology_assert(); @@ -678,23 +682,26 @@ vdev_geom_attach_by_guids(vdev_t *vd) if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { - if (!vdev_attach_ok(vd, pp)) + m = vdev_attach_ok(vd, pp); + if (m == NO_MATCH) continue; + if (cp != NULL) { + if (m == FULL_MATCH) + vdev_geom_detach(cp, B_TRUE); + else + continue; + } cp = vdev_geom_attach(pp, vd); if (cp == NULL) { printf("ZFS WARNING: Unable to " "attach to %s.\n", pp->name); continue; } - break; + if (m == FULL_MATCH) + return (cp); } - if (cp != NULL) - break; } - if (cp != NULL) - break; } -end: return (cp); } @@ -742,7 +749,7 @@ vdev_geom_open_by_path(vdev_t *vd, int c pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1); if (pp != NULL) { ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path); - if (!check_guid || vdev_attach_ok(vd, pp)) + if (!check_guid || vdev_attach_ok(vd, pp) == FULL_MATCH) cp = vdev_geom_attach(pp, vd); }