From owner-svn-src-stable-11@freebsd.org Fri May 5 16:19:55 2017 Return-Path: Delivered-To: svn-src-stable-11@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 7B697D5F6DA; Fri, 5 May 2017 16:19:55 +0000 (UTC) (envelope-from asomers@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 55E7331D; Fri, 5 May 2017 16:19:55 +0000 (UTC) (envelope-from asomers@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v45GJsPh041041; Fri, 5 May 2017 16:19:54 GMT (envelope-from asomers@FreeBSD.org) Received: (from asomers@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v45GJsUu041040; Fri, 5 May 2017 16:19:54 GMT (envelope-from asomers@FreeBSD.org) Message-Id: <201705051619.v45GJsUu041040@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: asomers set sender to asomers@FreeBSD.org using -f From: Alan Somers Date: Fri, 5 May 2017 16:19:54 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r317833 - stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs X-SVN-Group: stable-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-11@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for only the 11-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 05 May 2017 16:19:55 -0000 Author: asomers Date: Fri May 5 16:19:54 2017 New Revision: 317833 URL: https://svnweb.freebsd.org/changeset/base/317833 Log: MFC r316760: Fix vdev_geom_attach_by_guids for partitioned disks When opening a vdev whose path is unknown, vdev_geom must find a geom provider with a label whose guids match the desired vdev. However, due to partitioning, it is possible that two non-synonomous providers will share some labels. For example, if the first partition starts at the beginning of the drive, then ada0 and ada0p1 will share the first label. More troubling, if the last partition runs to the end of the drive, then ada0p3 and ada0 will share the last label. If vdev_geom opens ada0 when it should've opened ada0p3, then the pool won't be readable. If it opens ada0 when it should've opened ada0p1, then it will corrupt some other partition when it writes the 3rd and 4th labels. The easiest way to reproduce this problem is to install a mirrored root pool with the default partition layout, then swap the positions of the two boot drives and reboot. Whether the bug manifests depends on the order in which geom lists its providers, which is arbitrary. Fix this situation by modifying the search algorithm to prefer geom providers that have all four labels intact. If no such provider exists, then open whichever provider has the most. Reviewed by: mav Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D10365 Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Fri May 5 15:26:55 2017 (r317832) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Fri May 5 16:19:54 2017 (r317833) @@ -400,12 +400,17 @@ vdev_geom_io(struct g_consumer *cp, int kmem_free(bios, bios_size); } +/* + * Read the vdev config from a device. Return the number of valid labels that + * were found. The vdev config will be returned in config if and only if at + * least one valid label was found. + */ static int vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) { struct g_provider *pp; vdev_phys_t *vdev_lists[VDEV_LABELS]; - char *p, *buf; + char *buf; size_t buflen; uint64_t psize, state, txg; off_t offsets[VDEV_LABELS]; @@ -413,7 +418,7 @@ vdev_geom_read_config(struct g_consumer off_t sizes[VDEV_LABELS]; int cmds[VDEV_LABELS]; int errors[VDEV_LABELS]; - int l, len; + int l, nlabels; g_topology_assert_not(); @@ -444,6 +449,7 @@ vdev_geom_read_config(struct g_consumer VDEV_LABELS); /* Parse the labels */ + nlabels = 0; for (l = 0; l < VDEV_LABELS; l++) { if (errors[l] != 0) continue; @@ -469,14 +475,14 @@ vdev_geom_read_config(struct g_consumer continue; } - break; + nlabels++; } /* Free the label storage */ for (l = 0; l < VDEV_LABELS; l++) kmem_free(vdev_lists[l], size); - return (*config == NULL ? ENOENT : 0); + return (nlabels); } static void @@ -559,7 +565,7 @@ vdev_geom_read_pool_label(const char *na struct g_consumer *zcp; nvlist_t *vdev_cfg; uint64_t pool_guid; - int error; + int error, nlabels; DROP_GIANT(); g_topology_lock(); @@ -580,10 +586,10 @@ vdev_geom_read_pool_label(const char *na if (zcp == NULL) continue; g_topology_unlock(); - error = vdev_geom_read_config(zcp, &vdev_cfg); + nlabels = vdev_geom_read_config(zcp, &vdev_cfg); g_topology_lock(); vdev_geom_detach(zcp, B_TRUE); - if (error) + if (nlabels == 0) continue; ZFS_LOG(1, "successfully read vdev config"); @@ -599,9 +605,13 @@ vdev_geom_read_pool_label(const char *na } enum match { - NO_MATCH, - TOP_MATCH, - FULL_MATCH + NO_MATCH = 0, /* No matching labels found */ + TOPGUID_MATCH = 1, /* Labels match top guid, not vdev guid*/ + ZERO_MATCH = 1, /* Should never be returned */ + ONE_MATCH = 2, /* 1 label matching the vdev_guid */ + TWO_MATCH = 3, /* 2 label matching the vdev_guid */ + THREE_MATCH = 4, /* 3 label matching the vdev_guid */ + FULL_MATCH = 5 /* all labels match the vdev_guid */ }; static enum match @@ -610,6 +620,7 @@ vdev_attach_ok(vdev_t *vd, struct g_prov nvlist_t *config; uint64_t pool_guid, top_guid, vdev_guid; struct g_consumer *cp; + int nlabels; cp = vdev_geom_attach(pp, NULL); if (cp == NULL) { @@ -618,7 +629,8 @@ vdev_attach_ok(vdev_t *vd, struct g_prov return (NO_MATCH); } g_topology_unlock(); - if (vdev_geom_read_config(cp, &config) != 0) { + nlabels = vdev_geom_read_config(cp, &config); + if (nlabels == 0) { g_topology_lock(); vdev_geom_detach(cp, B_TRUE); ZFS_LOG(1, "Unable to read config from %s.", pp->name); @@ -653,10 +665,10 @@ vdev_attach_ok(vdev_t *vd, struct g_prov */ if (vdev_guid == vd->vdev_guid) { ZFS_LOG(1, "guids match for provider %s.", pp->name); - return (FULL_MATCH); + return (ZERO_MATCH + nlabels); } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); - return (TOP_MATCH); + return (TOPGUID_MATCH); } ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); @@ -668,13 +680,15 @@ vdev_geom_attach_by_guids(vdev_t *vd) { struct g_class *mp; struct g_geom *gp; - struct g_provider *pp; + struct g_provider *pp, *best_pp; struct g_consumer *cp; - enum match m; + enum match match, best_match; g_topology_assert(); cp = NULL; + best_pp = NULL; + best_match = NO_MATCH; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) continue; @@ -682,26 +696,25 @@ vdev_geom_attach_by_guids(vdev_t *vd) if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { - m = vdev_attach_ok(vd, pp); - if (m == NO_MATCH) - continue; - if (cp != NULL) { - if (m == FULL_MATCH) - vdev_geom_detach(cp, B_TRUE); - else - continue; + match = vdev_attach_ok(vd, pp); + if (match > best_match) { + best_match = match; + best_pp = pp; } - cp = vdev_geom_attach(pp, vd); - if (cp == NULL) { - printf("ZFS WARNING: Unable to " - "attach to %s.\n", pp->name); - continue; - } - if (m == FULL_MATCH) - return (cp); + if (match == FULL_MATCH) + goto out; } } } + +out: + if (best_pp) { + cp = vdev_geom_attach(best_pp, vd); + if (cp == NULL) { + printf("ZFS WARNING: Unable to attach to %s.\n", + best_pp->name); + } + } return (cp); }