From owner-svn-src-head@freebsd.org Thu Apr 13 14:51:35 2017 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id E3874D3B247; Thu, 13 Apr 2017 14:51:35 +0000 (UTC) (envelope-from asomers@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id C0A5D1693; Thu, 13 Apr 2017 14:51:35 +0000 (UTC) (envelope-from asomers@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v3DEpYC9079956; Thu, 13 Apr 2017 14:51:34 GMT (envelope-from asomers@FreeBSD.org) Received: (from asomers@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v3DEpYbe079955; Thu, 13 Apr 2017 14:51:34 GMT (envelope-from asomers@FreeBSD.org) Message-Id: <201704131451.v3DEpYbe079955@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: asomers set sender to asomers@FreeBSD.org using -f From: Alan Somers Date: Thu, 13 Apr 2017 14:51:34 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r316760 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 13 Apr 2017 14:51:36 -0000 Author: asomers Date: Thu Apr 13 14:51:34 2017 New Revision: 316760 URL: https://svnweb.freebsd.org/changeset/base/316760 Log: Fix vdev_geom_attach_by_guids for partitioned disks When opening a vdev whose path is unknown, vdev_geom must find a geom provider with a label whose guids match the desired vdev. However, due to partitioning, it is possible that two non-synonomous providers will share some labels. For example, if the first partition starts at the beginning of the drive, then ada0 and ada0p1 will share the first label. More troubling, if the last partition runs to the end of the drive, then ada0p3 and ada0 will share the last label. If vdev_geom opens ada0 when it should've opened ada0p3, then the pool won't be readable. If it opens ada0 when it should've opened ada0p1, then it will corrupt some other partition when it writes the 3rd and 4th labels. The easiest way to reproduce this problem is to install a mirrored root pool with the default partition layout, then swap the positions of the two boot drives and reboot. Whether the bug manifests depends on the order in which geom lists its providers, which is arbitrary. Fix this situation by modifying the search algorithm to prefer geom providers that have all four labels intact. If no such provider exists, then open whichever provider has the most. Reviewed by: mav MFC after: 3 weeks Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D10365 Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Thu Apr 13 14:44:17 2017 (r316759) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Thu Apr 13 14:51:34 2017 (r316760) @@ -400,12 +400,17 @@ vdev_geom_io(struct g_consumer *cp, int kmem_free(bios, bios_size); } +/* + * Read the vdev config from a device. Return the number of valid labels that + * were found. The vdev config will be returned in config if and only if at + * least one valid label was found. + */ static int vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config) { struct g_provider *pp; vdev_phys_t *vdev_lists[VDEV_LABELS]; - char *p, *buf; + char *buf; size_t buflen; uint64_t psize, state, txg; off_t offsets[VDEV_LABELS]; @@ -413,7 +418,7 @@ vdev_geom_read_config(struct g_consumer off_t sizes[VDEV_LABELS]; int cmds[VDEV_LABELS]; int errors[VDEV_LABELS]; - int l, len; + int l, nlabels; g_topology_assert_not(); @@ -444,6 +449,7 @@ vdev_geom_read_config(struct g_consumer VDEV_LABELS); /* Parse the labels */ + nlabels = 0; for (l = 0; l < VDEV_LABELS; l++) { if (errors[l] != 0) continue; @@ -469,14 +475,14 @@ vdev_geom_read_config(struct g_consumer continue; } - break; + nlabels++; } /* Free the label storage */ for (l = 0; l < VDEV_LABELS; l++) kmem_free(vdev_lists[l], size); - return (*config == NULL ? ENOENT : 0); + return (nlabels); } static void @@ -559,7 +565,7 @@ vdev_geom_read_pool_label(const char *na struct g_consumer *zcp; nvlist_t *vdev_cfg; uint64_t pool_guid; - int error; + int error, nlabels; DROP_GIANT(); g_topology_lock(); @@ -580,10 +586,10 @@ vdev_geom_read_pool_label(const char *na if (zcp == NULL) continue; g_topology_unlock(); - error = vdev_geom_read_config(zcp, &vdev_cfg); + nlabels = vdev_geom_read_config(zcp, &vdev_cfg); g_topology_lock(); vdev_geom_detach(zcp, B_TRUE); - if (error) + if (nlabels == 0) continue; ZFS_LOG(1, "successfully read vdev config"); @@ -599,9 +605,13 @@ vdev_geom_read_pool_label(const char *na } enum match { - NO_MATCH, - TOP_MATCH, - FULL_MATCH + NO_MATCH = 0, /* No matching labels found */ + TOPGUID_MATCH = 1, /* Labels match top guid, not vdev guid*/ + ZERO_MATCH = 1, /* Should never be returned */ + ONE_MATCH = 2, /* 1 label matching the vdev_guid */ + TWO_MATCH = 3, /* 2 label matching the vdev_guid */ + THREE_MATCH = 4, /* 3 label matching the vdev_guid */ + FULL_MATCH = 5 /* all labels match the vdev_guid */ }; static enum match @@ -610,6 +620,7 @@ vdev_attach_ok(vdev_t *vd, struct g_prov nvlist_t *config; uint64_t pool_guid, top_guid, vdev_guid; struct g_consumer *cp; + int nlabels; cp = vdev_geom_attach(pp, NULL); if (cp == NULL) { @@ -618,7 +629,8 @@ vdev_attach_ok(vdev_t *vd, struct g_prov return (NO_MATCH); } g_topology_unlock(); - if (vdev_geom_read_config(cp, &config) != 0) { + nlabels = vdev_geom_read_config(cp, &config); + if (nlabels == 0) { g_topology_lock(); vdev_geom_detach(cp, B_TRUE); ZFS_LOG(1, "Unable to read config from %s.", pp->name); @@ -653,10 +665,10 @@ vdev_attach_ok(vdev_t *vd, struct g_prov */ if (vdev_guid == vd->vdev_guid) { ZFS_LOG(1, "guids match for provider %s.", pp->name); - return (FULL_MATCH); + return (ZERO_MATCH + nlabels); } else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) { ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name); - return (TOP_MATCH); + return (TOPGUID_MATCH); } ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.", pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid); @@ -668,13 +680,15 @@ vdev_geom_attach_by_guids(vdev_t *vd) { struct g_class *mp; struct g_geom *gp; - struct g_provider *pp; + struct g_provider *pp, *best_pp; struct g_consumer *cp; - enum match m; + enum match match, best_match; g_topology_assert(); cp = NULL; + best_pp = NULL; + best_match = NO_MATCH; LIST_FOREACH(mp, &g_classes, class) { if (mp == &zfs_vdev_class) continue; @@ -682,26 +696,25 @@ vdev_geom_attach_by_guids(vdev_t *vd) if (gp->flags & G_GEOM_WITHER) continue; LIST_FOREACH(pp, &gp->provider, provider) { - m = vdev_attach_ok(vd, pp); - if (m == NO_MATCH) - continue; - if (cp != NULL) { - if (m == FULL_MATCH) - vdev_geom_detach(cp, B_TRUE); - else - continue; + match = vdev_attach_ok(vd, pp); + if (match > best_match) { + best_match = match; + best_pp = pp; } - cp = vdev_geom_attach(pp, vd); - if (cp == NULL) { - printf("ZFS WARNING: Unable to " - "attach to %s.\n", pp->name); - continue; - } - if (m == FULL_MATCH) - return (cp); + if (match == FULL_MATCH) + goto out; } } } + +out: + if (best_pp) { + cp = vdev_geom_attach(best_pp, vd); + if (cp == NULL) { + printf("ZFS WARNING: Unable to attach to %s.\n", + best_pp->name); + } + } return (cp); }