From owner-svn-src-all@FreeBSD.ORG Wed Jan 6 23:09:24 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 299921065696; Wed, 6 Jan 2010 23:09:24 +0000 (UTC) (envelope-from delphij@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 101F98FC13; Wed, 6 Jan 2010 23:09:24 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o06N9NGa068127; Wed, 6 Jan 2010 23:09:23 GMT (envelope-from delphij@svn.freebsd.org) Received: (from delphij@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o06N9NpH068124; Wed, 6 Jan 2010 23:09:23 GMT (envelope-from delphij@svn.freebsd.org) Message-Id: <201001062309.o06N9NpH068124@svn.freebsd.org> From: Xin LI Date: Wed, 6 Jan 2010 23:09:23 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r201689 - in head/sys: boot/zfs cddl/boot/zfs X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 06 Jan 2010 23:09:24 -0000 Author: delphij Date: Wed Jan 6 23:09:23 2010 New Revision: 201689 URL: http://svn.freebsd.org/changeset/base/201689 Log: Instead of assuming all vdevs are healthy, check the newest vdev label for each vdev's status. Booting from a degraded vdev should now be more robust. Submitted by: Matt Reimer Sponsored by: VPOP Technologies, Inc. MFC after: 2 weeks Modified: head/sys/boot/zfs/zfsimpl.c head/sys/cddl/boot/zfs/zfsimpl.h Modified: head/sys/boot/zfs/zfsimpl.c ============================================================================== --- head/sys/boot/zfs/zfsimpl.c Wed Jan 6 23:05:00 2010 (r201688) +++ head/sys/boot/zfs/zfsimpl.c Wed Jan 6 23:09:23 2010 (r201689) @@ -404,7 +404,7 @@ vdev_create(uint64_t guid, vdev_read_t * } static int -vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp) +vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer) { int rc; uint64_t guid, id, ashift, nparity; @@ -412,7 +412,8 @@ vdev_init_from_nvlist(const unsigned cha const char *path; vdev_t *vdev, *kid; const unsigned char *kids; - int nkids, i; + int nkids, i, is_new; + uint64_t is_offline, is_faulted, is_degraded, is_removed; if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0, &guid) @@ -424,17 +425,6 @@ vdev_init_from_nvlist(const unsigned cha return (ENOENT); } - /* - * Assume that if we've seen this vdev tree before, this one - * will be identical. - */ - vdev = vdev_find(guid); - if (vdev) { - if (vdevp) - *vdevp = vdev; - return (0); - } - if (strcmp(type, VDEV_TYPE_MIRROR) && strcmp(type, VDEV_TYPE_DISK) && strcmp(type, VDEV_TYPE_RAIDZ)) { @@ -442,6 +432,21 @@ vdev_init_from_nvlist(const unsigned cha return (EIO); } + is_offline = is_removed = is_faulted = is_degraded = 0; + + nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0, + &is_offline); + nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0, + &is_removed); + nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, 0, + &is_faulted); + nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0, + &is_degraded); + + vdev = vdev_find(guid); + if (!vdev) { + is_new = 1; + if (!strcmp(type, VDEV_TYPE_MIRROR)) vdev = vdev_create(guid, vdev_mirror_read); else if (!strcmp(type, VDEV_TYPE_RAIDZ)) @@ -480,6 +485,39 @@ vdev_init_from_nvlist(const unsigned cha vdev->v_name = strdup(type); } } + + if (is_offline) + vdev->v_state = VDEV_STATE_OFFLINE; + else if (is_removed) + vdev->v_state = VDEV_STATE_REMOVED; + else if (is_faulted) + vdev->v_state = VDEV_STATE_FAULTED; + else if (is_degraded) + vdev->v_state = VDEV_STATE_DEGRADED; + else + vdev->v_state = VDEV_STATE_HEALTHY; + } else { + is_new = 0; + + if (is_newer) { + /* + * We've already seen this vdev, but from an older + * vdev label, so let's refresh its state from the + * newer label. + */ + if (is_offline) + vdev->v_state = VDEV_STATE_OFFLINE; + else if (is_removed) + vdev->v_state = VDEV_STATE_REMOVED; + else if (is_faulted) + vdev->v_state = VDEV_STATE_FAULTED; + else if (is_degraded) + vdev->v_state = VDEV_STATE_DEGRADED; + else + vdev->v_state = VDEV_STATE_HEALTHY; + } + } + rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids); /* @@ -488,10 +526,12 @@ vdev_init_from_nvlist(const unsigned cha if (rc == 0) { vdev->v_nchildren = nkids; for (i = 0; i < nkids; i++) { - rc = vdev_init_from_nvlist(kids, &kid); + rc = vdev_init_from_nvlist(kids, &kid, is_newer); if (rc) return (rc); - STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink); + if (is_new) + STAILQ_INSERT_TAIL(&vdev->v_children, kid, + v_childlink); kids = nvlist_next(kids); } } else { @@ -593,7 +633,9 @@ state_name(vdev_state_t state) "UNKNOWN", "CLOSED", "OFFLINE", + "REMOVED", "CANT_OPEN", + "FAULTED", "DEGRADED", "ONLINE" }; @@ -711,7 +753,7 @@ vdev_probe(vdev_phys_read_t *read, void uint64_t pool_txg, pool_guid; const char *pool_name; const unsigned char *vdevs; - int i, rc; + int i, rc, is_newer; char upbuf[1024]; const struct uberblock *up; @@ -793,12 +835,15 @@ vdev_probe(vdev_phys_read_t *read, void spa = spa_create(pool_guid); spa->spa_name = strdup(pool_name); } - if (pool_txg > spa->spa_txg) + if (pool_txg > spa->spa_txg) { spa->spa_txg = pool_txg; + is_newer = 1; + } else + is_newer = 0; /* * Get the vdev tree and create our in-core copy of it. - * If we already have a healthy vdev with this guid, this must + * If we already have a vdev with this guid, this must * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ @@ -808,16 +853,16 @@ vdev_probe(vdev_phys_read_t *read, void return (EIO); } vdev = vdev_find(guid); - if (vdev && vdev->v_state == VDEV_STATE_HEALTHY) { + if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */ return (EIO); - } if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, 0, &vdevs)) { return (EIO); } - rc = vdev_init_from_nvlist(vdevs, &top_vdev); + + rc = vdev_init_from_nvlist(vdevs, &top_vdev, is_newer); if (rc) return (rc); @@ -838,7 +883,6 @@ vdev_probe(vdev_phys_read_t *read, void if (vdev) { vdev->v_phys_read = read; vdev->v_read_priv = read_priv; - vdev->v_state = VDEV_STATE_HEALTHY; } else { printf("ZFS: inconsistent nvlist contents\n"); return (EIO); Modified: head/sys/cddl/boot/zfs/zfsimpl.h ============================================================================== --- head/sys/cddl/boot/zfs/zfsimpl.h Wed Jan 6 23:05:00 2010 (r201688) +++ head/sys/cddl/boot/zfs/zfsimpl.h Wed Jan 6 23:09:23 2010 (r201689) @@ -548,7 +548,6 @@ typedef enum { #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_STATS "stats" #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" -#define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" @@ -558,6 +557,16 @@ typedef enum { #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ +/* + * The persistent vdev state is stored as separate values rather than a single + * 'vdev_state' entry. This is because a device can be in multiple states, such + * as offline and degraded. + */ +#define ZPOOL_CONFIG_OFFLINE "offline" +#define ZPOOL_CONFIG_FAULTED "faulted" +#define ZPOOL_CONFIG_DEGRADED "degraded" +#define ZPOOL_CONFIG_REMOVED "removed" + #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" @@ -590,7 +599,9 @@ typedef enum vdev_state { VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ VDEV_STATE_CLOSED, /* Not currently open */ VDEV_STATE_OFFLINE, /* Not allowed to open */ + VDEV_STATE_REMOVED, /* Explicitly removed from system */ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ + VDEV_STATE_FAULTED, /* External request to fault device */ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ VDEV_STATE_HEALTHY /* Presumed good */ } vdev_state_t;