Date: Wed, 20 Jan 2010 01:13:52 +0000 (UTC)
From: Xin LI <delphij@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject: svn commit: r202669 - in stable/8/sys: boot/zfs cddl/boot/zfs
Message-ID: <201001200113.o0K1DqXF062433@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Wed Jan 20 01:13:52 2010
New Revision: 202669
URL: http://svn.freebsd.org/changeset/base/202669

Log:
  MFC r201689:

  Instead of assuming all vdevs are healthy, check the newest vdev label
  for each vdev's status. Booting from a degraded vdev should now be
  more robust.

  Submitted by: Matt Reimer <mattjreimer at gmail.com>
  Sponsored by: VPOP Technologies, Inc.

Modified:
  stable/8/sys/boot/zfs/zfsimpl.c
  stable/8/sys/cddl/boot/zfs/zfsimpl.h
Directory Properties:
  stable/8/sys/ (props changed)
  stable/8/sys/amd64/include/xen/ (props changed)
  stable/8/sys/cddl/contrib/opensolaris/ (props changed)
  stable/8/sys/contrib/dev/acpica/ (props changed)
  stable/8/sys/contrib/pf/ (props changed)
  stable/8/sys/dev/xen/xenpci/ (props changed)

Modified: stable/8/sys/boot/zfs/zfsimpl.c
==============================================================================
--- stable/8/sys/boot/zfs/zfsimpl.c Wed Jan 20 01:07:38 2010 (r202668)
+++ stable/8/sys/boot/zfs/zfsimpl.c Wed Jan 20 01:13:52 2010 (r202669)
@@ -404,7 +404,7 @@ vdev_create(uint64_t guid, vdev_read_t *
 }
 
 static int
-vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp)
+vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
 {
 	int rc;
 	uint64_t guid, id, ashift, nparity;
@@ -412,7 +412,8 @@ vdev_init_from_nvlist(const unsigned cha
 	const char *path;
 	vdev_t *vdev, *kid;
 	const unsigned char *kids;
-	int nkids, i;
+	int nkids, i, is_new;
+	uint64_t is_offline, is_faulted, is_degraded, is_removed;
 
 	if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID,
 			DATA_TYPE_UINT64, 0, &guid)
@@ -424,17 +425,6 @@ vdev_init_from_nvlist(const unsigned cha
 		return (ENOENT);
 	}
 
-	/*
-	 * Assume that if we've seen this vdev tree before, this one
-	 * will be identical.
-	 */
-	vdev = vdev_find(guid);
-	if (vdev) {
-		if (vdevp)
-			*vdevp = vdev;
-		return (0);
-	}
-
 	if (strcmp(type, VDEV_TYPE_MIRROR)
 	    && strcmp(type, VDEV_TYPE_DISK)
 	    && strcmp(type, VDEV_TYPE_RAIDZ)) {
@@ -442,6 +432,21 @@ vdev_init_from_nvlist(const unsigned cha
 		return (EIO);
 	}
 
+	is_offline = is_removed = is_faulted = is_degraded = 0;
+
+	nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0,
+			&is_offline);
+	nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0,
+			&is_removed);
+	nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, 0,
+			&is_faulted);
+	nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0,
+			&is_degraded);
+
+	vdev = vdev_find(guid);
+	if (!vdev) {
+		is_new = 1;
+
 	if (!strcmp(type, VDEV_TYPE_MIRROR))
 		vdev = vdev_create(guid, vdev_mirror_read);
 	else if (!strcmp(type, VDEV_TYPE_RAIDZ))
@@ -480,6 +485,39 @@ vdev_init_from_nvlist(const unsigned cha
 			vdev->v_name = strdup(type);
 		}
 	}
+
+		if (is_offline)
+			vdev->v_state = VDEV_STATE_OFFLINE;
+		else if (is_removed)
+			vdev->v_state = VDEV_STATE_REMOVED;
+		else if (is_faulted)
+			vdev->v_state = VDEV_STATE_FAULTED;
+		else if (is_degraded)
+			vdev->v_state = VDEV_STATE_DEGRADED;
+		else
+			vdev->v_state = VDEV_STATE_HEALTHY;
+	} else {
+		is_new = 0;
+
+		if (is_newer) {
+			/*
+			 * We've already seen this vdev, but from an older
+			 * vdev label, so let's refresh its state from the
+			 * newer label.
+			 */
+			if (is_offline)
+				vdev->v_state = VDEV_STATE_OFFLINE;
+			else if (is_removed)
+				vdev->v_state = VDEV_STATE_REMOVED;
+			else if (is_faulted)
+				vdev->v_state = VDEV_STATE_FAULTED;
+			else if (is_degraded)
+				vdev->v_state = VDEV_STATE_DEGRADED;
+			else
+				vdev->v_state = VDEV_STATE_HEALTHY;
+		}
+	}
+
 	rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN,
 			DATA_TYPE_NVLIST_ARRAY, &nkids, &kids);
 	/*
@@ -488,10 +526,12 @@ vdev_init_from_nvlist(const unsigned cha
 	if (rc == 0) {
 		vdev->v_nchildren = nkids;
 		for (i = 0; i < nkids; i++) {
-			rc = vdev_init_from_nvlist(kids, &kid);
+			rc = vdev_init_from_nvlist(kids, &kid, is_newer);
 			if (rc)
 				return (rc);
-			STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink);
+			if (is_new)
+				STAILQ_INSERT_TAIL(&vdev->v_children, kid,
+						   v_childlink);
 			kids = nvlist_next(kids);
 		}
 	} else {
@@ -593,7 +633,9 @@ state_name(vdev_state_t state)
 		"UNKNOWN",
 		"CLOSED",
 		"OFFLINE",
+		"REMOVED",
 		"CANT_OPEN",
+		"FAULTED",
 		"DEGRADED",
 		"ONLINE"
 	};
@@ -711,7 +753,7 @@ vdev_probe(vdev_phys_read_t *read, void
 	uint64_t pool_txg, pool_guid;
 	const char *pool_name;
 	const unsigned char *vdevs;
-	int i, rc;
+	int i, rc, is_newer;
 	char upbuf[1024];
 	const struct uberblock *up;
 
@@ -793,12 +835,15 @@ vdev_probe(vdev_phys_read_t *read, void
 		spa = spa_create(pool_guid);
 		spa->spa_name = strdup(pool_name);
 	}
-	if (pool_txg > spa->spa_txg)
+	if (pool_txg > spa->spa_txg) {
 		spa->spa_txg = pool_txg;
+		is_newer = 1;
+	} else
+		is_newer = 0;
 
 	/*
 	 * Get the vdev tree and create our in-core copy of it.
-	 * If we already have a healthy vdev with this guid, this must
+	 * If we already have a vdev with this guid, this must
 	 * be some kind of alias (overlapping slices, dangerously dedicated
 	 * disks etc).
 	 */
@@ -808,16 +853,16 @@ vdev_probe(vdev_phys_read_t *read, void
 		return (EIO);
 	}
 	vdev = vdev_find(guid);
-	if (vdev && vdev->v_state == VDEV_STATE_HEALTHY) {
+	if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */
 		return (EIO);
-	}
 
 	if (nvlist_find(nvlist,
 			ZPOOL_CONFIG_VDEV_TREE,
 			DATA_TYPE_NVLIST, 0, &vdevs)) {
 		return (EIO);
 	}
-	rc = vdev_init_from_nvlist(vdevs, &top_vdev);
+
+	rc = vdev_init_from_nvlist(vdevs, &top_vdev, is_newer);
 	if (rc)
 		return (rc);
 
@@ -838,7 +883,6 @@ vdev_probe(vdev_phys_read_t *read, void
 	if (vdev) {
 		vdev->v_phys_read = read;
 		vdev->v_read_priv = read_priv;
-		vdev->v_state = VDEV_STATE_HEALTHY;
 	} else {
 		printf("ZFS: inconsistent nvlist contents\n");
 		return (EIO);

Modified: stable/8/sys/cddl/boot/zfs/zfsimpl.h
==============================================================================
--- stable/8/sys/cddl/boot/zfs/zfsimpl.h Wed Jan 20 01:07:38 2010 (r202668)
+++ stable/8/sys/cddl/boot/zfs/zfsimpl.h Wed Jan 20 01:13:52 2010 (r202669)
@@ -548,7 +548,6 @@ typedef enum {
 #define ZPOOL_CONFIG_DTL "DTL"
 #define ZPOOL_CONFIG_STATS "stats"
 #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
-#define ZPOOL_CONFIG_OFFLINE "offline"
 #define ZPOOL_CONFIG_ERRCOUNT "error_count"
 #define ZPOOL_CONFIG_NOT_PRESENT "not_present"
 #define ZPOOL_CONFIG_SPARES "spares"
@@ -558,6 +557,16 @@ typedef enum {
 #define ZPOOL_CONFIG_HOSTNAME "hostname"
 #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
 
+/*
+ * The persistent vdev state is stored as separate values rather than a single
+ * 'vdev_state' entry. This is because a device can be in multiple states, such
+ * as offline and degraded.
+ */
+#define ZPOOL_CONFIG_OFFLINE "offline"
+#define ZPOOL_CONFIG_FAULTED "faulted"
+#define ZPOOL_CONFIG_DEGRADED "degraded"
+#define ZPOOL_CONFIG_REMOVED "removed"
+
 #define VDEV_TYPE_ROOT "root"
 #define VDEV_TYPE_MIRROR "mirror"
 #define VDEV_TYPE_REPLACING "replacing"
@@ -590,7 +599,9 @@ typedef enum vdev_state {
 	VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */
 	VDEV_STATE_CLOSED, /* Not currently open */
 	VDEV_STATE_OFFLINE, /* Not allowed to open */
+	VDEV_STATE_REMOVED, /* Explicitly removed from system */
 	VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */
+	VDEV_STATE_FAULTED, /* External request to fault device */
 	VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */
 	VDEV_STATE_HEALTHY /* Presumed good */
 } vdev_state_t;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001200113.o0K1DqXF062433>