Date: Mon, 27 Sep 2010 09:42:31 +0000 (UTC) From: Martin Matuska <mm@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r213198 - in head: cddl/contrib/opensolaris/cmd/zinject sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys Message-ID: <201009270942.o8R9gVVo026079@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mm Date: Mon Sep 27 09:42:31 2010 New Revision: 213198 URL: http://svn.freebsd.org/changeset/base/213198 Log: Properly handle IO with B_FAILFAST Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted OpenSolaris revision and Bug IDs: 9725:0bf7402e8022 6843014 ZFS B_FAILFAST handling is broken Approved by: delphij (mentor) Obtained from: OpenSolaris (Bug ID 6843014) MFC after: 3 weeks Modified: head/cddl/contrib/opensolaris/cmd/zinject/zinject.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c Modified: head/cddl/contrib/opensolaris/cmd/zinject/zinject.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/zinject/zinject.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/cddl/contrib/opensolaris/cmd/zinject/zinject.c Mon Sep 27 09:42:31 2010 (r213198) @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ZFS Fault Injector * @@ -227,7 +225,7 @@ usage(void) "\t\tClear the particular record (if given a numeric ID), or\n" "\t\tall records if 'all' is specificed.\n" "\n" - "\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n" + "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n" "\t\tInject a fault into a particular device or the device's\n" "\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n" "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n" @@ -519,7 +517,7 @@ main(int argc, char **argv) return (0); } - while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) { + while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) { switch (c) { case 'a': flags |= ZINJECT_FLUSH_ARC; @@ -556,6 +554,9 @@ main(int argc, char **argv) return (1); } break; + case 'F': + record.zi_failfast = B_TRUE; + break; case 'h': usage(); return (0); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Mon Sep 27 09:42:31 2010 (r213198) @@ -4252,10 +4252,16 @@ spa_sync(spa_t *spa, uint64_t txg) if (svdcount == SPA_DVAS_PER_BP) break; } - error = vdev_config_sync(svd, svdcount, txg); + error = vdev_config_sync(svd, svdcount, txg, B_FALSE); + if (error != 0) + error = vdev_config_sync(svd, svdcount, txg, + B_TRUE); } else { error = vdev_config_sync(rvd->vdev_child, - rvd->vdev_children, txg); + rvd->vdev_children, txg, B_FALSE); + if (error != 0) + error = vdev_config_sync(rvd->vdev_child, + rvd->vdev_children, txg, B_TRUE); } spa_config_exit(spa, SCL_STATE, FTAG); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h Mon Sep 27 09:42:31 2010 (r213198) @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zi extern void vdev_config_dirty(vdev_t *vd); extern void vdev_config_clean(vdev_t *vd); -extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg); +extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, + boolean_t); extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h Mon Sep 27 09:42:31 2010 (r213198) @@ -118,7 +118,7 @@ typedef struct zinject_record { uint32_t zi_error; uint64_t zi_type; uint32_t zi_freq; - uint32_t zi_pad; /* pad out to 64 bit alignment */ + uint32_t zi_failfast; } zinject_record_t; #define ZINJECT_NULL 0x1 Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Mon Sep 27 09:42:31 2010 (r213198) @@ -117,31 +117,33 @@ enum zio_compress { #define ZIO_PRIORITY_SCRUB (zio_priority_table[10]) #define ZIO_PRIORITY_TABLE_SIZE 11 -#define ZIO_FLAG_MUSTSUCCEED 0x00000 -#define ZIO_FLAG_CANFAIL 0x00001 -#define ZIO_FLAG_SPECULATIVE 0x00002 -#define ZIO_FLAG_CONFIG_WRITER 0x00004 -#define ZIO_FLAG_DONT_RETRY 0x00008 - -#define ZIO_FLAG_DONT_CACHE 0x00010 -#define ZIO_FLAG_DONT_QUEUE 0x00020 -#define ZIO_FLAG_DONT_AGGREGATE 0x00040 -#define ZIO_FLAG_DONT_PROPAGATE 0x00080 - -#define ZIO_FLAG_IO_BYPASS 0x00100 -#define ZIO_FLAG_IO_REPAIR 0x00200 -#define ZIO_FLAG_IO_RETRY 0x00400 -#define ZIO_FLAG_IO_REWRITE 0x00800 - -#define ZIO_FLAG_SELF_HEAL 0x01000 -#define ZIO_FLAG_RESILVER 0x02000 -#define ZIO_FLAG_SCRUB 0x04000 -#define ZIO_FLAG_SCRUB_THREAD 0x08000 - -#define ZIO_FLAG_PROBE 0x10000 -#define ZIO_FLAG_GANG_CHILD 0x20000 -#define ZIO_FLAG_RAW 0x40000 -#define ZIO_FLAG_GODFATHER 0x80000 +#define ZIO_FLAG_MUSTSUCCEED 0x000000 +#define ZIO_FLAG_CANFAIL 0x000001 +#define ZIO_FLAG_SPECULATIVE 0x000002 +#define ZIO_FLAG_CONFIG_WRITER 0x000004 +#define ZIO_FLAG_DONT_RETRY 0x000008 + +#define ZIO_FLAG_DONT_CACHE 0x000010 +#define ZIO_FLAG_DONT_QUEUE 0x000020 +#define ZIO_FLAG_DONT_AGGREGATE 0x000040 +#define ZIO_FLAG_DONT_PROPAGATE 0x000080 + +#define ZIO_FLAG_IO_BYPASS 0x000100 +#define ZIO_FLAG_IO_REPAIR 0x000200 +#define ZIO_FLAG_IO_RETRY 0x000400 +#define ZIO_FLAG_IO_REWRITE 0x000800 + +#define ZIO_FLAG_SELF_HEAL 0x001000 +#define ZIO_FLAG_RESILVER 0x002000 +#define ZIO_FLAG_SCRUB 0x004000 +#define ZIO_FLAG_SCRUB_THREAD 0x008000 + +#define ZIO_FLAG_PROBE 0x010000 +#define ZIO_FLAG_GANG_CHILD 0x020000 +#define ZIO_FLAG_RAW 0x040000 +#define ZIO_FLAG_GODFATHER 0x080000 + +#define ZIO_FLAG_TRYHARD 0x100000 #define ZIO_FLAG_GANG_INHERIT \ (ZIO_FLAG_CANFAIL | \ @@ -159,7 +161,8 @@ enum zio_compress { (ZIO_FLAG_GANG_INHERIT | \ ZIO_FLAG_IO_REPAIR | \ ZIO_FLAG_IO_RETRY | \ - ZIO_FLAG_PROBE) + ZIO_FLAG_PROBE | \ + ZIO_FLAG_TRYHARD) #define ZIO_FLAG_AGG_INHERIT \ (ZIO_FLAG_DONT_AGGREGATE | \ @@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id, struct zinject_record *record); extern int zio_clear_fault(int id); extern int zio_handle_fault_injection(zio_t *zio, int error); -extern int zio_handle_device_injection(vdev_t *vd, int error); +extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error); #ifdef __cplusplus Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c Mon Sep 27 09:42:31 2010 (r213198) @@ -928,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio) vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE | - ZIO_FLAG_DONT_RETRY; + ZIO_FLAG_TRYHARD; if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) { /* @@ -1025,7 +1025,7 @@ vdev_open(vdev_t *vd) error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); if (zio_injection_enabled && error == 0) - error = zio_handle_device_injection(vd, ENXIO); + error = zio_handle_device_injection(vd, NULL, ENXIO); if (error) { if (vd->vdev_removed && @@ -2207,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t ps if (flags & ZIO_FLAG_SPECULATIVE) return; + /* + * If this is an I/O error that is going to be retried, then ignore the + * error. Otherwise, the user may interpret B_FAILFAST I/O errors as + * hard errors, when in reality they can happen for any number of + * innocuous reasons (bus resets, MPxIO link failure, etc). + */ + if (zio->io_error == EIO && + !(zio->io_flags & ZIO_FLAG_IO_RETRY)) + return; + mutex_enter(&vd->vdev_stat_lock); if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) { if (zio->io_error == ECKSUM) Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c Mon Sep 27 09:42:31 2010 (r213198) @@ -401,8 +401,9 @@ vdev_disk_io_start(zio_t *zio) bioinit(bp); bp->b_flags = B_BUSY | B_NOCACHE | - (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) | - ((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST); + (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); + if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) + bp->b_flags |= B_FAILFAST; bp->b_bcount = zio->io_size; bp->b_un.b_addr = zio->io_data; bp->b_lblkno = lbtodb(zio->io_offset); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c Mon Sep 27 09:42:31 2010 (r213198) @@ -339,8 +339,8 @@ vdev_label_read_config(vdev_t *vd) nvlist_t *config = NULL; vdev_phys_t *vp; zio_t *zio; - int flags = - ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); @@ -349,6 +349,7 @@ vdev_label_read_config(vdev_t *vd) vp = zio_buf_alloc(sizeof (vdev_phys_t)); +retry: for (int l = 0; l < VDEV_LABELS; l++) { zio = zio_root(spa, NULL, NULL, flags); @@ -368,6 +369,11 @@ vdev_label_read_config(vdev_t *vd) } } + if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + zio_buf_free(vp, sizeof (vdev_phys_t)); return (config); @@ -648,6 +654,7 @@ vdev_label_init(vdev_t *vd, uint64_t crt /* * Write everything in parallel. */ +retry: zio = zio_root(spa, NULL, NULL, flags); for (int l = 0; l < VDEV_LABELS; l++) { @@ -674,6 +681,11 @@ vdev_label_init(vdev_t *vd, uint64_t crt error = zio_wait(zio); + if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + nvlist_free(label); zio_buf_free(pad2, VDEV_PAD_SIZE); zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd)); @@ -760,8 +772,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t * { spa_t *spa = vd->vdev_spa; vdev_t *rvd = spa->spa_root_vdev; - int flags = - ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; if (vd == rvd) { ASSERT(zio == NULL); @@ -999,7 +1011,7 @@ vdev_label_sync_list(spa_t *spa, int l, * at any time, you can just call it again, and it will resume its work. */ int -vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) +vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard) { spa_t *spa = svd[0]->vdev_spa; uberblock_t *ub = &spa->spa_uberblock; @@ -1008,6 +1020,16 @@ vdev_config_sync(vdev_t **svd, int svdco int error; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + /* + * Normally, we don't want to try too hard to write every label and + * uberblock. If there is a flaky disk, we don't want the rest of the + * sync process to block while we retry. But if we can't write a + * single label out, we should retry with ZIO_FLAG_TRYHARD before + * bailing out and declaring the pool faulted. + */ + if (tryhard) + flags |= ZIO_FLAG_TRYHARD; + ASSERT(ub->ub_txg <= txg); /* Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c Mon Sep 27 09:42:31 2010 (r213198) @@ -134,6 +134,15 @@ zfs_ereport_post(const char *subclass, s if (zio->io_flags & ZIO_FLAG_SPECULATIVE) return; + /* + * If this I/O is not a retry I/O, don't post an ereport. + * Otherwise, we risk making bad diagnoses based on B_FAILFAST + * I/Os. + */ + if (zio->io_error == EIO && + !(zio->io_flags & ZIO_FLAG_IO_RETRY)) + return; + if (vd != NULL) { /* * If the vdev has already been marked as failing due Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Mon Sep 27 09:42:31 2010 (r213198) @@ -1870,7 +1870,8 @@ zio_vdev_io_done(zio_t *zio) vdev_cache_write(zio); if (zio_injection_enabled && zio->io_error == 0) - zio->io_error = zio_handle_device_injection(vd, EIO); + zio->io_error = zio_handle_device_injection(vd, + zio, EIO); if (zio_injection_enabled && zio->io_error == 0) zio->io_error = zio_handle_label_injection(zio, EIO); Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c Mon Sep 27 09:05:51 2010 (r213197) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c Mon Sep 27 09:42:31 2010 (r213198) @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -195,7 +195,7 @@ zio_handle_label_injection(zio_t *zio, i int -zio_handle_device_injection(vdev_t *vd, int error) +zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) { inject_handler_t *handler; int ret = 0; @@ -210,6 +210,12 @@ zio_handle_device_injection(vdev_t *vd, continue; if (vd->vdev_guid == handler->zi_record.zi_guid) { + if (handler->zi_record.zi_failfast && + (zio == NULL || (zio->io_flags & + (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { + continue; + } + if (handler->zi_record.zi_error == error) { /* * For a failed open, pretend like the device
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201009270942.o8R9gVVo026079>