Date: Wed, 9 Feb 2011 04:35:33 +0000 (UTC) From: Warner Losh <imp@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r218470 - projects/graid/head/sys/geom/raid Message-ID: <201102090435.p194ZXqQ070807@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: imp Date: Wed Feb 9 04:35:33 2011 New Revision: 218470 URL: http://svn.freebsd.org/changeset/base/218470 Log: When we were failing the subdisk due to too many read errors, we were returning the error that caused us to kick that subdisk out rather than retrying the read on the other disk. Flag this condition so we go through all the motions, except the write, so that we return the results of this other read. We now fail the disk w/o EIO being returned. Modified: projects/graid/head/sys/geom/raid/g_raid.c projects/graid/head/sys/geom/raid/g_raid.h projects/graid/head/sys/geom/raid/tr_raid1.c Modified: projects/graid/head/sys/geom/raid/g_raid.c ============================================================================== --- projects/graid/head/sys/geom/raid/g_raid.c Tue Feb 8 23:23:55 2011 (r218469) +++ projects/graid/head/sys/geom/raid/g_raid.c Wed Feb 9 04:35:33 2011 (r218470) @@ -910,6 +910,7 @@ g_raid_start_request(struct bio *bp) sc = bp->bio_to->geom->softc; sx_assert(&sc->sc_lock, SX_LOCKED); vol = bp->bio_to->private; + /* * Check to see if this item is in a locked range. If so, * queue it to our locked queue and return. We'll requeue Modified: projects/graid/head/sys/geom/raid/g_raid.h ============================================================================== --- projects/graid/head/sys/geom/raid/g_raid.h Tue Feb 8 23:23:55 2011 (r218469) +++ projects/graid/head/sys/geom/raid/g_raid.h Wed Feb 9 04:35:33 2011 (r218470) @@ -98,6 +98,8 @@ extern struct g_class g_raid_class; * doing some desirable action such as bad * block remapping after we detect a bad part * of the disk. + * G_RAID_BIO_FLAG_FAKE_REMAP Only doing the reading half of a remap + * operation. * * and the following meta item: * G_RAID_BIO_FLAG_SPECIAL And of the I/O flags that need to make it @@ -109,6 +111,7 @@ extern struct g_class g_raid_class; #define G_RAID_BIO_FLAG_REMAP 0x02 #define G_RAID_BIO_FLAG_SPECIAL \ (G_RAID_BIO_FLAG_SYNC|G_RAID_BIO_FLAG_REMAP) +#define G_RAID_BIO_FLAG_FAKE_REMAP 0x80 struct g_raid_lock { off_t l_offset; Modified: projects/graid/head/sys/geom/raid/tr_raid1.c ============================================================================== --- projects/graid/head/sys/geom/raid/tr_raid1.c Tue Feb 8 23:23:55 2011 (r218469) +++ projects/graid/head/sys/geom/raid/tr_raid1.c Wed Feb 9 04:35:33 2011 (r218470) @@ -630,7 +630,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ struct g_raid_volume *vol; struct bio *pbp; struct g_raid_tr_raid1_object *trs; - int i, error; + int i, error, do_write; trs = (struct g_raid_tr_raid1_object *)tr; pbp = bp->bio_parent; @@ -768,10 +768,11 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ * everything to get it back in sync), or just degrade the * drive, which kicks off a resync? */ + do_write = 1; if (sd->sd_read_errs > g_raid1_read_err_thresh) { g_raid_fail_disk(sd->sd_softc, sd, sd->sd_disk); if (pbp->bio_children == 1) - goto remapdone; + do_write = 0; } /* @@ -792,6 +793,8 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ pbp->bio_driver1 = sd; /* Save original subdisk. */ cbp->bio_caller1 = nsd; cbp->bio_cflags = G_RAID_BIO_FLAG_REMAP; + if (!do_write) + cbp->bio_cflags |= G_RAID_BIO_FLAG_FAKE_REMAP; /* Lock callback starts I/O */ g_raid_lock_range(sd->sd_volume, cbp->bio_offset, cbp->bio_length, pbp, cbp); @@ -805,8 +808,10 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ */ G_RAID_LOGREQ(2, bp, "Couldn't retry read, failing it"); } - if (bp->bio_cmd == BIO_READ && bp->bio_error == 0 && - pbp->bio_children > 1) { + if (bp->bio_cmd == BIO_READ && + bp->bio_error == 0 && + pbp->bio_children > 1 && + !(bp->bio_cflags & G_RAID_BIO_FLAG_FAKE_REMAP)) { /* * If it was a read, and bio_children is 2, then we just * recovered the data from the second drive. We should try to @@ -817,6 +822,11 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ * affect the return code of this current read, and can be * done at our liesure. However, to make the code simpler, it * is done syncrhonously. + * + * When the FAKE_REMAP flag is set, we fall through to the + * code below which handles the read without the next + * write so we don't return the error that failed the drive, + * but the results of reading the other disk. */ G_RAID_LOGREQ(3, bp, "Recovered data from other drive"); cbp = g_clone_bio(pbp); @@ -829,7 +839,6 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_ return; } } -remapdone: if (bp->bio_cflags & G_RAID_BIO_FLAG_REMAP) { /* * We're done with a remap write, mark the range as unlocked.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201102090435.p194ZXqQ070807>