Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 9 Feb 2011 05:48:52 +0000 (UTC)
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r218472 - projects/graid/head/sys/geom/raid
Message-ID:  <201102090548.p195mqGu072424@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: imp
Date: Wed Feb  9 05:48:52 2011
New Revision: 218472
URL: http://svn.freebsd.org/changeset/base/218472

Log:
  Don't fail the last disk in the volume on read/write errors.  Instead,
  let the last surviving drive in a volume reflect its imperfect state
  back to the upper layers.  This makes perfect sense for the volume
  that has / on it where you might be able to survive long enough to
  reboot or insert a good disk and start a sync.  I think it makes sense
  in other cases as well, so I've just left a comment rather than making
  this yet another tunable.

Modified:
  projects/graid/head/sys/geom/raid/tr_raid1.c

Modified: projects/graid/head/sys/geom/raid/tr_raid1.c
==============================================================================
--- projects/graid/head/sys/geom/raid/tr_raid1.c	Wed Feb  9 05:30:38 2011	(r218471)
+++ projects/graid/head/sys/geom/raid/tr_raid1.c	Wed Feb  9 05:48:52 2011	(r218472)
@@ -226,6 +226,25 @@ g_raid_tr_update_state_raid1(struct g_ra
 }
 
 static void
+g_raid_tr_raid1_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd,
+    struct g_raid_disk *disk)
+{
+	/*
+	 * We don't fail the last disk in the pack, since it still has decent
+	 * data on it and that's better than failing the disk if it is the root
+	 * file system.
+	 *
+	 * XXX should this be controlled via a tunable?  It makes sense for
+	 * the volume that has / on it.  I can't think of a case where we'd
+	 * want the volume to go away on this kind of event.
+	 */
+	if (g_raid_nsubdisks(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == 1 &&
+	    g_raid_get_subdisk(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == sd)
+		return;
+	g_raid_fail_disk(sc, sd, disk);
+}
+
+static void
 g_raid_tr_raid1_rebuild_some(struct g_raid_tr_object *tr,
     struct g_raid_subdisk *sd)
 {
@@ -685,7 +704,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 				    trs->trso_flags & TR_RAID1_F_ABORT) {
 					if ((trs->trso_flags &
 					    TR_RAID1_F_ABORT) == 0) {
-						g_raid_fail_disk(sd->sd_softc,
+						g_raid_tr_raid1_fail_disk(sd->sd_softc,
 						    nsd, nsd->sd_disk);
 					}
 					trs->trso_flags &= ~TR_RAID1_F_DOING_SOME;
@@ -770,7 +789,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 		 */
 		do_write = 1;
 		if (sd->sd_read_errs > g_raid1_read_err_thresh) {
-			g_raid_fail_disk(sd->sd_softc, sd, sd->sd_disk);
+			g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk);
 			if (pbp->bio_children == 1)
 				do_write = 0;
 		}
@@ -852,7 +871,7 @@ g_raid_tr_iodone_raid1(struct g_raid_tr_
 		if (pbp->bio_cmd == BIO_WRITE && bp->bio_error) {
 			G_RAID_LOGREQ(0, bp, "Remap write failed: "
 			    "failing subdisk.");
-			g_raid_fail_disk(sd->sd_softc, sd, sd->sd_disk);
+			g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk);
 			bp->bio_error = 0;
 		}
 		G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201102090548.p195mqGu072424>