Date: Mon, 13 Aug 2007 19:46:30 GMT From: Ulf Lilleengen <lulf@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 125117 for review Message-ID: <200708131946.l7DJkUBM094036@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=125117 Change 125117 by lulf@lulf_carrot on 2007/08/13 19:45:30 - Improve gv_is_newer hack by adding the drive to check as a parameter. There was a case where gvinum didn't have the actual drive first in the list, which resulted in comparing the wrong timestamps. - Re-add the growable state of a plex. - Fix a bug where GV_SD_CANGOUP was set instead of checked. - Make raid5 growing depend on the raid-5 plex not being degraded. I've added some awareness to the fact that a subdisk could be added to a degraded raid-5 plex, but I don't allow it for now, since it also requires a rewrite of how degraded writes and reads are done. However, the idea is that all of gvinum should be aware that they can be in a growing phase. - Make sure the plex doesn't get the grown size until after the grow. This prevents writes outside the actual plex-size. - Use gv_start_plex in gv_start_vol since it basically does the same. Affected files ... .. //depot/projects/soc2007/lulf/gvinum_fixup/sbin/gvinum/gvinum.c#20 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.c#35 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.h#28 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_create.c#10 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_events.c#14 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_init.c#24 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_list.c#5 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_plex.c#26 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_raid5.c#14 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_share.c#6 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_state.c#23 edit .. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_subr.c#29 edit .. 
//depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_var.h#25 edit Differences ... ==== //depot/projects/soc2007/lulf/gvinum_fixup/sbin/gvinum/gvinum.c#20 (text+ko) ==== ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.c#35 (text+ko) ==== ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.h#28 (text+ko) ==== @@ -93,9 +93,10 @@ int gv_consumer_is_open(struct g_consumer *); int gv_provider_is_open(struct g_provider *); int gv_object_type(struct gv_softc *, char *); -void gv_parse_config(struct gv_softc *, char *); +void gv_parse_config(struct gv_softc *, char *, struct gv_drive *); int gv_sd_to_drive(struct gv_sd *, struct gv_drive *); int gv_sd_to_plex(struct gv_sd *, struct gv_plex *); +int gv_sdcount(struct gv_plex *, int); void gv_update_plex_config(struct gv_plex *); void gv_update_vol_size(struct gv_volume *, off_t); off_t gv_vol_size(struct gv_volume *); ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_create.c#10 (text+ko) ==== @@ -253,6 +253,13 @@ return (GV_ERR_CREATE); } + if (p->org == GV_PLEX_RAID5 && p->state == GV_PLEX_DEGRADED) { + printf("VINUM: can't add subdisk to %s, rebuild plex before " + " adding subdisks\n", p->name); + g_free(s); + return (0); + } + /* * First we give the subdisk to the drive, to handle autosized * values ... 
==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_events.c#14 (text+ko) ==== @@ -126,7 +126,7 @@ g_free(hdr); goto failed; } - gv_parse_config(sc, buf); + gv_parse_config(sc, buf, d); g_free(buf); g_topology_lock(); @@ -213,4 +213,5 @@ LIST_INSERT_HEAD(&sc->drives, d, drive); else LIST_INSERT_AFTER(d2, d, drive); + gv_save_config(sc); } ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_init.c#24 (text+ko) ==== @@ -104,14 +104,14 @@ KASSERT(p != NULL, ("gv_start_plex: NULL p")); - if (p->state == GV_PLEX_UP) - return (0); +/* if (p->state == GV_PLEX_UP) + return (0);*/ error = 0; v = p->vol_sc; - if ((v != NULL) && (v->plexcount > 1)) - error = gv_sync(v); - else if (p->org == GV_PLEX_STRIPED) { +/* if ((v != NULL) && (v->plexcount > 1)) + error = gv_sync(v);*/ + if (p->org == GV_PLEX_STRIPED) { grow = 0; LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { @@ -122,18 +122,15 @@ if (grow) error = gv_grow_plex(p); } else if (p->org == GV_PLEX_RAID5) { - if (p->state == GV_PLEX_DEGRADED) { - rebuild = 0; + if (p->state > GV_PLEX_DEGRADED) { LIST_FOREACH(s, &p->subdisks, in_plex) { - if (s->state < GV_SD_UP) { - rebuild = 1; - break; + if (s->flags & GV_SD_GROW) { + error = gv_grow_plex(p); + return (error); } } - if (rebuild) - error = gv_rebuild_plex(p); - else - error = gv_grow_plex(p); + } else if (p->state == GV_PLEX_DEGRADED) { + error = gv_rebuild_plex(p); } else error = gv_init_plex(p); } @@ -158,23 +155,7 @@ else if (v->plexcount == 1) { p = LIST_FIRST(&v->plexes); KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name)); - if (p->org == GV_PLEX_RAID5) { - switch (p->state) { - case GV_PLEX_DOWN: - error = gv_init_plex(p); - break; - case GV_PLEX_DEGRADED: - error = gv_rebuild_plex(p); - break; - default: - return (0); - } - } else { - LIST_FOREACH(s, &p->subdisks, in_plex) { - gv_set_sd_state(s, GV_SD_UP, - GV_SETSTATE_CONFIG); - } - } + error = gv_start_plex(p); } else error = gv_sync(v); @@ 
-239,6 +220,8 @@ static int gv_rebuild_plex(struct gv_plex *p) { + struct gv_drive *d; + struct gv_sd *s; /* XXX: Is this safe? (Allows for mounted rebuild)*/ /* if (gv_provider_is_open(p->vol_sc->provider)) @@ -248,6 +231,18 @@ p->flags & GV_PLEX_REBUILDING || p->flags & GV_PLEX_GROWING) return (EINPROGRESS); + /* + * Make sure that all subdisks have consumers. We won't allow a rebuild + * unless every subdisk have one. + */ + LIST_FOREACH(s, &p->subdisks, in_plex) { + d = s->drive_sc; + if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) { + printf("VINUM: can't rebuild %s, subdisk(s) have no " + "drives\n", p->name); + return (ENXIO); + } + } p->flags |= GV_PLEX_REBUILDING; p->synced = 0; ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_list.c#5 (text+ko) ==== @@ -300,7 +300,7 @@ (intmax_t)p->synced, (int)((p->synced * 100) / p->size)); } - printf("\t\tOrganization: %s", gv_plexorg(p->org)); + sbuf_printf(sb, "\t\tOrganization: %s", gv_plexorg(p->org)); if (gv_is_striped(p)) { sbuf_printf(sb, "\tStripe size: %s\n", gv_roughlength(p->stripesize, 1)); ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_plex.c#26 (text+ko) ==== @@ -161,15 +161,9 @@ KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0")); /* Take growing subdisks into account when calculating. */ - sdcount = p->sdcount; - if (boff >= p->synced) { - LIST_FOREACH(s, &p->subdisks, in_plex) { - if (s->flags & GV_SD_GROW) - sdcount--; - } - } else if (!(boff + bcount <= p->synced)){ + sdcount = gv_sdcount(p, (boff >= p->synced)); + if (!(boff + bcount <= p->synced)) return (GV_ERR_ISBUSY); - } /* The number of the subdisk where the stripe resides. */ *sdno = stripeno % sdcount; @@ -712,11 +706,7 @@ g_free(bp->bio_data); /* Find the real size of the plex. 
*/ - sdcount = p->sdcount; - LIST_FOREACH(s, &p->subdisks, in_plex) { - if (s->flags & GV_SD_GROW) - sdcount--; - } + sdcount = gv_sdcount(p, 1); s = LIST_FIRST(&p->subdisks); /* XXX: should not ever happen */ if (s == NULL) { @@ -731,6 +721,7 @@ s->flags &= ~GV_SD_GROW; gv_set_sd_state(s, GV_SD_UP, 0); } + p->size = gv_plex_size(p); gv_set_plex_state(p, GV_PLEX_UP, 0); g_topology_lock(); gv_access(v->provider, -1, -1, 0); @@ -974,7 +965,7 @@ { struct gv_sd *s; int error, flags; - off_t offset; + off_t offset, plexsize; error = bp->bio_error; flags = bp->bio_cflags; @@ -1000,7 +991,7 @@ return; } - offset += (p->stripesize * (p->sdcount - 1)); + offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); if (offset >= p->size) { /* We're finished. */ printf("VINUM: rebuild of %s finished\n", p->name); ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_raid5.c#14 (text+ko) ==== @@ -165,7 +165,7 @@ if (p == NULL || LIST_EMPTY(&p->subdisks)) return (ENXIO); - gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 0); + gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1); /* Find the right subdisk. */ parity = NULL; @@ -239,7 +239,7 @@ if (p == NULL || LIST_EMPTY(&p->subdisks)) return (ENXIO); - gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 0); + gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1); /* Find the right subdisk. 
*/ broken = NULL; @@ -553,7 +553,7 @@ off_t len_left, stripeend, stripeoff, stripestart; sdcount = p->sdcount; - if (growing) { + if (growing) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) sdcount--; ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_share.c#6 (text+ko) ==== @@ -270,6 +270,8 @@ return (GV_PLEX_INITIALIZING); else if (!strcmp(buf, "degraded")) return (GV_PLEX_DEGRADED); + else if (!strcmp(buf, "growable")) + return (GV_PLEX_GROWABLE); else return (GV_PLEX_DOWN); } @@ -285,6 +287,8 @@ return "initializing"; case GV_PLEX_DEGRADED: return "degraded"; + case GV_PLEX_GROWABLE: + return "growable"; case GV_PLEX_UP: return "up"; default: ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_state.c#23 (text+ko) ==== @@ -220,7 +220,7 @@ if (p->org != GV_PLEX_RAID5) break; - else if (s->flags |= GV_SD_CANGOUP) { + else if (s->flags & GV_SD_CANGOUP) { s->flags &= ~GV_SD_CANGOUP; break; } else if (flags & GV_SETSTATE_FORCE) @@ -412,6 +412,7 @@ void gv_update_plex_state(struct gv_plex *p) { + struct gv_sd *s; int sdstates; int oldstate; @@ -425,6 +426,7 @@ /* If all subdisks are up, our plex can be up, too. */ if (sdstates == GV_SD_UPSTATE) p->state = GV_PLEX_UP; + /* One or more of our subdisks are down. */ else if (sdstates & GV_SD_DOWNSTATE) { /* A RAID5 plex can handle one dead subdisk. */ @@ -435,15 +437,24 @@ /* Some of our subdisks are initializing. 
*/ } else if (sdstates & GV_SD_INITSTATE) { + if (p->flags & GV_PLEX_SYNCING || - p->flags & GV_PLEX_REBUILDING || - p->flags & GV_PLEX_GROWING) + p->flags & GV_PLEX_REBUILDING) p->state = GV_PLEX_DEGRADED; else p->state = GV_PLEX_DOWN; } else p->state = GV_PLEX_DOWN; + if (p->state == GV_PLEX_UP) { + LIST_FOREACH(s, &p->subdisks, in_plex) { + if (s->flags & GV_SD_GROW) { + p->state = GV_PLEX_GROWABLE; + break; + } + } + } + if (p->state != oldstate) printf("VINUM: plex %s state change: %s -> %s\n", p->name, gv_plexstate(oldstate), gv_plexstate(p->state)); ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_subr.c#29 (text+ko) ==== @@ -51,11 +51,11 @@ #include <geom/vinum/geom_vinum.h> #include <geom/vinum/geom_vinum_share.h> -int gv_drive_is_newer(struct gv_softc *); +int gv_drive_is_newer(struct gv_softc *, struct gv_drive *); static off_t gv_plex_smallest_sd(struct gv_plex *); void -gv_parse_config(struct gv_softc *sc, char *buf) +gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d) { char *aptr, *bptr, *cptr; struct gv_volume *v, *v2; @@ -64,7 +64,7 @@ int error, is_newer, tokens; char *token[GV_MAXARGS]; - is_newer = gv_drive_is_newer(sc); + is_newer = gv_drive_is_newer(sc, d); /* Until the end of the string *buf. */ for (aptr = buf; *aptr != '\0'; aptr = bptr) { @@ -377,9 +377,9 @@ } else { if ((p->org == GV_PLEX_RAID5 || p->org == GV_PLEX_STRIPED) && - !(p->flags & GV_PLEX_NEWBORN)) { + !(p->flags & GV_PLEX_NEWBORN) && + p->state >= GV_PLEX_DEGRADED) { s->flags |= GV_SD_GROW; - s->state = GV_SD_UP; } p->sdcount++; } @@ -397,12 +397,31 @@ v->size = size; } +/* Return how many subdisks that constitute the original plex. */ +int +gv_sdcount(struct gv_plex *p, int growing) +{ + struct gv_sd *s; + int sdcount; + + sdcount = p->sdcount; + if (growing) { + LIST_FOREACH(s, &p->subdisks, in_plex) { + if (s->flags & GV_SD_GROW) + sdcount--; + } + } + + return (sdcount); +} + /* Calculates the plex size. 
*/ off_t gv_plex_size(struct gv_plex *p) { struct gv_sd *s; off_t size; + int sdcount; KASSERT(p != NULL, ("gv_plex_size: NULL p")); @@ -411,6 +430,7 @@ /* Adjust the size of our plex. */ size = 0; + sdcount = gv_sdcount(p, 1); switch (p->org) { case GV_PLEX_CONCAT: LIST_FOREACH(s, &p->subdisks, in_plex) @@ -418,11 +438,11 @@ break; case GV_PLEX_STRIPED: s = LIST_FIRST(&p->subdisks); - size = p->sdcount * s->size; + size = sdcount * s->size; break; case GV_PLEX_RAID5: s = LIST_FIRST(&p->subdisks); - size = (p->sdcount - 1) * s->size; + size = (sdcount - 1) * s->size; break; } @@ -521,11 +541,10 @@ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); p->flags &= ~GV_PLEX_ADDED; gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); - } else { + } else if (p->state == GV_PLEX_UP) { LIST_FOREACH(s, &p->subdisks, in_plex) { if (s->flags & GV_SD_GROW) { - gv_set_plex_state(p, GV_PLEX_DEGRADED, - GV_SETSTATE_FORCE); + p->state = GV_PLEX_GROWABLE; break; } } @@ -938,19 +957,14 @@ * Return 1 if a > b, 0 otherwise. */ int -gv_drive_is_newer(struct gv_softc *sc) +gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d) { - struct gv_drive *d, *d2; + struct gv_drive *d2; struct timeval *a, *b; KASSERT(!LIST_EMPTY(&sc->drives), ("gv_is_drive_newer: empty drive list")); - /* - * We assume that the first drive on the list is the one to be compared - * with the others. - */ - d = LIST_FIRST(&sc->drives); a = &d->hdr->label.last_update; LIST_FOREACH(d2, &sc->drives, drive) { if ((d == d2) || (d2->state != GV_DRIVE_UP) || ==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_var.h#25 (text+ko) ==== @@ -312,7 +312,7 @@ #define GV_PLEX_DOWN 0 #define GV_PLEX_INITIALIZING 1 #define GV_PLEX_DEGRADED 2 -#define GV_PLEX_RESIZING 3 +#define GV_PLEX_GROWABLE 3 #define GV_PLEX_UP 4 int org; /* The plex organisation. */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200708131946.l7DJkUBM094036>