Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 1 May 2012 08:19:29 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r234868 - head/sys/geom/raid
Message-ID:  <201205010819.q418JT7k009948@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Tue May  1 08:19:29 2012
New Revision: 234868
URL: http://svn.freebsd.org/changeset/base/234868

Log:
  Improve DDF metadata writing.

Modified:
  head/sys/geom/raid/md_ddf.c

Modified: head/sys/geom/raid/md_ddf.c
==============================================================================
--- head/sys/geom/raid/md_ddf.c	Tue May  1 07:46:30 2012	(r234867)
+++ head/sys/geom/raid/md_ddf.c	Tue May  1 08:19:29 2012	(r234868)
@@ -89,9 +89,9 @@ struct g_raid_md_ddf_pervolume {
 struct g_raid_md_ddf_object {
 	struct g_raid_md_object	 mdio_base;
 	struct ddf_meta		 mdio_meta;
+	int			 mdio_starting;
 	struct callout		 mdio_start_co;	/* STARTING state timer. */
 	int			 mdio_started;
-	int			 mdio_incomplete;
 	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
 };
 
@@ -835,7 +835,8 @@ ddf_vol_meta_create(struct ddf_vol_meta 
 }
 
 static void
-ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src, uint8_t *GUID)
+ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
+    uint8_t *GUID, int started)
 {
 	struct ddf_header *hdr;
 	struct ddf_vd_entry *vde;
@@ -850,15 +851,15 @@ ddf_vol_meta_update(struct ddf_vol_meta 
 	size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;
 
 	if (dst->vdc == NULL ||
-	    ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-	    GET32(dst, vdc->Sequence_Number))) > 0)
+	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+	    GET32(dst, vdc->Sequence_Number))) > 0))
 		vnew = 1;
 	else
 		vnew = 0;
 
 	if (dst->bvdc[bvd] == NULL ||
-	    ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0)
+	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
 		bvnew = 1;
 	else
 		bvnew = 0;
@@ -1803,6 +1804,7 @@ g_raid_md_ddf_start(struct g_raid_volume
 	struct g_raid_disk *disk;
 	struct g_raid_md_object *md;
 	struct g_raid_md_ddf_pervolume *pv;
+	struct g_raid_md_ddf_object *mdi;
 	struct ddf_vol_meta *vmeta;
 	struct ddf_vdc_record *vdc;
 	uint64_t *val2;
@@ -1810,6 +1812,7 @@ g_raid_md_ddf_start(struct g_raid_volume
 
 	sc = vol->v_softc;
 	md = sc->sc_md;
+	mdi = (struct g_raid_md_ddf_object *)md;
 	pv = vol->v_md_data;
 	vmeta = &pv->pv_meta;
 	vdc = vmeta->vdc;
@@ -1862,6 +1865,7 @@ g_raid_md_ddf_start(struct g_raid_volume
 	}
 
 	pv->pv_started = 1;
+	mdi->mdio_starting--;
 	callout_stop(&pv->pv_start_co);
 	G_RAID_DEBUG1(0, sc, "Volume started.");
 	g_raid_md_write_ddf(md, vol, NULL, NULL);
@@ -1948,13 +1952,13 @@ g_raid_md_ddf_new_disk(struct g_raid_dis
 			callout_reset(&pv->pv_start_co,
 			    g_raid_start_timeout * hz,
 			    g_raid_ddf_go, vol);
+			mdi->mdio_starting++;
 		} else
 			pv = vol->v_md_data;
 
 		/* If we haven't started yet - check metadata freshness. */
 		vmeta = &pv->pv_meta;
-		if (vmeta->hdr == NULL || !pv->pv_started)
-			ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID);
+		ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, pv->pv_started);
 	}
 
 	if (spare == 1) {
@@ -2649,26 +2653,29 @@ g_raid_md_write_ddf(struct g_raid_md_obj
 	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
 		return (0);
 
-	/* Generate new per-volume metadata for affected volumes. */
-	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
-		if (vol->v_stopping)
-			continue;
-
-		/* Skip volumes not related to specified targets. */
-		if (tvol != NULL && vol != tvol)
-			continue;
-		if (tsd != NULL && vol != tsd->sd_volume)
-			continue;
-		if (tdisk != NULL) {
-			for (i = 0; i < vol->v_disks_count; i++) {
-				if (vol->v_subdisks[i].sd_disk == tdisk)
-					break;
-			}
-			if (i >= vol->v_disks_count)
+	/*
+	 * Clear disk flags to let only really needed ones to be reset.
+	 * Do it only if there are no volumes in starting state now,
+	 * as they can update disk statuses yet and we may kill innocent.
+	 */
+	if (mdi->mdio_starting == 0) {
+		for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
+			if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
 				continue;
+			SET16(gmeta, pdr->entry[i].PD_Type,
+			    GET16(gmeta, pdr->entry[i].PD_Type) &
+			    ~DDF_PDE_PARTICIPATING);
+			if ((GET16(gmeta, pdr->entry[i].PD_State) &
+			    DDF_PDE_PFA) == 0)
+				SET16(gmeta, pdr->entry[i].PD_State, 0);
 		}
+	}
 
+	/* Generate/update new per-volume metadata. */
+	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
 		pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
+		if (vol->v_stopping || !pv->pv_started)
+			continue;
 		vmeta = &pv->pv_meta;
 
 		SET32(vmeta, vdc->Sequence_Number,
@@ -2711,7 +2718,9 @@ g_raid_md_write_ddf(struct g_raid_md_obj
 			SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL);
 		else
 			SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL);
-		if (vol->v_dirty)
+		if (vol->v_dirty ||
+		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 ||
+		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0)
 			SET8(vmeta, vde->VD_State,
 			    GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY);
 		SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX
@@ -2719,45 +2728,50 @@ g_raid_md_write_ddf(struct g_raid_md_obj
 
 		for (i = 0; i < vol->v_disks_count; i++) {
 			sd = &vol->v_subdisks[i];
-			disk = sd->sd_disk;
-			if (disk == NULL)
-				continue;
-			pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
 			bvd = i / GET16(vmeta, vdc->Primary_Element_Count);
 			pos = i % GET16(vmeta, vdc->Primary_Element_Count);
-			if (vmeta->bvdc[bvd] == NULL) {
-				size = GET16(vmeta,
-				    hdr->Configuration_Record_Length) *
-				    vmeta->sectorsize;
-				vmeta->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK);
-				memcpy(vmeta->bvdc[bvd], vmeta->vdc, size);
+			disk = sd->sd_disk;
+			if (disk != NULL) {
+				pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
+				if (vmeta->bvdc[bvd] == NULL) {
+					size = GET16(vmeta,
+					    hdr->Configuration_Record_Length) *
+					    vmeta->sectorsize;
+					vmeta->bvdc[bvd] = malloc(size,
+					    M_MD_DDF, M_WAITOK);
+					memset(vmeta->bvdc[bvd], 0xff, size);
+				}
+				memcpy(vmeta->bvdc[bvd], vmeta->vdc,
+				    sizeof(struct ddf_vdc_record));
 				SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, bvd);
+				SET64(vmeta, bvdc[bvd]->Block_Count,
+				    sd->sd_size / vol->v_sectorsize);
+				SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos],
+				    GET32(&pd->pd_meta, pdd->PD_Reference));
+				val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
+				    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
+				SET64P(vmeta, val2 + pos,
+				    sd->sd_offset / vol->v_sectorsize);
 			}
-			SET64(vmeta, bvdc[bvd]->Block_Count,
-			    sd->sd_size / vol->v_sectorsize);
-			SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos],
-			    GET32(&pd->pd_meta, pdd->PD_Reference));
-			val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
-			    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
-			SET64P(vmeta, val2 + pos,
-			    sd->sd_offset / vol->v_sectorsize);
+			if (vmeta->bvdc[bvd] == NULL)
+				continue;
 
 			j = ddf_meta_find_pd(gmeta, NULL,
-			    GET32(&pd->pd_meta, pdd->PD_Reference));
+			    GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]));
 			if (j < 0)
 				continue;
 			SET32(gmeta, pdr->entry[j].PD_Type,
 			    GET32(gmeta, pdr->entry[j].PD_Type) |
 			    DDF_PDE_PARTICIPATING);
-			if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
+			if (sd->sd_state == G_RAID_SUBDISK_S_NONE)
 				SET32(gmeta, pdr->entry[j].PD_State,
 				    GET32(gmeta, pdr->entry[j].PD_State) |
-				    DDF_PDE_FAILED | DDF_PDE_PFA);
-			else if (sd->sd_state <= G_RAID_SUBDISK_S_UNINITIALIZED)
+				    DDF_PDE_FAILED | DDF_PDE_MISSING);
+			else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
 				SET32(gmeta, pdr->entry[j].PD_State,
 				    GET32(gmeta, pdr->entry[j].PD_State) |
-				    DDF_PDE_FAILED);
-			else if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE)
+				    DDF_PDE_FAILED | DDF_PDE_PFA);
+			else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD)
 				SET32(gmeta, pdr->entry[j].PD_State,
 				    GET32(gmeta, pdr->entry[j].PD_State) |
 				    DDF_PDE_FAILED);
@@ -2768,17 +2782,33 @@ g_raid_md_write_ddf(struct g_raid_md_obj
 		}
 	}
 
+	/* Remove disks without "participating" flag (unused). */
+	for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
+		if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
+			continue;
+		if (GET16(gmeta, pdr->entry[i].PD_Type) & DDF_PDE_PARTICIPATING)
+			j = i;
+		else
+			memset(&gmeta->pdr->entry[i], 0xff,
+			    sizeof(struct ddf_pd_entry));
+	}
+	SET16(gmeta, pdr->Populated_PDEs, j + 1);
+
+	/* Update per-disk metadata and write them. */
 	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
 		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
 		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
 			continue;
+		/* Update PDR. */
 		memcpy(pd->pd_meta.pdr, gmeta->pdr,
 		    GET32(&pd->pd_meta, hdr->pdr_length) *
 		    pd->pd_meta.sectorsize);
-		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
-			vol = sd->sd_volume;
+		/* Update VDR. */
+		SET16(&pd->pd_meta, vdr->Populated_VDEs, 0);
+		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
+			if (vol->v_stopping)
+				continue;
 			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
-			vmeta = &pv->pv_meta;
 			i = ddf_meta_find_vd(&pd->pd_meta,
 			    pv->pv_meta.vde->VD_GUID);
 			if (i < 0)
@@ -2787,14 +2817,22 @@ g_raid_md_write_ddf(struct g_raid_md_obj
 				memcpy(&pd->pd_meta.vdr->entry[i],
 				    pv->pv_meta.vde,
 				    sizeof(struct ddf_vd_entry));
+		}
+		/* Update VDC. */
+		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
+			vol = sd->sd_volume;
+			if (vol->v_stopping)
+				continue;
+			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
+			vmeta = &pv->pv_meta;
 			vdc = ddf_meta_find_vdc(&pd->pd_meta,
-			    pv->pv_meta.vde->VD_GUID);
+			    vmeta->vde->VD_GUID);
 			if (vdc == NULL)
 				vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL);
 			if (vdc != NULL) {
 				bvd = sd->sd_pos / GET16(vmeta,
 				    vdc->Primary_Element_Count);
-				memcpy(vdc, pv->pv_meta.bvdc[bvd],
+				memcpy(vdc, vmeta->bvdc[bvd],
 				    GET16(&pd->pd_meta,
 				    hdr->Configuration_Record_Length) *
 				    pd->pd_meta.sectorsize);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201205010819.q418JT7k009948>