From owner-svn-src-projects@FreeBSD.ORG Wed Dec 22 09:02:22 2010 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id B4344106564A; Wed, 22 Dec 2010 09:02:22 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id A2AF78FC0A; Wed, 22 Dec 2010 09:02:22 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oBM92MD4098777; Wed, 22 Dec 2010 09:02:22 GMT (envelope-from mav@svn.freebsd.org) Received: (from mav@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oBM92MpZ098772; Wed, 22 Dec 2010 09:02:22 GMT (envelope-from mav@svn.freebsd.org) Message-Id: <201012220902.oBM92MpZ098772@svn.freebsd.org> From: Alexander Motin Date: Wed, 22 Dec 2010 09:02:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r216649 - projects/graid/head/sys/geom/raid X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 22 Dec 2010 09:02:22 -0000 Author: mav Date: Wed Dec 22 09:02:22 2010 New Revision: 216649 URL: http://svn.freebsd.org/changeset/base/216649 Log: Implement basic metadata generation check. To be sure that the metadata in use is up to date, delay volume creation until we have all disks or the timeout expires. This probably obsoletes the same functionality done at the volume level. 
Modified: projects/graid/head/sys/geom/raid/g_raid.c projects/graid/head/sys/geom/raid/g_raid.h projects/graid/head/sys/geom/raid/g_raid_md_if.m projects/graid/head/sys/geom/raid/md_intel.c Modified: projects/graid/head/sys/geom/raid/g_raid.c ============================================================================== --- projects/graid/head/sys/geom/raid/g_raid.c Wed Dec 22 05:53:46 2010 (r216648) +++ projects/graid/head/sys/geom/raid/g_raid.c Wed Dec 22 09:02:22 2010 (r216649) @@ -55,7 +55,7 @@ u_int g_raid_debug = 1000; TUNABLE_INT("kern.geom.raid.debug", &g_raid_debug); SYSCTL_UINT(_kern_geom_raid, OID_AUTO, debug, CTLFLAG_RW, &g_raid_debug, 0, "Debug level"); -static u_int g_raid_start_timeout = 4; +u_int g_raid_start_timeout = 4; TUNABLE_INT("kern.geom.raid.start_timeout", &g_raid_start_timeout); SYSCTL_UINT(_kern_geom_raid, OID_AUTO, timeout, CTLFLAG_RW, &g_raid_start_timeout, 0, "Time to wait on all mirror components"); @@ -122,6 +122,8 @@ g_raid_disk_state2str(int state) return ("SPARE"); case G_RAID_DISK_S_OFFLINE: return ("OFFLINE"); + case G_RAID_DISK_S_STALE: + return ("STALE"); default: return ("INVALID"); } @@ -1297,12 +1299,13 @@ g_raid_destroy_volume(struct g_raid_volu } int -g_raid_stop_disk(struct g_raid_disk *disk) +g_raid_destroy_disk(struct g_raid_disk *disk) { struct g_raid_softc *sc; struct g_raid_subdisk *sd, *tmp; sc = disk->d_softc; + G_RAID_DEBUG(2, "Destroying disk."); if (disk->d_consumer) { g_topology_lock(); g_raid_kill_consumer(sc, disk->d_consumer); @@ -1315,21 +1318,9 @@ g_raid_stop_disk(struct g_raid_disk *dis LIST_REMOVE(sd, sd_next); sd->sd_disk = NULL; } - return (0); -} - -int -g_raid_destroy_disk(struct g_raid_disk *disk) -{ - struct g_raid_softc *sc; - int error; - - sc = disk->d_softc; - G_RAID_DEBUG(2, "Destroying disk."); - error = g_raid_stop_disk(disk); - if (error) - return (error); LIST_REMOVE(disk, d_next); + if (sc->sc_md) + G_RAID_MD_FREE_DISK(sc->sc_md, disk); free(disk, M_RAID); return (0); } Modified: 
projects/graid/head/sys/geom/raid/g_raid.h ============================================================================== --- projects/graid/head/sys/geom/raid/g_raid.h Wed Dec 22 05:53:46 2010 (r216648) +++ projects/graid/head/sys/geom/raid/g_raid.h Wed Dec 22 09:02:22 2010 (r216649) @@ -58,6 +58,7 @@ struct g_raid_tr_object; #ifdef _KERNEL extern u_int g_raid_debug; +extern u_int g_raid_start_timeout; #define G_RAID_DEBUG(lvl, fmt, ...) do { \ if (g_raid_debug >= (lvl)) { \ @@ -101,6 +102,7 @@ struct g_raid_event { #define G_RAID_DISK_S_ACTIVE 0x01 #define G_RAID_DISK_S_SPARE 0x02 #define G_RAID_DISK_S_OFFLINE 0x03 +#define G_RAID_DISK_S_STALE 0x04 #define G_RAID_DISK_E_DISCONNECTED 0x01 @@ -284,10 +286,6 @@ struct g_raid_disk * g_raid_create_disk( int g_raid_start_volume(struct g_raid_volume *vol); -int g_raid_stop_node(struct g_raid_softc *sc); -int g_raid_stop_volume(struct g_raid_volume *vol); -int g_raid_stop_disk(struct g_raid_disk *disk); - int g_raid_destroy_node(struct g_raid_softc *sc, int worker); int g_raid_destroy_volume(struct g_raid_volume *vol); int g_raid_destroy_disk(struct g_raid_disk *disk); Modified: projects/graid/head/sys/geom/raid/g_raid_md_if.m ============================================================================== --- projects/graid/head/sys/geom/raid/g_raid_md_if.m Wed Dec 22 05:53:46 2010 (r216648) +++ projects/graid/head/sys/geom/raid/g_raid_md_if.m Wed Dec 22 09:02:22 2010 (r216649) @@ -71,6 +71,12 @@ METHOD int write { struct g_raid_disk *disk; }; +# free_disk() - disk destructor. +METHOD int free_disk { + struct g_raid_md_object *md; + struct g_raid_disk *disk; +}; + # free() - destructor. 
METHOD int free { struct g_raid_md_object *md; Modified: projects/graid/head/sys/geom/raid/md_intel.c ============================================================================== --- projects/graid/head/sys/geom/raid/md_intel.c Wed Dec 22 05:53:46 2010 (r216648) +++ projects/graid/head/sys/geom/raid/md_intel.c Wed Dec 22 09:02:22 2010 (r216649) @@ -135,21 +135,31 @@ struct intel_raid_vol { struct intel_raid_map map[1]; } __packed; +struct g_raid_md_intel_perdisk { + struct intel_raid_conf *pd_meta; + int pd_disk_pos; +}; + struct g_raid_md_intel_object { - struct g_raid_md_object mdio_base; - uint32_t mdio_config_id; + struct g_raid_md_object mdio_base; + uint32_t mdio_config_id; struct intel_raid_conf *mdio_meta; + struct callout mdio_start_co; /* STARTING state timer. */ + int mdio_disks_present; + int mdio_started; }; static g_raid_md_taste_t g_raid_md_taste_intel; static g_raid_md_event_t g_raid_md_event_intel; static g_raid_md_write_t g_raid_md_write_intel; +static g_raid_md_free_disk_t g_raid_md_free_disk_intel; static g_raid_md_free_t g_raid_md_free_intel; static kobj_method_t g_raid_md_intel_methods[] = { KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_intel), KOBJMETHOD(g_raid_md_event, g_raid_md_event_intel), KOBJMETHOD(g_raid_md_write, g_raid_md_write_intel), + KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel), KOBJMETHOD(g_raid_md_free, g_raid_md_free_intel), { 0, 0 } }; @@ -251,6 +261,17 @@ g_raid_md_intel_print(struct intel_raid_ printf("=================================================\n"); } +static struct intel_raid_conf * +intel_meta_copy(struct intel_raid_conf *meta) +{ + struct intel_raid_conf *nmeta; + + nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK | M_ZERO); + memcpy(nmeta, meta, meta->config_size); + return (nmeta); +} + +#if 0 static struct g_raid_disk * g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id) { @@ -262,6 +283,7 @@ g_raid_md_intel_get_disk(struct g_raid_s } return (disk); } +#endif static struct 
g_raid_volume * g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id) @@ -275,6 +297,166 @@ g_raid_md_intel_get_volume(struct g_raid return (vol); } +static void +g_raid_md_intel_start_disk(struct g_raid_disk *disk) +{ + struct g_raid_softc *sc; + struct g_raid_volume *vol; + struct g_raid_subdisk *sd; + struct g_raid_md_object *md; + struct g_raid_md_intel_object *mdi; + struct g_raid_md_intel_perdisk *pd; + struct intel_raid_conf *meta, *pdmeta; + struct intel_raid_vol *mvol; + struct intel_raid_map *map; + int i, j; + + sc = disk->d_softc; + md = sc->sc_md; + mdi = (struct g_raid_md_intel_object *)md; + meta = mdi->mdio_meta; + pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; + pdmeta = pd->pd_meta; + + if (pdmeta->generation != meta->generation) { + g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); + return; + } + + /* Update disk state. */ + g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); + + /* Create subdisks. */ + for (i = 0; i < meta->total_volumes; i++) { + mvol = intel_get_volume(meta, i); + map = intel_get_map(mvol, 0); + for (j = 0; j < map->total_disks; j++) { + if ((map->disk_idx[j] & INTEL_DI_IDX) == pd->pd_disk_pos) + break; + } + if (j == map->total_disks) + continue; + vol = g_raid_md_intel_get_volume(sc, i); + sd = &vol->v_subdisks[j]; + sd->sd_disk = disk; + sd->sd_offset = map->offset * 512; //ZZZ + sd->sd_size = map->disk_sectors; + LIST_INSERT_HEAD(&disk->d_subdisks, sd, sd_next); + g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, + G_RAID_EVENT_SUBDISK); + } + +} + +static void +g_raid_md_intel_start(struct g_raid_softc *sc) +{ + struct g_raid_md_object *md; + struct g_raid_md_intel_object *mdi; + struct intel_raid_conf *meta; + struct intel_raid_vol *mvol; + struct intel_raid_map *map; + struct g_raid_volume *vol; + struct g_raid_disk *disk; + int i; + + md = sc->sc_md; + mdi = (struct g_raid_md_intel_object *)md; + meta = mdi->mdio_meta; + /* Create volumes */ + for (i = 0; i < meta->total_volumes; i++) { + mvol = 
intel_get_volume(meta, i); + map = intel_get_map(mvol, 0); + vol = g_raid_create_volume(sc, mvol->name); + vol->v_md_data = (void *)(intptr_t)i; + if (map->type == INTEL_T_RAID0) + vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; + else if (map->type == INTEL_T_RAID1 && + map->total_disks < 4) + vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; + else if (map->type == INTEL_T_RAID1) + vol->v_raid_level = G_RAID_VOLUME_RL_RAID10; + else if (map->type == INTEL_T_RAID5) + vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; + else + vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; + vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; + vol->v_strip_size = map->stripe_sectors * 512; //ZZZ + vol->v_disks_count = map->total_disks; + vol->v_mediasize = mvol->total_sectors * 512; //ZZZ + vol->v_sectorsize = 512; //ZZZ + g_raid_start_volume(vol); + } + LIST_FOREACH(disk, &sc->sc_disks, d_next) { + g_raid_md_intel_start_disk(disk); + } +} + +static void +g_raid_md_intel_new_disk(struct g_raid_disk *disk) +{ + struct g_raid_softc *sc; + struct g_raid_md_object *md; + struct g_raid_md_intel_object *mdi; + struct intel_raid_conf *meta, *pdmeta; + struct g_raid_md_intel_perdisk *pd; + + sc = disk->d_softc; + md = sc->sc_md; + mdi = (struct g_raid_md_intel_object *)md; + pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; + pdmeta = pd->pd_meta; + + if (mdi->mdio_meta == NULL || + pdmeta->generation > mdi->mdio_meta->generation) { + if (mdi->mdio_started) { + G_RAID_DEBUG(1, "Newer disk, but already started"); + } else { + G_RAID_DEBUG(1, "Newer disk"); + if (mdi->mdio_meta != NULL) + free(mdi->mdio_meta, M_MD_INTEL); + mdi->mdio_meta = intel_meta_copy(pdmeta); + mdi->mdio_disks_present = 1; + } + } else if (pdmeta->generation == mdi->mdio_meta->generation) { + mdi->mdio_disks_present++; + G_RAID_DEBUG(1, "Matching disk (%d up)", + mdi->mdio_disks_present); + } else { + G_RAID_DEBUG(1, "Stale disk"); + } + + meta = mdi->mdio_meta; + if (mdi->mdio_started) { + g_raid_md_intel_start_disk(disk); + } 
else { + if (mdi->mdio_disks_present == meta->total_disks) { + mdi->mdio_started = 1; + callout_stop(&mdi->mdio_start_co); + g_raid_md_intel_start(sc); + } + } +} + +static void +g_raid_intel_go(void *arg) +{ + struct g_raid_softc *sc; + struct g_raid_md_object *md; + struct g_raid_md_intel_object *mdi; + + sc = arg; + md = sc->sc_md; + mdi = (struct g_raid_md_intel_object *)md; + sx_xlock(&sc->sc_lock); + if (!mdi->mdio_started) { + G_RAID_DEBUG(0, "Force node %s start due to timeout.", sc->sc_name); + mdi->mdio_started = 1; + g_raid_md_intel_start(sc); + } + sx_xunlock(&sc->sc_lock); +} + static int g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp, struct g_consumer *cp, struct g_geom **gp) @@ -283,16 +465,13 @@ g_raid_md_taste_intel(struct g_raid_md_o struct g_provider *pp; struct g_raid_md_intel_object *mdi, *mdi1; struct g_raid_softc *sc; - struct g_raid_volume *vol; - struct g_raid_subdisk *subdisk; struct g_raid_disk *disk; struct intel_raid_conf *meta; - struct intel_raid_vol *mvol; - struct intel_raid_map *map; + struct g_raid_md_intel_perdisk *pd; struct g_geom *geom; uint32_t checksum, *ptr; char *buf, *tmp; - int i, j, error, serial_len, disk_pos, result; + int i, error, serial_len, disk_pos, result; char serial[INTEL_SERIAL_LEN]; char name[16]; @@ -364,6 +543,7 @@ g_raid_md_taste_intel(struct g_raid_md_o g_raid_md_intel_print(meta); G_RAID_DEBUG(1, "Intel disk position %d", disk_pos); + /* Search for matching node. */ sc = NULL; mdi1 = NULL; LIST_FOREACH(geom, &mp->geom, geom) { @@ -388,41 +568,14 @@ g_raid_md_taste_intel(struct g_raid_md_o } else { /* Not found matching node. 
*/ result = G_RAID_MD_TASTE_NEW; mdi->mdio_config_id = meta->config_id; - mdi->mdio_meta = meta; snprintf(name, sizeof(name), "Intel-%08x", meta->config_id); sc = g_raid_create_node(mp, name, md); md->mdo_softc = sc; geom = sc->sc_geom; G_RAID_DEBUG(1, "Created new node %s", sc->sc_name); - - /* Create volumes */ - for (i = 0; i < meta->total_volumes; i++) { - mvol = intel_get_volume(meta, i); - map = intel_get_map(mvol, 0); - vol = g_raid_create_volume(sc, mvol->name); - vol->v_md_data = (void *)(intptr_t)i; - if (map->type == INTEL_T_RAID0) - vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; - else if (map->type == INTEL_T_RAID1 && - map->total_disks < 4) - vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; - else if (map->type == INTEL_T_RAID1) - vol->v_raid_level = G_RAID_VOLUME_RL_RAID10; - else if (map->type == INTEL_T_RAID5) - vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; - else - vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; - vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; - vol->v_strip_size = map->stripe_sectors * 512; //ZZZ - vol->v_disks_count = map->total_disks; - vol->v_mediasize = mvol->total_sectors * 512; //ZZZ - vol->v_sectorsize = 512; //ZZZ - g_topology_unlock(); - sx_xlock(&sc->sc_lock); - g_raid_start_volume(vol); - sx_xunlock(&sc->sc_lock); - g_topology_lock(); - } + callout_init(&mdi->mdio_start_co, 1); + callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, + g_raid_intel_go, sc); } rcp = g_new_consumer(geom); @@ -433,34 +586,15 @@ g_raid_md_taste_intel(struct g_raid_md_o g_topology_unlock(); sx_xlock(&sc->sc_lock); - disk = g_raid_md_intel_get_disk(sc, disk_pos); - if (disk != 0) - ; /* Error, duplicate disk! 
*/ + pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); + pd->pd_meta = meta; + pd->pd_disk_pos = disk_pos; disk = g_raid_create_disk(sc); - disk->d_md_data = (void *)(intptr_t)disk_pos; + disk->d_md_data = (void *)pd; disk->d_consumer = rcp; - g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); rcp->private = disk; - /* Create subdisks */ - for (i = 0; i < meta->total_volumes; i++) { - mvol = intel_get_volume(meta, i); - map = intel_get_map(mvol, 0); - for (j = 0; j < map->total_disks; j++) { - if ((map->disk_idx[j] & INTEL_DI_IDX) == disk_pos) - break; - } - if (j == map->total_disks) - continue; - vol = g_raid_md_intel_get_volume(sc, i); - subdisk = &vol->v_subdisks[j]; - subdisk->sd_disk = disk; - subdisk->sd_offset = map->offset * 512; //ZZZ - subdisk->sd_size = map->disk_sectors; - LIST_INSERT_HEAD(&disk->d_subdisks, subdisk, sd_next); - g_raid_event_send(subdisk, G_RAID_SUBDISK_E_NEW, - G_RAID_EVENT_SUBDISK); - } + g_raid_md_intel_new_disk(disk); sx_xunlock(&sc->sc_lock); g_topology_lock(); @@ -501,13 +635,35 @@ g_raid_md_write_intel(struct g_raid_md_o } static int +g_raid_md_free_disk_intel(struct g_raid_md_object *md, + struct g_raid_disk *disk) +{ + struct g_raid_md_intel_perdisk *pd; + + pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; + if (pd->pd_meta != NULL) { + free(pd->pd_meta, M_MD_INTEL); + pd->pd_meta = NULL; + } + free(pd, M_MD_INTEL); + disk->d_md_data = NULL; + return (0); +} + +static int g_raid_md_free_intel(struct g_raid_md_object *md) { struct g_raid_md_intel_object *mdi; mdi = (struct g_raid_md_intel_object *)md; - free(mdi->mdio_meta, M_MD_INTEL); - mdi->mdio_meta = NULL; + if (!mdi->mdio_started) { + mdi->mdio_started = 0; + callout_stop(&mdi->mdio_start_co); + } + if (mdi->mdio_meta != NULL) { + free(mdi->mdio_meta, M_MD_INTEL); + mdi->mdio_meta = NULL; + } return (0); }