From owner-svn-src-stable@FreeBSD.ORG Mon Jan 2 18:06:49 2012 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 141581065670; Mon, 2 Jan 2012 18:06:49 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 0013D8FC16; Mon, 2 Jan 2012 18:06:48 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q02I6mrS079226; Mon, 2 Jan 2012 18:06:48 GMT (envelope-from mav@svn.freebsd.org) Received: (from mav@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q02I6m2D079221; Mon, 2 Jan 2012 18:06:48 GMT (envelope-from mav@svn.freebsd.org) Message-Id: <201201021806.q02I6m2D079221@svn.freebsd.org> From: Alexander Motin Date: Mon, 2 Jan 2012 18:06:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r229303 - in stable/9: sbin/geom/class/multipath sys/geom/multipath X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 02 Jan 2012 18:06:49 -0000 Author: mav Date: Mon Jan 2 18:06:48 2012 New Revision: 229303 URL: http://svn.freebsd.org/changeset/base/229303 Log: MFC r227464, r227471 Major GEOM MULTIPATH class rewrite: - Improved locking and destruction process to fix crashes. - Improved "automatic" configuration method to make it consistent and safe by reading metadata back from all specified paths after writing to one. 
- Added provider size check to reduce chance of ordering conflict with other GEOM classes. - Added "manual" configuration method without using on-disk metadata. - Added "add" and "remove" commands to allow manage paths manually. - Failed paths are no longer dropped from geom, but only marked as FAIL and excluded from I/O operations. - Automatically restore failed paths when all others paths are marked as failed, for example, because of device-caused (not transport) errors. - Added "fail" and "restore" commands to manually control FAIL flag. - geom is now destroyed on last path disconnection. - Added optional Active/Active mode support. Unlike Active/Passive mode, load evenly distributed between all working paths. If supported by the device, it allows to significantly improve performance, utilizing bandwidth of all paths. It is controlled by -A option during creation. Disabled by default now. - Improved `status` and `list` commands output. Sponsored by: iXsystems, inc. Modified: stable/9/sbin/geom/class/multipath/geom_multipath.c stable/9/sbin/geom/class/multipath/gmultipath.8 stable/9/sys/geom/multipath/g_multipath.c stable/9/sys/geom/multipath/g_multipath.h Directory Properties: stable/9/sbin/geom/ (props changed) stable/9/sys/ (props changed) stable/9/sys/amd64/include/xen/ (props changed) stable/9/sys/boot/ (props changed) stable/9/sys/boot/i386/efi/ (props changed) stable/9/sys/boot/ia64/efi/ (props changed) stable/9/sys/boot/ia64/ski/ (props changed) stable/9/sys/boot/powerpc/boot1.chrp/ (props changed) stable/9/sys/boot/powerpc/ofw/ (props changed) stable/9/sys/cddl/contrib/opensolaris/ (props changed) stable/9/sys/conf/ (props changed) stable/9/sys/contrib/dev/acpica/ (props changed) stable/9/sys/contrib/octeon-sdk/ (props changed) stable/9/sys/contrib/pf/ (props changed) stable/9/sys/contrib/x86emu/ (props changed) Modified: stable/9/sbin/geom/class/multipath/geom_multipath.c ============================================================================== --- 
stable/9/sbin/geom/class/multipath/geom_multipath.c Mon Jan 2 17:58:07 2012 (r229302) +++ stable/9/sbin/geom/class/multipath/geom_multipath.c Mon Jan 2 18:06:48 2012 (r229303) @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -48,31 +49,58 @@ uint32_t version = G_MULTIPATH_VERSION; static void mp_main(struct gctl_req *, unsigned int); static void mp_label(struct gctl_req *); static void mp_clear(struct gctl_req *); -static void mp_add(struct gctl_req *); struct g_command class_commands[] = { { - "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, G_NULL_OPTS, - "[-v] name prov ..." + "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL, + { + { 'A', "active_active", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-vA] name prov ..." }, { - "add", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, G_NULL_OPTS, - "[-v] name prov ..." + "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, + { + { 'A', "active_active", NULL, G_TYPE_BOOL }, + G_OPT_SENTINEL + }, + "[-vA] name prov ..." }, { - "destroy", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, - "[-v] prov ..." + "add", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" }, { - "clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS, - "[-v] prov ..." + "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { + "fail", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" + }, + { + "restore", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name prov" }, { "rotate", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, - "[-v] prov ..." + "[-v] name" }, { "getactive", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "destroy", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "stop", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, + "[-v] name" + }, + { + "clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS, "[-v] prov ..." 
}, G_CMD_SENTINEL @@ -90,8 +118,6 @@ mp_main(struct gctl_req *req, unsigned i } if (strcmp(name, "label") == 0) { mp_label(req); - } else if (strcmp(name, "add") == 0) { - mp_add(req); } else if (strcmp(name, "clear") == 0) { mp_clear(req); } else { @@ -103,13 +129,13 @@ static void mp_label(struct gctl_req *req) { struct g_multipath_metadata md; - off_t disksiz = 0, msize; - uint8_t *sector; + off_t disksize = 0, msize; + uint8_t *sector, *rsector; char *ptr; uuid_t uuid; uint32_t secsize = 0, ssize, status; - const char *name, *mpname; - int error, i, nargs; + const char *name, *name2, *mpname; + int error, i, nargs, fd; nargs = gctl_get_int(req, "nargs"); if (nargs < 2) { @@ -132,14 +158,14 @@ mp_label(struct gctl_req *req) } if (i == 1) { secsize = ssize; - disksiz = msize; + disksize = msize; } else { if (secsize != ssize) { gctl_error(req, "%s sector size %u different.", name, ssize); return; } - if (disksiz != msize) { + if (disksize != msize) { gctl_error(req, "%s media size %ju different.", name, (intmax_t)msize); return; @@ -155,7 +181,7 @@ mp_label(struct gctl_req *req) md.md_version = G_MULTIPATH_VERSION; mpname = gctl_get_ascii(req, "arg0"); strlcpy(md.md_name, mpname, sizeof(md.md_name)); - md.md_size = disksiz; + md.md_size = disksize; md.md_sectorsize = secsize; uuid_create(&uuid, &status); if (status != uuid_s_ok) { @@ -168,19 +194,10 @@ mp_label(struct gctl_req *req) return; } strlcpy(md.md_uuid, ptr, sizeof (md.md_uuid)); + md.md_active_active = gctl_get_int(req, "active_active"); free(ptr); /* - * Clear metadata on initial provider first. - */ - name = gctl_get_ascii(req, "arg1"); - error = g_metadata_clear(name, NULL); - if (error != 0) { - gctl_error(req, "cannot clear metadata on %s: %s.", name, strerror(error)); - return; - } - - /* * Allocate a sector to write as metadata. 
*/ sector = malloc(secsize); @@ -189,6 +206,12 @@ mp_label(struct gctl_req *req) return; } memset(sector, 0, secsize); + rsector = malloc(secsize); + if (rsector == NULL) { + free(sector); + gctl_error(req, "unable to allocate metadata buffer"); + return; + } /* * encode the metadata @@ -198,6 +221,7 @@ mp_label(struct gctl_req *req) /* * Store metadata on the initial provider. */ + name = gctl_get_ascii(req, "arg1"); error = g_metadata_store(name, sector, secsize); if (error != 0) { gctl_error(req, "cannot store metadata on %s: %s.", name, strerror(error)); @@ -205,20 +229,29 @@ mp_label(struct gctl_req *req) } /* - * Now add the rest of the providers. + * Now touch the rest of the providers to hint retaste. */ - error = gctl_change_param(req, "verb", -1, "add"); - if (error) { - gctl_error(req, "unable to change verb to \"add\": %s.", strerror(error)); - return; - } for (i = 2; i < nargs; i++) { - error = gctl_change_param(req, "arg1", -1, gctl_get_ascii(req, "arg%d", i)); - if (error) { - gctl_error(req, "unable to add %s to %s: %s.", gctl_get_ascii(req, "arg%d", i), mpname, strerror(error)); + name2 = gctl_get_ascii(req, "arg%d", i); + fd = g_open(name2, 1); + if (fd < 0) { + fprintf(stderr, "Unable to open %s: %s.\n", + name2, strerror(errno)); + continue; + } + if (pread(fd, rsector, secsize, disksize - secsize) != + (ssize_t)secsize) { + fprintf(stderr, "Unable to read metadata from %s: %s.\n", + name2, strerror(errno)); + g_close(fd); continue; } - mp_add(req); + g_close(fd); + if (memcmp(sector, rsector, secsize)) { + fprintf(stderr, "No metadata found on %s." 
+ " It is not a path of %s.\n", + name2, name); + } } } @@ -247,13 +280,3 @@ mp_clear(struct gctl_req *req) } } -static void -mp_add(struct gctl_req *req) -{ - const char *errstr; - - errstr = gctl_issue(req); - if (errstr != NULL && errstr[0] != '\0') { - gctl_error(req, "%s", errstr); - } -} Modified: stable/9/sbin/geom/class/multipath/gmultipath.8 ============================================================================== --- stable/9/sbin/geom/class/multipath/gmultipath.8 Mon Jan 2 17:58:07 2012 (r229302) +++ stable/9/sbin/geom/class/multipath/gmultipath.8 Mon Jan 2 18:06:48 2012 (r229303) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 26, 2007 +.Dd October 31, 2011 .Dt GMULTIPATH 8 .Os .Sh NAME @@ -32,11 +32,48 @@ .Nd "disk multipath control utility" .Sh SYNOPSIS .Nm +.Cm create +.Op Fl Av +.Ar name +.Ar prov ... +.Nm .Cm label -.Op Fl hv +.Op Fl Av .Ar name .Ar prov ... .Nm +.Cm add +.Op Fl v +.Ar name prov +.Nm +.Cm remove +.Op Fl v +.Ar name prov +.Nm +.Cm fail +.Op Fl v +.Ar name prov +.Nm +.Cm restore +.Op Fl v +.Ar name prov +.Nm +.Cm rotate +.Op Fl v +.Ar name +.Nm +.Cm getactive +.Op Fl v +.Ar name +.Nm +.Cm destroy +.Op Fl v +.Ar name +.Nm +.Cm stop +.Op Fl v +.Ar name +.Nm .Cm clear .Op Fl v .Ar prov ... @@ -53,27 +90,79 @@ The .Nm utility is used for device multipath configuration. .Pp -Only automatic configuration is supported at the present time via the -.Cm label -command. -This operation writes a label on the last sector of the underlying -disk device with a contained name and UUID. -The UUID guarantees uniqueness -in a shared storage environment but is in general too cumbersome to use. +The multipath device can be configured using two different methods: +.Dq manual +or +.Dq automatic . +When using the +.Dq manual +method, no metadata are stored on the devices, so the multipath +device has to be configured by hand every time it is needed. +Additional device paths also won't be detected automatically. 
+The +.Dq automatic +method uses on-disk metadata to detect device and all its paths. +Metadata use the last sector of the underlying disk device and +include device name and UUID. +The UUID guarantees uniqueness in a shared storage environment +but is in general too cumbersome to use. The name is what is exported via the device interface. .Pp The first argument to .Nm indicates an action to be performed: .Bl -tag -width ".Cm destroy" +.It Cm create +Create multipath device with +.Dq manual +method without writing any on-disk metadata. +It is up to the administrator to properly identify device paths. +Kernel will only check that all given providers have same media and +sector sizes. +.Pp +.Fl A +option enables Active/Active mode, otherwise Active/Passive mode is used +by default. .It Cm label -Label the given underlying device with the specified +Create multipath device with +.Dq automatic +method. +Label the first given provider with on-disk metadata using the specified .Ar name . -The kernel module -.Pa geom_multipath.ko -will be loaded if it is not loaded already. +The rest of given providers will be retasted to detect these metadata. +It reliably protects against specifying unrelated providers. +Providers with no matching metadata detected will not be added to the device. +.Pp +.Fl A +option enables Active/Active mode, otherwise Active/Passive mode is used +by default. +.It Cm add +Add the given provider as a path to the given multipath device. +Should normally be used only for devices created with +.Dq manual +method, unless you know what you are doing (you are sure that it is another +device path, but tasting its metadata in regular +.Dq automatic +way is not possible). +.It Cm remove +Remove the given provider as a path from the given multipath device. +If the last path is removed, the multipath device will be destroyed. +.It Cm fail +Mark specified provider as a path of the specified multipath device as failed. 
+If there are other paths present, new requests will be forwarded there. +.It Cm restore +Mark specified provider as a path of the specified multipath device as +operational, allowing it to handle requests. +.It Cm rotate +Change the active provider/path in Active/Passive mode. +.It Cm getactive +Get the currently active provider(s)/path(s). +.It Cm destroy +Destroy the given multipath device clearing metadata. +.It Cm stop +Stop the given multipath device without clearing metadata. .It Cm clear -Clear metadata on the given device. +Clear metadata on the given provider. .It Cm list See .Xr geom 8 . @@ -101,14 +190,15 @@ Debug level of the GEOM class. This can be set to 0 (default) or 1 to disable or enable various forms of chattiness. +.It Va kern.geom.multipath.exclusive : No 1 +Open underlying providers exclusively, preventing individual paths access. .El .Sh EXIT STATUS Exit status is 0 on success, and 1 if the command fails. .Sh MULTIPATH ARCHITECTURE .Pp -This is an active/passive -multiple path architecture with no device knowledge or presumptions other -than size matching built in. +This is a multiple path architecture with no device knowledge or +presumptions other than size matching built in. Therefore the user must exercise some care in selecting providers that do indeed represent multiple paths to the same underlying disk device. @@ -133,15 +223,16 @@ of multiple pathnames refer to the same system operator who will use tools and knowledge of their own storage subsystem to make the correct configuration selection. .Pp -As an active/passive architecture, only one path has I/O moving on it +There are Active/Passive and Active/Active operation modes supported. +In Active/Passive mode only one path has I/O moving on it at any point in time. This I/O continues until an I/O is returned with a generic I/O error or a "Nonexistent Device" error. 
-When this occurs, -the active device is kicked out of the -.Nm MULTIPATH -GEOM class and the next in a list is selected, the failed I/O reissued -and the system proceeds. +When this occurs, that path is marked FAIL, the next path +in a list is selected as active and the failed I/O is reissued. +In Active/Active mode all paths not marked FAIL may handle I/O at the same time. +Requests are distributed between paths to equalize load. +For capable devices this allows utilizing the bandwidth of all paths. .Pp When new devices are added to the system the .Nm MULTIPATH @@ -149,9 +240,9 @@ GEOM class is given an opportunity to ta If a new device has a .Nm MULTIPATH -label, the device is used to either create a new +on-disk metadata label, the device is used to either create a new .Nm MULTIPATH -GEOM, or to attach to the end of the list of devices for an existing +GEOM, or is added to the list of paths for an existing .Nm MULTIPATH GEOM. .Pp @@ -176,7 +267,7 @@ of an RSCN event from the Fabric Domain a rescan to occur and cause the attachment and configuration of any (now) new devices to occur, causing the taste event described above. .Pp -This means that this active/passive architecture is not a one-shot path +This means that this multipath architecture is not a one-shot path failover, but can be considered to be steady state as long as failed paths are repaired (automatically or otherwise). .Pp @@ -184,7 +275,7 @@ Automatic rescanning is not a requiremen Nor is Fibre Channel. The same failover mechanisms work equally well for traditional "Parallel" -SCSI but require manual intervention with +SCSI but may require manual intervention with .Xr camcontrol 8 to cause the reattachment of repaired device links. .Sh EXAMPLES @@ -226,9 +317,9 @@ mount /dev/multipath/FREDa /mnt.... 
.Pp The resultant console output looks something like: .Bd -literal -offset indent -GEOM_MULTIPATH: adding da0 to Fred/b631385f-c61c-11db-b884-0011116ae789 -GEOM_MULTIPATH: da0 now active path in Fred -GEOM_MULTIPATH: adding da2 to Fred/b631385f-c61c-11db-b884-0011116ae789 +GEOM_MULTIPATH: da0 added to FRED +GEOM_MULTIPATH: da0 is now active path in FRED +GEOM_MULTIPATH: da2 added to FRED .Ed .Sh SEE ALSO .Xr geom 4 , @@ -240,24 +331,6 @@ GEOM_MULTIPATH: adding da2 to Fred/b6313 .Xr mount 8 , .Xr newfs 8 , .Xr sysctl 8 -.Sh BUGS -The -.Nm -should allow for a manual method of pairing disks. -.Pp -There is currently no way for -.Pa geom_multipath.ko -to distinguish between various label instances of the same provider. -That -is devices such as -.Ar da0 -and -.Ar da0c -can be tasted and instantiated as multiple paths for the same device. -Technically, this is correct, but pretty useless. -This will be fixed soon -(I hope), but to avoid this it is a good idea to destroy any label on -the disk object prior to labelling it with -.Nm . .Sh AUTHOR .An Matthew Jacob Aq mjacob@FreeBSD.org +.An Alexander Motin Aq mav@FreeBSD.org Modified: stable/9/sys/geom/multipath/g_multipath.c ============================================================================== --- stable/9/sys/geom/multipath/g_multipath.c Mon Jan 2 17:58:07 2012 (r229302) +++ stable/9/sys/geom/multipath/g_multipath.c Mon Jan 2 18:06:48 2012 (r229303) @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2011 Alexander Motin * Copyright (c) 2006-2007 Matthew Jacob * All rights reserved. 
* @@ -53,6 +54,9 @@ SYSCTL_NODE(_kern_geom, OID_AUTO, multip static u_int g_multipath_debug = 0; SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, debug, CTLFLAG_RW, &g_multipath_debug, 0, "Debug level"); +static u_int g_multipath_exclusive = 1; +SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, exclusive, CTLFLAG_RW, + &g_multipath_exclusive, 0, "Exclusively open providers"); static enum { GKT_NIL, @@ -79,6 +83,7 @@ static g_taste_t g_multipath_taste; static g_ctl_req_t g_multipath_config; static g_init_t g_multipath_init; static g_fini_t g_multipath_fini; +static g_dumpconf_t g_multipath_dumpconf; struct g_class g_multipath_class = { .name = G_MULTIPATH_CLASS_NAME, @@ -90,35 +95,144 @@ struct g_class g_multipath_class = { .fini = g_multipath_fini }; -#define MP_BAD 0x1 -#define MP_POSTED 0x2 +#define MP_FAIL 0x00000001 +#define MP_LOST 0x00000002 +#define MP_NEW 0x00000004 +#define MP_POSTED 0x00000008 +#define MP_BAD (MP_FAIL | MP_LOST | MP_NEW) +#define MP_IDLE 0x00000010 +#define MP_IDLE_MASK 0xfffffff0 + +static int +g_multipath_good(struct g_geom *gp) +{ + struct g_consumer *cp; + int n = 0; + + LIST_FOREACH(cp, &gp->consumer, consumer) { + if ((cp->index & MP_BAD) == 0) + n++; + } + return (n); +} + +static void +g_multipath_fault(struct g_consumer *cp, int cause) +{ + struct g_multipath_softc *sc; + struct g_consumer *lcp; + struct g_geom *gp; + + gp = cp->geom; + sc = gp->softc; + cp->index |= cause; + if (g_multipath_good(gp) == 0 && sc->sc_ndisks > 0) { + LIST_FOREACH(lcp, &gp->consumer, consumer) { + if (lcp->provider == NULL || + (lcp->index & (MP_LOST | MP_NEW))) + continue; + if (sc->sc_ndisks > 1 && lcp == cp) + continue; + printf("GEOM_MULTIPATH: " + "all paths in %s were marked FAIL, restore %s\n", + sc->sc_name, lcp->provider->name); + lcp->index &= ~MP_FAIL; + } + } + if (cp != sc->sc_active) + return; + sc->sc_active = NULL; + LIST_FOREACH(lcp, &gp->consumer, consumer) { + if ((lcp->index & MP_BAD) == 0) { + sc->sc_active = lcp; + break; + } + } + if 
(sc->sc_active == NULL) { + printf("GEOM_MULTIPATH: out of providers for %s\n", + sc->sc_name); + } else if (!sc->sc_active_active) { + printf("GEOM_MULTIPATH: %s is now active path in %s\n", + sc->sc_active->provider->name, sc->sc_name); + } +} + +static struct g_consumer * +g_multipath_choose(struct g_geom *gp) +{ + struct g_multipath_softc *sc; + struct g_consumer *best, *cp; + + sc = gp->softc; + if (!sc->sc_active_active) + return (sc->sc_active); + best = NULL; + LIST_FOREACH(cp, &gp->consumer, consumer) { + if (cp->index & MP_BAD) + continue; + cp->index += MP_IDLE; + if (best == NULL || cp->private < best->private || + (cp->private == best->private && cp->index > best->index)) + best = cp; + } + if (best != NULL) + best->index &= ~MP_IDLE_MASK; + return (best); +} static void g_mpd(void *arg, int flags __unused) { + struct g_geom *gp; + struct g_multipath_softc *sc; struct g_consumer *cp; + int w; g_topology_assert(); cp = arg; - if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) + gp = cp->geom; + if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) { + w = cp->acw; g_access(cp, -cp->acr, -cp->acw, -cp->ace); + if (w > 0 && cp->provider != NULL && + (cp->provider->geom->flags & G_GEOM_WITHER) == 0) { + g_post_event(g_mpd, cp, M_WAITOK, NULL); + return; + } + } + sc = gp->softc; + mtx_lock(&sc->sc_mtx); if (cp->provider) { printf("GEOM_MULTIPATH: %s removed from %s\n", - cp->provider->name, cp->geom->name); + cp->provider->name, gp->name); g_detach(cp); } g_destroy_consumer(cp); + mtx_unlock(&sc->sc_mtx); + if (LIST_EMPTY(&gp->consumer)) + g_multipath_destroy(gp); } static void g_multipath_orphan(struct g_consumer *cp) { - if ((cp->index & MP_POSTED) == 0) { + struct g_multipath_softc *sc; + uintptr_t *cnt; + + g_topology_assert(); + printf("GEOM_MULTIPATH: %s in %s was disconnected\n", + cp->provider->name, cp->geom->name); + sc = cp->geom->softc; + cnt = (uintptr_t *)&cp->private; + mtx_lock(&sc->sc_mtx); + sc->sc_ndisks--; + g_multipath_fault(cp, MP_LOST); + if 
(*cnt == 0 && (cp->index & MP_POSTED) == 0) { cp->index |= MP_POSTED; - printf("GEOM_MULTIPATH: %s orphaned in %s\n", - cp->provider->name, cp->geom->name); + mtx_unlock(&sc->sc_mtx); g_mpd(cp, 0); - } + } else + mtx_unlock(&sc->sc_mtx); } static void @@ -128,20 +242,29 @@ g_multipath_start(struct bio *bp) struct g_geom *gp; struct g_consumer *cp; struct bio *cbp; + uintptr_t *cnt; gp = bp->bio_to->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc")); - cp = sc->cp_active; - if (cp == NULL) { - g_io_deliver(bp, ENXIO); - return; - } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } + mtx_lock(&sc->sc_mtx); + cp = g_multipath_choose(gp); + if (cp == NULL) { + mtx_unlock(&sc->sc_mtx); + g_destroy_bio(cbp); + g_io_deliver(bp, ENXIO); + return; + } + if ((uintptr_t)bp->bio_driver1 < sc->sc_ndisks) + bp->bio_driver1 = (void *)(uintptr_t)sc->sc_ndisks; + cnt = (uintptr_t *)&cp->private; + (*cnt)++; + mtx_unlock(&sc->sc_mtx); cbp->bio_done = g_multipath_done; g_io_request(cbp, cp); } @@ -149,12 +272,27 @@ g_multipath_start(struct bio *bp) static void g_multipath_done(struct bio *bp) { + struct g_multipath_softc *sc; + struct g_consumer *cp; + uintptr_t *cnt; + if (bp->bio_error == ENXIO || bp->bio_error == EIO) { mtx_lock(&gmtbq_mtx); bioq_insert_tail(&gmtbq, bp); - wakeup(&g_multipath_kt_state); mtx_unlock(&gmtbq_mtx); + wakeup(&g_multipath_kt_state); } else { + cp = bp->bio_from; + sc = cp->geom->softc; + cnt = (uintptr_t *)&cp->private; + mtx_lock(&sc->sc_mtx); + (*cnt)--; + if (*cnt == 0 && (cp->index & MP_LOST)) { + cp->index |= MP_POSTED; + mtx_unlock(&sc->sc_mtx); + g_post_event(g_mpd, cp, M_WAITOK, NULL); + } else + mtx_unlock(&sc->sc_mtx); g_std_done(bp); } } @@ -167,6 +305,7 @@ g_multipath_done_error(struct bio *bp) struct g_multipath_softc *sc; struct g_consumer *cp; struct g_provider *pp; + uintptr_t *cnt; /* * If we had a failure, we have to check first to see @@ -176,47 +315,31 @@ g_multipath_done_error(struct bio *bp) * to the 
next available consumer. */ - g_topology_lock(); pbp = bp->bio_parent; gp = pbp->bio_to->geom; sc = gp->softc; cp = bp->bio_from; pp = cp->provider; + cnt = (uintptr_t *)&cp->private; - cp->index |= MP_BAD; - if (cp->nend == cp->nstart && pp->nend == pp->nstart) { + mtx_lock(&sc->sc_mtx); + printf("GEOM_MULTIPATH: Error %d, %s in %s marked FAIL\n", + bp->bio_error, pp->name, sc->sc_name); + g_multipath_fault(cp, MP_FAIL); + (*cnt)--; + if (*cnt == 0 && (cp->index & (MP_LOST | MP_POSTED)) == MP_LOST) { cp->index |= MP_POSTED; - g_post_event(g_mpd, cp, M_NOWAIT, NULL); - } - if (cp == sc->cp_active) { - struct g_consumer *lcp; - printf("GEOM_MULTIPATH: %s failed in %s\n", - pp->name, sc->sc_name); - sc->cp_active = NULL; - LIST_FOREACH(lcp, &gp->consumer, consumer) { - if ((lcp->index & MP_BAD) == 0) { - sc->cp_active = lcp; - break; - } - } - if (sc->cp_active == NULL || sc->cp_active->provider == NULL) { - printf("GEOM_MULTIPATH: out of providers for %s\n", - sc->sc_name); - g_topology_unlock(); - return; - } else { - printf("GEOM_MULTIPATH: %s now active path in %s\n", - sc->cp_active->provider->name, sc->sc_name); - } - } - g_topology_unlock(); + mtx_unlock(&sc->sc_mtx); + g_post_event(g_mpd, cp, M_WAITOK, NULL); + } else + mtx_unlock(&sc->sc_mtx); /* * If we can fruitfully restart the I/O, do so. 
*/ - if (sc->cp_active) { + if (pbp->bio_children < (uintptr_t)pbp->bio_driver1) { + pbp->bio_inbed++; g_destroy_bio(bp); - pbp->bio_children--; g_multipath_start(pbp); } else { g_std_done(bp); @@ -254,6 +377,7 @@ g_multipath_access(struct g_provider *pp { struct g_geom *gp; struct g_consumer *cp, *badcp = NULL; + struct g_multipath_softc *sc; int error; gp = pp->geom; @@ -265,6 +389,10 @@ g_multipath_access(struct g_provider *pp goto fail; } } + sc = gp->softc; + sc->sc_opened += dr + dw + de; + if (sc->sc_stopping && sc->sc_opened == 0) + g_multipath_destroy(gp); return (0); fail: @@ -286,6 +414,9 @@ g_multipath_create(struct g_class *mp, s g_topology_assert(); LIST_FOREACH(gp, &mp->geom, geom) { + sc = gp->softc; + if (sc == NULL || sc->sc_stopping) + continue; if (strcmp(gp->name, md->md_name) == 0) { printf("GEOM_MULTIPATH: name %s already exists\n", md->md_name); @@ -295,19 +426,25 @@ g_multipath_create(struct g_class *mp, s gp = g_new_geomf(mp, md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); + mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF); + memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid)); + memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name)); + sc->sc_active_active = md->md_active_active; gp->softc = sc; gp->start = g_multipath_start; gp->orphan = g_multipath_orphan; gp->access = g_multipath_access; - memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid)); - memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name)); + gp->dumpconf = g_multipath_dumpconf; pp = g_new_providerf(gp, "multipath/%s", md->md_name); - /* limit the provider to not have it stomp on metadata */ - pp->mediasize = md->md_size - md->md_sectorsize; - pp->sectorsize = md->md_sectorsize; - sc->pp = pp; + if (md->md_size != 0) { + pp->mediasize = md->md_size - + ((md->md_uuid[0] != 0) ? 
md->md_sectorsize : 0); + pp->sectorsize = md->md_sectorsize; + } + sc->sc_pp = pp; g_error_provider(pp, 0); + printf("GEOM_MULTIPATH: %s created\n", gp->name); return (gp); } @@ -316,7 +453,7 @@ g_multipath_add_disk(struct g_geom *gp, { struct g_multipath_softc *sc; struct g_consumer *cp, *nxtcp; - int error; + int error, acr, acw, ace; g_topology_assert(); @@ -337,6 +474,8 @@ g_multipath_add_disk(struct g_geom *gp, } nxtcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); + cp->private = NULL; + cp->index = MP_NEW; error = g_attach(cp, pp); if (error != 0) { printf("GEOM_MULTIPATH: cannot attach %s to %s", @@ -344,29 +483,51 @@ g_multipath_add_disk(struct g_geom *gp, g_destroy_consumer(cp); return (error); } - cp->private = sc; - cp->index = 0; /* * Set access permissions on new consumer to match other consumers */ - if (nxtcp && (nxtcp->acr + nxtcp->acw + nxtcp->ace)) { - error = g_access(cp, nxtcp->acr, nxtcp->acw, nxtcp->ace); - if (error) { - printf("GEOM_MULTIPATH: cannot set access in " - "attaching %s to %s/%s (%d)\n", - pp->name, sc->sc_name, sc->sc_uuid, error); - g_detach(cp); - g_destroy_consumer(cp); - return (error); - } + if (sc->sc_pp) { + acr = sc->sc_pp->acr; + acw = sc->sc_pp->acw; + ace = sc->sc_pp->ace; + } else + acr = acw = ace = 0; + if (g_multipath_exclusive) { + acr++; + acw++; + ace++; + } + error = g_access(cp, acr, acw, ace); + if (error) { + printf("GEOM_MULTIPATH: cannot set access in " + "attaching %s to %s (%d)\n", + pp->name, sc->sc_name, error); + g_detach(cp); + g_destroy_consumer(cp); + return (error); } - printf("GEOM_MULTIPATH: adding %s to %s/%s\n", - pp->name, sc->sc_name, sc->sc_uuid); - if (sc->cp_active == NULL) { - sc->cp_active = cp; - printf("GEOM_MULTIPATH: %s now active path in %s\n", - pp->name, sc->sc_name); + if (sc->sc_pp != NULL && sc->sc_pp->mediasize == 0) { + sc->sc_pp->mediasize = pp->mediasize - + ((sc->sc_uuid[0] != 0) ? 
pp->sectorsize : 0); + sc->sc_pp->sectorsize = pp->sectorsize; + } + if (sc->sc_pp != NULL && + sc->sc_pp->stripesize == 0 && sc->sc_pp->stripeoffset == 0) { + sc->sc_pp->stripesize = pp->stripesize; + sc->sc_pp->stripeoffset = pp->stripeoffset; + } + mtx_lock(&sc->sc_mtx); + cp->index = 0; + sc->sc_ndisks++; + mtx_unlock(&sc->sc_mtx); + printf("GEOM_MULTIPATH: %s added to %s\n", + pp->name, sc->sc_name); + if (sc->sc_active == NULL) { + sc->sc_active = cp; + if (!sc->sc_active_active) + printf("GEOM_MULTIPATH: %s is now active path in %s\n", + pp->name, sc->sc_name); } return (0); } @@ -374,17 +535,41 @@ g_multipath_add_disk(struct g_geom *gp, static int g_multipath_destroy(struct g_geom *gp) { - struct g_provider *pp; + struct g_multipath_softc *sc; + struct g_consumer *cp, *cp1; g_topology_assert(); if (gp->softc == NULL) return (ENXIO); - pp = LIST_FIRST(&gp->provider); - if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) - return (EBUSY); - printf("GEOM_MULTIPATH: destroying %s\n", gp->name); + sc = gp->softc; + if (!sc->sc_stopping) { + printf("GEOM_MULTIPATH: destroying %s\n", gp->name); + sc->sc_stopping = 1; + } + if (sc->sc_opened != 0) { + if (sc->sc_pp != NULL) { + g_wither_provider(sc->sc_pp, ENXIO); + sc->sc_pp = NULL; + } + return (EINPROGRESS); + } + LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { + mtx_lock(&sc->sc_mtx); + if ((cp->index & MP_POSTED) == 0) { + cp->index |= MP_POSTED; + mtx_unlock(&sc->sc_mtx); + g_mpd(cp, 0); + if (cp1 == NULL) + return(0); /* Recursion happened. */ + } else + mtx_unlock(&sc->sc_mtx); + } + if (!LIST_EMPTY(&gp->consumer)) + return (EINPROGRESS); + mtx_destroy(&sc->sc_mtx); g_free(gp->softc); gp->softc = NULL; + printf("GEOM_MULTIPATH: %s destroyed\n", gp->name); g_wither_geom(gp, ENXIO); return (0); } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***