Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 2 Jan 2012 19:27:24 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r229309 - in stable/8: sbin/geom/class/multipath sbin/geom/class/sched sys/conf sys/geom/multipath
Message-ID:  <201201021927.q02JROpO082273@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Mon Jan  2 19:27:23 2012
New Revision: 229309
URL: http://svn.freebsd.org/changeset/base/229309

Log:
  MFC r227464, r227471:
  Major GEOM MULTIPATH class rewrite:
   - Improved locking and destruction process to fix crashes.
   - Improved "automatic" configuration method to make it consistent and safe
  by reading metadata back from all specified paths after writing to one.
   - Added provider size check to reduce chance of ordering conflict with
  other GEOM classes.
   - Added "manual" configuration method without using on-disk metadata.
   - Added "add" and "remove" commands to allow managing paths manually.
   - Failed paths are no longer dropped from geom, but only marked as FAIL
  and excluded from I/O operations.
   - Automatically restore failed paths when all other paths are marked
  as failed, for example, because of device-caused (not transport) errors.
   - Added "fail" and "restore" commands to manually control FAIL flag.
   - geom is now destroyed on last path disconnection.
   - Added optional Active/Active mode support. Unlike Active/Passive
  mode, load is evenly distributed between all working paths. If supported
  by the device, this can significantly improve performance by utilizing
  the bandwidth of all paths. It is controlled by the -A option during
  creation and is disabled by default for now.
   - Improved `status` and `list` commands output.
  
  Sponsored by:   iXsystems, inc.

Modified:
  stable/8/sbin/geom/class/multipath/geom_multipath.c
  stable/8/sbin/geom/class/multipath/gmultipath.8
  stable/8/sys/geom/multipath/g_multipath.c
  stable/8/sys/geom/multipath/g_multipath.h
Directory Properties:
  stable/8/sbin/geom/   (props changed)
  stable/8/sbin/geom/class/multipath/   (props changed)
  stable/8/sbin/geom/class/part/   (props changed)
  stable/8/sbin/geom/class/sched/gsched.8   (props changed)
  stable/8/sbin/geom/class/stripe/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/conf/ldscript.mips.octeon1.32   (props changed)
  stable/8/sys/conf/ldscript.mips.octeon1.64   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)

Modified: stable/8/sbin/geom/class/multipath/geom_multipath.c
==============================================================================
--- stable/8/sbin/geom/class/multipath/geom_multipath.c	Mon Jan  2 19:23:52 2012	(r229308)
+++ stable/8/sbin/geom/class/multipath/geom_multipath.c	Mon Jan  2 19:27:23 2012	(r229309)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <strings.h>
 #include <assert.h>
 #include <libgeom.h>
+#include <unistd.h>
 #include <uuid.h>
 #include <geom/multipath/g_multipath.h>
 
@@ -48,31 +49,58 @@ uint32_t version = G_MULTIPATH_VERSION;
 static void mp_main(struct gctl_req *, unsigned int);
 static void mp_label(struct gctl_req *);
 static void mp_clear(struct gctl_req *);
-static void mp_add(struct gctl_req *);
 
 struct g_command class_commands[] = {
 	{
-		"label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, G_NULL_OPTS,
-		NULL, "[-v] name prov ..."
+		"create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, NULL,
+		{
+			{ 'A', "active_active", NULL, G_TYPE_BOOL },
+			G_OPT_SENTINEL
+		},
+		NULL, "[-vA] name prov ..."
 	},
 	{
-		"add", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main, G_NULL_OPTS,
-		NULL, "[-v] name prov ..."
+		"label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, mp_main,
+		{
+			{ 'A', "active_active", NULL, G_TYPE_BOOL },
+			G_OPT_SENTINEL
+		},
+		NULL, "[-vA] name prov ..."
 	},
 	{
-		"destroy", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
-		NULL, "[-v] prov ..."
+		"add", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name prov"
 	},
 	{
-		"clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS,
-		NULL, "[-v] prov ..."
+		"remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name prov"
+	},
+	{
+		"fail", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name prov"
+	},
+	{
+		"restore", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name prov"
 	},
 	{
 		"rotate", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
-		NULL, "[-v] prov ..."
+		NULL, "[-v] name"
 	},
 	{
 		"getactive", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name"
+	},
+	{
+		"destroy", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name"
+	},
+	{
+		"stop", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
+		NULL, "[-v] name"
+	},
+	{
+		"clear", G_FLAG_VERBOSE, mp_main, G_NULL_OPTS,
 		NULL, "[-v] prov ..."
 	},
 	G_CMD_SENTINEL
@@ -90,8 +118,6 @@ mp_main(struct gctl_req *req, unsigned i
 	}
 	if (strcmp(name, "label") == 0) {
 		mp_label(req);
-	} else if (strcmp(name, "add") == 0) {
-		mp_add(req);
 	} else if (strcmp(name, "clear") == 0) {
 		mp_clear(req);
 	} else {
@@ -103,13 +129,13 @@ static void
 mp_label(struct gctl_req *req)
 {
 	struct g_multipath_metadata md;
-	off_t disksiz = 0, msize;
-	uint8_t *sector;
+	off_t disksize = 0, msize;
+	uint8_t *sector, *rsector;
 	char *ptr;
 	uuid_t uuid;
 	uint32_t secsize = 0, ssize, status;
-	const char *name, *mpname;
-	int error, i, nargs;
+	const char *name, *name2, *mpname;
+	int error, i, nargs, fd;
 
 	nargs = gctl_get_int(req, "nargs");
 	if (nargs < 2) {
@@ -132,14 +158,14 @@ mp_label(struct gctl_req *req)
 		}
 		if (i == 1) {
 			secsize = ssize;
-			disksiz	= msize;
+			disksize = msize;
 		} else {
 			if (secsize != ssize) {
 				gctl_error(req, "%s sector size %u different.",
 				    name, ssize);
 				return;
 			}
-			if (disksiz != msize) {
+			if (disksize != msize) {
 				gctl_error(req, "%s media size %ju different.",
 				    name, (intmax_t)msize);
 				return;
@@ -155,7 +181,7 @@ mp_label(struct gctl_req *req)
 	md.md_version = G_MULTIPATH_VERSION;
 	mpname = gctl_get_ascii(req, "arg0");
 	strlcpy(md.md_name, mpname, sizeof(md.md_name));
-	md.md_size = disksiz;
+	md.md_size = disksize;
 	md.md_sectorsize = secsize;
 	uuid_create(&uuid, &status);
 	if (status != uuid_s_ok) {
@@ -168,19 +194,10 @@ mp_label(struct gctl_req *req)
 		return;
 	}
 	strlcpy(md.md_uuid, ptr, sizeof (md.md_uuid));
+	md.md_active_active = gctl_get_int(req, "active_active");
 	free(ptr);
 
 	/*
-	 * Clear metadata on initial provider first.
-	 */
-	name = gctl_get_ascii(req, "arg1");
-	error = g_metadata_clear(name, NULL);
-	if (error != 0) {
-		gctl_error(req, "cannot clear metadata on %s: %s.", name, strerror(error));
-		return;
-	}
-
-	/*
 	 * Allocate a sector to write as metadata.
 	 */
 	sector = malloc(secsize);
@@ -189,6 +206,12 @@ mp_label(struct gctl_req *req)
 		return;
 	}
 	memset(sector, 0, secsize);
+	rsector = malloc(secsize);
+	if (rsector == NULL) {
+		free(sector);
+		gctl_error(req, "unable to allocate metadata buffer");
+		return;
+	}
 
 	/*
 	 * encode the metadata
@@ -198,6 +221,7 @@ mp_label(struct gctl_req *req)
 	/*
 	 * Store metadata on the initial provider.
 	 */
+	name = gctl_get_ascii(req, "arg1");
 	error = g_metadata_store(name, sector, secsize);
 	if (error != 0) {
 		gctl_error(req, "cannot store metadata on %s: %s.", name, strerror(error));
@@ -205,20 +229,29 @@ mp_label(struct gctl_req *req)
 	}
 
 	/*
-	 * Now add the rest of the providers.
+	 * Now touch the rest of the providers to hint retaste.
 	 */
-	error = gctl_change_param(req, "verb", -1, "add");
-	if (error) {
-		gctl_error(req, "unable to change verb to \"add\": %s.", strerror(error));
-		return;
-	}
 	for (i = 2; i < nargs; i++) {
-		error = gctl_change_param(req, "arg1", -1, gctl_get_ascii(req, "arg%d", i));
-		if (error) {
-			gctl_error(req, "unable to add %s to %s: %s.", gctl_get_ascii(req, "arg%d", i), mpname, strerror(error));
+		name2 = gctl_get_ascii(req, "arg%d", i);
+		fd = g_open(name2, 1);
+		if (fd < 0) {
+			fprintf(stderr, "Unable to open %s: %s.\n",
+			    name2, strerror(errno));
+			continue;
+		}
+		if (pread(fd, rsector, secsize, disksize - secsize) !=
+		    (ssize_t)secsize) {
+			fprintf(stderr, "Unable to read metadata from %s: %s.\n",
+			    name2, strerror(errno));
+			g_close(fd);
 			continue;
 		}
-		mp_add(req);
+		g_close(fd);
+		if (memcmp(sector, rsector, secsize)) {
+			fprintf(stderr, "No metadata found on %s."
+			    " It is not a path of %s.\n",
+			    name2, name);
+		}
 	}
 }
 
@@ -247,13 +280,3 @@ mp_clear(struct gctl_req *req)
 	}
 }
 
-static void
-mp_add(struct gctl_req *req)
-{
-	const char *errstr;
-
-	errstr = gctl_issue(req);
-	if (errstr != NULL && errstr[0] != '\0') {
-		gctl_error(req, "%s", errstr);
-	}
-}

Modified: stable/8/sbin/geom/class/multipath/gmultipath.8
==============================================================================
--- stable/8/sbin/geom/class/multipath/gmultipath.8	Mon Jan  2 19:23:52 2012	(r229308)
+++ stable/8/sbin/geom/class/multipath/gmultipath.8	Mon Jan  2 19:27:23 2012	(r229309)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 26, 2007
+.Dd October 31, 2011
 .Dt GMULTIPATH 8
 .Os
 .Sh NAME
@@ -32,11 +32,48 @@
 .Nd "disk multipath control utility"
 .Sh SYNOPSIS
 .Nm
+.Cm create
+.Op Fl Av
+.Ar name
+.Ar prov ...
+.Nm
 .Cm label
-.Op Fl hv
+.Op Fl Av
 .Ar name
 .Ar prov ...
 .Nm
+.Cm add
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm remove
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm fail
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm restore
+.Op Fl v
+.Ar name prov
+.Nm
+.Cm rotate
+.Op Fl v
+.Ar name
+.Nm
+.Cm getactive
+.Op Fl v
+.Ar name
+.Nm
+.Cm destroy
+.Op Fl v
+.Ar name
+.Nm
+.Cm stop
+.Op Fl v
+.Ar name
+.Nm
 .Cm clear
 .Op Fl v
 .Ar prov ...
@@ -53,27 +90,79 @@ The
 .Nm
 utility is used for device multipath configuration.
 .Pp
-Only automatic configuration is supported at the present time via the
-.Cm label
-command.
-This operation writes a label on the last sector of the underlying
-disk device with a contained name and UUID.
-The UUID guarantees uniqueness
-in a shared storage environment but is in general too cumbersome to use.
+The multipath device can be configured using two different methods:
+.Dq manual
+or
+.Dq automatic .
+When using the
+.Dq manual
+method, no metadata are stored on the devices, so the multipath
+device has to be configured by hand every time it is needed.
+Additional device paths also won't be detected automatically.
+The
+.Dq automatic
+method uses on-disk metadata to detect the device and all its paths.
+Metadata use the last sector of the underlying disk device and
+include device name and UUID.
+The UUID guarantees uniqueness in a shared storage environment
+but is in general too cumbersome to use.
 The name is what is exported via the device interface.
 .Pp
 The first argument to
 .Nm
 indicates an action to be performed:
 .Bl -tag -width ".Cm destroy"
+.It Cm create
+Create multipath device with
+.Dq manual
+method without writing any on-disk metadata.
+It is up to the administrator to properly identify device paths.
+Kernel will only check that all given providers have same media and
+sector sizes.
+.Pp
+.Fl A
+option enables Active/Active mode, otherwise Active/Passive mode is used
+by default.
 .It Cm label
-Label the given underlying device with the specified
+Create multipath device with
+.Dq automatic
+method.
+Label the first given provider with on-disk metadata using the specified
 .Ar name .
-The kernel module
-.Pa geom_multipath.ko
-will be loaded if it is not loaded already.
+The rest of given providers will be retasted to detect these metadata.
+It reliably protects against specifying unrelated providers.
+Providers with no matching metadata detected will not be added to the device.
+.Pp
+.Fl A
+option enables Active/Active mode, otherwise Active/Passive mode is used
+by default.
+.It Cm add
+Add the given provider as a path to the given multipath device.
+Should normally be used only for devices created with
+.Dq manual
+method, unless you know what you are doing (you are sure that it is another
+device path, but tasting its metadata in regular
+.Dq automatic
+way is not possible).
+.It Cm remove
+Remove the given provider as a path from the given multipath device.
+If the last path is removed, the multipath device will be destroyed.
+.It Cm fail
+Mark specified provider as a path of the specified multipath device as failed.
+If there are other paths present, new requests will be forwarded there.
+.It Cm restore
+Mark specified provider as a path of the specified multipath device as
+operational, allowing it to handle requests.
+.It Cm rotate
+Change the active provider/path in Active/Passive mode.
+.It Cm getactive
+Get the currently active provider(s)/path(s).
+.It Cm destroy
+Destroy the given multipath device clearing metadata.
+.It Cm stop
+Stop the given multipath device without clearing metadata.
 .It Cm clear
-Clear metadata on the given device.
+Clear metadata on the given provider.
 .It Cm list
 See
 .Xr geom 8 .
@@ -101,14 +190,15 @@ Debug level of the
 GEOM class.
 This can be set to 0 (default) or 1 to disable or enable various
 forms of chattiness.
+.It Va kern.geom.multipath.exclusive : No 1
+Open underlying providers exclusively, preventing access to individual paths.
 .El
 .Sh EXIT STATUS
 Exit status is 0 on success, and 1 if the command fails.
 .Sh MULTIPATH ARCHITECTURE
 .Pp
-This is an active/passive
-multiple path architecture with no device knowledge or presumptions other
-than size matching built in.
+This is a multiple path architecture with no device knowledge or
+presumptions other than size matching built in.
 Therefore the user must exercise some care
 in selecting providers that do indeed represent multiple paths to the
 same underlying disk device.
@@ -133,15 +223,16 @@ of multiple pathnames refer to the same 
 system operator who will use tools and knowledge of their own storage
 subsystem to make the correct configuration selection.
 .Pp
-As an active/passive architecture, only one path has I/O moving on it
+There are Active/Passive and Active/Active operation modes supported.
+In Active/Passive mode only one path has I/O moving on it
 at any point in time.
 This I/O continues until an I/O is returned with
 a generic I/O error or a "Nonexistent Device" error.
-When this occurs,
-the active device is kicked out of the
-.Nm MULTIPATH
-GEOM class and the next in a list is selected, the failed I/O reissued
-and the system proceeds.
+When this occurs, that path is marked FAIL, the next path
+in a list is selected as active and the failed I/O reissued.
+In Active/Active mode all paths not marked FAIL may handle I/O at the same time.
+Requests are distributed between paths to equalize load.
+For capable devices this allows the bandwidth of all paths to be utilized.
 .Pp
 When new devices are added to the system the
 .Nm MULTIPATH
@@ -149,9 +240,9 @@ GEOM class is given an opportunity to ta
 If a new
 device has a
 .Nm MULTIPATH
-label, the device is used to either create a new
+on-disk metadata label, the device is used to either create a new
 .Nm MULTIPATH
-GEOM, or to attach to the end of the list of devices for an existing
+GEOM, or is added to the list of paths for an existing
 .Nm MULTIPATH
 GEOM.
 .Pp
@@ -176,7 +267,7 @@ of an RSCN event from the Fabric Domain 
 a rescan to occur and cause the attachment and configuration of any
 (now) new devices to occur, causing the taste event described above.
 .Pp
-This means that this active/passive architecture is not a one-shot path
+This means that this multipath architecture is not a one-shot path
 failover, but can be considered to be steady state as long as failed
 paths are repaired (automatically or otherwise).
 .Pp
@@ -184,7 +275,7 @@ Automatic rescanning is not a requiremen
 Nor is Fibre Channel.
 The
 same failover mechanisms work equally well for traditional "Parallel"
-SCSI but require manual intervention with
+SCSI but may require manual intervention with
 .Xr camcontrol 8
 to cause the reattachment of repaired device links.
 .Sh EXAMPLES
@@ -226,9 +317,9 @@ mount /dev/multipath/FREDa /mnt....
 .Pp
 The resultant console output looks something like:
 .Bd -literal -offset indent
-GEOM_MULTIPATH: adding da0 to Fred/b631385f-c61c-11db-b884-0011116ae789
-GEOM_MULTIPATH: da0 now active path in Fred
-GEOM_MULTIPATH: adding da2 to Fred/b631385f-c61c-11db-b884-0011116ae789
+GEOM_MULTIPATH: da0 added to FRED
+GEOM_MULTIPATH: da0 is now active path in FRED
+GEOM_MULTIPATH: da2 added to FRED
 .Ed
 .Sh SEE ALSO
 .Xr geom 4 ,
@@ -240,24 +331,6 @@ GEOM_MULTIPATH: adding da2 to Fred/b6313
 .Xr mount 8 ,
 .Xr newfs 8 ,
 .Xr sysctl 8
-.Sh BUGS
-The
-.Nm
-should allow for a manual method of pairing disks.
-.Pp
-There is currently no way for
-.Pa geom_multipath.ko
-to distinguish between various label instances of the same provider.
-That
-is devices such as
-.Ar da0
-and
-.Ar da0c
-can be tasted and instantiated as multiple paths for the same device.
-Technically, this is correct, but pretty useless.
-This will be fixed soon
-(I hope), but to avoid this it is a good idea to destroy any label on
-the disk object prior to labelling it with
-.Nm .
 .Sh AUTHOR
 .An Matthew Jacob Aq mjacob@FreeBSD.org
+.An Alexander Motin Aq mav@FreeBSD.org

Modified: stable/8/sys/geom/multipath/g_multipath.c
==============================================================================
--- stable/8/sys/geom/multipath/g_multipath.c	Mon Jan  2 19:23:52 2012	(r229308)
+++ stable/8/sys/geom/multipath/g_multipath.c	Mon Jan  2 19:27:23 2012	(r229309)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org>
  * Copyright (c) 2006-2007 Matthew Jacob <mjacob@FreeBSD.org>
  * All rights reserved.
  *
@@ -51,6 +52,9 @@ SYSCTL_NODE(_kern_geom, OID_AUTO, multip
 static u_int g_multipath_debug = 0;
 SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, debug, CTLFLAG_RW,
     &g_multipath_debug, 0, "Debug level");
+static u_int g_multipath_exclusive = 1;
+SYSCTL_UINT(_kern_geom_multipath, OID_AUTO, exclusive, CTLFLAG_RW,
+    &g_multipath_exclusive, 0, "Exclusively open providers");
 
 static enum {
 	GKT_NIL,
@@ -77,6 +81,7 @@ static g_taste_t g_multipath_taste;
 static g_ctl_req_t g_multipath_config;
 static g_init_t g_multipath_init;
 static g_fini_t g_multipath_fini;
+static g_dumpconf_t g_multipath_dumpconf;
 
 struct g_class g_multipath_class = {
 	.name		= G_MULTIPATH_CLASS_NAME,
@@ -88,35 +93,144 @@ struct g_class g_multipath_class = {
 	.fini		= g_multipath_fini
 };
 
-#define	MP_BAD		0x1
-#define	MP_POSTED	0x2
+#define	MP_FAIL		0x00000001
+#define	MP_LOST		0x00000002
+#define	MP_NEW		0x00000004
+#define	MP_POSTED	0x00000008
+#define	MP_BAD		(MP_FAIL | MP_LOST | MP_NEW)
+#define MP_IDLE		0x00000010
+#define MP_IDLE_MASK	0xfffffff0
+
+static int
+g_multipath_good(struct g_geom *gp)
+{
+	struct g_consumer *cp;
+	int n = 0;
+
+	LIST_FOREACH(cp, &gp->consumer, consumer) {
+		if ((cp->index & MP_BAD) == 0)
+			n++;
+	}
+	return (n);
+}
+
+static void
+g_multipath_fault(struct g_consumer *cp, int cause)
+{
+	struct g_multipath_softc *sc;
+	struct g_consumer *lcp;
+	struct g_geom *gp;
+
+	gp = cp->geom;
+	sc = gp->softc;
+	cp->index |= cause;
+	if (g_multipath_good(gp) == 0 && sc->sc_ndisks > 0) {
+		LIST_FOREACH(lcp, &gp->consumer, consumer) {
+			if (lcp->provider == NULL ||
+			    (lcp->index & (MP_LOST | MP_NEW)))
+				continue;
+			if (sc->sc_ndisks > 1 && lcp == cp)
+				continue;
+			printf("GEOM_MULTIPATH: "
+			    "all paths in %s were marked FAIL, restore %s\n",
+			    sc->sc_name, lcp->provider->name);
+			lcp->index &= ~MP_FAIL;
+		}
+	}
+	if (cp != sc->sc_active)
+		return;
+	sc->sc_active = NULL;
+	LIST_FOREACH(lcp, &gp->consumer, consumer) {
+		if ((lcp->index & MP_BAD) == 0) {
+			sc->sc_active = lcp;
+			break;
+		}
+	}
+	if (sc->sc_active == NULL) {
+		printf("GEOM_MULTIPATH: out of providers for %s\n",
+		    sc->sc_name);
+	} else if (!sc->sc_active_active) {
+		printf("GEOM_MULTIPATH: %s is now active path in %s\n",
+		    sc->sc_active->provider->name, sc->sc_name);
+	}
+}
+
+static struct g_consumer *
+g_multipath_choose(struct g_geom *gp)
+{
+	struct g_multipath_softc *sc;
+	struct g_consumer *best, *cp;
+
+	sc = gp->softc;
+	if (!sc->sc_active_active)
+		return (sc->sc_active);
+	best = NULL;
+	LIST_FOREACH(cp, &gp->consumer, consumer) {
+		if (cp->index & MP_BAD)
+			continue;
+		cp->index += MP_IDLE;
+		if (best == NULL || cp->private < best->private ||
+		    (cp->private == best->private && cp->index > best->index))
+			best = cp;
+	}
+	if (best != NULL)
+		best->index &= ~MP_IDLE_MASK;
+	return (best);
+}
 
 static void
 g_mpd(void *arg, int flags __unused)
 {
+	struct g_geom *gp;
+	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
+	int w;
 
 	g_topology_assert();
 	cp = arg;
-	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
+	gp = cp->geom;
+	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
+		w = cp->acw;
 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+		if (w > 0 && cp->provider != NULL &&
+		    (cp->provider->geom->flags & G_GEOM_WITHER) == 0) {
+			g_post_event(g_mpd, cp, M_WAITOK, NULL);
+			return;
+		}
+	}
+	sc = gp->softc;
+	mtx_lock(&sc->sc_mtx);
 	if (cp->provider) {
 		printf("GEOM_MULTIPATH: %s removed from %s\n",
-		    cp->provider->name, cp->geom->name);
+		    cp->provider->name, gp->name);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
+	mtx_unlock(&sc->sc_mtx);
+	if (LIST_EMPTY(&gp->consumer))
+		g_multipath_destroy(gp);
 }
 
 static void
 g_multipath_orphan(struct g_consumer *cp)
 {
-	if ((cp->index & MP_POSTED) == 0) {
+	struct g_multipath_softc *sc;
+	uintptr_t *cnt;
+
+	g_topology_assert();
+	printf("GEOM_MULTIPATH: %s in %s was disconnected\n",
+	    cp->provider->name, cp->geom->name);
+	sc = cp->geom->softc;
+	cnt = (uintptr_t *)&cp->private;
+	mtx_lock(&sc->sc_mtx);
+	sc->sc_ndisks--;
+	g_multipath_fault(cp, MP_LOST);
+	if (*cnt == 0 && (cp->index & MP_POSTED) == 0) {
 		cp->index |= MP_POSTED;
-		printf("GEOM_MULTIPATH: %s orphaned in %s\n",
-		    cp->provider->name, cp->geom->name);
+		mtx_unlock(&sc->sc_mtx);
 		g_mpd(cp, 0);
-	}
+	} else
+		mtx_unlock(&sc->sc_mtx);
 }
 
 static void
@@ -126,20 +240,29 @@ g_multipath_start(struct bio *bp)
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	struct bio *cbp;
+	uintptr_t *cnt;
 
 	gp = bp->bio_to->geom;
 	sc = gp->softc;
 	KASSERT(sc != NULL, ("NULL sc"));
-	cp = sc->cp_active;
-	if (cp == NULL) {
-		g_io_deliver(bp, ENXIO);
-		return;
-	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
+	mtx_lock(&sc->sc_mtx);
+	cp = g_multipath_choose(gp);
+	if (cp == NULL) {
+		mtx_unlock(&sc->sc_mtx);
+		g_destroy_bio(cbp);
+		g_io_deliver(bp, ENXIO);
+		return;
+	}
+	if ((uintptr_t)bp->bio_driver1 < sc->sc_ndisks)
+		bp->bio_driver1 = (void *)(uintptr_t)sc->sc_ndisks;
+	cnt = (uintptr_t *)&cp->private;
+	(*cnt)++;
+	mtx_unlock(&sc->sc_mtx);
 	cbp->bio_done = g_multipath_done;
 	g_io_request(cbp, cp);
 }
@@ -147,12 +270,27 @@ g_multipath_start(struct bio *bp)
 static void
 g_multipath_done(struct bio *bp)
 {
+	struct g_multipath_softc *sc;
+	struct g_consumer *cp;
+	uintptr_t *cnt;
+
 	if (bp->bio_error == ENXIO || bp->bio_error == EIO) {
 		mtx_lock(&gmtbq_mtx);
 		bioq_insert_tail(&gmtbq, bp);
-		wakeup(&g_multipath_kt_state);
 		mtx_unlock(&gmtbq_mtx);
+		wakeup(&g_multipath_kt_state);
 	} else {
+		cp = bp->bio_from;
+		sc = cp->geom->softc;
+		cnt = (uintptr_t *)&cp->private;
+		mtx_lock(&sc->sc_mtx);
+		(*cnt)--;
+		if (*cnt == 0 && (cp->index & MP_LOST)) {
+			cp->index |= MP_POSTED;
+			mtx_unlock(&sc->sc_mtx);
+			g_post_event(g_mpd, cp, M_WAITOK, NULL);
+		} else
+			mtx_unlock(&sc->sc_mtx);
 		g_std_done(bp);
 	}
 }
@@ -165,6 +303,7 @@ g_multipath_done_error(struct bio *bp)
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp;
 	struct g_provider *pp;
+	uintptr_t *cnt;
 
 	/*
 	 * If we had a failure, we have to check first to see
@@ -174,47 +313,31 @@ g_multipath_done_error(struct bio *bp)
 	 * to the next available consumer.
 	 */
 
-	g_topology_lock();
 	pbp = bp->bio_parent;
 	gp = pbp->bio_to->geom;
 	sc = gp->softc;
 	cp = bp->bio_from;
 	pp = cp->provider;
+	cnt = (uintptr_t *)&cp->private;
 
-	cp->index |= MP_BAD;
-	if (cp->nend == cp->nstart && pp->nend == pp->nstart) {
+	mtx_lock(&sc->sc_mtx);
+	printf("GEOM_MULTIPATH: Error %d, %s in %s marked FAIL\n",
+	    bp->bio_error, pp->name, sc->sc_name);
+	g_multipath_fault(cp, MP_FAIL);
+	(*cnt)--;
+	if (*cnt == 0 && (cp->index & (MP_LOST | MP_POSTED)) == MP_LOST) {
 		cp->index |= MP_POSTED;
-		g_post_event(g_mpd, cp, M_NOWAIT, NULL);
-	}
-	if (cp == sc->cp_active) {
-		struct g_consumer *lcp;
-		printf("GEOM_MULTIPATH: %s failed in %s\n",
-		    pp->name, sc->sc_name);
-		sc->cp_active = NULL;
-		LIST_FOREACH(lcp, &gp->consumer, consumer) {
-			if ((lcp->index & MP_BAD) == 0) {
-				sc->cp_active = lcp;
-				break;
-			}
-		}
-		if (sc->cp_active == NULL || sc->cp_active->provider == NULL) {
-			printf("GEOM_MULTIPATH: out of providers for %s\n",
-			    sc->sc_name);
-			g_topology_unlock();
-			return;
-		} else {
-			printf("GEOM_MULTIPATH: %s now active path in %s\n",
-			    sc->cp_active->provider->name, sc->sc_name);
-		}
-	}
-	g_topology_unlock();
+		mtx_unlock(&sc->sc_mtx);
+		g_post_event(g_mpd, cp, M_WAITOK, NULL);
+	} else
+		mtx_unlock(&sc->sc_mtx);
 
 	/*
 	 * If we can fruitfully restart the I/O, do so.
 	 */
-	if (sc->cp_active) {
+	if (pbp->bio_children < (uintptr_t)pbp->bio_driver1) {
+		pbp->bio_inbed++;
 		g_destroy_bio(bp);
-		pbp->bio_children--;
 		g_multipath_start(pbp);
 	} else {
 		g_std_done(bp);
@@ -252,6 +375,7 @@ g_multipath_access(struct g_provider *pp
 {
 	struct g_geom *gp;
 	struct g_consumer *cp, *badcp = NULL;
+	struct g_multipath_softc *sc;
 	int error;
 
 	gp = pp->geom;
@@ -263,6 +387,10 @@ g_multipath_access(struct g_provider *pp
 			goto fail;
 		}
 	}
+	sc = gp->softc;
+	sc->sc_opened += dr + dw + de;
+	if (sc->sc_stopping && sc->sc_opened == 0)
+		g_multipath_destroy(gp);
 	return (0);
 
 fail:
@@ -284,6 +412,9 @@ g_multipath_create(struct g_class *mp, s
 	g_topology_assert();
 
 	LIST_FOREACH(gp, &mp->geom, geom) {
+		sc = gp->softc;
+		if (sc == NULL || sc->sc_stopping)
+			continue;
 		if (strcmp(gp->name, md->md_name) == 0) {
 			printf("GEOM_MULTIPATH: name %s already exists\n",
 			    md->md_name);
@@ -293,19 +424,25 @@ g_multipath_create(struct g_class *mp, s
 
 	gp = g_new_geomf(mp, md->md_name);
 	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
+	mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF);
+	memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid));
+	memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name));
+	sc->sc_active_active = md->md_active_active;
 	gp->softc = sc;
 	gp->start = g_multipath_start;
 	gp->orphan = g_multipath_orphan;
 	gp->access = g_multipath_access;
-	memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid));
-	memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name));
+	gp->dumpconf = g_multipath_dumpconf;
 
 	pp = g_new_providerf(gp, "multipath/%s", md->md_name);
-	/* limit the provider to not have it stomp on metadata */
-	pp->mediasize = md->md_size - md->md_sectorsize;
-	pp->sectorsize = md->md_sectorsize;
-	sc->pp = pp;
+	if (md->md_size != 0) {
+		pp->mediasize = md->md_size -
+		    ((md->md_uuid[0] != 0) ? md->md_sectorsize : 0);
+		pp->sectorsize = md->md_sectorsize;
+	}
+	sc->sc_pp = pp;
 	g_error_provider(pp, 0);
+	printf("GEOM_MULTIPATH: %s created\n", gp->name);
 	return (gp);
 }
 
@@ -314,7 +451,7 @@ g_multipath_add_disk(struct g_geom *gp, 
 {
 	struct g_multipath_softc *sc;
 	struct g_consumer *cp, *nxtcp;
-	int error;
+	int error, acr, acw, ace;
 
 	g_topology_assert();
 
@@ -335,6 +472,8 @@ g_multipath_add_disk(struct g_geom *gp, 
 	}
 	nxtcp = LIST_FIRST(&gp->consumer);
 	cp = g_new_consumer(gp);
+	cp->private = NULL;
+	cp->index = MP_NEW;
 	error = g_attach(cp, pp);
 	if (error != 0) {
 		printf("GEOM_MULTIPATH: cannot attach %s to %s",
@@ -342,29 +481,51 @@ g_multipath_add_disk(struct g_geom *gp, 
 		g_destroy_consumer(cp);
 		return (error);
 	}
-	cp->private = sc;
-	cp->index = 0;
 
 	/*
 	 * Set access permissions on new consumer to match other consumers
 	 */
-	if (nxtcp && (nxtcp->acr + nxtcp->acw +  nxtcp->ace)) {
-		error = g_access(cp, nxtcp->acr, nxtcp->acw, nxtcp->ace);
-		if (error) {
-			printf("GEOM_MULTIPATH: cannot set access in "
-			    "attaching %s to %s/%s (%d)\n",
-			    pp->name, sc->sc_name, sc->sc_uuid, error);
-			g_detach(cp);
-			g_destroy_consumer(cp);
-			return (error);
-		}
+	if (sc->sc_pp) {
+		acr = sc->sc_pp->acr;
+		acw = sc->sc_pp->acw;
+		ace = sc->sc_pp->ace;
+	} else
+		acr = acw = ace = 0;
+	if (g_multipath_exclusive) {
+		acr++;
+		acw++;
+		ace++;
+	}
+	error = g_access(cp, acr, acw, ace);
+	if (error) {
+		printf("GEOM_MULTIPATH: cannot set access in "
+		    "attaching %s to %s (%d)\n",
+		    pp->name, sc->sc_name, error);
+		g_detach(cp);
+		g_destroy_consumer(cp);
+		return (error);
 	}
-	printf("GEOM_MULTIPATH: adding %s to %s/%s\n",
-	    pp->name, sc->sc_name, sc->sc_uuid);
-	if (sc->cp_active == NULL) {
-		sc->cp_active = cp;
-		printf("GEOM_MULTIPATH: %s now active path in %s\n",
-		    pp->name, sc->sc_name);
+	if (sc->sc_pp != NULL && sc->sc_pp->mediasize == 0) {
+		sc->sc_pp->mediasize = pp->mediasize -
+		    ((sc->sc_uuid[0] != 0) ? pp->sectorsize : 0);
+		sc->sc_pp->sectorsize = pp->sectorsize;
+	}
+	if (sc->sc_pp != NULL &&
+	    sc->sc_pp->stripesize == 0 && sc->sc_pp->stripeoffset == 0) {
+		sc->sc_pp->stripesize = pp->stripesize;
+		sc->sc_pp->stripeoffset = pp->stripeoffset;
+	}
+	mtx_lock(&sc->sc_mtx);
+	cp->index = 0;
+	sc->sc_ndisks++;
+	mtx_unlock(&sc->sc_mtx);
+	printf("GEOM_MULTIPATH: %s added to %s\n",
+	    pp->name, sc->sc_name);
+	if (sc->sc_active == NULL) {
+		sc->sc_active = cp;
+		if (!sc->sc_active_active)
+			printf("GEOM_MULTIPATH: %s is now active path in %s\n",
+			    pp->name, sc->sc_name);
 	}
 	return (0);
 }
@@ -372,17 +533,41 @@ g_multipath_add_disk(struct g_geom *gp, 
 static int
 g_multipath_destroy(struct g_geom *gp)
 {
-	struct g_provider *pp;
+	struct g_multipath_softc *sc;
+	struct g_consumer *cp, *cp1;
 
 	g_topology_assert();
 	if (gp->softc == NULL)
 		return (ENXIO);
-	pp = LIST_FIRST(&gp->provider);
-	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))
-		return (EBUSY);
-	printf("GEOM_MULTIPATH: destroying %s\n", gp->name);
+	sc = gp->softc;
+	if (!sc->sc_stopping) {
+		printf("GEOM_MULTIPATH: destroying %s\n", gp->name);
+		sc->sc_stopping = 1;
+	}
+	if (sc->sc_opened != 0) {
+		if (sc->sc_pp != NULL) {
+			g_wither_provider(sc->sc_pp, ENXIO);
+			sc->sc_pp = NULL;
+		}
+		return (EINPROGRESS);
+	}
+	LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
+		mtx_lock(&sc->sc_mtx);
+		if ((cp->index & MP_POSTED) == 0) {
+			cp->index |= MP_POSTED;
+			mtx_unlock(&sc->sc_mtx);
+			g_mpd(cp, 0);
+			if (cp1 == NULL)
+				return(0);	/* Recursion happened. */
+		} else
+			mtx_unlock(&sc->sc_mtx);
+	}
+	if (!LIST_EMPTY(&gp->consumer))
+		return (EINPROGRESS);
+	mtx_destroy(&sc->sc_mtx);
 	g_free(gp->softc);
 	gp->softc = NULL;
+	printf("GEOM_MULTIPATH: %s destroyed\n", gp->name);
 	g_wither_geom(gp, ENXIO);
 	return (0);
 }

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201201021927.q02JROpO082273>