Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 4 Nov 2012 13:29:47 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r242554 - in stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201211041329.qA4DTl4V097278@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Sun Nov  4 13:29:47 2012
New Revision: 242554
URL: http://svn.freebsd.org/changeset/base/242554

Log:
  MFC r241286,r242135: zfs_mount: taste geom providers for root pool config

Modified:
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Sun Nov  4 12:05:54 2012	(r242553)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Sun Nov  4 13:29:47 2012	(r242554)
@@ -3167,8 +3167,8 @@ spa_create(const char *pool, nvlist_t *n
 	return (0);
 }
 
-#if defined(sun)
 #ifdef _KERNEL
+#if defined(sun)
 /*
  * Get the root pool information from the root disk, then import the root pool
  * during the system boot up time.
@@ -3370,8 +3370,115 @@ out:
 	return (error);
 }
 
-#endif
+#else
+
+extern int
+vdev_geom_read_pool_label(const char *name, nvlist_t **config);
+
+/*
+ * Build a root-pool configuration for 'name' from the label returned by
+ * vdev_geom_read_pool_label().  The label's vdev tree becomes the single
+ * child of a newly created root vdev (guid = pool guid), which is then
+ * stored back into the config under ZPOOL_CONFIG_VDEV_TREE.
+ *
+ * Returns the config, or NULL if no label could be read.
+ */
+static nvlist_t *
+spa_generate_rootconf(const char *name)
+{
+	nvlist_t *label_cfg;
+	nvlist_t *top_vdev, *root_vdev;
+	uint64_t pool_guid;
+
+	if (vdev_geom_read_pool_label(name, &label_cfg) != 0)
+		return (NULL);
+
+	/*
+	 * Fetch the two pieces of the label the root vdev is built from:
+	 * the top-level vdev tree and the pool guid.
+	 */
+	VERIFY(nvlist_lookup_nvlist(label_cfg, ZPOOL_CONFIG_VDEV_TREE,
+	    &top_vdev) == 0);
+	VERIFY(nvlist_lookup_uint64(label_cfg, ZPOOL_CONFIG_POOL_GUID,
+	    &pool_guid) == 0);
+
+	/*
+	 * Create the root vdev and hang the top-level vdev under it as its
+	 * only child.
+	 */
+	VERIFY(nvlist_alloc(&root_vdev, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	VERIFY(nvlist_add_string(root_vdev, ZPOOL_CONFIG_TYPE,
+	    VDEV_TYPE_ROOT) == 0);
+	VERIFY(nvlist_add_uint64(root_vdev, ZPOOL_CONFIG_ID, 0ULL) == 0);
+	VERIFY(nvlist_add_uint64(root_vdev, ZPOOL_CONFIG_GUID,
+	    pool_guid) == 0);
+	VERIFY(nvlist_add_nvlist_array(root_vdev, ZPOOL_CONFIG_CHILDREN,
+	    &top_vdev, 1) == 0);
+
+	/*
+	 * Store the new root vdev as the config's vdev tree in place of the
+	 * one read from the label, then release the local root vdev, which
+	 * is no longer needed once it has been added.
+	 */
+	VERIFY(nvlist_add_nvlist(label_cfg, ZPOOL_CONFIG_VDEV_TREE,
+	    root_vdev) == 0);
+	nvlist_free(root_vdev);
+
+	return (label_cfg);
+}
+
+/*
+ * Import the root pool 'name' using the configuration generated from the
+ * on-disk label (see spa_generate_rootconf()).  Any pool of the same name
+ * already present in the namespace is removed and replaced.
+ *
+ * Returns 0 on success, EIO if no label for the pool could be found, or the
+ * error from spa_config_parse() if the label's vdev tree cannot be parsed.
+ */
+int
+spa_import_rootpool(const char *name)
+{
+	spa_t *spa;
+	vdev_t *rvd;
+	nvlist_t *config, *nvtop;
+	uint64_t txg;
+	char *pname;
+	int error;
+
+	/*
+	 * Read the label from the boot device and generate a configuration.
+	 */
+	config = spa_generate_rootconf(name);
+	if (config == NULL) {
+		cmn_err(CE_NOTE, "Cannot find the pool label for '%s'",
+		    name);
+		return (EIO);
+	}
+
+	/*
+	 * The label must carry the requested pool name and a txg.  Note
+	 * that pname points into config and is only valid until config is
+	 * freed.
+	 */
+	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &pname) == 0 && strcmp(name, pname) == 0);
+	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
+
+	mutex_enter(&spa_namespace_lock);
+	if ((spa = spa_lookup(pname)) != NULL) {
+		/*
+		 * Remove the existing root pool from the namespace so that we
+		 * can replace it with the correct config we just read in.
+		 */
+		spa_remove(spa);
+	}
+	spa = spa_add(pname, config, NULL);
+	spa->spa_is_root = B_TRUE;
+	spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
+
+	/*
+	 * Build up a vdev tree based on the boot device's label config.
+	 */
+	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvtop) == 0);
+	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
+	    VDEV_ALLOC_ROOTPOOL);
+	spa_config_exit(spa, SCL_ALL, FTAG);
+	if (error) {
+		mutex_exit(&spa_namespace_lock);
+		/* Log before freeing: pname lives inside config. */
+		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
+		    pname);
+		nvlist_free(config);
+		return (error);
+	}
+
+	spa_history_log_version(spa, LOG_POOL_IMPORT);
+
+	/* The parsed tree was only needed to validate the config. */
+	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+	vdev_free(rvd);
+	spa_config_exit(spa, SCL_ALL, FTAG);
+	mutex_exit(&spa_namespace_lock);
+
+	/*
+	 * Release our config on success too, exactly as the error path does
+	 * after spa_add(); previously it was leaked here.
+	 * NOTE(review): relies on spa_add() not keeping a reference to the
+	 * caller's nvlist -- confirm against spa_add().
+	 */
+	nvlist_free(config);
+	return (0);
+}
+
 #endif	/* sun */
+#endif
 
 /*
  * Import a non-root pool into the system.

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	Sun Nov  4 12:05:54 2012	(r242553)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	Sun Nov  4 13:29:47 2012	(r242554)
@@ -419,7 +419,11 @@ extern int spa_get_stats(const char *poo
     char *altroot, size_t buflen);
 extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
     const char *history_str, nvlist_t *zplprops);
+#if defined(sun)
 extern int spa_import_rootpool(char *devpath, char *devid);
+#else
+extern int spa_import_rootpool(const char *name);
+#endif
 extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props,
     uint64_t flags);
 extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Sun Nov  4 12:05:54 2012	(r242553)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Sun Nov  4 13:29:47 2012	(r242554)
@@ -175,17 +175,11 @@ vdev_geom_detach(void *arg, int flag __u
 static uint64_t
 nvlist_get_guid(nvlist_t *list)
 {
-	nvpair_t *elem = NULL;
 	uint64_t value;
 
-	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
-		if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
-		    strcmp(nvpair_name(elem), "guid") == 0) {
-			VERIFY(nvpair_value_uint64(elem, &value) == 0);
-			return (value);
-		}
-	}
-	return (0);
+	value = 0;
+	nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
+	return (value);
 }
 
 static int
@@ -223,8 +217,16 @@ vdev_geom_io(struct g_consumer *cp, int 
 	return (error);
 }
 
-static uint64_t
-vdev_geom_read_guid(struct g_consumer *cp)
+/*
+ * Orphan method for the temporary tasting geom.  Reaching it is treated as
+ * a bug: the assertion below always fails and names the provider that was
+ * being tasted.
+ */
+static void
+vdev_geom_taste_orphan(struct g_consumer *cp)
+{
+
+	KASSERT(0,
+	    ("%s called while tasting %s.", __func__, cp->provider->name));
+}
+
+static int
+vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
 {
 	struct g_provider *pp;
 	vdev_label_t *label;
@@ -232,13 +234,13 @@ vdev_geom_read_guid(struct g_consumer *c
 	size_t buflen;
 	uint64_t psize;
 	off_t offset, size;
-	uint64_t guid;
+	uint64_t guid, state, txg;
 	int error, l, len;
 
 	g_topology_assert_not();
 
 	pp = cp->provider;
-	ZFS_LOG(1, "Reading guid from %s...", pp->name);
+	ZFS_LOG(1, "Reading config from %s...", pp->name);
 
 	psize = pp->mediasize;
 	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
@@ -250,8 +252,8 @@ vdev_geom_read_guid(struct g_consumer *c
 	label = kmem_alloc(size, KM_SLEEP);
 	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
 
+	*config = NULL;
 	for (l = 0; l < VDEV_LABELS; l++) {
-		nvlist_t *config = NULL;
 
 		offset = vdev_label_offset(psize, l, 0);
 		if ((offset % pp->sectorsize) != 0)
@@ -261,27 +263,151 @@ vdev_geom_read_guid(struct g_consumer *c
 			continue;
 		buf = label->vl_vdev_phys.vp_nvlist;
 
-		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
+		if (nvlist_unpack(buf, buflen, config, 0) != 0)
 			continue;
 
-		guid = nvlist_get_guid(config);
-		nvlist_free(config);
-		if (guid != 0)
-			break;
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state == POOL_STATE_DESTROYED ||
+		    state > POOL_STATE_L2CACHE) {
+			nvlist_free(*config);
+			*config = NULL;
+			continue;
+		}
+
+		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
+		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0)) {
+			nvlist_free(*config);
+			*config = NULL;
+			continue;
+		}
+
+		break;
 	}
 
 	kmem_free(label, size);
-	if (guid != 0)
-		ZFS_LOG(1, "guid for %s is %ju", pp->name, (uintmax_t)guid);
-	return (guid);
+	return (*config == NULL ? ENOENT : 0);
+}
+
+/*
+ * Decide whether 'config' is a better candidate label for pool 'name' than
+ * what has been seen so far.
+ *
+ * The config is accepted only if its pool name equals 'name' and its txg
+ * (taken as 0 when the label has none) is strictly greater than *best_txg.
+ * On acceptance *best_txg is raised to that txg and 0 is returned;
+ * otherwise ENOENT is returned and *best_txg is left unchanged.
+ */
+static int
+vdev_geom_check_config(nvlist_t *config, const char *name, uint64_t *best_txg)
+{
+	uint64_t cand_txg = 0;
+	char *label_name;
+
+	if (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+	    &label_name) != 0)
+		return (ENOENT);
+	if (strcmp(label_name, name) != 0)
+		return (ENOENT);
+
+	ZFS_LOG(1, "found pool: %s", label_name);
+
+	/*
+	 * The lookup result is deliberately ignored: cand_txg is expected to
+	 * stay 0 when the key is missing, and 0 can never beat *best_txg.
+	 */
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &cand_txg);
+	if (cand_txg <= *best_txg)
+		return (ENOENT);
+
+	*best_txg = cand_txg;
+	ZFS_LOG(1, "txg: %ju", (uintmax_t)cand_txg);
+	return (0);
+}
+
+/*
+ * Attach the tasting consumer 'cp' to provider 'pp' and acquire one read
+ * reference on it via g_access(cp, 1, 0, 0).
+ *
+ * Providers that are withering, or whose sector size is larger than
+ * VDEV_PAD_SIZE or not a power of two, are rejected with EINVAL without
+ * attaching.  If g_access() fails the consumer is detached again and the
+ * g_access() error is returned.  Returns 0 on success.
+ */
+static int
+vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
+{
+	int error;
+
+	if ((pp->flags & G_PF_WITHER) != 0 ||
+	    pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
+		return (EINVAL);
+
+	g_attach(cp, pp);
+	error = g_access(cp, 1, 0, 0);
+	if (error == 0)
+		return (0);
+
+	/* Could not open the provider: undo the attach. */
+	g_detach(cp);
+	return (error);
 }
 
 static void
-vdev_geom_taste_orphan(struct g_consumer *cp)
+vdev_geom_dettach_taster(struct g_consumer *cp)
 {
+	g_access(cp, -1, 0, 0);
+	g_detach(cp);
+}
 
-	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
-	    cp->provider->name));
+/*
+ * Taste every GEOM provider outside the ZFS vdev class, looking for labels
+ * that belong to pool 'name', and hand back in *config the label config
+ * with the highest txg among them.
+ *
+ * Returns 0 if such a config was found; otherwise returns ENOENT and
+ * *config is NULL.
+ */
+int
+vdev_geom_read_pool_label(const char *name, nvlist_t **config)
+{
+	struct g_class *gclass;
+	struct g_geom *ggeom, *taste_geom;
+	struct g_provider *prov;
+	struct g_consumer *tcp;
+	nvlist_t *cand;
+	uint64_t best_txg = 0;
+	int error;
+
+	*config = NULL;
+
+	DROP_GIANT();
+	g_topology_lock();
+
+	/* Temporary geom and consumer used only for tasting. */
+	taste_geom = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
+	/* This orphan function should never be called. */
+	taste_geom->orphan = vdev_geom_taste_orphan;
+	tcp = g_new_consumer(taste_geom);
+
+	LIST_FOREACH(gclass, &g_classes, class) {
+		if (gclass == &zfs_vdev_class)
+			continue;
+		LIST_FOREACH(ggeom, &gclass->geom, geom) {
+			if ((ggeom->flags & G_GEOM_WITHER) != 0)
+				continue;
+			LIST_FOREACH(prov, &ggeom->provider, provider) {
+				if ((prov->flags & G_PF_WITHER) != 0)
+					continue;
+				if (vdev_geom_attach_taster(tcp, prov) != 0)
+					continue;
+
+				/*
+				 * The label is read with the topology lock
+				 * dropped; it is retaken before detaching.
+				 */
+				g_topology_unlock();
+				error = vdev_geom_read_config(tcp, &cand);
+				g_topology_lock();
+				vdev_geom_dettach_taster(tcp);
+				if (error != 0)
+					continue;
+				ZFS_LOG(1, "successfully read vdev config");
+
+				error = vdev_geom_check_config(cand, name,
+				    &best_txg);
+				if (error != 0) {
+					nvlist_free(cand);
+					continue;
+				}
+
+				/* New best: drop the previous one, if any. */
+				nvlist_free(*config);
+				*config = cand;
+			}
+		}
+	}
+
+	g_destroy_consumer(tcp);
+	g_destroy_geom(taste_geom);
+	g_topology_unlock();
+	PICKUP_GIANT();
+
+	if (*config == NULL)
+		return (ENOENT);
+	return (0);
+}
+
+/*
+ * Return the guid found in the label config read through consumer 'cp'.
+ * Returns 0 if no config could be read, or whatever nvlist_get_guid()
+ * yields for the config that was read.  Asserts that the topology lock is
+ * not held.
+ */
+static uint64_t
+vdev_geom_read_guid(struct g_consumer *cp)
+{
+	nvlist_t *label_cfg;
+	uint64_t guid;
+
+	g_topology_assert_not();
+
+	if (vdev_geom_read_config(cp, &label_cfg) != 0)
+		return (0);
+
+	guid = nvlist_get_guid(label_cfg);
+	nvlist_free(label_cfg);
+	return (guid);
 }
 
 static struct g_consumer *
@@ -308,18 +434,12 @@ vdev_geom_attach_by_guid(uint64_t guid)
 			if (gp->flags & G_GEOM_WITHER)
 				continue;
 			LIST_FOREACH(pp, &gp->provider, provider) {
-				if (pp->flags & G_PF_WITHER)
-					continue;
-				g_attach(zcp, pp);
-				if (g_access(zcp, 1, 0, 0) != 0) {
-					g_detach(zcp);
+				if (vdev_geom_attach_taster(zcp, pp) != 0)
 					continue;
-				}
 				g_topology_unlock();
 				pguid = vdev_geom_read_guid(zcp);
 				g_topology_lock();
-				g_access(zcp, -1, 0, 0);
-				g_detach(zcp);
+				vdev_geom_dettach_taster(zcp);
 				if (pguid != guid)
 					continue;
 				cp = vdev_geom_attach(pp);

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Sun Nov  4 12:05:54 2012	(r242553)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Sun Nov  4 13:29:47 2012	(r242554)
@@ -1523,6 +1523,25 @@ out:
 }
 #endif	/* OPENSOLARIS_MOUNTROOT */
 
+/*
+ * Copy the pool component of dataset name 'osname' -- everything before the
+ * first '/', or the whole string when there is no '/' -- into 'poolname' as
+ * a NUL-terminated string.
+ *
+ * 'poolname' must have room for MAXNAMELEN bytes.  Returns 0 on success, or
+ * ENAMETOOLONG (leaving 'poolname' untouched) when the pool component is
+ * MAXNAMELEN characters or longer.
+ */
+static int
+getpoolname(const char *osname, char *poolname)
+{
+	const char *slash;
+	size_t len;
+
+	slash = strchr(osname, '/');
+	len = (slash != NULL) ? (size_t)(slash - osname) : strlen(osname);
+	if (len >= MAXNAMELEN)
+		return (ENAMETOOLONG);
+
+	/* Exactly len non-NUL bytes are copied; terminate by hand. */
+	(void) strncpy(poolname, osname, len);
+	poolname[len] = '\0';
+	return (0);
+}
+
 /*ARGSUSED*/
 static int
 zfs_mount(vfs_t *vfsp)
@@ -1616,6 +1635,29 @@ zfs_mount(vfs_t *vfsp)
 		goto out;
 	}
 
+	/* Initial root mount: try hard to import the requested root pool. */
+	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
+	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
+		char pname[MAXNAMELEN];
+		spa_t *spa;
+		int prefer_cache;
+
+		error = getpoolname(osname, pname);
+		if (error)
+			goto out;
+
+		prefer_cache = 1;
+		TUNABLE_INT_FETCH("vfs.zfs.rootpool.prefer_cached_config",
+		    &prefer_cache);
+		mutex_enter(&spa_namespace_lock);
+		spa = spa_lookup(pname);
+		mutex_exit(&spa_namespace_lock);
+		if (!prefer_cache || spa == NULL) {
+			error = spa_import_rootpool(pname);
+			if (error)
+				goto out;
+		}
+	}
 	DROP_GIANT();
 	error = zfs_domount(vfsp, osname);
 	PICKUP_GIANT();



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211041329.qA4DTl4V097278>