Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 15 Mar 2017 04:16:08 +0000 (UTC)
From:      Josh Paetzel <jpaetzel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r315290 - in vendor/illumos/dist: cmd/zdb cmd/ztest lib/libzpool/common lib/libzpool/common/sys
Message-ID:  <201703150416.v2F4G8xo025013@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jpaetzel
Date: Wed Mar 15 04:16:08 2017
New Revision: 315290
URL: https://svnweb.freebsd.org/changeset/base/315290

Log:
  7303 dynamic metaslab selection
  
  illumos/illumos-gate@8363e80ae72609660f6090766ca8c2c18aa53f0c
  https://github.com/illumos/illumos-gate/commit/8363e80ae72609660f6090766ca8c2c18aa53f0c
  
  https://www.illumos.org/issues/7303
  
    This change introduces a new weighting algorithm to improve metaslab selection.
    The new weighting algorithm relies on the SPACEMAP_HISTOGRAM feature. As a result,
    the metaslab weight now encodes the type of weighting algorithm used
    (size-based vs segment-based).
  
    This also introduces a new allocation tracing facility and two new dcmds to help
    debug allocation problems. Each zio now contains a zio_alloc_list_t structure
    that is populated as the zio goes through the allocation stage. Here's an
    example of how to use the tracing facility:
  
  > c5ec000::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
    MSID    DVA    ASIZE      WEIGHT             RESULT               VDEV
       -      0      400           0    NOT_ALLOCATABLE           ztest.0a
       -      0      400           0    NOT_ALLOCATABLE           ztest.0a
       -      0      400           0             ENOSPC           ztest.0a
       -      0      200           0    NOT_ALLOCATABLE           ztest.0a
       -      0      200           0    NOT_ALLOCATABLE           ztest.0a
       -      0      200           0             ENOSPC           ztest.0a
       1      0      400      1 x 8M            17b1a00           ztest.0a
  
  > 1ff2400::print zio_t io_alloc_list | ::walk list | ::metaslab_trace
    MSID    DVA    ASIZE      WEIGHT             RESULT               VDEV
       -      0      200           0    NOT_ALLOCATABLE           mirror-2
       -      0      200           0    NOT_ALLOCATABLE           mirror-0
       1      0      200      1 x 4M            112ae00           mirror-1
       -      1      200           0    NOT_ALLOCATABLE           mirror-2
       -      1      200           0    NOT_ALLOCATABLE           mirror-0
       1      1      200      1 x 4M            112b000           mirror-1
       -      2      200           0    NOT_ALLOCATABLE           mirror-2
  
    If the metaslab is using segment-based weighting then the WEIGHT column will
    display the number of segments available in the bucket where the allocation
    attempt was made.
  
  Author: George Wilson <george.wilson@delphix.com>
  Reviewed by: Alex Reece <alex@delphix.com>
  Reviewed by: Chris Siden <christopher.siden@delphix.com>
  Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
  Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
  Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
  Reviewed by: Prakash Surya <prakash.surya@delphix.com>
  Reviewed by: Don Brady <don.brady@intel.com>
  Approved by: Richard Lowe <richlowe@richlowe.net>

Modified:
  vendor/illumos/dist/cmd/zdb/zdb.c
  vendor/illumos/dist/cmd/ztest/ztest.c
  vendor/illumos/dist/lib/libzpool/common/kernel.c
  vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h

Modified: vendor/illumos/dist/cmd/zdb/zdb.c
==============================================================================
--- vendor/illumos/dist/cmd/zdb/zdb.c	Wed Mar 15 00:29:27 2017	(r315289)
+++ vendor/illumos/dist/cmd/zdb/zdb.c	Wed Mar 15 04:16:08 2017	(r315290)
@@ -2562,10 +2562,21 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
 
 	if (!dump_opt['L']) {
 		vdev_t *rvd = spa->spa_root_vdev;
+
+		/*
+		 * We are going to be changing the meaning of the metaslab's
+		 * ms_tree.  Ensure that the allocator doesn't try to
+		 * use the tree.
+		 */
+		spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
+		spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
+
 		for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *vd = rvd->vdev_child[c];
+			metaslab_group_t *mg = vd->vdev_mg;
 			for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 				metaslab_t *msp = vd->vdev_ms[m];
+				ASSERT3P(msp->ms_group, ==, mg);
 				mutex_enter(&msp->ms_lock);
 				metaslab_unload(msp);
 
@@ -2586,8 +2597,6 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
 					    (longlong_t)m,
 					    (longlong_t)vd->vdev_ms_count);
 
-					msp->ms_ops = &zdb_metaslab_ops;
-
 					/*
 					 * We don't want to spend the CPU
 					 * manipulating the size-ordered
@@ -2597,7 +2606,10 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
 					msp->ms_tree->rt_ops = NULL;
 					VERIFY0(space_map_load(msp->ms_sm,
 					    msp->ms_tree, SM_ALLOC));
-					msp->ms_loaded = B_TRUE;
+
+					if (!msp->ms_loaded) {
+						msp->ms_loaded = B_TRUE;
+					}
 				}
 				mutex_exit(&msp->ms_lock);
 			}
@@ -2619,8 +2631,10 @@ zdb_leak_fini(spa_t *spa)
 		vdev_t *rvd = spa->spa_root_vdev;
 		for (int c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *vd = rvd->vdev_child[c];
+			metaslab_group_t *mg = vd->vdev_mg;
 			for (int m = 0; m < vd->vdev_ms_count; m++) {
 				metaslab_t *msp = vd->vdev_ms[m];
+				ASSERT3P(mg, ==, msp->ms_group);
 				mutex_enter(&msp->ms_lock);
 
 				/*
@@ -2634,7 +2648,10 @@ zdb_leak_fini(spa_t *spa)
 				 * from the ms_tree.
 				 */
 				range_tree_vacate(msp->ms_tree, zdb_leak, vd);
-				msp->ms_loaded = B_FALSE;
+
+				if (msp->ms_loaded) {
+					msp->ms_loaded = B_FALSE;
+				}
 
 				mutex_exit(&msp->ms_lock);
 			}

Modified: vendor/illumos/dist/cmd/ztest/ztest.c
==============================================================================
--- vendor/illumos/dist/cmd/ztest/ztest.c	Wed Mar 15 00:29:27 2017	(r315289)
+++ vendor/illumos/dist/cmd/ztest/ztest.c	Wed Mar 15 04:16:08 2017	(r315290)
@@ -171,7 +171,7 @@ static const ztest_shared_opts_t ztest_o
 	.zo_mirrors = 2,
 	.zo_raidz = 4,
 	.zo_raidz_parity = 1,
-	.zo_vdev_size = SPA_MINDEVSIZE * 2,
+	.zo_vdev_size = SPA_MINDEVSIZE * 4,	/* 256m default size */
 	.zo_datasets = 7,
 	.zo_threads = 23,
 	.zo_passtime = 60,		/* 60 seconds */

Modified: vendor/illumos/dist/lib/libzpool/common/kernel.c
==============================================================================
--- vendor/illumos/dist/lib/libzpool/common/kernel.c	Wed Mar 15 00:29:27 2017	(r315289)
+++ vendor/illumos/dist/lib/libzpool/common/kernel.c	Wed Mar 15 04:16:08 2017	(r315290)
@@ -94,6 +94,11 @@ kstat_create(const char *module, int ins
 
 /*ARGSUSED*/
 void
+kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
+{}
+
+/*ARGSUSED*/
+void
 kstat_install(kstat_t *ksp)
 {}
 

Modified: vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h	Wed Mar 15 00:29:27 2017	(r315289)
+++ vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h	Wed Mar 15 04:16:08 2017	(r315290)
@@ -301,6 +301,7 @@ extern void cv_broadcast(kcondvar_t *cv)
  */
 extern kstat_t *kstat_create(const char *, int,
     const char *, const char *, uchar_t, ulong_t, uchar_t);
+extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
 extern void kstat_install(kstat_t *);
 extern void kstat_delete(kstat_t *);
 extern void kstat_waitq_enter(kstat_io_t *);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201703150416.v2F4G8xo025013>