Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 18 Apr 2014 22:04:59 +0000 (UTC)
From:      Xin LI <delphij@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r264671 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201404182204.s3IM4x13080118@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Fri Apr 18 22:04:58 2014
New Revision: 264671
URL: http://svnweb.freebsd.org/changeset/base/264671

Log:
  MFV r264668:
  
  4754 io issued to near-full luns even after setting noalloc threshold
  4755 mg_alloc_failures is no longer needed
  
  illumos/illumos@b6240e830b871f59c22a3918aebb3b36c872edba
  
  MFC after:	2 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	Fri Apr 18 21:35:23 2014	(r264670)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	Fri Apr 18 22:04:58 2014	(r264671)
@@ -41,7 +41,7 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab
  * avoid having to load lots of space_maps in a given txg. There are,
  * however, some cases where we want to avoid "fast" ganging and instead
  * we want to do an exhaustive search of all metaslabs on this device.
- * Currently we don't allow any gang, zil, or dump device related allocations
+ * Currently we don't allow any gang, slog, or dump device related allocations
  * to "fast" gang.
  */
 #define	CAN_FASTGANG(flags) \
@@ -74,18 +74,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_
     " of in-memory counterpart");
 
 /*
- * This value defines the number of allowed allocation failures per vdev.
- * If a device reaches this threshold in a given txg then we consider skipping
- * allocations on that device. The value of zfs_mg_alloc_failures is computed
- * in zio_init() unless it has been overridden in /etc/system.
- */
-int zfs_mg_alloc_failures = 0;
-TUNABLE_INT("vfs.zfs.mg_alloc_failures", &zfs_mg_alloc_failures);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, mg_alloc_failures, CTLFLAG_RWTUN,
-    &zfs_mg_alloc_failures, 0,
-    "Number of allowed allocation failures per vdev");
-
-/*
  * The zfs_mg_noalloc_threshold defines which metaslab groups should
  * be eligible for allocation. The value is defined as a percentage of
  * a free space. Metaslab groups that have more free space than
@@ -1707,10 +1695,7 @@ metaslab_sync_done(metaslab_t *msp, uint
 void
 metaslab_sync_reassess(metaslab_group_t *mg)
 {
-	int64_t failures = mg->mg_alloc_failures;
-
 	metaslab_group_alloc_update(mg);
-	atomic_add_64(&mg->mg_alloc_failures, -failures);
 
 	/*
 	 * Preload the next potential metaslabs
@@ -1737,7 +1722,7 @@ metaslab_distance(metaslab_t *msp, dva_t
 
 static uint64_t
 metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
-    uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
+    uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
 {
 	spa_t *spa = mg->mg_vd->vdev_spa;
 	metaslab_t *msp = NULL;
@@ -1764,10 +1749,9 @@ metaslab_group_alloc(metaslab_group_t *m
 				spa_dbgmsg(spa, "%s: failed to meet weight "
 				    "requirement: vdev %llu, txg %llu, mg %p, "
 				    "msp %p, psize %llu, asize %llu, "
-				    "failures %llu, weight %llu",
-				    spa_name(spa), mg->mg_vd->vdev_id, txg,
-				    mg, msp, psize, asize,
-				    mg->mg_alloc_failures, msp->ms_weight);
+				    "weight %llu", spa_name(spa),
+				    mg->mg_vd->vdev_id, txg,
+				    mg, msp, psize, asize, msp->ms_weight);
 				mutex_exit(&mg->mg_lock);
 				return (-1ULL);
 			}
@@ -1800,27 +1784,6 @@ metaslab_group_alloc(metaslab_group_t *m
 		mutex_enter(&msp->ms_lock);
 
 		/*
-		 * If we've already reached the allowable number of failed
-		 * allocation attempts on this metaslab group then we
-		 * consider skipping it. We skip it only if we're allowed
-		 * to "fast" gang, the physical size is larger than
-		 * a gang block, and we're attempting to allocate from
-		 * the primary metaslab.
-		 */
-		if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
-		    CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
-		    activation_weight == METASLAB_WEIGHT_PRIMARY) {
-			spa_dbgmsg(spa, "%s: skipping metaslab group: "
-			    "vdev %llu, txg %llu, mg %p, msp[%llu] %p, "
-			    "psize %llu, asize %llu, failures %llu",
-			    spa_name(spa), mg->mg_vd->vdev_id, txg, mg,
-			    msp->ms_id, msp, psize, asize,
-			    mg->mg_alloc_failures);
-			mutex_exit(&msp->ms_lock);
-			return (-1ULL);
-		}
-
-		/*
 		 * Ensure that the metaslab we have selected is still
 		 * capable of handling our request. It's possible that
 		 * another thread may have changed the weight while we
@@ -1859,8 +1822,6 @@ metaslab_group_alloc(metaslab_group_t *m
 		if ((offset = metaslab_block_alloc(msp, asize)) != -1ULL)
 			break;
 
-		atomic_inc_64(&mg->mg_alloc_failures);
-
 		metaslab_passivate(msp, metaslab_block_maxsize(msp));
 		mutex_exit(&msp->ms_lock);
 	}
@@ -2015,7 +1976,7 @@ top:
 		ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
 
 		offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
-		    dva, d, flags);
+		    dva, d);
 		if (offset != -1ULL) {
 			/*
 			 * If we've just selected this metaslab group,

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h	Fri Apr 18 21:35:23 2014	(r264670)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h	Fri Apr 18 22:04:58 2014	(r264671)
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_IMPL_H
@@ -58,7 +58,6 @@ struct metaslab_group {
 	kmutex_t		mg_lock;
 	avl_tree_t		mg_metaslab_tree;
 	uint64_t		mg_aliquot;
-	uint64_t		mg_alloc_failures;
 	boolean_t		mg_allocatable;		/* can we allocate? */
 	uint64_t		mg_free_capacity;	/* percentage free */
 	int64_t			mg_bias;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Fri Apr 18 21:35:23 2014	(r264670)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Fri Apr 18 22:04:58 2014	(r264671)
@@ -90,7 +90,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAX
 #ifdef _KERNEL
 extern vmem_t *zio_alloc_arena;
 #endif
-extern int zfs_mg_alloc_failures;
 
 /*
  * The following actions directly effect the spa's sync-to-convergence logic.
@@ -206,15 +205,6 @@ zio_init(void)
 	}
 out:
 
-	/*
-	 * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
-	 * to fail 3 times per txg or 8 failures, whichever is greater.
-	 */
-	if (zfs_mg_alloc_failures == 0)
-		zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
-	else if (zfs_mg_alloc_failures < 8)
-		zfs_mg_alloc_failures = 8;
-
 	zio_inject_init();
 
 	zio_trim_ksp = kstat_create("zfs", 0, "zio_trim", "misc",



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201404182204.s3IM4x13080118>