Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 15 May 2014 12:44:00 +0000 (UTC)
From:      Steven Hartland <smh@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r266123 - in stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201405151244.s4FCi04k072254@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: smh
Date: Thu May 15 12:44:00 2014
New Revision: 266123
URL: http://svnweb.freebsd.org/changeset/base/266123

Log:
  MFC r264850
  
  Add the ability to set a minimum ashift size for ZFS pool creation or root level
  vdev addition.
  
  Change max_auto_ashift sysctl to error when an invalid value is requested instead
  of silently limiting it.
  
  Sponsored by:	Multiplay

Modified:
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	Thu May 15 12:39:28 2014	(r266122)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	Thu May 15 12:44:00 2014	(r266123)
@@ -106,7 +106,7 @@ _NOTE(CONSTCOND) } while (0)
 #define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
 
 /*
- * Maximum supported logical ashift.
+ * Default maximum supported logical ashift.
  *
  * The current 8k allocation block size limit is due to the 8k
  * aligned/sized operations performed by vdev_probe() on
@@ -117,6 +117,11 @@ _NOTE(CONSTCOND) } while (0)
 #define	SPA_MAXASHIFT		13
 
 /*
+ * Default minimum supported logical ashift.
+ */
+#define SPA_MINASHIFT		SPA_MINBLOCKSHIFT
+
+/*
  * Size of block to hold the configuration data (a packed nvlist)
  */
 #define	SPA_CONFIG_BLOCKSIZE	(1ULL << 14)

Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Thu May 15 12:39:28 2014	(r266122)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Thu May 15 12:44:00 2014	(r266123)
@@ -53,7 +53,7 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CT
  * Virtual device management.
  */
 
-/**
+/*
  * The limit for ZFS to automatically increase a top-level vdev's ashift
  * from logical ashift to physical ashift.
  *
@@ -61,19 +61,34 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CT
  *          child->vdev_ashift = 9 (512 bytes)
  *          child->vdev_physical_ashift = 12 (4096 bytes)
  *          zfs_max_auto_ashift = 11 (2048 bytes)
+ *          zfs_min_auto_ashift = 9 (512 bytes)
  *
- * On pool creation or the addition of a new top-leve vdev, ZFS will
- * bump the ashift of the top-level vdev to 2048.
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * increase the ashift of the top-level vdev to 2048 as limited by
+ * zfs_max_auto_ashift.
  *
  * Example: one or more 512B emulation child vdevs
  *          child->vdev_ashift = 9 (512 bytes)
  *          child->vdev_physical_ashift = 12 (4096 bytes)
  *          zfs_max_auto_ashift = 13 (8192 bytes)
+ *          zfs_min_auto_ashift = 9 (512 bytes)
+ *
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * increase the ashift of the top-level vdev to 4096 to match the
+ * max vdev_physical_ashift.
  *
- * On pool creation or the addition of a new top-leve vdev, ZFS will
- * bump the ashift of the top-level vdev to 4096.
+ * Example: one or more 512B native child vdevs
+ *          child->vdev_ashift = 9 (512 bytes)
+ *          child->vdev_physical_ashift = 9 (512 bytes)
+ *          zfs_max_auto_ashift = 13 (8192 bytes)
+ *          zfs_min_auto_ashift = 12 (4096 bytes)
+ *
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * increase the ashift of the top-level vdev to 4096 to match the
+ * zfs_min_auto_ashift.
  */
 static uint64_t zfs_max_auto_ashift = SPA_MAXASHIFT;
+static uint64_t zfs_min_auto_ashift = SPA_MINASHIFT;
 
 static int
 sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS)
@@ -86,8 +101,8 @@ sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HA
 	if (err != 0 || req->newptr == NULL)
 		return (err);
 
-	if (val > SPA_MAXASHIFT)
-		val = SPA_MAXASHIFT;
+	if (val > SPA_MAXASHIFT || val < zfs_min_auto_ashift)
+		return (EINVAL);
 
 	zfs_max_auto_ashift = val;
 
@@ -96,7 +111,31 @@ sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HA
 SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
     CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t),
     sysctl_vfs_zfs_max_auto_ashift, "QU",
-    "Cap on logical -> physical ashift adjustment on new top-level vdevs.");
+    "Max ashift used when optimising for logical -> physical sectors size on "
+    "new top-level vdevs.");
+
+static int
+sysctl_vfs_zfs_min_auto_ashift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t val;
+	int err;
+
+	val = zfs_min_auto_ashift;
+	err = sysctl_handle_64(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < SPA_MINASHIFT || val > zfs_max_auto_ashift)
+		return (EINVAL);
+
+	zfs_min_auto_ashift = val;
+
+	return (0);
+}
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
+    CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t),
+    sysctl_vfs_zfs_min_auto_ashift, "QU",
+    "Min ashift used when creating new top-level vdevs.");
 
 static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_root_ops,
@@ -1631,19 +1670,30 @@ vdev_metaslab_set_size(vdev_t *vd)
 }
 
 /*
- * Maximize performance by inflating the configured ashift for
- * top level vdevs to be as close to the physical ashift as
- * possible without exceeding the administrator specified
- * limit.
+ * Maximize performance by inflating the configured ashift for top level
+ * vdevs to be as close to the physical ashift as possible while maintaining
+ * administrator defined limits and ensuring it doesn't go below the
+ * logical ashift.
  */
 void
 vdev_ashift_optimize(vdev_t *vd)
 {
-	if (vd == vd->vdev_top &&
-	    (vd->vdev_ashift < vd->vdev_physical_ashift) &&
-	    (vd->vdev_ashift < zfs_max_auto_ashift)) {
-		vd->vdev_ashift = MIN(zfs_max_auto_ashift,
-		    vd->vdev_physical_ashift);
+	if (vd == vd->vdev_top) {
+		if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+			vd->vdev_ashift = MIN(
+			    MAX(zfs_max_auto_ashift, vd->vdev_ashift),
+			    MAX(zfs_min_auto_ashift, vd->vdev_physical_ashift));
+		} else {
+			/*
+			 * Unusual case where logical ashift > physical ashift
+			 * so we can't cap the calculated ashift based on max
+			 * ashift as that would cause failures.
+			 * We still check if we need to increase it to match
+			 * the min ashift.
+			 */
+			vd->vdev_ashift = MAX(zfs_min_auto_ashift,
+			    vd->vdev_ashift);
+		}
 	}
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201405151244.s4FCi04k072254>