Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 11 Jan 2016 17:32:56 +0000 (UTC)
From:      Jim Harris <jimharris@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r293672 - in stable/10: share/man/man4 sys/dev/nvme
Message-ID:  <201601111732.u0BHWuKq032845@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jimharris
Date: Mon Jan 11 17:32:56 2016
New Revision: 293672
URL: https://svnweb.freebsd.org/changeset/base/293672

Log:
  MFC r293352:
  
    nvme: add hw.nvme.min_cpus_per_ioq tunable
  
    Due to FreeBSD system-wide limits on number of MSI-X vectors
    (https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=199321),
    it may be desirable to allocate fewer than the maximum number
    of vectors for an NVMe device, in order to save vectors for
    other devices (usually Ethernet) that can take better
    advantage of them and may be probed after NVMe.
  
    This tunable is expressed in terms of minimum number of CPUs
    per I/O queue instead of max number of queues per controller,
    to allow for a more even distribution of CPUs per queue.  This
    avoids cases where some number of CPUs have a dedicated queue,
    but other CPUs need to share queues.  Ideally the PR referenced
    above will eventually be fixed and the mechanism implemented
    here will become obsolete anyway.
  
    While here, fix a bug in the CPUs per I/O queue calculation to
    properly account for the admin queue's MSI-X vector.

Modified:
  stable/10/share/man/man4/nvme.4
  stable/10/sys/dev/nvme/nvme_ctrlr.c
  stable/10/sys/dev/nvme/nvme_sysctl.c

Modified: stable/10/share/man/man4/nvme.4
==============================================================================
--- stable/10/share/man/man4/nvme.4	Mon Jan 11 17:31:18 2016	(r293671)
+++ stable/10/share/man/man4/nvme.4	Mon Jan 11 17:32:56 2016	(r293672)
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2012-2014 Intel Corporation
+.\" Copyright (c) 2012-2016 Intel Corporation
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 18, 2014
+.Dd January 7, 2016
 .Dt NVME 4
 .Os
 .Sh NAME
@@ -90,7 +90,10 @@ not 0, and this driver follows that conv
 By default,
 .Nm
 will create an I/O queue pair for each CPU, provided enough MSI-X vectors
-can be allocated.
+and NVMe queue pairs can be allocated.  If not enough vectors or queue
+pairs are available, nvme(4) will use a smaller number of queue pairs and
+assign multiple CPUs per queue pair.
+.Pp
 To force a single I/O queue pair shared by all CPUs, set the following
 tunable value in
 .Xr loader.conf 5 :
@@ -98,6 +101,13 @@ tunable value in
 hw.nvme.per_cpu_io_queues=0
 .Ed
 .Pp
+To assign more than one CPU per I/O queue pair, thereby reducing the number
+of MSI-X vectors consumed by the device, set the following tunable value in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+hw.nvme.min_cpus_per_ioq=X
+.Ed
+.Pp
 To force legacy interrupts for all
 .Nm
 driver instances, set the following tunable value in
@@ -110,6 +120,8 @@ Note that use of INTx implies disabling 
 .Sh SYSCTL VARIABLES
 The following controller-level sysctls are currently implemented:
 .Bl -tag -width indent
+.It Va dev.nvme.0.num_cpus_per_ioq
+(R) Number of CPUs associated with each I/O queue pair.
 .It Va dev.nvme.0.int_coal_time
 (R/W) Interrupt coalescing timer period in microseconds.
 Set to 0 to disable.

Modified: stable/10/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- stable/10/sys/dev/nvme/nvme_ctrlr.c	Mon Jan 11 17:31:18 2016	(r293671)
+++ stable/10/sys/dev/nvme/nvme_ctrlr.c	Mon Jan 11 17:32:56 2016	(r293672)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (C) 2012-2015 Intel Corporation
+ * Copyright (C) 2012-2016 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -979,13 +979,27 @@ nvme_ctrlr_setup_interrupts(struct nvme_
 {
 	device_t	dev;
 	int		per_cpu_io_queues;
+	int		min_cpus_per_ioq;
 	int		num_vectors_requested, num_vectors_allocated;
 	int		num_vectors_available;
 
 	dev = ctrlr->dev;
+	min_cpus_per_ioq = 1;
+	TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+
+	if (min_cpus_per_ioq < 1) {
+		min_cpus_per_ioq = 1;
+	} else if (min_cpus_per_ioq > mp_ncpus) {
+		min_cpus_per_ioq = mp_ncpus;
+	}
+
 	per_cpu_io_queues = 1;
 	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
 
+	if (per_cpu_io_queues == 0) {
+		min_cpus_per_ioq = mp_ncpus;
+	}
+
 	ctrlr->force_intx = 0;
 	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
 
@@ -1011,10 +1025,12 @@ nvme_ctrlr_setup_interrupts(struct nvme_
 		return;
 	}
 
-	if (per_cpu_io_queues)
-		ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, num_vectors_available + 1);
-	else
-		ctrlr->num_cpus_per_ioq = mp_ncpus;
+	/*
+	 * Do not use all vectors for I/O queues - one must be saved for the
+	 *  admin queue.
+	 */
+	ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
+	    NVME_CEILING(mp_ncpus, num_vectors_available - 1));
 
 	ctrlr->num_io_queues = NVME_CEILING(mp_ncpus, ctrlr->num_cpus_per_ioq);
 	num_vectors_requested = ctrlr->num_io_queues + 1;

Modified: stable/10/sys/dev/nvme/nvme_sysctl.c
==============================================================================
--- stable/10/sys/dev/nvme/nvme_sysctl.c	Mon Jan 11 17:31:18 2016	(r293671)
+++ stable/10/sys/dev/nvme/nvme_sysctl.c	Mon Jan 11 17:32:56 2016	(r293672)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2016 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -251,6 +251,10 @@ nvme_sysctl_initialize_ctrlr(struct nvme
 	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
 	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
 
+	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_cpus_per_ioq",
+	    CTLFLAG_RD, &ctrlr->num_cpus_per_ioq, 0,
+	    "Number of CPUs assigned per I/O queue pair");
+
 	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
 	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
 	    nvme_sysctl_int_coal_time, "IU",



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601111732.u0BHWuKq032845>