Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 7 Jan 2016 20:32:04 +0000 (UTC)
From:      Jim Harris <jimharris@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r293352 - in head: share/man/man4 sys/dev/nvme
Message-ID:  <201601072032.u07KW4V5034717@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jimharris
Date: Thu Jan  7 20:32:04 2016
New Revision: 293352
URL: https://svnweb.freebsd.org/changeset/base/293352

Log:
  nvme: add hw.nvme.min_cpus_per_ioq tunable
  
  Due to FreeBSD system-wide limits on the number of MSI-X vectors
  (https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=199321),
  it may be desirable to allocate fewer than the maximum number
  of vectors for an NVMe device, in order to save vectors for
  other devices (usually Ethernet) that can take better
  advantage of them and may be probed after NVMe.
  
  This tunable is expressed in terms of the minimum number of CPUs
  per I/O queue instead of the maximum number of queues per controller,
  to allow for a more even distribution of CPUs per queue.  This
  avoids cases where some number of CPUs have a dedicated queue,
  but other CPUs need to share queues.  Ideally the PR referenced
  above will eventually be fixed and the mechanism implemented
  here will become obsolete anyway.
  
  While here, fix a bug in the CPUs per I/O queue calculation to
  properly account for the admin queue's MSI-X vector.
  
  Reviewed by:	gallatin
  MFC after:	3 days
  Sponsored by:	Intel

Modified:
  head/share/man/man4/nvme.4
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_sysctl.c

Modified: head/share/man/man4/nvme.4
==============================================================================
--- head/share/man/man4/nvme.4	Thu Jan  7 20:24:30 2016	(r293351)
+++ head/share/man/man4/nvme.4	Thu Jan  7 20:32:04 2016	(r293352)
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2012-2014 Intel Corporation
+.\" Copyright (c) 2012-2016 Intel Corporation
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 18, 2014
+.Dd January 7, 2016
 .Dt NVME 4
 .Os
 .Sh NAME
@@ -89,7 +89,10 @@ not 0, and this driver follows that conv
 By default,
 .Nm
 will create an I/O queue pair for each CPU, provided enough MSI-X vectors
-can be allocated.
+and NVMe queue pairs can be allocated.  If not enough vectors or queue
+pairs are available, nvme(4) will use a smaller number of queue pairs and
+assign multiple CPUs per queue pair.
+.Pp
 To force a single I/O queue pair shared by all CPUs, set the following
 tunable value in
 .Xr loader.conf 5 :
@@ -97,6 +100,13 @@ tunable value in
 hw.nvme.per_cpu_io_queues=0
 .Ed
 .Pp
+To assign more than one CPU per I/O queue pair, thereby reducing the number
+of MSI-X vectors consumed by the device, set the following tunable value in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+hw.nvme.min_cpus_per_ioq=X
+.Ed
+.Pp
 To force legacy interrupts for all
 .Nm
 driver instances, set the following tunable value in
@@ -109,6 +119,8 @@ Note that use of INTx implies disabling 
 .Sh SYSCTL VARIABLES
 The following controller-level sysctls are currently implemented:
 .Bl -tag -width indent
+.It Va dev.nvme.0.num_cpus_per_ioq
+(R) Number of CPUs associated with each I/O queue pair.
 .It Va dev.nvme.0.int_coal_time
 (R/W) Interrupt coalescing timer period in microseconds.
 Set to 0 to disable.

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Thu Jan  7 20:24:30 2016	(r293351)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Thu Jan  7 20:32:04 2016	(r293352)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (C) 2012-2015 Intel Corporation
+ * Copyright (C) 2012-2016 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -978,13 +978,27 @@ nvme_ctrlr_setup_interrupts(struct nvme_
 {
 	device_t	dev;
 	int		per_cpu_io_queues;
+	int		min_cpus_per_ioq;
 	int		num_vectors_requested, num_vectors_allocated;
 	int		num_vectors_available;
 
 	dev = ctrlr->dev;
+	min_cpus_per_ioq = 1;
+	TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+
+	if (min_cpus_per_ioq < 1) {
+		min_cpus_per_ioq = 1;
+	} else if (min_cpus_per_ioq > mp_ncpus) {
+		min_cpus_per_ioq = mp_ncpus;
+	}
+
 	per_cpu_io_queues = 1;
 	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
 
+	if (per_cpu_io_queues == 0) {
+		min_cpus_per_ioq = mp_ncpus;
+	}
+
 	ctrlr->force_intx = 0;
 	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
 
@@ -1010,10 +1024,12 @@ nvme_ctrlr_setup_interrupts(struct nvme_
 		return;
 	}
 
-	if (per_cpu_io_queues)
-		ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, num_vectors_available + 1);
-	else
-		ctrlr->num_cpus_per_ioq = mp_ncpus;
+	/*
+	 * Do not use all vectors for I/O queues - one must be saved for the
+	 *  admin queue.
+	 */
+	ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
+	    NVME_CEILING(mp_ncpus, num_vectors_available - 1));
 
 	ctrlr->num_io_queues = NVME_CEILING(mp_ncpus, ctrlr->num_cpus_per_ioq);
 	num_vectors_requested = ctrlr->num_io_queues + 1;

Modified: head/sys/dev/nvme/nvme_sysctl.c
==============================================================================
--- head/sys/dev/nvme/nvme_sysctl.c	Thu Jan  7 20:24:30 2016	(r293351)
+++ head/sys/dev/nvme/nvme_sysctl.c	Thu Jan  7 20:32:04 2016	(r293352)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2016 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -267,6 +267,10 @@ nvme_sysctl_initialize_ctrlr(struct nvme
 	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
 	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
 
+	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_cpus_per_ioq",
+	    CTLFLAG_RD, &ctrlr->num_cpus_per_ioq, 0,
+	    "Number of CPUs assigned per I/O queue pair");
+
 	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
 	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
 	    nvme_sysctl_int_coal_time, "IU",



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601072032.u07KW4V5034717>