Date: Fri, 11 Dec 2015 02:06:04 +0000 (UTC) From: Steven Hartland <smh@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r292074 - in head/sys/dev: nvd nvme Message-ID: <201512110206.tBB264Ad039486@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: smh Date: Fri Dec 11 02:06:03 2015 New Revision: 292074 URL: https://svnweb.freebsd.org/changeset/base/292074 Log: Limit stripesize reported from nvd(4) to 4K Intel NVMe controllers have a slow path for I/Os that span a 128KB stripe boundary but ZFS limits ashift, which is derived from d_stripesize, to 13 (8KB) so we limit the stripesize reported to geom(8) to 4KB. This may result in a small number of additional I/Os to require splitting in nvme(4), however the NVMe I/O path is very efficient so these additional I/Os will cause very minimal (if any) difference in performance or CPU utilisation. This can be controller by the new sysctl kern.nvme.max_optimal_sectorsize. MFC after: 1 week Sponsored by: Multiplay Differential Revision: https://reviews.freebsd.org/D4446 Modified: head/sys/dev/nvd/nvd.c head/sys/dev/nvme/nvme.h head/sys/dev/nvme/nvme_ns.c head/sys/dev/nvme/nvme_sysctl.c Modified: head/sys/dev/nvd/nvd.c ============================================================================== --- head/sys/dev/nvd/nvd.c Fri Dec 11 01:34:13 2015 (r292073) +++ head/sys/dev/nvd/nvd.c Fri Dec 11 02:06:03 2015 (r292074) @@ -279,7 +279,7 @@ nvd_new_disk(struct nvme_namespace *ns, disk->d_sectorsize = nvme_ns_get_sector_size(ns); disk->d_mediasize = (off_t)nvme_ns_get_size(ns); disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns); - disk->d_stripesize = nvme_ns_get_stripesize(ns); + disk->d_stripesize = nvme_ns_get_optimal_sector_size(ns); if (TAILQ_EMPTY(&disk_head)) disk->d_unit = 0; Modified: head/sys/dev/nvme/nvme.h ============================================================================== --- head/sys/dev/nvme/nvme.h Fri Dec 11 01:34:13 2015 (r292073) +++ head/sys/dev/nvme/nvme.h Fri Dec 11 02:06:03 2015 (r292074) @@ -870,6 +870,7 @@ const char * nvme_ns_get_serial_number(s const char * nvme_ns_get_model_number(struct nvme_namespace *ns); const struct nvme_namespace_data * nvme_ns_get_data(struct nvme_namespace *ns); +uint32_t nvme_ns_get_optimal_sector_size(struct nvme_namespace *ns); uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns); int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, Modified: head/sys/dev/nvme/nvme_ns.c ============================================================================== --- head/sys/dev/nvme/nvme_ns.c Fri Dec 11 01:34:13 2015 (r292073) +++ head/sys/dev/nvme/nvme_ns.c Fri Dec 11 02:06:03 2015 (r292074) @@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include "nvme_private.h" +extern int nvme_max_optimal_sectorsize; + static void nvme_bio_child_inbed(struct bio *parent, int bio_error); static void nvme_bio_child_done(void *arg, const struct nvme_completion *cpl); @@ -217,6 +219,22 @@ nvme_ns_get_stripesize(struct nvme_names return (ns->stripesize); } +uint32_t +nvme_ns_get_optimal_sector_size(struct nvme_namespace *ns) +{ + uint32_t stripesize; + + stripesize = nvme_ns_get_stripesize(ns); + + if (stripesize == 0) + return nvme_ns_get_sector_size(ns); + + if (nvme_max_optimal_sectorsize == 0) + return (stripesize); + + return (MIN(stripesize, nvme_max_optimal_sectorsize)); +} + static void nvme_ns_bio_done(void *arg, const struct nvme_completion *status) { Modified: head/sys/dev/nvme/nvme_sysctl.c ============================================================================== --- head/sys/dev/nvme/nvme_sysctl.c Fri Dec 11 01:34:13 2015 (r292073) +++ head/sys/dev/nvme/nvme_sysctl.c Fri Dec 11 02:06:03 2015 (r292074) @@ -33,6 +33,22 @@ __FBSDID("$FreeBSD$"); #include "nvme_private.h" +SYSCTL_NODE(_kern, OID_AUTO, nvme, CTLFLAG_RD, 0, "NVM Express"); +/* + * Intel NVMe controllers have a slow path for I/Os that span a 128KB + * stripe boundary but ZFS limits ashift, which is derived from + * d_stripesize, to 13 (8KB) so we limit the stripesize reported to + * geom(8) to 4KB by default. + * + * This may result in a small number of additional I/Os to require + * splitting in nvme(4), however the NVMe I/O path is very efficient + * so these additional I/Os will cause very minimal (if any) difference + * in performance or CPU utilisation. + */ +int nvme_max_optimal_sectorsize = 1<<12; +SYSCTL_INT(_kern_nvme, OID_AUTO, max_optimal_sectorsize, CTLFLAG_RWTUN, + &nvme_max_optimal_sectorsize, 0, "The maximum optimal sectorsize reported"); + /* * CTLTYPE_S64 and sysctl_handle_64 were added in r217616. Define these * explicitly here for older kernels that don't include the r217616
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512110206.tBB264Ad039486>