Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 2 Apr 2015 00:23:03 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Tobias Oberstein <tobias.oberstein@gmail.com>
Cc:        "freebsd-hackers@freebsd.org" <freebsd-hackers@freebsd.org>, Michael Fuckner <michael@fuckner.net>, Jim Harris <jim.harris@gmail.com>, Alan Somers <asomers@freebsd.org>
Subject:   Re: NVMe performance 4x slower than expected
Message-ID:  <20150401212303.GB2379@kib.kiev.ua>
In-Reply-To: <551C5A82.2090306@gmail.com>
References:  <551BC57D.5070101@gmail.com> <CAOtMX2jVwMHSnQfphAF%2Ba2%2Bo7eLp62nHmUo4t%2BEahrXLWReaFQ@mail.gmail.com> <CAJP=Hc-RNVuhPePg7bnpmT4ByzyXs_CNvAs7Oy7ntXjqhZYhCQ@mail.gmail.com> <551C5A82.2090306@gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On Wed, Apr 01, 2015 at 10:52:18PM +0200, Tobias Oberstein wrote:
> >     > FreeBSD 11 Current with patches (DMAR and ZFS patches, otherwise the box
> >     > doesn't boot at all .. because of 3TB RAM and the amount of periphery).
> >
> >     Do you still have WITNESS and INVARIANTS turned on in your kernel
> >     config?  They're turned on by default for Current, but they do have
> >     some performance impact.  To turn them off, just build a
> >     GENERIC-NODEBUG kernel .
> 
> WITNESS is off, INVARIANTS is still on.
INVARIANTS are costly.

> 
> Here is complete config:
> 
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_kernel_conf.md
> 
> This is the aggregated patch (work was done by Konstantin - thanks again 
> btw!)
> 
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_patch.md
> 
> > Could you also post full dmesg output as well as vmstat -i?
> 
> dmesg:
> 
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_dmesg.md
> 
> vmstat:
> 
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd_vmstat.md
> 
> ===
> 
> Here are results from FIO under FreeBSD:
> 
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/freebsd.md
> 
> Here are results using _same_ FIO control file under Linux:
> 
> https://github.com/oberstet/scratchbox/blob/master/freebsd/cruncher/results/linux.md

Is this vmstat output taken after the test?
It is somewhat funny that nvme does not use MSI(X).

I have had the following patch for a long time; it allowed me to increase
pps in iperf and similar tests when DMAR is enabled. In your case it could
reduce the rate of the DMAR interrupts.

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index a18adcf..b23a4c1 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -586,6 +586,18 @@ dmar_ctx_unload_entry(struct dmar_map_entry *entry, bool free)
 	}
 }
 
+static struct dmar_qi_genseq *
+dmar_ctx_unload_gseq(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+    struct dmar_qi_genseq *gseq)
+{
+
+	if (TAILQ_NEXT(entry, dmamap_link) != NULL)
+		return (NULL);
+	if (ctx->batch_no++ % dmar_batch_coalesce != 0)
+		return (NULL);
+	return (gseq);
+}
+
 void
 dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
     bool cansleep)
@@ -619,8 +631,7 @@ dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
 		entry->gseq.gen = 0;
 		entry->gseq.seq = 0;
 		dmar_qi_invalidate_locked(ctx, entry->start, entry->end -
-		    entry->start, TAILQ_NEXT(entry, dmamap_link) == NULL ?
-		    &gseq : NULL);
+		    entry->start, dmar_ctx_unload_gseq(ctx, entry, &gseq));
 	}
 	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
 		entry->gseq = gseq;
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 2865ab5..6e0ab7f 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -93,6 +93,7 @@ struct dmar_ctx {
 	u_int entries_cnt;
 	u_long loads;
 	u_long unloads;
+	u_int batch_no;
 	struct dmar_gas_entries_tree rb_root;
 	struct dmar_map_entries_tailq unload_entries; /* Entries to unload */
 	struct dmar_map_entry *first_place, *last_place;
@@ -339,6 +340,7 @@ extern dmar_haddr_t dmar_high;
 extern int haw;
 extern int dmar_tbl_pagecnt;
 extern int dmar_match_verbose;
+extern int dmar_batch_coalesce;
 extern int dmar_check_free;
 
 static inline uint32_t
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index c239579..e7dc3f9 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -153,7 +153,7 @@ dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
 	return (1);
 }
 
-static int dmar_enable = 0;
+static int dmar_enable = 1;
 static void
 dmar_identify(driver_t *driver, device_t parent)
 {
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
index f696f9d..d3c3267 100644
--- a/sys/x86/iommu/intel_utils.c
+++ b/sys/x86/iommu/intel_utils.c
@@ -624,6 +624,7 @@ dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
 }
 
 int dmar_match_verbose;
+int dmar_batch_coalesce = 100;
 
 static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL, "");
 SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
@@ -632,6 +633,9 @@ SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
 SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RWTUN,
     &dmar_match_verbose, 0,
     "Verbose matching of the PCI devices to DMAR paths");
+SYSCTL_INT(_hw_dmar, OID_AUTO, batch_coalesce, CTLFLAG_RW | CTLFLAG_TUN,
+    &dmar_batch_coalesce, 0,
+    "Number of qi batches between interrupt");
 #ifdef INVARIANTS
 int dmar_check_free;
 SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RWTUN,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20150401212303.GB2379>