Date: Thu, 22 Oct 2015 04:38:06 +0000 (UTC)
From: "Conrad E. Meyer" <cem@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r289733 - in head: sys/dev/ioat tools/tools/ioat
Message-ID: <201510220438.t9M4c67v041733@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: cem Date: Thu Oct 22 04:38:05 2015 New Revision: 289733 URL: https://svnweb.freebsd.org/changeset/base/289733 Log: Improve flexibility of ioat_test / ioatcontrol(8) The test logic now preallocates memory before running the test. The buffer size is now configurable. Post-copy verification is configurable. The number of copies to chain into one transaction (one interrupt) is configurable. A 'duration' mode is added, which repeats the test until the duration has elapsed, reporting the B/s and transactions completed. ioatcontrol.8 has been updated to document the new arguments. Initial limits (on this particular Broadwell-DE) (and when the interrupts are working) seem to be: 256 interrupts/sec or ~6 GB/s, whichever limit is more restrictive. Unfortunately, it seems the interrupt-reset handling on Broadwell isn't working as intended. That will be fixed in a later commit. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/dev/ioat/ioat_test.c head/sys/dev/ioat/ioat_test.h head/tools/tools/ioat/Makefile head/tools/tools/ioat/ioatcontrol.8 head/tools/tools/ioat/ioatcontrol.c Modified: head/sys/dev/ioat/ioat_test.c ============================================================================== --- head/sys/dev/ioat/ioat_test.c Thu Oct 22 04:33:05 2015 (r289732) +++ head/sys/dev/ioat/ioat_test.c Thu Oct 22 04:38:05 2015 (r289733) @@ -51,18 +51,28 @@ __FBSDID("$FreeBSD$"); #include "ioat_internal.h" #include "ioat_test.h" +#ifndef time_after +#define time_after(a,b) ((long)(b) - (long)(a) < 0) +#endif + MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations"); -#define IOAT_TEST_SIZE 0x40000 -#define IOAT_MAX_BUFS 8 +#define IOAT_MAX_BUFS 256 struct test_transaction { - uint8_t num_buffers; void *buf[IOAT_MAX_BUFS]; uint32_t length; + uint32_t depth; struct ioat_test *test; + TAILQ_ENTRY(test_transaction) entry; }; +#define IT_LOCK() mtx_lock(&ioat_test_lk) +#define IT_UNLOCK() mtx_unlock(&ioat_test_lk) +#define IT_ASSERT() mtx_assert(&ioat_test_lk, 
MA_OWNED) +static struct mtx ioat_test_lk; +MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF); + static int g_thread_index = 1; static struct cdev *g_ioat_cdev = NULL; @@ -73,7 +83,7 @@ ioat_test_transaction_destroy(struct tes for (i = 0; i < IOAT_MAX_BUFS; i++) { if (tx->buf[i] != NULL) { - contigfree(tx->buf[i], IOAT_TEST_SIZE, M_IOAT_TEST); + contigfree(tx->buf[i], tx->length, M_IOAT_TEST); tx->buf[i] = NULL; } } @@ -82,17 +92,16 @@ ioat_test_transaction_destroy(struct tes } static struct -test_transaction *ioat_test_transaction_create(uint8_t num_buffers, +test_transaction *ioat_test_transaction_create(unsigned num_buffers, uint32_t buffer_size) { struct test_transaction *tx; - int i; + unsigned i; - tx = malloc(sizeof(struct test_transaction), M_IOAT_TEST, M_NOWAIT | M_ZERO); + tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO); if (tx == NULL) return (NULL); - tx->num_buffers = num_buffers; tx->length = buffer_size; for (i = 0; i < num_buffers; i++) { @@ -107,6 +116,18 @@ test_transaction *ioat_test_transaction_ return (tx); } +static bool +ioat_compare_ok(struct test_transaction *tx) +{ + uint32_t i; + + for (i = 0; i < tx->depth; i++) { + if (memcmp(tx->buf[2*i], tx->buf[2*i+1], tx->length) != 0) + return (false); + } + return (true); +} + static void ioat_dma_test_callback(void *arg) { @@ -116,82 +137,195 @@ ioat_dma_test_callback(void *arg) tx = arg; test = tx->test; - if (memcmp(tx->buf[0], tx->buf[1], tx->length) != 0) { + if (test->verify && !ioat_compare_ok(tx)) { ioat_log_message(0, "miscompare found\n"); - test->status = IOAT_TEST_MISCOMPARE; + atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth); + } else if (!test->too_late) + atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth); + + IT_LOCK(); + TAILQ_REMOVE(&test->pend_q, tx, entry); + TAILQ_INSERT_TAIL(&test->free_q, tx, entry); + wakeup(&test->free_q); + IT_UNLOCK(); +} + +static int +ioat_test_prealloc_memory(struct ioat_test *test, int index) +{ + 
uint32_t i, j, k; + struct test_transaction *tx; + + for (i = 0; i < test->transactions; i++) { + tx = ioat_test_transaction_create(test->chain_depth * 2, + test->buffer_size); + if (tx == NULL) { + ioat_log_message(0, "tx == NULL - memory exhausted\n"); + test->status[IOAT_TEST_NO_MEMORY]++; + return (ENOMEM); + } + + TAILQ_INSERT_HEAD(&test->free_q, tx, entry); + + tx->test = test; + tx->depth = test->chain_depth; + + /* fill in source buffers */ + for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) { + uint32_t val = j + (index << 28); + + for (k = 0; k < test->chain_depth; k++) { + ((uint32_t *)tx->buf[2*k])[j] = ~val; + ((uint32_t *)tx->buf[2*k+1])[j] = val; + } + } } - atomic_add_32(&test->num_completions, 1); - ioat_test_transaction_destroy(tx); - if (test->num_completions == test->num_loops) - wakeup(test); + return (0); } static void -ioat_dma_test(void *arg) +ioat_test_release_memory(struct ioat_test *test) +{ + struct test_transaction *tx, *s; + + TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s) + ioat_test_transaction_destroy(tx); + TAILQ_INIT(&test->free_q); + + TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s) + ioat_test_transaction_destroy(tx); + TAILQ_INIT(&test->pend_q); +} + +static void +ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma) { struct test_transaction *tx; + struct bus_dmadesc *desc; + bus_dmaengine_callback_t cb; + bus_addr_t src, dest; + uint32_t i, flags; + + IT_LOCK(); + while (TAILQ_EMPTY(&test->free_q)) + msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0); + + tx = TAILQ_FIRST(&test->free_q); + TAILQ_REMOVE(&test->free_q, tx, entry); + TAILQ_INSERT_HEAD(&test->pend_q, tx, entry); + IT_UNLOCK(); + + ioat_acquire(dma); + for (i = 0; i < tx->depth; i++) { + src = vtophys((vm_offset_t)tx->buf[2*i]); + dest = vtophys((vm_offset_t)tx->buf[2*i+1]); + + if (i == tx->depth - 1) { + cb = ioat_dma_test_callback; + flags = DMA_INT_EN; + } else { + cb = NULL; + flags = 0; + } + + desc = ioat_copy(dma, src, dest, 
tx->length, cb, tx, flags); + if (desc == NULL) + panic("Failed to allocate a ring slot " + "-- this shouldn't happen!"); + } + ioat_release(dma); +} + +static void +ioat_dma_test(void *arg) +{ struct ioat_test *test; bus_dmaengine_t dmaengine; uint32_t loops; - int index, i; + int index, rc, start, end; test = arg; - loops = test->num_loops; + memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status)); + + if (test->buffer_size > 1024 * 1024) { + ioat_log_message(0, "Buffer size too large >1MB\n"); + test->status[IOAT_TEST_NO_MEMORY]++; + return; + } - test->status = IOAT_TEST_OK; - test->num_completions = 0; + if (test->chain_depth * 2 > IOAT_MAX_BUFS) { + ioat_log_message(0, "Depth too large (> %u)\n", + (unsigned)IOAT_MAX_BUFS / 2); + test->status[IOAT_TEST_NO_MEMORY]++; + return; + } - index = g_thread_index++; - dmaengine = ioat_get_dmaengine(test->channel_index); + if (btoc((uint64_t)test->buffer_size * test->chain_depth * + test->transactions) > (physmem / 4)) { + ioat_log_message(0, "Sanity check failed -- test would " + "use more than 1/4 of phys mem.\n"); + test->status[IOAT_TEST_NO_MEMORY]++; + return; + } + if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) { + ioat_log_message(0, "Sanity check failed -- test would " + "use more than available IOAT ring space.\n"); + test->status[IOAT_TEST_NO_MEMORY]++; + return; + } + + dmaengine = ioat_get_dmaengine(test->channel_index); if (dmaengine == NULL) { ioat_log_message(0, "Couldn't acquire dmaengine\n"); - test->status = IOAT_TEST_NO_DMA_ENGINE; + test->status[IOAT_TEST_NO_DMA_ENGINE]++; return; } - ioat_log_message(0, "Thread %d: num_loops remaining: 0x%07x\n", index, - test->num_loops); + index = g_thread_index++; + TAILQ_INIT(&test->free_q); + TAILQ_INIT(&test->pend_q); - for (loops = 0; loops < test->num_loops; loops++) { - bus_addr_t src, dest; + if (test->duration == 0) + ioat_log_message(1, "Thread %d: num_loops remaining: 0x%08x\n", + index, test->transactions); + else + 
ioat_log_message(1, "Thread %d: starting\n", index); + + rc = ioat_test_prealloc_memory(test, index); + if (rc != 0) { + ioat_log_message(0, "prealloc_memory: %d\n", rc); + return; + } + wmb(); - if (loops % 0x10000 == 0) { - ioat_log_message(0, "Thread %d: " - "num_loops remaining: 0x%07x\n", index, - test->num_loops - loops); + test->too_late = false; + start = ticks; + end = start + (((sbintime_t)test->duration * hz) / 1000); + + for (loops = 0;; loops++) { + if (test->duration == 0 && loops >= test->transactions) + break; + else if (test->duration != 0 && time_after(ticks, end)) { + test->too_late = true; + break; } - tx = ioat_test_transaction_create(2, IOAT_TEST_SIZE); - if (tx == NULL) { - ioat_log_message(0, "tx == NULL - memory exhausted\n"); - atomic_add_32(&test->num_completions, 1); - test->status = IOAT_TEST_NO_MEMORY; - continue; - } - - tx->test = test; - wmb(); - - /* fill in source buffer */ - for (i = 0; i < (IOAT_TEST_SIZE / sizeof(uint32_t)); i++) { - uint32_t val = i + (loops << 16) + (index << 28); - ((uint32_t *)tx->buf[0])[i] = ~val; - ((uint32_t *)tx->buf[1])[i] = val; - } + ioat_test_submit_1_tx(test, dmaengine); + } - src = pmap_kextract((vm_offset_t)tx->buf[0]); - dest = pmap_kextract((vm_offset_t)tx->buf[1]); + ioat_log_message(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n", + ticks - start, ticks - end, (ticks - start) / hz); - ioat_acquire(dmaengine); - ioat_copy(dmaengine, src, dest, IOAT_TEST_SIZE, - ioat_dma_test_callback, tx, DMA_INT_EN); - ioat_release(dmaengine); - } + IT_LOCK(); + while (!TAILQ_EMPTY(&test->pend_q)) + msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz); + IT_UNLOCK(); - while (test->num_completions < test->num_loops) - tsleep(test, 0, "compl", 5 * hz); + ioat_log_message(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n", + ticks - start, ticks - end, (ticks - start) / hz); + ioat_test_release_memory(test); } static int Modified: head/sys/dev/ioat/ioat_test.h 
============================================================================== --- head/sys/dev/ioat/ioat_test.h Thu Oct 22 04:33:05 2015 (r289732) +++ head/sys/dev/ioat/ioat_test.h Thu Oct 22 04:38:05 2015 (r289733) @@ -29,17 +29,39 @@ __FBSDID("$FreeBSD$"); #ifndef __IOAT_TEST_H__ #define __IOAT_TEST_H__ +enum ioat_res { + IOAT_TEST_OK = 0, + IOAT_TEST_NO_DMA_ENGINE, + IOAT_TEST_NO_MEMORY, + IOAT_TEST_MISCOMPARE, + IOAT_NUM_RES +}; + +struct test_transaction; + struct ioat_test { + volatile uint32_t status[IOAT_NUM_RES]; uint32_t channel_index; - uint32_t num_loops; - volatile uint32_t num_completions; - uint32_t status; -}; -#define IOAT_TEST_OK 0 -#define IOAT_TEST_NO_DMA_ENGINE 1 -#define IOAT_TEST_NO_MEMORY 2 -#define IOAT_TEST_MISCOMPARE 3 + /* HW max of 1MB */ + uint32_t buffer_size; + uint32_t chain_depth; + uint32_t transactions; + + /* + * If non-zero, duration is time in ms; + * If zero, bounded by 'transactions' above. + */ + uint32_t duration; + + /* If true, check for miscompares after a copy. 
*/ + bool verify; + + /* Internal usage -- not test inputs */ + TAILQ_HEAD(, test_transaction) free_q; + TAILQ_HEAD(, test_transaction) pend_q; + volatile bool too_late; +}; #define IOAT_DMATEST _IOWR('i', 0, struct ioat_test) Modified: head/tools/tools/ioat/Makefile ============================================================================== --- head/tools/tools/ioat/Makefile Thu Oct 22 04:33:05 2015 (r289732) +++ head/tools/tools/ioat/Makefile Thu Oct 22 04:38:05 2015 (r289733) @@ -4,5 +4,6 @@ PROG= ioatcontrol MAN= ioatcontrol.8 CFLAGS+= -I${.CURDIR:H:H:H}/sys/dev/ioat WARNS?= 6 +LIBADD= util .include <bsd.prog.mk> Modified: head/tools/tools/ioat/ioatcontrol.8 ============================================================================== --- head/tools/tools/ioat/ioatcontrol.8 Thu Oct 22 04:33:05 2015 (r289732) +++ head/tools/tools/ioat/ioatcontrol.8 Thu Oct 22 04:38:05 2015 (r289733) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 24, 2015 +.Dd October 21, 2015 .Dt IOATCONTROL 8 .Os .Sh NAME @@ -33,18 +33,64 @@ .Xr ioat 4 .Sh SYNOPSIS .Nm +.Op Fl V .Ar channel_number -.Ar num_loops +.Ar num_txns +.Ar [ bufsize +.Ar [ chain-len +.Ar [ duration ] ] ] .Sh DESCRIPTION .Nm allows one to issue some number of test operations to the .Xr ioat 4 driver on a specific hardware channel. +The arguments are as follows: +.Bl -tag -width Ds +.It Fl V +Verify copies for accuracy +.El .Pp -Each loop will allocate two chunks of memory, write data patterns to them, -submit a DMA request to copy one buffer to the other, and compare the contents -in the callback. -If the contents are not as expected, an error is reported. +.Nm +operates in one of two modes; if the +.Ar duration +argument is passed, +.Nm +tries to estimate the copy rate in bytes per second by running +.Ar num_txns +repeatedly in loop. +If +.Ar duration +is not passed, +.Nm +only runs through +.Ar num_txns +once and prints the total bytes copied, as well as error information. 
+.Pp +The +.Ar bufsize +argument determines the size of buffers to use for each +.Fn ioat_copy +invocation. +The default is 256 KB. +.Pp +The +.Ar chain-len +argument determines the number of copies to chain together in a single DMA +transaction. +The default is 1, and the maximum is currently 4. +.Pp +The +.Ar duration +argument specifies an approximate time limit for the test, in milliseconds. +.Pp +The test will allocate two chunks of memory for each component of each +transaction's chain. +It will initialize them with specific data patterns. +During the test, it submits DMA requests to copy between pairs of buffers. +If the +.Fl V +flag was specified, it will compare the contents in the callback for a copy +error. .Sh FILES .Pa /dev/ioat_test .Pp @@ -55,6 +101,10 @@ and .Nm exposes it with .Cd hw.ioat.enable_ioat_test=1 . +.Sh DIAGNOSTICS +The wait channel +.Va test_submit +indicates that the test code is keeping the DMA engine full of work. .Sh SEE ALSO .Xr ioat 4 .Sh HISTORY Modified: head/tools/tools/ioat/ioatcontrol.c ============================================================================== --- head/tools/tools/ioat/ioatcontrol.c Thu Oct 22 04:33:05 2015 (r289732) +++ head/tools/tools/ioat/ioatcontrol.c Thu Oct 22 04:38:05 2015 (r289733) @@ -28,34 +28,88 @@ __FBSDID("$FreeBSD$"); #include <sys/ioctl.h> +#include <sys/queue.h> #include <fcntl.h> +#include <stdbool.h> #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <sysexits.h> #include <unistd.h> +#include <libutil.h> + #include "ioat_test.h" +static int prettyprint(struct ioat_test *); + +static void +usage(void) +{ + + printf("Usage: %s [-V] <channel #> <txns> [<bufsize> " + "[<chain-len> [duration]]]\n", getprogname()); + exit(EX_USAGE); +} + int main(int argc, char **argv) { struct ioat_test t; - int fd; + int fd, ch; - if (argc < 3) { - printf("Usage: %s <channel #> <num_loops>\n", argv[0]); - return (EX_USAGE); + while ((ch = getopt(argc, argv, "V")) != -1) { + switch (ch) { + 
case 'V': + t.verify = true; + break; + default: + usage(); + } } + argc -= optind; + argv += optind; + + if (argc < 2) + usage(); + + /* Defaults for optional args */ + t.buffer_size = 256 * 1024; + t.chain_depth = 2; + t.duration = 0; - t.channel_index = atoi(argv[1]); + t.channel_index = atoi(argv[0]); if (t.channel_index > 8) { printf("Channel number must be between 0 and 7.\n"); return (EX_USAGE); } - t.num_loops = atoi(argv[2]); + t.transactions = atoi(argv[1]); + + if (argc >= 3) { + t.buffer_size = atoi(argv[2]); + if (t.buffer_size == 0) { + printf("Buffer size must be greater than zero\n"); + return (EX_USAGE); + } + } + + if (argc >= 4) { + t.chain_depth = atoi(argv[3]); + if (t.chain_depth < 1) { + printf("Chain length must be greater than zero\n"); + return (EX_USAGE); + } + } + + if (argc >= 5) { + t.duration = atoi(argv[4]); + if (t.duration < 1) { + printf("Duration must be greater than zero\n"); + return (EX_USAGE); + } + } fd = open("/dev/ioat_test", O_RDWR); if (fd < 0) { @@ -66,5 +120,44 @@ main(int argc, char **argv) (void)ioctl(fd, IOAT_DMATEST, &t); close(fd); - return (t.status); + return (prettyprint(&t)); +} + +static int +prettyprint(struct ioat_test *t) +{ + char bps[10], bytesh[10]; + uintmax_t bytes; + + if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0 || + t->status[IOAT_TEST_NO_MEMORY] != 0 || + t->status[IOAT_TEST_MISCOMPARE] != 0) { + printf("Errors:\n"); + if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0) + printf("\tNo DMA engine present: %u\n", + (unsigned)t->status[IOAT_TEST_NO_DMA_ENGINE]); + if (t->status[IOAT_TEST_NO_MEMORY] != 0) + printf("\tOut of memory: %u\n", + (unsigned)t->status[IOAT_TEST_NO_MEMORY]); + if (t->status[IOAT_TEST_MISCOMPARE] != 0) + printf("\tMiscompares: %u\n", + (unsigned)t->status[IOAT_TEST_MISCOMPARE]); + } + + printf("Processed %u txns\n", (unsigned)t->status[IOAT_TEST_OK] / + t->chain_depth); + bytes = (uintmax_t)t->buffer_size * t->status[IOAT_TEST_OK]; + + humanize_number(bytesh, sizeof(bytesh), 
(int64_t)bytes, "B", + HN_AUTOSCALE, HN_DECIMAL); + if (t->duration) { + humanize_number(bps, sizeof(bps), + (int64_t)1000 * bytes / t->duration, "B/s", HN_AUTOSCALE, + HN_DECIMAL); + printf("%ju (%s) copied in %u ms (%s)\n", bytes, bytesh, + (unsigned)t->duration, bps); + } else + printf("%ju (%s) copied\n", bytes, bytesh); + + return (EX_OK); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201510220438.t9M4c67v041733>