Date: Fri, 3 Mar 2017 22:51:05 +0000 (UTC) From: Andriy Gapon <avg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r314638 - in head/sys: dev/amd_ecc_inject modules modules/amd_ecc_inject Message-ID: <201703032251.v23Mp5s2021256@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: avg
Date: Fri Mar 3 22:51:04 2017
New Revision: 314638
URL: https://svnweb.freebsd.org/changeset/base/314638

Log:
  add a module that provides support for DRAM ECC error injection on AMD CPUs

  I imagine that the module would be useful only to a very limited number
  of developers, so that's my excuse for not writing any documentation.
  On a more serious note, please see DRAM Error Injection section of BKDGs
  for families 10h - 16h. E.g. section 2.13.3.1 of BKDG for AMD Family 15h
  Models 00h-0Fh Processors.

  Many thanks to kib for his suggestions and comments.

  Discussed with: kib
  MFC after: 3 weeks
  Differential Revision: https://reviews.freebsd.org/D9824

Added:
  head/sys/dev/amd_ecc_inject/
  head/sys/dev/amd_ecc_inject/ecc_inject.c (contents, props changed)
  head/sys/modules/amd_ecc_inject/
  head/sys/modules/amd_ecc_inject/Makefile (contents, props changed)
Modified:
  head/sys/modules/Makefile

Added: head/sys/dev/amd_ecc_inject/ecc_inject.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sys/dev/amd_ecc_inject/ecc_inject.c Fri Mar 3 22:51:04 2017 (r314638)
@@ -0,0 +1,281 @@
+/*-
+ * Copyright (c) 2017 Andriy Gapon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include <dev/pci/pcivar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+
+
+/*
+ * See BKDG for AMD Family 15h Models 00h-0Fh Processors
+ * (publication 42301 Rev 3.08 - March 12, 2012):
+ * - 2.13.3.1 DRAM Error Injection
+ * - D18F3xB8 NB Array Address
+ * - D18F3xBC NB Array Data Port
+ * - D18F3xBC_x8 DRAM ECC
+ */
+#define	NB_MCA_CFG		0x44
+#define	DRAM_ECC_EN		(1 << 22)
+#define	NB_MCA_EXTCFG		0x180
+#define	ECC_SYMB_SZ		(1 << 25)
+#define	NB_ARRAY_ADDR		0xb8
+#define	DRAM_ECC_SEL		(0x8U << 28)	/* unsigned: sets bit 31 */
+#define	QUADRANT_SHIFT		1
+#define	QUADRANT_MASK		0x3
+#define	NB_ARRAY_PORT		0xbc
+#define	INJ_WORD_SHIFT		20
+#define	INJ_WORD_MASK		0x1ff
+#define	DRAM_ERR_EN		(1 << 18)
+#define	DRAM_WR_REQ		(1 << 17)
+#define	DRAM_RD_REQ		(1 << 16)
+#define	INJ_VECTOR_MASK		0xffff
+
+static void ecc_ei_inject(int);
+
+static device_t nbdev;
+static u_int delay_ms = 0;
+static u_int quadrant = 0;	/* 0 - 3 */
+static u_int word_mask = 0x001;	/* 9 bits: 8 + 1 for ECC */
+static u_int bit_mask = 0x0001;	/* 16 bits */
+
+/*
+ * Sysctl handler for an unsigned tunable with an inclusive upper bound.
+ * arg1 points to the u_int being exported and arg2 is the largest value
+ * accepted; a larger new value is rejected with EINVAL.
+ */
+static int
+sysctl_int_with_max(SYSCTL_HANDLER_ARGS)
+{
+	u_int value;
+	int error;
+
+	value = *(u_int *)arg1;
+	error = sysctl_handle_int(oidp, &value, 0, req);
+	if (error || req->newptr == NULL)
+		return (error);
+	if (value > arg2)
+		return (EINVAL);
+	*(u_int *)arg1 = value;
+	return (0);
+}
+
+/*
+ * Same as sysctl_int_with_max(), except that a new value of zero is
+ * rejected with EINVAL as well.
+ */
+static int
+sysctl_nonzero_int_with_max(SYSCTL_HANDLER_ARGS)
+{
+	u_int value;
+	int error;
+
+	value = *(u_int *)arg1;
+	error = sysctl_int_with_max(oidp, &value, arg2, req);
+	if (error || req->newptr == NULL)
+		return (error);
+	if (value == 0)
+		return (EINVAL);
+	*(u_int *)arg1 = value;
+	return (0);
+}
+
+/*
+ * Handler for the "inject" node: a non-zero value written to it is passed
+ * to ecc_ei_inject() as the number of injections to perform.  The value
+ * reported on reads is always zero.
+ */
+static int
+sysctl_proc_inject(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	int i;
+
+	i = 0;
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error)
+		return (error);
+	if (i != 0)
+		ecc_ei_inject(i);
+	return (0);
+}
+
+static SYSCTL_NODE(_hw, OID_AUTO, error_injection, CTLFLAG_RD, NULL,
+    "Hardware error injection");
+static SYSCTL_NODE(_hw_error_injection, OID_AUTO, dram_ecc, CTLFLAG_RD, NULL,
+    "DRAM ECC error injection");
+SYSCTL_UINT(_hw_error_injection_dram_ecc, OID_AUTO, delay,
+    CTLTYPE_UINT | CTLFLAG_RW, &delay_ms, 0,
+    "Delay in milliseconds between error injections");
+SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, quadrant,
+    CTLTYPE_UINT | CTLFLAG_RW, &quadrant, QUADRANT_MASK,
+    sysctl_int_with_max, "IU",
+    "Index of 16-byte quadrant within 64-byte line where errors "
+    "should be injected");
+SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, word_mask,
+    CTLTYPE_UINT | CTLFLAG_RW, &word_mask, INJ_WORD_MASK,
+    sysctl_nonzero_int_with_max, "IU",
+    "9-bit mask of words where errors should be injected (8 data + 1 ECC)");
+SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, bit_mask,
+    CTLTYPE_UINT | CTLFLAG_RW, &bit_mask, INJ_VECTOR_MASK,
+    sysctl_nonzero_int_with_max, "IU",
+    "16-bit mask of bits within each selected word where errors "
+    "should be injected");
+SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, inject,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_proc_inject, "I",
+    "Inject a number of errors according to configured parameters");
+
+/*
+ * Perform one injection pass over the buffer at arg, which is size bytes
+ * long.  The quadrant is selected via the NB array address register and a
+ * write injection request carrying the word and bit masks is posted to the
+ * NB array data port.  The buffer is then zeroed word by word until the
+ * request bit reads back clear, and finally every word is read, presumably
+ * so that the injected error gets noticed.
+ */
+static void
+ecc_ei_inject_one(void *arg, size_t size)
+{
+	volatile uint64_t *memory = arg;
+	uint32_t val;
+	int i;
+
+	val = DRAM_ECC_SEL | (quadrant << QUADRANT_SHIFT);
+	pci_write_config(nbdev, NB_ARRAY_ADDR, val, 4);
+
+	val = (word_mask << INJ_WORD_SHIFT) | DRAM_WR_REQ | bit_mask;
+	pci_write_config(nbdev, NB_ARRAY_PORT, val, 4);
+
+	for (i = 0; i < size / sizeof(uint64_t); i++) {
+		memory[i] = 0;
+		val = pci_read_config(nbdev, NB_ARRAY_PORT, 4);
+		if ((val & DRAM_WR_REQ) == 0)
+			break;
+	}
+	for (i = 0; i < size / sizeof(uint64_t); i++)
+		memory[0] = memory[i];
+}
+
+/*
+ * Run count injection passes over a freshly allocated uncacheable page,
+ * sleeping delay_ms milliseconds between consecutive passes if a delay is
+ * configured.  Nothing is injected when count is not positive.
+ */
+static void
+ecc_ei_inject(int count)
+{
+	vm_offset_t memory;
+	int injected;
+
+	KASSERT((quadrant & ~QUADRANT_MASK) == 0,
+	    ("quadrant value is outside of range: %u", quadrant));
+	KASSERT(word_mask != 0 && (word_mask & ~INJ_WORD_MASK) == 0,
+	    ("word mask value is outside of range: 0x%x", word_mask));
+	KASSERT(bit_mask != 0 && (bit_mask & ~INJ_VECTOR_MASK) == 0,
+	    ("bit mask value is outside of range: 0x%x", bit_mask));
+
+	memory = kmem_alloc_attr(kernel_arena, PAGE_SIZE, M_WAITOK, 0, ~0,
+	    VM_MEMATTR_UNCACHEABLE);
+
+	for (injected = 0; injected < count; injected++) {
+		ecc_ei_inject_one((void*)memory, PAGE_SIZE);
+		if (delay_ms != 0 && injected != count - 1)
+			pause_sbt("ecc_ei_inject", delay_ms * SBT_1MS, 0, 0);
+	}
+
+	kmem_free(kernel_arena, memory, PAGE_SIZE);
+}
+
+/*
+ * Verify that error injection is usable: the CPU must be AMD family 10h or
+ * later, the NB PCI function (bus 0, slot 24, function 3) must be present
+ * and DRAM ECC must be enabled in the NB MCA configuration register.
+ * Returns 0 on success and ENXIO otherwise.
+ */
+static int
+ecc_ei_load(void)
+{
+	uint32_t val;
+
+	if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) {
+		printf("DRAM ECC error injection is not supported\n");
+		return (ENXIO);
+	}
+	nbdev = pci_find_bsf(0, 24, 3);
+	if (nbdev == NULL) {
+		printf("Couldn't find NB PCI device\n");
+		return (ENXIO);
+	}
+	val = pci_read_config(nbdev, NB_MCA_CFG, 4);
+	if ((val & DRAM_ECC_EN) == 0) {
+		printf("DRAM ECC is not supported or disabled\n");
+		return (ENXIO);
+	}
+	printf("DRAM ECC error injection support loaded\n");
+	return (0);
+}
+
+/*
+ * Module event handler.  The result of ecc_ei_load() is propagated on
+ * MOD_LOAD so that loading fails on unsupported hardware rather than
+ * leaving the inject sysctl usable while nbdev is unset.
+ */
+static int
+tsc_modevent(module_t mod __unused, int type, void *data __unused)
+{
+	int error;
+
+	error = 0;
+	switch (type) {
+	case MOD_LOAD:
+		error = ecc_ei_load();
+		break;
+	case MOD_UNLOAD:
+	case MOD_SHUTDOWN:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (error);
+}
+
+DEV_MODULE(tsc, tsc_modevent, NULL);

Modified: head/sys/modules/Makefile
==============================================================================
--- head/sys/modules/Makefile Fri Mar 3 22:46:20 2017 (r314637)
+++ head/sys/modules/Makefile Fri Mar 3 22:51:04 2017 (r314638)
@@ -33,6 +33,7 @@ SUBDIR= \
 	alc \
 	ale \
 	alq \
+	${_amd_ecc_inject} \
 	${_amdsbwd} \
 	${_amdtemp} \
 	amr \
@@ -610,6 +611,7 @@ _acpi= acpi
 .if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
 _aesni= aesni
 .endif
+_amd_ecc_inject=amd_ecc_inject
 _amdsbwd= amdsbwd
 _amdtemp= amdtemp
 _arcmsr= arcmsr

Added: head/sys/modules/amd_ecc_inject/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/sys/modules/amd_ecc_inject/Makefile Fri Mar 3 22:51:04 2017 (r314638)
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../dev/amd_ecc_inject
+
+KMOD=	amd_ecc_inject
+SRCS=	ecc_inject.c bus_if.h device_if.h pci_if.h
+
+.include <bsd.kmod.mk>
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201703032251.v23Mp5s2021256>