From owner-freebsd-hackers@FreeBSD.ORG Thu Aug 14 16:11:41 2014 Return-Path: Delivered-To: freebsd-hackers@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 065B6B0E for ; Thu, 14 Aug 2014 16:11:41 +0000 (UTC) Received: from bigwig.baldwin.cx (bigwig.baldwin.cx [IPv6:2001:470:1f11:75::1]) (using TLSv1 with cipher DHE-RSA-CAMELLIA256-SHA (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id BA3DC2A56 for ; Thu, 14 Aug 2014 16:11:40 +0000 (UTC) Received: from jhbbsd.localnet (unknown [209.249.190.124]) by bigwig.baldwin.cx (Postfix) with ESMTPSA id C42A6B977; Thu, 14 Aug 2014 12:11:39 -0400 (EDT) From: John Baldwin To: freebsd-hackers@freebsd.org Subject: Re: PCIe AER Date: Thu, 14 Aug 2014 12:08:30 -0400 User-Agent: KMail/1.13.5 (FreeBSD/8.4-CBSD-20140415; KDE/4.5.5; amd64; ; ) References: <201408122328.s7CNSGoC077640@elf.torek.net> In-Reply-To: <201408122328.s7CNSGoC077640@elf.torek.net> MIME-Version: 1.0 Content-Type: Text/Plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit Message-Id: <201408141208.30208.jhb@freebsd.org> X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.2.7 (bigwig.baldwin.cx); Thu, 14 Aug 2014 12:11:39 -0400 (EDT) Cc: Chris Torek X-BeenThere: freebsd-hackers@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: Technical Discussions relating to FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 14 Aug 2014 16:11:41 -0000 On Tuesday, August 12, 2014 7:28:16 pm Chris Torek wrote: > Is there any sort of overall plan for advanced error reporting > on PCIe? > > I may need to implement something along these lines soon, at least > for detection (not necessarily correction just yet). As Konstantin noted, there is nothing planned. 
There is the '-e' flag for pciconf, and I have a hack to allow pciconf to
clear errors via a '-C' flag. I believe at least NetApp has their own
internal AER support, but my understanding is that it is very grotty and
not really suitable for upstreaming.

Index: err.c
===================================================================
--- err.c	(revision 270540)
+++ err.c	(working copy)
@@ -171,3 +171,37 @@
 	mask = read_config(fd, &p->pc_sel, aer + PCIR_AER_COR_STATUS, 4);
 	print_bits("Corrected", aer_cor, mask);
 }
+
+/* Clear latched PCI, PCI-express and AER error status for a device. */
+void
+clear_errors(int fd, struct pci_conf *p)
+{
+	uint16_t sta, aer;
+	uint8_t pcie;
+
+	/* Clear standard PCI errors (bits are write-1-to-clear). */
+	sta = read_config(fd, &p->pc_sel, PCIR_STATUS, 2);
+	if ((sta & PCI_ERRORS) != 0)
+		write_config(fd, &p->pc_sel, PCIR_STATUS, 2,
+		    sta & PCI_ERRORS);
+
+	/* See if this is a PCI-express device. */
+	pcie = pci_find_cap(fd, p, PCIY_EXPRESS);
+	if (pcie == 0)
+		return;
+
+	/* Clear PCI-e errors. */
+	sta = read_config(fd, &p->pc_sel, pcie + PCIER_DEVICE_STA, 2);
+	if ((sta & PCIE_ERRORS) != 0)
+		write_config(fd, &p->pc_sel, pcie + PCIER_DEVICE_STA, 2,
+		    sta & PCIE_ERRORS);
+
+	/* See if this device supports AER. */
+	aer = pcie_find_cap(fd, p, PCIZ_AER);
+	if (aer == 0)
+		return;
+
+	/* Clear AER errors. */
+	write_config(fd, &p->pc_sel, aer + PCIR_AER_UC_STATUS, 4, 0xffffffff);
+	write_config(fd, &p->pc_sel, aer + PCIR_AER_COR_STATUS, 4, 0xffffffff);
+}
Index: pciconf.c
===================================================================
--- pciconf.c	(revision 270540)
+++ pciconf.c	(working copy)
@@ -80,6 +81,7 @@
 static void readit(const char *, const char *, int);
 static void writeit(const char *, const char *, const char *, int);
 static void chkattached(const char *);
+static void clearerrs(const char *);
 
 static int exitstatus = 0;
 
@@ -88,7 +90,8 @@
 {
-	fprintf(stderr, "%s\n%s\n%s\n%s\n",
+	fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n",
 		"usage: pciconf -l [-bcevV] [device]",
 		"       pciconf -a device",
+		"       pciconf -C device",
 		"       pciconf -r [-b | -h] device addr[:addr2]",
 		"       pciconf -w [-b | -h] device addr value");
 	exit (1);
@@ -98,14 +102,14 @@
 main(int argc, char **argv)
 {
 	int c;
-	int listmode, readmode, writemode, attachedmode;
+	int listmode, readmode, writemode, attachedmode, clearmode;
 	int bars, caps, errors, verbose, vpd;
 	int byte, isshort;
 
-	listmode = readmode = writemode = attachedmode = 0;
+	listmode = readmode = writemode = attachedmode = clearmode = 0;
 	bars = caps = errors = verbose = vpd = byte = isshort = 0;
 
-	while ((c = getopt(argc, argv, "abcehlrwvV")) != -1) {
+	while ((c = getopt(argc, argv, "abcCehlrwvV")) != -1) {
 		switch(c) {
 		case 'a':
 			attachedmode = 1;
@@ -116,6 +120,10 @@
 			byte = 1;
 			break;
 
+		case 'C':
+			clearmode = 1;
+			break;
+
 		case 'c':
 			caps = 1;
 			break;
@@ -160,6 +172,7 @@
 	if ((listmode && optind >= argc + 1)
 	    || (writemode && optind + 3 != argc)
 	    || (readmode && optind + 2 != argc)
+	    || (clearmode && optind + 1 != argc)
 	    || (attachedmode && optind + 1 != argc))
 		usage();
 
@@ -174,6 +188,8 @@
 	} else if (writemode) {
 		writeit(argv[optind], argv[optind + 1], argv[optind + 2],
 		    byte ? 1 : isshort ?
2 : 4);
+	} else if (clearmode) {
+		clearerrs(argv[optind]);
 	} else {
 		usage();
 	}
@@ -642,6 +663,20 @@
 	return (pi.pi_data);
 }
 
+void
+write_config(int fd, struct pcisel *sel, long reg, int width, uint32_t value)
+{
+	struct pci_io pi;
+
+	pi.pi_sel = *sel;
+	pi.pi_reg = reg;
+	pi.pi_width = width;
+	pi.pi_data = value;
+
+	if (ioctl(fd, PCIOCWRITE, &pi) < 0)
+		err(1, "ioctl(PCIOCWRITE)");
+}
+
 static struct pcisel
 getdevice(const char *name)
 {
@@ -792,19 +827,18 @@
 writeit(const char *name, const char *reg, const char *data, int width)
 {
 	int fd;
-	struct pci_io pi;
+	struct pcisel sel;
+	u_long r, value;
 
-	pi.pi_sel = getsel(name);
-	pi.pi_reg = strtoul(reg, (char **)0, 0); /* XXX error check */
-	pi.pi_width = width;
-	pi.pi_data = strtoul(data, (char **)0, 0); /* XXX error check */
+	sel = getsel(name);
+	r = strtoul(reg, (char **)0, 0); /* XXX error check */
+	value = strtoul(data, (char **)0, 0); /* XXX error check */
 
 	fd = open(_PATH_DEVPCI, O_RDWR, 0);
 	if (fd < 0)
 		err(1, "%s", _PATH_DEVPCI);
 
-	if (ioctl(fd, PCIOCWRITE, &pi) < 0)
-		err(1, "ioctl(PCIOCWRITE)");
+	write_config(fd, &sel, r, width, value);
 }
 
 static void
@@ -825,3 +859,40 @@
 	exitstatus = pi.pi_data ? 0 : 2; /* exit(2), if NOT attached */
 	printf("%s: %s%s\n", name, pi.pi_data == 0 ? "not " : "", "attached");
 }
+
+/* Clear the error status bits of the device identified by 'name'. */
+static void
+clearerrs(const char *name)
+{
+	int fd;
+	struct pci_conf_io pc;
+	struct pci_conf conf[1];
+	struct pci_match_conf patterns[1];
+
+	fd = open(_PATH_DEVPCI, O_RDWR, 0);
+	if (fd < 0)
+		err(1, "%s", _PATH_DEVPCI);
+
+	bzero(&pc, sizeof(struct pci_conf_io));
+	pc.match_buf_len = sizeof(conf);
+	pc.matches = conf;
+	bzero(&patterns, sizeof(patterns));
+	patterns[0].pc_sel = getsel(name);
+	patterns[0].flags = PCI_GETCONF_MATCH_DOMAIN | PCI_GETCONF_MATCH_BUS |
+	    PCI_GETCONF_MATCH_DEV | PCI_GETCONF_MATCH_FUNC;
+	pc.num_patterns = 1;
+	pc.pat_buf_len = sizeof(patterns);
+	pc.patterns = patterns;
+
+	if (ioctl(fd, PCIOCGETCONF, &pc) == -1)
+		err(1, "ioctl(PCIOCGETCONF)");
+
+	/* conf[0] is only valid if the lookup matched a device. */
+	if (pc.status == PCI_GETCONF_ERROR)
+		errx(1, "error returned from PCIOCGETCONF ioctl");
+	if (pc.num_matches == 0)
+		errx(1, "%s: no matching device", name);
+
+	clear_errors(fd, conf);
+	close(fd);
+}
Index: pciconf.h
===================================================================
--- pciconf.h	(revision 270540)
+++ pciconf.h	(working copy)
@@ -33,11 +33,14 @@
 #ifndef __PCICONF_H__
 #define __PCICONF_H__
 
+void	clear_errors(int fd, struct pci_conf *p);
 void	list_caps(int fd, struct pci_conf *p);
 void	list_errors(int fd, struct pci_conf *p);
 void	list_slot(struct pci_conf *p);
 uint8_t	pci_find_cap(int fd, struct pci_conf *p, uint8_t id);
 uint16_t pcie_find_cap(int fd, struct pci_conf *p, uint16_t id);
 uint32_t read_config(int fd, struct pcisel *sel, long reg, int width);
+void	write_config(int fd, struct pcisel *sel, long reg, int width,
+    uint32_t value);
 
 #endif

-- 
John Baldwin