Date: Wed, 13 Aug 2014 09:10:35 +0300 From: Konstantin Belousov <kostikbel@gmail.com> To: Chris Torek <torek@torek.net> Cc: freebsd-hackers@freebsd.org Subject: Re: PCIe AER Message-ID: <20140813061035.GF2737@kib.kiev.ua> In-Reply-To: <201408122328.s7CNSGoC077640@elf.torek.net> References: <201408122328.s7CNSGoC077640@elf.torek.net>
next in thread | previous in thread | raw e-mail | index | archive | help
--w3uUfsyyY1Pqa/ej
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable
On Tue, Aug 12, 2014 at 05:28:16PM -0600, Chris Torek wrote:
> Is there any sort of overall plan for advanced error reporting
> on PCIe?
I do not think so.
>=20
> I may need to implement something along these lines soon, at least
> for detection (not necessarily correction just yet).
I used the following ddb hack to get access to the AER info when I
debugged something related. It relied on the AER constants which
I already committed. I did not even try to attach to the PCIe bridge
interrupts.
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 9733841..b5b71f4 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -3511,13 +3511,85 @@ pci_add_children(device_t dev, int domain, int busn=
o, size_t dinfo_size)
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
- dinfo->cfg.dev =3D device_add_child(bus, NULL, -1);
- device_set_ivars(dinfo->cfg.dev, dinfo);
+ device_t dev;
+ int aer;
+ uint32_t r;
+ uint16_t r2;
+
+ dinfo->cfg.dev =3D dev =3D device_add_child(bus, NULL, -1);
+ device_set_ivars(dev, dinfo);
resource_list_init(&dinfo->resources);
- pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
- pci_cfg_restore(dinfo->cfg.dev, dinfo);
+ pci_cfg_save(dev, dinfo, 0);
+ pci_cfg_restore(dev, dinfo);
pci_print_verbose(dinfo);
- pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
+ pci_add_resources(bus, dev, 0, 0);
+
+ if (dinfo->cfg.pcie.pcie_location !=3D 0 &&
+ dinfo->cfg.pcie.pcie_type =3D=3D PCIEM_TYPE_ROOT_PORT) {
+ r2 =3D pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_ROOT_CTL, 2);
+ r2 &=3D ~(PCIEM_ROOT_CTL_SERR_CORR |
+ PCIEM_ROOT_CTL_SERR_NONFATAL | PCIEM_ROOT_CTL_SERR_FATAL);
+ pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_ROOT_CTL, r2, 2);
+ }
+ if (pci_find_extcap(dev, PCIZ_AER, &aer) =3D=3D 0) {
+ r =3D pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
+ pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
+ if (r !=3D 0 && bootverbose) {
+ pci_printf(&dinfo->cfg,
+ "clearing AER UC 0x%08x -> 0x%08x\n",
+ r, pci_read_config(dev, aer + PCIR_AER_UC_STATUS,
+ 4));
+ }
+
+ r =3D pci_read_config(dev, aer + PCIR_AER_UC_MASK, 4);
+ r &=3D ~(PCIM_AER_UC_TRAINING_ERROR |
+ PCIM_AER_UC_DL_PROTOCOL_ERROR |
+ PCIM_AER_UC_SURPRISE_LINK_DOWN |
+ PCIM_AER_UC_POISONED_TLP |
+ PCIM_AER_UC_FC_PROTOCOL_ERROR |
+ PCIM_AER_UC_COMPLETION_TIMEOUT |
+ PCIM_AER_UC_COMPLETER_ABORT |
+ PCIM_AER_UC_UNEXPECTED_COMPLETION |
+ PCIM_AER_UC_RECEIVER_OVERFLOW |
+ PCIM_AER_UC_MALFORMED_TLP |
+ PCIM_AER_UC_ECRC_ERROR |
+ PCIM_AER_UC_UNSUPPORTED_REQUEST |
+ PCIM_AER_UC_ACS_VIOLATION |
+ PCIM_AER_UC_INTERNAL_ERROR |
+ PCIM_AER_UC_MC_BLOCKED_TLP |
+ PCIM_AER_UC_ATOMIC_EGRESS_BLK |
+ PCIM_AER_UC_TLP_PREFIX_BLOCKED);
+ pci_write_config(dev, aer + PCIR_AER_UC_MASK, r, 4);
+
+ r =3D pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
+ pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
+ if (r !=3D 0 && bootverbose) {
+ pci_printf(&dinfo->cfg,
+ "clearing AER COR 0x%08x -> 0x%08x\n",
+ r, pci_read_config(dev, aer + PCIR_AER_COR_STATUS,
+ 4));
+ }
+
+ r =3D pci_read_config(dev, aer + PCIR_AER_COR_MASK, 4);
+ r &=3D ~(PCIM_AER_COR_RECEIVER_ERROR |
+ PCIM_AER_COR_BAD_TLP |
+ PCIM_AER_COR_BAD_DLLP |
+ PCIM_AER_COR_REPLAY_ROLLOVER |
+ PCIM_AER_COR_REPLAY_TIMEOUT |
+ PCIM_AER_COR_ADVISORY_NF_ERROR |
+ PCIM_AER_COR_INTERNAL_ERROR |
+ PCIM_AER_COR_HEADER_LOG_OVFLOW);
+ pci_write_config(dev, aer + PCIR_AER_COR_MASK, r, 4);
+
+ r =3D pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_CTL, 2);
+ r |=3D PCIEM_CTL_COR_ENABLE | PCIEM_CTL_NFER_ENABLE |
+ PCIEM_CTL_FER_ENABLE | PCIEM_CTL_URR_ENABLE;
+ pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_CTL, r, 2);
+ }
}
=20
static int
@@ -5095,3 +5167,128 @@ pci_get_rid_method(device_t dev, device_t child)
=20
return (PCIB_GET_RID(device_get_parent(dev), child));
}
+
+static void
+pci_print_faulted_dev_name(const struct pci_devinfo *dinfo)
+{
+ const char *dev_name;
+ device_t dev;
+
+ dev =3D dinfo->cfg.dev;
+ printf("pci%d:%d:%d:%d", dinfo->cfg.domain, dinfo->cfg.bus,
+ dinfo->cfg.slot, dinfo->cfg.func);
+ dev_name =3D device_get_name(dev);
+ if (dev_name !=3D NULL)
+ printf(" (%s%d)", dev_name, device_get_unit(dev));
+}
+
+void
+pci_print_faulted_dev(void)
+{
+ struct pci_devinfo *dinfo;
+ device_t dev;
+ int aer, i;
+ uint32_t r1, r2;
+ uint16_t status;
+
+ STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
+ dev =3D dinfo->cfg.dev;
+ status =3D pci_read_config(dev, PCIR_STATUS, 2);
+ status &=3D PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
+ PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
+ PCIM_STATUS_SERR | PCIM_STATUS_PERR;
+ if (status !=3D 0) {
+ pci_print_faulted_dev_name(dinfo);
+ printf(" error 0x%04x\n", status);
+ }
+ if (dinfo->cfg.pcie.pcie_location !=3D 0) {
+ status =3D pci_read_config(dev,
+ dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_STA, 2);
+ if ((status & (PCIEM_STA_CORRECTABLE_ERROR |
+ PCIEM_STA_NON_FATAL_ERROR | PCIEM_STA_FATAL_ERROR |
+ PCIEM_STA_UNSUPPORTED_REQ)) !=3D 0) {
+ pci_print_faulted_dev_name(dinfo);
+ printf(" PCIe DEVCTL 0x%04x DEVSTA 0x%04x\n",
+ pci_read_config(dev,
+ dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_CTL, 2),
+ status);
+ }
+ }
+ if (pci_find_extcap(dev, PCIZ_AER, &aer) =3D=3D 0) {
+ r1 =3D pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
+ r2 =3D pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
+ if (r1 !=3D 0 || r2 !=3D 0) {
+ pci_print_faulted_dev_name(dinfo);
+ printf(" AER UC 0x%08x Mask 0x%08x Svr 0x%08x\n"
+ " COR 0x%08x Mask 0x%08x Ctl 0x%08x\n",
+ r1, pci_read_config(dev, aer +
+ PCIR_AER_UC_MASK, 4),
+ pci_read_config(dev, aer +
+ PCIR_AER_UC_SEVERITY, 4),
+ r2, pci_read_config(dev, aer +
+ PCIR_AER_COR_MASK, 4),
+ pci_read_config(dev, aer +
+ PCIR_AER_CAP_CONTROL, 4));
+ for (i =3D 0; i < 4; i++) {
+ r1 =3D pci_read_config(dev, aer +
+ PCIR_AER_HEADER_LOG + i * 4, 4);
+ printf(" HL%d: 0x%08x\n", i, r1);
+ }
+ }
+ }
+ }
+}
+
+#ifdef DDB
+DB_SHOW_COMMAND(pcierr, pci_print_faulted_dev_db)
+{
+
+ pci_print_faulted_dev();
+}
+
+static void
+db_clear_pcie_errors(const struct pci_devinfo *dinfo)
+{
+ device_t dev;
+ int aer;
+ uint32_t r;
+
+ dev =3D dinfo->cfg.dev;
+ r =3D pci_read_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_STA, 2);
+ pci_write_config(dev, dinfo->cfg.pcie.pcie_location +
+ PCIER_DEVICE_STA, r, 2);
+
+ if (pci_find_extcap(dev, PCIZ_AER, &aer) !=3D 0)
+ return;
+ r =3D pci_read_config(dev, aer + PCIR_AER_UC_STATUS, 4);
+ if (r !=3D 0)
+ pci_write_config(dev, aer + PCIR_AER_UC_STATUS, r, 4);
+ r =3D pci_read_config(dev, aer + PCIR_AER_COR_STATUS, 4);
+ if (r !=3D 0)
+ pci_write_config(dev, aer + PCIR_AER_COR_STATUS, r, 4);
+}
+
+DB_COMMAND(pci_clearerr, db_pci_clearerr)
+{
+ struct pci_devinfo *dinfo;
+ device_t dev;
+ uint16_t status, status1;
+
+ STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
+ dev =3D dinfo->cfg.dev;
+ status1 =3D status =3D pci_read_config(dev, PCIR_STATUS, 2);
+ status1 &=3D PCIM_STATUS_MDPERR | PCIM_STATUS_STABORT |
+ PCIM_STATUS_RTABORT | PCIM_STATUS_RMABORT |
+ PCIM_STATUS_SERR | PCIM_STATUS_PERR;
+ if (status1 !=3D 0) {
+ status &=3D ~status1;
+ pci_write_config(dev, PCIR_STATUS, status, 2);
+ }
+ if (dinfo->cfg.pcie.pcie_location !=3D 0)
+ db_clear_pcie_errors(dinfo);
+ }
+}
+#endif
diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
index 0157ee7..51defff 100644
--- a/sys/dev/pci/pcivar.h
+++ b/sys/dev/pci/pcivar.h
@@ -506,6 +506,7 @@ void pci_restore_state(device_t dev);
void pci_save_state(device_t dev);
int pci_set_max_read_req(device_t dev, int size);
=20
+void pci_print_faulted_dev(void);
=20
#ifdef BUS_SPACE_MAXADDR
#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
--w3uUfsyyY1Pqa/ej
Content-Type: application/pgp-signature
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2
iQIcBAEBAgAGBQJT6wFaAAoJEJDCuSvBvK1BLKMP/i6R3SH1y60Jmn1F7+TJiKQg
lvPvQPTmM2vMLUswKAIpsJ3zbchtWjrkKDJaE3Sp0CDtnMTubvqYtBefILlnWDg3
YJOKgTIKPsQfoNMVV85uJ9PI7Lh0x8FeniYQfL99UrBaYQzrybgsoiv78Rep+B3U
tP6FWQyLdcjUO/2WxNqXA1gBXEBJ6A4ODXP8G0YiBF2IOMhvVNDk013h/VvzYwm7
OOZq6PxoyIM7KZqfJ/TRFr6pc6SoTKynuEEMHvW4Q4p3lvYRmmQiZawjbHJk+mz6
LDqx0m/VadidX0EeNUkidjM9hm4ch/QKRqUhemr6Tt5tPonMwzhXmi0QYgUIpgpi
Ehw0HAiVWUIs/GUEwa0uPmKIuiC7zV8/sGn+dd0UOKjToXQQzLfcjVNftuWCdOlB
ZiROxuKr1E/tb09SEo0DZW6Gwlk3khsXSWUSsbVo8WBzIpiljVzBzzmNcljFdHai
e7lzgpflPgKkrtg0b3WZ4Iufj9RoU9ojK1OHgVmYzP10pDhq/edb/9+ZzuBOeqXQ
klx36sFDGjk0/zdN4py5/9u1FcUpstS2J3P3q5ktRfxFn92YigrCd51Keyumftmx
VUYQrc1b5aJJz9BNQ7vtA/tDwZWWfq2jE3j0Sz6ov64pnqvO0bwOI/yA3N4gTZvg
E7qImBdTybniF4Gw502P
=zfrK
-----END PGP SIGNATURE-----
--w3uUfsyyY1Pqa/ej--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140813061035.GF2737>
