Date: Mon, 22 Jul 2019 05:58:37 -0700 From: Cy Schubert <Cy.Schubert@cschubert.com> To: Conrad Meyer <cem@FreeBSD.org> Cc: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: Re: svn commit: r342977 - in head/sys/dev: amdsmn amdtemp Message-ID: <201907221258.x6MCwbZj011527@slippy.cwsent.com> In-Reply-To: <201901122236.x0CMaXsK017117@repo.freebsd.org> References: <201901122236.x0CMaXsK017117@repo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
In message <201901122236.x0CMaXsK017117@repo.freebsd.org>, Conrad Meyer writes: > Author: cem > Date: Sat Jan 12 22:36:33 2019 > New Revision: 342977 > URL: https://svnweb.freebsd.org/changeset/base/342977 > > Log: > amdtemp(4): Add support for Family 15h, Model >=60h > > Family 15h is a bit of an oddball. Early models used the same temperature > register and spec (mostly[1]) as earlier CPU families. > > Model 60h-6Fh and 70-7Fh use something more like Family 17h's Service > Management Network, communicating with it in a similar fashion. To support > them, add support for their version of SMU indirection to amdsmn(4) and use > it in amdtemp(4) on these models. > > While here, clarify some of the deviceid macros in amdtemp(4) that were > added with arbitrary, incorrect family numbers, and remove ones that were > not used. Additionally, clarify intent and condition of heterogenous > multi-socket system detection. > > [1]: 15h adds the "adjust range by -49°C if a certain condition is met," > which previous families did not have. > > Reported by: D. C. <tjoard AT gmail.com> > PR: 234657 > Tested by: D. C. <tjoard AT gmail.com> > > Modified: > head/sys/dev/amdsmn/amdsmn.c > head/sys/dev/amdtemp/amdtemp.c > > Modified: head/sys/dev/amdsmn/amdsmn.c > ============================================================================= > = > --- head/sys/dev/amdsmn/amdsmn.c Sat Jan 12 22:10:31 2019 (r34297 > 6) > +++ head/sys/dev/amdsmn/amdsmn.c Sat Jan 12 22:36:33 2019 (r34297 > 7) > @@ -1,5 +1,5 @@ > /*- > - * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> > + * Copyright (c) 2017-2019 Conrad Meyer <cem@FreeBSD.org> > * All rights reserved. > * > * Redistribution and use in source and binary forms, with or without > @@ -25,7 +25,7 @@ > */ > > /* > - * Driver for the AMD Family 17h CPU System Management Network. > + * Driver for the AMD Family 15h and 17h CPU System Management Network. > */ > > #include <sys/cdefs.h> > @@ -51,24 +51,45 @@ __FBSDID("$FreeBSD$"); > > #include <dev/amdsmn/amdsmn.h> > > -#define SMN_ADDR_REG 0x60 > -#define SMN_DATA_REG 0x64 > +#define F15H_SMN_ADDR_REG 0xb8 > +#define F15H_SMN_DATA_REG 0xbc > +#define F17H_SMN_ADDR_REG 0x60 > +#define F17H_SMN_DATA_REG 0x64 > > +#define PCI_DEVICE_ID_AMD_15H_M60H_ROOT 0x1576 > #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 > -#define PCI_DEVICE_ID_AMD_17H_ROOT_DF_F3 0x1463 > #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 > -#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT_DF_F3 0x15eb > > +struct pciid; > struct amdsmn_softc { > struct mtx smn_lock; > + const struct pciid *smn_pciid; > }; > > -static struct pciid { > +static const struct pciid { > uint16_t amdsmn_vendorid; > uint16_t amdsmn_deviceid; > + uint8_t amdsmn_addr_reg; > + uint8_t amdsmn_data_reg; > } amdsmn_ids[] = { > - { CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_ROOT }, > - { CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT }, > + { > + .amdsmn_vendorid = CPU_VENDOR_AMD, > + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_15H_M60H_ROOT, > + .amdsmn_addr_reg = F15H_SMN_ADDR_REG, > + .amdsmn_data_reg = F15H_SMN_DATA_REG, > + }, > + { > + .amdsmn_vendorid = CPU_VENDOR_AMD, > + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_17H_ROOT, > + .amdsmn_addr_reg = F17H_SMN_ADDR_REG, > + .amdsmn_data_reg = F17H_SMN_DATA_REG, > + }, > + { > + .amdsmn_vendorid = CPU_VENDOR_AMD, > + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_17H_M10H_ROOT, > + .amdsmn_addr_reg = F17H_SMN_ADDR_REG, > + .amdsmn_data_reg = F17H_SMN_DATA_REG, > + }, > }; > > /* > @@ -101,7 +122,7 @@ MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmn, > nitems(amdsmn_ids)); > > static bool > -amdsmn_match(device_t parent) > +amdsmn_match(device_t parent, const struct pciid **pciid_out) > { > uint16_t vendor, device; > size_t i; > @@ -109,10 +130,14 @@ amdsmn_match(device_t parent) > vendor = pci_get_vendor(parent); > device = pci_get_device(parent); > > - for (i = 0; i < nitems(amdsmn_ids); i++) > + for (i = 0; i < nitems(amdsmn_ids); i++) { > if (vendor == amdsmn_ids[i].amdsmn_vendorid && > - device == amdsmn_ids[i].amdsmn_deviceid) > + device == amdsmn_ids[i].amdsmn_deviceid) { > + if (pciid_out != NULL) > + *pciid_out = &amdsmn_ids[i]; > return (true); > + } > + } > return (false); > } > > @@ -124,7 +149,7 @@ amdsmn_identify(driver_t *driver, device_t parent) > /* Make sure we're not being doubly invoked. */ > if (device_find_child(parent, "amdsmn", -1) != NULL) > return; > - if (!amdsmn_match(parent)) > + if (!amdsmn_match(parent, NULL)) > return; > > child = device_add_child(parent, "amdsmn", -1); > @@ -136,21 +161,25 @@ static int > amdsmn_probe(device_t dev) > { > uint32_t family; > + char buf[64]; > > if (resource_disabled("amdsmn", 0)) > return (ENXIO); > - if (!amdsmn_match(device_get_parent(dev))) > + if (!amdsmn_match(device_get_parent(dev), NULL)) > return (ENXIO); > > family = CPUID_TO_FAMILY(cpu_id); > > switch (family) { > + case 0x15: > case 0x17: > break; > default: > return (ENXIO); > } > - device_set_desc(dev, "AMD Family 17h System Management Network"); > + snprintf(buf, sizeof(buf), "AMD Family %xh System Management Network", > + family); > + device_set_desc_copy(dev, buf); > > return (BUS_PROBE_GENERIC); > } > @@ -160,6 +189,9 @@ amdsmn_attach(device_t dev) > { > struct amdsmn_softc *sc = device_get_softc(dev); > > + if (!amdsmn_match(device_get_parent(dev), &sc->smn_pciid)) > + return (ENXIO); > + > mtx_init(&sc->smn_lock, "SMN mtx", "SMN", MTX_DEF); > return (0); > } > @@ -182,8 +214,8 @@ amdsmn_read(device_t dev, uint32_t addr, uint32_t *val > parent = device_get_parent(dev); > > mtx_lock(&sc->smn_lock); > - pci_write_config(parent, SMN_ADDR_REG, addr, 4); > - *value = pci_read_config(parent, SMN_DATA_REG, 4); > + pci_write_config(parent, sc->smn_pciid->amdsmn_addr_reg, addr, 4); > + *value = pci_read_config(parent, sc->smn_pciid->amdsmn_data_reg, 4); > mtx_unlock(&sc->smn_lock); > > return (0); > @@ -198,8 +230,8 @@ amdsmn_write(device_t dev, uint32_t addr, uint32_t val > parent = device_get_parent(dev); > > mtx_lock(&sc->smn_lock); > - pci_write_config(parent, SMN_ADDR_REG, addr, 4); > - pci_write_config(parent, SMN_DATA_REG, value, 4); > + pci_write_config(parent, sc->smn_pciid->amdsmn_addr_reg, addr, 4); > + pci_write_config(parent, sc->smn_pciid->amdsmn_data_reg, value, 4); > mtx_unlock(&sc->smn_lock); > > return (0); > > Modified: head/sys/dev/amdtemp/amdtemp.c > ============================================================================= > = > --- head/sys/dev/amdtemp/amdtemp.c Sat Jan 12 22:10:31 2019 (r34297 > 6) > +++ head/sys/dev/amdtemp/amdtemp.c Sat Jan 12 22:36:33 2019 (r34297 > 7) > @@ -5,6 +5,8 @@ > * Copyright (c) 2009 Norikatsu Shigemura <nork@FreeBSD.org> > * Copyright (c) 2009-2012 Jung-uk Kim <jkim@FreeBSD.org> > * All rights reserved. > + * Copyright (c) 2017-2019 Conrad Meyer <cem@FreeBSD.org> > + * All rights reserved. > * > * Redistribution and use in source and binary forms, with or without > * modification, are permitted provided that the following conditions > @@ -76,44 +78,67 @@ struct amdtemp_softc { > device_t sc_smn; > }; > > +/* > + * N.B. The numbers in macro names below are significant and represent CPU > + * family and model numbers. Do not make up fictitious family or model numb > ers > + * when adding support for new devices. > + */ > #define VENDORID_AMD 0x1022 > #define DEVICEID_AMD_MISC0F 0x1103 > #define DEVICEID_AMD_MISC10 0x1203 > #define DEVICEID_AMD_MISC11 0x1303 > -#define DEVICEID_AMD_MISC12 0x1403 > #define DEVICEID_AMD_MISC14 0x1703 > #define DEVICEID_AMD_MISC15 0x1603 > +#define DEVICEID_AMD_MISC15_M10H 0x1403 > +#define DEVICEID_AMD_MISC15_M30H 0x141d > +#define DEVICEID_AMD_MISC15_M60H_ROOT 0x1576 > #define DEVICEID_AMD_MISC16 0x1533 > #define DEVICEID_AMD_MISC16_M30H 0x1583 > -#define DEVICEID_AMD_MISC17 0x141d > #define DEVICEID_AMD_HOSTB17H_ROOT 0x1450 > -#define DEVICEID_AMD_HOSTB17H_DF_F3 0x1463 > #define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0 > -#define DEVICEID_AMD_HOSTB17H_M10H_DF_F3 0x15eb > > -static struct amdtemp_product { > +static const struct amdtemp_product { > uint16_t amdtemp_vendorid; > uint16_t amdtemp_deviceid; > + /* > + * 0xFC register is only valid on the D18F3 PCI device; SMN temp > + * drivers do not attach to that device. > + */ > + bool amdtemp_has_cpuid; > } amdtemp_products[] = { > - { VENDORID_AMD, DEVICEID_AMD_MISC0F }, > - { VENDORID_AMD, DEVICEID_AMD_MISC10 }, > - { VENDORID_AMD, DEVICEID_AMD_MISC11 }, > - { VENDORID_AMD, DEVICEID_AMD_MISC12 }, > - { VENDORID_AMD, DEVICEID_AMD_MISC14 }, > - { VENDORID_AMD, DEVICEID_AMD_MISC15 }, > - { VENDORID_AMD, DEVICEID_AMD_MISC16 }, > - { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H }, > - { VENDORID_AMD, DEVICEID_AMD_MISC17 }, > - { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT }, > - { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT }, > + { VENDORID_AMD, DEVICEID_AMD_MISC0F, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC10, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC11, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC14, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC15, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC15_M10H, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC15_M30H, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC15_M60H_ROOT, false }, > + { VENDORID_AMD, DEVICEID_AMD_MISC16, true }, > + { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H, true }, > + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT, false }, > + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT, false }, > }; > > /* > - * Reported Temperature Control Register > + * Reported Temperature Control Register, family 0Fh-15h (some models), 16h. > */ > #define AMDTEMP_REPTMP_CTRL 0xa4 > > +#define AMDTEMP_REPTMP10H_CURTMP_MASK 0x7ff > +#define AMDTEMP_REPTMP10H_CURTMP_SHIFT 21 > +#define AMDTEMP_REPTMP10H_TJSEL_MASK 0x3 > +#define AMDTEMP_REPTMP10H_TJSEL_SHIFT 16 > + > /* > + * Reported Temperature, Family 15h, M60+ > + * > + * Same register bit definitions as other Family 15h CPUs, but access is > + * indirect via SMN, like Family 17h. > + */ > +#define AMDTEMP_15H_M60H_REPTMP_CTRL 0xd8200ca4 > + > +/* > * Reported Temperature, Family 17h > * > * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register > @@ -123,9 +148,13 @@ static struct amdtemp_product { > */ > #define AMDTEMP_17H_CUR_TMP 0x59800 > #define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1 << 19) > -#define AMDTEMP_17H_CUR_TMP_RANGE_OFF 490 > > /* > + * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius). > + */ > +#define AMDTEMP_CURTMP_RANGE_ADJUST 490 > + > +/* > * Thermaltrip Status Register (Family 0Fh only) > */ > #define AMDTEMP_THERMTP_STAT 0xe4 > @@ -151,9 +180,9 @@ static int amdtemp_probe(device_t dev); > static int amdtemp_attach(device_t dev); > static void amdtemp_intrhook(void *arg); > static int amdtemp_detach(device_t dev); > -static int amdtemp_match(device_t dev); > static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor); > static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor); > +static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor > ); > static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor); > static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS); > > @@ -180,8 +209,8 @@ MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1); > MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products, > nitems(amdtemp_products)); > > -static int > -amdtemp_match(device_t dev) > +static bool > +amdtemp_match(device_t dev, const struct amdtemp_product **product_out) > { > int i; > uint16_t vendor, devid; > @@ -191,11 +220,13 @@ amdtemp_match(device_t dev) > > for (i = 0; i < nitems(amdtemp_products); i++) { > if (vendor == amdtemp_products[i].amdtemp_vendorid && > - devid == amdtemp_products[i].amdtemp_deviceid) > - return (1); > + devid == amdtemp_products[i].amdtemp_deviceid) { > + if (product_out != NULL) > + *product_out = &amdtemp_products[i]; > + return (true); > + } > } > - > - return (0); > + return (false); > } > > static void > @@ -207,7 +238,7 @@ amdtemp_identify(driver_t *driver, device_t parent) > if (device_find_child(parent, "amdtemp", -1) != NULL) > return; > > - if (amdtemp_match(parent)) { > + if (amdtemp_match(parent, NULL)) { > child = device_add_child(parent, "amdtemp", -1); > if (child == NULL) > device_printf(parent, "add amdtemp child failed\n"); > @@ -221,7 +252,7 @@ amdtemp_probe(device_t dev) > > if (resource_disabled("amdtemp", 0)) > return (ENXIO); > - if (!amdtemp_match(device_get_parent(dev))) > + if (!amdtemp_match(device_get_parent(dev), NULL)) > return (ENXIO); > > family = CPUID_TO_FAMILY(cpu_id); > @@ -254,23 +285,42 @@ amdtemp_attach(device_t dev) > { > char tn[32]; > u_int regs[4]; > - struct amdtemp_softc *sc = device_get_softc(dev); > + const struct amdtemp_product *product; > + struct amdtemp_softc *sc; > struct sysctl_ctx_list *sysctlctx; > struct sysctl_oid *sysctlnode; > uint32_t cpuid, family, model; > u_int bid; > int erratum319, unit; > + bool needsmn; > > + sc = device_get_softc(dev); > erratum319 = 0; > + needsmn = false; > > - /* > - * CPUID Register is available from Revision F. > - */ > + if (!amdtemp_match(device_get_parent(dev), &product)) > + return (ENXIO); > + > cpuid = cpu_id; > family = CPUID_TO_FAMILY(cpuid); > model = CPUID_TO_MODEL(cpuid); > - if ((family != 0x0f || model >= 0x40) && family != 0x17) { > - cpuid = pci_read_config(dev, AMDTEMP_CPUID, 4); > + > + /* > + * This checks for the byzantine condition of running a heterogenous > + * revision multi-socket system where the attach thread is potentially > + * probing a remote socket's PCI device. > + * > + * Currently, such scenarios are unsupported on models using the SMN > + * (because on those models, amdtemp(4) attaches to a different PCI > + * device than the one that contains AMDTEMP_CPUID). > + * > + * The ancient 0x0F family of devices only supports this register from > + * models 40h+. > + */ > + if (product->amdtemp_has_cpuid && (family > 0x0f || > + (family == 0x0f && model >= 0x40))) { > + cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID, > + 4); > family = CPUID_TO_FAMILY(cpuid); > model = CPUID_TO_MODEL(cpuid); > } > @@ -364,16 +414,30 @@ amdtemp_attach(device_t dev) > case 0x14: > case 0x15: > case 0x16: > + sc->sc_ntemps = 1; > /* > - * There is only one sensor per package. > + * Some later (60h+) models of family 15h use a similar SMN > + * network as family 17h. (However, the register index differs > + * from 17h and the decoding matches other 10h-15h models, > + * which differ from 17h.) > */ > - sc->sc_ntemps = 1; > - > - sc->sc_gettemp = amdtemp_gettemp; > + if (family == 0x15 && model >= 0x60) { > + sc->sc_gettemp = amdtemp_gettemp15hm60h; > + needsmn = true; > + } else > + sc->sc_gettemp = amdtemp_gettemp; > break; > case 0x17: > sc->sc_ntemps = 1; > sc->sc_gettemp = amdtemp_gettemp17h; > + needsmn = true; > + break; > + default: > + device_printf(dev, "Bogus family 0x%x\n", family); > + return (ENXIO); > + } > + > + if (needsmn) { > sc->sc_smn = device_find_child( > device_get_parent(dev), "amdsmn", -1); > if (sc->sc_smn == NULL) { > @@ -381,7 +445,6 @@ amdtemp_attach(device_t dev) > device_printf(dev, "No SMN device found\n"); > return (ENXIO); > } > - break; > } > > /* Find number of cores per package. */ > @@ -585,6 +648,29 @@ amdtemp_gettemp0f(device_t dev, amdsensor_t sensor) > return (temp); > } > > +static uint32_t > +amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val) > +{ > + uint32_t temp; > + > + /* Convert raw register subfield units (0.125C) to units of 0.1C. */ > + temp = ((val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT) & > + AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4; > + > + /* > + * On Family 15h and higher, if CurTmpTjSel is 11b, the range is > + * adjusted down by 49.0 degrees Celsius. (This adjustment is not > + * documented in BKDGs prior to family 15h model 00h.) > + */ > + if (CPUID_TO_FAMILY(cpu_id) >= 0x15 && > + ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) & > + AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3) > + temp -= AMDTEMP_CURTMP_RANGE_ADJUST; > + > + temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10; > + return (temp); > +} > + > static int32_t > amdtemp_gettemp(device_t dev, amdsensor_t sensor) > { > @@ -592,10 +678,19 @@ amdtemp_gettemp(device_t dev, amdsensor_t sensor) > uint32_t temp; > > temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4); > - temp = ((temp >> 21) & 0x7ff) * 5 / 4; > - temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; > + return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp)); > +} > > - return (temp); > +static int32_t > +amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor) > +{ > + struct amdtemp_softc *sc = device_get_softc(dev); > + uint32_t val; > + int error; > + > + error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val); > + KASSERT(error == 0, ("amdsmn_read")); > + return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val)); > } > > static int32_t > @@ -610,7 +705,7 @@ amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) > > temp = ((val >> 21) & 0x7ff) * 5 / 4; > if ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0) > - temp -= AMDTEMP_17H_CUR_TMP_RANGE_OFF; > + temp -= AMDTEMP_CURTMP_RANGE_ADJUST; > temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; > > return (temp); > This broke/removed support for family 0fh. I'm seeing strange temperatures and negative on one machine. -- Cheers, Cy Schubert <Cy.Schubert@cschubert.com> FreeBSD UNIX: <cy@FreeBSD.org> Web: http://www.FreeBSD.org The need of the many outweighs the greed of the few.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201907221258.x6MCwbZj011527>