From owner-svn-src-stable-8@FreeBSD.ORG Fri Mar 26 13:49:47 2010 Return-Path: Delivered-To: svn-src-stable-8@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2AE101065672; Fri, 26 Mar 2010 13:49:47 +0000 (UTC) (envelope-from jhb@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 18BCD8FC08; Fri, 26 Mar 2010 13:49:47 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o2QDnkMb062952; Fri, 26 Mar 2010 13:49:46 GMT (envelope-from jhb@svn.freebsd.org) Received: (from jhb@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o2QDnkro062945; Fri, 26 Mar 2010 13:49:46 GMT (envelope-from jhb@svn.freebsd.org) Message-Id: <201003261349.o2QDnkro062945@svn.freebsd.org> From: John Baldwin Date: Fri, 26 Mar 2010 13:49:46 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r205689 - in stable/8/sys: amd64/amd64 amd64/include i386/i386 i386/include X-BeenThere: svn-src-stable-8@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 8-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 26 Mar 2010 13:49:47 -0000 Author: jhb Date: Fri Mar 26 13:49:46 2010 New Revision: 205689 URL: http://svn.freebsd.org/changeset/base/205689 Log: MFC 205214: - Extend the machine check record structure to include several fields useful for parsing model-specific and other fields in machine check events including the global machine check capabilities and status registers, CPU identification, and the FreeBSD CPU ID. - Report these added fields in the console log of a machine check so that a record structure can be reconstituted from the console messages. - Parse new architectural errors including memory controller errors. Modified: stable/8/sys/amd64/amd64/mca.c stable/8/sys/amd64/include/mca.h stable/8/sys/amd64/include/specialreg.h stable/8/sys/i386/i386/mca.c stable/8/sys/i386/include/mca.h stable/8/sys/i386/include/specialreg.h Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/xen/xenpci/ (props changed) stable/8/sys/net/ (props changed) stable/8/sys/netinet/ipfw/ (props changed) Modified: stable/8/sys/amd64/amd64/mca.c ============================================================================== --- stable/8/sys/amd64/amd64/mca.c Fri Mar 26 13:01:53 2010 (r205688) +++ stable/8/sys/amd64/amd64/mca.c Fri Mar 26 13:49:46 2010 (r205689) @@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error) return ("???"); } +static const char * +mca_error_mmtype(uint16_t mca_error) +{ + + switch ((mca_error & 0x70) >> 4) { + case 0x0: + return ("GEN"); + case 0x1: + return ("RD"); + case 0x2: + return ("WR"); + case 0x3: + return ("AC"); + case 0x4: + return ("MS"); + } + return ("???"); +} + /* Dump details about a single machine check. */ static void __nonnull(1) mca_log(const struct mca_record *rec) { uint16_t mca_error; - printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank, + printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); - printf("MCA: CPU %d ", rec->mr_apic_id); + printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", + (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); + printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, + rec->mr_cpu_id, rec->mr_apic_id); + printf("MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) printf("UNCOR "); - else + else { printf("COR "); + if (rec->mr_mcg_cap & MCG_CAP_TES_P) + printf("(%lld) ", ((long long)rec->mr_status & + MC_STATUS_COR_COUNT) >> 38); + } if (rec->mr_status & MC_STATUS_PCC) printf("PCC "); if (rec->mr_status & MC_STATUS_OVER) @@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec) case 0x0004: printf("FRC error"); break; + case 0x0005: + printf("internal parity error"); + break; case 0x0400: printf("internal timer error"); break; @@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec) break; } + /* Memory controller error. */ + if ((mca_error & 0xef80) == 0x0080) { + printf("%s channel ", mca_error_mmtype(mca_error)); + if ((mca_error & 0x000f) != 0x000f) + printf("%d", mca_error & 0x000f); + else + printf("??"); + printf(" memory error"); + break; + } + /* Cache error. */ if ((mca_error & 0xef00) == 0x0100) { printf("%sCACHE %s %s error", @@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_re rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); rec->mr_tsc = rdtsc(); rec->mr_apic_id = PCPU_GET(apic_id); + rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP); + rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS); + rec->mr_cpu_id = cpu_id; + rec->mr_cpu_vendor_id = cpu_vendor_id; + rec->mr_cpu = PCPU_GET(cpuid); /* * Clear machine check. Don't do this for uncorrectable Modified: stable/8/sys/amd64/include/mca.h ============================================================================== --- stable/8/sys/amd64/include/mca.h Fri Mar 26 13:01:53 2010 (r205688) +++ stable/8/sys/amd64/include/mca.h Fri Mar 26 13:49:46 2010 (r205689) @@ -37,6 +37,11 @@ struct mca_record { uint64_t mr_tsc; int mr_apic_id; int mr_bank; + uint64_t mr_mcg_cap; + uint64_t mr_mcg_status; + int mr_cpu_id; + int mr_cpu_vendor_id; + int mr_cpu; }; #ifdef _KERNEL Modified: stable/8/sys/amd64/include/specialreg.h ============================================================================== --- stable/8/sys/amd64/include/specialreg.h Fri Mar 26 13:01:53 2010 (r205688) +++ stable/8/sys/amd64/include/specialreg.h Fri Mar 26 13:49:46 2010 (r205689) @@ -267,6 +267,7 @@ #define MSR_MTRR16kBase 0x258 #define MSR_MTRR4kBase 0x268 #define MSR_PAT 0x277 +#define MSR_MC0_CTL2 0x280 #define MSR_MTRRdefType 0x2ff #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 @@ -352,8 +353,10 @@ #define MCG_CAP_COUNT 0x000000ff #define MCG_CAP_CTL_P 0x00000100 #define MCG_CAP_EXT_P 0x00000200 +#define MCG_CAP_CMCI_P 0x00000400 #define MCG_CAP_TES_P 0x00000800 #define MCG_CAP_EXT_CNT 0x00ff0000 +#define MCG_CAP_SER_P 0x01000000 #define MCG_STATUS_RIPV 0x00000001 #define MCG_STATUS_EIPV 0x00000002 #define MCG_STATUS_MCIP 0x00000004 @@ -363,9 +366,14 @@ #define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4) #define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4) #define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4) +#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */ #define MC_STATUS_MCA_ERROR 0x000000000000ffff #define MC_STATUS_MODEL_ERROR 0x00000000ffff0000 #define MC_STATUS_OTHER_INFO 0x01ffffff00000000 +#define MC_STATUS_COR_COUNT 0x001fffc000000000 /* If MCG_CAP_TES_P */ +#define MC_STATUS_TES_STATUS 0x0060000000000000 /* If MCG_CAP_TES_P */ +#define MC_STATUS_AR 0x0080000000000000 /* If MCG_CAP_CMCI_P */ +#define MC_STATUS_S 0x0100000000000000 /* If MCG_CAP_CMCI_P */ #define MC_STATUS_PCC 0x0200000000000000 #define MC_STATUS_ADDRV 0x0400000000000000 #define MC_STATUS_MISCV 0x0800000000000000 @@ -373,6 +381,10 @@ #define MC_STATUS_UC 0x2000000000000000 #define MC_STATUS_OVER 0x4000000000000000 #define MC_STATUS_VAL 0x8000000000000000 +#define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */ +#define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */ +#define MC_CTL2_THRESHOLD 0x0000000000003fff +#define MC_CTL2_CMCI_EN 0x0000000040000000 /* * The following four 3-byte registers control the non-cacheable regions. Modified: stable/8/sys/i386/i386/mca.c ============================================================================== --- stable/8/sys/i386/i386/mca.c Fri Mar 26 13:01:53 2010 (r205688) +++ stable/8/sys/i386/i386/mca.c Fri Mar 26 13:49:46 2010 (r205689) @@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error) return ("???"); } +static const char * +mca_error_mmtype(uint16_t mca_error) +{ + + switch ((mca_error & 0x70) >> 4) { + case 0x0: + return ("GEN"); + case 0x1: + return ("RD"); + case 0x2: + return ("WR"); + case 0x3: + return ("AC"); + case 0x4: + return ("MS"); + } + return ("???"); +} + /* Dump details about a single machine check. */ static void __nonnull(1) mca_log(const struct mca_record *rec) { uint16_t mca_error; - printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank, + printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); - printf("MCA: CPU %d ", rec->mr_apic_id); + printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", + (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); + printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, + rec->mr_cpu_id, rec->mr_apic_id); + printf("MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) printf("UNCOR "); - else + else { printf("COR "); + if (rec->mr_mcg_cap & MCG_CAP_TES_P) + printf("(%lld) ", ((long long)rec->mr_status & + MC_STATUS_COR_COUNT) >> 38); + } if (rec->mr_status & MC_STATUS_PCC) printf("PCC "); if (rec->mr_status & MC_STATUS_OVER) @@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec) case 0x0004: printf("FRC error"); break; + case 0x0005: + printf("internal parity error"); + break; case 0x0400: printf("internal timer error"); break; @@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec) break; } + /* Memory controller error. */ + if ((mca_error & 0xef80) == 0x0080) { + printf("%s channel ", mca_error_mmtype(mca_error)); + if ((mca_error & 0x000f) != 0x000f) + printf("%d", mca_error & 0x000f); + else + printf("??"); + printf(" memory error"); + break; + } + /* Cache error. */ if ((mca_error & 0xef00) == 0x0100) { printf("%sCACHE %s %s error", @@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_re rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); rec->mr_tsc = rdtsc(); rec->mr_apic_id = PCPU_GET(apic_id); + rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP); + rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS); + rec->mr_cpu_id = cpu_id; + rec->mr_cpu_vendor_id = cpu_vendor_id; + rec->mr_cpu = PCPU_GET(cpuid); /* * Clear machine check. Don't do this for uncorrectable Modified: stable/8/sys/i386/include/mca.h ============================================================================== --- stable/8/sys/i386/include/mca.h Fri Mar 26 13:01:53 2010 (r205688) +++ stable/8/sys/i386/include/mca.h Fri Mar 26 13:49:46 2010 (r205689) @@ -37,6 +37,11 @@ struct mca_record { uint64_t mr_tsc; int mr_apic_id; int mr_bank; + uint64_t mr_mcg_cap; + uint64_t mr_mcg_status; + int mr_cpu_id; + int mr_cpu_vendor_id; + int mr_cpu; }; #ifdef _KERNEL Modified: stable/8/sys/i386/include/specialreg.h ============================================================================== --- stable/8/sys/i386/include/specialreg.h Fri Mar 26 13:01:53 2010 (r205688) +++ stable/8/sys/i386/include/specialreg.h Fri Mar 26 13:49:46 2010 (r205689) @@ -273,6 +273,7 @@ #define MSR_MTRR16kBase 0x258 #define MSR_MTRR4kBase 0x268 #define MSR_PAT 0x277 +#define MSR_MC0_CTL2 0x280 #define MSR_MTRRdefType 0x2ff #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 @@ -421,8 +422,10 @@ #define MCG_CAP_COUNT 0x000000ff #define MCG_CAP_CTL_P 0x00000100 #define MCG_CAP_EXT_P 0x00000200 +#define MCG_CAP_CMCI_P 0x00000400 #define MCG_CAP_TES_P 0x00000800 #define MCG_CAP_EXT_CNT 0x00ff0000 +#define MCG_CAP_SER_P 0x01000000 #define MCG_STATUS_RIPV 0x00000001 #define MCG_STATUS_EIPV 0x00000002 #define MCG_STATUS_MCIP 0x00000004 @@ -432,9 +435,14 @@ #define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4) #define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4) #define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4) +#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */ #define MC_STATUS_MCA_ERROR 0x000000000000ffff #define MC_STATUS_MODEL_ERROR 0x00000000ffff0000 #define MC_STATUS_OTHER_INFO 0x01ffffff00000000 +#define MC_STATUS_COR_COUNT 0x001fffc000000000 /* If MCG_CAP_TES_P */ +#define MC_STATUS_TES_STATUS 0x0060000000000000 /* If MCG_CAP_TES_P */ +#define MC_STATUS_AR 0x0080000000000000 /* If MCG_CAP_CMCI_P */ +#define MC_STATUS_S 0x0100000000000000 /* If MCG_CAP_CMCI_P */ #define MC_STATUS_PCC 0x0200000000000000 #define MC_STATUS_ADDRV 0x0400000000000000 #define MC_STATUS_MISCV 0x0800000000000000 @@ -442,6 +450,10 @@ #define MC_STATUS_UC 0x2000000000000000 #define MC_STATUS_OVER 0x4000000000000000 #define MC_STATUS_VAL 0x8000000000000000 +#define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */ +#define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */ +#define MC_CTL2_THRESHOLD 0x0000000000003fff +#define MC_CTL2_CMCI_EN 0x0000000040000000 /* * The following four 3-byte registers control the non-cacheable regions.