From owner-svn-src-all@FreeBSD.ORG Sat Dec 19 10:38:29 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6883E1065670; Sat, 19 Dec 2009 10:38:29 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 559EF8FC18; Sat, 19 Dec 2009 10:38:29 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id nBJAcTkP030077; Sat, 19 Dec 2009 10:38:29 GMT (envelope-from avg@svn.freebsd.org) Received: (from avg@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id nBJAcSH7030072; Sat, 19 Dec 2009 10:38:28 GMT (envelope-from avg@svn.freebsd.org) Message-Id: <200912191038.nBJAcSH7030072@svn.freebsd.org> From: Andriy Gapon Date: Sat, 19 Dec 2009 10:38:28 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r200711 - in stable/8/sys: amd64/amd64 amd64/include i386/i386 i386/include X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 19 Dec 2009 10:38:29 -0000 Author: avg Date: Sat Dec 19 10:38:28 2009 New Revision: 200711 URL: http://svn.freebsd.org/changeset/base/200711 Log: MFC r200033: mca: improve status checking, recording and reporting Modified: stable/8/sys/amd64/amd64/mca.c stable/8/sys/amd64/include/mca.h stable/8/sys/i386/i386/mca.c stable/8/sys/i386/include/mca.h Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/xen/xenpci/ (props changed) Modified: stable/8/sys/amd64/amd64/mca.c ============================================================================== --- stable/8/sys/amd64/amd64/mca.c Sat Dec 19 10:28:24 2009 (r200710) +++ stable/8/sys/amd64/amd64/mca.c Sat Dec 19 10:38:28 2009 (r200711) @@ -117,48 +117,6 @@ sysctl_mca_records(SYSCTL_HANDLER_ARGS) return (SYSCTL_OUT(req, &record, sizeof(record))); } -static struct mca_record * -mca_record_entry(int bank) -{ - struct mca_internal *rec; - uint64_t status; - u_int p[4]; - - status = rdmsr(MSR_MC_STATUS(bank)); - if (!(status & MC_STATUS_VAL)) - return (NULL); - - rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO); - if (rec == NULL) { - printf("MCA: Unable to allocate space for an event.\n"); - return (NULL); - } - - /* Save exception information. */ - rec->rec.mr_status = status; - if (status & MC_STATUS_ADDRV) - rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank)); - if (status & MC_STATUS_MISCV) - rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank)); - rec->rec.mr_tsc = rdtsc(); - rec->rec.mr_apic_id = PCPU_GET(apic_id); - - /* - * Clear machine check. Don't do this for uncorrectable - * errors so that the BIOS can see them. - */ - if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { - wrmsr(MSR_MC_STATUS(bank), 0); - do_cpuid(0, p); - } - - mtx_lock_spin(&mca_lock); - STAILQ_INSERT_TAIL(&mca_records, rec, link); - mca_count++; - mtx_unlock_spin(&mca_lock); - return (&rec->rec); -} - static const char * mca_error_ttype(uint16_t mca_error) { @@ -219,11 +177,13 @@ mca_error_request(uint16_t mca_error) } /* Dump details about a single machine check. */ -static void -mca_log(struct mca_record *rec) +static void __nonnull(1) +mca_log(const struct mca_record *rec) { uint16_t mca_error; + printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank, + (long long)rec->mr_status); printf("MCA: CPU %d ", rec->mr_apic_id); if (rec->mr_status & MC_STATUS_UC) printf("UNCOR "); @@ -329,6 +289,59 @@ mca_log(struct mca_record *rec) printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); } +static int __nonnull(2) +mca_check_status(int bank, struct mca_record *rec) +{ + uint64_t status; + u_int p[4]; + + status = rdmsr(MSR_MC_STATUS(bank)); + if (!(status & MC_STATUS_VAL)) + return (0); + + /* Save exception information. */ + rec->mr_status = status; + rec->mr_bank = bank; + rec->mr_addr = 0; + if (status & MC_STATUS_ADDRV) + rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); + rec->mr_misc = 0; + if (status & MC_STATUS_MISCV) + rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); + rec->mr_tsc = rdtsc(); + rec->mr_apic_id = PCPU_GET(apic_id); + + /* + * Clear machine check. Don't do this for uncorrectable + * errors so that the BIOS can see them. + */ + if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { + wrmsr(MSR_MC_STATUS(bank), 0); + do_cpuid(0, p); + } + return (1); +} + +static void __nonnull(1) +mca_record_entry(const struct mca_record *record) +{ + struct mca_internal *rec; + + rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT); + if (rec == NULL) { + printf("MCA: Unable to allocate space for an event.\n"); + mca_log(record); + return; + } + + rec->rec = *record; + rec->logged = 0; + mtx_lock_spin(&mca_lock); + STAILQ_INSERT_TAIL(&mca_records, rec, link); + mca_count++; + mtx_unlock_spin(&mca_lock); +} + /* * This scans all the machine check banks of the current CPU to see if * there are any machine checks. Any non-recoverable errors are @@ -341,7 +354,7 @@ mca_log(struct mca_record *rec) static int mca_scan(int mcip) { - struct mca_record *rec; + struct mca_record rec; uint64_t mcg_cap, ucmask; int count, i, recoverable; @@ -354,13 +367,13 @@ mca_scan(int mcip) ucmask |= MC_STATUS_OVER; mcg_cap = rdmsr(MSR_MCG_CAP); for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { - rec = mca_record_entry(i); - if (rec != NULL) { + if (mca_check_status(i, &rec)) { count++; - if (rec->mr_status & ucmask) { + if (rec.mr_status & ucmask) { recoverable = 0; - mca_log(rec); + mca_log(&rec); } + mca_record_entry(&rec); } } return (mcip ? recoverable : count); Modified: stable/8/sys/amd64/include/mca.h ============================================================================== --- stable/8/sys/amd64/include/mca.h Sat Dec 19 10:28:24 2009 (r200710) +++ stable/8/sys/amd64/include/mca.h Sat Dec 19 10:38:28 2009 (r200711) @@ -36,6 +36,7 @@ struct mca_record { uint64_t mr_misc; uint64_t mr_tsc; int mr_apic_id; + int mr_bank; }; #ifdef _KERNEL Modified: stable/8/sys/i386/i386/mca.c ============================================================================== --- stable/8/sys/i386/i386/mca.c Sat Dec 19 10:28:24 2009 (r200710) +++ stable/8/sys/i386/i386/mca.c Sat Dec 19 10:38:28 2009 (r200711) @@ -117,48 +117,6 @@ sysctl_mca_records(SYSCTL_HANDLER_ARGS) return (SYSCTL_OUT(req, &record, sizeof(record))); } -static struct mca_record * -mca_record_entry(int bank) -{ - struct mca_internal *rec; - uint64_t status; - u_int p[4]; - - status = rdmsr(MSR_MC_STATUS(bank)); - if (!(status & MC_STATUS_VAL)) - return (NULL); - - rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO); - if (rec == NULL) { - printf("MCA: Unable to allocate space for an event.\n"); - return (NULL); - } - - /* Save exception information. */ - rec->rec.mr_status = status; - if (status & MC_STATUS_ADDRV) - rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank)); - if (status & MC_STATUS_MISCV) - rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank)); - rec->rec.mr_tsc = rdtsc(); - rec->rec.mr_apic_id = PCPU_GET(apic_id); - - /* - * Clear machine check. Don't do this for uncorrectable - * errors so that the BIOS can see them. - */ - if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { - wrmsr(MSR_MC_STATUS(bank), 0); - do_cpuid(0, p); - } - - mtx_lock_spin(&mca_lock); - STAILQ_INSERT_TAIL(&mca_records, rec, link); - mca_count++; - mtx_unlock_spin(&mca_lock); - return (&rec->rec); -} - static const char * mca_error_ttype(uint16_t mca_error) { @@ -219,11 +177,13 @@ mca_error_request(uint16_t mca_error) } /* Dump details about a single machine check. */ -static void -mca_log(struct mca_record *rec) +static void __nonnull(1) +mca_log(const struct mca_record *rec) { uint16_t mca_error; + printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank, + (long long)rec->mr_status); printf("MCA: CPU %d ", rec->mr_apic_id); if (rec->mr_status & MC_STATUS_UC) printf("UNCOR "); @@ -329,6 +289,59 @@ mca_log(struct mca_record *rec) printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); } +static int __nonnull(2) +mca_check_status(int bank, struct mca_record *rec) +{ + uint64_t status; + u_int p[4]; + + status = rdmsr(MSR_MC_STATUS(bank)); + if (!(status & MC_STATUS_VAL)) + return (0); + + /* Save exception information. */ + rec->mr_status = status; + rec->mr_bank = bank; + rec->mr_addr = 0; + if (status & MC_STATUS_ADDRV) + rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); + rec->mr_misc = 0; + if (status & MC_STATUS_MISCV) + rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); + rec->mr_tsc = rdtsc(); + rec->mr_apic_id = PCPU_GET(apic_id); + + /* + * Clear machine check. Don't do this for uncorrectable + * errors so that the BIOS can see them. + */ + if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { + wrmsr(MSR_MC_STATUS(bank), 0); + do_cpuid(0, p); + } + return (1); +} + +static void __nonnull(1) +mca_record_entry(const struct mca_record *record) +{ + struct mca_internal *rec; + + rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT); + if (rec == NULL) { + printf("MCA: Unable to allocate space for an event.\n"); + mca_log(record); + return; + } + + rec->rec = *record; + rec->logged = 0; + mtx_lock_spin(&mca_lock); + STAILQ_INSERT_TAIL(&mca_records, rec, link); + mca_count++; + mtx_unlock_spin(&mca_lock); +} + /* * This scans all the machine check banks of the current CPU to see if * there are any machine checks. Any non-recoverable errors are @@ -341,7 +354,7 @@ mca_log(struct mca_record *rec) static int mca_scan(int mcip) { - struct mca_record *rec; + struct mca_record rec; uint64_t mcg_cap, ucmask; int count, i, recoverable; @@ -354,13 +367,13 @@ mca_scan(int mcip) ucmask |= MC_STATUS_OVER; mcg_cap = rdmsr(MSR_MCG_CAP); for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { - rec = mca_record_entry(i); - if (rec != NULL) { + if (mca_check_status(i, &rec)) { count++; - if (rec->mr_status & ucmask) { + if (rec.mr_status & ucmask) { recoverable = 0; - mca_log(rec); + mca_log(&rec); } + mca_record_entry(&rec); } } return (mcip ? recoverable : count); Modified: stable/8/sys/i386/include/mca.h ============================================================================== --- stable/8/sys/i386/include/mca.h Sat Dec 19 10:28:24 2009 (r200710) +++ stable/8/sys/i386/include/mca.h Sat Dec 19 10:38:28 2009 (r200711) @@ -36,6 +36,7 @@ struct mca_record { uint64_t mr_misc; uint64_t mr_tsc; int mr_apic_id; + int mr_bank; }; #ifdef _KERNEL