From owner-svn-src-head@FreeBSD.ORG Mon Apr 2 15:07:22 2012 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id C2949106564A; Mon, 2 Apr 2012 15:07:22 +0000 (UTC) (envelope-from jhb@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id AD5F78FC16; Mon, 2 Apr 2012 15:07:22 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q32F7MEJ003086; Mon, 2 Apr 2012 15:07:22 GMT (envelope-from jhb@svn.freebsd.org) Received: (from jhb@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q32F7Maf003081; Mon, 2 Apr 2012 15:07:22 GMT (envelope-from jhb@svn.freebsd.org) Message-Id: <201204021507.q32F7Maf003081@svn.freebsd.org> From: John Baldwin Date: Mon, 2 Apr 2012 15:07:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r233781 - in head/sys: amd64/amd64 i386/i386 x86/include x86/x86 X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 02 Apr 2012 15:07:23 -0000 Author: jhb Date: Mon Apr 2 15:07:22 2012 New Revision: 233781 URL: http://svn.freebsd.org/changeset/base/233781 Log: Make machine check exception logging more readable. On newer Intel systems, an uncorrected ECC error tends to fire on all CPUs in a package simultaneously and the current printf hacks are not sufficient to make the messages legible. 
Instead, use the existing mca_lock spinlock to serialize calls to mca_log() and change the machine check code to panic directly when an unrecoverable error is encountered rather than falling back to a trap_fatal() call in trap() (which adds nearly a screen-full of logging messages that aren't useful for machine checks). MFC after: 2 weeks Modified: head/sys/amd64/amd64/trap.c head/sys/i386/i386/trap.c head/sys/x86/include/mca.h head/sys/x86/x86/mca.c Modified: head/sys/amd64/amd64/trap.c ============================================================================== --- head/sys/amd64/amd64/trap.c Mon Apr 2 14:52:28 2012 (r233780) +++ head/sys/amd64/amd64/trap.c Mon Apr 2 15:07:22 2012 (r233781) @@ -233,8 +233,7 @@ trap(struct trapframe *frame) #endif if (type == T_MCHK) { - if (!mca_intr()) - trap_fatal(frame, 0); + mca_intr(); goto out; } Modified: head/sys/i386/i386/trap.c ============================================================================== --- head/sys/i386/i386/trap.c Mon Apr 2 14:52:28 2012 (r233780) +++ head/sys/i386/i386/trap.c Mon Apr 2 15:07:22 2012 (r233781) @@ -254,8 +254,7 @@ trap(struct trapframe *frame) #endif if (type == T_MCHK) { - if (!mca_intr()) - trap_fatal(frame, 0); + mca_intr(); goto out; } Modified: head/sys/x86/include/mca.h ============================================================================== --- head/sys/x86/include/mca.h Mon Apr 2 14:52:28 2012 (r233780) +++ head/sys/x86/include/mca.h Mon Apr 2 15:07:22 2012 (r233781) @@ -48,7 +48,7 @@ struct mca_record { void cmc_intr(void); void mca_init(void); -int mca_intr(void); +void mca_intr(void); void mca_resume(void); #endif Modified: head/sys/x86/x86/mca.c ============================================================================== --- head/sys/x86/x86/mca.c Mon Apr 2 14:52:28 2012 (r233780) +++ head/sys/x86/x86/mca.c Mon Apr 2 15:07:22 2012 (r233781) @@ -457,9 +457,9 @@ mca_record_entry(enum scan_mode mode, co mtx_lock_spin(&mca_lock); rec = STAILQ_FIRST(&mca_freelist); if 
(rec == NULL) { - mtx_unlock_spin(&mca_lock); printf("MCA: Unable to allocate space for an event.\n"); mca_log(record); + mtx_unlock_spin(&mca_lock); return; } STAILQ_REMOVE_HEAD(&mca_freelist, link); @@ -589,7 +589,9 @@ mca_scan(enum scan_mode mode) count++; if (rec.mr_status & ucmask) { recoverable = 0; + mtx_lock_spin(&mca_lock); mca_log(&rec); + mtx_unlock_spin(&mca_lock); } mca_record_entry(mode, &rec); } @@ -636,9 +638,7 @@ mca_scan_cpus(void *arg) STAILQ_FOREACH(mca, &mca_records, link) { if (!mca->logged) { mca->logged = 1; - mtx_unlock_spin(&mca_lock); mca_log(&mca->rec); - mtx_lock_spin(&mca_lock); } } mtx_unlock_spin(&mca_lock); @@ -924,7 +924,7 @@ mca_init_bsp(void *arg __unused) SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL); /* Called when a machine check exception fires. */ -int +void mca_intr(void) { uint64_t mcg_status; @@ -938,7 +938,7 @@ mca_intr(void) printf("MC Type: 0x%jx Address: 0x%jx\n", (uintmax_t)rdmsr(MSR_P5_MC_TYPE), (uintmax_t)rdmsr(MSR_P5_MC_ADDR)); - return (0); + panic("Machine check"); } /* Scan the banks and check for any non-recoverable errors. */ @@ -949,7 +949,8 @@ mca_intr(void) /* Clear MCIP. */ wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); - return (recoverable); + if (!recoverable) + panic("Unrecoverable machine check exception"); } #ifdef DEV_APIC @@ -972,9 +973,7 @@ cmc_intr(void) STAILQ_FOREACH(mca, &mca_records, link) { if (!mca->logged) { mca->logged = 1; - mtx_unlock_spin(&mca_lock); mca_log(&mca->rec); - mtx_lock_spin(&mca_lock); } } mtx_unlock_spin(&mca_lock);