Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 2 Apr 2012 15:07:22 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r233781 - in head/sys: amd64/amd64 i386/i386 x86/include x86/x86
Message-ID:  <201204021507.q32F7Maf003081@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Mon Apr  2 15:07:22 2012
New Revision: 233781
URL: http://svn.freebsd.org/changeset/base/233781

Log:
  Make machine check exception logging more readable.  On newer Intel systems,
  an uncorrected ECC error tends to fire on all CPUs in a package
  simultaneously and the current printf hacks are not sufficient to make
  the messages legible.  Instead, use the existing mca_lock spinlock to
  serialize calls to mca_log() and change the machine check code to panic
  directly when an unrecoverable error is encountered rather than falling
  back to a trap_fatal() call in trap() (which adds nearly a screen-full of
  logging messages that aren't useful for machine checks).
  
  MFC after:	2 weeks

Modified:
  head/sys/amd64/amd64/trap.c
  head/sys/i386/i386/trap.c
  head/sys/x86/include/mca.h
  head/sys/x86/x86/mca.c

Modified: head/sys/amd64/amd64/trap.c
==============================================================================
--- head/sys/amd64/amd64/trap.c	Mon Apr  2 14:52:28 2012	(r233780)
+++ head/sys/amd64/amd64/trap.c	Mon Apr  2 15:07:22 2012	(r233781)
@@ -233,8 +233,7 @@ trap(struct trapframe *frame)
 #endif
 
 	if (type == T_MCHK) {
-		if (!mca_intr())
-			trap_fatal(frame, 0);
+		mca_intr();
 		goto out;
 	}
 

Modified: head/sys/i386/i386/trap.c
==============================================================================
--- head/sys/i386/i386/trap.c	Mon Apr  2 14:52:28 2012	(r233780)
+++ head/sys/i386/i386/trap.c	Mon Apr  2 15:07:22 2012	(r233781)
@@ -254,8 +254,7 @@ trap(struct trapframe *frame)
 #endif
 
 	if (type == T_MCHK) {
-		if (!mca_intr())
-			trap_fatal(frame, 0);
+		mca_intr();
 		goto out;
 	}
 

Modified: head/sys/x86/include/mca.h
==============================================================================
--- head/sys/x86/include/mca.h	Mon Apr  2 14:52:28 2012	(r233780)
+++ head/sys/x86/include/mca.h	Mon Apr  2 15:07:22 2012	(r233781)
@@ -48,7 +48,7 @@ struct mca_record {
 
 void	cmc_intr(void);
 void	mca_init(void);
-int	mca_intr(void);
+void	mca_intr(void);
 void	mca_resume(void);
 
 #endif

Modified: head/sys/x86/x86/mca.c
==============================================================================
--- head/sys/x86/x86/mca.c	Mon Apr  2 14:52:28 2012	(r233780)
+++ head/sys/x86/x86/mca.c	Mon Apr  2 15:07:22 2012	(r233781)
@@ -457,9 +457,9 @@ mca_record_entry(enum scan_mode mode, co
 		mtx_lock_spin(&mca_lock);
 		rec = STAILQ_FIRST(&mca_freelist);
 		if (rec == NULL) {
-			mtx_unlock_spin(&mca_lock);
 			printf("MCA: Unable to allocate space for an event.\n");
 			mca_log(record);
+			mtx_unlock_spin(&mca_lock);
 			return;
 		}
 		STAILQ_REMOVE_HEAD(&mca_freelist, link);
@@ -589,7 +589,9 @@ mca_scan(enum scan_mode mode)
 			count++;
 			if (rec.mr_status & ucmask) {
 				recoverable = 0;
+				mtx_lock_spin(&mca_lock);
 				mca_log(&rec);
+				mtx_unlock_spin(&mca_lock);
 			}
 			mca_record_entry(mode, &rec);
 		}
@@ -636,9 +638,7 @@ mca_scan_cpus(void *arg)
 		STAILQ_FOREACH(mca, &mca_records, link) {
 			if (!mca->logged) {
 				mca->logged = 1;
-				mtx_unlock_spin(&mca_lock);
 				mca_log(&mca->rec);
-				mtx_lock_spin(&mca_lock);
 			}
 		}
 		mtx_unlock_spin(&mca_lock);
@@ -924,7 +924,7 @@ mca_init_bsp(void *arg __unused)
 SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL);
 
 /* Called when a machine check exception fires. */
-int
+void
 mca_intr(void)
 {
 	uint64_t mcg_status;
@@ -938,7 +938,7 @@ mca_intr(void)
 		printf("MC Type: 0x%jx  Address: 0x%jx\n",
 		    (uintmax_t)rdmsr(MSR_P5_MC_TYPE),
 		    (uintmax_t)rdmsr(MSR_P5_MC_ADDR));
-		return (0);
+		panic("Machine check");
 	}
 
 	/* Scan the banks and check for any non-recoverable errors. */
@@ -949,7 +949,8 @@ mca_intr(void)
 
 	/* Clear MCIP. */
 	wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
-	return (recoverable);
+	if (!recoverable)
+		panic("Unrecoverable machine check exception");
 }
 
 #ifdef DEV_APIC
@@ -972,9 +973,7 @@ cmc_intr(void)
 		STAILQ_FOREACH(mca, &mca_records, link) {
 			if (!mca->logged) {
 				mca->logged = 1;
-				mtx_unlock_spin(&mca_lock);
 				mca_log(&mca->rec);
-				mtx_lock_spin(&mca_lock);
 			}
 		}
 		mtx_unlock_spin(&mca_lock);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204021507.q32F7Maf003081>