From owner-svn-src-all@FreeBSD.ORG Fri Apr 2 10:36:40 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 5B48C106564A; Fri, 2 Apr 2010 10:36:40 +0000 (UTC) (envelope-from marius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 4A0868FC18; Fri, 2 Apr 2010 10:36:40 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o32Aae9T028434; Fri, 2 Apr 2010 10:36:40 GMT (envelope-from marius@svn.freebsd.org) Received: (from marius@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o32Aae4x028432; Fri, 2 Apr 2010 10:36:40 GMT (envelope-from marius@svn.freebsd.org) Message-Id: <201004021036.o32Aae4x028432@svn.freebsd.org> From: Marius Strobl Date: Fri, 2 Apr 2010 10:36:40 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r206086 - head/sys/sparc64/sparc64 X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 02 Apr 2010 10:36:40 -0000 Author: marius Date: Fri Apr 2 10:36:40 2010 New Revision: 206086 URL: http://svn.freebsd.org/changeset/base/206086 Log: - Try do deal gracefully with correctable ECC errors. - Improve the reporting of unhandled kernel and user traps. Modified: head/sys/sparc64/sparc64/trap.c Modified: head/sys/sparc64/sparc64/trap.c ============================================================================== --- head/sys/sparc64/sparc64/trap.c Fri Apr 2 08:57:39 2010 (r206085) +++ head/sys/sparc64/sparc64/trap.c Fri Apr 2 10:36:40 2010 (r206086) @@ -106,6 +106,7 @@ void trap(struct trapframe *tf); void syscall(struct trapframe *tf); static int fetch_syscall_args(struct thread *td, struct syscall_args *sa); +static int trap_cecc(void); static int trap_pfault(struct thread *td, struct trapframe *tf); extern char copy_fault[]; @@ -240,6 +241,10 @@ int debugger_on_signal = 0; SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW, &debugger_on_signal, 0, ""); +u_int corrected_ecc = 0; +SYSCTL_UINT(_machdep, OID_AUTO, corrected_ecc, CTLFLAG_RD, &corrected_ecc, 0, + "corrected ECC errors"); + /* * SUNW,set-trap-table allows to take over %tba from the PROM, which * will turn off interrupts and handle outstanding ones while doing so, @@ -308,10 +313,16 @@ trap(struct trapframe *tf) case T_SPILL: sig = rwindow_save(td); break; + case T_CORRECTED_ECC_ERROR: + sig = trap_cecc(); + break; default: - if (tf->tf_type < 0 || tf->tf_type >= T_MAX || - trap_sig[tf->tf_type] == -1) - panic("trap: bad trap type"); + if (tf->tf_type < 0 || tf->tf_type >= T_MAX) + panic("trap: bad trap type %#lx (user)", + tf->tf_type); + else if (trap_sig[tf->tf_type] == -1) + panic("trap: %s (user)", + trap_msg[tf->tf_type]); sig = trap_sig[tf->tf_type]; break; } @@ -400,18 +411,53 @@ trap(struct trapframe *tf) } error = 1; break; + case T_CORRECTED_ECC_ERROR: + error = trap_cecc(); + break; default: error = 1; break; } - if (error != 0) - panic("trap: %s", trap_msg[tf->tf_type & ~T_KERNEL]); + if (error != 0) { + tf->tf_type &= ~T_KERNEL; + if (tf->tf_type < 0 || tf->tf_type >= T_MAX) + panic("trap: bad trap type %#lx (kernel)", + tf->tf_type); + else if (trap_sig[tf->tf_type] == -1) + panic("trap: %s (kernel)", + trap_msg[tf->tf_type]); + } } CTR1(KTR_TRAP, "trap: td=%p return", td); } static int +trap_cecc(void) +{ + u_long eee; + + /* + * Turn off (non-)correctable error reporting while we're dealing + * with the error. + */ + eee = ldxa(0, ASI_ESTATE_ERROR_EN_REG); + stxa_sync(0, ASI_ESTATE_ERROR_EN_REG, eee & ~(AA_ESTATE_NCEEN | + AA_ESTATE_CEEN)); + /* Flush the caches in order ensure no corrupt data got installed. */ + cache_flush(); + /* Ensure the caches are still turned on (should be). */ + cache_enable(PCPU_GET(impl)); + /* Clear the the error from the AFSR. */ + stxa_sync(0, ASI_AFSR, ldxa(0, ASI_AFSR)); + corrected_ecc++; + printf("corrected ECC error\n"); + /* Turn (non-)correctable error reporting back on. */ + stxa_sync(0, ASI_ESTATE_ERROR_EN_REG, eee); + return (0); +} + +static int trap_pfault(struct thread *td, struct trapframe *tf) { struct vmspace *vm; @@ -664,7 +710,7 @@ syscall(struct trapframe *tf) */ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ? - syscallnames[sa.code] : "???"); + syscallnames[sa.code] : "???"); KASSERT(td->td_critnest == 0, ("System call %s returning in a critical section", (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?