Date: Fri, 26 Aug 2005 19:05:12 +0000 (GMT) From: jkoshy@FreeBSD.ORG (Joseph Koshy) To: Stephan Uphoff <ups@FreeBSD.org> Cc: cvs-src@FreeBSD.org, src-committers@FreeBSD.org, cvs-all@FreeBSD.org, jkoshy@FreeBSD.ORG Subject: Re: cvs commit: src/sys/amd64/amd64 exception.S Message-ID: <20050826190512.1717116A420@hub.freebsd.org> In-Reply-To: Message from Stephan Uphoff <ups@FreeBSD.org> of "Thu, 25 Aug 2005 20:33:43 GMT." <200508252033.j7PKXhdT078969@repoman.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
> ups 2005-08-25 20:33:43 UTC > > FreeBSD src repository > > Modified files: > sys/amd64/amd64 exception.S > Log: > NMI handler should not enable interrupts. > > Tested by: kris@ > MFC after: 3 weeks Actually, NMI handling doesn't quite work on the AMD64, even with this change. The problem is that the kernel protects code where it uses SWAPGS from reentrancy by blocking interrupts (using 'cli'). However, NMIs are non-maskable, and if an NMI is taken when in such a critical section we end up with the GS.base values getting out of sync and a processor reset or a double fault on a subsequent use of the out-of-sync %gs value. The attached patch addresses this bug for NMIs. It is needed before you can use hwpmc(4) to sample on an AMD64 box. The general solution is more complicated than this patch. In theory all unmaskable traps into the kernel appear to be susceptible to having the processor's GS.base values getting out of sync. In practice though, this appears to be a problem only for NMIs as most of the other exceptions do not happen during the vulnerable critical section. Debug exceptions taken from userland cannot trigger the bug. A machine check exception would be vulnerable, but we don't implement this today. Regards, Koshy <jkoshy@freebsd.org> Index: db_trace.c =================================================================== RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/db_trace.c,v retrieving revision 1.68 diff -u -u -r1.68 db_trace.c --- db_trace.c 3 Aug 2005 04:33:48 -0000 1.68 +++ db_trace.c 20 Aug 2005 15:17:12 -0000 @@ -317,7 +317,8 @@ db_symbol_values(sym, &name, NULL); if (name != NULL) { if (strcmp(name, "calltrap") == 0 || - strcmp(name, "fork_trampoline") == 0) + strcmp(name, "fork_trampoline") == 0 || + strcmp(name, "nmi_calltrap") == 0) frame_type = TRAP; else if (strncmp(name, "Xatpic_intr", 11) == 0 || strncmp(name, "Xatpic_fastintr", 15) == 0 || Index: exception.S =================================================================== RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/exception.S,v retrieving revision 1.126 diff -u -u -r1.126 exception.S --- exception.S 25 Aug 2005 20:33:43 -0000 1.126 +++ exception.S 26 Aug 2005 18:31:39 -0000 @@ -93,8 +93,6 @@ jmp alltraps IDTVEC(div) TRAP(T_DIVIDE) -IDTVEC(nmi) - TRAP_NOEN(T_NMI) IDTVEC(ofl) TRAP(T_OFLOW) IDTVEC(bnd) @@ -313,6 +311,82 @@ IDTVEC(fast_syscall32) sysret +/* + * NMI handling is special. + * + * First, NMIs do not respect the state of the processor's RFLAGS.IF + * bit and the NMI handler may be invoked at any time, including when + * the processor is in a critical section. In particular, this means + * that the processor's GS.base values could be inconsistent on entry + * to the handler. + * + * Second, the processor treats NMIs specially, blocking further NMIs + * till an 'iretq' instruction is executed. We therefore need to + * execute the NMI handler with interrupts disabled to prevent a + * nested interrupt from executing an 'iretq' instruction and + * inadvertently taking the processor out of NMI mode. + * + * We use '%ebx', a C-preserved register, to remember whether to swap + * GS back on the exit path. + */ + +IDTVEC(nmi) + subq $TF_RIP,%rsp + movq $(T_NMI),TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + xorl %ebx,%ebx + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz nmi_needswapgs /* we came from userland */ + movl $MSR_GSBASE,%ecx + rdmsr + cmpl $VM_MAXUSER_ADDRESS >> 32,%edx + jae nmi_calltrap /* GS.base holds a kernel VA */ +nmi_needswapgs: + incl %ebx + swapgs +/* Note: this label is also used by ddb and gdb: */ +nmi_calltrap: + FAKE_MCOUNT(TF_RIP(%rsp)) + call trap + MEXITCOUNT + testl %ebx,%ebx + jz nmi_restoreregs + swapgs +nmi_restoreregs: + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + addq $TF_RIP,%rsp + iretq + ENTRY(fork_trampoline) movq %r12, %rdi /* function */ movq %rbx, %rsi /* arg1 */ Index: genassym.c =================================================================== RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/genassym.c,v retrieving revision 1.155 diff -u -u -r1.155 genassym.c --- genassym.c 20 Nov 2004 02:30:59 -0000 1.155 +++ genassym.c 20 Aug 2005 15:11:01 -0000 @@ -209,3 +209,5 @@ ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse)); + +ASSYM(MSR_GSBASE, MSR_GSBASE); Index: trap.c =================================================================== RCS file: /cvs/FreeBSD/src/sys/amd64/amd64/trap.c,v retrieving revision 1.289 diff -u -u -r1.289 trap.c --- trap.c 29 Jun 2005 23:23:16 -0000 1.289 +++ trap.c 20 Aug 2005 15:18:25 -0000 @@ -219,9 +219,9 @@ type); /* * We shouldn't enable interrupts while in a critical - * section. + * section, or if servicing an NMI. */ - if (td->td_critnest == 0) + if (type != T_NMI && td->td_critnest == 0) enable_intr(); } }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20050826190512.1717116A420>