From owner-svn-src-user@FreeBSD.ORG Sat May 23 19:17:28 2009 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 480F51065678; Sat, 23 May 2009 19:17:28 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 34BB98FC26; Sat, 23 May 2009 19:17:28 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n4NJHSPF064646; Sat, 23 May 2009 19:17:28 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n4NJHRC6064638; Sat, 23 May 2009 19:17:27 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200905231917.n4NJHRC6064638@svn.freebsd.org> From: Kip Macy Date: Sat, 23 May 2009 19:17:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r192663 - in user/kmacy/releng_7_2_fcs/sys: amd64/amd64 amd64/include i386/i386 kern X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 23 May 2009 19:17:28 -0000 Author: kmacy Date: Sat May 23 19:17:27 2009 New Revision: 192663 URL: http://svn.freebsd.org/changeset/base/192663 Log: fix hwpmc callchain and NMI handling Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h 
user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/exception.S Sat May 23 19:17:27 2009 (r192663) @@ -383,22 +383,24 @@ IDTVEC(fast_syscall32) * NMI handling is special. * * First, NMIs do not respect the state of the processor's RFLAGS.IF - * bit and the NMI handler may be invoked at any time, including when - * the processor is in a critical section with RFLAGS.IF == 0. In - * particular, this means that the processor's GS.base values could be - * inconsistent on entry to the handler, and so we need to read - * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a - * C-preserved register, to remember whether to swap GS back on the - * exit path. + * bit. The NMI handler may be entered at any time, including when + * the processor is in a critical section with RFLAGS.IF == 0. + * The processor's GS.base value could be invalid on entry to the + * handler. * * Second, the processor treats NMIs specially, blocking further NMIs - * until an 'iretq' instruction is executed. We therefore need to - * execute the NMI handler with interrupts disabled to prevent a - * nested interrupt from executing an 'iretq' instruction and - * inadvertently taking the processor out of NMI mode. + * until an 'iretq' instruction is executed. We thus need to execute + * the NMI handler with interrupts disabled, to prevent a nested interrupt + * from executing an 'iretq' instruction and inadvertently taking the + * processor out of NMI mode. * - * Third, the NMI handler runs on its own stack (tss_ist1), shared - * with the double fault handler. + * Third, the NMI handler runs on its own stack (tss_ist2). 
The canonical + * GS.base value for the processor is stored just above the bottom of its + * NMI stack. For NMIs taken from kernel mode, the current value in + * the processor's GS.base is saved at entry to C-preserved register %r12, + * the canonical value for GS.base is then loaded into the processor, and + * the saved value is restored at exit time. For NMIs taken from user mode, + * the cheaper 'SWAPGS' instructions are used for swapping GS.base. */ IDTVEC(nmi) @@ -423,12 +425,22 @@ IDTVEC(nmi) movq %r15,TF_R15(%rsp) xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) - jnz nmi_needswapgs /* we came from userland */ + jnz nmi_fromuserspace + /* + * We've interrupted the kernel. Preserve GS.base in %r12. + */ movl $MSR_GSBASE,%ecx rdmsr - cmpl $VM_MAXUSER_ADDRESS >> 32,%edx - jae nmi_calltrap /* GS.base holds a kernel VA */ -nmi_needswapgs: + movq %rax,%r12 + shlq $32,%rdx + orq %rdx,%r12 + /* Retrieve and load the canonical value for GS.base. */ + movq TF_SIZE(%rsp),%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + jmp nmi_calltrap +nmi_fromuserspace: incl %ebx swapgs /* Note: this label is also used by ddb and gdb: */ @@ -439,14 +451,19 @@ nmi_calltrap: MEXITCOUNT #ifdef HWPMC_HOOKS /* - * Check if the current trap was from user mode and if so - * whether the current thread needs a user call chain to be - * captured. We are still in NMI mode at this point. + * Capture a userspace callchain if needed. + * + * - Check if the current trap was from user mode. + * - Check if the current thread is valid. + * - Check if the thread requires a user call chain to be + * captured. + * + * We are still in NMI mode at this point. */ - testb $SEL_RPL_MASK,TF_CS(%rsp) - jz nocallchain - movq PCPU(CURTHREAD),%rax /* curthread present? */ - orq %rax,%rax + testl %ebx,%ebx + jz nocallchain /* not from userspace */ + movq PCPU(CURTHREAD),%rax + orq %rax,%rax /* curthread present? */ jz nocallchain testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? 
*/ jz nocallchain @@ -459,9 +476,9 @@ nmi_calltrap: */ movq %rsp,%rsi /* source stack pointer */ movq $TF_SIZE,%rcx - movq PCPU(RSP0),%rbx - subq %rcx,%rbx - movq %rbx,%rdi /* destination stack pointer */ + movq PCPU(RSP0),%rdx + subq %rcx,%rdx + movq %rdx,%rdi /* destination stack pointer */ shrq $3,%rcx /* trap frame size in long words */ cld @@ -470,7 +487,7 @@ nmi_calltrap: movl %ss,%eax pushq %rax /* tf_ss */ - pushq %rbx /* tf_rsp (on kernel stack) */ + pushq %rdx /* tf_rsp (on kernel stack) */ pushfq /* tf_rflags */ movl %cs,%eax pushq %rax /* tf_cs */ @@ -480,21 +497,36 @@ outofnmi: /* * At this point the processor has exited NMI mode and is running * with interrupts turned off on the normal kernel stack. - * We turn interrupts back on, and take the usual 'doreti' exit - * path. * * If a pending NMI gets recognized at or after this point, it - * will cause a kernel callchain to be traced. Since this path - * is only taken for NMI interrupts from user space, our `swapgs' - * state is correct for taking the doreti path. + * will cause a kernel callchain to be traced. + * + * We turn interrupts back on, and call the user callchain capture hook. */ + movq pmc_hook,%rax + orq %rax,%rax + jz nocallchain + movq PCPU(CURTHREAD),%rdi /* thread */ + movq $PMC_FN_USER_CALLCHAIN,%rsi /* command */ + movq %rsp,%rdx /* frame */ sti - jmp doreti + call *%rax + cli nocallchain: #endif testl %ebx,%ebx - jz nmi_restoreregs + jz nmi_kernelexit swapgs + jmp nmi_restoreregs +nmi_kernelexit: + /* + * Put back the preserved MSR_GSBASE value. 
+ */ + movl $MSR_GSBASE,%ecx + movq %r12,%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr nmi_restoreregs: movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/genassym.c Sat May 23 19:17:27 2009 (r192663) @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include @@ -44,6 +45,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef HWPMC_HOOKS +#include +#endif #include #include #include @@ -223,3 +227,7 @@ ASSYM(MTX_LOCK, offsetof(struct mtx, mtx ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse)); ASSYM(MSR_GSBASE, MSR_GSBASE); + +#ifdef HWPMC_HOOKS +ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN); +#endif Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/machdep.c Sat May 23 19:17:27 2009 (r192663) @@ -680,6 +680,9 @@ struct gate_descriptor *idt = &idt0[0]; static char dblfault_stack[PAGE_SIZE] __aligned(16); +static char nmi0_stack[PAGE_SIZE] __aligned(16); +CTASSERT(sizeof(struct nmi_pcpu) == 16); + struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ @@ -1150,6 +1153,7 @@ hammer_time(u_int64_t modulep, u_int64_t caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; + struct nmi_pcpu *np; u_int64_t msr; char *env; @@ -1224,7 +1228,7 @@ hammer_time(u_int64_t modulep, u_int64_t setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_NMI, 
&IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1); + setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); @@ -1297,6 +1301,14 @@ hammer_time(u_int64_t modulep, u_int64_t /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + /* + * NMI stack, runs on ist2. The pcpu pointer is stored just + * above the start of the ist2 stack. + */ + np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; + np->np_pcpu = (register_t) pc; + common_tss[0].tss_ist2 = (long) np; + /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss); Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/mp_machdep.c Sat May 23 19:17:27 2009 (r192663) @@ -98,6 +98,7 @@ void *bootstacks[MAXCPU]; /* Temporary holder for double fault stack */ char *doublefault_stack; +char *nmi_stack; /* Hotwire a 0->4MB V==P mapping */ extern pt_entry_t *KPTphys; @@ -455,6 +456,7 @@ void init_secondary(void) { struct pcpu *pc; + struct nmi_pcpu *np; u_int64_t msr, cr0; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; @@ -468,6 +470,10 @@ init_secondary(void) common_tss[cpu].tss_iobase = sizeof(struct amd64tss); common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; + /* The NMI stack runs on IST2. 
*/ + np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; + common_tss[cpu].tss_ist2 = (long) np; + /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; ssdtosyssd(&gdt_segs[GPROC0_SEL], @@ -492,6 +498,9 @@ init_secondary(void) pc->pc_rsp0 = 0; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; + /* Save the per-cpu pointer for use by the NMI handler. */ + np->np_pcpu = (register_t) pc; + wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ @@ -758,6 +767,7 @@ start_all_aps(void) /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); + nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; Modified: user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/amd64/include/intr_machdep.h Sat May 23 19:17:27 2009 (r192663) @@ -120,6 +120,15 @@ struct intsrc { struct trapframe; +/* + * The following data structure holds per-cpu data, and is placed just + * above the top of the space used for the NMI stack. 
+ */ +struct nmi_pcpu { + register_t np_pcpu; + register_t __padding; /* pad to 16 bytes */ +}; + extern struct mtx icu_lock; extern int elcr_found; Modified: user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/i386/i386/exception.s Sat May 23 19:17:27 2009 (r192663) @@ -439,9 +439,18 @@ doreti_nmi: iret outofnmi: /* - * Clear interrupts and jump to AST handling code. + * Call the callchain capture hook after turning interrupts back on. */ + movl pmc_hook,%ecx + orl %ecx,%ecx + jz doreti_exit + pushl %esp /* frame pointer */ + pushl $PMC_FN_USER_CALLCHAIN /* command */ + movl PCPU(CURTHREAD),%eax + pushl %eax /* curthread */ sti + call *%ecx + addl $12,%esp jmp doreti_ast ENTRY(end_exceptions) #endif Modified: user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/i386/i386/genassym.c Sat May 23 19:17:27 2009 (r192663) @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_compat.h" +#include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include @@ -44,6 +45,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef HWPMC_HOOKS +#include +#endif #include #include #include @@ -231,3 +235,13 @@ ASSYM(MTX_RECURSECNT, offsetof(struct mt ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); #endif + +#ifdef XEN +#include +ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3)); +ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START); +#endif + +#ifdef HWPMC_HOOKS +ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN); +#endif Modified: 
user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c Sat May 23 19:17:05 2009 (r192662) +++ user/kmacy/releng_7_2_fcs/sys/kern/subr_trap.c Sat May 23 19:17:27 2009 (r192663) @@ -44,7 +44,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_mac.h" #ifdef __i386__