From owner-svn-src-projects@FreeBSD.ORG Sat Feb 28 17:59:03 2009 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id A543E106568B; Sat, 28 Feb 2009 17:59:03 +0000 (UTC) (envelope-from jb@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 8FD978FC18; Sat, 28 Feb 2009 17:59:03 +0000 (UTC) (envelope-from jb@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n1SHx3iu017457; Sat, 28 Feb 2009 17:59:03 GMT (envelope-from jb@svn.freebsd.org) Received: (from jb@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n1SHx3cp017445; Sat, 28 Feb 2009 17:59:03 GMT (envelope-from jb@svn.freebsd.org) Message-Id: <200902281759.n1SHx3cp017445@svn.freebsd.org> From: John Birrell Date: Sat, 28 Feb 2009 17:59:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r189188 - in projects/jbuild/sys/amd64: amd64 conf ia32 include linux32 pci X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 28 Feb 2009 17:59:04 -0000 Author: jb Date: Sat Feb 28 17:59:02 2009 New Revision: 189188 URL: http://svn.freebsd.org/changeset/base/189188 Log: MFC Modified: projects/jbuild/sys/amd64/amd64/amd64_mem.c projects/jbuild/sys/amd64/amd64/busdma_machdep.c projects/jbuild/sys/amd64/amd64/cpu_switch.S projects/jbuild/sys/amd64/amd64/exception.S projects/jbuild/sys/amd64/amd64/fpu.c projects/jbuild/sys/amd64/amd64/identcpu.c projects/jbuild/sys/amd64/amd64/initcpu.c projects/jbuild/sys/amd64/amd64/io_apic.c projects/jbuild/sys/amd64/amd64/local_apic.c projects/jbuild/sys/amd64/amd64/machdep.c projects/jbuild/sys/amd64/amd64/mp_machdep.c projects/jbuild/sys/amd64/amd64/msi.c projects/jbuild/sys/amd64/amd64/pmap.c projects/jbuild/sys/amd64/conf/DEFAULTS projects/jbuild/sys/amd64/conf/GENERIC projects/jbuild/sys/amd64/conf/NOTES projects/jbuild/sys/amd64/ia32/ia32_signal.c projects/jbuild/sys/amd64/ia32/ia32_sigtramp.S projects/jbuild/sys/amd64/include/apicvar.h projects/jbuild/sys/amd64/include/cpufunc.h projects/jbuild/sys/amd64/include/cputypes.h projects/jbuild/sys/amd64/include/fpu.h projects/jbuild/sys/amd64/include/intr_machdep.h projects/jbuild/sys/amd64/include/legacyvar.h projects/jbuild/sys/amd64/include/md_var.h projects/jbuild/sys/amd64/include/specialreg.h projects/jbuild/sys/amd64/linux32/linux32_locore.s projects/jbuild/sys/amd64/linux32/linux32_machdep.c projects/jbuild/sys/amd64/linux32/linux32_sysvec.c projects/jbuild/sys/amd64/pci/pci_bus.c Modified: projects/jbuild/sys/amd64/amd64/amd64_mem.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/amd64_mem.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/amd64_mem.c Sat Feb 28 17:59:02 2009 (r189188) @@ -678,9 +678,17 @@ amd64_mem_drvinit(void *unused) return; if ((cpu_id & 0xf00) != 0x600 && (cpu_id & 0xf00) != 0xf00) return; - if (cpu_vendor_id != CPU_VENDOR_INTEL && - cpu_vendor_id != CPU_VENDOR_AMD) + switch (cpu_vendor_id) { + case CPU_VENDOR_INTEL: + case 
CPU_VENDOR_AMD: + break; + case CPU_VENDOR_CENTAUR: + if (cpu_exthigh >= 0x80000008) + break; + /* FALLTHROUGH */ + default: return; + } mem_range_softc.mr_op = &amd64_mrops; } SYSINIT(amd64memdev, SI_SUB_DRIVERS, SI_ORDER_FIRST, amd64_mem_drvinit, NULL); Modified: projects/jbuild/sys/amd64/amd64/busdma_machdep.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/busdma_machdep.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/busdma_machdep.c Sat Feb 28 17:59:02 2009 (r189188) @@ -93,6 +93,7 @@ struct bounce_zone { int active_bpages; int total_bounced; int total_deferred; + int map_count; bus_size_t alignment; bus_size_t boundary; bus_addr_t lowaddr; @@ -418,7 +419,7 @@ bus_dmamap_create(bus_dma_tag_t dmat, in else maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 - || (dmat->map_count > 0 && bz->total_bpages < maxpages)) { + || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); @@ -434,6 +435,7 @@ bus_dmamap_create(bus_dma_tag_t dmat, in error = 0; } } + bz->map_count++; } else { *mapp = NULL; } @@ -457,6 +459,8 @@ bus_dmamap_destroy(bus_dma_tag_t dmat, b __func__, dmat, EBUSY); return (EBUSY); } + if (dmat->bounce_zone) + dmat->bounce_zone->map_count--; free(map, M_DEVBUF); } dmat->map_count--; @@ -989,6 +993,7 @@ alloc_bounce_zone(bus_dma_tag_t dmat) bz->lowaddr = dmat->lowaddr; bz->alignment = dmat->alignment; bz->boundary = dmat->boundary; + bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); @@ -1128,6 +1133,13 @@ add_bounce_page(bus_dma_tag_t dmat, bus_ bz->active_bpages++; mtx_unlock(&bounce_lock); + if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { + /* page offset needs to be preserved */ + bpage->vaddr &= ~PAGE_MASK; + bpage->busaddr &= ~PAGE_MASK; + bpage->vaddr |= vaddr & PAGE_MASK; + bpage->busaddr |= vaddr & PAGE_MASK; + } bpage->datavaddr = vaddr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); Modified: projects/jbuild/sys/amd64/amd64/cpu_switch.S ============================================================================== --- projects/jbuild/sys/amd64/amd64/cpu_switch.S Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/cpu_switch.S Sat Feb 28 17:59:02 2009 (r189188) @@ -199,6 +199,7 @@ done_load_seg: cmpq PCB_FSBASE(%r8),%r9 jz 1f /* Restore userland %fs */ +restore_fsbase: movl $MSR_FSBASE,%ecx movl PCB_FSBASE(%r8),%eax movl PCB_FSBASE+4(%r8),%edx @@ -259,12 +260,12 @@ do_kthread: jmp do_tss store_seg: - movl %gs,PCB_GS(%r8) + mov %gs,PCB_GS(%r8) testl $PCB_GS32BIT,PCB_FLAGS(%r8) jnz 2f -1: movl %ds,PCB_DS(%r8) - movl %es,PCB_ES(%r8) - movl %fs,PCB_FS(%r8) +1: mov %ds,PCB_DS(%r8) + mov %es,PCB_ES(%r8) + mov %fs,PCB_FS(%r8) jmp done_store_seg 2: movq PCPU(GS32P),%rax movq (%rax),%rax @@ -276,12 +277,12 @@ load_seg: jnz 2f 1: movl $MSR_GSBASE,%ecx rdmsr - movl PCB_GS(%r8),%gs + mov PCB_GS(%r8),%gs wrmsr - movl PCB_DS(%r8),%ds - movl PCB_ES(%r8),%es - movl PCB_FS(%r8),%fs - jmp done_load_seg + mov PCB_DS(%r8),%ds + mov PCB_ES(%r8),%es + mov PCB_FS(%r8),%fs + jmp restore_fsbase /* Restore userland %gs while preserving kernel gsbase */ 2: movq PCPU(GS32P),%rax movq PCB_GS32SD(%r8),%rcx Modified: projects/jbuild/sys/amd64/amd64/exception.S ============================================================================== --- 
projects/jbuild/sys/amd64/amd64/exception.S Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/exception.S Sat Feb 28 17:59:02 2009 (r189188) @@ -383,22 +383,24 @@ IDTVEC(fast_syscall32) * NMI handling is special. * * First, NMIs do not respect the state of the processor's RFLAGS.IF - * bit and the NMI handler may be invoked at any time, including when - * the processor is in a critical section with RFLAGS.IF == 0. In - * particular, this means that the processor's GS.base values could be - * inconsistent on entry to the handler, and so we need to read - * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a - * C-preserved register, to remember whether to swap GS back on the - * exit path. + * bit. The NMI handler may be entered at any time, including when + * the processor is in a critical section with RFLAGS.IF == 0. + * The processor's GS.base value could be invalid on entry to the + * handler. * * Second, the processor treats NMIs specially, blocking further NMIs - * until an 'iretq' instruction is executed. We therefore need to - * execute the NMI handler with interrupts disabled to prevent a - * nested interrupt from executing an 'iretq' instruction and - * inadvertently taking the processor out of NMI mode. + * until an 'iretq' instruction is executed. We thus need to execute + * the NMI handler with interrupts disabled, to prevent a nested interrupt + * from executing an 'iretq' instruction and inadvertently taking the + * processor out of NMI mode. * - * Third, the NMI handler runs on its own stack (tss_ist1), shared - * with the double fault handler. + * Third, the NMI handler runs on its own stack (tss_ist2). The canonical + * GS.base value for the processor is stored just above the bottom of its + * NMI stack. For NMIs taken from kernel mode, the current value in + * the processor's GS.base is saved at entry to C-preserved register %r12, + * the canonical value for GS.base is then loaded into the processor, and + * the saved value is restored at exit time. For NMIs taken from user mode, + * the cheaper 'SWAPGS' instructions are used for swapping GS.base. */ IDTVEC(nmi) @@ -423,12 +425,22 @@ IDTVEC(nmi) movq %r15,TF_R15(%rsp) xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) - jnz nmi_needswapgs /* we came from userland */ + jnz nmi_fromuserspace + /* + * We've interrupted the kernel. Preserve GS.base in %r12. + */ movl $MSR_GSBASE,%ecx rdmsr - cmpl $VM_MAXUSER_ADDRESS >> 32,%edx - jae nmi_calltrap /* GS.base holds a kernel VA */ -nmi_needswapgs: + movq %rax,%r12 + shlq $32,%rdx + orq %rdx,%r12 + /* Retrieve and load the canonical value for GS.base. */ + movq TF_SIZE(%rsp),%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + jmp nmi_calltrap +nmi_fromuserspace: incl %ebx swapgs /* Note: this label is also used by ddb and gdb: */ @@ -439,14 +451,19 @@ nmi_calltrap: MEXITCOUNT #ifdef HWPMC_HOOKS /* - * Check if the current trap was from user mode and if so - * whether the current thread needs a user call chain to be - * captured. We are still in NMI mode at this point. + * Capture a userspace callchain if needed. + * + * - Check if the current trap was from user mode. + * - Check if the current thread is valid. + * - Check if the thread requires a user call chain to be + * captured. + * + * We are still in NMI mode at this point. */ - testb $SEL_RPL_MASK,TF_CS(%rsp) - jz nocallchain - movq PCPU(CURTHREAD),%rax /* curthread present? 
*/ - orq %rax,%rax + testl %ebx,%ebx + jz nocallchain /* not from userspace */ + movq PCPU(CURTHREAD),%rax + orq %rax,%rax /* curthread present? */ jz nocallchain testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */ jz nocallchain @@ -494,11 +511,22 @@ outofnmi: movq %rsp,%rdx /* frame */ sti call *%rax + cli nocallchain: #endif testl %ebx,%ebx - jz nmi_restoreregs + jz nmi_kernelexit swapgs + jmp nmi_restoreregs +nmi_kernelexit: + /* + * Put back the preserved MSR_GSBASE value. + */ + movl $MSR_GSBASE,%ecx + movq %r12,%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr nmi_restoreregs: movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi Modified: projects/jbuild/sys/amd64/amd64/fpu.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/fpu.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/fpu.c Sat Feb 28 17:59:02 2009 (r189188) @@ -98,10 +98,8 @@ typedef u_char bool_t; static void fpu_clean_state(void); -int hw_float = 1; -SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, - CTLFLAG_RD, &hw_float, 0, - "Floatingpoint instructions executed in hardware"); +SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, + NULL, 1, "Floating point instructions executed in hardware"); static struct savefpu fpu_cleanstate; static bool_t fpu_cleanstate_ready; @@ -391,6 +389,7 @@ fpudna() { struct pcb *pcb; register_t s; + u_short control; if (PCPU_GET(fpcurthread) == curthread) { printf("fpudna: fpcurthread == curthread %d times\n", @@ -421,6 +420,10 @@ fpudna() * explicitly load sanitized registers. */ fxrstor(&fpu_cleanstate); + if (pcb->pcb_flags & PCB_32BIT) { + control = __INITIAL_FPUCW_I386__; + fldcw(&control); + } pcb->pcb_flags |= PCB_FPUINITDONE; } else fxrstor(&pcb->pcb_save); Modified: projects/jbuild/sys/amd64/amd64/identcpu.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/identcpu.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/identcpu.c Sat Feb 28 17:59:02 2009 (r189188) @@ -72,6 +72,7 @@ void panicifcpuunsupported(void); static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_AMD_assoc(int i); +static void print_via_padlock_info(void); int cpu_class; char machine[] = "amd64"; @@ -102,6 +103,7 @@ static struct { } cpu_vendors[] = { { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */ + { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */ }; int cpu_cores; @@ -131,24 +133,33 @@ printcpuinfo(void) } } - if (cpu_vendor_id == CPU_VENDOR_INTEL) { + switch (cpu_vendor_id) { + case CPU_VENDOR_INTEL: /* Please make up your mind folks! */ strcat(cpu_model, "EM64T"); - } else if (cpu_vendor_id == CPU_VENDOR_AMD) { + break; + case CPU_VENDOR_AMD: /* * Values taken from AMD Processor Recognition * http://www.amd.com/K6/k6docs/pdf/20734g.pdf * (also describes ``Features'' encodings. 
*/ strcpy(cpu_model, "AMD "); - switch (cpu_id & 0xF00) { - case 0xf00: + if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); - break; - default: + else strcat(cpu_model, "Unknown"); - break; - } + break; + case CPU_VENDOR_CENTAUR: + strcpy(cpu_model, "VIA "); + if ((cpu_id & 0xff0) == 0x6f0) + strcat(cpu_model, "Nano Processor"); + else + strcat(cpu_model, "Unknown"); + break; + default: + strcat(cpu_model, "Unknown"); + break; } /* @@ -180,7 +191,8 @@ printcpuinfo(void) printf(" Id = 0x%x", cpu_id); if (cpu_vendor_id == CPU_VENDOR_INTEL || - cpu_vendor_id == CPU_VENDOR_AMD) { + cpu_vendor_id == CPU_VENDOR_AMD || + cpu_vendor_id == CPU_VENDOR_CENTAUR) { printf(" Stepping = %u", cpu_id & 0xf); if (cpu_high > 0) { u_int cmp = 1, htt = 1; @@ -352,6 +364,9 @@ printcpuinfo(void) ); } + if (cpu_vendor_id == CPU_VENDOR_CENTAUR) + print_via_padlock_info(); + if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_AMD) cpu_feature &= ~CPUID_HTT; @@ -375,6 +390,12 @@ printcpuinfo(void) AMD64_CPU_MODEL(cpu_id) >= 0x3)) tsc_is_invariant = 1; break; + case CPU_VENDOR_CENTAUR: + if (AMD64_CPU_FAMILY(cpu_id) == 0x6 && + AMD64_CPU_MODEL(cpu_id) >= 0xf && + (rdmsr(0x1203) & 0x100000000ULL) == 0) + tsc_is_invariant = 1; + break; } if (tsc_is_invariant) printf("\n TSC: P-state invariant"); @@ -456,7 +477,7 @@ EVENTHANDLER_DEFINE(cpufreq_post_change, EVENTHANDLER_PRI_ANY); /* - * Final stage of CPU identification. -- Should I check TI? + * Final stage of CPU identification. */ void identify_cpu(void) @@ -478,7 +499,8 @@ identify_cpu(void) cpu_feature2 = regs[2]; if (cpu_vendor_id == CPU_VENDOR_INTEL || - cpu_vendor_id == CPU_VENDOR_AMD) { + cpu_vendor_id == CPU_VENDOR_AMD || + cpu_vendor_id == CPU_VENDOR_CENTAUR) { do_cpuid(0x80000000, regs); cpu_exthigh = regs[0]; } @@ -599,3 +621,37 @@ print_AMD_info(void) print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); } } + +static void +print_via_padlock_info(void) +{ + u_int regs[4]; + + /* Check for supported models. 
*/ + switch (cpu_id & 0xff0) { + case 0x690: + if ((cpu_id & 0xf) < 3) + return; + case 0x6a0: + case 0x6d0: + case 0x6f0: + break; + default: + return; + } + + do_cpuid(0xc0000000, regs); + if (regs[0] >= 0xc0000001) + do_cpuid(0xc0000001, regs); + else + return; + + printf("\n VIA Padlock Features=0x%b", regs[3], + "\020" + "\003RNG" /* RNG */ + "\007AES" /* ACE */ + "\011AES-CTR" /* ACE2 */ + "\013SHA1,SHA256" /* PHE */ + "\015RSA" /* PMM */ + ); +} Modified: projects/jbuild/sys/amd64/amd64/initcpu.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/initcpu.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/initcpu.c Sat Feb 28 17:59:02 2009 (r189188) @@ -54,6 +54,8 @@ u_int cpu_feature2; /* Feature flags */ u_int amd_feature; /* AMD feature flags */ u_int amd_feature2; /* AMD feature flags */ u_int amd_pminfo; /* AMD advanced power management info */ +u_int via_feature_rng; /* VIA RNG features */ +u_int via_feature_xcrypt; /* VIA ACE features */ u_int cpu_high; /* Highest arg to CPUID */ u_int cpu_exthigh; /* Highest arg to extended CPUID */ u_int cpu_id; /* Stepping ID */ @@ -64,6 +66,75 @@ u_int cpu_vendor_id; /* CPU vendor ID * u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ +SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, + &via_feature_rng, 0, "VIA C3/C7 RNG feature available in CPU"); +SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD, + &via_feature_xcrypt, 0, "VIA C3/C7 xcrypt feature available in CPU"); + +/* + * Initialize special VIA C3/C7 features + */ +static void +init_via(void) +{ + u_int regs[4], val; + u_int64_t msreg; + + do_cpuid(0xc0000000, regs); + val = regs[0]; + if (val >= 0xc0000001) { + do_cpuid(0xc0000001, regs); + val = regs[3]; + } else + val = 0; + + /* Enable RNG if present and disabled */ + if (val & VIA_CPUID_HAS_RNG) { + if (!(val & VIA_CPUID_DO_RNG)) { + msreg = rdmsr(0x110B); + msreg |= 0x40; + wrmsr(0x110B, msreg); + } + via_feature_rng = VIA_HAS_RNG; + } + /* Enable AES engine if present and disabled */ + if (val & VIA_CPUID_HAS_ACE) { + if (!(val & VIA_CPUID_DO_ACE)) { + msreg = rdmsr(0x1107); + msreg |= (0x01 << 28); + wrmsr(0x1107, msreg); + } + via_feature_xcrypt |= VIA_HAS_AES; + } + /* Enable ACE2 engine if present and disabled */ + if (val & VIA_CPUID_HAS_ACE2) { + if (!(val & VIA_CPUID_DO_ACE2)) { + msreg = rdmsr(0x1107); + msreg |= (0x01 << 28); + wrmsr(0x1107, msreg); + } + via_feature_xcrypt |= VIA_HAS_AESCTR; + } + /* Enable SHA engine if present and disabled */ + if (val & VIA_CPUID_HAS_PHE) { + if (!(val & VIA_CPUID_DO_PHE)) { + msreg = rdmsr(0x1107); + msreg |= (0x01 << 28/**/); + wrmsr(0x1107, msreg); + } + via_feature_xcrypt |= VIA_HAS_SHA; + } + /* Enable MM engine if present and disabled */ + if (val & VIA_CPUID_HAS_PMM) { + if (!(val & VIA_CPUID_DO_PMM)) { + msreg = rdmsr(0x1107); + msreg |= (0x01 << 28/**/); + wrmsr(0x1107, msreg); + } + via_feature_xcrypt |= VIA_HAS_MM; + } +} + /* * Initialize CPU control registers */ @@ -81,4 +152,8 @@ initializecpu(void) wrmsr(MSR_EFER, msr); pg_nx = PG_NX; } + if (cpu_vendor_id == CPU_VENDOR_CENTAUR && + AMD64_CPU_FAMILY(cpu_id) == 0x6 && + AMD64_CPU_MODEL(cpu_id) >= 0xf) + init_via(); } Modified: projects/jbuild/sys/amd64/amd64/io_apic.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/io_apic.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/io_apic.c Sat Feb 28 
17:59:02 2009 (r189188) @@ -327,39 +327,56 @@ ioapic_assign_cpu(struct intsrc *isrc, u { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; struct ioapic *io = (struct ioapic *)isrc->is_pic; + u_int old_vector; + u_int old_id; + /* + * keep 1st core as the destination for NMI + */ + if (intpin->io_irq == IRQ_NMI) + apic_id = 0; + + /* + * Set us up to free the old irq. + */ + old_vector = intpin->io_vector; + old_id = intpin->io_cpu; + if (old_vector && apic_id == old_id) + return; + + /* + * Allocate an APIC vector for this interrupt pin. Once + * we have a vector we program the interrupt pin. + */ intpin->io_cpu = apic_id; + intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq); if (bootverbose) { - printf("ioapic%u: Assigning ", io->io_id); + printf("ioapic%u: routing intpin %u (", io->io_id, + intpin->io_intpin); ioapic_print_irq(intpin); - printf(" to local APIC %u\n", intpin->io_cpu); + printf(") to lapic %u vector %u\n", intpin->io_cpu, + intpin->io_vector); } ioapic_program_intpin(intpin); + /* + * Free the old vector after the new one is established. This is done + * to prevent races where we could miss an interrupt. + */ + if (old_vector) + apic_free_vector(old_id, old_vector, intpin->io_irq); } static void ioapic_enable_intr(struct intsrc *isrc) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; - struct ioapic *io = (struct ioapic *)isrc->is_pic; - if (intpin->io_vector == 0) { - /* - * Allocate an APIC vector for this interrupt pin. Once - * we have a vector we program the interrupt pin. - */ - intpin->io_vector = apic_alloc_vector(intpin->io_irq); - if (bootverbose) { - printf("ioapic%u: routing intpin %u (", io->io_id, - intpin->io_intpin); - ioapic_print_irq(intpin); - printf(") to vector %u\n", intpin->io_vector); - } - ioapic_program_intpin(intpin); - apic_enable_vector(intpin->io_vector); - } + if (intpin->io_vector == 0) + ioapic_assign_cpu(isrc, pcpu_find(0)->pc_apic_id); + apic_enable_vector(intpin->io_cpu, intpin->io_vector); } + static void ioapic_disable_intr(struct intsrc *isrc) { @@ -369,11 +386,11 @@ ioapic_disable_intr(struct intsrc *isrc) if (intpin->io_vector != 0) { /* Mask this interrupt pin and free its APIC vector. */ vector = intpin->io_vector; - apic_disable_vector(vector); + apic_disable_vector(intpin->io_cpu, vector); intpin->io_masked = 1; intpin->io_vector = 0; ioapic_program_intpin(intpin); - apic_free_vector(vector, intpin->io_irq); + apic_free_vector(intpin->io_cpu, vector, intpin->io_irq); } } Modified: projects/jbuild/sys/amd64/amd64/local_apic.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/local_apic.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/local_apic.c Sat Feb 28 17:59:02 2009 (r189188) @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include @@ -109,6 +111,8 @@ struct lapic { u_long la_hard_ticks; u_long la_stat_ticks; u_long la_prof_ticks; + /* Include IDT_SYSCALL to make indexing easier. */ + u_int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static lapics[MAX_APIC_ID + 1]; /* XXX: should thermal be an NMI? */ @@ -134,8 +138,6 @@ static inthand_t *ioint_handlers[] = { IDTVEC(apic_isr7), /* 224 - 255 */ }; -/* Include IDT_SYSCALL to make indexing easier. 
*/ -static u_int ioint_irqs[APIC_NUM_IOINTS + 1]; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, @@ -215,14 +217,12 @@ lapic_init(vm_paddr_t addr) /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); - ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYSIGT, SEL_KPL, 0); - ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; /* XXX: error/thermal interrupts */ } @@ -254,6 +254,9 @@ lapic_create(u_int apic_id, int boot_cpu lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } + lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; + lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = + IRQ_TIMER; #ifdef SMP cpu_add(apic_id, boot_cpu); @@ -664,7 +667,8 @@ lapic_handle_intr(int vector, struct tra if (vector == -1) panic("Couldn't get vector from ISR!"); - isrc = intr_lookup_source(apic_idt_to_irq(vector)); + isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), + vector)); intr_execute_handlers(isrc, frame); } @@ -779,9 +783,19 @@ lapic_timer_enable_intr(void) lapic->lvt_timer = value; } +u_int +apic_cpuid(u_int apic_id) +{ +#ifdef SMP + return apic_cpuids[apic_id]; +#else + return 0; +#endif +} + /* Request a free IDT vector to be used by the specified IRQ. */ u_int -apic_alloc_vector(u_int irq) +apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; @@ -793,9 +807,9 @@ apic_alloc_vector(u_int irq) */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { - if (ioint_irqs[vector] != 0) + if (lapics[apic_id].la_ioint_irqs[vector] != 0) continue; - ioint_irqs[vector] = irq; + lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } @@ -810,7 +824,7 @@ apic_alloc_vector(u_int irq) * satisfied, 0 is returned. */ u_int -apic_alloc_vectors(u_int *irqs, u_int count, u_int align) +apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; @@ -833,7 +847,7 @@ apic_alloc_vectors(u_int *irqs, u_int co for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ - if (ioint_irqs[vector] != 0) { + if (lapics[apic_id].la_ioint_irqs[vector] != 0) { run = 0; first = 0; continue; @@ -853,7 +867,8 @@ apic_alloc_vectors(u_int *irqs, u_int co /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) - ioint_irqs[first + vector] = irqs[vector]; + lapics[apic_id].la_ioint_irqs[first + vector] = + irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } @@ -862,8 +877,14 @@ apic_alloc_vectors(u_int *irqs, u_int co return (0); } +/* + * Enable a vector for a particular apic_id. Since all lapics share idt + * entries and ioint_handlers this enables the vector on all lapics. lapics + * which do not have the vector configured would report spurious interrupts + * should it fire. 
+ */ void -apic_enable_vector(u_int vector) +apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); @@ -873,38 +894,61 @@ apic_enable_vector(u_int vector) } void -apic_disable_vector(u_int vector) +apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); +#ifdef notyet + /* + * We can not currently clear the idt entry because other cpus + * may have a valid vector at this offset. + */ setidt(vector, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); +#endif } /* Release an APIC vector when it's no longer in use. */ void -apic_free_vector(u_int vector, u_int irq) +apic_free_vector(u_int apic_id, u_int vector, u_int irq) { + struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); - KASSERT(ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); + KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == + irq, ("IRQ mismatch")); + + /* + * Bind us to the cpu that owned the vector before freeing it so + * we don't lose an interrupt delivery race. + */ + td = curthread; + thread_lock(td); + if (sched_is_bound(td)) + panic("apic_free_vector: Thread already bound.\n"); + sched_bind(td, apic_cpuid(apic_id)); + thread_unlock(td); mtx_lock_spin(&icu_lock); - ioint_irqs[vector - APIC_IO_INTS] = 0; + lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0; mtx_unlock_spin(&icu_lock); + thread_lock(td); + sched_unbind(td); + thread_unlock(td); + } /* Map an IDT vector (APIC) to an IRQ (interrupt source). 
*/ u_int -apic_idt_to_irq(u_int vector) +apic_idt_to_irq(u_int apic_id, u_int vector) { KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); - return (ioint_irqs[vector - APIC_IO_INTS]); + return (lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]); } #ifdef DDB @@ -915,6 +959,7 @@ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; + u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) @@ -923,9 +968,14 @@ DB_SHOW_COMMAND(apic, db_show_apic) verbose = 1; else verbose = 0; - for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { - irq = ioint_irqs[i]; - if (irq != 0 && irq != IRQ_SYSCALL) { + for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { + if (lapics[apic_id].la_present == 0) + continue; + db_printf("Interrupts bound to lapic %u\n", apic_id); + for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { + irq = lapics[apic_id].la_ioint_irqs[i]; + if (irq == 0 || irq == IRQ_SYSCALL) + continue; db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); Modified: projects/jbuild/sys/amd64/amd64/machdep.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/machdep.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/machdep.c Sat Feb 28 17:59:02 2009 (r189188) @@ -809,6 +809,9 @@ struct gate_descriptor *idt = &idt0[0]; static char dblfault_stack[PAGE_SIZE] __aligned(16); +static char nmi0_stack[PAGE_SIZE] __aligned(16); +CTASSERT(sizeof(struct nmi_pcpu) == 16); + struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ @@ -1291,6 +1294,7 @@ hammer_time(u_int64_t modulep, u_int64_t caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; + struct nmi_pcpu *np; u_int64_t msr; char *env; @@ -1365,7 +1369,7 @@ hammer_time(u_int64_t modulep, u_int64_t setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1); + setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); @@ -1438,6 +1442,14 @@ hammer_time(u_int64_t modulep, u_int64_t /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + /* + * NMI stack, runs on ist2. The pcpu pointer is stored just + * above the start of the ist2 stack. 
+ */ + np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; + np->np_pcpu = (register_t) pc; + common_tss[0].tss_ist2 = (long) np; + /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss); Modified: projects/jbuild/sys/amd64/amd64/mp_machdep.c ============================================================================== --- projects/jbuild/sys/amd64/amd64/mp_machdep.c Sat Feb 28 17:58:53 2009 (r189187) +++ projects/jbuild/sys/amd64/amd64/mp_machdep.c Sat Feb 28 17:59:02 2009 (r189188) @@ -92,6 +92,7 @@ void *bootstacks[MAXCPU]; /* Temporary holder for double fault stack */ char *doublefault_stack; +char *nmi_stack; /* Hotwire a 0->4MB V==P mapping */ extern pt_entry_t *KPTphys; @@ -150,8 +151,10 @@ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; + int cpu_hyperthread:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; +int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; @@ -349,11 +352,7 @@ cpu_mp_start(void) KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); cpu_apic_ids[0] = boot_cpu_id; - - assign_cpu_ids(); - - /* Start each Application Processor */ - start_all_aps(); + apic_cpuids[boot_cpu_id] = 0; /* Setup the initial logical CPUs info. */ logical_cpus = logical_cpus_mask = 0; @@ -401,6 +400,11 @@ cpu_mp_start(void) hyperthreading_cpus = logical_cpus; } + assign_cpu_ids(); + + /* Start each Application Processor */ + start_all_aps(); + set_interrupt_apic_ids(); } @@ -412,18 +416,26 @@ void cpu_mp_announce(void) { int i, x; + const char *hyperthread; /* List CPUs */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) continue; + if (cpu_info[x].cpu_hyperthread) { + hyperthread = "/HT"; + } else { + hyperthread = ""; + } if (cpu_info[x].cpu_disabled) - printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); + printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", + hyperthread, x); else { KASSERT(i < mp_ncpus, ("mp_ncpus and actual cpus are out of whack")); - printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); + printf(" cpu%d (AP%s): APIC ID: %2d\n", i++, + hyperthread, x); } } } @@ -435,6 +447,7 @@ void init_secondary(void) { struct pcpu *pc; + struct nmi_pcpu *np; u_int64_t msr, cr0; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; @@ -448,6 +461,10 @@ init_secondary(void) common_tss[cpu].tss_iobase = sizeof(struct amd64tss); common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; + /* The NMI stack runs on IST2. */ + np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; + common_tss[cpu].tss_ist2 = (long) np; + /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; ssdtosyssd(&gdt_segs[GPROC0_SEL], @@ -472,6 +489,9 @@ init_secondary(void) pc->pc_rsp0 = 0; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; + /* Save the per-cpu pointer for use by the NMI handler. */ + np->np_pcpu = (register_t) pc; + wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ @@ -631,11 +651,28 @@ assign_cpu_ids(void) { u_int i; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
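
As an aside on the busdma_machdep.c hunk above: the new BUS_DMA_KEEP_PG_OFFSET handling in add_bounce_page() rebases a bounce page so it keeps the same offset-within-page as the caller's original buffer, which some devices require across the bounce copy. What follows is a minimal userland sketch of just that masking step, not kernel code; PAGE_MASK, the cut-down bounce_page structure, and the sample addresses are assumptions made here for illustration only.

  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_MASK 0xfffUL          /* assume 4 KiB pages */

  struct bounce_page {
          uintptr_t vaddr;           /* KVA of the bounce buffer */
          uintptr_t busaddr;         /* bus address of the bounce buffer */
  };

  /*
   * Rebase the bounce buffer so it carries the same offset-within-page
   * as the original data address: clear the low page-offset bits of both
   * addresses, then copy in the offset bits from the data address.
   */
  static void
  keep_page_offset(struct bounce_page *bpage, uintptr_t data_vaddr)
  {
          bpage->vaddr &= ~PAGE_MASK;
          bpage->busaddr &= ~PAGE_MASK;
          bpage->vaddr |= data_vaddr & PAGE_MASK;
          bpage->busaddr |= data_vaddr & PAGE_MASK;
  }

  int
  main(void)
  {
          /* Hypothetical page-aligned bounce buffer addresses. */
          struct bounce_page bp = { 0xc0234000UL, 0x1234000UL };

          /* Data sits at offset 0x123 within its page. */
          keep_page_offset(&bp, 0xbfffe123UL);

          /* Both addresses now end in the original offset 0x123. */
          printf("vaddr=%#lx busaddr=%#lx\n",
              (unsigned long)bp.vaddr, (unsigned long)bp.busaddr);
          return (0);
  }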