From owner-svn-src-all@FreeBSD.ORG Mon Feb 9 21:01:02 2015 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 1FC9F504; Mon, 9 Feb 2015 21:01:02 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 09294DBE; Mon, 9 Feb 2015 21:01:02 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t19L11JL010989; Mon, 9 Feb 2015 21:01:01 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t19L0vsv010944; Mon, 9 Feb 2015 21:00:57 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201502092100.t19L0vsv010944@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Mon, 9 Feb 2015 21:00:57 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r278473 - in head/sys: amd64/amd64 amd64/include amd64/vmm contrib/dev/acpica/include i386/i386 i386/include x86/acpica x86/include x86/x86 x86/xen X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 09 Feb 2015 21:01:02 -0000 Author: kib Date: Mon Feb 9 21:00:56 2015 New Revision: 278473 URL: https://svnweb.freebsd.org/changeset/base/278473 Log: Add x2APIC support. 
Enable it by default if CPU is capable. The hw.x2apic_enable tunable allows disabling it from the loader prompt. To closely repeat effects of the uncached memory ops when accessing registers in the xAPIC mode, the x2APIC writes to MSRs are preceded by mfence, except for the EOI notifications. This is probably too strict; only ICR writes to send IPI require serialization to ensure that other CPUs see the previous actions when IPI is delivered. This may be changed later. In vmm justreturn IPI handler, call doreti_iret instead of doing iretd inline, to handle corner conditions. Note that the patch only switches LAPICs into x2APIC mode. It does not enable FreeBSD to support > 255 CPUs, which requires parsing x2APIC MADT entries and doing interrupt remapping, but it is a required step on the way. Reviewed by: neel Tested by: pho (real hardware), neel (on bhyve) Discussed with: jhb, grehan Sponsored by: The FreeBSD Foundation MFC after: 2 months Modified: head/sys/amd64/amd64/apic_vector.S head/sys/amd64/amd64/genassym.c head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/include/cpufunc.h head/sys/amd64/vmm/vmm_support.S head/sys/contrib/dev/acpica/include/actbl2.h head/sys/i386/i386/apic_vector.s head/sys/i386/i386/genassym.c head/sys/i386/i386/mp_machdep.c head/sys/i386/include/cpufunc.h head/sys/x86/acpica/madt.c head/sys/x86/include/apicreg.h head/sys/x86/include/apicvar.h head/sys/x86/include/specialreg.h head/sys/x86/x86/io_apic.c head/sys/x86/x86/local_apic.c head/sys/x86/xen/xen_apic.c Modified: head/sys/amd64/amd64/apic_vector.S ============================================================================== --- head/sys/amd64/amd64/apic_vector.S Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/amd64/amd64/apic_vector.S Mon Feb 9 21:00:56 2015 (r278473) @@ -39,6 +39,7 @@ #include "opt_smp.h" #include +#include #include #include "assym.s" @@ -49,6 +50,22 @@ #define LK #endif + .text + SUPERALIGN_TEXT + /* End Of Interrupt to APIC */ +as_lapic_eoi: + cmpl 
$0,x2apic_mode + jne 1f + movq lapic_map,%rax + movl $0,LA_EOI(%rax) + ret +1: + movl $MSR_APIC_EOI,%ecx + xorl %eax,%eax + xorl %edx,%edx + wrmsr + ret + /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word @@ -62,15 +79,22 @@ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ FAKE_MCOUNT(TF_RIP(%rsp)) ; \ - movq lapic, %rdx ; /* pointer to local APIC */ \ + cmpl $0,x2apic_mode ; \ + je 1f ; \ + movl $(MSR_APIC_ISR0 + index),%ecx ; \ + rdmsr ; \ + jmp 2f ; \ +1: ; \ + movq lapic_map, %rdx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ +2: ; \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ - jz 1f ; \ + jz 3f ; \ addl $(32 * index),%eax ; \ movq %rsp, %rsi ; \ movl %eax, %edi ; /* pass the IRQ */ \ call lapic_handle_intr ; \ -1: ; \ +3: ; \ MEXITCOUNT ; \ jmp doreti @@ -160,8 +184,7 @@ IDTVEC(xen_intr_upcall) SUPERALIGN_TEXT invltlb_ret: - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + call as_lapic_eoi POP_FRAME jmp doreti_iret @@ -228,8 +251,7 @@ IDTVEC(invlcache) IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME - movq lapic, %rdx - movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ + call as_lapic_eoi FAKE_MCOUNT(TF_RIP(%rsp)) @@ -245,8 +267,7 @@ IDTVEC(ipi_intr_bitmap_handler) IDTVEC(cpustop) PUSH_FRAME - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + call as_lapic_eoi call cpustop_handler jmp doreti @@ -260,8 +281,7 @@ IDTVEC(cpususpend) PUSH_FRAME call cpususpend_handler - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + call as_lapic_eoi jmp doreti /* @@ -279,7 +299,6 @@ IDTVEC(rendezvous) incq (%rax) #endif call smp_rendezvous_action - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + call as_lapic_eoi jmp doreti #endif /* SMP */ Modified: head/sys/amd64/amd64/genassym.c 
============================================================================== --- head/sys/amd64/amd64/genassym.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/amd64/amd64/genassym.c Mon Feb 9 21:00:56 2015 (r278473) @@ -220,13 +220,8 @@ ASSYM(PC_COMMONTSSP, offsetof(struct pcp ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss)); ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt)); -ASSYM(LA_VER, offsetof(struct LAPIC, version)); -ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); -ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); -ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); -ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); -ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); -ASSYM(LA_ISR, offsetof(struct LAPIC, isr0)); +ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL); +ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL); ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); Modified: head/sys/amd64/amd64/mp_machdep.c ============================================================================== --- head/sys/amd64/amd64/mp_machdep.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/amd64/amd64/mp_machdep.c Mon Feb 9 21:00:56 2015 (r278473) @@ -705,8 +705,11 @@ init_secondary(void) wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); - /* Disable local APIC just to be sure. */ - lapic_disable(); + /* + * On real hardware, switch to x2apic mode if possible. + * Disable local APIC until BSP directed APs to run. + */ + lapic_xapic_mode(); /* signal our startup to the BSP. 
*/ mp_naps++; Modified: head/sys/amd64/include/cpufunc.h ============================================================================== --- head/sys/amd64/include/cpufunc.h Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/amd64/include/cpufunc.h Mon Feb 9 21:00:56 2015 (r278473) @@ -343,6 +343,15 @@ rdmsr(u_int msr) return (low | ((uint64_t)high << 32)); } +static __inline uint32_t +rdmsr32(u_int msr) +{ + uint32_t low; + + __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "rdx"); + return (low); +} + static __inline uint64_t rdpmc(u_int pmc) { @@ -826,6 +835,7 @@ u_long rcr2(void); u_long rcr3(void); u_long rcr4(void); uint64_t rdmsr(u_int msr); +uint32_t rdmsr32(u_int msr); uint64_t rdpmc(u_int pmc); uint64_t rdr0(void); uint64_t rdr1(void); Modified: head/sys/amd64/vmm/vmm_support.S ============================================================================== --- head/sys/amd64/vmm/vmm_support.S Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/amd64/vmm/vmm_support.S Mon Feb 9 21:00:56 2015 (r278473) @@ -30,13 +30,14 @@ #include -#define LA_EOI 0xB0 - .text SUPERALIGN_TEXT IDTVEC(justreturn) + pushq %rdx pushq %rax - movq lapic, %rax - movl $0, LA_EOI(%rax) + pushq %rcx + call as_lapic_eoi + popq %rcx popq %rax - iretq + popq %rdx + jmp doreti_iret Modified: head/sys/contrib/dev/acpica/include/actbl2.h ============================================================================== --- head/sys/contrib/dev/acpica/include/actbl2.h Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/contrib/dev/acpica/include/actbl2.h Mon Feb 9 21:00:56 2015 (r278473) @@ -466,6 +466,7 @@ typedef struct acpi_table_dmar /* Masks for Flags field above */ #define ACPI_DMAR_INTR_REMAP (1) +#define ACPI_DMAR_X2APIC_OPT_OUT (2) /* DMAR subtable header */ Modified: head/sys/i386/i386/apic_vector.s ============================================================================== --- head/sys/i386/i386/apic_vector.s Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/i386/i386/apic_vector.s Mon Feb 9 
21:00:56 2015 (r278473) @@ -39,10 +39,27 @@ #include "opt_smp.h" #include +#include #include #include "assym.s" + .text + SUPERALIGN_TEXT + /* End Of Interrupt to APIC */ +as_lapic_eoi: + cmpl $0,x2apic_mode + jne 1f + movl lapic_map,%eax + movl $0,LA_EOI(%eax) + ret +1: + movl $MSR_APIC_EOI,%ecx + xorl %eax,%eax + xorl %edx,%edx + wrmsr + ret + /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word @@ -58,16 +75,23 @@ IDTVEC(vec_name) ; \ SET_KERNEL_SREGS ; \ cld ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ - movl lapic, %edx ; /* pointer to local APIC */ \ + cmpl $0,x2apic_mode ; \ + je 1f ; \ + movl $(MSR_APIC_ISR0 + index),%ecx ; \ + rdmsr ; \ + jmp 2f ; \ +1: ; \ + movl lapic_map, %edx ;/* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ +2: ; \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ - jz 1f ; \ + jz 3f ; \ addl $(32 * index),%eax ; \ pushl %esp ; \ pushl %eax ; /* pass the IRQ */ \ call lapic_handle_intr ; \ addl $8, %esp ; /* discard parameter */ \ -1: ; \ +3: ; \ MEXITCOUNT ; \ jmp doreti @@ -164,8 +188,7 @@ IDTVEC(xen_intr_upcall) .text SUPERALIGN_TEXT invltlb_ret: - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + call as_lapic_eoi POP_FRAME iret @@ -232,8 +255,7 @@ IDTVEC(ipi_intr_bitmap_handler) SET_KERNEL_SREGS cld - movl lapic, %edx - movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + call as_lapic_eoi FAKE_MCOUNT(TF_EIP(%esp)) @@ -251,9 +273,7 @@ IDTVEC(cpustop) SET_KERNEL_SREGS cld - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - + call as_lapic_eoi call cpustop_handler POP_FRAME @@ -270,9 +290,7 @@ IDTVEC(cpususpend) SET_KERNEL_SREGS cld - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ - + call as_lapic_eoi call cpususpend_handler POP_FRAME @@ -298,8 +316,7 @@ IDTVEC(rendezvous) #endif call smp_rendezvous_action - movl lapic, %eax - movl $0, 
LA_EOI(%eax) /* End Of Interrupt to APIC */ + call as_lapic_eoi POP_FRAME iret @@ -315,8 +332,7 @@ IDTVEC(lazypmap) call pmap_lazyfix_action - movl lapic, %eax - movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + call as_lapic_eoi POP_FRAME iret #endif /* SMP */ Modified: head/sys/i386/i386/genassym.c ============================================================================== --- head/sys/i386/i386/genassym.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/i386/i386/genassym.c Mon Feb 9 21:00:56 2015 (r278473) @@ -219,13 +219,8 @@ ASSYM(PC_CURPMAP, offsetof(struct pcpu, ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss)); #ifdef DEV_APIC -ASSYM(LA_VER, offsetof(struct LAPIC, version)); -ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); -ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); -ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); -ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); -ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); -ASSYM(LA_ISR, offsetof(struct LAPIC, isr0)); +ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL); +ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL); #endif ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); Modified: head/sys/i386/i386/mp_machdep.c ============================================================================== --- head/sys/i386/i386/mp_machdep.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/i386/i386/mp_machdep.c Mon Feb 9 21:00:56 2015 (r278473) @@ -719,8 +719,11 @@ init_secondary(void) load_cr0(cr0); CHECK_WRITE(0x38, 5); - /* Disable local APIC just to be sure. */ - lapic_disable(); + /* + * On real hardware, switch to x2apic mode if possible. + * Disable local APIC until BSP directed APs to run. + */ + lapic_xapic_mode(); /* signal our startup to the BSP. 
*/ mp_naps++; Modified: head/sys/i386/include/cpufunc.h ============================================================================== --- head/sys/i386/include/cpufunc.h Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/i386/include/cpufunc.h Mon Feb 9 21:00:56 2015 (r278473) @@ -346,6 +346,15 @@ rdmsr(u_int msr) return (rv); } +static __inline uint32_t +rdmsr32(u_int msr) +{ + uint32_t low; + + __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx"); + return (low); +} + static __inline uint64_t rdpmc(u_int pmc) { Modified: head/sys/x86/acpica/madt.c ============================================================================== --- head/sys/x86/acpica/madt.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/x86/acpica/madt.c Mon Feb 9 21:00:56 2015 (r278473) @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -127,8 +128,27 @@ madt_probe_cpus(void) static int madt_setup_local(void) { + ACPI_TABLE_DMAR *dmartbl; + vm_paddr_t dmartbl_physaddr; madt = pmap_mapbios(madt_physaddr, madt_length); + if ((cpu_feature2 & CPUID2_X2APIC) != 0) { + x2apic_mode = 1; + dmartbl_physaddr = acpi_find_table(ACPI_SIG_DMAR); + if (dmartbl_physaddr != 0) { + dmartbl = acpi_map_table(dmartbl_physaddr, + ACPI_SIG_DMAR); + if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0) { + x2apic_mode = 0; + if (bootverbose) + printf( + "x2APIC available but disabled by DMAR table\n"); + } + acpi_unmap_table(dmartbl); + } + TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_mode); + } + lapic_init(madt->Address); printf("ACPI APIC Table: <%.*s %.*s>\n", (int)sizeof(madt->Header.OemId), madt->Header.OemId, Modified: head/sys/x86/include/apicreg.h ============================================================================== --- head/sys/x86/include/apicreg.h Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/x86/include/apicreg.h Mon Feb 9 21:00:56 2015 (r278473) @@ -193,6 +193,66 @@ struct LAPIC { typedef struct LAPIC lapic_t; +enum LAPIC_REGISTERS { + LAPIC_ID = 
0x2, + LAPIC_VERSION = 0x3, + LAPIC_TPR = 0x8, + LAPIC_APR = 0x9, + LAPIC_PPR = 0xa, + LAPIC_EOI = 0xb, + LAPIC_LDR = 0xd, + LAPIC_DFR = 0xe, /* Not in x2APIC */ + LAPIC_SVR = 0xf, + LAPIC_ISR0 = 0x10, + LAPIC_ISR1 = 0x11, + LAPIC_ISR2 = 0x12, + LAPIC_ISR3 = 0x13, + LAPIC_ISR4 = 0x14, + LAPIC_ISR5 = 0x15, + LAPIC_ISR6 = 0x16, + LAPIC_ISR7 = 0x17, + LAPIC_TMR0 = 0x18, + LAPIC_TMR1 = 0x19, + LAPIC_TMR2 = 0x1a, + LAPIC_TMR3 = 0x1b, + LAPIC_TMR4 = 0x1c, + LAPIC_TMR5 = 0x1d, + LAPIC_TMR6 = 0x1e, + LAPIC_TMR7 = 0x1f, + LAPIC_IRR0 = 0x20, + LAPIC_IRR1 = 0x21, + LAPIC_IRR2 = 0x22, + LAPIC_IRR3 = 0x23, + LAPIC_IRR4 = 0x24, + LAPIC_IRR5 = 0x25, + LAPIC_IRR6 = 0x26, + LAPIC_IRR7 = 0x27, + LAPIC_ESR = 0x28, + LAPIC_LVT_CMCI = 0x2f, + LAPIC_ICR_LO = 0x30, + LAPIC_ICR_HI = 0x31, /* Not in x2APIC */ + LAPIC_LVT_TIMER = 0x32, + LAPIC_LVT_THERMAL = 0x33, + LAPIC_LVT_PCINT = 0x34, + LAPIC_LVT_LINT0 = 0x35, + LAPIC_LVT_LINT1 = 0x36, + LAPIC_LVT_ERROR = 0x37, + LAPIC_ICR_TIMER = 0x38, + LAPIC_CCR_TIMER = 0x39, + LAPIC_DCR_TIMER = 0x3e, + LAPIC_SELF_IPI = 0x3f, /* Only in x2APIC */ +}; + +/* + * The LAPIC_SELF_IPI register only exists in x2APIC mode. The + * formula below is applicable only to reserve the memory region, + * i.e. for xAPIC mode, where LAPIC_SELF_IPI finely serves as the + * address past end of the region. 
+ */ +#define LAPIC_MEM_REGION (LAPIC_SELF_IPI * 0x10) + +#define LAPIC_MEM_MUL 0x10 + /****************************************************************************** * I/O APIC structure */ Modified: head/sys/x86/include/apicvar.h ============================================================================== --- head/sys/x86/include/apicvar.h Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/x86/include/apicvar.h Mon Feb 9 21:00:56 2015 (r278473) @@ -189,6 +189,7 @@ int ioapic_set_smi(void *cookie, u_int p struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); + void (*xapic_mode)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); @@ -243,6 +244,13 @@ lapic_init(vm_paddr_t addr) } static inline void +lapic_xapic_mode(void) +{ + + apic_ops.xapic_mode(); +} + +static inline void lapic_setup(int boot) { @@ -417,5 +425,11 @@ void lapic_handle_intr(int vector, struc void lapic_handle_timer(struct trapframe *frame); void xen_intr_handle_upcall(struct trapframe *frame); +extern int x2apic_mode; + +#ifdef _SYS_SYSCTL_H_ +SYSCTL_DECL(_hw_apic); +#endif + #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */ Modified: head/sys/x86/include/specialreg.h ============================================================================== --- head/sys/x86/include/specialreg.h Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/x86/include/specialreg.h Mon Feb 9 21:00:56 2015 (r278473) @@ -470,6 +470,7 @@ /* * X2APIC MSRs */ +#define MSR_APIC_000 0x800 #define MSR_APIC_ID 0x802 #define MSR_APIC_VERSION 0x803 #define MSR_APIC_TPR 0x808 Modified: head/sys/x86/x86/io_apic.c ============================================================================== --- head/sys/x86/x86/io_apic.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/x86/x86/io_apic.c Mon Feb 9 21:00:56 2015 (r278473) @@ -130,7 +130,6 @@ struct pic ioapic_template = { ioapic_en static int next_ioapic_base; static u_int next_id; -static SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC 
options"); static int enable_extint; SYSCTL_INT(_hw_apic, OID_AUTO, enable_extint, CTLFLAG_RDTUN, &enable_extint, 0, "Enable the ExtINT pin in the first I/O APIC"); @@ -896,7 +895,7 @@ apic_attach(device_t dev) int i; /* Reserve the local APIC. */ - apic_add_resource(dev, 0, lapic_paddr, sizeof(lapic_t)); + apic_add_resource(dev, 0, lapic_paddr, LAPIC_MEM_REGION); i = 1; STAILQ_FOREACH(io, &ioapic_list, io_next) { apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION); Modified: head/sys/x86/x86/local_apic.c ============================================================================== --- head/sys/x86/x86/local_apic.c Mon Feb 9 19:28:11 2015 (r278472) +++ head/sys/x86/x86/local_apic.c Mon Feb 9 21:00:56 2015 (r278473) @@ -49,12 +49,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -154,11 +156,99 @@ static u_int32_t lapic_timer_divisors[] extern inthand_t IDTVEC(rsvd); -volatile lapic_t *lapic; +volatile char *lapic_map; vm_paddr_t lapic_paddr; +int x2apic_mode; static u_long lapic_timer_divisor; static struct eventtimer lapic_et; +SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); +SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); + +static uint32_t +lapic_read32(enum LAPIC_REGISTERS reg) +{ + uint32_t res; + + if (x2apic_mode) { + res = rdmsr32(MSR_APIC_000 + reg); + } else { + res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); + } + return (res); +} + +static void +lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) +{ + + if (x2apic_mode) { + mfence(); + wrmsr(MSR_APIC_000 + reg, val); + } else { + *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; + } +} + +static void +lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) +{ + + if (x2apic_mode) { + wrmsr(MSR_APIC_000 + reg, val); + } else { + *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; + } +} + +static uint64_t 
+lapic_read_icr(void) +{ + uint64_t v; + uint32_t vhi, vlo; + + if (x2apic_mode) { + v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); + } else { + vhi = lapic_read32(LAPIC_ICR_HI); + vlo = lapic_read32(LAPIC_ICR_LO); + v = ((uint64_t)vhi << 32) | vlo; + } + return (v); +} + +static uint64_t +lapic_read_icr_lo(void) +{ + + return (lapic_read32(LAPIC_ICR_LO)); +} + +static void +lapic_write_icr(uint32_t vhi, uint32_t vlo) +{ + uint64_t v; + + if (x2apic_mode) { + v = ((uint64_t)vhi << 32) | vlo; + mfence(); + wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); + } else { + lapic_write32(LAPIC_ICR_HI, vhi); + lapic_write32(LAPIC_ICR_LO, vlo); + } +} + +static void +native_lapic_enable_x2apic(void) +{ + uint64_t apic_base; + + apic_base = rdmsr(MSR_APICBASE); + apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; + wrmsr(MSR_APICBASE, apic_base); +} + static void lapic_enable(void); static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void lapic_timer_oneshot(struct lapic *, @@ -179,6 +269,7 @@ struct pic lapic_pic = { .pic_resume = l /* Forward declarations for apic_ops */ static void native_lapic_create(u_int apic_id, int boot_cpu); static void native_lapic_init(vm_paddr_t addr); +static void native_lapic_xapic_mode(void); static void native_lapic_setup(int boot); static void native_lapic_dump(const char *str); static void native_lapic_disable(void); @@ -213,6 +304,7 @@ static int native_lapic_set_lvt_trigger struct apic_ops apic_ops = { .create = native_lapic_create, .init = native_lapic_init, + .xapic_mode = native_lapic_xapic_mode, .setup = native_lapic_setup, .dump = native_lapic_dump, .disable = native_lapic_disable, @@ -291,11 +383,20 @@ native_lapic_init(vm_paddr_t addr) u_int regs[4]; int i, arat; - /* Map the local APIC and setup the spurious interrupt handler. */ + /* + * Enable x2APIC mode if possible, otherwise map the local + * APIC registers page. 
+ */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); - lapic_paddr = addr; - lapic = pmap_mapdev(addr, sizeof(lapic_t)); + if (x2apic_mode) { + native_lapic_enable_x2apic(); + } else { + lapic_paddr = addr; + lapic_map = pmap_mapdev(addr, PAGE_SIZE); + } + + /* Setup the spurious interrupt handler. */ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); @@ -398,33 +499,51 @@ native_lapic_dump(const char* str) { uint32_t maxlvt; - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); - printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", - lapic->id, lapic->version, lapic->ldr, lapic->dfr); - printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", - lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); + printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", + lapic_read32(LAPIC_ID), lapic_read32(LAPIC_VERSION), + lapic_read32(LAPIC_LDR), x2apic_mode ? 
0 : lapic_read32(LAPIC_DFR)); + if ((cpu_feature2 & CPUID2_X2APIC) != 0) + printf(" x2APIC: %d", x2apic_mode); + printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", + lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), + lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", - lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error); + lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), + lapic_read32(LAPIC_LVT_ERROR)); if (maxlvt >= APIC_LVT_PMC) - printf(" pmc: 0x%08x", lapic->lvt_pcint); + printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); printf("\n"); if (maxlvt >= APIC_LVT_CMCI) - printf(" cmci: 0x%08x\n", lapic->lvt_cmci); + printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); +} + +static void +native_lapic_xapic_mode(void) +{ + register_t saveintr; + + saveintr = intr_disable(); + if (x2apic_mode) + native_lapic_enable_x2apic(); + native_lapic_disable(); + intr_restore(saveintr); } static void native_lapic_setup(int boot) { struct lapic *la; - u_int32_t maxlvt; + uint32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; + saveintr = intr_disable(); + la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); - saveintr = intr_disable(); - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); @@ -433,16 +552,21 @@ native_lapic_setup(int boot) lapic_enable(); /* Program LINT[01] LVT entries. */ - lapic->lvt_lint0 = lvt_mode(la, APIC_LVT_LINT0, lapic->lvt_lint0); - lapic->lvt_lint1 = lvt_mode(la, APIC_LVT_LINT1, lapic->lvt_lint1); + lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, + lapic_read32(LAPIC_LVT_LINT0))); + lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, + lapic_read32(LAPIC_LVT_LINT1))); /* Program the PMC LVT entry if present. 
*/ - if (maxlvt >= APIC_LVT_PMC) - lapic->lvt_pcint = lvt_mode(la, APIC_LVT_PMC, lapic->lvt_pcint); + if (maxlvt >= APIC_LVT_PMC) { + lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, + LAPIC_LVT_PCINT)); + } /* Program timer LVT and setup handler. */ - la->lvt_timer_cache = lapic->lvt_timer = - lvt_mode(la, APIC_LVT_TIMER, lapic->lvt_timer); + la->lvt_timer_cache = lvt_mode(la, APIC_LVT_TIMER, + lapic_read32(LAPIC_LVT_TIMER)); + lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_cache); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); @@ -460,14 +584,17 @@ native_lapic_setup(int boot) } /* Program error LVT and clear any existing errors. */ - lapic->lvt_error = lvt_mode(la, APIC_LVT_ERROR, lapic->lvt_error); - lapic->esr = 0; + lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, + lapic_read32(LAPIC_LVT_ERROR))); + lapic_write32(LAPIC_ESR, 0); /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ - if (maxlvt >= APIC_LVT_CMCI) - lapic->lvt_cmci = lvt_mode(la, APIC_LVT_CMCI, lapic->lvt_cmci); + if (maxlvt >= APIC_LVT_CMCI) { + lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, + lapic_read32(LAPIC_LVT_CMCI))); + } intr_restore(saveintr); } @@ -478,9 +605,9 @@ native_lapic_reenable_pmc(void) #ifdef HWPMC_HOOKS uint32_t value; - value = lapic->lvt_pcint; + value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; - lapic->lvt_pcint = value; + lapic_write32(LAPIC_LVT_PCINT, value); #endif } @@ -491,7 +618,8 @@ lapic_update_pmc(void *dummy) struct lapic *la; la = &lapics[lapic_id()]; - lapic->lvt_pcint = lvt_mode(la, APIC_LVT_PMC, lapic->lvt_pcint); + lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, + lapic_read32(LAPIC_LVT_PCINT))); } #endif @@ -502,11 +630,11 @@ native_lapic_enable_pmc(void) u_int32_t maxlvt; /* Fail if the local APIC is not present. */ - if (lapic == NULL) + if (!x2apic_mode && lapic_map == NULL) return (0); /* Fail if the PMC LVT is not present. 
*/ - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return (0); @@ -536,11 +664,11 @@ native_lapic_disable_pmc(void) u_int32_t maxlvt; /* Fail if the local APIC is not present. */ - if (lapic == NULL) + if (!x2apic_mode && lapic_map == NULL) return; /* Fail if the PMC LVT is not present. */ - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return; @@ -569,7 +697,8 @@ lapic_et_start(struct eventtimer *et, sb lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la, APIC_TIMER_MAX_COUNT, 0); DELAY(1000000); - value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; + value = APIC_TIMER_MAX_COUNT - + lapic_read32(LAPIC_CCR_TIMER); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; @@ -613,21 +742,21 @@ native_lapic_disable(void) uint32_t value; /* Software disable the local APIC. */ - value = lapic->svr; + value = lapic_read32(LAPIC_SVR); value &= ~APIC_SVR_SWEN; - lapic->svr = value; + lapic_write32(LAPIC_SVR, value); } static void lapic_enable(void) { - u_int32_t value; + uint32_t value; /* Program the spurious vector to enable the local APIC. */ - value = lapic->svr; + value = lapic_read32(LAPIC_SVR); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); - value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); - lapic->svr = value; + value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; + lapic_write32(LAPIC_SVR, value); } /* Reset the local APIC on the BSP during resume. 
*/ @@ -641,27 +770,29 @@ lapic_resume(struct pic *pic, bool suspe static int native_lapic_id(void) { + uint32_t v; - KASSERT(lapic != NULL, ("local APIC is not mapped")); - return (lapic->id >> APIC_ID_SHIFT); + KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); + v = lapic_read32(LAPIC_ID); + if (!x2apic_mode) + v >>= APIC_ID_SHIFT; + return (v); } static int native_lapic_intr_pending(u_int vector) { - volatile u_int32_t *irr; + uint32_t irr; /* - * The IRR registers are an array of 128-bit registers each of - * which only describes 32 interrupts in the low 32 bits.. Thus, - * we divide the vector by 32 to get the 128-bit index. We then - * multiply that index by 4 to get the equivalent index from - * treating the IRR as an array of 32-bit registers. Finally, we - * modulus the vector by 32 to determine the individual bit to - * test. + * The IRR registers are an array of registers each of which + * only describes 32 interrupts in the low 32 bits. Thus, we + * divide the vector by 32 to get the register index. + * Finally, we modulus the vector by 32 to determine the + * individual bit to test. 
*/ - irr = &lapic->irr0; - return (irr[(vector / 32) * 4] & 1 << (vector % 32)); + irr = lapic_read32(LAPIC_IRR0 + vector / 32); + return (irr & 1 << (vector % 32)); } static void @@ -818,13 +949,13 @@ static void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR - lapic->tpr = vector; + lapic_write32(LAPIC_TPR, vector); #else - u_int32_t tpr; + uint32_t tpr; - tpr = lapic->tpr & ~APIC_TPR_PRIO; + tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; tpr |= vector; - lapic->tpr = tpr; + lapic_write32(LAPIC_TPR, tpr); #endif } @@ -832,7 +963,7 @@ static void native_lapic_eoi(void) { - lapic->eoi = 0; + lapic_write32_nofence(LAPIC_EOI, 0); } void @@ -894,46 +1025,46 @@ lapic_timer_set_divisor(u_int divisor) KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); - lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1]; + lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); } static void lapic_timer_oneshot(struct lapic *la, u_int count, int enable_int) { - u_int32_t value; + uint32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT; if (enable_int) value &= ~APIC_LVT_M; - lapic->lvt_timer = value; - lapic->icr_timer = count; + lapic_write32(LAPIC_LVT_TIMER, value); + lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_periodic(struct lapic *la, u_int count, int enable_int) { - u_int32_t value; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***