Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 9 Feb 2015 21:00:57 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r278473 - in head/sys: amd64/amd64 amd64/include amd64/vmm contrib/dev/acpica/include i386/i386 i386/include x86/acpica x86/include x86/x86 x86/xen
Message-ID:  <201502092100.t19L0vsv010944@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Mon Feb  9 21:00:56 2015
New Revision: 278473
URL: https://svnweb.freebsd.org/changeset/base/278473

Log:
  Add x2APIC support.  Enable it by default if CPU is capable.  The
  hw.x2apic_enable tunable allows disabling it from the loader prompt.
  
  To closely repeat the effects of the uncached memory ops when accessing
  registers in the xAPIC mode, the x2APIC writes to MSRs are preceded
  by mfence, except for the EOI notifications.  This is probably too
  strict: only ICR writes that send IPIs require serialization to ensure
  that other CPUs see the previous actions when the IPI is delivered.  This
  may be changed later.
  
  In the vmm justreturn IPI handler, jump to doreti_iret instead of doing
  iretq inline, to handle corner conditions.
  
  Note that the patch only switches LAPICs into x2APIC mode.  It does not
  enable FreeBSD to support more than 255 CPUs, which requires parsing
  x2APIC MADT entries and doing interrupt remapping, but it is a required
  step on the way.
  
  Reviewed by:	neel
  Tested by:	pho (real hardware), neel (on bhyve)
  Discussed with:	jhb, grehan
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 months

Modified:
  head/sys/amd64/amd64/apic_vector.S
  head/sys/amd64/amd64/genassym.c
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/include/cpufunc.h
  head/sys/amd64/vmm/vmm_support.S
  head/sys/contrib/dev/acpica/include/actbl2.h
  head/sys/i386/i386/apic_vector.s
  head/sys/i386/i386/genassym.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/include/cpufunc.h
  head/sys/x86/acpica/madt.c
  head/sys/x86/include/apicreg.h
  head/sys/x86/include/apicvar.h
  head/sys/x86/include/specialreg.h
  head/sys/x86/x86/io_apic.c
  head/sys/x86/x86/local_apic.c
  head/sys/x86/xen/xen_apic.c

Modified: head/sys/amd64/amd64/apic_vector.S
==============================================================================
--- head/sys/amd64/amd64/apic_vector.S	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/amd64/amd64/apic_vector.S	Mon Feb  9 21:00:56 2015	(r278473)
@@ -39,6 +39,7 @@
 #include "opt_smp.h"
 
 #include <machine/asmacros.h>
+#include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
 #include "assym.s"
@@ -49,6 +50,22 @@
 #define LK
 #endif
 
+	.text
+	SUPERALIGN_TEXT
+	/* End Of Interrupt to APIC */
+as_lapic_eoi:
+	cmpl	$0,x2apic_mode
+	jne	1f
+	movq	lapic_map,%rax
+	movl	$0,LA_EOI(%rax)
+	ret
+1:
+	movl	$MSR_APIC_EOI,%ecx
+	xorl	%eax,%eax
+	xorl	%edx,%edx
+	wrmsr
+	ret
+
 /*
  * I/O Interrupt Entry Point.  Rather than having one entry point for
  * each interrupt source, we use one entry point for each 32-bit word
@@ -62,15 +79,22 @@
 IDTVEC(vec_name) ;							\
 	PUSH_FRAME ;							\
 	FAKE_MCOUNT(TF_RIP(%rsp)) ;					\
-	movq	lapic, %rdx ;	/* pointer to local APIC */		\
+	cmpl	$0,x2apic_mode ;					\
+	je	1f ;							\
+	movl	$(MSR_APIC_ISR0 + index),%ecx ;				\
+	rdmsr ;								\
+	jmp	2f ;							\
+1: ;									\
+	movq	lapic_map, %rdx ;	/* pointer to local APIC */	\
 	movl	LA_ISR + 16 * (index)(%rdx), %eax ;	/* load ISR */	\
+2: ;									\
 	bsrl	%eax, %eax ;	/* index of highest set bit in ISR */	\
-	jz	1f ;							\
+	jz	3f ;							\
 	addl	$(32 * index),%eax ;					\
 	movq	%rsp, %rsi	;                                       \
 	movl	%eax, %edi ;	/* pass the IRQ */			\
 	call	lapic_handle_intr ;					\
-1: ;									\
+3: ;									\
 	MEXITCOUNT ;							\
 	jmp	doreti
 
@@ -160,8 +184,7 @@ IDTVEC(xen_intr_upcall)
 
 	SUPERALIGN_TEXT
 invltlb_ret:
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	POP_FRAME
 	jmp	doreti_iret
 
@@ -228,8 +251,7 @@ IDTVEC(invlcache)
 IDTVEC(ipi_intr_bitmap_handler)		
 	PUSH_FRAME
 
-	movq	lapic, %rdx
-	movl	$0, LA_EOI(%rdx)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	
 	FAKE_MCOUNT(TF_RIP(%rsp))
 
@@ -245,8 +267,7 @@ IDTVEC(ipi_intr_bitmap_handler)		
 IDTVEC(cpustop)
 	PUSH_FRAME
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 
 	call	cpustop_handler
 	jmp	doreti
@@ -260,8 +281,7 @@ IDTVEC(cpususpend)
 	PUSH_FRAME
 
 	call	cpususpend_handler
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	jmp	doreti
 
 /*
@@ -279,7 +299,6 @@ IDTVEC(rendezvous)
 	incq	(%rax)
 #endif
 	call	smp_rendezvous_action
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	jmp	doreti
 #endif /* SMP */

Modified: head/sys/amd64/amd64/genassym.c
==============================================================================
--- head/sys/amd64/amd64/genassym.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/amd64/amd64/genassym.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -220,13 +220,8 @@ ASSYM(PC_COMMONTSSP, offsetof(struct pcp
 ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
 ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
  
-ASSYM(LA_VER, offsetof(struct LAPIC, version));
-ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
-ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
-ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
-ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
-ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
-ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
+ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
+ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);
 
 ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
 ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/amd64/amd64/mp_machdep.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -705,8 +705,11 @@ init_secondary(void)
 	wrmsr(MSR_STAR, msr);
 	wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
 
-	/* Disable local APIC just to be sure. */
-	lapic_disable();
+	/*
+	 * On real hardware, switch to x2apic mode if possible.
+	 * Disable local APIC until BSP directed APs to run.
+	 */
+	lapic_xapic_mode();
 
 	/* signal our startup to the BSP. */
 	mp_naps++;

Modified: head/sys/amd64/include/cpufunc.h
==============================================================================
--- head/sys/amd64/include/cpufunc.h	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/amd64/include/cpufunc.h	Mon Feb  9 21:00:56 2015	(r278473)
@@ -343,6 +343,15 @@ rdmsr(u_int msr)
 	return (low | ((uint64_t)high << 32));
 }
 
+static __inline uint32_t
+rdmsr32(u_int msr)
+{
+	uint32_t low;
+
+	__asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "rdx");
+	return (low);
+}
+
 static __inline uint64_t
 rdpmc(u_int pmc)
 {
@@ -826,6 +835,7 @@ u_long	rcr2(void);
 u_long	rcr3(void);
 u_long	rcr4(void);
 uint64_t rdmsr(u_int msr);
+uint32_t rdmsr32(u_int msr);
 uint64_t rdpmc(u_int pmc);
 uint64_t rdr0(void);
 uint64_t rdr1(void);

Modified: head/sys/amd64/vmm/vmm_support.S
==============================================================================
--- head/sys/amd64/vmm/vmm_support.S	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/amd64/vmm/vmm_support.S	Mon Feb  9 21:00:56 2015	(r278473)
@@ -30,13 +30,14 @@
 
 #include <machine/asmacros.h>
 
-#define	LA_EOI	0xB0
-
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(justreturn)
+	pushq	%rdx
 	pushq	%rax
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)
+	pushq	%rcx
+	call	as_lapic_eoi
+	popq	%rcx
 	popq	%rax
-	iretq
+	popq	%rdx
+	jmp	doreti_iret

Modified: head/sys/contrib/dev/acpica/include/actbl2.h
==============================================================================
--- head/sys/contrib/dev/acpica/include/actbl2.h	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/contrib/dev/acpica/include/actbl2.h	Mon Feb  9 21:00:56 2015	(r278473)
@@ -466,6 +466,7 @@ typedef struct acpi_table_dmar
 /* Masks for Flags field above */
 
 #define ACPI_DMAR_INTR_REMAP        (1)
+#define	ACPI_DMAR_X2APIC_OPT_OUT    (2)
 
 
 /* DMAR subtable header */

Modified: head/sys/i386/i386/apic_vector.s
==============================================================================
--- head/sys/i386/i386/apic_vector.s	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/i386/i386/apic_vector.s	Mon Feb  9 21:00:56 2015	(r278473)
@@ -39,10 +39,27 @@
 #include "opt_smp.h"
 
 #include <machine/asmacros.h>
+#include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
 #include "assym.s"
 
+	.text
+	SUPERALIGN_TEXT
+	/* End Of Interrupt to APIC */
+as_lapic_eoi:
+	cmpl	$0,x2apic_mode
+	jne	1f
+	movl	lapic_map,%eax
+	movl	$0,LA_EOI(%eax)
+	ret
+1:
+	movl	$MSR_APIC_EOI,%ecx
+	xorl	%eax,%eax
+	xorl	%edx,%edx
+	wrmsr
+	ret
+
 /*
  * I/O Interrupt Entry Point.  Rather than having one entry point for
  * each interrupt source, we use one entry point for each 32-bit word
@@ -58,16 +75,23 @@ IDTVEC(vec_name) ;							\
 	SET_KERNEL_SREGS ;						\
 	cld ;								\
 	FAKE_MCOUNT(TF_EIP(%esp)) ;					\
-	movl	lapic, %edx ;	/* pointer to local APIC */		\
+	cmpl	$0,x2apic_mode ;					\
+	je	1f ;							\
+	movl	$(MSR_APIC_ISR0 + index),%ecx ;				\
+	rdmsr ;								\
+	jmp	2f ;							\
+1: ;									\
+	movl	lapic_map, %edx ;/* pointer to local APIC */		\
 	movl	LA_ISR + 16 * (index)(%edx), %eax ;	/* load ISR */	\
+2: ;									\
 	bsrl	%eax, %eax ;	/* index of highest set bit in ISR */	\
-	jz	1f ;							\
+	jz	3f ;							\
 	addl	$(32 * index),%eax ;					\
 	pushl	%esp		;                                       \
 	pushl	%eax ;		/* pass the IRQ */			\
 	call	lapic_handle_intr ;					\
 	addl	$8, %esp ;	/* discard parameter */			\
-1: ;									\
+3: ;									\
 	MEXITCOUNT ;							\
 	jmp	doreti
 
@@ -164,8 +188,7 @@ IDTVEC(xen_intr_upcall)
 	.text
 	SUPERALIGN_TEXT
 invltlb_ret:
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	POP_FRAME
 	iret
 
@@ -232,8 +255,7 @@ IDTVEC(ipi_intr_bitmap_handler)	
 	SET_KERNEL_SREGS
 	cld
 
-	movl	lapic, %edx
-	movl	$0, LA_EOI(%edx)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	
 	FAKE_MCOUNT(TF_EIP(%esp))
 
@@ -251,9 +273,7 @@ IDTVEC(cpustop)
 	SET_KERNEL_SREGS
 	cld
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
-
+	call	as_lapic_eoi
 	call	cpustop_handler
 
 	POP_FRAME
@@ -270,9 +290,7 @@ IDTVEC(cpususpend)
 	SET_KERNEL_SREGS
 	cld
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
-
+	call	as_lapic_eoi
 	call	cpususpend_handler
 
 	POP_FRAME
@@ -298,8 +316,7 @@ IDTVEC(rendezvous)
 #endif
 	call	smp_rendezvous_action
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	POP_FRAME
 	iret
 	
@@ -315,8 +332,7 @@ IDTVEC(lazypmap)
 
 	call	pmap_lazyfix_action
 
-	movl	lapic, %eax
-	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
+	call	as_lapic_eoi
 	POP_FRAME
 	iret
 #endif /* SMP */

Modified: head/sys/i386/i386/genassym.c
==============================================================================
--- head/sys/i386/i386/genassym.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/i386/i386/genassym.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -219,13 +219,8 @@ ASSYM(PC_CURPMAP, offsetof(struct pcpu, 
 ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss));
 
 #ifdef DEV_APIC
-ASSYM(LA_VER, offsetof(struct LAPIC, version));
-ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
-ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
-ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
-ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
-ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
-ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
+ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
+ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);
 #endif
 
 ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/i386/i386/mp_machdep.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -719,8 +719,11 @@ init_secondary(void)
 	load_cr0(cr0);
 	CHECK_WRITE(0x38, 5);
 	
-	/* Disable local APIC just to be sure. */
-	lapic_disable();
+	/*
+	 * On real hardware, switch to x2apic mode if possible.
+	 * Disable local APIC until BSP directed APs to run.
+	 */
+	lapic_xapic_mode();
 
 	/* signal our startup to the BSP. */
 	mp_naps++;

Modified: head/sys/i386/include/cpufunc.h
==============================================================================
--- head/sys/i386/include/cpufunc.h	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/i386/include/cpufunc.h	Mon Feb  9 21:00:56 2015	(r278473)
@@ -346,6 +346,15 @@ rdmsr(u_int msr)
 	return (rv);
 }
 
+static __inline uint32_t
+rdmsr32(u_int msr)
+{
+	uint32_t low;
+
+	__asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx");
+	return (low);
+}
+
 static __inline uint64_t
 rdpmc(u_int pmc)
 {

Modified: head/sys/x86/acpica/madt.c
==============================================================================
--- head/sys/x86/acpica/madt.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/x86/acpica/madt.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <x86/apicreg.h>
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
+#include <machine/md_var.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <contrib/dev/acpica/include/actables.h>
@@ -127,8 +128,27 @@ madt_probe_cpus(void)
 static int
 madt_setup_local(void)
 {
+	ACPI_TABLE_DMAR *dmartbl;
+	vm_paddr_t dmartbl_physaddr;
 
 	madt = pmap_mapbios(madt_physaddr, madt_length);
+	if ((cpu_feature2 & CPUID2_X2APIC) != 0) {
+		x2apic_mode = 1;
+		dmartbl_physaddr = acpi_find_table(ACPI_SIG_DMAR);
+		if (dmartbl_physaddr != 0) {
+			dmartbl = acpi_map_table(dmartbl_physaddr,
+			    ACPI_SIG_DMAR);
+			if ((dmartbl->Flags & ACPI_DMAR_X2APIC_OPT_OUT) != 0) {
+				x2apic_mode = 0;
+				if (bootverbose)
+					printf(
+		"x2APIC available but disabled by DMAR table\n");
+			}
+			acpi_unmap_table(dmartbl);
+		}
+		TUNABLE_INT_FETCH("hw.x2apic_enable", &x2apic_mode);
+	}
+
 	lapic_init(madt->Address);
 	printf("ACPI APIC Table: <%.*s %.*s>\n",
 	    (int)sizeof(madt->Header.OemId), madt->Header.OemId,

Modified: head/sys/x86/include/apicreg.h
==============================================================================
--- head/sys/x86/include/apicreg.h	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/x86/include/apicreg.h	Mon Feb  9 21:00:56 2015	(r278473)
@@ -193,6 +193,66 @@ struct LAPIC {
 
 typedef struct LAPIC lapic_t;
 
+enum LAPIC_REGISTERS {
+	LAPIC_ID	= 0x2,
+	LAPIC_VERSION	= 0x3,
+	LAPIC_TPR	= 0x8,
+	LAPIC_APR	= 0x9,
+	LAPIC_PPR	= 0xa,
+	LAPIC_EOI	= 0xb,
+	LAPIC_LDR	= 0xd,
+	LAPIC_DFR	= 0xe, /* Not in x2APIC */
+	LAPIC_SVR	= 0xf,
+	LAPIC_ISR0	= 0x10,
+	LAPIC_ISR1	= 0x11,
+	LAPIC_ISR2	= 0x12,
+	LAPIC_ISR3	= 0x13,
+	LAPIC_ISR4	= 0x14,
+	LAPIC_ISR5	= 0x15,
+	LAPIC_ISR6	= 0x16,
+	LAPIC_ISR7	= 0x17,
+	LAPIC_TMR0	= 0x18,
+	LAPIC_TMR1	= 0x19,
+	LAPIC_TMR2	= 0x1a,
+	LAPIC_TMR3	= 0x1b,
+	LAPIC_TMR4	= 0x1c,
+	LAPIC_TMR5	= 0x1d,
+	LAPIC_TMR6	= 0x1e,
+	LAPIC_TMR7	= 0x1f,
+	LAPIC_IRR0	= 0x20,
+	LAPIC_IRR1	= 0x21,
+	LAPIC_IRR2	= 0x22,
+	LAPIC_IRR3	= 0x23,
+	LAPIC_IRR4	= 0x24,
+	LAPIC_IRR5	= 0x25,
+	LAPIC_IRR6	= 0x26,
+	LAPIC_IRR7	= 0x27,
+	LAPIC_ESR	= 0x28,
+	LAPIC_LVT_CMCI	= 0x2f,
+	LAPIC_ICR_LO	= 0x30,
+	LAPIC_ICR_HI	= 0x31, /* Not in x2APIC */
+	LAPIC_LVT_TIMER	= 0x32,
+	LAPIC_LVT_THERMAL = 0x33,
+	LAPIC_LVT_PCINT	= 0x34,
+	LAPIC_LVT_LINT0	= 0x35,
+	LAPIC_LVT_LINT1	= 0x36,
+	LAPIC_LVT_ERROR	= 0x37,
+	LAPIC_ICR_TIMER	= 0x38,
+	LAPIC_CCR_TIMER	= 0x39,
+	LAPIC_DCR_TIMER	= 0x3e,
+	LAPIC_SELF_IPI	= 0x3f, /* Only in x2APIC */
+};
+
+/*
+ * The LAPIC_SELF_IPI register only exists in x2APIC mode.  The
+ * formula below is applicable only to reserve the memory region,
+ * i.e. for xAPIC mode, where LAPIC_SELF_IPI finely serves as the
+ * address past end of the region.
+ */
+#define	LAPIC_MEM_REGION (LAPIC_SELF_IPI * 0x10)
+
+#define	LAPIC_MEM_MUL	0x10
+
 /******************************************************************************
  * I/O APIC structure
  */

Modified: head/sys/x86/include/apicvar.h
==============================================================================
--- head/sys/x86/include/apicvar.h	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/x86/include/apicvar.h	Mon Feb  9 21:00:56 2015	(r278473)
@@ -189,6 +189,7 @@ int	ioapic_set_smi(void *cookie, u_int p
 struct apic_ops {
 	void	(*create)(u_int, int);
 	void	(*init)(vm_paddr_t);
+	void	(*xapic_mode)(void);
 	void	(*setup)(int);
 	void	(*dump)(const char *);
 	void	(*disable)(void);
@@ -243,6 +244,13 @@ lapic_init(vm_paddr_t addr)
 }
 
 static inline void
+lapic_xapic_mode(void)
+{
+
+	apic_ops.xapic_mode();
+}
+
+static inline void
 lapic_setup(int boot)
 {
 
@@ -417,5 +425,11 @@ void	lapic_handle_intr(int vector, struc
 void	lapic_handle_timer(struct trapframe *frame);
 void	xen_intr_handle_upcall(struct trapframe *frame);
 
+extern int x2apic_mode;
+
+#ifdef _SYS_SYSCTL_H_
+SYSCTL_DECL(_hw_apic);
+#endif
+
 #endif /* !LOCORE */
 #endif /* _X86_APICVAR_H_ */

Modified: head/sys/x86/include/specialreg.h
==============================================================================
--- head/sys/x86/include/specialreg.h	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/x86/include/specialreg.h	Mon Feb  9 21:00:56 2015	(r278473)
@@ -470,6 +470,7 @@
 /*
  * X2APIC MSRs
  */
+#define	MSR_APIC_000		0x800
 #define	MSR_APIC_ID		0x802
 #define	MSR_APIC_VERSION	0x803
 #define	MSR_APIC_TPR		0x808

Modified: head/sys/x86/x86/io_apic.c
==============================================================================
--- head/sys/x86/x86/io_apic.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/x86/x86/io_apic.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -130,7 +130,6 @@ struct pic ioapic_template = { ioapic_en
 static int next_ioapic_base;
 static u_int next_id;
 
-static SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
 static int enable_extint;
 SYSCTL_INT(_hw_apic, OID_AUTO, enable_extint, CTLFLAG_RDTUN, &enable_extint, 0,
     "Enable the ExtINT pin in the first I/O APIC");
@@ -896,7 +895,7 @@ apic_attach(device_t dev)
 	int i;
 
 	/* Reserve the local APIC. */
-	apic_add_resource(dev, 0, lapic_paddr, sizeof(lapic_t));
+	apic_add_resource(dev, 0, lapic_paddr, LAPIC_MEM_REGION);
 	i = 1;
 	STAILQ_FOREACH(io, &ioapic_list, io_next) {
 		apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION);

Modified: head/sys/x86/x86/local_apic.c
==============================================================================
--- head/sys/x86/x86/local_apic.c	Mon Feb  9 19:28:11 2015	(r278472)
+++ head/sys/x86/x86/local_apic.c	Mon Feb  9 21:00:56 2015	(r278473)
@@ -49,12 +49,14 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
+#include <sys/sysctl.h>
 #include <sys/timeet.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <x86/apicreg.h>
+#include <machine/cpufunc.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
@@ -154,11 +156,99 @@ static u_int32_t lapic_timer_divisors[] 
 
 extern inthand_t IDTVEC(rsvd);
 
-volatile lapic_t *lapic;
+volatile char *lapic_map;
 vm_paddr_t lapic_paddr;
+int x2apic_mode;
 static u_long lapic_timer_divisor;
 static struct eventtimer lapic_et;
 
+SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
+SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
+
+static uint32_t
+lapic_read32(enum LAPIC_REGISTERS reg)
+{
+	uint32_t res;
+
+	if (x2apic_mode) {
+		res = rdmsr32(MSR_APIC_000 + reg);
+	} else {
+		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
+	}
+	return (res);
+}
+
+static void
+lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
+{
+
+	if (x2apic_mode) {
+		mfence();
+		wrmsr(MSR_APIC_000 + reg, val);
+	} else {
+		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
+	}
+}
+
+static void
+lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
+{
+
+	if (x2apic_mode) {
+		wrmsr(MSR_APIC_000 + reg, val);
+	} else {
+		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
+	}
+}
+
+static uint64_t
+lapic_read_icr(void)
+{
+	uint64_t v;
+	uint32_t vhi, vlo;
+
+	if (x2apic_mode) {
+		v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
+	} else {
+		vhi = lapic_read32(LAPIC_ICR_HI);
+		vlo = lapic_read32(LAPIC_ICR_LO);
+		v = ((uint64_t)vhi << 32) | vlo;
+	}
+	return (v);
+}
+
+static uint64_t
+lapic_read_icr_lo(void)
+{
+
+	return (lapic_read32(LAPIC_ICR_LO));
+}
+
+static void
+lapic_write_icr(uint32_t vhi, uint32_t vlo)
+{
+	uint64_t v;
+
+	if (x2apic_mode) {
+		v = ((uint64_t)vhi << 32) | vlo;
+		mfence();
+		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
+	} else {
+		lapic_write32(LAPIC_ICR_HI, vhi);
+		lapic_write32(LAPIC_ICR_LO, vlo);
+	}
+}
+
+static void
+native_lapic_enable_x2apic(void)
+{
+	uint64_t apic_base;
+
+	apic_base = rdmsr(MSR_APICBASE);
+	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
+	wrmsr(MSR_APICBASE, apic_base);
+}
+
 static void	lapic_enable(void);
 static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
 static void	lapic_timer_oneshot(struct lapic *,
@@ -179,6 +269,7 @@ struct pic lapic_pic = { .pic_resume = l
 /* Forward declarations for apic_ops */
 static void	native_lapic_create(u_int apic_id, int boot_cpu);
 static void	native_lapic_init(vm_paddr_t addr);
+static void	native_lapic_xapic_mode(void);
 static void	native_lapic_setup(int boot);
 static void	native_lapic_dump(const char *str);
 static void	native_lapic_disable(void);
@@ -213,6 +304,7 @@ static int 	native_lapic_set_lvt_trigger
 struct apic_ops apic_ops = {
 	.create			= native_lapic_create,
 	.init			= native_lapic_init,
+	.xapic_mode		= native_lapic_xapic_mode,
 	.setup			= native_lapic_setup,
 	.dump			= native_lapic_dump,
 	.disable		= native_lapic_disable,
@@ -291,11 +383,20 @@ native_lapic_init(vm_paddr_t addr)
 	u_int regs[4];
 	int i, arat;
 
-	/* Map the local APIC and setup the spurious interrupt handler. */
+	/*
+	 * Enable x2APIC mode if possible, otherwise map the local
+	 * APIC registers page.
+	 */
 	KASSERT(trunc_page(addr) == addr,
 	    ("local APIC not aligned on a page boundary"));
-	lapic_paddr = addr;
-	lapic = pmap_mapdev(addr, sizeof(lapic_t));
+	if (x2apic_mode) {
+		native_lapic_enable_x2apic();
+	} else {
+		lapic_paddr = addr;
+		lapic_map = pmap_mapdev(addr, PAGE_SIZE);
+	}
+
+	/* Setup the spurious interrupt handler. */
 	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
 	    GSEL_APIC);
 
@@ -398,33 +499,51 @@ native_lapic_dump(const char* str)
 {
 	uint32_t maxlvt;
 
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
-	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
-	    lapic->id, lapic->version, lapic->ldr, lapic->dfr);
-	printf("  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
-	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
+	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
+	    lapic_read32(LAPIC_ID), lapic_read32(LAPIC_VERSION),
+	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
+	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
+		printf(" x2APIC: %d", x2apic_mode);
+	printf("\n  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
+	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
+	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
 	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
-	    lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error);
+	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
+	    lapic_read32(LAPIC_LVT_ERROR));
 	if (maxlvt >= APIC_LVT_PMC)
-		printf(" pmc: 0x%08x", lapic->lvt_pcint);
+		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
 	printf("\n");
 	if (maxlvt >= APIC_LVT_CMCI)
-		printf("   cmci: 0x%08x\n", lapic->lvt_cmci);
+		printf("   cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
+}
+
+static void
+native_lapic_xapic_mode(void)
+{
+	register_t saveintr;
+
+	saveintr = intr_disable();
+	if (x2apic_mode)
+		native_lapic_enable_x2apic();
+	native_lapic_disable();
+	intr_restore(saveintr);
 }
 
 static void
 native_lapic_setup(int boot)
 {
 	struct lapic *la;
-	u_int32_t maxlvt;
+	uint32_t maxlvt;
 	register_t saveintr;
 	char buf[MAXCOMLEN + 1];
 
+	saveintr = intr_disable();
+
 	la = &lapics[lapic_id()];
 	KASSERT(la->la_present, ("missing APIC structure"));
-	saveintr = intr_disable();
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
@@ -433,16 +552,21 @@ native_lapic_setup(int boot)
 	lapic_enable();
 
 	/* Program LINT[01] LVT entries. */
-	lapic->lvt_lint0 = lvt_mode(la, APIC_LVT_LINT0, lapic->lvt_lint0);
-	lapic->lvt_lint1 = lvt_mode(la, APIC_LVT_LINT1, lapic->lvt_lint1);
+	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
+	    lapic_read32(LAPIC_LVT_LINT0)));
+	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
+	    lapic_read32(LAPIC_LVT_LINT1)));
 
 	/* Program the PMC LVT entry if present. */
-	if (maxlvt >= APIC_LVT_PMC)
-		lapic->lvt_pcint = lvt_mode(la, APIC_LVT_PMC, lapic->lvt_pcint);
+	if (maxlvt >= APIC_LVT_PMC) {
+		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
+		    LAPIC_LVT_PCINT));
+	}
 
 	/* Program timer LVT and setup handler. */
-	la->lvt_timer_cache = lapic->lvt_timer =
-	    lvt_mode(la, APIC_LVT_TIMER, lapic->lvt_timer);
+	la->lvt_timer_cache = lvt_mode(la, APIC_LVT_TIMER,
+	    lapic_read32(LAPIC_LVT_TIMER));
+	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_cache);
 	if (boot) {
 		snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid));
 		intrcnt_add(buf, &la->la_timer_count);
@@ -460,14 +584,17 @@ native_lapic_setup(int boot)
 	}
 
 	/* Program error LVT and clear any existing errors. */
-	lapic->lvt_error = lvt_mode(la, APIC_LVT_ERROR, lapic->lvt_error);
-	lapic->esr = 0;
+	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
+	    lapic_read32(LAPIC_LVT_ERROR)));
+	lapic_write32(LAPIC_ESR, 0);
 
 	/* XXX: Thermal LVT */
 
 	/* Program the CMCI LVT entry if present. */
-	if (maxlvt >= APIC_LVT_CMCI)
-		lapic->lvt_cmci = lvt_mode(la, APIC_LVT_CMCI, lapic->lvt_cmci);
+	if (maxlvt >= APIC_LVT_CMCI) {
+		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
+		    lapic_read32(LAPIC_LVT_CMCI)));
+	}
 	    
 	intr_restore(saveintr);
 }
@@ -478,9 +605,9 @@ native_lapic_reenable_pmc(void)
 #ifdef HWPMC_HOOKS
 	uint32_t value;
 
-	value =  lapic->lvt_pcint;
+	value = lapic_read32(LAPIC_LVT_PCINT);
 	value &= ~APIC_LVT_M;
-	lapic->lvt_pcint = value;
+	lapic_write32(LAPIC_LVT_PCINT, value);
 #endif
 }
 
@@ -491,7 +618,8 @@ lapic_update_pmc(void *dummy)
 	struct lapic *la;
 
 	la = &lapics[lapic_id()];
-	lapic->lvt_pcint = lvt_mode(la, APIC_LVT_PMC, lapic->lvt_pcint);
+	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
+	    lapic_read32(LAPIC_LVT_PCINT)));
 }
 #endif
 
@@ -502,11 +630,11 @@ native_lapic_enable_pmc(void)
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
-	if (lapic == NULL)
+	if (!x2apic_mode && lapic_map == NULL)
 		return (0);
 
 	/* Fail if the PMC LVT is not present. */
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < APIC_LVT_PMC)
 		return (0);
 
@@ -536,11 +664,11 @@ native_lapic_disable_pmc(void)
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
-	if (lapic == NULL)
+	if (!x2apic_mode && lapic_map == NULL)
 		return;
 
 	/* Fail if the PMC LVT is not present. */
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < APIC_LVT_PMC)
 		return;
 
@@ -569,7 +697,8 @@ lapic_et_start(struct eventtimer *et, sb
 			lapic_timer_set_divisor(lapic_timer_divisor);
 			lapic_timer_oneshot(la, APIC_TIMER_MAX_COUNT, 0);
 			DELAY(1000000);
-			value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
+			value = APIC_TIMER_MAX_COUNT -
+			    lapic_read32(LAPIC_CCR_TIMER);
 			if (value != APIC_TIMER_MAX_COUNT)
 				break;
 			lapic_timer_divisor <<= 1;
@@ -613,21 +742,21 @@ native_lapic_disable(void)
 	uint32_t value;
 
 	/* Software disable the local APIC. */
-	value = lapic->svr;
+	value = lapic_read32(LAPIC_SVR);
 	value &= ~APIC_SVR_SWEN;
-	lapic->svr = value;
+	lapic_write32(LAPIC_SVR, value);
 }
 
 static void
 lapic_enable(void)
 {
-	u_int32_t value;
+	uint32_t value;
 
 	/* Program the spurious vector to enable the local APIC. */
-	value = lapic->svr;
+	value = lapic_read32(LAPIC_SVR);
 	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
-	value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
-	lapic->svr = value;
+	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
+	lapic_write32(LAPIC_SVR, value);
 }
 
 /* Reset the local APIC on the BSP during resume. */
@@ -641,27 +770,29 @@ lapic_resume(struct pic *pic, bool suspe
 static int
 native_lapic_id(void)
 {
+	uint32_t v;
 
-	KASSERT(lapic != NULL, ("local APIC is not mapped"));
-	return (lapic->id >> APIC_ID_SHIFT);
+	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
+	v = lapic_read32(LAPIC_ID);
+	if (!x2apic_mode)
+		v >>= APIC_ID_SHIFT;
+	return (v);
 }
 
 static int
 native_lapic_intr_pending(u_int vector)
 {
-	volatile u_int32_t *irr;
+	uint32_t irr;
 
 	/*
-	 * The IRR registers are an array of 128-bit registers each of
-	 * which only describes 32 interrupts in the low 32 bits..  Thus,
-	 * we divide the vector by 32 to get the 128-bit index.  We then
-	 * multiply that index by 4 to get the equivalent index from
-	 * treating the IRR as an array of 32-bit registers.  Finally, we
-	 * modulus the vector by 32 to determine the individual bit to
-	 * test.
+	 * The IRR registers are an array of registers each of which
+	 * only describes 32 interrupts in the low 32 bits.  Thus, we
+	 * divide the vector by 32 to get the register index.
+	 * Finally, we modulus the vector by 32 to determine the
+	 * individual bit to test.
 	 */
-	irr = &lapic->irr0;
-	return (irr[(vector / 32) * 4] & 1 << (vector % 32));
+	irr = lapic_read32(LAPIC_IRR0 + vector / 32);
+	return (irr & 1 << (vector % 32));
 }
 
 static void
@@ -818,13 +949,13 @@ static void
 lapic_set_tpr(u_int vector)
 {
 #ifdef CHEAP_TPR
-	lapic->tpr = vector;
+	lapic_write32(LAPIC_TPR, vector);
 #else
-	u_int32_t tpr;
+	uint32_t tpr;
 
-	tpr = lapic->tpr & ~APIC_TPR_PRIO;
+	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
 	tpr |= vector;
-	lapic->tpr = tpr;
+	lapic_write32(LAPIC_TPR, tpr);
 #endif
 }
 
@@ -832,7 +963,7 @@ static void
 native_lapic_eoi(void)
 {
 
-	lapic->eoi = 0;
+	lapic_write32_nofence(LAPIC_EOI, 0);
 }
 
 void
@@ -894,46 +1025,46 @@ lapic_timer_set_divisor(u_int divisor)
 	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
 	KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) /
 	    sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor));
-	lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1];
+	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
 }
 
 static void
 lapic_timer_oneshot(struct lapic *la, u_int count, int enable_int)
 {
-	u_int32_t value;
+	uint32_t value;
 
 	value = la->lvt_timer_cache;
 	value &= ~APIC_LVTT_TM;
 	value |= APIC_LVTT_TM_ONE_SHOT;
 	if (enable_int)
 		value &= ~APIC_LVT_M;
-	lapic->lvt_timer = value;
-	lapic->icr_timer = count;
+	lapic_write32(LAPIC_LVT_TIMER, value);
+	lapic_write32(LAPIC_ICR_TIMER, count);
 }
 
 static void
 lapic_timer_periodic(struct lapic *la, u_int count, int enable_int)
 {
-	u_int32_t value;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201502092100.t19L0vsv010944>