Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 14 May 2011 18:37:24 +0000 (UTC)
From:      Peter Grehan <grehan@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r221905 - in projects/bhyve_ref/sys: amd64/amd64 amd64/conf amd64/include conf dev/blackhole dev/bvm kern modules modules/blackhole
Message-ID:  <201105141837.p4EIbODA011628@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: grehan
Date: Sat May 14 18:37:24 2011
New Revision: 221905
URL: http://svn.freebsd.org/changeset/base/221905

Log:
  bhyve import part 2 of 2, guest kernel changes.
  This branch is now considered frozen: future bhyve development will take
  place in a branch off -CURRENT.
  
  sys/dev/bvm/bvm_console.c
  sys/dev/bvm/bvm_dbg.c
  - simple console driver/gdb debug port used for bringup; supported
    by the user-space bhyve executable
  
  sys/conf/options.amd64
  sys/amd64/amd64/minidump_machdep.c
  - allow NKPT to be set in the kernel config file
  
  sys/amd64/conf/GENERIC
  - mptable config options; bhyve user-space executable creates an mptable
    with number of CPUs, and optional vendor extension
  - add bvm console/debug
  - set NKPT to 512 to allow loading of large RAM disks from the loader
  - include kdb/gdb
  
  sys/amd64/amd64/local_apic.c
  sys/amd64/amd64/apic_vector.S
  sys/amd64/include/specialreg.h
  - if x2apic mode available, use MSRs to access the local APIC, otherwise
    fall back to 'classic' MMIO mode
  
  sys/amd64/amd64/mp_machdep.c
  - support AP spinup on CPU models that don't have real-mode support by
    overwriting the real-mode page with a message that supplies the bhyve
    user-space executable with enough information to start the AP directly
    in 64-bit mode.
  
  sys/amd64/amd64/vm_machdep.c
  - insert pause statements into cpu shutdown busy-wait loops
  
  sys/dev/blackhole/blackhole.c
  sys/modules/blackhole/Makefile
  - boot-time loadable module that claims all PCI bus/slot/funcs specified
    in an env var that are to be used for PCI passthrough
  
  sys/amd64/amd64/intr_machdep.c
  - allow round-robin assignment of device interrupts to CPUs to be disabled
    from the loader
  
  sys/amd64/include/bus.h
  - convert string ins/outs instructions to loops of individual in/out since
    bhyve doesn't support these yet
  
  sys/kern/subr_bus.c
  - if the device was not created with a fixed devclass, then remove its
    association with the devclass it was associated with during probe.
    Otherwise, new drivers do not get a chance to probe/attach since the
    device will stay married to the first driver that it probed successfully
    but failed to attach.
  
  Sponsored by:	NetApp, Inc.

Added:
  projects/bhyve_ref/sys/dev/blackhole/
  projects/bhyve_ref/sys/dev/blackhole/blackhole.c
  projects/bhyve_ref/sys/dev/bvm/
  projects/bhyve_ref/sys/dev/bvm/bvm_console.c
  projects/bhyve_ref/sys/dev/bvm/bvm_dbg.c
  projects/bhyve_ref/sys/modules/blackhole/
  projects/bhyve_ref/sys/modules/blackhole/Makefile
Modified:
  projects/bhyve_ref/sys/amd64/amd64/apic_vector.S
  projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c
  projects/bhyve_ref/sys/amd64/amd64/local_apic.c
  projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c
  projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c
  projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c
  projects/bhyve_ref/sys/amd64/conf/GENERIC
  projects/bhyve_ref/sys/amd64/include/bus.h
  projects/bhyve_ref/sys/amd64/include/specialreg.h
  projects/bhyve_ref/sys/conf/files.amd64
  projects/bhyve_ref/sys/conf/options.amd64
  projects/bhyve_ref/sys/kern/subr_bus.c
  projects/bhyve_ref/sys/modules/Makefile

Modified: projects/bhyve_ref/sys/amd64/amd64/apic_vector.S
==============================================================================
--- projects/bhyve_ref/sys/amd64/amd64/apic_vector.S	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/amd64/apic_vector.S	Sat May 14 18:37:24 2011	(r221905)
@@ -55,7 +55,14 @@ IDTVEC(vec_name) ;							\
 	PUSH_FRAME ;							\
 	FAKE_MCOUNT(TF_RIP(%rsp)) ;					\
 	movq	lapic, %rdx ;	/* pointer to local APIC */		\
+	testq	%rdx, %rdx;						\
+	jnz	3f;							\
+	movl	$MSR_APIC_ISR ## index, %ecx;				\
+	rdmsr;								\
+	jmp	4f;							\
+3:	;								\
 	movl	LA_ISR + 16 * (index)(%rdx), %eax ;	/* load ISR */	\
+4:	;								\
 	bsrl	%eax, %eax ;	/* index of highset set bit in ISR */	\
 	jz	2f ;							\
 	addl	$(32 * index),%eax ;					\
@@ -117,6 +124,26 @@ IDTVEC(errorint)
 	jmp	doreti
 
 #ifdef SMP
+
+/*
+ * We assume that %rax is being saved/restored outside of this macro
+ */
+#define	DO_EOI								\
+	movq	lapic, %rax;						\
+	testq	%rax, %rax;						\
+	jz	8f;							\
+	movl	$0, LA_EOI(%rax);					\
+	jmp	9f;							\
+8:;									\
+	pushq	%rcx;							\
+	pushq	%rdx;							\
+	xorl	%edx, %edx;	/* eax is already zero */		\
+	movl	$MSR_APIC_EOI, %ecx;					\
+	wrmsr;								\
+	popq	%rdx;							\
+	popq	%rcx;							\
+9:
+	
 /*
  * Global address space TLB shootdown.
  */
@@ -128,8 +155,7 @@ IDTVEC(invltlb)
 	movq	%cr3, %rax		/* invalidate the TLB */
 	movq	%rax, %cr3
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 
 	lock
 	incl	smp_tlb_wait
@@ -148,8 +174,7 @@ IDTVEC(invlpg)
 	movq	smp_tlb_addr1, %rax
 	invlpg	(%rax)			/* invalidate single page */
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 
 	lock
 	incl	smp_tlb_wait
@@ -173,8 +198,7 @@ IDTVEC(invlrng)
 	cmpq	%rax, %rdx
 	jb	1b
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 
 	lock
 	incl	smp_tlb_wait
@@ -193,8 +217,7 @@ IDTVEC(invlcache)
 
 	wbinvd
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 
 	lock
 	incl	smp_tlb_wait
@@ -210,9 +233,8 @@ IDTVEC(invlcache)
 IDTVEC(ipi_intr_bitmap_handler)		
 	PUSH_FRAME
 
-	movq	lapic, %rdx
-	movl	$0, LA_EOI(%rdx)	/* End Of Interrupt to APIC */
-	
+	DO_EOI
+
 	FAKE_MCOUNT(TF_RIP(%rsp))
 
 	call	ipi_bitmap_handler
@@ -227,8 +249,7 @@ IDTVEC(ipi_intr_bitmap_handler)		
 IDTVEC(cpustop)
 	PUSH_FRAME
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 
 	call	cpustop_handler
 	jmp	doreti
@@ -241,8 +262,7 @@ IDTVEC(cpustop)
 IDTVEC(cpususpend)
 	PUSH_FRAME
 
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 
 	call	cpususpend_handler
 
@@ -259,7 +279,6 @@ IDTVEC(cpususpend)
 IDTVEC(rendezvous)
 	PUSH_FRAME
 	call	smp_rendezvous_action
-	movq	lapic, %rax
-	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+	DO_EOI
 	jmp	doreti
 #endif /* SMP */

Modified: projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c
==============================================================================
--- projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c	Sat May 14 18:37:24 2011	(r221905)
@@ -78,6 +78,8 @@ static STAILQ_HEAD(, pic) pics;
 
 #ifdef SMP
 static int assign_cpu;
+static int round_robin_interrupts = 1;
+TUNABLE_INT("round_robin_interrupts", &round_robin_interrupts);
 #endif
 
 static int	intr_assign_cpu(void *arg, u_char cpu);
@@ -460,6 +462,10 @@ intr_next_cpu(void)
 	if (!assign_cpu)
 		return (cpu_apic_ids[0]);
 
+	/* All interrupts go to the BSP if not allowed to round robin */
+	if (!round_robin_interrupts)
+		return (cpu_apic_ids[0]);
+
 	mtx_lock_spin(&icu_lock);
 	apic_id = cpu_apic_ids[current_cpu];
 	do {

Modified: projects/bhyve_ref/sys/amd64/amd64/local_apic.c
==============================================================================
--- projects/bhyve_ref/sys/amd64/amd64/local_apic.c	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/amd64/local_apic.c	Sat May 14 18:37:24 2011	(r221905)
@@ -148,6 +148,7 @@ volatile lapic_t *lapic;
 vm_paddr_t lapic_paddr;
 static u_long lapic_timer_divisor, lapic_timer_period, lapic_timer_hz;
 static enum lapic_clock clockcoverage;
+static int x2apic;
 
 static void	lapic_enable(void);
 static void	lapic_resume(struct pic *pic);
@@ -156,6 +157,36 @@ static void	lapic_timer_oneshot(u_int co
 static void	lapic_timer_periodic(u_int count);
 static void	lapic_timer_set_divisor(u_int divisor);
 static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
+static uint32_t	lapic_version(void);
+static uint32_t	lapic_ldr(void);
+static uint32_t	lapic_dfr(void);
+static uint32_t	lapic_lvt_lint0(void);
+static void	lapic_set_lvt_lint0(uint32_t value);
+static uint32_t	lapic_lvt_lint1(void);
+static void	lapic_set_lvt_lint1(uint32_t value);
+static uint32_t	lapic_tpr(void);
+static uint32_t	lapic_svr(void);
+static void	lapic_set_svr(uint32_t value);
+static uint32_t	lapic_lvt_timer(void);
+static void	lapic_set_lvt_timer(uint32_t value);
+static uint32_t	lapic_lvt_thermal(void);
+static uint32_t	lapic_lvt_error(void);
+static void	lapic_set_lvt_error(uint32_t value);
+static uint32_t	lapic_lvt_pcint(void);
+static void	lapic_set_lvt_pcint(uint32_t value);
+static uint32_t	lapic_esr(void);
+static void	lapic_set_esr(uint32_t value);
+static uint32_t	lapic_ccr_timer(void);
+static void	lapic_set_dcr_timer(uint32_t value);
+static void	lapic_set_icr_timer(uint32_t value);
+uint32_t	lapic_irr(int num);
+uint32_t	lapic_tmr(int num);
+uint32_t	lapic_isr(int num);
+static uint32_t	lapic_icr_lo(void);
+static void	lapic_set_icr_lo(uint32_t value);
+static uint32_t	lapic_icr_hi(void);
+static void	lapic_set_icr_hi(uint32_t value);
+static boolean_t lapic_missing(void);
 
 struct pic lapic_pic = { .pic_resume = lapic_resume };
 
@@ -206,12 +237,20 @@ lvt_mode(struct lapic *la, u_int pin, ui
 void
 lapic_init(vm_paddr_t addr)
 {
-
-	/* Map the local APIC and setup the spurious interrupt handler. */
-	KASSERT(trunc_page(addr) == addr,
-	    ("local APIC not aligned on a page boundary"));
-	lapic = pmap_mapdev(addr, sizeof(lapic_t));
-	lapic_paddr = addr;
+	if ((cpu_feature2 & CPUID2_X2APIC) != 0 &&
+	    (rdmsr(MSR_APICBASE) & APICBASE_X2APIC) != 0) {
+		x2apic = 1;
+		if (bootverbose)
+			printf("Local APIC access using x2APIC MSRs\n");
+	} else {
+		/*
+		 * Map the local APIC and setup the spurious interrupt handler.
+		 */
+		KASSERT(trunc_page(addr) == addr,
+		    ("local APIC not aligned on a page boundary"));
+		lapic = pmap_mapdev(addr, sizeof(lapic_t));
+		lapic_paddr = addr;
+	}
 	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Perform basic initialization of the BSP's local APIC. */
@@ -276,12 +315,12 @@ lapic_dump(const char* str)
 
 	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
 	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
-	    lapic->id, lapic->version, lapic->ldr, lapic->dfr);
+	    lapic_id(), lapic_version(), lapic_ldr(), lapic_dfr());
 	printf("  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
-	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
+	    lapic_lvt_lint0(), lapic_lvt_lint1(), lapic_tpr(), lapic_svr());
 	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x pmc: 0x%08x\n",
-	    lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error,
-	    lapic->lvt_pcint);
+	    lapic_lvt_timer(), lapic_lvt_thermal(), lapic_lvt_error(),
+	    lapic_lvt_pcint());
 }
 
 void
@@ -295,7 +334,7 @@ lapic_setup(int boot)
 	la = &lapics[lapic_id()];
 	KASSERT(la->la_present, ("missing APIC structure"));
 	eflags = intr_disable();
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
@@ -304,15 +343,15 @@ lapic_setup(int boot)
 	lapic_enable();
 
 	/* Program LINT[01] LVT entries. */
-	lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0);
-	lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1);
+	lapic_set_lvt_lint0(lvt_mode(la, LVT_LINT0, lapic_lvt_lint0()));
+	lapic_set_lvt_lint1(lvt_mode(la, LVT_LINT1, lapic_lvt_lint1()));
 
 	/* Program the PMC LVT entry if present. */
 	if (maxlvt >= LVT_PMC)
-		lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint);
+		lapic_set_lvt_pcint(lvt_mode(la, LVT_PMC, lapic_lvt_pcint()));
 
 	/* Program timer LVT and setup handler. */
-	lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer);
+	lapic_set_lvt_timer(lvt_mode(la, LVT_TIMER, lapic_lvt_timer()));
 	if (boot) {
 		snprintf(buf, sizeof(buf), "cpu%d: timer", PCPU_GET(cpuid));
 		intrcnt_add(buf, &la->la_timer_count);
@@ -328,8 +367,8 @@ lapic_setup(int boot)
 	}
 
 	/* Program error LVT and clear any existing errors. */
-	lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error);
-	lapic->esr = 0;
+	lapic_set_lvt_error(lvt_mode(la, LVT_ERROR, lapic_lvt_error()));
+	lapic_set_esr(0);
 
 	/* XXX: Thermal LVT */
 
@@ -342,9 +381,9 @@ lapic_reenable_pmc(void)
 #ifdef HWPMC_HOOKS
 	uint32_t value;
 
-	value =  lapic->lvt_pcint;
+	value =  lapic_lvt_pcint();
 	value &= ~APIC_LVT_M;
-	lapic->lvt_pcint = value;
+	lapic_set_lvt_pcint(value);
 #endif
 }
 
@@ -355,7 +394,7 @@ lapic_update_pmc(void *dummy)
 	struct lapic *la;
 
 	la = &lapics[lapic_id()];
-	lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint);
+	lapic_set_lvt_pcint(lvt_mode(la, LVT_PMC, lapic_lvt_pcint()));
 }
 #endif
 
@@ -366,11 +405,11 @@ lapic_enable_pmc(void)
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
-	if (lapic == NULL)
+	if (lapic_missing())
 		return (0);
 
 	/* Fail if the PMC LVT is not present. */
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < LVT_PMC)
 		return (0);
 
@@ -400,11 +439,11 @@ lapic_disable_pmc(void)
 	u_int32_t maxlvt;
 
 	/* Fail if the local APIC is not present. */
-	if (lapic == NULL)
+	if (lapic_missing())
 		return;
 
 	/* Fail if the PMC LVT is not present. */
-	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
+	maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 	if (maxlvt < LVT_PMC)
 		return;
 
@@ -435,7 +474,7 @@ lapic_setup_clock(enum lapic_clock srcsd
 	MPASS(srcsdes != LAPIC_CLOCK_NONE);
 
 	/* Can't drive the timer without a local APIC. */
-	if (lapic == NULL ||
+	if (lapic_missing() ||
 	    (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0)) {
 		clockcoverage = LAPIC_CLOCK_NONE;
 		return (clockcoverage);
@@ -449,7 +488,7 @@ lapic_setup_clock(enum lapic_clock srcsd
 		lapic_timer_set_divisor(lapic_timer_divisor);
 		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
 		DELAY(2000000);
-		value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
+		value = APIC_TIMER_MAX_COUNT - lapic_ccr_timer();
 		if (value != APIC_TIMER_MAX_COUNT)
 			break;
 		lapic_timer_divisor <<= 1;
@@ -509,9 +548,9 @@ lapic_disable(void)
 	uint32_t value;
 
 	/* Software disable the local APIC. */
-	value = lapic->svr;
+	value = lapic_svr();
 	value &= ~APIC_SVR_SWEN;
-	lapic->svr = value;
+	lapic_set_svr(value);
 }
 
 static void
@@ -520,10 +559,10 @@ lapic_enable(void)
 	u_int32_t value;
 
 	/* Program the spurious vector to enable the local APIC. */
-	value = lapic->svr;
+	value = lapic_svr();
 	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
 	value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
-	lapic->svr = value;
+	lapic_set_svr(value);
 }
 
 /* Reset the local APIC on the BSP during resume. */
@@ -534,19 +573,342 @@ lapic_resume(struct pic *pic)
 	lapic_setup(0);
 }
 
+static uint32_t
+lapic_version(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_VERSION));
+	else
+		return (lapic->version);
+}
+
+static uint32_t
+lapic_ldr(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LDR));
+	else
+		return (lapic->ldr);
+}
+
+static uint32_t
+lapic_dfr(void)
+{
+
+	if (x2apic)
+		return (0xffffffff);	/* DFR not available in x2APIC mode */
+	else
+		return (lapic->dfr);
+}
+
+static uint32_t
+lapic_lvt_lint0(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LVT_LINT0));
+	else
+		return (lapic->lvt_lint0);
+}
+
+static void
+lapic_set_lvt_lint0(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_LVT_LINT0, value);
+	else
+		lapic->lvt_lint0 = value;
+}
+
+static uint32_t
+lapic_lvt_lint1(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LVT_LINT1));
+	else
+		return (lapic->lvt_lint1);
+}
+
+static void
+lapic_set_lvt_lint1(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_LVT_LINT1, value);
+	else
+		lapic->lvt_lint1 = value;
+}
+
+static uint32_t
+lapic_tpr(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_TPR));
+	else
+		return (lapic->tpr);
+}
+
+static uint32_t
+lapic_svr(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_SVR));
+	else
+		return (lapic->svr);
+}
+
+static void
+lapic_set_svr(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_SVR, value);
+	else
+		lapic->svr = value;
+}
+
+static uint32_t
+lapic_lvt_timer(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LVT_TIMER));
+	else
+		return (lapic->lvt_timer);
+}
+
+static void
+lapic_set_lvt_timer(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_LVT_TIMER, value);
+	else
+		lapic->lvt_timer = value;
+}
+
+static uint32_t
+lapic_lvt_thermal(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LVT_THERMAL));
+	else
+		return (lapic->lvt_thermal);
+}
+
+static uint32_t
+lapic_lvt_error(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LVT_ERROR));
+	else
+		return (lapic->lvt_error);
+}
+
+static void
+lapic_set_lvt_error(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_LVT_ERROR, value);
+	else
+		lapic->lvt_error = value;
+}
+
+static uint32_t
+lapic_lvt_pcint(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_LVT_PCINT));
+	else
+		return (lapic->lvt_pcint);
+}
+
+static void
+lapic_set_lvt_pcint(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_LVT_PCINT, value);
+	else
+		lapic->lvt_pcint = value;
+}
+
+static uint32_t
+lapic_esr(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_ESR));
+	else
+		return (lapic->esr);
+}
+
+static void
+lapic_set_esr(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_ESR, value);
+	else
+		lapic->esr = value;
+}
+
+static uint32_t
+lapic_ccr_timer(void)
+{
+
+	if (x2apic)
+		return (rdmsr(MSR_APIC_CCR_TIMER));
+	else
+		return (lapic->ccr_timer);
+}
+
+static void
+lapic_set_dcr_timer(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_DCR_TIMER, value);
+	else
+		lapic->dcr_timer = value;
+}
+
+static void
+lapic_set_icr_timer(uint32_t value)
+{
+
+	if (x2apic)
+		wrmsr(MSR_APIC_ICR_TIMER, value);
+	else
+		lapic->icr_timer = value;
+}
+
+uint32_t
+lapic_tmr(int num)
+{
+	int msr;
+	volatile uint32_t *regptr;
+
+	KASSERT(num >= 0 && num < 8, ("lapic_tmr: invalid num %d", num));
+
+	if (x2apic) {
+		msr = MSR_APIC_TMR0 + num;
+		return (rdmsr(msr));
+	} else {
+		regptr = &lapic->tmr0;
+		return (regptr[num * 4]);
+	}
+}
+
+uint32_t
+lapic_irr(int num)
+{
+	int msr;
+	volatile uint32_t *regptr;
+
+	KASSERT(num >= 0 && num < 8, ("lapic_irr: invalid num %d", num));
+
+	if (x2apic) {
+		msr = MSR_APIC_IRR0 + num;
+		return (rdmsr(msr));
+	} else {
+		regptr = &lapic->irr0;
+		return (regptr[num * 4]);
+	}
+}
+
+uint32_t
+lapic_isr(int num)
+{
+	int msr;
+	volatile uint32_t *regptr;
+
+	KASSERT(num >= 0 && num < 8, ("lapic_isr: invalid num %d", num));
+
+	if (x2apic) {
+		msr = MSR_APIC_ISR0 + num;
+		return (rdmsr(msr));
+	} else {
+		regptr = &lapic->isr0;
+		return (regptr[num * 4]);
+	}
+}
+
+static uint32_t icr_hi_stashed[MAXCPU];
+
+static uint32_t
+lapic_icr_lo(void)
+{
+
+	if (x2apic)
+		return (0);
+	else
+		return (lapic->icr_lo);
+}
+
+static void
+lapic_set_icr_lo(uint32_t value)
+{
+
+	if (x2apic) {
+		wrmsr(MSR_APIC_ICR,
+		      (uint64_t)icr_hi_stashed[curcpu] << 32 | value);
+	} else
+		lapic->icr_lo = value;
+}
+
+static uint32_t
+lapic_icr_hi(void)
+{
+
+	if (x2apic)
+		return (0);
+	else
+		return (lapic->icr_hi);
+}
+
+static void
+lapic_set_icr_hi(uint32_t value)
+{
+	if (x2apic)
+		icr_hi_stashed[curcpu] = value >> APIC_ID_SHIFT; /* XXX */
+	else
+		lapic->icr_hi = value;
+}
+
+static boolean_t
+lapic_missing(void)
+{
+
+	if (x2apic == 0 && lapic == NULL)
+		return (TRUE);
+	else
+		return (FALSE);
+}
+
 int
 lapic_id(void)
 {
 
-	KASSERT(lapic != NULL, ("local APIC is not mapped"));
-	return (lapic->id >> APIC_ID_SHIFT);
+	if (x2apic)
+		return (rdmsr(MSR_APIC_ID));
+	else
+		return (lapic->id >> APIC_ID_SHIFT);
 }
 
 int
 lapic_intr_pending(u_int vector)
 {
-	volatile u_int32_t *irr;
-
 	/*
 	 * The IRR registers are an array of 128-bit registers each of
 	 * which only describes 32 interrupts in the low 32 bits..  Thus,
@@ -556,8 +918,7 @@ lapic_intr_pending(u_int vector)
 	 * modulus the vector by 32 to determine the individual bit to
 	 * test.
 	 */
-	irr = &lapic->irr0;
-	return (irr[(vector / 32) * 4] & 1 << (vector % 32));
+	return (lapic_irr(vector / 32) & 1 << (vector % 32));
 }
 
 void
@@ -713,13 +1074,19 @@ void
 lapic_set_tpr(u_int vector)
 {
 #ifdef CHEAP_TPR
-	lapic->tpr = vector;
+	if (x2apic)
+		wrmsr(MSR_APIC_TPR, vector);
+	else
+		lapic->tpr = vector;
 #else
 	u_int32_t tpr;
 
-	tpr = lapic->tpr & ~APIC_TPR_PRIO;
+	tpr = lapic_tpr() & ~APIC_TPR_PRIO;
 	tpr |= vector;
-	lapic->tpr = tpr;
+	if (x2apic)
+		wrmsr(MSR_APIC_TPR, tpr);
+	else
+		lapic->tpr = tpr;
 #endif
 }
 
@@ -727,7 +1094,10 @@ void
 lapic_eoi(void)
 {
 
-	lapic->eoi = 0;
+	if (x2apic)
+		wrmsr(MSR_APIC_EOI, 0);
+	else
+		lapic->eoi = 0;
 }
 
 void
@@ -819,7 +1189,7 @@ lapic_timer_set_divisor(u_int divisor)
 	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
 	KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) /
 	    sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor));
-	lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1];
+	lapic_set_dcr_timer(lapic_timer_divisors[ffs(divisor) - 1]);
 }
 
 static void
@@ -827,11 +1197,11 @@ lapic_timer_oneshot(u_int count)
 {
 	u_int32_t value;
 
-	value = lapic->lvt_timer;
+	value = lapic_lvt_timer();
 	value &= ~APIC_LVTT_TM;
 	value |= APIC_LVTT_TM_ONE_SHOT;
-	lapic->lvt_timer = value;
-	lapic->icr_timer = count;
+	lapic_set_lvt_timer(value);
+	lapic_set_icr_timer(count);
 }
 
 static void
@@ -839,11 +1209,11 @@ lapic_timer_periodic(u_int count)
 {
 	u_int32_t value;
 
-	value = lapic->lvt_timer;
+	value = lapic_lvt_timer();
 	value &= ~APIC_LVTT_TM;
 	value |= APIC_LVTT_TM_PERIODIC;
-	lapic->lvt_timer = value;
-	lapic->icr_timer = count;
+	lapic_set_lvt_timer(value);
+	lapic_set_icr_timer(count);
 }
 
 static void
@@ -851,9 +1221,9 @@ lapic_timer_enable_intr(void)
 {
 	u_int32_t value;
 
-	value = lapic->lvt_timer;
+	value = lapic_lvt_timer();
 	value &= ~APIC_LVT_M;
-	lapic->lvt_timer = value;
+	lapic_set_lvt_timer(value);
 }
 
 void
@@ -867,8 +1237,8 @@ lapic_handle_error(void)
 	 * to update its value to indicate any errors that have
 	 * occurred since the previous write to the register.
 	 */
-	lapic->esr = 0;
-	esr = lapic->esr;
+	lapic_set_esr(0);
+	esr = lapic_esr();
 
 	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
 	lapic_eoi();
@@ -1115,17 +1485,17 @@ DB_SHOW_COMMAND(lapic, db_show_lapic)
 	uint32_t v;
 
 	db_printf("lapic ID = %d\n", lapic_id());
-	v = lapic->version;
+	v = lapic_version();
 	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
 	    v & 0xf);
 	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
-	v = lapic->svr;
+	v = lapic_svr();
 	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
 	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
-	db_printf("TPR      = %02x\n", lapic->tpr);
+	db_printf("TPR      = %02x\n", lapic_tpr());
 
 #define dump_field(prefix, index)					\
-	dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index,	\
+	dump_mask(__XSTRING(prefix ## index), lapic_ ## prefix(index),	\
 	    index * 32)
 
 	db_printf("In-service Interrupts:\n");
@@ -1300,7 +1670,7 @@ lapic_ipi_wait(int delay)
 	} else
 		incr = 1;
 	for (x = 0; x < delay; x += incr) {
-		if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
+		if ((lapic_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
 			return (1);
 		ia32_pause();
 	}
@@ -1313,7 +1683,7 @@ lapic_ipi_raw(register_t icrlo, u_int de
 	register_t value, eflags;
 
 	/* XXX: Need more sanity checking of icrlo? */
-	KASSERT(lapic != NULL, ("%s called too early", __func__));
+	KASSERT(!lapic_missing(), ("%s called too early", __func__));
 	KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
 	    ("%s: invalid dest field", __func__));
 	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
@@ -1322,17 +1692,17 @@ lapic_ipi_raw(register_t icrlo, u_int de
 	/* Set destination in ICR HI register if it is being used. */
 	eflags = intr_disable();
 	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
-		value = lapic->icr_hi;
+		value = lapic_icr_hi();
 		value &= ~APIC_ID_MASK;
 		value |= dest << APIC_ID_SHIFT;
-		lapic->icr_hi = value;
+		lapic_set_icr_hi(value);
 	}
 
 	/* Program the contents of the IPI and dispatch it. */
-	value = lapic->icr_lo;
+	value = lapic_icr_lo();
 	value &= APIC_ICRLO_RESV_MASK;
 	value |= icrlo;
-	lapic->icr_lo = value;
+	lapic_set_icr_lo(value);
 	intr_restore(eflags);
 }
 
@@ -1409,7 +1779,7 @@ lapic_ipi_vectored(u_int vector, int des
 		printf("APIC: IPI might be stuck\n");
 #else /* !needsattention */
 		/* Wait until mesage is sent without a timeout. */
-		while (lapic->icr_lo & APIC_DELSTAT_PEND)
+		while (lapic_icr_lo() & APIC_DELSTAT_PEND)
 			ia32_pause();
 #endif /* needsattention */
 	}

Modified: projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c
==============================================================================
--- projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c	Sat May 14 18:37:24 2011	(r221905)
@@ -27,6 +27,7 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_pmap.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>

Modified: projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c	Sat May 14 18:37:24 2011	(r221905)
@@ -140,6 +140,26 @@ struct cpu_info {
 int cpu_apic_ids[MAXCPU];
 int apic_cpuids[MAX_APIC_ID + 1];
 
+/*
+ * Trampoline for hypervisor direct 64-bit jump.
+ *
+ *   0  -	signature for guest->host verification
+ *   8  -	virtual address of this page
+ *  16  -	instruction virtual address
+ *  24  -	stack pointer virtual address
+ *  32  -	CR3, physical address of kernel page table
+ *  40  -	24-byte area for null/code/data GDT entries
+ */
+#define MP_V64T_SIG	0xcafebabecafebabeULL
+struct mp_v64tramp {
+	uint64_t	mt_sig;
+	uint64_t	mt_virt;
+	uint64_t	mt_eip;
+	uint64_t	mt_rsp;
+	uint64_t	mt_cr3;
+	uint64_t	mt_gdtr[3];
+};
+
 /* Holds pending bitmap based IPIs per CPU */
 static volatile u_int cpu_ipi_pending[MAXCPU];
 
@@ -873,6 +893,29 @@ start_all_aps(void)
 		bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8;
 		bootAP = cpu;
 
+		/*
+		 * If running in a VM that doesn't support the unrestricted
+		 * guest 16-bit mode, forget most of the above and create
+		 * the data block that allows the hypervisor to direct-jump
+		 * into 64-bit mode. Copy this over the top of the 16-bit
+		 * bootstrap. The startup-IPI informs the hypervisor which
+		 * physical page this data block lies in. The hypervisor
+		 * will then use the block to initialise register state of
+		 * the AP in an almost identical fashion to how it builds
+		 * the BSP initial register state.
+		 */
+		if (testenv("hw.use_bvm_mptramp")) {
+			struct mp_v64tramp mv;
+
+			bzero(&mv, sizeof(mv));
+			mv.mt_sig = MP_V64T_SIG;
+			mv.mt_virt = (uint64_t) va;
+			mv.mt_eip = (uint64_t) init_secondary;
+			mv.mt_rsp = (uint64_t) bootSTK;
+			mv.mt_cr3 = KPML4phys;
+			bcopy(&mv, (void *) va, sizeof(mv));
+		}
+
 		/* attempt to start the Application Processor */
 		if (!start_ap(apic_id)) {
 			/* restore the warmstart vector */

Modified: projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c	Sat May 14 18:37:24 2011	(r221905)
@@ -507,8 +507,10 @@ cpu_reset_proxy()
 {
 
 	cpu_reset_proxy_active = 1;
-	while (cpu_reset_proxy_active == 1)
+	while (cpu_reset_proxy_active == 1) {
+		ia32_pause();
 		;	/* Wait for other cpu to see that we've started */
+	}
 	stop_cpus((1<<cpu_reset_proxyid));
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
@@ -539,14 +541,17 @@ cpu_reset()
 			atomic_store_rel_int(&started_cpus, 1 << 0);
 
 			cnt = 0;
-			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
+			while (cpu_reset_proxy_active == 0 && cnt < 10000000) {
+				ia32_pause();
 				cnt++;	/* Wait for BSP to announce restart */
+			}
 			if (cpu_reset_proxy_active == 0)
 				printf("cpu_reset: Failed to restart BSP\n");
 			enable_intr();
 			cpu_reset_proxy_active = 2;
 
-			while (1);
+			while (1)
+				ia32_pause();
 			/* NOTREACHED */
 		}
 

Modified: projects/bhyve_ref/sys/amd64/conf/GENERIC
==============================================================================
--- projects/bhyve_ref/sys/amd64/conf/GENERIC	Sat May 14 18:22:14 2011	(r221904)
+++ projects/bhyve_ref/sys/amd64/conf/GENERIC	Sat May 14 18:37:24 2011	(r221905)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105141837.p4EIbODA011628>