From owner-svn-src-projects@FreeBSD.ORG Sat May 14 18:37:24 2011 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id B0A14106566C; Sat, 14 May 2011 18:37:24 +0000 (UTC) (envelope-from grehan@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 9D8CE8FC0C; Sat, 14 May 2011 18:37:24 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p4EIbOIW011637; Sat, 14 May 2011 18:37:24 GMT (envelope-from grehan@svn.freebsd.org) Received: (from grehan@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p4EIbODA011628; Sat, 14 May 2011 18:37:24 GMT (envelope-from grehan@svn.freebsd.org) Message-Id: <201105141837.p4EIbODA011628@svn.freebsd.org> From: Peter Grehan Date: Sat, 14 May 2011 18:37:24 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r221905 - in projects/bhyve_ref/sys: amd64/amd64 amd64/conf amd64/include conf dev/blackhole dev/bvm kern modules modules/blackhole X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 14 May 2011 18:37:24 -0000 Author: grehan Date: Sat May 14 18:37:24 2011 New Revision: 221905 URL: http://svn.freebsd.org/changeset/base/221905 Log: bhyve import part 2 of 2, guest kernel changes. This branch is now considered frozen: future bhyve development will take place in a branch off -CURRENT. sys/dev/bvm/bvm_console.c sys/dev/bvm/bvm_dbg.c - simple console driver/gdb debug port used for bringup. supported by user-space bhyve executable sys/conf/options.amd64 sys/amd64/amd64/minidump_machdep.c - allow NKPT to be set in the kernel config file sys/amd64/conf/GENERIC - mptable config options; bhyve user-space executable creates an mptable with number of CPUs, and optional vendor extension - add bvm console/debug - set NKPT to 512 to allow loading of large RAM disks from the loader - include kdb/gdb sys/amd64/amd64/local_apic.c sys/amd64/amd64/apic_vector.S sys/amd64/include/specialreg.h - if x2apic mode available, use MSRs to access the local APIC, otherwise fall back to 'classic' MMIO mode sys/amd64/amd64/mp_machdep.c - support AP spinup on CPU models that don't have real-mode support by overwriting the real-mode page with a message that supplies the bhyve user-space executable with enough information to start the AP directly in 64-bit mode. sys/amd64/amd64/vm_machdep.c - insert pause statements into cpu shutdown busy-wait loops sys/dev/blackhole/blackhole.c sys/modules/blackhole/Makefile - boot-time loadable module that claims all PCI bus/slot/funcs specified in an env var that are to be used for PCI passthrough sys/amd64/amd64/intr_machdep.c - allow round-robin assignment of device interrupts to CPUs to be disabled from the loader sys/amd64/include/bus.h - convert string ins/outs instructions to loops of individual in/out since bhyve doesn't support these yet sys/kern/subr_bus.c - if the device was no created with a fixed devclass, then remove it's association with the devclass it was associated with during probe. Otherwise, new drivers do not get a chance to probe/attach since the device will stay married to the first driver that it probed successfully but failed to attach. Sponsored by: NetApp, Inc. Added: projects/bhyve_ref/sys/dev/blackhole/ projects/bhyve_ref/sys/dev/blackhole/blackhole.c projects/bhyve_ref/sys/dev/bvm/ projects/bhyve_ref/sys/dev/bvm/bvm_console.c projects/bhyve_ref/sys/dev/bvm/bvm_dbg.c projects/bhyve_ref/sys/modules/blackhole/ projects/bhyve_ref/sys/modules/blackhole/Makefile Modified: projects/bhyve_ref/sys/amd64/amd64/apic_vector.S projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c projects/bhyve_ref/sys/amd64/amd64/local_apic.c projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c projects/bhyve_ref/sys/amd64/conf/GENERIC projects/bhyve_ref/sys/amd64/include/bus.h projects/bhyve_ref/sys/amd64/include/specialreg.h projects/bhyve_ref/sys/conf/files.amd64 projects/bhyve_ref/sys/conf/options.amd64 projects/bhyve_ref/sys/kern/subr_bus.c projects/bhyve_ref/sys/modules/Makefile Modified: projects/bhyve_ref/sys/amd64/amd64/apic_vector.S ============================================================================== --- projects/bhyve_ref/sys/amd64/amd64/apic_vector.S Sat May 14 18:22:14 2011 (r221904) +++ projects/bhyve_ref/sys/amd64/amd64/apic_vector.S Sat May 14 18:37:24 2011 (r221905) @@ -55,7 +55,14 @@ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ FAKE_MCOUNT(TF_RIP(%rsp)) ; \ movq lapic, %rdx ; /* pointer to local APIC */ \ + testq %rdx, %rdx; \ + jnz 3f; \ + movl $MSR_APIC_ISR ## index, %ecx; \ + rdmsr; \ + jmp 4f; \ +3: ; \ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ +4: ; \ bsrl %eax, %eax ; /* index of highset set bit in ISR */ \ jz 2f ; \ addl $(32 * index),%eax ; \ @@ -117,6 +124,26 @@ IDTVEC(errorint) jmp doreti #ifdef SMP + +/* + * We assume that %rax is being saved/restored outside of this macro + */ +#define DO_EOI \ + movq lapic, %rax; \ + testq %rax, %rax; \ + jz 8f; \ + movl $0, LA_EOI(%rax); \ + jmp 9f; \ +8:; \ + pushq %rcx; \ + pushq %rdx; \ + xorl %edx, %edx; /* eax is already zero */ \ + movl $MSR_APIC_EOI, %ecx; \ + wrmsr; \ + popq %rdx; \ + popq %rcx; \ +9: + /* * Global address space TLB shootdown. */ @@ -128,8 +155,7 @@ IDTVEC(invltlb) movq %cr3, %rax /* invalidate the TLB */ movq %rax, %cr3 - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI lock incl smp_tlb_wait @@ -148,8 +174,7 @@ IDTVEC(invlpg) movq smp_tlb_addr1, %rax invlpg (%rax) /* invalidate single page */ - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI lock incl smp_tlb_wait @@ -173,8 +198,7 @@ IDTVEC(invlrng) cmpq %rax, %rdx jb 1b - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI lock incl smp_tlb_wait @@ -193,8 +217,7 @@ IDTVEC(invlcache) wbinvd - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI lock incl smp_tlb_wait @@ -210,9 +233,8 @@ IDTVEC(invlcache) IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME - movq lapic, %rdx - movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ - + DO_EOI + FAKE_MCOUNT(TF_RIP(%rsp)) call ipi_bitmap_handler @@ -227,8 +249,7 @@ IDTVEC(ipi_intr_bitmap_handler) IDTVEC(cpustop) PUSH_FRAME - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI call cpustop_handler jmp doreti @@ -241,8 +262,7 @@ IDTVEC(cpustop) IDTVEC(cpususpend) PUSH_FRAME - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI call cpususpend_handler @@ -259,7 +279,6 @@ IDTVEC(cpususpend) IDTVEC(rendezvous) PUSH_FRAME call smp_rendezvous_action - movq lapic, %rax - movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ + DO_EOI jmp doreti #endif /* SMP */ Modified: projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c ============================================================================== --- projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c Sat May 14 18:22:14 2011 (r221904) +++ projects/bhyve_ref/sys/amd64/amd64/intr_machdep.c Sat May 14 18:37:24 2011 (r221905) @@ -78,6 +78,8 @@ static STAILQ_HEAD(, pic) pics; #ifdef SMP static int assign_cpu; +static int round_robin_interrupts = 1; +TUNABLE_INT("round_robin_interrupts", &round_robin_interrupts); #endif static int intr_assign_cpu(void *arg, u_char cpu); @@ -460,6 +462,10 @@ intr_next_cpu(void) if (!assign_cpu) return (cpu_apic_ids[0]); + /* All interrupts go to the BSP if not allowed to round robin */ + if (!round_robin_interrupts) + return (cpu_apic_ids[0]); + mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { Modified: projects/bhyve_ref/sys/amd64/amd64/local_apic.c ============================================================================== --- projects/bhyve_ref/sys/amd64/amd64/local_apic.c Sat May 14 18:22:14 2011 (r221904) +++ projects/bhyve_ref/sys/amd64/amd64/local_apic.c Sat May 14 18:37:24 2011 (r221905) @@ -148,6 +148,7 @@ volatile lapic_t *lapic; vm_paddr_t lapic_paddr; static u_long lapic_timer_divisor, lapic_timer_period, lapic_timer_hz; static enum lapic_clock clockcoverage; +static int x2apic; static void lapic_enable(void); static void lapic_resume(struct pic *pic); @@ -156,6 +157,36 @@ static void lapic_timer_oneshot(u_int co static void lapic_timer_periodic(u_int count); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); +static uint32_t lapic_version(void); +static uint32_t lapic_ldr(void); +static uint32_t lapic_dfr(void); +static uint32_t lapic_lvt_lint0(void); +static void lapic_set_lvt_lint0(uint32_t value); +static uint32_t lapic_lvt_lint1(void); +static void lapic_set_lvt_lint1(uint32_t value); +static uint32_t lapic_tpr(void); +static uint32_t lapic_svr(void); +static void lapic_set_svr(uint32_t value); +static uint32_t lapic_lvt_timer(void); +static void lapic_set_lvt_timer(uint32_t value); +static uint32_t lapic_lvt_thermal(void); +static uint32_t lapic_lvt_error(void); +static void lapic_set_lvt_error(uint32_t value); +static uint32_t lapic_lvt_pcint(void); +static void lapic_set_lvt_pcint(uint32_t value); +static uint32_t lapic_esr(void); +static void lapic_set_esr(uint32_t value); +static uint32_t lapic_ccr_timer(void); +static void lapic_set_dcr_timer(uint32_t value); +static void lapic_set_icr_timer(uint32_t value); +uint32_t lapic_irr(int num); +uint32_t lapic_tmr(int num); +uint32_t lapic_isr(int num); +static uint32_t lapic_icr_lo(void); +static void lapic_set_icr_lo(uint32_t value); +static uint32_t lapic_icr_hi(void); +static void lapic_set_icr_hi(uint32_t value); +static boolean_t lapic_missing(void); struct pic lapic_pic = { .pic_resume = lapic_resume }; @@ -206,12 +237,20 @@ lvt_mode(struct lapic *la, u_int pin, ui void lapic_init(vm_paddr_t addr) { - - /* Map the local APIC and setup the spurious interrupt handler. */ - KASSERT(trunc_page(addr) == addr, - ("local APIC not aligned on a page boundary")); - lapic = pmap_mapdev(addr, sizeof(lapic_t)); - lapic_paddr = addr; + if ((cpu_feature2 & CPUID2_X2APIC) != 0 && + (rdmsr(MSR_APICBASE) & APICBASE_X2APIC) != 0) { + x2apic = 1; + if (bootverbose) + printf("Local APIC access using x2APIC MSRs\n"); + } else { + /* + * Map the local APIC and setup the spurious interrupt handler. + */ + KASSERT(trunc_page(addr) == addr, + ("local APIC not aligned on a page boundary")); + lapic = pmap_mapdev(addr, sizeof(lapic_t)); + lapic_paddr = addr; + } setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); /* Perform basic initialization of the BSP's local APIC. */ @@ -276,12 +315,12 @@ lapic_dump(const char* str) printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", - lapic->id, lapic->version, lapic->ldr, lapic->dfr); + lapic_id(), lapic_version(), lapic_ldr(), lapic_dfr()); printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", - lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); + lapic_lvt_lint0(), lapic_lvt_lint1(), lapic_tpr(), lapic_svr()); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x pmc: 0x%08x\n", - lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error, - lapic->lvt_pcint); + lapic_lvt_timer(), lapic_lvt_thermal(), lapic_lvt_error(), + lapic_lvt_pcint()); } void @@ -295,7 +334,7 @@ lapic_setup(int boot) la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); eflags = intr_disable(); - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); @@ -304,15 +343,15 @@ lapic_setup(int boot) lapic_enable(); /* Program LINT[01] LVT entries. */ - lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); - lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); + lapic_set_lvt_lint0(lvt_mode(la, LVT_LINT0, lapic_lvt_lint0())); + lapic_set_lvt_lint1(lvt_mode(la, LVT_LINT1, lapic_lvt_lint1())); /* Program the PMC LVT entry if present. */ if (maxlvt >= LVT_PMC) - lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); + lapic_set_lvt_pcint(lvt_mode(la, LVT_PMC, lapic_lvt_pcint())); /* Program timer LVT and setup handler. */ - lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer); + lapic_set_lvt_timer(lvt_mode(la, LVT_TIMER, lapic_lvt_timer())); if (boot) { snprintf(buf, sizeof(buf), "cpu%d: timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); @@ -328,8 +367,8 @@ lapic_setup(int boot) } /* Program error LVT and clear any existing errors. */ - lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error); - lapic->esr = 0; + lapic_set_lvt_error(lvt_mode(la, LVT_ERROR, lapic_lvt_error())); + lapic_set_esr(0); /* XXX: Thermal LVT */ @@ -342,9 +381,9 @@ lapic_reenable_pmc(void) #ifdef HWPMC_HOOKS uint32_t value; - value = lapic->lvt_pcint; + value = lapic_lvt_pcint(); value &= ~APIC_LVT_M; - lapic->lvt_pcint = value; + lapic_set_lvt_pcint(value); #endif } @@ -355,7 +394,7 @@ lapic_update_pmc(void *dummy) struct lapic *la; la = &lapics[lapic_id()]; - lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); + lapic_set_lvt_pcint(lvt_mode(la, LVT_PMC, lapic_lvt_pcint())); } #endif @@ -366,11 +405,11 @@ lapic_enable_pmc(void) u_int32_t maxlvt; /* Fail if the local APIC is not present. */ - if (lapic == NULL) + if (lapic_missing()) return (0); /* Fail if the PMC LVT is not present. */ - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return (0); @@ -400,11 +439,11 @@ lapic_disable_pmc(void) u_int32_t maxlvt; /* Fail if the local APIC is not present. */ - if (lapic == NULL) + if (lapic_missing()) return; /* Fail if the PMC LVT is not present. */ - maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; + maxlvt = (lapic_version() & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return; @@ -435,7 +474,7 @@ lapic_setup_clock(enum lapic_clock srcsd MPASS(srcsdes != LAPIC_CLOCK_NONE); /* Can't drive the timer without a local APIC. */ - if (lapic == NULL || + if (lapic_missing() || (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0)) { clockcoverage = LAPIC_CLOCK_NONE; return (clockcoverage); @@ -449,7 +488,7 @@ lapic_setup_clock(enum lapic_clock srcsd lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); DELAY(2000000); - value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; + value = APIC_TIMER_MAX_COUNT - lapic_ccr_timer(); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; @@ -509,9 +548,9 @@ lapic_disable(void) uint32_t value; /* Software disable the local APIC. */ - value = lapic->svr; + value = lapic_svr(); value &= ~APIC_SVR_SWEN; - lapic->svr = value; + lapic_set_svr(value); } static void @@ -520,10 +559,10 @@ lapic_enable(void) u_int32_t value; /* Program the spurious vector to enable the local APIC. */ - value = lapic->svr; + value = lapic_svr(); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); - lapic->svr = value; + lapic_set_svr(value); } /* Reset the local APIC on the BSP during resume. */ @@ -534,19 +573,342 @@ lapic_resume(struct pic *pic) lapic_setup(0); } +static uint32_t +lapic_version(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_VERSION)); + else + return (lapic->version); +} + +static uint32_t +lapic_ldr(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LDR)); + else + return (lapic->ldr); +} + +static uint32_t +lapic_dfr(void) +{ + + if (x2apic) + return (0xffffffff); /* DFR not available in x2APIC mode */ + else + return (lapic->dfr); +} + +static uint32_t +lapic_lvt_lint0(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LVT_LINT0)); + else + return (lapic->lvt_lint0); +} + +static void +lapic_set_lvt_lint0(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_LVT_LINT0, value); + else + lapic->lvt_lint0 = value; +} + +static uint32_t +lapic_lvt_lint1(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LVT_LINT1)); + else + return (lapic->lvt_lint1); +} + +static void +lapic_set_lvt_lint1(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_LVT_LINT1, value); + else + lapic->lvt_lint1 = value; +} + +static uint32_t +lapic_tpr(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_TPR)); + else + return (lapic->tpr); +} + +static uint32_t +lapic_svr(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_SVR)); + else + return (lapic->svr); +} + +static void +lapic_set_svr(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_SVR, value); + else + lapic->svr = value; +} + +static uint32_t +lapic_lvt_timer(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LVT_TIMER)); + else + return (lapic->lvt_timer); +} + +static void +lapic_set_lvt_timer(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_LVT_TIMER, value); + else + lapic->lvt_timer = value; +} + +static uint32_t +lapic_lvt_thermal(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LVT_THERMAL)); + else + return (lapic->lvt_thermal); +} + +static uint32_t +lapic_lvt_error(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LVT_ERROR)); + else + return (lapic->lvt_error); +} + +static void +lapic_set_lvt_error(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_LVT_ERROR, value); + else + lapic->lvt_error = value; +} + +static uint32_t +lapic_lvt_pcint(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_LVT_PCINT)); + else + return (lapic->lvt_pcint); +} + +static void +lapic_set_lvt_pcint(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_LVT_PCINT, value); + else + lapic->lvt_pcint = value; +} + +static uint32_t +lapic_esr(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_ESR)); + else + return (lapic->esr); +} + +static void +lapic_set_esr(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_ESR, value); + else + lapic->esr = value; +} + +static uint32_t +lapic_ccr_timer(void) +{ + + if (x2apic) + return (rdmsr(MSR_APIC_CCR_TIMER)); + else + return (lapic->ccr_timer); +} + +static void +lapic_set_dcr_timer(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_DCR_TIMER, value); + else + lapic->dcr_timer = value; +} + +static void +lapic_set_icr_timer(uint32_t value) +{ + + if (x2apic) + wrmsr(MSR_APIC_ICR_TIMER, value); + else + lapic->icr_timer = value; +} + +uint32_t +lapic_tmr(int num) +{ + int msr; + volatile uint32_t *regptr; + + KASSERT(num >= 0 && num < 8, ("lapic_tmr: invalid num %d", num)); + + if (x2apic) { + msr = MSR_APIC_TMR0 + num; + return (rdmsr(msr)); + } else { + regptr = &lapic->tmr0; + return (regptr[num * 4]); + } +} + +uint32_t +lapic_irr(int num) +{ + int msr; + volatile uint32_t *regptr; + + KASSERT(num >= 0 && num < 8, ("lapic_irr: invalid num %d", num)); + + if (x2apic) { + msr = MSR_APIC_IRR0 + num; + return (rdmsr(msr)); + } else { + regptr = &lapic->irr0; + return (regptr[num * 4]); + } +} + +uint32_t +lapic_isr(int num) +{ + int msr; + volatile uint32_t *regptr; + + KASSERT(num >= 0 && num < 8, ("lapic_isr: invalid num %d", num)); + + if (x2apic) { + msr = MSR_APIC_ISR0 + num; + return (rdmsr(msr)); + } else { + regptr = &lapic->isr0; + return (regptr[num * 4]); + } +} + +static uint32_t icr_hi_stashed[MAXCPU]; + +static uint32_t +lapic_icr_lo(void) +{ + + if (x2apic) + return (0); + else + return (lapic->icr_lo); +} + +static void +lapic_set_icr_lo(uint32_t value) +{ + + if (x2apic) { + wrmsr(MSR_APIC_ICR, + (uint64_t)icr_hi_stashed[curcpu] << 32 | value); + } else + lapic->icr_lo = value; +} + +static uint32_t +lapic_icr_hi(void) +{ + + if (x2apic) + return (0); + else + return (lapic->icr_hi); +} + +static void +lapic_set_icr_hi(uint32_t value) +{ + if (x2apic) + icr_hi_stashed[curcpu] = value >> APIC_ID_SHIFT; /* XXX */ + else + lapic->icr_hi = value; +} + +static boolean_t +lapic_missing(void) +{ + + if (x2apic == 0 && lapic == NULL) + return (TRUE); + else + return (FALSE); +} + int lapic_id(void) { - KASSERT(lapic != NULL, ("local APIC is not mapped")); - return (lapic->id >> APIC_ID_SHIFT); + if (x2apic) + return (rdmsr(MSR_APIC_ID)); + else + return (lapic->id >> APIC_ID_SHIFT); } int lapic_intr_pending(u_int vector) { - volatile u_int32_t *irr; - /* * The IRR registers are an array of 128-bit registers each of * which only describes 32 interrupts in the low 32 bits.. Thus, @@ -556,8 +918,7 @@ lapic_intr_pending(u_int vector) * modulus the vector by 32 to determine the individual bit to * test. */ - irr = &lapic->irr0; - return (irr[(vector / 32) * 4] & 1 << (vector % 32)); + return (lapic_irr(vector / 32) & 1 << (vector % 32)); } void @@ -713,13 +1074,19 @@ void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR - lapic->tpr = vector; + if (x2apic) + wrmsr(MSR_APIC_TPR, vector); + else + lapic->tpr = vector; #else u_int32_t tpr; - tpr = lapic->tpr & ~APIC_TPR_PRIO; + tpr = lapic_tpr() & ~APIC_TPR_PRIO; tpr |= vector; - lapic->tpr = tpr; + if (x2apic) + wrmsr(MSR_APIC_TPR, tpr); + else + lapic->tpr = tpr; #endif } @@ -727,7 +1094,10 @@ void lapic_eoi(void) { - lapic->eoi = 0; + if (x2apic) + wrmsr(MSR_APIC_EOI, 0); + else + lapic->eoi = 0; } void @@ -819,7 +1189,7 @@ lapic_timer_set_divisor(u_int divisor) KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); - lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1]; + lapic_set_dcr_timer(lapic_timer_divisors[ffs(divisor) - 1]); } static void @@ -827,11 +1197,11 @@ lapic_timer_oneshot(u_int count) { u_int32_t value; - value = lapic->lvt_timer; + value = lapic_lvt_timer(); value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT; - lapic->lvt_timer = value; - lapic->icr_timer = count; + lapic_set_lvt_timer(value); + lapic_set_icr_timer(count); } static void @@ -839,11 +1209,11 @@ lapic_timer_periodic(u_int count) { u_int32_t value; - value = lapic->lvt_timer; + value = lapic_lvt_timer(); value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_PERIODIC; - lapic->lvt_timer = value; - lapic->icr_timer = count; + lapic_set_lvt_timer(value); + lapic_set_icr_timer(count); } static void @@ -851,9 +1221,9 @@ lapic_timer_enable_intr(void) { u_int32_t value; - value = lapic->lvt_timer; + value = lapic_lvt_timer(); value &= ~APIC_LVT_M; - lapic->lvt_timer = value; + lapic_set_lvt_timer(value); } void @@ -867,8 +1237,8 @@ lapic_handle_error(void) * to update its value to indicate any errors that have * occurred since the previous write to the register. */ - lapic->esr = 0; - esr = lapic->esr; + lapic_set_esr(0); + esr = lapic_esr(); printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); @@ -1115,17 +1485,17 @@ DB_SHOW_COMMAND(lapic, db_show_lapic) uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); - v = lapic->version; + v = lapic_version(); db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); - v = lapic->svr; + v = lapic_svr(); db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); - db_printf("TPR = %02x\n", lapic->tpr); + db_printf("TPR = %02x\n", lapic_tpr()); #define dump_field(prefix, index) \ - dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index, \ + dump_mask(__XSTRING(prefix ## index), lapic_ ## prefix(index), \ index * 32) db_printf("In-service Interrupts:\n"); @@ -1300,7 +1670,7 @@ lapic_ipi_wait(int delay) } else incr = 1; for (x = 0; x < delay; x += incr) { - if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) + if ((lapic_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } @@ -1313,7 +1683,7 @@ lapic_ipi_raw(register_t icrlo, u_int de register_t value, eflags; /* XXX: Need more sanity checking of icrlo? */ - KASSERT(lapic != NULL, ("%s called too early", __func__)); + KASSERT(!lapic_missing(), ("%s called too early", __func__)); KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, @@ -1322,17 +1692,17 @@ lapic_ipi_raw(register_t icrlo, u_int de /* Set destination in ICR HI register if it is being used. */ eflags = intr_disable(); if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { - value = lapic->icr_hi; + value = lapic_icr_hi(); value &= ~APIC_ID_MASK; value |= dest << APIC_ID_SHIFT; - lapic->icr_hi = value; + lapic_set_icr_hi(value); } /* Program the contents of the IPI and dispatch it. */ - value = lapic->icr_lo; + value = lapic_icr_lo(); value &= APIC_ICRLO_RESV_MASK; value |= icrlo; - lapic->icr_lo = value; + lapic_set_icr_lo(value); intr_restore(eflags); } @@ -1409,7 +1779,7 @@ lapic_ipi_vectored(u_int vector, int des printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ - while (lapic->icr_lo & APIC_DELSTAT_PEND) + while (lapic_icr_lo() & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } Modified: projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c ============================================================================== --- projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c Sat May 14 18:22:14 2011 (r221904) +++ projects/bhyve_ref/sys/amd64/amd64/minidump_machdep.c Sat May 14 18:37:24 2011 (r221905) @@ -27,6 +27,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_pmap.h" #include #include #include Modified: projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c ============================================================================== --- projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c Sat May 14 18:22:14 2011 (r221904) +++ projects/bhyve_ref/sys/amd64/amd64/mp_machdep.c Sat May 14 18:37:24 2011 (r221905) @@ -140,6 +140,26 @@ struct cpu_info { int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; +/* + * Trampoline for hypervisor direct 64-bit jump. + * + * 0 - signature for guest->host verification + * 8 - virtual address of this page + * 16 - instruction virtual address + * 24 - stack pointer virtual address + * 32 - CR3, physical address of kernel page table + * 40 - 24-byte area for null/code/data GDT entries + */ +#define MP_V64T_SIG 0xcafebabecafebabeULL +struct mp_v64tramp { + uint64_t mt_sig; + uint64_t mt_virt; + uint64_t mt_eip; + uint64_t mt_rsp; + uint64_t mt_cr3; + uint64_t mt_gdtr[3]; +}; + /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; @@ -873,6 +893,29 @@ start_all_aps(void) bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; + /* + * If running in a VM that doesn't support the unrestricted + * guest 16-bit mode, forget most of the above and create + * the data block that allows the hypervisor to direct-jump + * into 64-bit mode. Copy this over the top of the 16-bit + * bootstrap. The startup-IPI informs the hypervisor which + * physical page this data block lies in. The hypervisor + * will then use the block to initialise register state of + * the AP in an almost identical fashion to how it builds + * the BSP initial register state. + */ + if (testenv("hw.use_bvm_mptramp")) { + struct mp_v64tramp mv; + + bzero(&mv, sizeof(mv)); + mv.mt_sig = MP_V64T_SIG; + mv.mt_virt = (uint64_t) va; + mv.mt_eip = (uint64_t) init_secondary; + mv.mt_rsp = (uint64_t) bootSTK; + mv.mt_cr3 = KPML4phys; + bcopy(&mv, (void *) va, sizeof(mv)); + } + /* attempt to start the Application Processor */ if (!start_ap(apic_id)) { /* restore the warmstart vector */ Modified: projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c ============================================================================== --- projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c Sat May 14 18:22:14 2011 (r221904) +++ projects/bhyve_ref/sys/amd64/amd64/vm_machdep.c Sat May 14 18:37:24 2011 (r221905) @@ -507,8 +507,10 @@ cpu_reset_proxy() { cpu_reset_proxy_active = 1; - while (cpu_reset_proxy_active == 1) + while (cpu_reset_proxy_active == 1) { + ia32_pause(); ; /* Wait for other cpu to see that we've started */ + } stop_cpus((1<