Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 25 Sep 2012 22:31:35 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r240941 - in projects/bhyve/sys: amd64/vmm amd64/vmm/intel amd64/vmm/io modules/vmm
Message-ID:  <201209252231.q8PMVZwX006907@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: neel
Date: Tue Sep 25 22:31:35 2012
New Revision: 240941
URL: http://svn.freebsd.org/changeset/base/240941

Log:
  Add support for trapping MMIO writes to local apic registers and emulating them.
  
  The default behavior is still to present the local apic to the guest in the
  x2apic mode.

Added:
  projects/bhyve/sys/amd64/vmm/vmm_instruction_emul.c   (contents, props changed)
  projects/bhyve/sys/amd64/vmm/vmm_instruction_emul.h   (contents, props changed)
Modified:
  projects/bhyve/sys/amd64/vmm/intel/vmcs.h
  projects/bhyve/sys/amd64/vmm/intel/vmx.c
  projects/bhyve/sys/amd64/vmm/io/vlapic.c
  projects/bhyve/sys/amd64/vmm/vmm.c
  projects/bhyve/sys/amd64/vmm/vmm_lapic.c
  projects/bhyve/sys/amd64/vmm/vmm_lapic.h
  projects/bhyve/sys/amd64/vmm/x86.c
  projects/bhyve/sys/amd64/vmm/x86.h
  projects/bhyve/sys/modules/vmm/Makefile

Modified: projects/bhyve/sys/amd64/vmm/intel/vmcs.h
==============================================================================
--- projects/bhyve/sys/amd64/vmm/intel/vmcs.h	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/intel/vmcs.h	Tue Sep 25 22:31:35 2012	(r240941)
@@ -66,6 +66,7 @@ uint64_t vmcs_read(uint32_t encoding);
 #define	vmcs_exit_reason()		(vmcs_read(VMCS_EXIT_REASON) & 0xffff)
 #define	vmcs_exit_qualification()	vmcs_read(VMCS_EXIT_QUALIFICATION)
 #define	vmcs_guest_cr3()		vmcs_read(VMCS_GUEST_CR3)
+#define	vmcs_gpa()			vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
 
 #endif	/* _KERNEL */
 
@@ -324,4 +325,13 @@ uint64_t vmcs_read(uint32_t encoding);
  */
 #define	EXIT_QUAL_NMI_WHILE_STI_BLOCKING	3
 
+/*
+ * Exit qualification for EPT violation
+ */
+#define	EPT_VIOLATION_DATA_READ		(1UL << 0)
+#define	EPT_VIOLATION_DATA_WRITE	(1UL << 1)
+#define	EPT_VIOLATION_INST_FETCH	(1UL << 2)
+#define	EPT_VIOLATION_GLA_VALID		(1UL << 7)
+#define	EPT_VIOLATION_XLAT_VALID	(1UL << 8)
+
 #endif

Modified: projects/bhyve/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- projects/bhyve/sys/amd64/vmm/intel/vmx.c	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/intel/vmx.c	Tue Sep 25 22:31:35 2012	(r240941)
@@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
 #include <machine/specialreg.h>
 #include <machine/vmparam.h>
 
+#include <x86/apicreg.h>
+
 #include <machine/vmm.h>
 #include "vmm_lapic.h"
 #include "vmm_msr.h"
@@ -60,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include "vmx.h"
 #include "x86.h"
 #include "vmx_controls.h"
+#include "vmm_instruction_emul.h"
 
 #define	CR4_VMXE	(1UL << 13)
 
@@ -771,21 +774,17 @@ vmx_vminit(struct vm *vm)
 }
 
 static int
-vmx_handle_cpuid(int vcpu, struct vmxctx *vmxctx)
+vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx)
 {
 	int handled, func;
 	
 	func = vmxctx->guest_rax;
 
-	handled = x86_emulate_cpuid((uint32_t*)(&vmxctx->guest_rax),
-	    (uint32_t*)(&vmxctx->guest_rbx), (uint32_t*)(&vmxctx->guest_rcx),
-	    (uint32_t*)(&vmxctx->guest_rdx), vcpu);
-#if 0
-	printf("%s: func %x rax %lx rbx %lx rcx %lx rdx %lx handled %d\n",
-		__func__, func, vmxctx->guest_rax, vmxctx->guest_rbx,
-		vmxctx->guest_rcx, vmxctx->guest_rdx, handled);
-#endif
-
+	handled = x86_emulate_cpuid(vm, vcpu,
+				    (uint32_t*)(&vmxctx->guest_rax),
+				    (uint32_t*)(&vmxctx->guest_rbx),
+				    (uint32_t*)(&vmxctx->guest_rcx),
+				    (uint32_t*)(&vmxctx->guest_rdx));
 	return (handled);
 }
 
@@ -1146,13 +1145,54 @@ vmx_emulate_cr_access(struct vmx *vmx, i
 }
 
 static int
+vmx_lapic_fault(struct vm *vm, int cpu,
+		uint64_t gpa, uint64_t rip, uint64_t cr3, uint64_t ept_qual)
+{
+	int read, write, handled;
+
+	/*
+	 * For this to be a legitimate access to the local apic:
+	 * - the GPA must be in the local apic page
+	 * - the GPA must be aligned on a 16 byte boundary
+	 */
+	if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
+		return (UNHANDLED);
+
+	if ((gpa & 0xF) != 0)
+		return (UNHANDLED);
+
+	/* EPT violation on an instruction fetch doesn't make sense here */
+	if (ept_qual & EPT_VIOLATION_INST_FETCH)
+		return (UNHANDLED);
+
+	/* EPT violation must be a read fault or a write fault but not both */
+	read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
+	write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
+	if ((read ^ write) == 0)
+		return (UNHANDLED);
+
+	/*
+	 * The EPT violation must have been caused by accessing a guest-physical
+	 * address that is a translation of a guest-linear address.
+	 */
+	if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
+	    (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
+		return (UNHANDLED);
+	}
+
+	handled = lapic_mmio(vm, cpu, gpa - DEFAULT_APIC_BASE, read, rip, cr3);
+
+	return (handled);
+}
+
+static int
 vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 {
 	int handled;
 	struct vmcs *vmcs;
 	struct vmxctx *vmxctx;
 	uint32_t eax, ecx, edx;
-	uint64_t qual;
+	uint64_t qual, gpa, cr3;
 
 	handled = 0;
 	vmcs = &vmx->vmcs[vcpu];
@@ -1229,11 +1269,17 @@ vmx_exit_process(struct vmx *vmx, int vc
 		vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
 		break;
 	case EXIT_REASON_CPUID:
-		handled = vmx_handle_cpuid(vcpu, vmxctx);
+		handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
 		break;
 	case EXIT_REASON_EPT_FAULT:
-		vmexit->exitcode = VM_EXITCODE_PAGING;
-		vmexit->u.paging.cr3 = vmcs_guest_cr3();
+		gpa = vmcs_gpa();
+		cr3 = vmcs_guest_cr3();
+		handled = vmx_lapic_fault(vmx->vm, vcpu,
+					  gpa, vmexit->rip, cr3, qual);
+		if (!handled) {
+			vmexit->exitcode = VM_EXITCODE_PAGING;
+			vmexit->u.paging.cr3 = cr3;
+		}
 		break;
 	default:
 		break;

Modified: projects/bhyve/sys/amd64/vmm/io/vlapic.c
==============================================================================
--- projects/bhyve/sys/amd64/vmm/io/vlapic.c	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/io/vlapic.c	Tue Sep 25 22:31:35 2012	(r240941)
@@ -87,7 +87,7 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic",
 #define VLAPIC_VERSION		(16)
 #define VLAPIC_MAXLVT_ENTRIES	(5)
 
-#define	x2apic(vlapic)		((vlapic)->msr_apicbase & APICBASE_X2APIC)
+#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
 
 enum boot_state {
 	BS_INIT,
@@ -433,7 +433,10 @@ lapic_process_icr(struct vlapic *vlapic,
 	struct vlapic *vlapic2;
 	struct vm_exit *vmexit;
 	
-	dest = icrval >> 32;
+	if (x2apic(vlapic))
+		dest = icrval >> 32;
+	else
+		dest = icrval >> (32 + 24);
 	vec = icrval & APIC_VECTOR_MASK;
 	mode = icrval & APIC_DELMODE_MASK;
 
@@ -703,8 +706,18 @@ vlapic_op_mem_write(void* dev, uint64_t 
 			lapic->svr = data;
 			break;
 		case APIC_OFFSET_ICR_LOW: 
+			if (!x2apic(vlapic)) {
+				data &= 0xffffffff;
+				data |= (uint64_t)lapic->icr_hi << 32;
+			}
 			retval = lapic_process_icr(vlapic, data);
 			break;
+		case APIC_OFFSET_ICR_HI:
+			if (!x2apic(vlapic)) {
+				retval = 0;
+				lapic->icr_hi = data;
+			}
+			break;
 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
 			reg = vlapic_get_lvt(vlapic, offset);	
 			if (!(lapic->svr & APIC_SVR_ENABLE)) {
@@ -810,19 +823,26 @@ static struct io_region vlapic_mmio[VM_M
 struct vlapic *
 vlapic_init(struct vm *vm, int vcpuid)
 {
+	int err;
+	enum x2apic_state state;
 	struct vlapic 		*vlapic;
 
+	err = vm_get_x2apic_state(vm, vcpuid, &state);
+	if (err)
+		panic("vlapic_init: err %d fetching x2apic state", err);
+
 	vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
 	vlapic->vm = vm;
 	vlapic->vcpuid = vcpuid;
 
-	vlapic->msr_apicbase = DEFAULT_APIC_BASE |
-			       APICBASE_ENABLED |
-			       APICBASE_X2APIC;
+	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
 
 	if (vcpuid == 0)
 		vlapic->msr_apicbase |= APICBASE_BSP;
 
+	if (state == X2APIC_ENABLED)
+		vlapic->msr_apicbase |= APICBASE_X2APIC;
+
 	vlapic->ops = &vlapic_dev_ops;
 
 	vlapic->mmio = vlapic_mmio + vcpuid;
@@ -856,6 +876,15 @@ vlapic_get_apicbase(struct vlapic *vlapi
 void
 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
 {
+	int err;
+	enum x2apic_state state;
+
+	err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
+	if (err)
+		panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
+
+	if (state == X2APIC_DISABLED)
+		val &= ~APICBASE_X2APIC;
 
 	vlapic->msr_apicbase = val;
 }

Modified: projects/bhyve/sys/amd64/vmm/vmm.c
==============================================================================
--- projects/bhyve/sys/amd64/vmm/vmm.c	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/vmm.c	Tue Sep 25 22:31:35 2012	(r240941)
@@ -160,11 +160,11 @@ vcpu_init(struct vm *vm, uint32_t vcpu_i
 
 	vcpu->hostcpu = -1;
 	vcpu->vcpuid = vcpu_id;
+	vcpu->x2apic_state = X2APIC_ENABLED;
 	vcpu->vlapic = vlapic_init(vm, vcpu_id);
 	vcpu->guestfpu = fpu_save_area_alloc();
 	fpu_save_area_reset(vcpu->guestfpu);
 	vcpu->stats = vmm_stat_alloc();
-	vcpu->x2apic_state = X2APIC_ENABLED;
 }
 
 struct vm_exit *

Added: projects/bhyve/sys/amd64/vmm/vmm_instruction_emul.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/bhyve/sys/amd64/vmm/vmm_instruction_emul.c	Tue Sep 25 22:31:35 2012	(r240941)
@@ -0,0 +1,385 @@
+/*-
+ * Copyright (c) 2012 Sandvine, Inc.
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "vmm_instruction_emul.h"
+
+#define	GB	(1024 * 1024 * 1024)
+
+static enum vm_reg_name gpr_map[16] = {
+	VM_REG_GUEST_RAX,
+	VM_REG_GUEST_RCX,
+	VM_REG_GUEST_RDX,
+	VM_REG_GUEST_RBX,
+	VM_REG_GUEST_RSP,
+	VM_REG_GUEST_RBP,
+	VM_REG_GUEST_RSI,
+	VM_REG_GUEST_RDI,
+	VM_REG_GUEST_R8,
+	VM_REG_GUEST_R9,
+	VM_REG_GUEST_R10,
+	VM_REG_GUEST_R11,
+	VM_REG_GUEST_R12,
+	VM_REG_GUEST_R13,
+	VM_REG_GUEST_R14,
+	VM_REG_GUEST_R15
+};
+
+static void
+vie_init(struct vie *vie)
+{
+
+	bzero(vie, sizeof(struct vie));
+
+	vie->op_size = VIE_OP_SIZE_32BIT;
+
+	vie->base_register = VM_REG_LAST;
+	vie->index_register = VM_REG_LAST;
+	vie->operand_register = VM_REG_LAST;
+}
+
+static int
+gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
+	uint64_t *gpa, uint64_t *gpaend)
+{
+	vm_paddr_t hpa;
+	int nlevels, ptpshift, ptpindex;
+	uint64_t *ptpbase, pte, pgsize;
+
+	/*
+	 * XXX assumes 64-bit guest with 4 page walk levels
+	 */
+	nlevels = 4;
+	while (--nlevels >= 0) {
+		/* Zero out the lower 12 bits and the upper 12 bits */
+		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
+
+		hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE);
+		if (hpa == -1)
+			goto error;
+
+		ptpbase = (uint64_t *)PHYS_TO_DMAP(hpa);
+
+		ptpshift = PAGE_SHIFT + nlevels * 9;
+		ptpindex = (gla >> ptpshift) & 0x1FF;
+		pgsize = 1UL << ptpshift;
+
+		pte = ptpbase[ptpindex];
+
+		if ((pte & PG_V) == 0)
+			goto error;
+
+		if (pte & PG_PS) {
+			if (pgsize > 1 * GB)
+				goto error;
+			else
+				break;
+		}
+
+		ptpphys = pte;
+	}
+
+	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
+	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
+	*gpa = pte | (gla & (pgsize - 1));
+	*gpaend = pte + pgsize;
+	return (0);
+
+error:
+	return (-1);
+}
+
+void
+vmm_fetch_instruction(struct vm *vm, uint64_t rip, uint64_t cr3,
+		      struct vie *vie)
+{
+	int n, err;
+	uint64_t hpa, gpa, gpaend;
+
+	/*
+	 * XXX cache previously fetched instructions using 'rip' as the tag
+	 */
+
+	vie_init(vie);
+
+	/*
+	 * Copy up to 15 bytes of the instruction stream into 'vie'
+	 */
+	while (vie->num_valid < VIE_INST_SIZE) {
+		err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
+		if (err)
+			break;
+
+		n = min(VIE_INST_SIZE - vie->num_valid, gpaend - gpa);
+
+		hpa = vm_gpa2hpa(vm, gpa, n);
+		if (hpa == -1)
+			break;
+
+		bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n);
+
+		rip += n;
+		vie->num_valid += n;
+	}
+}
+
+static int
+vie_peek(struct vie *vie, uint8_t *x)
+{
+	if (vie->num_processed < vie->num_valid) {
+		*x = vie->inst[vie->num_processed];
+		return (0);
+	} else
+		return (-1);
+}
+
+static void
+vie_advance(struct vie *vie)
+{
+	if (vie->num_processed >= vie->num_valid)
+		panic("vie_advance: %d/%d", vie->num_processed, vie->num_valid);
+
+	vie->num_processed++;
+}
+
+static int
+decode_rex(struct vie *vie)
+{
+	uint8_t x;
+
+	if (vie_peek(vie, &x))
+		return (-1);
+
+	if (x >= 0x40 && x <= 0x4F) {
+		vie->rex_w = x & 0x8 ? 1 : 0;
+		vie->rex_r = x & 0x4 ? 1 : 0;
+		vie->rex_x = x & 0x2 ? 1 : 0;
+		vie->rex_b = x & 0x1 ? 1 : 0;
+
+		vie_advance(vie);
+	}
+
+	return (0);
+}
+
+static int
+decode_opcode(struct vie *vie)
+{
+	uint8_t x;
+
+	static const uint8_t flags[256] = {
+		[0x89] = VIE_F_HAS_MODRM | VIE_F_FROM_REG | VIE_F_TO_RM,
+		[0x8B] = VIE_F_HAS_MODRM | VIE_F_FROM_RM | VIE_F_TO_REG,
+		[0xC7] = VIE_F_HAS_MODRM | VIE_F_FROM_IMM | VIE_F_TO_RM,
+	};
+
+	if (vie_peek(vie, &x))
+		return (-1);
+
+	vie->opcode_byte = x;
+	vie->opcode_flags = flags[x];
+
+	vie_advance(vie);
+
+	if (vie->opcode_flags == 0)
+		return (-1);
+	else
+		return (0);
+}
+
+/*
+ * XXX assuming 32-bit or 64-bit guest
+ */
+static int
+decode_modrm(struct vie *vie)
+{
+	uint8_t x;
+
+	if ((vie->opcode_flags & VIE_F_HAS_MODRM) == 0)
+		return (0);
+
+	if (vie_peek(vie, &x))
+		return (-1);
+
+	vie->mod = (x >> 6) & 0x3;
+	vie->rm =  (x >> 0) & 0x7;
+	vie->reg = (x >> 3) & 0x7;
+
+	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
+	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
+			/*
+			 * Table 2-5: Special Cases of REX Encodings
+			 *
+			 * mod=0, r/m=5 is used in the compatibility mode to
+			 * indicate a disp32 without a base register.
+			 *
+			 * mod!=3, r/m=4 is used in the compatibility mode to
+			 * indicate that the SIB byte is present.
+			 *
+			 * The 'b' bit in the REX prefix is don't care in
+			 * this case.
+			 */
+	} else {
+		vie->rm |= (vie->rex_b << 3);
+	}
+
+	vie->reg |= (vie->rex_r << 3);
+
+	/* SIB addressing not supported yet */
+	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
+		return (-1);
+
+	vie->base_register = gpr_map[vie->rm];
+
+	if (vie->opcode_flags & (VIE_F_FROM_REG | VIE_F_TO_REG))
+		vie->operand_register = gpr_map[vie->reg];
+
+	switch (vie->mod) {
+	case VIE_MOD_INDIRECT_DISP8:
+		vie->disp_bytes = 1;
+		break;
+	case VIE_MOD_INDIRECT_DISP32:
+		vie->disp_bytes = 4;
+		break;
+	case VIE_MOD_INDIRECT:
+		if (vie->rm == VIE_RM_DISP32) {
+			vie->disp_bytes = 4;
+			vie->base_register = VM_REG_LAST;	/* no base */
+		}
+		break;
+	}
+
+	/* calculate the operand size */
+	if (vie->rex_w)
+		vie->op_size = VIE_OP_SIZE_64BIT;
+
+	if (vie->opcode_flags & VIE_F_FROM_IMM)
+		vie->imm_bytes = 4;
+
+	vie_advance(vie);
+
+	return (0);
+}
+
+static int
+decode_displacement(struct vie *vie)
+{
+	int n, i;
+	uint8_t x;
+
+	union {
+		char	buf[4];
+		int8_t	signed8;
+		int32_t	signed32;
+	} u;
+
+	if ((n = vie->disp_bytes) == 0)
+		return (0);
+
+	if (n != 1 && n != 4)
+		panic("decode_displacement: invalid disp_bytes %d", n);
+
+	for (i = 0; i < n; i++) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		u.buf[i] = x;
+		vie_advance(vie);
+	}
+
+	if (n == 1)
+		vie->displacement = u.signed8;		/* sign-extended */
+	else
+		vie->displacement = u.signed32;		/* sign-extended */
+
+	return (0);
+}
+
+static int
+decode_immediate(struct vie *vie)
+{
+	int i, n;
+	uint8_t x;
+	union {
+		char	buf[4];
+		int32_t	signed32;
+	} u;
+
+	if ((n = vie->imm_bytes) == 0)
+		return (0);
+
+	if (n != 4)
+		panic("decode_immediate: invalid imm_bytes %d", n);
+
+	for (i = 0; i < n; i++) {
+		if (vie_peek(vie, &x))
+			return (-1);
+
+		u.buf[i] = x;
+		vie_advance(vie);
+	}
+	
+	vie->immediate = u.signed32;		/* sign-extended */
+
+	return (0);
+}
+
+int
+vmm_decode_instruction(struct vie *vie)
+{
+	if (decode_rex(vie))
+		return (-1);
+
+	if (decode_opcode(vie))
+		return (-1);
+
+	if (decode_modrm(vie))
+		return (-1);
+
+	if (decode_displacement(vie))
+		return (-1);
+	
+	if (decode_immediate(vie))
+		return (-1);
+
+	return (0);
+}

Added: projects/bhyve/sys/amd64/vmm/vmm_instruction_emul.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/bhyve/sys/amd64/vmm/vmm_instruction_emul.h	Tue Sep 25 22:31:35 2012	(r240941)
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+enum vie_op_size {
+	VIE_OP_SIZE_32BIT,		/* default */
+	VIE_OP_SIZE_64BIT,
+	VIE_OP_SIZE_8BIT
+};
+
+#define	VIE_INST_SIZE	15
+struct vie {
+	uint8_t		inst[VIE_INST_SIZE];
+
+	uint8_t		rex_w:1,
+			rex_r:1,
+			rex_x:1,
+			rex_b:1;
+
+	uint8_t		mod:2,
+			reg:4,
+			rm:4;
+
+
+	uint8_t		opcode_byte;
+	uint16_t	opcode_flags;
+	uint8_t		disp_bytes;
+	uint8_t		imm_bytes;
+
+	int		num_valid;
+	int		num_processed;
+
+	enum vm_reg_name base_register;
+	enum vm_reg_name index_register;
+	enum vm_reg_name operand_register;
+
+	int		op_size;
+	int64_t		displacement;
+	int64_t		immediate;
+};
+
+#define	VIE_F_HAS_MODRM	(1 << 0)
+#define	VIE_F_FROM_RM	(1 << 1)
+#define	VIE_F_FROM_REG	(1 << 2)
+#define	VIE_F_TO_RM	(1 << 3)
+#define	VIE_F_TO_REG	(1 << 4)
+#define	VIE_F_FROM_IMM	(1 << 5)
+
+#define	VIE_MOD_INDIRECT		0
+#define	VIE_MOD_INDIRECT_DISP8		1
+#define	VIE_MOD_INDIRECT_DISP32		2
+#define	VIE_MOD_DIRECT			3
+
+#define	VIE_RM_SIB			4
+#define	VIE_RM_DISP32			5
+
+struct vm;
+
+void	vmm_fetch_instruction(struct vm *vm, uint64_t rip, uint64_t cr3,
+			      struct vie *vie);
+
+int	vmm_decode_instruction(struct vie *vie);
+
+#endif

Modified: projects/bhyve/sys/amd64/vmm/vmm_lapic.c
==============================================================================
--- projects/bhyve/sys/amd64/vmm/vmm_lapic.c	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/vmm_lapic.c	Tue Sep 25 22:31:35 2012	(r240941)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include "vmm_ipi.h"
 #include "vmm_lapic.h"
 #include "vlapic.h"
+#include "vmm_instruction_emul.h"
 
 static int
 lapic_write(struct vlapic *vlapic, u_int offset, uint64_t val)
@@ -174,3 +175,73 @@ lapic_wrmsr(struct vm *vm, int cpu, u_in
 
 	return (handled);
 }
+
+int
+lapic_mmio(struct vm *vm, int cpu, u_int offset, int read,
+	   uint64_t rip, uint64_t cr3)
+{
+	int handled, error;
+	uint64_t val;
+	struct vie vie;
+	struct vlapic *vlapic;
+
+	const int UNHANDLED = 0;
+
+	vlapic = vm_lapic(vm, cpu);
+
+	vmm_fetch_instruction(vm, rip, cr3, &vie);
+
+	if (vmm_decode_instruction(&vie) != 0)
+		return (UNHANDLED);
+
+	/* Only 32-bit accesses to local apic */
+	if (vie.op_size != VIE_OP_SIZE_32BIT)
+		return (UNHANDLED);
+
+	/*
+	 * XXX
+	 * The operand register in which we store the result of the
+	 * read must be a GPR that we can modify even if the vcpu
+	 * is "running". All the GPRs qualify except for %rsp.
+	 *
+	 * This is a limitation of the vm_set_register() API
+	 * and can be fixed if necessary.
+	 */
+	if (vie.operand_register == VM_REG_GUEST_RSP)
+		return (UNHANDLED);
+
+	if (read) {
+		if ((vie.opcode_flags & VIE_F_TO_REG) == 0)
+			return (UNHANDLED);
+
+		if (vie.operand_register >= VM_REG_LAST)
+			return (UNHANDLED);
+
+		handled = lapic_read(vlapic, offset, &val);
+		if (handled) {
+			error = vm_set_register(vm, cpu, vie.operand_register,
+						val);
+			if (error)
+				panic("lapic_mmio: error %d setting gpr %d",
+				      error, vie.operand_register);
+		}
+	} else {
+		if ((vie.opcode_flags & VIE_F_FROM_REG) &&
+		    (vie.operand_register < VM_REG_LAST)) {
+			error = vm_get_register(vm, cpu, vie.operand_register,
+						&val);
+			if (error) {
+				panic("lapic_mmio: error %d getting gpr %d",
+				      error, vie.operand_register);
+			}
+		} else if (vie.opcode_flags & VIE_F_FROM_IMM) {
+			val = vie.immediate;
+		} else {
+			return (UNHANDLED);
+		}
+
+		handled = lapic_write(vlapic, offset, val);
+	}
+
+	return (handled);
+}

Modified: projects/bhyve/sys/amd64/vmm/vmm_lapic.h
==============================================================================
--- projects/bhyve/sys/amd64/vmm/vmm_lapic.h	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/vmm_lapic.h	Tue Sep 25 22:31:35 2012	(r240941)
@@ -35,6 +35,9 @@ boolean_t lapic_msr(u_int num);
 int	lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
 int	lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
 
+int	lapic_mmio(struct vm *vm, int cpu, u_int offset, int read,
+		   uint64_t rip, uint64_t cr3);
+
 void	lapic_timer_tick(struct vm *vm, int cpu);
 
 /*

Modified: projects/bhyve/sys/amd64/vmm/x86.c
==============================================================================
--- projects/bhyve/sys/amd64/vmm/x86.c	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/x86.c	Tue Sep 25 22:31:35 2012	(r240941)
@@ -29,13 +29,17 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <sys/types.h>
 #include <sys/systm.h>
+#include <sys/cpuset.h>
 
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
+#include <machine/vmm.h>
+
 #include "x86.h"
 
 #define	CPUID_VM_HIGH		0x40000000
@@ -43,10 +47,12 @@ __FBSDID("$FreeBSD$");
 static const char bhyve_id[12] = "BHyVE BHyVE ";
 
 int
-x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx,
-	uint32_t vcpu_id)
+x86_emulate_cpuid(struct vm *vm, int vcpu_id,
+		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
 {
+	int error;
 	unsigned int 	func, regs[4];
+	enum x2apic_state x2apic_state;
 
 	func = *eax;
 
@@ -91,6 +97,12 @@ x86_emulate_cpuid(uint32_t *eax, uint32_
 		case CPUID_0000_0001:
 			do_cpuid(1, regs);
 
+			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
+			if (error) {
+				panic("x86_emulate_cpuid: error %d "
+				      "fetching x2apic state", error);
+			}
+
 			/*
 			 * Override the APIC ID only in ebx
 			 */
@@ -102,7 +114,11 @@ x86_emulate_cpuid(uint32_t *eax, uint32_
 			 * Advertise x2APIC capability and Hypervisor guest.
 			 */
 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
-			regs[2] |= CPUID2_X2APIC | CPUID2_HV;
+
+			regs[2] |= CPUID2_HV;
+
+			if (x2apic_state != X2APIC_DISABLED)
+				regs[2] |= CPUID2_X2APIC;
 
 			/*
 			 * Hide xsave/osxsave/avx until the FPU save/restore

Modified: projects/bhyve/sys/amd64/vmm/x86.h
==============================================================================
--- projects/bhyve/sys/amd64/vmm/x86.h	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/amd64/vmm/x86.h	Tue Sep 25 22:31:35 2012	(r240941)
@@ -57,7 +57,7 @@
  */
 #define CPUID_0000_0001_FEAT0_VMX	(1<<5)
 
-int x86_emulate_cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
-		      uint32_t *edx, uint32_t vcpu_id);
+int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx,
+		      uint32_t *ecx, uint32_t *edx);
 
 #endif

Modified: projects/bhyve/sys/modules/vmm/Makefile
==============================================================================
--- projects/bhyve/sys/modules/vmm/Makefile	Tue Sep 25 22:15:59 2012	(r240940)
+++ projects/bhyve/sys/modules/vmm/Makefile	Tue Sep 25 22:31:35 2012	(r240941)
@@ -13,6 +13,7 @@ CFLAGS+= -I${.CURDIR}/../../amd64/vmm/in
 .PATH: ${.CURDIR}/../../amd64/vmm
 SRCS+=	vmm.c		\
 	vmm_dev.c	\
+	vmm_instruction_emul.c	\
 	vmm_ipi.c	\
 	vmm_lapic.c	\
 	vmm_mem.c	\



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201209252231.q8PMVZwX006907>