Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 22 Jul 2014 04:39:17 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r268976 - in stable/10: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/intel usr.sbin/bhyve
Message-ID:  <201407220439.s6M4dHeV047899@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Tue Jul 22 04:39:16 2014
New Revision: 268976
URL: http://svnweb.freebsd.org/changeset/base/268976

Log:
  MFC 266424,266476,266524,266573,266595,266626,266627,266633,266641,266642,
  266708,266724,266934,266935,268521:
  Emulation of the "ins" and "outs" instructions.
  
  Various fixes for translating guest linear addresses to guest physical
  addresses.

Modified:
  stable/10/lib/libvmmapi/vmmapi.c
  stable/10/lib/libvmmapi/vmmapi.h
  stable/10/sys/amd64/include/vmm.h
  stable/10/sys/amd64/include/vmm_dev.h
  stable/10/sys/amd64/include/vmm_instruction_emul.h
  stable/10/sys/amd64/vmm/intel/vmx.c
  stable/10/sys/amd64/vmm/vmm.c
  stable/10/sys/amd64/vmm/vmm_dev.c
  stable/10/sys/amd64/vmm/vmm_instruction_emul.c
  stable/10/sys/amd64/vmm/vmm_ioport.c
  stable/10/sys/amd64/vmm/vmm_ioport.h
  stable/10/sys/amd64/vmm/vmm_ktr.h
  stable/10/usr.sbin/bhyve/bhyverun.c
  stable/10/usr.sbin/bhyve/inout.c
  stable/10/usr.sbin/bhyve/inout.h
  stable/10/usr.sbin/bhyve/mem.c
  stable/10/usr.sbin/bhyve/pci_virtio_block.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/lib/libvmmapi/vmmapi.c
==============================================================================
--- stable/10/lib/libvmmapi/vmmapi.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/lib/libvmmapi/vmmapi.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -33,8 +33,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/_iovec.h>
 
 #include <machine/specialreg.h>
+#include <machine/param.h>
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -937,3 +939,107 @@ vm_get_hpet_capabilities(struct vmctx *c
 		*capabilities = cap.capabilities;
 	return (error);
 }
+
+static int
+gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, int *fault, uint64_t *gpa)
+{
+	struct vm_gla2gpa gg;
+	int error;
+
+	bzero(&gg, sizeof(struct vm_gla2gpa));
+	gg.vcpuid = vcpu;
+	gg.prot = prot;
+	gg.gla = gla;
+	gg.paging = *paging;
+
+	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
+	if (error == 0) {
+		*fault = gg.fault;
+		*gpa = gg.gpa;
+	}
+	return (error);
+}
+
+#ifndef min
+#define	min(a,b)	(((a) < (b)) ? (a) : (b))
+#endif
+
+int
+vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
+{
+	uint64_t gpa;
+	int error, fault, i, n, off;
+
+	for (i = 0; i < iovcnt; i++) {
+		iov[i].iov_base = 0;
+		iov[i].iov_len = 0;
+	}
+
+	while (len) {
+		assert(iovcnt > 0);
+		error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa);
+		if (error)
+			return (-1);
+		if (fault)
+			return (1);
+
+		off = gpa & PAGE_MASK;
+		n = min(len, PAGE_SIZE - off);
+
+		iov->iov_base = (void *)gpa;
+		iov->iov_len = n;
+		iov++;
+		iovcnt--;
+
+		gla += n;
+		len -= n;
+	}
+	return (0);
+}
+
+void
+vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
+{
+	const char *src;
+	char *dst;
+	uint64_t gpa;
+	size_t n;
+
+	dst = vp;
+	while (len) {
+		assert(iov->iov_len);
+		gpa = (uint64_t)iov->iov_base;
+		n = min(len, iov->iov_len);
+		src = vm_map_gpa(ctx, gpa, n);
+		bcopy(src, dst, n);
+
+		iov++;
+		dst += n;
+		len -= n;
+	}
+}
+
+void
+vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
+    size_t len)
+{
+	const char *src;
+	char *dst;
+	uint64_t gpa;
+	size_t n;
+
+	src = vp;
+	while (len) {
+		assert(iov->iov_len);
+		gpa = (uint64_t)iov->iov_base;
+		n = min(len, iov->iov_len);
+		dst = vm_map_gpa(ctx, gpa, n);
+		bcopy(src, dst, n);
+
+		iov++;
+		src += n;
+		len -= n;
+	}
+}

Modified: stable/10/lib/libvmmapi/vmmapi.h
==============================================================================
--- stable/10/lib/libvmmapi/vmmapi.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/lib/libvmmapi/vmmapi.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -29,6 +29,7 @@
 #ifndef _VMMAPI_H_
 #define	_VMMAPI_H_
 
+struct iovec;
 struct vmctx;
 enum x2apic_state;
 
@@ -109,6 +110,18 @@ int	vm_set_x2apic_state(struct vmctx *ct
 
 int	vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities);
 
+/*
+ * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
+ * The 'iovcnt' should be big enough to accommodate all GPA segments.
+ * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
+ */
+int	vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
+	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt);
+void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
+	    void *host_dst, size_t len);
+void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
+	    struct iovec *guest_iov, size_t len);
+
 /* Reset vcpu register state */
 int	vcpu_reset(struct vmctx *ctx, int vcpu);
 

Modified: stable/10/sys/amd64/include/vmm.h
==============================================================================
--- stable/10/sys/amd64/include/vmm.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/include/vmm.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -54,6 +54,7 @@ struct vmspace;
 struct vm_object;
 struct pmap;
 
+enum vm_reg_name;
 enum x2apic_state;
 
 typedef int	(*vmm_init_func_t)(int ipinum);
@@ -236,10 +237,11 @@ int vm_exception_pending(struct vm *vm, 
 
 void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
 void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
+void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);
 
-#endif	/* KERNEL */
+enum vm_reg_name vm_segment_name(int seg_encoding);
 
-#include <machine/vmm_instruction_emul.h>
+#endif	/* KERNEL */
 
 #define	VM_MAXCPU	16			/* maximum virtual cpus */
 
@@ -280,6 +282,7 @@ enum vm_reg_name {
 	VM_REG_GUEST_IDTR,
 	VM_REG_GUEST_GDTR,
 	VM_REG_GUEST_EFER,
+	VM_REG_GUEST_CR2,
 	VM_REG_LAST
 };
 
@@ -318,6 +321,76 @@ struct seg_desc {
 	uint32_t	limit;
 	uint32_t	access;
 };
+#define	SEG_DESC_TYPE(desc)		((desc)->access & 0x001f)
+#define	SEG_DESC_PRESENT(desc)		((desc)->access & 0x0080)
+#define	SEG_DESC_DEF32(desc)		((desc)->access & 0x4000)
+#define	SEG_DESC_GRANULARITY(desc)	((desc)->access & 0x8000)
+#define	SEG_DESC_UNUSABLE(desc)		((desc)->access & 0x10000)
+
+enum vm_cpu_mode {
+	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
+	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+	PAGING_MODE_FLAT,
+	PAGING_MODE_32,
+	PAGING_MODE_PAE,
+	PAGING_MODE_64,
+};
+
+struct vm_guest_paging {
+	uint64_t	cr3;
+	int		cpl;
+	enum vm_cpu_mode cpu_mode;
+	enum vm_paging_mode paging_mode;
+};
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason why their contents
+ * need to be exposed is because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+	uint8_t		op_byte;	/* actual opcode byte */
+	uint8_t		op_type;	/* type of operation (e.g. MOV) */
+	uint16_t	op_flags;
+};
+
+#define	VIE_INST_SIZE	15
+struct vie {
+	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
+	uint8_t		num_valid;		/* size of the instruction */
+	uint8_t		num_processed;
+
+	uint8_t		rex_w:1,		/* REX prefix */
+			rex_r:1,
+			rex_x:1,
+			rex_b:1,
+			rex_present:1;
+
+	uint8_t		mod:2,			/* ModRM byte */
+			reg:4,
+			rm:4;
+
+	uint8_t		ss:2,			/* SIB byte */
+			index:4,
+			base:4;
+
+	uint8_t		disp_bytes;
+	uint8_t		imm_bytes;
+
+	uint8_t		scale;
+	int		base_register;		/* VM_REG_GUEST_xyz */
+	int		index_register;		/* VM_REG_GUEST_xyz */
+
+	int64_t		displacement;		/* optional addr displacement */
+	int64_t		immediate;		/* optional immediate operand */
+
+	uint8_t		decoded;	/* set to 1 if successfully decoded */
+
+	struct vie_op	op;			/* opcode description */
+};
 
 enum vm_exitcode {
 	VM_EXITCODE_INOUT,
@@ -335,22 +408,38 @@ enum vm_exitcode {
 	VM_EXITCODE_RENDEZVOUS,
 	VM_EXITCODE_IOAPIC_EOI,
 	VM_EXITCODE_SUSPENDED,
+	VM_EXITCODE_INOUT_STR,
 	VM_EXITCODE_MAX
 };
 
+struct vm_inout {
+	uint16_t	bytes:3;	/* 1 or 2 or 4 */
+	uint16_t	in:1;
+	uint16_t	string:1;
+	uint16_t	rep:1;
+	uint16_t	port;
+	uint32_t	eax;		/* valid for out */
+};
+
+struct vm_inout_str {
+	struct vm_inout	inout;		/* must be the first element */
+	struct vm_guest_paging paging;
+	uint64_t	rflags;
+	uint64_t	cr0;
+	uint64_t	index;
+	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
+	int		addrsize;
+	enum vm_reg_name seg_name;
+	struct seg_desc seg_desc;
+};
+
 struct vm_exit {
 	enum vm_exitcode	exitcode;
 	int			inst_length;	/* 0 means unknown */
 	uint64_t		rip;
 	union {
-		struct {
-			uint16_t	bytes:3;	/* 1 or 2 or 4 */
-			uint16_t	in:1;		/* out is 0, in is 1 */
-			uint16_t	string:1;
-			uint16_t	rep:1;
-			uint16_t	port;
-			uint32_t	eax;		/* valid for out */
-		} inout;
+		struct vm_inout	inout;
+		struct vm_inout_str inout_str;
 		struct {
 			uint64_t	gpa;
 			int		fault_type;
@@ -358,9 +447,7 @@ struct vm_exit {
 		struct {
 			uint64_t	gpa;
 			uint64_t	gla;
-			uint64_t	cr3;
-			enum vie_cpu_mode cpu_mode;
-			enum vie_paging_mode paging_mode;
+			struct vm_guest_paging paging;
 			struct vie	vie;
 		} inst_emul;
 		/*

Modified: stable/10/sys/amd64/include/vmm_dev.h
==============================================================================
--- stable/10/sys/amd64/include/vmm_dev.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/include/vmm_dev.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -168,6 +168,15 @@ struct vm_suspend {
 	enum vm_suspend_how how;
 };
 
+struct vm_gla2gpa {
+	int		vcpuid;		/* inputs */
+	int 		prot;		/* PROT_READ or PROT_WRITE */
+	uint64_t	gla;
+	struct vm_guest_paging paging;
+	int		fault;		/* outputs */
+	uint64_t	gpa;
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
@@ -180,6 +189,7 @@ enum {
 	IOCNUM_MAP_MEMORY = 10,
 	IOCNUM_GET_MEMORY_SEG = 11,
 	IOCNUM_GET_GPA_PMAP = 12,
+	IOCNUM_GLA2GPA = 13,
 
 	/* register/state accessors */
 	IOCNUM_SET_REGISTER = 20,
@@ -289,4 +299,6 @@ enum {
 	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
+#define	VM_GLA2GPA	\
+	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
 #endif

Modified: stable/10/sys/amd64/include/vmm_instruction_emul.h
==============================================================================
--- stable/10/sys/amd64/include/vmm_instruction_emul.h	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/include/vmm_instruction_emul.h	Tue Jul 22 04:39:16 2014	(r268976)
@@ -29,63 +29,7 @@
 #ifndef	_VMM_INSTRUCTION_EMUL_H_
 #define	_VMM_INSTRUCTION_EMUL_H_
 
-enum vie_cpu_mode {
-	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
-	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
-};
-
-enum vie_paging_mode {
-	PAGING_MODE_FLAT,
-	PAGING_MODE_32,
-	PAGING_MODE_PAE,
-	PAGING_MODE_64,
-};
-
-/*
- * The data structures 'vie' and 'vie_op' are meant to be opaque to the
- * consumers of instruction decoding. The only reason why their contents
- * need to be exposed is because they are part of the 'vm_exit' structure.
- */
-struct vie_op {
-	uint8_t		op_byte;	/* actual opcode byte */
-	uint8_t		op_type;	/* type of operation (e.g. MOV) */
-	uint16_t	op_flags;
-};
-
-#define	VIE_INST_SIZE	15
-struct vie {
-	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
-	uint8_t		num_valid;		/* size of the instruction */
-	uint8_t		num_processed;
-
-	uint8_t		rex_w:1,		/* REX prefix */
-			rex_r:1,
-			rex_x:1,
-			rex_b:1,
-			rex_present:1;
-
-	uint8_t		mod:2,			/* ModRM byte */
-			reg:4,
-			rm:4;
-
-	uint8_t		ss:2,			/* SIB byte */
-			index:4,
-			base:4;
-
-	uint8_t		disp_bytes;
-	uint8_t		imm_bytes;
-
-	uint8_t		scale;
-	int		base_register;		/* VM_REG_GUEST_xyz */
-	int		index_register;		/* VM_REG_GUEST_xyz */
-
-	int64_t		displacement;		/* optional addr displacement */
-	int64_t		immediate;		/* optional immediate operand */
-
-	uint8_t		decoded;	/* set to 1 if successfully decoded */
-
-	struct vie_op	op;			/* opcode description */
-};
+#include <sys/mman.h>
 
 /*
  * Callback functions to read and write memory regions.
@@ -111,6 +55,24 @@ int vmm_emulate_instruction(void *vm, in
 			    mem_region_read_t mrr, mem_region_write_t mrw,
 			    void *mrarg);
 
+int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
+    uint64_t val, int size);
+
+/*
+ * Returns 1 if an alignment check exception should be injected and 0 otherwise.
+ */
+int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
+    uint64_t rflags, uint64_t gla);
+
+/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
+int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+
+uint64_t vie_size2mask(int size);
+
+int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
+    uint64_t *gla);
+
 #ifdef _KERNEL
 /*
  * APIs to fetch and decode the instruction from nested page fault handler.
@@ -118,8 +80,18 @@ int vmm_emulate_instruction(void *vm, in
  * 'vie' must be initialized before calling 'vmm_fetch_instruction()'
  */
 int vmm_fetch_instruction(struct vm *vm, int cpuid,
-			  uint64_t rip, int inst_length, uint64_t cr3,
-			  enum vie_paging_mode paging_mode, struct vie *vie);
+			  struct vm_guest_paging *guest_paging,
+			  uint64_t rip, int inst_length, struct vie *vie);
+
+/*
+ * Translate the guest linear address 'gla' to a guest physical address.
+ *
+ * Returns 0 on success and '*gpa' contains the result of the translation.
+ * Returns 1 if an exception was injected into the guest.
+ * Returns -1 otherwise.
+ */
+int vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
+    uint64_t gla, int prot, uint64_t *gpa);
 
 void vie_init(struct vie *vie);
 
@@ -136,7 +108,7 @@ void vie_init(struct vie *vie);
  */
 #define	VIE_INVALID_GLA		(1UL << 63)	/* a non-canonical address */
 int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
-			   enum vie_cpu_mode cpu_mode, struct vie *vie);
+			   enum vm_cpu_mode cpu_mode, struct vie *vie);
 #endif	/* _KERNEL */
 
 #endif	/* _VMM_INSTRUCTION_EMUL_H_ */

Modified: stable/10/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/10/sys/amd64/vmm/intel/vmx.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/intel/vmx.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
 #include "vmm_host.h"
 #include "vmm_ioport.h"
 #include "vmm_ipi.h"
@@ -185,6 +186,8 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_
  */
 #define	APIC_ACCESS_ADDRESS	0xFFFFF000
 
+static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
+static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
 static void vmx_inject_pir(struct vlapic *vlapic);
 
 #ifdef KTR
@@ -539,7 +542,7 @@ static int
 vmx_init(int ipinum)
 {
 	int error, use_tpr_shadow;
-	uint64_t fixed0, fixed1, feature_control;
+	uint64_t basic, fixed0, fixed1, feature_control;
 	uint32_t tmp, procbased2_vid_bits;
 
 	/* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
@@ -559,6 +562,17 @@ vmx_init(int ipinum)
 		return (ENXIO);
 	}
 
+	/*
+	 * Verify capabilities MSR_VMX_BASIC:
+	 * - bit 54 indicates support for INS/OUTS decoding
+	 */
+	basic = rdmsr(MSR_VMX_BASIC);
+	if ((basic & (1UL << 54)) == 0) {
+		printf("vmx_init: processor does not support desired basic "
+		    "capabilities\n");
+		return (EINVAL);
+	}
+
 	/* Check support for primary processor-based VM-execution controls */
 	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
 			       MSR_VMX_TRUE_PROCBASED_CTLS,
@@ -1539,7 +1553,19 @@ vmx_emulate_cr_access(struct vmx *vmx, i
 	return (HANDLED);
 }
 
-static enum vie_cpu_mode
+/*
+ * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL
+ */
+static int
+vmx_cpl(void)
+{
+	uint32_t ssar;
+
+	ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS);
+	return ((ssar >> 5) & 0x3);
+}
+
+static enum vm_cpu_mode
 vmx_cpu_mode(void)
 {
 
@@ -1549,7 +1575,7 @@ vmx_cpu_mode(void)
 		return (CPU_MODE_COMPATIBILITY);
 }
 
-static enum vie_paging_mode
+static enum vm_paging_mode
 vmx_paging_mode(void)
 {
 
@@ -1563,6 +1589,89 @@ vmx_paging_mode(void)
 		return (PAGING_MODE_PAE);
 }
 
+static uint64_t
+inout_str_index(struct vmx *vmx, int vcpuid, int in)
+{
+	uint64_t val;
+	int error;
+	enum vm_reg_name reg;
+
+	reg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+	error = vmx_getreg(vmx, vcpuid, reg, &val);
+	KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error));
+	return (val);
+}
+
+static uint64_t
+inout_str_count(struct vmx *vmx, int vcpuid, int rep)
+{
+	uint64_t val;
+	int error;
+
+	if (rep) {
+		error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val);
+		KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error));
+	} else {
+		val = 1;
+	}
+	return (val);
+}
+
+static int
+inout_str_addrsize(uint32_t inst_info)
+{
+	uint32_t size;
+
+	size = (inst_info >> 7) & 0x7;
+	switch (size) {
+	case 0:
+		return (2);	/* 16 bit */
+	case 1:
+		return (4);	/* 32 bit */
+	case 2:
+		return (8);	/* 64 bit */
+	default:
+		panic("%s: invalid size encoding %d", __func__, size);
+	}
+}
+
+static void
+inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
+    struct vm_inout_str *vis)
+{
+	int error, s;
+
+	if (in) {
+		vis->seg_name = VM_REG_GUEST_ES;
+	} else {
+		s = (inst_info >> 15) & 0x7;
+		vis->seg_name = vm_segment_name(s);
+	}
+
+	error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc);
+	KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error));
+
+	/* XXX modify svm.c to update bit 16 of seg_desc.access (unusable) */
+}
+
+static void
+vmx_paging_info(struct vm_guest_paging *paging)
+{
+	paging->cr3 = vmcs_guest_cr3();
+	paging->cpl = vmx_cpl();
+	paging->cpu_mode = vmx_cpu_mode();
+	paging->paging_mode = vmx_paging_mode();
+}
+
+static void
+vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
+{
+	vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+	vmexit->u.inst_emul.gpa = gpa;
+	vmexit->u.inst_emul.gla = gla;
+	vmx_paging_info(&vmexit->u.inst_emul.paging);
+}
+
 static int
 ept_fault_type(uint64_t ept_qual)
 {
@@ -1754,12 +1863,8 @@ vmx_handle_apic_access(struct vmx *vmx, 
 	}
 
 	if (allowed) {
-		vmexit->exitcode = VM_EXITCODE_INST_EMUL;
-		vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset;
-		vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
-		vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
-		vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
-		vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
+		vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset,
+		    VIE_INVALID_GLA);
 	}
 
 	/*
@@ -1776,10 +1881,12 @@ vmx_handle_apic_access(struct vmx *vmx, 
 static int
 vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 {
-	int error, handled;
+	int error, handled, in;
 	struct vmxctx *vmxctx;
 	struct vlapic *vlapic;
-	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, reason;
+	struct vm_inout_str *vis;
+	uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
+	uint32_t reason;
 	uint64_t qual, gpa;
 	bool retu;
 
@@ -1936,15 +2043,22 @@ vmx_exit_process(struct vmx *vmx, int vc
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
 		vmexit->exitcode = VM_EXITCODE_INOUT;
 		vmexit->u.inout.bytes = (qual & 0x7) + 1;
-		vmexit->u.inout.in = (qual & 0x8) ? 1 : 0;
+		vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0;
 		vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
 		vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
 		vmexit->u.inout.port = (uint16_t)(qual >> 16);
 		vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
-		error = emulate_ioport(vmx->vm, vcpu, vmexit);
-		if (error == 0)  {
-			handled = 1;
-			vmxctx->guest_rax = vmexit->u.inout.eax;
+		if (vmexit->u.inout.string) {
+			inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
+			vmexit->exitcode = VM_EXITCODE_INOUT_STR;
+			vis = &vmexit->u.inout_str;
+			vmx_paging_info(&vis->paging);
+			vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
+			vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
+			vis->index = inout_str_index(vmx, vcpu, in);
+			vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
+			vis->addrsize = inout_str_addrsize(inst_info);
+			inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
 		}
 		break;
 	case EXIT_REASON_CPUID:
@@ -1990,12 +2104,7 @@ vmx_exit_process(struct vmx *vmx, int vc
 			vmexit->u.paging.fault_type = ept_fault_type(qual);
 			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
 		} else if (ept_emulation_fault(qual)) {
-			vmexit->exitcode = VM_EXITCODE_INST_EMUL;
-			vmexit->u.inst_emul.gpa = gpa;
-			vmexit->u.inst_emul.gla = vmcs_gla();
-			vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
-			vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
-			vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
+			vmexit_inst_emul(vmexit, gpa, vmcs_gla());
 			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
 		}
 		/*
@@ -2324,6 +2433,8 @@ vmxctx_regptr(struct vmxctx *vmxctx, int
 		return (&vmxctx->guest_r14);
 	case VM_REG_GUEST_R15:
 		return (&vmxctx->guest_r15);
+	case VM_REG_GUEST_CR2:
+		return (&vmxctx->guest_cr2);
 	default:
 		break;
 	}

Modified: stable/10/sys/amd64/vmm/vmm.c
==============================================================================
--- stable/10/sys/amd64/vmm/vmm.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/vmm.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
 
+#include "vmm_ioport.h"
 #include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
@@ -1131,34 +1133,33 @@ vm_handle_inst_emul(struct vm *vm, int v
 	struct vie *vie;
 	struct vcpu *vcpu;
 	struct vm_exit *vme;
-	int error, inst_length;
-	uint64_t rip, gla, gpa, cr3;
-	enum vie_cpu_mode cpu_mode;
-	enum vie_paging_mode paging_mode;
+	uint64_t gla, gpa;
+	struct vm_guest_paging *paging;
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
+	int error;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 
-	rip = vme->rip;
-	inst_length = vme->inst_length;
-
 	gla = vme->u.inst_emul.gla;
 	gpa = vme->u.inst_emul.gpa;
-	cr3 = vme->u.inst_emul.cr3;
-	cpu_mode = vme->u.inst_emul.cpu_mode;
-	paging_mode = vme->u.inst_emul.paging_mode;
 	vie = &vme->u.inst_emul.vie;
+	paging = &vme->u.inst_emul.paging;
 
 	vie_init(vie);
 
 	/* Fetch, decode and emulate the faulting instruction */
-	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
-	    paging_mode, vie) != 0)
+	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
+	    vme->inst_length, vie);
+	if (error == 1)
+		return (0);		/* Resume guest to handle page fault */
+	else if (error == -1)
 		return (EFAULT);
+	else if (error != 0)
+		panic("%s: vmm_fetch_instruction error %d", __func__, error);
 
-	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
+	if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
 		return (EFAULT);
 
 	/* return to userland unless this is an in-kernel emulated device */
@@ -1348,6 +1349,10 @@ restart:
 		case VM_EXITCODE_INST_EMUL:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
+		case VM_EXITCODE_INOUT:
+		case VM_EXITCODE_INOUT_STR:
+			error = vm_handle_inout(vm, vcpuid, vme, &retu);
+			break;
 		default:
 			retu = true;	/* handled in userland */
 			break;
@@ -1430,6 +1435,25 @@ vm_inject_fault(struct vm *vm, int vcpui
 }
 
 void
+vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
+{
+	struct vm_exception pf = {
+		.vector = IDT_PF,
+		.error_code_valid = 1,
+		.error_code = error_code
+	};
+	int error;
+
+	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
+	    error_code, cr2);
+
+	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
+	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
+
+	vm_inject_fault(vm, vcpuid, &pf);
+}
+
+void
 vm_inject_gp(struct vm *vm, int vcpuid)
 {
 	struct vm_exception gpf = {
@@ -1856,3 +1880,20 @@ vm_atpit(struct vm *vm)
 {
 	return (vm->vatpit);
 }
+
+enum vm_reg_name
+vm_segment_name(int seg)
+{
+	static enum vm_reg_name seg_names[] = {
+		VM_REG_GUEST_ES,
+		VM_REG_GUEST_CS,
+		VM_REG_GUEST_SS,
+		VM_REG_GUEST_DS,
+		VM_REG_GUEST_FS,
+		VM_REG_GUEST_GS
+	};
+
+	KASSERT(seg >= 0 && seg < nitems(seg_names),
+	    ("%s: invalid segment encoding %d", __func__, seg));
+	return (seg_names[seg]);
+}

Modified: stable/10/sys/amd64/vmm/vmm_dev.c
==============================================================================
--- stable/10/sys/amd64/vmm/vmm_dev.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/vmm_dev.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
 #include <machine/vmm_dev.h>
 
 #include "vmm_lapic.h"
@@ -168,6 +169,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	struct vm_x2apic *x2apic;
 	struct vm_gpa_pte *gpapte;
 	struct vm_suspend *vmsuspend;
+	struct vm_gla2gpa *gg;
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
@@ -192,6 +194,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	case VM_PPTDEV_MSI:
 	case VM_PPTDEV_MSIX:
 	case VM_SET_X2APIC_STATE:
+	case VM_GLA2GPA:
 		/*
 		 * XXX fragile, handle with care
 		 * Assumes that the first field of the ioctl data is the vcpu.
@@ -415,6 +418,27 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	case VM_GET_HPET_CAPABILITIES:
 		error = vhpet_getcap((struct vm_hpet_cap *)data);
 		break;
+	case VM_GLA2GPA: {
+		CTASSERT(PROT_READ == VM_PROT_READ);
+		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
+		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
+		gg = (struct vm_gla2gpa *)data;
+		error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
+		    gg->prot, &gg->gpa);
+		KASSERT(error == 0 || error == 1 || error == -1,
+		    ("%s: vmm_gla2gpa unknown error %d", __func__, error));
+		if (error >= 0) {
+			/*
+			 * error = 0: the translation was successful
+			 * error = 1: a fault was injected into the guest
+			 */
+			gg->fault = error;
+			error = 0;
+		} else {
+			error = EFAULT;
+		}
+		break;
+	}
 	default:
 		error = ENOTTY;
 		break;

Modified: stable/10/sys/amd64/vmm/vmm_instruction_emul.c
==============================================================================
--- stable/10/sys/amd64/vmm/vmm_instruction_emul.c	Tue Jul 22 04:38:28 2014	(r268975)
+++ stable/10/sys/amd64/vmm/vmm_instruction_emul.c	Tue Jul 22 04:39:16 2014	(r268976)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/pcpu.h>
 #include <sys/systm.h>
+#include <sys/proc.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -46,9 +47,15 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmm.h>
 
+#include <assert.h>
 #include <vmmapi.h>
+#define	KASSERT(exp,msg)	assert((exp))
 #endif	/* _KERNEL */
 
+#include <machine/vmm_instruction_emul.h>
+#include <x86/psl.h>
+#include <x86/specialreg.h>
+
 /* struct vie_op.op_type */
 enum {
 	VIE_OP_TYPE_NONE = 0,
@@ -205,7 +212,7 @@ vie_read_bytereg(void *vm, int vcpuid, s
 	return (error);
 }
 
-static int
+int
 vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
 		    uint64_t val, int size)
 {
@@ -560,6 +567,155 @@ vmm_emulate_instruction(void *vm, int vc
 	return (error);
 }
 
+int
+vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
+{
+	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+	    ("%s: invalid size %d", __func__, size));
+	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
+
+	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
+		return (0);
+
+	return ((gla & (size - 1)) ? 1 : 0);
+}
+
+int
+vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
+{
+	uint64_t mask;
+
+	if (cpu_mode != CPU_MODE_64BIT)
+		return (0);
+
+	/*
+	 * The value of the bit 47 in the 'gla' should be replicated in the
+	 * most significant 16 bits.
+	 */
+	mask = ~((1UL << 48) - 1);
+	if (gla & (1UL << 47))
+		return ((gla & mask) != mask);
+	else
+		return ((gla & mask) != 0);
+}
+
+uint64_t
+vie_size2mask(int size)
+{
+	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
+	    ("vie_size2mask: invalid size %d", size));
+	return (size2mask[size]);
+}
+
+int
+vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+    int prot, uint64_t *gla)
+{
+	uint64_t firstoff, low_limit, high_limit, segbase;
+	int glasize, type;
+
+	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
+	    ("%s: invalid segment %d", __func__, seg));
+	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
+	    ("%s: invalid operand size %d", __func__, length));
+	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
+	    ("%s: invalid prot %#x", __func__, prot));
+
+	firstoff = offset;
+	if (cpu_mode == CPU_MODE_64BIT) {
+		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
+		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
+		glasize = 8;
+	} else {
+		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
+		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
+		glasize = 4;
+		/*
+		 * If the segment selector is loaded with a NULL selector
+		 * then the descriptor is unusable and attempting to use
+		 * it results in a #GP(0).
+		 */
+		if (SEG_DESC_UNUSABLE(desc))
+			return (-1);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201407220439.s6M4dHeV047899>