Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 28 May 2015 17:37:02 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r283657 - in head: sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel usr.sbin/bhyve
Message-ID:  <201505281737.t4SHb2KK051966@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: neel
Date: Thu May 28 17:37:01 2015
New Revision: 283657
URL: https://svnweb.freebsd.org/changeset/base/283657

Log:
  Fix non-deterministic delays when accessing a vcpu that was in "running" or
  "sleeping" state. This is done by forcing the vcpu to transition to "idle"
  by returning to userspace with an exit code of VM_EXITCODE_REQIDLE.
  
  MFC after:      2 weeks

Modified:
  head/sys/amd64/include/vmm.h
  head/sys/amd64/vmm/amd/svm.c
  head/sys/amd64/vmm/intel/vmx.c
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_stat.c
  head/sys/amd64/vmm/vmm_stat.h
  head/usr.sbin/bhyve/bhyverun.c

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h	Thu May 28 17:06:50 2015	(r283656)
+++ head/sys/amd64/include/vmm.h	Thu May 28 17:37:01 2015	(r283657)
@@ -120,13 +120,18 @@ struct vm_object;
 struct vm_guest_paging;
 struct pmap;
 
+struct vm_eventinfo {
+	void	*rptr;		/* rendezvous cookie */
+	int	*sptr;		/* suspend cookie */
+	int	*iptr;		/* reqidle cookie */
+};
+
 typedef int	(*vmm_init_func_t)(int ipinum);
 typedef int	(*vmm_cleanup_func_t)(void);
 typedef void	(*vmm_resume_func_t)(void);
 typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
-				  struct pmap *pmap, void *rendezvous_cookie,
-				  void *suspend_cookie);
+		    struct pmap *pmap, struct vm_eventinfo *info);
 typedef void	(*vmi_cleanup_func_t)(void *vmi);
 typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t *retval);
@@ -208,6 +213,7 @@ struct vm_exit *vm_exitinfo(struct vm *v
 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
 
 #ifdef _SYS__CPUSET_H_
 /*
@@ -232,17 +238,24 @@ cpuset_t vm_suspended_cpus(struct vm *vm
 #endif	/* _SYS__CPUSET_H_ */
 
 static __inline int
-vcpu_rendezvous_pending(void *rendezvous_cookie)
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+	return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
 {
 
-	return (*(uintptr_t *)rendezvous_cookie != 0);
+	return (*info->sptr);
 }
 
 static __inline int
-vcpu_suspended(void *suspend_cookie)
+vcpu_reqidle(struct vm_eventinfo *info)
 {
 
-	return (*(int *)suspend_cookie);
+	return (*info->iptr);
 }
 
 /*
@@ -506,6 +519,7 @@ enum vm_exitcode {
 	VM_EXITCODE_MONITOR,
 	VM_EXITCODE_MWAIT,
 	VM_EXITCODE_SVM,
+	VM_EXITCODE_REQIDLE,
 	VM_EXITCODE_MAX
 };
 

Modified: head/sys/amd64/vmm/amd/svm.c
==============================================================================
--- head/sys/amd64/vmm/amd/svm.c	Thu May 28 17:06:50 2015	(r283656)
+++ head/sys/amd64/vmm/amd/svm.c	Thu May 28 17:37:01 2015	(r283657)
@@ -1900,7 +1900,7 @@ enable_gintr(void)
  */
 static int
 svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, 
-	void *rend_cookie, void *suspended_cookie)
+	struct vm_eventinfo *evinfo)
 {
 	struct svm_regctx *gctx;
 	struct svm_softc *svm_sc;
@@ -1975,18 +1975,24 @@ svm_vmrun(void *arg, int vcpu, register_
 		 */
 		disable_gintr();
 
-		if (vcpu_suspended(suspended_cookie)) {
+		if (vcpu_suspended(evinfo)) {
 			enable_gintr();
 			vm_exit_suspended(vm, vcpu, state->rip);
 			break;
 		}
 
-		if (vcpu_rendezvous_pending(rend_cookie)) {
+		if (vcpu_rendezvous_pending(evinfo)) {
 			enable_gintr();
 			vm_exit_rendezvous(vm, vcpu, state->rip);
 			break;
 		}
 
+		if (vcpu_reqidle(evinfo)) {
+			enable_gintr();
+			vm_exit_reqidle(vm, vcpu, state->rip);
+			break;
+		}
+
 		/* We are asked to give the cpu by scheduler. */
 		if (vcpu_should_yield(vm, vcpu)) {
 			enable_gintr();

Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c	Thu May 28 17:06:50 2015	(r283656)
+++ head/sys/amd64/vmm/intel/vmx.c	Thu May 28 17:37:01 2015	(r283657)
@@ -2554,7 +2554,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int
 
 static int
 vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
-    void *rendezvous_cookie, void *suspend_cookie)
+    struct vm_eventinfo *evinfo)
 {
 	int rc, handled, launched;
 	struct vmx *vmx;
@@ -2623,18 +2623,24 @@ vmx_run(void *arg, int vcpu, register_t 
 		 * vmx_inject_interrupts() can suspend the vcpu due to a
 		 * triple fault.
 		 */
-		if (vcpu_suspended(suspend_cookie)) {
+		if (vcpu_suspended(evinfo)) {
 			enable_intr();
 			vm_exit_suspended(vmx->vm, vcpu, rip);
 			break;
 		}
 
-		if (vcpu_rendezvous_pending(rendezvous_cookie)) {
+		if (vcpu_rendezvous_pending(evinfo)) {
 			enable_intr();
 			vm_exit_rendezvous(vmx->vm, vcpu, rip);
 			break;
 		}
 
+		if (vcpu_reqidle(evinfo)) {
+			enable_intr();
+			vm_exit_reqidle(vmx->vm, vcpu, rip);
+			break;
+		}
+
 		if (vcpu_should_yield(vm, vcpu)) {
 			enable_intr();
 			vm_exit_astpending(vmx->vm, vcpu, rip);

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c	Thu May 28 17:06:50 2015	(r283656)
+++ head/sys/amd64/vmm/vmm.c	Thu May 28 17:37:01 2015	(r283657)
@@ -95,6 +95,7 @@ struct vcpu {
 	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
 	enum vcpu_state	state;		/* (o) vcpu state */
 	int		hostcpu;	/* (o) vcpu's host cpu */
+	int		reqidle;	/* (i) request vcpu to idle */
 	struct vlapic	*vlapic;	/* (i) APIC device model */
 	enum x2apic_state x2apic_state;	/* (i) APIC mode */
 	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
@@ -164,8 +165,8 @@ static struct vmm_ops *ops;
 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
 
 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
-	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
+#define	VMRUN(vmi, vcpu, rip, pmap, evinfo) \
+	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define	VMSPACE_ALLOC(min, max) \
 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -221,6 +222,28 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_f
 SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
     "Force use of I/O MMU even if no passthrough devices were found.");
 
+static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+
+#ifdef KTR
+static const char *
+vcpu_state2str(enum vcpu_state state)
+{
+
+	switch (state) {
+	case VCPU_IDLE:
+		return ("idle");
+	case VCPU_FROZEN:
+		return ("frozen");
+	case VCPU_RUNNING:
+		return ("running");
+	case VCPU_SLEEPING:
+		return ("sleeping");
+	default:
+		return ("unknown");
+	}
+}
+#endif
+
 static void
 vcpu_cleanup(struct vm *vm, int i, bool destroy)
 {
@@ -255,6 +278,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bo
 
 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+	vcpu->reqidle = 0;
 	vcpu->exitintinfo = 0;
 	vcpu->nmi_pending = 0;
 	vcpu->extint_pending = 0;
@@ -980,11 +1004,13 @@ save_guest_fpustate(struct vcpu *vcpu)
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
+	struct vcpu *vcpu;
 	int error;
 
+	vcpu = &vm->vcpu[vcpuid];
 	vcpu_assert_locked(vcpu);
 
 	/*
@@ -993,8 +1019,13 @@ vcpu_set_state_locked(struct vcpu *vcpu,
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (from_idle) {
-		while (vcpu->state != VCPU_IDLE)
+		while (vcpu->state != VCPU_IDLE) {
+			vcpu->reqidle = 1;
+			vcpu_notify_event_locked(vcpu, false);
+			VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
+			    "idle requested", vcpu_state2str(vcpu->state));
 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+		}
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
@@ -1031,6 +1062,9 @@ vcpu_set_state_locked(struct vcpu *vcpu,
 	if (error)
 		return (EBUSY);
 
+	VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
+	    vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
+
 	vcpu->state = newstate;
 	if (newstate == VCPU_RUNNING)
 		vcpu->hostcpu = curcpu;
@@ -1053,11 +1087,11 @@ vcpu_require_state(struct vm *vm, int vc
 }
 
 static void
-vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
 	int error;
 
-	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+	if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
 		panic("Error %d setting state to %d", error, newstate);
 }
 
@@ -1145,7 +1179,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid,
 		 * vcpu returned from VMRUN() and before it acquired the
 		 * vcpu lock above.
 		 */
-		if (vm->rendezvous_func != NULL || vm->suspend)
+		if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
 			break;
 		if (vm_nmi_pending(vm, vcpuid))
 			break;
@@ -1182,13 +1216,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid,
 		}
 
 		t = ticks;
-		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+		vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 		/*
 		 * XXX msleep_spin() cannot be interrupted by signals so
 		 * wake up periodically to check pending signals.
 		 */
 		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
-		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+		vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
 
@@ -1350,9 +1384,9 @@ vm_handle_suspend(struct vm *vm, int vcp
 
 		if (vm->rendezvous_func == NULL) {
 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
-			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+			vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
-			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+			vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		} else {
 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
 			vcpu_unlock(vcpu);
@@ -1375,6 +1409,19 @@ vm_handle_suspend(struct vm *vm, int vcp
 	return (0);
 }
 
+static int
+vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle));
+	vcpu->reqidle = 0;
+	vcpu_unlock(vcpu);
+	*retu = true;
+	return (0);
+}
+
 int
 vm_suspend(struct vm *vm, enum vm_suspend_how how)
 {
@@ -1432,6 +1479,18 @@ vm_exit_rendezvous(struct vm *vm, int vc
 }
 
 void
+vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
+{
+	struct vm_exit *vmexit;
+
+	vmexit = vm_exitinfo(vm, vcpuid);
+	vmexit->rip = rip;
+	vmexit->inst_length = 0;
+	vmexit->exitcode = VM_EXITCODE_REQIDLE;
+	vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
+}
+
+void
 vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vmexit;
@@ -1446,6 +1505,7 @@ vm_exit_astpending(struct vm *vm, int vc
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
+	struct vm_eventinfo evinfo;
 	int error, vcpuid;
 	struct vcpu *vcpu;
 	struct pcb *pcb;
@@ -1453,7 +1513,6 @@ vm_run(struct vm *vm, struct vm_run *vmr
 	struct vm_exit *vme;
 	bool retu, intr_disabled;
 	pmap_t pmap;
-	void *rptr, *sptr;
 
 	vcpuid = vmrun->cpuid;
 
@@ -1466,11 +1525,12 @@ vm_run(struct vm *vm, struct vm_run *vmr
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
-	rptr = &vm->rendezvous_func;
-	sptr = &vm->suspend;
 	pmap = vmspace_pmap(vm->vmspace);
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
+	evinfo.rptr = &vm->rendezvous_func;
+	evinfo.sptr = &vm->suspend;
+	evinfo.iptr = &vcpu->reqidle;
 restart:
 	critical_enter();
 
@@ -1485,7 +1545,7 @@ restart:
 	restore_guest_fpustate(vcpu);
 
 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
-	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
+	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
 	save_guest_fpustate(vcpu);
@@ -1498,6 +1558,9 @@ restart:
 		retu = false;
 		vcpu->nextrip = vme->rip + vme->inst_length;
 		switch (vme->exitcode) {
+		case VM_EXITCODE_REQIDLE:
+			error = vm_handle_reqidle(vm, vcpuid, &retu);
+			break;
 		case VM_EXITCODE_SUSPENDED:
 			error = vm_handle_suspend(vm, vcpuid, &retu);
 			break;
@@ -1536,6 +1599,8 @@ restart:
 	if (error == 0 && retu == false)
 		goto restart;
 
+	VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
+
 	/* copy the exit information */
 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
 	return (error);
@@ -2072,7 +2137,7 @@ vcpu_set_state(struct vm *vm, int vcpuid
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
-	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+	error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
 	vcpu_unlock(vcpu);
 
 	return (error);
@@ -2168,15 +2233,11 @@ vm_set_x2apic_state(struct vm *vm, int v
  * - If the vcpu is running on a different host_cpu then an IPI will be directed
  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
  */
-void
-vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
 {
 	int hostcpu;
-	struct vcpu *vcpu;
-
-	vcpu = &vm->vcpu[vcpuid];
 
-	vcpu_lock(vcpu);
 	hostcpu = vcpu->hostcpu;
 	if (vcpu->state == VCPU_RUNNING) {
 		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
@@ -2201,6 +2262,15 @@ vcpu_notify_event(struct vm *vm, int vcp
 		if (vcpu->state == VCPU_SLEEPING)
 			wakeup_one(vcpu);
 	}
+}
+
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+	struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	vcpu_notify_event_locked(vcpu, lapic_intr);
 	vcpu_unlock(vcpu);
 }
 

Modified: head/sys/amd64/vmm/vmm_stat.c
==============================================================================
--- head/sys/amd64/vmm/vmm_stat.c	Thu May 28 17:06:50 2015	(r283656)
+++ head/sys/amd64/vmm/vmm_stat.c	Thu May 28 17:37:01 2015	(r283657)
@@ -164,6 +164,7 @@ VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits 
 VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
 VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
 VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
 VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
 VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
 VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");

Modified: head/sys/amd64/vmm/vmm_stat.h
==============================================================================
--- head/sys/amd64/vmm/vmm_stat.h	Thu May 28 17:06:50 2015	(r283656)
+++ head/sys/amd64/vmm/vmm_stat.h	Thu May 28 17:37:01 2015	(r283657)
@@ -157,4 +157,5 @@ VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
 VMM_STAT_DECLARE(VMEXIT_USERSPACE);
 VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
 VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+VMM_STAT_DECLARE(VMEXIT_REQIDLE);
 #endif

Modified: head/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- head/usr.sbin/bhyve/bhyverun.c	Thu May 28 17:06:50 2015	(r283656)
+++ head/usr.sbin/bhyve/bhyverun.c	Thu May 28 17:37:01 2015	(r283657)
@@ -100,7 +100,7 @@ static struct vm_exit vmexit[VM_MAXCPU];
 
 struct bhyvestats {
         uint64_t        vmexit_bogus;
-        uint64_t        vmexit_bogus_switch;
+	uint64_t	vmexit_reqidle;
         uint64_t        vmexit_hlt;
         uint64_t        vmexit_pause;
         uint64_t        vmexit_mtrap;
@@ -461,6 +461,17 @@ vmexit_bogus(struct vmctx *ctx, struct v
 }
 
 static int
+vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+	assert(vmexit->inst_length == 0);
+
+	stats.vmexit_reqidle++;
+
+	return (VMEXIT_CONTINUE);
+}
+
+static int
 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
@@ -571,6 +582,7 @@ static vmexit_handler_t handler[VM_EXITC
 	[VM_EXITCODE_VMX]    = vmexit_vmx,
 	[VM_EXITCODE_SVM]    = vmexit_svm,
 	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
+	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
 	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
 	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
 	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201505281737.t4SHb2KK051966>