Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 30 Apr 2019 23:53:55 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r346978 - in stable: 11/sys/amd64/vmm/intel 12/sys/amd64/vmm/intel
Message-ID:  <201904302353.x3UNrtUH084677@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Tue Apr 30 23:53:54 2019
New Revision: 346978
URL: https://svnweb.freebsd.org/changeset/base/346978

Log:
  MFC 344711: Fix missed posted interrupts in VT-x in bhyve.
  
  When a vCPU is HLTed, interrupts with a priority below the processor
  priority (PPR) should not resume the vCPU while interrupts at or above
  the PPR should.  With posted interrupts, bhyve maintains a bitmap of
  pending interrupts in PIR descriptor along with a single 'pending'
  bit.  This bit is checked by a CPU running in guest mode at various
  places to determine if it should be checked.  In addition, another CPU
  can force a CPU in guest mode to check for pending interrupts by
  sending an IPI to a special IDT vector reserved for this purpose.
  
  bhyve had a bug in that it would only notify a guest vCPU of an
  interrupt (e.g. by sending the special IPI or by resuming it if it was
  idle due to HLT) if an interrupt arrived that was higher priority than
  PPR and no interrupts were currently pending.  This assumed that if
  the 'pending' bit was set, any needed notification was already in
  progress.  However, if the first interrupt sent to a HLTed vCPU was
  lower priority than PPR and the second was higher than PPR, the first
  interrupt would set 'pending' but not notify the vCPU, and the second
  interrupt would not notify the vCPU because 'pending' was already set.
  To fix this, track the priority of pending interrupts in a separate
  per-vCPU bitmask and notify a vCPU anytime an interrupt arrives that
  is above PPR and higher than any previously-received interrupt.
  
  This was found and debugged in the bhyve port to SmartOS maintained by
  Joyent.  Relevant SmartOS bugs with more background:
  
  https://smartos.org/bugview/OS-6829
  https://smartos.org/bugview/OS-6930
  https://smartos.org/bugview/OS-7354

Modified:
  stable/11/sys/amd64/vmm/intel/vmx.c
Directory Properties:
  stable/11/   (props changed)

Changes in other areas also in this revision:
Modified:
  stable/12/sys/amd64/vmm/intel/vmx.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/11/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx.c	Tue Apr 30 23:01:13 2019	(r346977)
+++ stable/11/sys/amd64/vmm/intel/vmx.c	Tue Apr 30 23:53:54 2019	(r346978)
@@ -1,6 +1,7 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
+ * Copyright (c) 2018 Joyent, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -3264,8 +3265,11 @@ struct vlapic_vtx {
 	struct vlapic	vlapic;
 	struct pir_desc	*pir_desc;
 	struct vmx	*vmx;
+	u_int	pending_prio;
 };
 
+#define VPR_PRIO_BIT(vpr)	(1 << ((vpr) >> 4))
+
 #define	VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg)	\
 do {									\
 	VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d",	\
@@ -3287,7 +3291,7 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, 
 	struct vlapic_vtx *vlapic_vtx;
 	struct pir_desc *pir_desc;
 	uint64_t mask;
-	int idx, notify;
+	int idx, notify = 0;
 
 	vlapic_vtx = (struct vlapic_vtx *)vlapic;
 	pir_desc = vlapic_vtx->pir_desc;
@@ -3300,8 +3304,38 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, 
 	idx = vector / 64;
 	mask = 1UL << (vector % 64);
 	atomic_set_long(&pir_desc->pir[idx], mask);
-	notify = atomic_cmpset_long(&pir_desc->pending, 0, 1);
 
+	/*
+	 * A notification is required whenever the 'pending' bit makes a
+	 * transition from 0->1.
+	 *
+	 * Even if the 'pending' bit is already asserted, notification about
+	 * the incoming interrupt may still be necessary.  For example, if a
+	 * vCPU is HLTed with a high PPR, a low priority interrupt would cause
+	 * the 0->1 'pending' transition with a notification, but the vCPU
+	 * would ignore the interrupt for the time being.  The same vCPU would
+	 * need to then be notified if a high-priority interrupt arrived which
+	 * satisfied the PPR.
+	 *
+	 * The priorities of interrupts injected while 'pending' is asserted
+	 * are tracked in a custom bitfield 'pending_prio'.  Should the
+	 * to-be-injected interrupt exceed the priorities already present, the
+	 * notification is sent.  The priorities recorded in 'pending_prio' are
+	 * cleared whenever the 'pending' bit makes another 0->1 transition.
+	 */
+	if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) {
+		notify = 1;
+		vlapic_vtx->pending_prio = 0;
+	} else {
+		const u_int old_prio = vlapic_vtx->pending_prio;
+		const u_int prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT);
+
+		if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) {
+			atomic_set_int(&vlapic_vtx->pending_prio, prio_bit);
+			notify = 1;
+		}
+	}
+
 	VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
 	    level, "vmx_set_intr_ready");
 	return (notify);
@@ -3345,14 +3379,31 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
 	VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
 	    lapic->ppr);
 
+	vpr = 0;
 	for (i = 3; i >= 0; i--) {
 		pirval = pir_desc->pir[i];
 		if (pirval != 0) {
 			vpr = (i * 64 + flsl(pirval) - 1) & 0xf0;
-			return (vpr > ppr);
+			break;
 		}
 	}
-	return (0);
+
+	/*
+	 * If the highest-priority pending interrupt falls short of the
+	 * processor priority of this vCPU, ensure that 'pending_prio' does not
+	 * have any stale bits which would preclude a higher-priority interrupt
+	 * from incurring a notification later.
+	 */
+	if (vpr <= ppr) {
+		const u_int prio_bit = VPR_PRIO_BIT(vpr);
+		const u_int old = vlapic_vtx->pending_prio;
+
+		if (old > prio_bit && (old & prio_bit) == 0) {
+			vlapic_vtx->pending_prio = prio_bit;
+		}
+		return (0);
+	}
+	return (1);
 }
 
 static void



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201904302353.x3UNrtUH084677>