Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 21 Dec 2019 20:25:46 +0100
From:      Yamagi <lists@yamagi.org>
To:        freebsd-virtualization@freebsd.org
Subject:   [PATCH] Untangle TPR shadowing and APIC virtualization / Make Win guests on Bhyve _fast_
Message-ID:  <20191221202546.caca1f242a907cf50b5562e3@yamagi.org>

next in thread | raw e-mail | index | archive | help
--Signature=_Sat__21_Dec_2019_20_25_46_+0100_88vibovcNf1BDUAv
Content-Type: text/plain; charset=US-ASCII
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

Hi,
a long known problem with Bhyve is that Windows guests are rather slow.
With Windows 10 1903 this became much worse, to the point that the
guest is unusable. I have found the reason for this: Windows hammers on
the %cr8 control register. For example, Windows 10 1909 on an i7-2620M
has about 68,000 %cr8 accesses per second. Each of them triggers a vm
exit.

The most common solution is TPR shadowing. Many thanks to royger in
#bhyve for getting me on the right track. Bhyve already implements TPR
shadowing. On AMD SVM it just works, but the implementation for Intel
VT-x is bound to APIC virtualization. And APIC virtualization is a Xeon
feature that is missing on most (all?) desktop CPUs.

The patch - further down inline or under [0] - separates TPR shadowing
from APIC virtualization, so TPR shadowing can be used on desktop CPUs
as well. The patch doesn't just give a small speed boost, it's a
difference like day and night. As an example, without the patch, the
installation of Windows 10 1909 takes about 2280 seconds from start to
first reboot. With the patch, only 370 seconds. On an old Thinkpad
X220, Windows 10 guests were previously unusable; now they are reasonably
fast.

The patch does:

* Add a new tunable 'hw.vmm.vmx.use_tpr_shadowing' to disable TPR
  shadowing. Also add 'hw.vmm.vmx.cap.tpr_shadowing' to be able to query
  if TPR shadowing is used.

* Detach the initialization of TPR shadowing from the initialization of
  APIC virtualization. APIC virtualization still needs TPR shadowing,
  but not vice versa. Any CPU that supports APIC virtualization should
  also support TPR shadowing.

* When TPR shadowing is used, the APIC page of each vCPU is written to
  the VMCS_VIRTUAL_APIC field of the VMCS so that the CPU can write
  directly to the page without intercept.

* On vm exit, vlapic_update_ppr() is called to update the PPR.

The patch was tested on an i7-2620M, an i7-6700k and a Xeon Silver
4110. Both Windows and FreeBSD guests work correctly.

Regards,
Yamagi

0: https://gist.github.com/Yamagi/de70c08eadeeef14eec4cb42aeb5957f

----

diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 605fd0bda766..324a1e9d0c3c 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -172,6 +172,10 @@ static int cap_invpcid;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid,
     0, "Guests are allowed to use INVPCID");
=20
+static int tpr_shadowing;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD,
+    &tpr_shadowing, 0, "TPR shadowin support");
+
 static int virtual_interrupt_delivery;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_=
RD,
     &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery supp=
ort");
@@ -627,7 +631,7 @@ vmx_restore(void)
 static int
 vmx_init(int ipinum)
 {
-	int error, use_tpr_shadow;
+	int error;
 	uint64_t basic, fixed0, fixed1, feature_control;
 	uint32_t tmp, procbased2_vid_bits;
=20
@@ -750,6 +754,24 @@ vmx_init(int ipinum)
 	    MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
 	    &tmp) =3D=3D 0);
=20
+	/*
+	 * Check support for TPR shadow.
+	 */
+	error =3D vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
+	    &tmp);
+	if (error =3D=3D 0) {
+		tpr_shadowing =3D 1;
+		TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing",
+		    &tpr_shadowing);
+	}
+
+	if (tpr_shadowing) {
+		procbased_ctls |=3D PROCBASED_USE_TPR_SHADOW;
+		procbased_ctls &=3D ~PROCBASED_CR8_LOAD_EXITING;
+		procbased_ctls &=3D ~PROCBASED_CR8_STORE_EXITING;
+	}
+
 	/*
 	 * Check support for virtual interrupt delivery.
 	 */
@@ -758,13 +780,9 @@ vmx_init(int ipinum)
 	    PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
 	    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
=20
-	use_tpr_shadow =3D (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
-	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
-	    &tmp) =3D=3D 0);
-
 	error =3D vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
 	    procbased2_vid_bits, 0, &tmp);
-	if (error =3D=3D 0 && use_tpr_shadow) {
+	if (error =3D=3D 0 && tpr_shadowing) {
 		virtual_interrupt_delivery =3D 1;
 		TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
 		    &virtual_interrupt_delivery);
@@ -775,13 +793,6 @@ vmx_init(int ipinum)
 		procbased_ctls2 |=3D procbased2_vid_bits;
 		procbased_ctls2 &=3D ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
=20
-		/*
-		 * No need to emulate accesses to %CR8 if virtual
-		 * interrupt delivery is enabled.
-		 */
-		procbased_ctls &=3D ~PROCBASED_CR8_LOAD_EXITING;
-		procbased_ctls &=3D ~PROCBASED_CR8_STORE_EXITING;
-
 		/*
 		 * Check for Posted Interrupts only if Virtual Interrupt
 		 * Delivery is enabled.
@@ -1051,10 +1062,13 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		vmx->ctx[i].guest_dr6 =3D DBREG_DR6_RESERVED1;
 		error +=3D vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
=20
-		if (virtual_interrupt_delivery) {
-			error +=3D vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+		if (tpr_shadowing) {
 			error +=3D vmwrite(VMCS_VIRTUAL_APIC,
 			    vtophys(&vmx->apic_page[i]));
+		}
+
+		if (virtual_interrupt_delivery) {
+			error +=3D vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
 			error +=3D vmwrite(VMCS_EOI_EXIT0, 0);
 			error +=3D vmwrite(VMCS_EOI_EXIT1, 0);
 			error +=3D vmwrite(VMCS_EOI_EXIT2, 0);
@@ -2313,6 +2327,14 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct v=
m_exit *vmexit)
 		}
 	}
=20
+	/*
+	 * If 'TPR shadowing' is used, update the local APICs PPR.
+	 */
+	if (tpr_shadowing) {
+		vlapic =3D vm_lapic(vmx->vm, vcpu);
+		vlapic_update_ppr(vlapic);
+	}
+
 	switch (reason) {
 	case EXIT_REASON_TASK_SWITCH:
 		ts =3D &vmexit->u.task_switch;
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 74e6cd967396..289fdb7e077d 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -490,7 +490,7 @@ dump_isrvec_stk(struct vlapic *vlapic)
  * Algorithm adopted from section "Interrupt, Task and Processor Priority"
  * in Intel Architecture Manual Vol 3a.
  */
-static void
+void
 vlapic_update_ppr(struct vlapic *vlapic)
 {
 	int isrvec, tpr, ppr;
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
index 2a5f54003253..71b97feab6bc 100644
--- a/sys/amd64/vmm/io/vlapic.h
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -74,6 +74,8 @@ void vlapic_post_intr(struct vlapic *vlapic, int hostcpu,=
 int ipinum);
 void vlapic_fire_cmci(struct vlapic *vlapic);
 int vlapic_trigger_lvt(struct vlapic *vlapic, int vector);
=20
+void vlapic_update_ppr(struct vlapic *vlapic);
+
 uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
 int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
 void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state =
s);

--=20
Homepage: https://www.yamagi.org
Github:   https://github.com/yamagi
GPG:      0x1D502515

--Signature=_Sat__21_Dec_2019_20_25_46_+0100_88vibovcNf1BDUAv
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----

iQIzBAEBCAAdFiEEOXu/lxyufwz0gC5x6xRy5x1QJRUFAl3+cboACgkQ6xRy5x1Q
JRUvAA/+PuqhedCjcAl45VRGx1qiW6erTxctKu8nDe0OQCRDa9736NggtuzvV7tx
CIWMy5KLzkddbqkxnQc9h5thzhdbo1UJYHSivp4rQWvjj4iPE7NLyz+Ug3jOg9LG
Nrc/dmz1/OvWmp6HZSiVVZYACs1iv3W9QSaDnh6Q1krLrkwEC5T2bCIpBK+nZ7Ga
0rHp+GMdkYr5ys6QGsDjhGBuGIpVtAsmhw96l1dYhaxVco2Ox09cxepz+exDDFIp
3YzO3HVt2U/NCHT1cz6kPcYrN0CT6eRhWp+aLIzdQgJolia2aZxnWAHxR8BvdVmg
yPeiNYeAopntUuURMu9ClXVxzZA3+VLchy55Y2TzQuVHe06GozEGAJ9cyCbIBbSR
854z6pPdaTc4I+y1WZucGEielHSHVYN94hQK2Gqxtlb27dRW7p08aL79eBB8vQz5
etKGKRtT5BR44WdeGowJ6DXTYUYBRPMOICQb+5ih8wr2JgGzqWO7WkXlzNH8D/MX
2GT0eFW0nXs6jkpixHdcQECgycADz2Ex8GGTDkLIZP/nh5JuSuLbHu8WUfGErxxb
2uUGmEHewLTsLVtbl3VwF9erpQmwmIzLv3Nc9vRqbesR1235V4trBtZjJWZgR65Q
lCCEvjU+7fecc2RMIi7X9uAatt0rQLUCc/DQVjOL1Jly+VFpYiw=
=BGMY
-----END PGP SIGNATURE-----

--Signature=_Sat__21_Dec_2019_20_25_46_+0100_88vibovcNf1BDUAv--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20191221202546.caca1f242a907cf50b5562e3>