Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 7 Sep 2022 11:31:46 +0200
From:      FreeBSD User <freebsd@walstatt-de.de>
To:        Emmanuel Vadot <manu@FreeBSD.org>
Cc:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   Re: git: a5a918b7a906 - main - vmm: permit some IPIs to be handled by userspace
Message-ID:  <20220907113213.2cffd0bf@thor.intern.walstatt.dynvpn.de>
In-Reply-To: <202209070708.28778ww2018229@gitrepo.freebsd.org>
References:  <202209070708.28778ww2018229@gitrepo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Am Wed, 7 Sep 2022 07:08:58 GMT
Emmanuel Vadot <manu@FreeBSD.org> schrieb:

> The branch main has been updated by manu:
>=20
> URL: https://cgit.FreeBSD.org/src/commit/?id=3Da5a918b7a906eaa88e0833eac7=
0a15989d535b02
>=20
> commit a5a918b7a906eaa88e0833eac70a15989d535b02
> Author:     Corvin K=C3=B6hne <CorvinK@beckhoff.com>
> AuthorDate: 2022-09-07 07:07:03 +0000
> Commit:     Emmanuel Vadot <manu@FreeBSD.org>
> CommitDate: 2022-09-07 07:07:03 +0000
>=20
>     vmm: permit some IPIs to be handled by userspace
>    =20
>     Add VM_EXITCODE_IPI to permit returning unhandled IPIs to userland.
>     INIT and Startup IPIs are now returned to userland. Due to backward
>     compatibility reasons, a new capability is added for enabling
>     VM_EXITCODE_IPI.
>    =20
>     MFC after:              2 weeks
>     Differential Revision:  https://reviews.freebsd.org/D35623
>     Sponsored by:           Beckhoff Automation GmbH & Co. KG
> ---
>  sys/amd64/include/vmm.h        |   8 ++
>  sys/amd64/vmm/amd/svm.c        |  10 +++
>  sys/amd64/vmm/intel/vmx.c      |   8 ++
>  sys/amd64/vmm/io/vlapic.c      | 166 ++++++++++++++++++++++++-----------=
------
>  sys/amd64/vmm/io/vlapic_priv.h |   2 +
>  usr.sbin/bhyve/bhyverun.c      |  34 +++++++++
>  usr.sbin/bhyve/spinup_ap.c     |   3 +
>  7 files changed, 162 insertions(+), 69 deletions(-)
>=20
> diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
> index dcf862c34264..37a74f053fb3 100644
> --- a/sys/amd64/include/vmm.h
> +++ b/sys/amd64/include/vmm.h
> @@ -31,6 +31,7 @@
>  #ifndef _VMM_H_
>  #define	_VMM_H_
> =20
> +#include <sys/cpuset.h>
>  #include <sys/sdt.h>
>  #include <x86/segments.h>
> =20
> @@ -483,6 +484,7 @@ enum vm_cap_type {
>  	VM_CAP_BPT_EXIT,
>  	VM_CAP_RDPID,
>  	VM_CAP_RDTSCP,
> +	VM_CAP_IPI_EXIT,
>  	VM_CAP_MAX
>  };
> =20
> @@ -630,6 +632,7 @@ enum vm_exitcode {
>  	VM_EXITCODE_DEBUG,
>  	VM_EXITCODE_VMINSN,
>  	VM_EXITCODE_BPT,
> +	VM_EXITCODE_IPI,
>  	VM_EXITCODE_MAX
>  };
> =20
> @@ -737,6 +740,11 @@ struct vm_exit {
>  		struct {
>  			enum vm_suspend_how how;
>  		} suspended;
> +		struct {
> +			uint32_t mode;
> +			uint8_t vector;
> +			cpuset_t dmask;
> +		} ipi;
>  		struct vm_task_switch task_switch;
>  	} u;
>  };
> diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
> index 35e8d9833d0e..4195cc5bd049 100644
> --- a/sys/amd64/vmm/amd/svm.c
> +++ b/sys/amd64/vmm/amd/svm.c
> @@ -2315,6 +2315,7 @@ static int
>  svm_setcap(void *arg, int vcpu, int type, int val)
>  {
>  	struct svm_softc *sc;
> +	struct vlapic *vlapic;
>  	int error;
> =20
>  	sc =3D arg;
> @@ -2333,6 +2334,10 @@ svm_setcap(void *arg, int vcpu, int type, int val)
>  		if (val =3D=3D 0)
>  			error =3D EINVAL;
>  		break;
> +	case VM_CAP_IPI_EXIT:
> +		vlapic =3D vm_lapic(sc->vm, vcpu);
> +		vlapic->ipi_exit =3D val;
> +		break;
>  	default:
>  		error =3D ENOENT;
>  		break;
> @@ -2344,6 +2349,7 @@ static int
>  svm_getcap(void *arg, int vcpu, int type, int *retval)
>  {
>  	struct svm_softc *sc;
> +	struct vlapic *vlapic;
>  	int error;
> =20
>  	sc =3D arg;
> @@ -2361,6 +2367,10 @@ svm_getcap(void *arg, int vcpu, int type, int *ret=
val)
>  	case VM_CAP_UNRESTRICTED_GUEST:
>  		*retval =3D 1;	/* unrestricted guest is always enabled */
>  		break;
> +	case VM_CAP_IPI_EXIT:
> +		vlapic =3D vm_lapic(sc->vm, vcpu);
> +		*retval =3D vlapic->ipi_exit;
> +		break;
>  	default:
>  		error =3D ENOENT;
>  		break;
> diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
> index 64544a6e7955..857028dcd0f1 100644
> --- a/sys/amd64/vmm/intel/vmx.c
> +++ b/sys/amd64/vmm/intel/vmx.c
> @@ -3504,6 +3504,7 @@ vmx_getcap(void *arg, int vcpu, int type, int *retv=
al)
>  			ret =3D 0;
>  		break;
>  	case VM_CAP_BPT_EXIT:
> +	case VM_CAP_IPI_EXIT:
>  		ret =3D 0;
>  		break;
>  	default:
> @@ -3521,6 +3522,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
>  {
>  	struct vmx *vmx =3D arg;
>  	struct vmcs *vmcs =3D &vmx->vmcs[vcpu];
> +	struct vlapic *vlapic;
>  	uint32_t baseval;
>  	uint32_t *pptr;
>  	int error;
> @@ -3599,6 +3601,12 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
>  			reg =3D VMCS_EXCEPTION_BITMAP;
>  		}
>  		break;
> +	case VM_CAP_IPI_EXIT:
> +		retval =3D 0;
> +
> +		vlapic =3D vm_lapic(vmx->vm, vcpu);
> +		vlapic->ipi_exit =3D val;
> +		break;
>  	default:
>  		break;
>  	}
> diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
> index 9599b4b4e62c..dc9b00d2316e 100644
> --- a/sys/amd64/vmm/io/vlapic.c
> +++ b/sys/amd64/vmm/io/vlapic.c
> @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
> =20
>  static void vlapic_set_error(struct vlapic *, uint32_t, bool);
>  static void vlapic_callout_handler(void *arg);
> +static void vlapic_reset(struct vlapic *vlapic);
> =20
>  static __inline uint32_t
>  vlapic_get_id(struct vlapic *vlapic)
> @@ -957,9 +958,9 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, boo=
l *retu)
>  {
>  	int i;
>  	bool phys;
> -	cpuset_t dmask;
> +	cpuset_t dmask, ipimask;
>  	uint64_t icrval;
> -	uint32_t dest, vec, mode;
> +	uint32_t dest, vec, mode, shorthand;
>  	struct vlapic *vlapic2;
>  	struct vm_exit *vmexit;
>  	struct LAPIC *lapic;
> @@ -975,97 +976,122 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, =
bool *retu)
>  		dest =3D icrval >> (32 + 24);
>  	vec =3D icrval & APIC_VECTOR_MASK;
>  	mode =3D icrval & APIC_DELMODE_MASK;
> +	phys =3D (icrval & APIC_DESTMODE_LOG) =3D=3D 0;
> +	shorthand =3D icrval & APIC_DEST_MASK;
> +
> +	maxcpus =3D vm_get_maxcpus(vlapic->vm);
> =20
> -	if (mode =3D=3D APIC_DELMODE_FIXED && vec < 16) {
> -		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
> -		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
> -		return (0);
> -	}
> =20
>  	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
> =20
> -	if (mode =3D=3D APIC_DELMODE_FIXED || mode =3D=3D APIC_DELMODE_NMI) {
> -		switch (icrval & APIC_DEST_MASK) {
> -		case APIC_DEST_DESTFLD:
> -			phys =3D ((icrval & APIC_DESTMODE_LOG) =3D=3D 0);
> -			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
> -			    x2apic(vlapic));
> -			break;
> -		case APIC_DEST_SELF:
> -			CPU_SETOF(vlapic->vcpuid, &dmask);
> -			break;
> -		case APIC_DEST_ALLISELF:
> -			dmask =3D vm_active_cpus(vlapic->vm);
> -			break;
> -		case APIC_DEST_ALLESELF:
> -			dmask =3D vm_active_cpus(vlapic->vm);
> -			CPU_CLR(vlapic->vcpuid, &dmask);
> -			break;
> -		default:
> -			CPU_ZERO(&dmask);	/* satisfy gcc */
> -			break;
> +	switch (shorthand) {
> +	case APIC_DEST_DESTFLD:
> +		vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic));
> +		break;
> +	case APIC_DEST_SELF:
> +		CPU_SETOF(vlapic->vcpuid, &dmask);
> +		break;
> +	case APIC_DEST_ALLISELF:
> +		dmask =3D vm_active_cpus(vlapic->vm);
> +		break;
> +	case APIC_DEST_ALLESELF:
> +		dmask =3D vm_active_cpus(vlapic->vm);
> +		CPU_CLR(vlapic->vcpuid, &dmask);
> +		break;
> +	default:
> +		__assert_unreachable();
> +	}
> +
> +	/*
> +	 * ipimask is a set of vCPUs needing userland handling of the current
> +	 * IPI.
> +	 */
> +	CPU_ZERO(&ipimask);
> +
> +	switch (mode) {
> +	case APIC_DELMODE_FIXED:
> +		if (vec < 16) {
> +			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
> +			    false);
> +			VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
> +			return (0);
>  		}
> =20
>  		CPU_FOREACH_ISSET(i, &dmask) {
> -			if (mode =3D=3D APIC_DELMODE_FIXED) {
> -				lapic_intr_edge(vlapic->vm, i, vec);
> -				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
> -						    IPIS_SENT, i, 1);
> -				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
> -				    "to vcpuid %d", vec, i);
> -			} else {
> -				vm_inject_nmi(vlapic->vm, i);
> -				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
> -				    "to vcpuid %d", i);
> -			}
> +			lapic_intr_edge(vlapic->vm, i, vec);
> +			vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
> +			    IPIS_SENT, i, 1);
> +			VLAPIC_CTR2(vlapic,
> +			    "vlapic sending ipi %d to vcpuid %d", vec, i);
>  		}
> =20
> -		return (0);	/* handled completely in the kernel */
> -	}
> +		break;
> +	case APIC_DELMODE_NMI:
> +		CPU_FOREACH_ISSET(i, &dmask) {
> +			vm_inject_nmi(vlapic->vm, i);
> +			VLAPIC_CTR1(vlapic,
> +			    "vlapic sending ipi nmi to vcpuid %d", i);
> +		}
> =20
> -	maxcpus =3D vm_get_maxcpus(vlapic->vm);
> -	if (mode =3D=3D APIC_DELMODE_INIT) {
> +		break;
> +	case APIC_DELMODE_INIT:
>  		if ((icrval & APIC_LEVEL_MASK) =3D=3D APIC_LEVEL_DEASSERT)
> -			return (0);
> -
> -		if (vlapic->vcpuid =3D=3D 0 && dest !=3D 0 && dest < maxcpus) {
> -			vlapic2 =3D vm_lapic(vlapic->vm, dest);
> -
> -			/* move from INIT to waiting-for-SIPI state */
> -			if (vlapic2->boot_state =3D=3D BS_INIT) {
> -				vlapic2->boot_state =3D BS_SIPI;
> -			}
> +			break;
> =20
> -			return (0);
> +		CPU_FOREACH_ISSET(i, &dmask) {
> +			vlapic2 =3D vm_lapic(vlapic->vm, i);
> +			vlapic2->boot_state =3D BS_SIPI;
> +			CPU_SET(i, &ipimask);
>  		}
> -	}
> -
> -	if (mode =3D=3D APIC_DELMODE_STARTUP) {
> -		if (vlapic->vcpuid =3D=3D 0 && dest !=3D 0 && dest < maxcpus) {
> -			vlapic2 =3D vm_lapic(vlapic->vm, dest);
> =20
> +		break;
> +	case APIC_DELMODE_STARTUP:
> +		CPU_FOREACH_ISSET(i, &dmask) {
> +			vlapic2 =3D vm_lapic(vlapic->vm, i);
>  			/*
>  			 * Ignore SIPIs in any state other than wait-for-SIPI
>  			 */
>  			if (vlapic2->boot_state !=3D BS_SIPI)
> -				return (0);
> -
> +				continue;
> +			/*
> +			 * TODO:
> +			 * This should be triggered from userspace.
> +			 */
> +			vlapic_reset(vlapic2);
>  			vlapic2->boot_state =3D BS_RUNNING;
> +			CPU_SET(i, &ipimask);
> +		}
> =20
> -			*retu =3D true;
> -			vmexit =3D vm_exitinfo(vlapic->vm, vlapic->vcpuid);
> -			vmexit->exitcode =3D VM_EXITCODE_SPINUP_AP;
> -			vmexit->u.spinup_ap.vcpu =3D dest;
> -			vmexit->u.spinup_ap.rip =3D vec << PAGE_SHIFT;
> +		break;
> +	default:
> +		return (1);
> +	}
> =20
> -			return (0);
> +	if (!CPU_EMPTY(&ipimask)) {
> +		vmexit =3D vm_exitinfo(vlapic->vm, vlapic->vcpuid);
> +		vmexit->exitcode =3D VM_EXITCODE_IPI;
> +		vmexit->u.ipi.mode =3D mode;
> +		vmexit->u.ipi.vector =3D vec;
> +		vmexit->u.ipi.dmask =3D dmask;
> +
> +		*retu =3D true;
> +
> +		/*
> +		 * Old bhyve versions don't support the IPI exit. Translate it
> +		 * into the old style.
> +		 */
> +		if (!vlapic->ipi_exit) {
> +			if (mode =3D=3D APIC_DELMODE_STARTUP) {
> +				vmexit->exitcode =3D VM_EXITCODE_SPINUP_AP;
> +				vmexit->u.spinup_ap.vcpu =3D CPU_FFS(&ipimask) - 1;
> +				vmexit->u.spinup_ap.rip =3D vec << PAGE_SHIFT;
> +			} else {
> +				*retu =3D false;
> +			}
>  		}
>  	}
> =20
> -	/*
> -	 * This will cause a return to userland.
> -	 */
> -	return (1);
> +	return (0);
>  }
> =20
>  void
> @@ -1467,6 +1493,8 @@ vlapic_init(struct vlapic *vlapic)
>  	if (vlapic->vcpuid =3D=3D 0)
>  		vlapic->msr_apicbase |=3D APICBASE_BSP;
> =20
> +	vlapic->ipi_exit =3D false;
> +
>  	vlapic_reset(vlapic);
>  }
> =20
> diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_pri=
v.h
> index fe7965cb65d7..4b3e9009e68c 100644
> --- a/sys/amd64/vmm/io/vlapic_priv.h
> +++ b/sys/amd64/vmm/io/vlapic_priv.h
> @@ -183,6 +183,8 @@ struct vlapic {
>  	 */
>  	uint32_t	svr_last;
>  	uint32_t	lvt_last[VLAPIC_MAXLVT_INDEX + 1];
> +
> +	bool		ipi_exit;
>  };
> =20
>  void vlapic_init(struct vlapic *vlapic);
> diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
> index 550cc9d15477..a1849519996c 100644
> --- a/usr.sbin/bhyve/bhyverun.c
> +++ b/usr.sbin/bhyve/bhyverun.c
> @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
>  #endif
> =20
>  #include <amd64/vmm/intel/vmcs.h>
> +#include <x86/apicreg.h>
> =20
>  #include <machine/atomic.h>
>  #include <machine/segments.h>
> @@ -935,6 +936,35 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit =
*vmexit, int *pvcpu)
>  	return (VMEXIT_CONTINUE);
>  }
> =20
> +static int
> +vmexit_ipi(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
> +{
> +	int error =3D -1;
> +	int i;
> +	switch (vmexit->u.ipi.mode) {
> +	case APIC_DELMODE_INIT:
> +		CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
> +			error =3D vm_suspend_cpu(ctx, i);
> +			if (error) {
> +				warnx("%s: failed to suspend cpu %d\n",
> +				    __func__, i);
> +				break;
> +			}
> +		}
> +		break;
> +	case APIC_DELMODE_STARTUP:
> +		CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
> +			spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT);
> +		}
> +		error =3D 0;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return (error);
> +}
> +
>  static vmexit_handler_t handler[VM_EXITCODE_MAX] =3D {
>  	[VM_EXITCODE_INOUT]  =3D vmexit_inout,
>  	[VM_EXITCODE_INOUT_STR]  =3D vmexit_inout,
> @@ -951,6 +981,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] =3D {
>  	[VM_EXITCODE_TASK_SWITCH] =3D vmexit_task_switch,
>  	[VM_EXITCODE_DEBUG] =3D vmexit_debug,
>  	[VM_EXITCODE_BPT] =3D vmexit_breakpoint,
> +	[VM_EXITCODE_IPI] =3D vmexit_ipi,
>  };
> =20
>  static void
> @@ -1151,6 +1182,9 @@ spinup_vcpu(struct vmctx *ctx, int vcpu, bool suspe=
nd)
>  	error =3D vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
>  	assert(error =3D=3D 0);
> =20
> +	error =3D vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1);
> +	assert(error =3D=3D 0);
> +
>  	fbsdrun_addcpu(ctx, vcpu, rip, suspend);
>  }
> =20
> diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c
> index 2b7e602f8003..438091e564e7 100644
> --- a/usr.sbin/bhyve/spinup_ap.c
> +++ b/usr.sbin/bhyve/spinup_ap.c
> @@ -98,6 +98,9 @@ spinup_ap(struct vmctx *ctx, int newcpu, uint64_t rip)
>  	error =3D vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
>  	assert(error =3D=3D 0);
> =20
> +	error =3D vm_set_capability(ctx, newcpu, VM_CAP_IPI_EXIT, 1);
> +	assert(error =3D=3D 0);
> +
>  	spinup_ap_realmode(ctx, newcpu, &rip);
> =20
>  	vm_resume_cpu(ctx, newcpu);
>=20

Buildkernel dies with:

[...]
/usr/src/sys/amd64/vmm/io/vlapic.c:967:11: error: variable 'maxcpus' set bu=
t not used
[-Werror,-Wunused-but-set-variable] uint16_t maxcpus;


Regards oh

--=20
O. Hartmann



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20220907113213.2cffd0bf>