Date: Fri, 18 Nov 2022 18:26:55 GMT From: John Baldwin <jhb@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 98568a005a19 - main - vmm: Allocate vCPUs on first use of a vCPU. Message-ID: <202211181826.2AIIQtkb030784@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=98568a005a193ce2c37702a8377ddd10c570e452 commit 98568a005a193ce2c37702a8377ddd10c570e452 Author: John Baldwin <jhb@FreeBSD.org> AuthorDate: 2022-11-18 18:05:35 +0000 Commit: John Baldwin <jhb@FreeBSD.org> CommitDate: 2022-11-18 18:25:38 +0000 vmm: Allocate vCPUs on first use of a vCPU. Convert the vcpu[] array in struct vm to an array of pointers and allocate vCPUs on first use. This avoids always allocating VM_MAXCPU vCPUs for each VM, but instead only allocates the vCPUs in use. A new per-VM sx lock is added to serialize attempts to allocate vCPUs on first use. However, a given vCPU is never freed while the VM is active, so the pointer is read via an unlocked read first to avoid the need for the lock in the common case once the vCPU has been created. Some ioctls need to lock all vCPUs. To prevent races with ioctls that want to allocate a new vCPU, these ioctls also lock the sx lock that protects vCPU creation. Reviewed by: corvink, markj Differential Revision: https://reviews.freebsd.org/D37174 --- sys/amd64/include/vmm.h | 4 ++ sys/amd64/vmm/io/vlapic.c | 6 ++- sys/amd64/vmm/vmm.c | 133 +++++++++++++++++++++++++++++++++------------- sys/amd64/vmm/vmm_dev.c | 87 ++++++++++++++++-------------- 4 files changed, 150 insertions(+), 80 deletions(-) diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 713c4a8b46e9..c6194c32b095 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -219,6 +219,10 @@ extern const struct vmm_ops vmm_ops_intel; extern const struct vmm_ops vmm_ops_amd; int vm_create(const char *name, struct vm **retvm); +struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); +void vm_disable_vcpu_creation(struct vm *vm); +void vm_slock_vcpus(struct vm *vm); +void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index e13cdcc63d57..6307ce341c72 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -1840,6 +1840,7 @@ int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) { int ret; + struct vcpu *vcpu; struct vlapic *vlapic; struct LAPIC *lapic; uint32_t ccr; @@ -1851,7 +1852,10 @@ vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vlapic = vm_lapic(vm_vcpu(vm, i)); + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vlapic = vm_lapic(vcpu); /* snapshot the page first; timer period depends on icr_timer */ lapic = vlapic->apic_page; diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index ec6504772db3..7cde7b4005c4 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -127,7 +127,6 @@ struct vcpu { uint64_t tsc_offset; /* (o) TSC offsetting */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -175,6 +174,7 @@ struct vm { volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ cpuset_t startup_cpus; /* (i) [r] waiting for startup */ int suspend; /* (i) stop VM execution */ + bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ @@ -186,13 +186,14 @@ struct vm { struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ - struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ + struct vcpu *vcpu[VM_MAXCPU]; /* (x) guest vcpus */ /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ struct sx mem_segs_lock; /* (o) */ + struct sx vcpus_init_lock; /* (o) */ }; #define VMM_CTR0(vcpu, format) \ @@ -319,10 +320,8 @@ vcpu_state2str(enum vcpu_state state) #endif static void -vcpu_cleanup(struct vm *vm, int i, bool destroy) +vcpu_cleanup(struct vcpu *vcpu, bool destroy) { - struct vcpu *vcpu = &vm->vcpu[i]; - vmmops_vlapic_cleanup(vcpu->vlapic); vmmops_vcpu_cleanup(vcpu->cookie); vcpu->cookie = NULL; @@ -333,30 +332,30 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy) } } -static void -vcpu_init(struct vm *vm, int vcpu_id, bool create) +static struct vcpu * +vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_init: invalid vcpu %d", vcpu_id)); - - vcpu = &vm->vcpu[vcpu_id]; - - if (create) { - KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " - "initialized", vcpu_id)); - vcpu_lock_init(vcpu); - vcpu->state = VCPU_IDLE; - vcpu->hostcpu = NOCPU; - vcpu->vcpuid = vcpu_id; - vcpu->vm = vm; - vcpu->guestfpu = fpu_save_area_alloc(); - vcpu->stats = vmm_stat_alloc(); - vcpu->tsc_offset = 0; - } - vcpu->cookie = vmmops_vcpu_init(vm->cookie, vcpu, vcpu_id); + vcpu = malloc(sizeof(*vcpu), M_VM, M_WAITOK | M_ZERO); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + vcpu->vm = vm; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + vcpu->tsc_offset = 0; + return (vcpu); +} + +static void +vcpu_init(struct vcpu *vcpu) +{ + vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie); vm_set_x2apic_state(vcpu, X2APIC_DISABLED); vcpu->reqidle = 0; @@ -473,8 +472,6 @@ MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { - int i; - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); @@ -492,8 +489,61 @@ vm_init(struct vm *vm, bool create) vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); - for (i = 0; i < vm->maxcpus; i++) - vcpu_init(vm, i, create); + if (!create) { + for (int i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_init(vm->vcpu[i]); + } + } +} + +void +vm_disable_vcpu_creation(struct vm *vm) +{ + sx_xlock(&vm->vcpus_init_lock); + vm->dying = true; + sx_xunlock(&vm->vcpus_init_lock); +} + +struct vcpu * +vm_alloc_vcpu(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) + return (NULL); + + vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); + if (__predict_true(vcpu != NULL)) + return (vcpu); + + sx_xlock(&vm->vcpus_init_lock); + vcpu = vm->vcpu[vcpuid]; + if (vcpu == NULL && !vm->dying) { + vcpu = vcpu_alloc(vm, vcpuid); + vcpu_init(vcpu); + + /* + * Ensure vCPU is fully created before updating pointer + * to permit unlocked reads above. + */ + atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], + (uintptr_t)vcpu); + } + sx_xunlock(&vm->vcpus_init_lock); + return (vcpu); +} + +void +vm_slock_vcpus(struct vm *vm) +{ + sx_slock(&vm->vcpus_init_lock); +} + +void +vm_unlock_vcpus(struct vm *vm) +{ + sx_unlock(&vm->vcpus_init_lock); } /* @@ -528,6 +578,7 @@ vm_create(const char *name, struct vm **retvm) vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); sx_init(&vm->mem_segs_lock, "vm mem_segs"); + sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ @@ -558,17 +609,14 @@ vm_get_maxcpus(struct vm *vm) int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, - uint16_t threads, uint16_t maxcpus) + uint16_t threads, uint16_t maxcpus __unused) { - if (maxcpus != 0) - return (EINVAL); /* XXX remove when supported */ + /* Ignore maxcpus. */ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); - /* XXX need to check sockets * cores * threads == vCPU, how? */ vm->sockets = sockets; vm->cores = cores; vm->threads = threads; - vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ return(0); } @@ -593,8 +641,10 @@ vm_cleanup(struct vm *vm, bool destroy) vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); - for (i = 0; i < vm->maxcpus; i++) - vcpu_cleanup(vm, i, destroy); + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_cleanup(vm->vcpu[i], destroy); + } vmmops_cleanup(vm->cookie); @@ -619,6 +669,7 @@ vm_cleanup(struct vm *vm, bool destroy) vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; + sx_destroy(&vm->vcpus_init_lock); sx_destroy(&vm->mem_segs_lock); mtx_destroy(&vm->rendezvous_mtx); } @@ -2250,7 +2301,7 @@ vcpu_vcpuid(struct vcpu *vcpu) struct vcpu * vm_vcpu(struct vm *vm, int vcpuid) { - return (&vm->vcpu[vcpuid]); + return (vm->vcpu[vcpuid]); } struct vlapic * @@ -2761,7 +2812,9 @@ vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta) now = rdtsc(); maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = &vm->vcpu[i]; + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); @@ -2811,7 +2864,9 @@ vm_snapshot_vcpu(struct vm *vm, struct vm_snapshot_meta *meta) maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = &vm->vcpu[i]; + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; error = vmmops_vcpu_snapshot(vcpu->cookie, meta); if (error != 0) { @@ -2894,7 +2949,9 @@ vm_restore_time(struct vm *vm) maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = &vm->vcpu[i]; + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; error = vmmops_restore_tsc(vcpu->cookie, vcpu->tsc_offset - now); diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index a7c12da68701..b4610f4f78f4 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -125,24 +125,16 @@ vmm_priv_check(struct ucred *ucred) } static int -vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) +vcpu_lock_one(struct vcpu *vcpu) { - int error; - - if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm)) - return (EINVAL); - - error = vcpu_set_state(vm_vcpu(sc->vm, vcpu), VCPU_FROZEN, true); - return (error); + return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); } static void -vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid) +vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu) { - struct vcpu *vcpu; enum vcpu_state state; - vcpu = vm_vcpu(sc->vm, vcpuid); state = vcpu_get_state(vcpu, NULL); if (state != VCPU_FROZEN) { panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), @@ -155,19 +147,29 @@ vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid) static int vcpu_lock_all(struct vmmdev_softc *sc) { - int error, vcpu; - uint16_t maxcpus; + struct vcpu *vcpu; + int error; + uint16_t i, maxcpus; + vm_slock_vcpus(sc->vm); maxcpus = vm_get_maxcpus(sc->vm); - for (vcpu = 0; vcpu < maxcpus; vcpu++) { - error = vcpu_lock_one(sc, vcpu); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + error = vcpu_lock_one(vcpu); if (error) break; } if (error) { - while (--vcpu >= 0) - vcpu_unlock_one(sc, vcpu); + while (--i >= 0) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + vcpu_unlock_one(sc, i, vcpu); + } + vm_unlock_vcpus(sc->vm); } return (error); @@ -176,12 +178,17 @@ vcpu_lock_all(struct vmmdev_softc *sc) static void vcpu_unlock_all(struct vmmdev_softc *sc) { - int vcpu; - uint16_t maxcpus; + struct vcpu *vcpu; + uint16_t i, maxcpus; maxcpus = vm_get_maxcpus(sc->vm); - for (vcpu = 0; vcpu < maxcpus; vcpu++) - vcpu_unlock_one(sc, vcpu); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + vcpu_unlock_one(sc, i, vcpu); + } + vm_unlock_vcpus(sc->vm); } static struct vmmdev_softc * @@ -363,20 +370,11 @@ vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, return (error); } -static struct vcpu * -lookup_vcpu(struct vm *vm, int vcpuid) -{ - if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) - return (NULL); - - return (vm_vcpu(vm, vcpuid)); -} - static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { - int error, vcpuid, state_changed, size; + int error, vcpuid, size; cpuset_t *cpuset; struct vmmdev_softc *sc; struct vcpu *vcpu; @@ -414,6 +412,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_readwrite_kernemu_device *kernemu; uint64_t *regvals; int *regnums; + enum { NONE, SINGLE, ALL } vcpus_locked; bool memsegs_locked; #ifdef BHYVE_SNAPSHOT struct vm_snapshot_meta *snapshot_meta; @@ -429,7 +428,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vcpuid = -1; vcpu = NULL; - state_changed = 0; + vcpus_locked = NONE; memsegs_locked = false; /* @@ -465,11 +464,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, * ioctls that can operate only on vcpus that are not running. */ vcpuid = *(int *)data; - error = vcpu_lock_one(sc, vcpuid); + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); + if (vcpu == NULL) { + error = EINVAL; + goto done; + } + error = vcpu_lock_one(vcpu); if (error) goto done; - state_changed = 1; - vcpu = vm_vcpu(sc->vm, vcpuid); + vcpus_locked = SINGLE; break; #ifdef COMPAT_FREEBSD12 @@ -501,7 +504,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = vcpu_lock_all(sc); if (error) goto done; - state_changed = 2; + vcpus_locked = ALL; break; #ifdef COMPAT_FREEBSD12 @@ -528,7 +531,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, * a specific vCPU. */ vcpuid = *(int *)data; - vcpu = lookup_vcpu(sc->vm, vcpuid); + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); if (vcpu == NULL) { error = EINVAL; goto done; @@ -545,7 +548,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vcpuid = *(int *)data; if (vcpuid == -1) break; - vcpu = lookup_vcpu(sc->vm, vcpuid); + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); if (vcpu == NULL) { error = EINVAL; goto done; @@ -957,9 +960,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, break; } - if (state_changed == 1) - vcpu_unlock_one(sc, vcpuid); - else if (state_changed == 2) + if (vcpus_locked == SINGLE) + vcpu_unlock_one(sc, vcpuid, vcpu); + else if (vcpus_locked == ALL) vcpu_unlock_all(sc); if (memsegs_locked) vm_unlock_memsegs(sc->vm); @@ -1041,8 +1044,10 @@ vmmdev_destroy(void *arg) struct devmem_softc *dsc; int error __diagused; + vm_disable_vcpu_creation(sc->vm); error = vcpu_lock_all(sc); KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); + vm_unlock_vcpus(sc->vm); while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202211181826.2AIIQtkb030784>