Date:      Tue, 18 Feb 2025 16:00:16 GMT
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: c76c2a19ae37 - main - vmm: Consolidate code which manages guest memory regions
Message-ID:  <202502181600.51IG0GY8025885@gitrepo.freebsd.org>

The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=c76c2a19ae3763d17aa6a60a5831ed24cbc16e83

commit c76c2a19ae3763d17aa6a60a5831ed24cbc16e83
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-02-18 15:53:16 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-02-18 16:00:07 +0000

    vmm: Consolidate code which manages guest memory regions
    
    On all three platforms supported by vmm, we have mostly duplicated code
    to manage guest physical memory regions.  Deduplicate much of this code
    and move it into sys/dev/vmm/vmm_mem.c.
    
    To avoid exporting struct vm outside of machdep vmm.c, add a new
    struct vm_mem to contain the memory segment descriptors, and add a
    vm_mem() accessor, akin to vm_vmspace().  This way vmm_mem.c can
    implement its routines without needing to see the layout of struct vm.
    
    The handling of the per-VM vmspace is also duplicated but will be moved
    to vmm_mem.c in a follow-up patch.
    
    On amd64, move the ppt_is_mmio() check out of vm_mem_allocated() to keep
    the code MI, as PPT is only implemented on amd64.  There are only a
    couple of callers, so this is not unreasonable.
    
    No functional change intended.
    
    Reviewed by:    jhb
    Differential Revision:  https://reviews.freebsd.org/D48270
---
 sys/amd64/include/vmm.h                        |  30 +-
 sys/amd64/vmm/amd/svm.c                        |   5 +-
 sys/amd64/vmm/intel/vmx.c                      |   4 +-
 sys/amd64/vmm/vmm.c                            | 419 ++--------------------
 sys/amd64/vmm/vmm_instruction_emul.c           |   2 +
 sys/amd64/vmm/{vmm_mem.c => vmm_mem_machdep.c} |   0
 sys/arm64/include/vmm.h                        |  34 +-
 sys/arm64/vmm/vmm.c                            | 412 ++--------------------
 sys/arm64/vmm/vmm_arm64.c                      |   2 +
 sys/arm64/vmm/vmm_dev_machdep.c                |   1 +
 sys/conf/files.arm64                           |   1 +
 sys/conf/files.riscv                           |   1 +
 sys/dev/vmm/vmm_dev.c                          |   1 +
 sys/dev/vmm/vmm_mem.c                          | 459 +++++++++++++++++++++++++
 sys/dev/vmm/vmm_mem.h                          |  84 +++++
 sys/modules/vmm/Makefile                       |   3 +-
 sys/riscv/include/vmm.h                        |  34 +-
 sys/riscv/vmm/vmm.c                            | 418 ++--------------------
 sys/riscv/vmm/vmm_dev_machdep.c                |   1 +
 sys/riscv/vmm/vmm_riscv.c                      |   2 +
 20 files changed, 636 insertions(+), 1277 deletions(-)
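
For orientation before the diff: a minimal sketch of the consolidated
container and its accessor. The layout below is inferred from the
per-platform struct mem_seg/struct mem_map definitions removed in this
patch and from the vm->mem.* references visible in the hunks; the
authoritative definitions live in the new sys/dev/vmm/vmm_mem.h, which
this truncated message does not include.

    /*
     * Sketch only: names and field layout inferred from this diff,
     * not copied from sys/dev/vmm/vmm_mem.h.
     */
    struct vm_mem_seg {
            size_t          len;
            bool            sysmem;
            struct vm_object *object;
    };

    struct vm_mem_map {
            vm_paddr_t      gpa;            /* guest physical base */
            size_t          len;
            vm_ooffset_t    segoff;         /* offset into backing segment */
            int             segid;
            int             prot;
            int             flags;
    };

    struct vm_mem {
            struct vm_mem_map mem_maps[VM_MAX_MEMMAPS];
            struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS];
            struct sx       mem_segs_lock;
    };

    /*
     * Accessor, akin to vm_vmspace(); the MI code needs only this,
     * never the layout of the machdep struct vm. The body matches
     * the one added to amd64's vmm.c below.
     */
    struct vm_mem *
    vm_mem(struct vm *vm)
    {
            return (&vm->mem);
    }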

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 6501baa455da..a9c73b75213b 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -150,6 +150,7 @@ CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN);
 
 struct vm;
 struct vm_exception;
+struct vm_mem;
 struct seg_desc;
 struct vm_exit;
 struct vm_run;
@@ -238,39 +239,11 @@ void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
 int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
     uint16_t threads, uint16_t maxcpus);
 
-/*
- * APIs that modify the guest memory map require all vcpus to be frozen.
- */
-void vm_slock_memsegs(struct vm *vm);
-void vm_xlock_memsegs(struct vm *vm);
-void vm_unlock_memsegs(struct vm *vm);
-int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
-    size_t len, int prot, int flags);
-int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
-void vm_free_memseg(struct vm *vm, int ident);
 int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
 
-/*
- * APIs that inspect the guest memory map require only a *single* vcpu to
- * be frozen. This acts like a read lock on the guest memory map since any
- * modification requires *all* vcpus to be frozen.
- */
-int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
-    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
-int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
-    struct vm_object **objptr);
-vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
-void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
-    int prot, void **cookie);
-void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
-    int prot, void **cookie);
-void vm_gpa_release(void *cookie);
-bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
-
 int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
 int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
 int vm_get_seg_desc(struct vcpu *vcpu, int reg,
@@ -402,6 +375,7 @@ vcpu_should_yield(struct vcpu *vcpu)
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr);
 struct vmspace *vm_vmspace(struct vm *vm);
+struct vm_mem *vm_mem(struct vm *vm);
 struct vatpic *vm_atpic(struct vm *vm);
 struct vatpit *vm_atpit(struct vm *vm);
 struct vpmtmr *vm_pmtmr(struct vm *vm);
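
The locking comments deleted from this header encode a contract that
carries over to the consolidated code: APIs that modify the memory map
take the memsegs lock exclusively (all vcpus frozen), while APIs that
only inspect it take the lock shared (a single frozen vcpu suffices).
Below is a hypothetical in-kernel consumer, assuming the declarations
above moved verbatim into the new dev/vmm/vmm_mem.h; report_memseg()
itself is illustrative, not part of the patch:

    #include <dev/vmm/vmm_mem.h>

    static int
    report_memseg(struct vm *vm, int ident)
    {
            size_t len;
            bool sysmem;
            int error;

            vm_slock_memsegs(vm);           /* shared: inspect-only */
            error = vm_get_memseg(vm, ident, &len, &sysmem, NULL);
            vm_unlock_memsegs(vm);
            if (error == 0)
                    printf("memseg %d: %zu bytes, %s\n", ident, len,
                        sysmem ? "sysmem" : "devmem");
            return (error);
    }
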
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index ed37dd0c810e..6c16daaa47c2 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -55,6 +55,7 @@
 #include <machine/vmm_snapshot.h>
 
 #include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_mem.h>
 
 #include "vmm_lapic.h"
 #include "vmm_stat.h"
@@ -69,6 +70,7 @@
 #include "svm_softc.h"
 #include "svm_msr.h"
 #include "npt.h"
+#include "io/ppt.h"
 
 SYSCTL_DECL(_hw_vmm);
 SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
@@ -1593,7 +1595,8 @@ svm_vmexit(struct svm_softc *svm_sc, struct svm_vcpu *vcpu,
 			SVM_CTR2(vcpu, "nested page fault with "
 			    "reserved bits set: info1(%#lx) info2(%#lx)",
 			    info1, info2);
-		} else if (vm_mem_allocated(vcpu->vcpu, info2)) {
+		} else if (vm_mem_allocated(vcpu->vcpu, info2) ||
+		    ppt_is_mmio(svm_sc->vm, info2)) {
 			vmexit->exitcode = VM_EXITCODE_PAGING;
 			vmexit->u.paging.gpa = info2;
 			vmexit->u.paging.fault_type = npf_fault_type(info1);
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 13a53fa8eed6..957217ab2258 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -59,6 +59,7 @@
 #include <machine/vmm_snapshot.h>
 
 #include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_mem.h>
 
 #include "vmm_lapic.h"
 #include "vmm_host.h"
@@ -74,6 +75,7 @@
 #include "vmx_msr.h"
 #include "x86.h"
 #include "vmx_controls.h"
+#include "io/ppt.h"
 
 #define	PINBASED_CTLS_ONE_SETTING					\
 	(PINBASED_EXTINT_EXITING	|				\
@@ -2756,7 +2758,7 @@ vmx_exit_process(struct vmx *vmx, struct vmx_vcpu *vcpu, struct vm_exit *vmexit)
 		 */
 		gpa = vmcs_gpa();
 		if (vm_mem_allocated(vcpu->vcpu, gpa) ||
-		    apic_access_fault(vcpu, gpa)) {
+		    ppt_is_mmio(vmx->vm, gpa) || apic_access_fault(vcpu, gpa)) {
 			vmexit->exitcode = VM_EXITCODE_PAGING;
 			vmexit->inst_length = 0;
 			vmexit->u.paging.gpa = gpa;
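
The svm.c and vmx.c hunks above are the two callers the commit message
refers to: the passthru-MMIO test that vm_mem_allocated() used to make
internally (see the block removed from amd64's vmm.c below) is now ORed
in at each call site, leaving the helper machine-independent.
Schematically (the surrounding exit handling is elided):

    /* Before: the MD check was buried inside the helper. */
    if (vm_mem_allocated(vcpu, gpa))        /* also tested ppt_is_mmio() */
            vmexit->exitcode = VM_EXITCODE_PAGING;

    /* After: the helper is MI; amd64 callers add the PPT check. */
    if (vm_mem_allocated(vcpu, gpa) || ppt_is_mmio(vm, gpa))
            vmexit->exitcode = VM_EXITCODE_PAGING;
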
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index aa13d506ac6a..1d410835be88 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -26,7 +26,6 @@
  * SUCH DAMAGE.
  */
 
-#include <sys/cdefs.h>
 #include "opt_bhyve_snapshot.h"
 
 #include <sys/param.h>
@@ -72,6 +71,7 @@
 
 #include <dev/vmm/vmm_dev.h>
 #include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_mem.h>
 
 #include "vmm_ioport.h"
 #include "vmm_host.h"
@@ -131,23 +131,6 @@ struct vcpu {
 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
 
-struct mem_seg {
-	size_t	len;
-	bool	sysmem;
-	struct vm_object *object;
-};
-#define	VM_MAX_MEMSEGS	4
-
-struct mem_map {
-	vm_paddr_t	gpa;
-	size_t		len;
-	vm_ooffset_t	segoff;
-	int		segid;
-	int		prot;
-	int		flags;
-};
-#define	VM_MAX_MEMMAPS	8
-
 /*
  * Initialization:
  * (o) initialized the first time the VM is created
@@ -180,9 +163,8 @@ struct vm {
 	void		*rendezvous_arg;	/* (x) [r] rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
-	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) [m+v] guest address space */
-	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */
 	struct vmspace	*vmspace;		/* (o) guest's address space */
+	struct vm_mem	mem;			/* (i) [m+v] guest memory */
 	char		name[VM_MAX_NAMELEN+1];	/* (o) virtual machine name */
 	struct vcpu	**vcpu;			/* (o) guest vcpus */
 	/* The following describe the vm cpu topology */
@@ -190,7 +172,6 @@ struct vm {
 	uint16_t	cores;			/* (o) num of cores/socket */
 	uint16_t	threads;		/* (o) num of threads/core */
 	uint16_t	maxcpus;		/* (o) max pluggable cpus */
-	struct sx	mem_segs_lock;		/* (o) */
 	struct sx	vcpus_init_lock;	/* (o) */
 };
 
@@ -294,8 +275,6 @@ u_int vm_maxcpu;
 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &vm_maxcpu, 0, "Maximum number of vCPUs");
 
-static void vm_free_memmap(struct vm *vm, int ident);
-static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
 
 /* global statistics */
@@ -625,8 +604,8 @@ vm_create(const char *name, struct vm **retvm)
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
 	strcpy(vm->name, name);
 	vm->vmspace = vmspace;
+	vm_mem_init(&vm->mem);
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
-	sx_init(&vm->mem_segs_lock, "vm mem_segs");
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
 	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK |
 	    M_ZERO);
@@ -674,11 +653,10 @@ vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
-	struct mem_map *mm;
-	int i;
-
 	if (destroy)
 		vm_xlock_memsegs(vm);
+	else
+		vm_assert_memseg_xlocked(vm);
 
 	ppt_unassign_all(vm);
 
@@ -695,38 +673,23 @@ vm_cleanup(struct vm *vm, bool destroy)
 	vatpic_cleanup(vm->vatpic);
 	vioapic_cleanup(vm->vioapic);
 
-	for (i = 0; i < vm->maxcpus; i++) {
+	for (int i = 0; i < vm->maxcpus; i++) {
 		if (vm->vcpu[i] != NULL)
 			vcpu_cleanup(vm->vcpu[i], destroy);
 	}
 
 	vmmops_cleanup(vm->cookie);
 
-	/*
-	 * System memory is removed from the guest address space only when
-	 * the VM is destroyed. This is because the mapping remains the same
-	 * across VM reset.
-	 *
-	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
-	 * so those mappings are removed on a VM reset.
-	 */
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (destroy || !sysmem_mapping(vm, mm))
-			vm_free_memmap(vm, i);
-	}
+	vm_mem_cleanup(vm);
 
 	if (destroy) {
-		for (i = 0; i < VM_MAX_MEMSEGS; i++)
-			vm_free_memseg(vm, i);
-		vm_unlock_memsegs(vm);
+		vm_mem_destroy(vm);
 
 		vmmops_vmspace_free(vm->vmspace);
 		vm->vmspace = NULL;
 
 		free(vm->vcpu, M_VM);
 		sx_destroy(&vm->vcpus_init_lock);
-		sx_destroy(&vm->mem_segs_lock);
 		mtx_destroy(&vm->rendezvous_mtx);
 	}
 }
@@ -763,24 +726,6 @@ vm_name(struct vm *vm)
 	return (vm->name);
 }
 
-void
-vm_slock_memsegs(struct vm *vm)
-{
-	sx_slock(&vm->mem_segs_lock);
-}
-
-void
-vm_xlock_memsegs(struct vm *vm)
-{
-	sx_xlock(&vm->mem_segs_lock);
-}
-
-void
-vm_unlock_memsegs(struct vm *vm)
-{
-	sx_unlock(&vm->mem_segs_lock);
-}
-
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
@@ -800,283 +745,20 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 	return (0);
 }
 
-/*
- * Return 'true' if 'gpa' is allocated in the guest address space.
- *
- * This function is called in the context of a running vcpu which acts as
- * an implicit lock on 'vm->mem_maps[]'.
- */
-bool
-vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
-{
-	struct vm *vm = vcpu->vm;
-	struct mem_map *mm;
-	int i;
-
-#ifdef INVARIANTS
-	int hostcpu, state;
-	state = vcpu_get_state(vcpu, &hostcpu);
-	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
-	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
-#endif
-
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
-			return (true);		/* 'gpa' is sysmem or devmem */
-	}
-
-	if (ppt_is_mmio(vm, gpa))
-		return (true);			/* 'gpa' is pci passthru mmio */
-
-	return (false);
-}
-
-int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
-{
-	struct mem_seg *seg;
-	vm_object_t obj;
-
-	sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
-
-	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
-		return (EINVAL);
-
-	if (len == 0 || (len & PAGE_MASK))
-		return (EINVAL);
-
-	seg = &vm->mem_segs[ident];
-	if (seg->object != NULL) {
-		if (seg->len == len && seg->sysmem == sysmem)
-			return (EEXIST);
-		else
-			return (EINVAL);
-	}
-
-	obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
-	if (obj == NULL)
-		return (ENOMEM);
-
-	seg->len = len;
-	seg->object = obj;
-	seg->sysmem = sysmem;
-	return (0);
-}
-
-int
-vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
-    vm_object_t *objptr)
-{
-	struct mem_seg *seg;
-
-	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
-
-	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
-		return (EINVAL);
-
-	seg = &vm->mem_segs[ident];
-	if (len)
-		*len = seg->len;
-	if (sysmem)
-		*sysmem = seg->sysmem;
-	if (objptr)
-		*objptr = seg->object;
-	return (0);
-}
-
-void
-vm_free_memseg(struct vm *vm, int ident)
-{
-	struct mem_seg *seg;
-
-	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
-	    ("%s: invalid memseg ident %d", __func__, ident));
-
-	seg = &vm->mem_segs[ident];
-	if (seg->object != NULL) {
-		vm_object_deallocate(seg->object);
-		bzero(seg, sizeof(struct mem_seg));
-	}
-}
-
-int
-vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
-    size_t len, int prot, int flags)
-{
-	struct mem_seg *seg;
-	struct mem_map *m, *map;
-	vm_ooffset_t last;
-	int i, error;
-
-	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
-		return (EINVAL);
-
-	if (flags & ~VM_MEMMAP_F_WIRED)
-		return (EINVAL);
-
-	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
-		return (EINVAL);
-
-	seg = &vm->mem_segs[segid];
-	if (seg->object == NULL)
-		return (EINVAL);
-
-	last = first + len;
-	if (first < 0 || first >= last || last > seg->len)
-		return (EINVAL);
-
-	if ((gpa | first | last) & PAGE_MASK)
-		return (EINVAL);
-
-	map = NULL;
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		m = &vm->mem_maps[i];
-		if (m->len == 0) {
-			map = m;
-			break;
-		}
-	}
-
-	if (map == NULL)
-		return (ENOSPC);
-
-	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
-	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
-	if (error != KERN_SUCCESS)
-		return (EFAULT);
-
-	vm_object_reference(seg->object);
-
-	if (flags & VM_MEMMAP_F_WIRED) {
-		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
-		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-		if (error != KERN_SUCCESS) {
-			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
-			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
-			    EFAULT);
-		}
-	}
-
-	map->gpa = gpa;
-	map->len = len;
-	map->segoff = first;
-	map->segid = segid;
-	map->prot = prot;
-	map->flags = flags;
-	return (0);
-}
-
-int
-vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
-{
-	struct mem_map *m;
-	int i;
-
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		m = &vm->mem_maps[i];
-		if (m->gpa == gpa && m->len == len &&
-		    (m->flags & VM_MEMMAP_F_IOMMU) == 0) {
-			vm_free_memmap(vm, i);
-			return (0);
-		}
-	}
-
-	return (EINVAL);
-}
-
-int
-vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
-    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
-{
-	struct mem_map *mm, *mmnext;
-	int i;
-
-	mmnext = NULL;
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (mm->len == 0 || mm->gpa < *gpa)
-			continue;
-		if (mmnext == NULL || mm->gpa < mmnext->gpa)
-			mmnext = mm;
-	}
-
-	if (mmnext != NULL) {
-		*gpa = mmnext->gpa;
-		if (segid)
-			*segid = mmnext->segid;
-		if (segoff)
-			*segoff = mmnext->segoff;
-		if (len)
-			*len = mmnext->len;
-		if (prot)
-			*prot = mmnext->prot;
-		if (flags)
-			*flags = mmnext->flags;
-		return (0);
-	} else {
-		return (ENOENT);
-	}
-}
-
-static void
-vm_free_memmap(struct vm *vm, int ident)
-{
-	struct mem_map *mm;
-	int error __diagused;
-
-	mm = &vm->mem_maps[ident];
-	if (mm->len) {
-		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
-		    mm->gpa + mm->len);
-		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
-		    __func__, error));
-		bzero(mm, sizeof(struct mem_map));
-	}
-}
-
-static __inline bool
-sysmem_mapping(struct vm *vm, struct mem_map *mm)
-{
-
-	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
-		return (true);
-	else
-		return (false);
-}
-
-vm_paddr_t
-vmm_sysmem_maxaddr(struct vm *vm)
-{
-	struct mem_map *mm;
-	vm_paddr_t maxaddr;
-	int i;
-
-	maxaddr = 0;
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (sysmem_mapping(vm, mm)) {
-			if (maxaddr < mm->gpa + mm->len)
-				maxaddr = mm->gpa + mm->len;
-		}
-	}
-	return (maxaddr);
-}
-
 static void
 vm_iommu_map(struct vm *vm)
 {
 	vm_paddr_t gpa, hpa;
-	struct mem_map *mm;
+	struct vm_mem_map *mm;
 	int i;
 
-	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (!sysmem_mapping(vm, mm))
+		if (!vm_memseg_sysmem(vm, i))
 			continue;
 
+		mm = &vm->mem.mem_maps[i];
 		KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
 		    ("iommu map found invalid memmap %#lx/%#lx/%#x",
 		    mm->gpa, mm->len, mm->flags));
@@ -1114,16 +796,16 @@ static void
 vm_iommu_unmap(struct vm *vm)
 {
 	vm_paddr_t gpa;
-	struct mem_map *mm;
+	struct vm_mem_map *mm;
 	int i;
 
-	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (!sysmem_mapping(vm, mm))
+		if (!vm_memseg_sysmem(vm, i))
 			continue;
 
+		mm = &vm->mem.mem_maps[i];
 		if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
 			continue;
 		mm->flags &= ~VM_MEMMAP_F_IOMMU;
@@ -1183,69 +865,6 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 	return (error);
 }
 
-static void *
-_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-	int i, count, pageoff;
-	struct mem_map *mm;
-	vm_page_t m;
-
-	pageoff = gpa & PAGE_MASK;
-	if (len > PAGE_SIZE - pageoff)
-		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
-
-	count = 0;
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
-			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
-			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
-			break;
-		}
-	}
-
-	if (count == 1) {
-		*cookie = m;
-		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
-	} else {
-		*cookie = NULL;
-		return (NULL);
-	}
-}
-
-void *
-vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-#ifdef INVARIANTS
-	/*
-	 * The current vcpu should be frozen to ensure 'vm_memmap[]'
-	 * stability.
-	 */
-	int state = vcpu_get_state(vcpu, NULL);
-	KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
-	    __func__, state));
-#endif
-	return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
-}
-
-void *
-vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-	sx_assert(&vm->mem_segs_lock, SX_LOCKED);
-	return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
-}
-
-void
-vm_gpa_release(void *cookie)
-{
-	vm_page_t m = cookie;
-
-	vm_page_unwire(m, PQ_ACTIVE);
-}
-
 int
 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
 {
@@ -2684,6 +2303,12 @@ vm_vmspace(struct vm *vm)
 	return (vm->vmspace);
 }
 
+struct vm_mem *
+vm_mem(struct vm *vm)
+{
+	return (&vm->mem);
+}
+
 int
 vm_apicid2vcpuid(struct vm *vm, int apicid)
 {
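
With the accessor in place, the consolidated sys/dev/vmm/vmm_mem.c (459
new lines, not shown in this truncated message) can implement the lock
helpers without seeing struct vm. A plausible sketch, assuming they are
straight sx(9) wrappers like the per-platform ones deleted above:

    void
    vm_slock_memsegs(struct vm *vm)
    {
            sx_slock(&vm_mem(vm)->mem_segs_lock);
    }

    void
    vm_xlock_memsegs(struct vm *vm)
    {
            sx_xlock(&vm_mem(vm)->mem_segs_lock);
    }

    void
    vm_assert_memseg_xlocked(struct vm *vm)
    {
            sx_assert(&vm_mem(vm)->mem_segs_lock, SX_XLOCKED);
    }
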
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 6e1501493082..51769faf5893 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -39,6 +39,8 @@
 
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
+
+#include <dev/vmm/vmm_mem.h>
 #else	/* !_KERNEL */
 #include <sys/types.h>
 #include <sys/errno.h>
diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem_machdep.c
similarity index 100%
rename from sys/amd64/vmm/vmm_mem.c
rename to sys/amd64/vmm/vmm_mem_machdep.c
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
index acbd8f5cbcb9..1d783cdacb0d 100644
--- a/sys/arm64/include/vmm.h
+++ b/sys/arm64/include/vmm.h
@@ -150,38 +150,6 @@ void vm_destroy(struct vm *vm);
 int vm_reinit(struct vm *vm);
 const char *vm_name(struct vm *vm);
 
-/*
- * APIs that modify the guest memory map require all vcpus to be frozen.
- */
-void vm_slock_memsegs(struct vm *vm);
-void vm_xlock_memsegs(struct vm *vm);
-void vm_unlock_memsegs(struct vm *vm);
-int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
-    size_t len, int prot, int flags);
-int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
-void vm_free_memseg(struct vm *vm, int ident);
-
-/*
- * APIs that inspect the guest memory map require only a *single* vcpu to
- * be frozen. This acts like a read lock on the guest memory map since any
- * modification requires *all* vcpus to be frozen.
- */
-int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
-    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
-int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
-    struct vm_object **objptr);
-vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
-void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
-    int prot, void **cookie);
-void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
-    int prot, void **cookie);
-void vm_gpa_release(void *cookie);
-bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
-
-int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
-    uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
-
 uint16_t vm_get_maxcpus(struct vm *vm);
 void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
     uint16_t *threads, uint16_t *maxcpus);
@@ -262,6 +230,8 @@ vcpu_should_yield(struct vcpu *vcpu)
 
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu);
+struct vmspace *vm_vmspace(struct vm *vm);
+struct vm_mem *vm_mem(struct vm *vm);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
 
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 77c565e37264..ad82e6dbd432 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -65,6 +65,7 @@
 #include <dev/pci/pcireg.h>
 #include <dev/vmm/vmm_dev.h>
 #include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_mem.h>
 #include <dev/vmm/vmm_stat.h>
 
 #include "arm64.h"
@@ -94,25 +95,6 @@ struct vcpu {
 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
 
-struct mem_seg {
-	uint64_t	gpa;
-	size_t		len;
-	bool		wired;
-	bool		sysmem;
-	vm_object_t	object;
-};
-#define	VM_MAX_MEMSEGS	3
-
-struct mem_map {
-	vm_paddr_t	gpa;
-	size_t		len;
-	vm_ooffset_t	segoff;
-	int		segid;
-	int		prot;
-	int		flags;
-};
-#define	VM_MAX_MEMMAPS	4
-
 struct vmm_mmio_region {
 	uint64_t start;
 	uint64_t end;
@@ -144,9 +126,8 @@ struct vm {
 	bool		dying;			/* (o) is dying */
 	volatile cpuset_t suspended_cpus; 	/* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
-	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
-	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
 	struct vmspace	*vmspace;		/* (o) guest's address space */
+	struct vm_mem	mem;			/* (i) guest memory */
 	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
 	struct vcpu	**vcpu;			/* (i) guest vcpus */
 	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
@@ -157,7 +138,6 @@ struct vm {
 	uint16_t	cores;			/* (o) num of cores/socket */
 	uint16_t	threads;		/* (o) num of threads/core */
 	uint16_t	maxcpus;		/* (o) max pluggable cpus */
-	struct sx	mem_segs_lock;		/* (o) */
 	struct sx	vcpus_init_lock;	/* (o) */
 };
 
@@ -235,8 +215,6 @@ u_int vm_maxcpu;
 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &vm_maxcpu, 0, "Maximum number of vCPUs");
 
-static void vm_free_memmap(struct vm *vm, int ident);
-static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu);
 
 /* global statistics */
@@ -508,7 +486,7 @@ vm_create(const char *name, struct vm **retvm)
 	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
 	strcpy(vm->name, name);
 	vm->vmspace = vmspace;
-	sx_init(&vm->mem_segs_lock, "vm mem_segs");
+	vm_mem_init(&vm->mem);
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
 
 	vm->sockets = 1;
@@ -557,11 +535,11 @@ vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
-	struct mem_map *mm;
 	pmap_t pmap __diagused;
 	int i;
 
 	if (destroy) {
+		vm_xlock_memsegs(vm);
 		pmap = vmspace_pmap(vm->vmspace);
 		sched_pin();
 		PCPU_SET(curvmpmap, NULL);
@@ -569,7 +547,9 @@ vm_cleanup(struct vm *vm, bool destroy)
 		CPU_FOREACH(i) {
 			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
 		}
-	}
+	} else
+		vm_assert_memseg_xlocked(vm);
+
 
 	vgic_detach_from_vm(vm->cookie);
 
@@ -580,25 +560,9 @@ vm_cleanup(struct vm *vm, bool destroy)
 
 	vmmops_cleanup(vm->cookie);
 
-	/*
-	 * System memory is removed from the guest address space only when
-	 * the VM is destroyed. This is because the mapping remains the same
-	 * across VM reset.
-	 *
-	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
-	 * so those mappings are removed on a VM reset.
-	 */
-	if (!destroy) {
-		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-			mm = &vm->mem_maps[i];
-			if (destroy || !sysmem_mapping(vm, mm))
-				vm_free_memmap(vm, i);
-		}
-	}
-
+	vm_mem_cleanup(vm);
 	if (destroy) {
-		for (i = 0; i < VM_MAX_MEMSEGS; i++)
-			vm_free_memseg(vm, i);
+		vm_mem_destroy(vm);
 
 		vmmops_vmspace_free(vm->vmspace);
 		vm->vmspace = NULL;
@@ -607,7 +571,6 @@ vm_cleanup(struct vm *vm, bool destroy)
 			free(vm->vcpu[i], M_VMM);
 		free(vm->vcpu, M_VMM);
 		sx_destroy(&vm->vcpus_init_lock);
-		sx_destroy(&vm->mem_segs_lock);
 	}
 }
 
@@ -643,290 +606,11 @@ vm_name(struct vm *vm)
 	return (vm->name);
 }
 
-void
-vm_slock_memsegs(struct vm *vm)
-{
-	sx_slock(&vm->mem_segs_lock);
-}
-
-void
-vm_xlock_memsegs(struct vm *vm)
-{
-	sx_xlock(&vm->mem_segs_lock);
-}
-
-void
-vm_unlock_memsegs(struct vm *vm)
-{
-	sx_unlock(&vm->mem_segs_lock);
-}
-
-/*
- * Return 'true' if 'gpa' is allocated in the guest address space.
- *
- * This function is called in the context of a running vcpu which acts as
- * an implicit lock on 'vm->mem_maps[]'.
- */
-bool
-vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
-{
-	struct vm *vm = vcpu->vm;
-	struct mem_map *mm;
-	int i;
-
-#ifdef INVARIANTS
-	int hostcpu, state;
-	state = vcpu_get_state(vcpu, &hostcpu);
-	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
-	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
-#endif
-
-	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-		mm = &vm->mem_maps[i];
-		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
-			return (true);		/* 'gpa' is sysmem or devmem */
-	}
-
-	return (false);
-}
-
-int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
*** 1557 LINES SKIPPED ***


