Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 13 Jan 2026 14:03:24 +0000
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: d160447129fe - main - vm_object: remove the charge member
Message-ID:  <696650ac.9fe3.4aba108e@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=d160447129fe060b28bcd6ba429d17afdf494ff2

commit d160447129fe060b28bcd6ba429d17afdf494ff2
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-01-03 09:34:23 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-01-13 14:03:14 +0000

    vm_object: remove the charge member
    
    State that the object charge is zero if object->cred == NULL, or equal
    to the ptoa(object->size) otherwise.
    
    Besides being much simpler, the transition to use object->size corrects
    the architectural issue with the use of object->charge.  The split
    operations effectively carve the holes in the charged regions, but
    single counter cannot properly express it.  As result, coalescing
    anonymous mappings cannot calculate correctly if the extended mapping
    already backed by the existing object is already accounted or not [1].
    
    To properly solve the issue, either we need to start tracking exact
    charged regions in the anonymous objects, which has the significant
    overhead and complications.  Or give up on the slight over-accounting
    and charge the whole object unconditionally, as it is done in the patch.
    
    Reported by:    mmel, pho [1]
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D54572
---
 sys/dev/md/md.c     |  2 --
 sys/kern/uipc_shm.c |  2 --
 sys/vm/swap_pager.c |  8 ++------
 sys/vm/vm_fault.c   |  5 ++---
 sys/vm/vm_map.c     | 45 ++++++++++++++++++++++-----------------------
 sys/vm/vm_object.c  | 46 +++++++++++++++++++++++++++-------------------
 sys/vm/vm_object.h  |  4 +---
 7 files changed, 54 insertions(+), 58 deletions(-)

diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 9d246d7c78fd..8d2908264aac 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -1617,7 +1617,6 @@ mdresize(struct md_s *sc, struct md_req *mdr)
 			    0, 0);
 			swap_release_by_cred(IDX_TO_OFF(oldpages -
 			    newpages), sc->cred);
-			sc->s_swap.object->charge = IDX_TO_OFF(newpages);
 			sc->s_swap.object->size = newpages;
 			VM_OBJECT_WUNLOCK(sc->s_swap.object);
 		} else if (newpages > oldpages) {
@@ -1637,7 +1636,6 @@ mdresize(struct md_s *sc, struct md_req *mdr)
 				}
 			}
 			VM_OBJECT_WLOCK(sc->s_swap.object);
-			sc->s_swap.object->charge = IDX_TO_OFF(newpages);
 			sc->s_swap.object->size = newpages;
 			VM_OBJECT_WUNLOCK(sc->s_swap.object);
 		}
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index eb1327f7f2de..fe3feab4149f 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -759,7 +759,6 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 
 		/* Free the swap accounted for shm */
 		swap_release_by_cred(delta, object->cred);
-		object->charge -= delta;
 	} else {
 		if ((shmfd->shm_seals & F_SEAL_GROW) != 0)
 			return (EPERM);
@@ -768,7 +767,6 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 		delta = IDX_TO_OFF(nobjsize - object->size);
 		if (!swap_reserve_by_cred(delta, object->cred))
 			return (ENOMEM);
-		object->charge += delta;
 	}
 	shmfd->shm_size = length;
 	mtx_lock(&shm_timestamp_lock);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 3f077289ac30..efb0dd477c93 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -784,10 +784,7 @@ swap_pager_init_object(vm_object_t object, void *handle, struct ucred *cred,
 
 	object->un_pager.swp.writemappings = 0;
 	object->handle = handle;
-	if (cred != NULL) {
-		object->cred = cred;
-		object->charge = size;
-	}
+	object->cred = cred;
 	return (true);
 }
 
@@ -900,8 +897,7 @@ swap_pager_dealloc(vm_object_t object)
 	 * Release the allocation charge.
 	 */
 	if (object->cred != NULL) {
-		swap_release_by_cred(object->charge, object->cred);
-		object->charge = 0;
+		swap_release_by_cred(ptoa(object->size), object->cred);
 		crfree(object->cred);
 		object->cred = NULL;
 	}
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 1f13869aebf1..addda72e2b56 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -2310,13 +2310,12 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map __unused,
 		 * directly.
 		 */
 		dst_object = vm_object_allocate_anon(atop(dst_entry->end -
-		    dst_entry->start), NULL, NULL, 0);
+		    dst_entry->start), NULL, NULL);
 #if VM_NRESERVLEVEL > 0
 		dst_object->flags |= OBJ_COLORED;
 		dst_object->pg_color = atop(dst_entry->start);
 #endif
 		dst_object->domain = src_object->domain;
-		dst_object->charge = dst_entry->end - dst_entry->start;
 
 		dst_entry->object.vm_object = dst_object;
 		dst_entry->offset = 0;
@@ -2329,7 +2328,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map __unused,
 		    ("vm_fault_copy_entry: leaked swp charge"));
 		dst_object->cred = curthread->td_ucred;
 		crhold(dst_object->cred);
-		*fork_charge += dst_object->charge;
+		*fork_charge += ptoa(dst_object->size);
 	} else if ((dst_object->flags & OBJ_SWAP) != 0 &&
 	    dst_object->cred == NULL) {
 		KASSERT(dst_entry->cred != NULL, ("no cred for entry %p",
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 4c33b786eaa0..04628f44a497 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2428,7 +2428,7 @@ vm_map_entry_back(vm_map_entry_t entry)
 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
 	    ("map entry %p is a submap", entry));
 	object = vm_object_allocate_anon(atop(entry->end - entry->start), NULL,
-	    entry->cred, entry->end - entry->start);
+	    entry->cred);
 	entry->object.vm_object = object;
 	entry->offset = 0;
 	entry->cred = NULL;
@@ -2443,21 +2443,26 @@ vm_map_entry_back(vm_map_entry_t entry)
 static inline void
 vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry)
 {
+	vm_object_t object;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
 	    ("map entry %p is a submap", entry));
-	if (entry->object.vm_object == NULL && !vm_map_is_system(map) &&
+	object = entry->object.vm_object;
+	if (object == NULL && !vm_map_is_system(map) &&
 	    (entry->eflags & MAP_ENTRY_GUARD) == 0)
 		vm_map_entry_back(entry);
-	else if (entry->object.vm_object != NULL &&
+	else if (object != NULL &&
 	    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
 	    entry->cred != NULL) {
-		VM_OBJECT_WLOCK(entry->object.vm_object);
-		KASSERT(entry->object.vm_object->cred == NULL,
+		VM_OBJECT_WLOCK(object);
+		KASSERT(object->cred == NULL,
 		    ("OVERCOMMIT: %s: both cred e %p", __func__, entry));
-		entry->object.vm_object->cred = entry->cred;
-		entry->object.vm_object->charge = entry->end - entry->start;
+		object->cred = entry->cred;
+		if (entry->end - entry->start < ptoa(object->size)) {
+			swap_reserve_force_by_cred(ptoa(object->size) -
+			    entry->end + entry->start, object->cred);
+		}
 		VM_OBJECT_WUNLOCK(entry->object.vm_object);
 		entry->cred = NULL;
 	}
@@ -2956,7 +2961,7 @@ again:
 		 * we cannot distinguish between non-charged and
 		 * charged clipped mapping of the same object later.
 		 */
-		KASSERT(obj->charge == 0,
+		KASSERT(obj->cred == NULL,
 		    ("vm_map_protect: object %p overcharged (entry %p)",
 		    obj, entry));
 		if (!swap_reserve(ptoa(obj->size))) {
@@ -2968,7 +2973,6 @@ again:
 
 		crhold(cred);
 		obj->cred = cred;
-		obj->charge = ptoa(obj->size);
 		VM_OBJECT_WUNLOCK(obj);
 	}
 
@@ -3942,7 +3946,7 @@ static void
 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_object_t object;
-	vm_pindex_t offidxstart, offidxend, size1;
+	vm_pindex_t offidxstart, offidxend, oldsize;
 	vm_size_t size;
 
 	vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
@@ -3989,15 +3993,11 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
 			    OBJPR_NOTMAPPED);
 			if (offidxend >= object->size &&
 			    offidxstart < object->size) {
-				size1 = object->size;
+				oldsize = object->size;
 				object->size = offidxstart;
 				if (object->cred != NULL) {
-					size1 -= object->size;
-					KASSERT(object->charge >= ptoa(size1),
-					    ("object %p charge < 0", object));
-					swap_release_by_cred(ptoa(size1),
-					    object->cred);
-					object->charge -= ptoa(size1);
+					swap_release_by_cred(ptoa(oldsize -
+					    ptoa(object->size)), object->cred);
 				}
 			}
 		}
@@ -4198,7 +4198,7 @@ vm_map_copy_swap_object(vm_map_entry_t src_entry, vm_map_entry_t dst_entry,
 		    ("OVERCOMMIT: vm_map_copy_anon_entry: cred %p",
 		     src_object));
 		src_object->cred = src_entry->cred;
-		src_object->charge = size;
+		*fork_charge += ptoa(src_object->size) - size;
 	}
 	dst_entry->object.vm_object = src_object;
 	if (charged) {
@@ -4455,7 +4455,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
 					KASSERT(object->cred == NULL,
 					    ("vmspace_fork both cred"));
 					object->cred = old_entry->cred;
-					object->charge = old_entry->end -
+					*fork_charge += old_entry->end -
 					    old_entry->start;
 					old_entry->cred = NULL;
 				}
@@ -5145,7 +5145,7 @@ RetryLookupLocked:
 		if (vm_map_lock_upgrade(map))
 			goto RetryLookup;
 		entry->object.vm_object = vm_object_allocate_anon(atop(size),
-		    NULL, entry->cred, size);
+		    NULL, entry->cred);
 		entry->offset = 0;
 		entry->cred = NULL;
 		vm_map_lock_downgrade(map);
@@ -5403,9 +5403,8 @@ vm_map_print(vm_map_t map)
 			    (void *)entry->object.vm_object,
 			    (uintmax_t)entry->offset);
 			if (entry->object.vm_object && entry->object.vm_object->cred)
-				db_printf(", obj ruid %d charge %jx",
-				    entry->object.vm_object->cred->cr_ruid,
-				    (uintmax_t)entry->object.vm_object->charge);
+				db_printf(", obj ruid %d ",
+				    entry->object.vm_object->cred->cr_ruid);
 			if (entry->eflags & MAP_ENTRY_COW)
 				db_printf(", copy (%s)",
 				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index f4c54ba91742..aa2d7676e6a8 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -196,9 +196,9 @@ vm_object_zdtor(void *mem, int size, void *arg)
 	KASSERT(object->type == OBJT_DEAD,
 	    ("object %p has non-dead type %d",
 	    object, object->type));
-	KASSERT(object->charge == 0 && object->cred == NULL,
-	    ("object %p has non-zero charge %ju (%p)",
-	    object, (uintmax_t)object->charge, object->cred));
+	KASSERT(object->cred == NULL,
+	    ("object %p has non-zero charge cred %p",
+	    object, object->cred));
 }
 #endif
 
@@ -254,7 +254,6 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags,
 	refcount_init(&object->ref_count, 1);
 	object->memattr = VM_MEMATTR_DEFAULT;
 	object->cred = NULL;
-	object->charge = 0;
 	object->handle = handle;
 	object->backing_object = NULL;
 	object->backing_object_offset = (vm_ooffset_t) 0;
@@ -452,7 +451,7 @@ vm_object_allocate_dyn(objtype_t dyntype, vm_pindex_t size, u_short flags)
  */
 vm_object_t
 vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
-    struct ucred *cred, vm_size_t charge)
+    struct ucred *cred)
 {
 	vm_object_t handle, object;
 
@@ -466,7 +465,6 @@ vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
 	_vm_object_allocate(OBJT_SWAP, size,
 	    OBJ_ANON | OBJ_ONEMAPPING | OBJ_SWAP, object, handle);
 	object->cred = cred;
-	object->charge = cred != NULL ? charge : 0;
 	return (object);
 }
 
@@ -1448,7 +1446,7 @@ vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length,
 	/*
 	 * Allocate a new object with the given length.
 	 */
-	result = vm_object_allocate_anon(atop(length), source, cred, length);
+	result = vm_object_allocate_anon(atop(length), source, cred);
 
 	/*
 	 * Store the offset into the source object, and fix up the offset into
@@ -1511,6 +1509,7 @@ vm_object_split(vm_map_entry_t entry)
 	struct pctrie_iter pages;
 	vm_page_t m;
 	vm_object_t orig_object, new_object, backing_object;
+	struct ucred *cred;
 	vm_pindex_t offidxstart;
 	vm_size_t size;
 
@@ -1525,9 +1524,26 @@ vm_object_split(vm_map_entry_t entry)
 
 	offidxstart = OFF_TO_IDX(entry->offset);
 	size = atop(entry->end - entry->start);
+	if (orig_object->cred != NULL) {
+		/*
+		 * vm_object_split() is currently called from
+		 * vmspace_fork(), and it might be tempting to add the
+		 * charge for the split object to fork_charge.  But
+		 * fork_charge is discharged on error when the copied
+		 * vmspace is destroyed.  Since the split object is
+		 * inserted into the shadow hierarchy serving the
+		 * source vm_map, it is kept even after the
+		 * unsuccessful fork, meaning that we have to force
+		 * its swap usage.
+		 */
+		cred = curthread->td_ucred;
+		crhold(cred);
+		swap_reserve_force_by_cred(ptoa(size), cred);
+	} else {
+		cred = NULL;
+	}
 
-	new_object = vm_object_allocate_anon(size, orig_object,
-	    orig_object->cred, ptoa(size));
+	new_object = vm_object_allocate_anon(size, orig_object, cred);
 
 	/*
 	 * We must wait for the orig_object to complete any in-progress
@@ -1550,12 +1566,6 @@ vm_object_split(vm_map_entry_t entry)
 		new_object->backing_object_offset = 
 		    orig_object->backing_object_offset + entry->offset;
 	}
-	if (orig_object->cred != NULL) {
-		crhold(orig_object->cred);
-		KASSERT(orig_object->charge >= ptoa(size),
-		    ("orig_object->charge < 0"));
-		orig_object->charge -= ptoa(size);
-	}
 
 	/*
 	 * Mark the split operation so that swap_pager_getpages() knows
@@ -2233,7 +2243,6 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
 				swap_release_by_cred(ptoa(prev_object->size -
 				    next_pindex), prev_object->cred);
 			}
-			prev_object->charge += charge;
 		} else if ((cflags & OBJCO_CHARGED) != 0) {
 			/*
 			 * The caller charged, but the object has
@@ -2786,9 +2795,8 @@ DB_SHOW_COMMAND(object, vm_object_print_static)
 	db_iprintf("Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x",
 	    object, (int)object->type, (uintmax_t)object->size,
 	    object->resident_page_count, object->ref_count, object->flags);
-	db_iprintf(" ruid %d charge %jx\n",
-	    object->cred ? object->cred->cr_ruid : -1,
-	    (uintmax_t)object->charge);
+	db_iprintf(" ruid %d\n",
+	    object->cred ? object->cred->cr_ruid : -1);
 	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
 	    atomic_load_int(&object->shadow_count),
 	    object->backing_object ? object->backing_object->ref_count : 0,
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index ca88adc12c24..e01d8ad79995 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -175,7 +175,6 @@ struct vm_object {
 		} phys;
 	} un_pager;
 	struct ucred *cred;
-	vm_ooffset_t charge;
 	void *umtx_data;
 };
 
@@ -356,8 +355,7 @@ void umtx_shm_object_terminated(vm_object_t object);
 extern int umtx_shm_vnobj_persistent;
 
 vm_object_t vm_object_allocate (objtype_t, vm_pindex_t);
-vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *,
-   vm_size_t);
+vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *);
 vm_object_t vm_object_allocate_dyn(objtype_t, vm_pindex_t, u_short);
 boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t,
    int);


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?696650ac.9fe3.4aba108e>