Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 9 Sep 2020 22:02:30 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r365520 - head/sys/vm
Message-ID:  <202009092202.089M2UbZ007422@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Wed Sep  9 22:02:30 2020
New Revision: 365520
URL: https://svnweb.freebsd.org/changeset/base/365520

Log:
  vm_map: Add a map entry kind that can only be clipped at a specific boundary.
  
  The entries and their clip boundaries must be aligned on a supported
  superpage size from pagesizes[].  vm_map operations return the Mach
  error KERN_INVALID_ARGUMENT, which is usually translated to EINVAL, if
  they would require clipping such an entry anywhere other than at its
  boundary.
  
  In other words, such entries force the superpage properties of their
  virtual addresses to be preserved.
  
  Reviewed by:	markj
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week
  Differential revision:	https://reviews.freebsd.org/D24652

Modified:
  head/sys/vm/vm_map.c
  head/sys/vm/vm_map.h

Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c	Wed Sep  9 21:57:55 2020	(r365519)
+++ head/sys/vm/vm_map.c	Wed Sep  9 22:02:30 2020	(r365520)
@@ -1554,13 +1554,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
 	struct ucred *cred;
 	vm_eflags_t protoeflags;
 	vm_inherit_t inheritance;
+	u_long bdry;
+	u_int bidx;
 
 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(object != kernel_object ||
 	    (cow & MAP_COPY_ON_WRITE) == 0,
 	    ("vm_map_insert: kernel object and COW"));
-	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
-	    ("vm_map_insert: paradoxical MAP_NOFAULT request"));
+	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 ||
+	    (cow & MAP_SPLIT_BOUNDARY_MASK) != 0,
+	    ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x",
+	    object, cow));
 	KASSERT((prot & ~max) == 0,
 	    ("prot %#x is not subset of max_prot %#x", prot, max));
 
@@ -1615,6 +1619,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
 		inheritance = VM_INHERIT_SHARE;
 	else
 		inheritance = VM_INHERIT_DEFAULT;
+	if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) {
+		/* This magically ignores index 0, for usual page size. */
+		bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >>
+		    MAP_SPLIT_BOUNDARY_SHIFT;
+		if (bidx >= MAXPAGESIZES)
+			return (KERN_INVALID_ARGUMENT);
+		bdry = pagesizes[bidx] - 1;
+		if ((start & bdry) != 0 || (end & bdry) != 0)
+			return (KERN_INVALID_ARGUMENT);
+		protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+	}
 
 	cred = NULL;
 	if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
@@ -2342,31 +2357,40 @@ vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry)
  *	the specified address; if necessary,
  *	it splits the entry into two.
  */
-static inline void
-vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
+static int
+vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr)
 {
 	vm_map_entry_t new_entry;
+	int bdry_idx;
 
 	if (!map->system_map)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "%s: map %p entry %p start 0x%jx", __func__, map, entry,
-		    (uintmax_t)start);
+		    (uintmax_t)startaddr);
 
-	if (start <= entry->start)
-		return;
+	if (startaddr <= entry->start)
+		return (KERN_SUCCESS);
 
 	VM_MAP_ASSERT_LOCKED(map);
-	KASSERT(entry->end > start && entry->start < start,
+	KASSERT(entry->end > startaddr && entry->start < startaddr,
 	    ("%s: invalid clip of entry %p", __func__, entry));
 
+	bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+	    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+	if (bdry_idx != 0) {
+		if ((startaddr & (pagesizes[bdry_idx] - 1)) != 0)
+			return (KERN_INVALID_ARGUMENT);
+	}
+
 	new_entry = vm_map_entry_clone(map, entry);
 
 	/*
 	 * Split off the front portion.  Insert the new entry BEFORE this one,
 	 * so that this entry has the specified starting address.
 	 */
-	new_entry->end = start;
+	new_entry->end = startaddr;
 	vm_map_entry_link(map, new_entry);
+	return (KERN_SUCCESS);
 }
 
 /*
@@ -2376,11 +2400,12 @@ vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, 
  *	the interior of the entry.  Return entry after 'start', and in
  *	prev_entry set the entry before 'start'.
  */
-static inline vm_map_entry_t
+static int
 vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start,
-    vm_map_entry_t *prev_entry)
+    vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry)
 {
 	vm_map_entry_t entry;
+	int rv;
 
 	if (!map->system_map)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
@@ -2389,11 +2414,14 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta
 
 	if (vm_map_lookup_entry(map, start, prev_entry)) {
 		entry = *prev_entry;
-		vm_map_clip_start(map, entry, start);
+		rv = vm_map_clip_start(map, entry, start);
+		if (rv != KERN_SUCCESS)
+			return (rv);
 		*prev_entry = vm_map_entry_pred(entry);
 	} else
 		entry = vm_map_entry_succ(*prev_entry);
-	return (entry);
+	*res_entry = entry;
+	return (KERN_SUCCESS);
 }
 
 /*
@@ -2403,31 +2431,41 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta
  *	the specified address; if necessary,
  *	it splits the entry into two.
  */
-static inline void
-vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
+static int
+vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr)
 {
 	vm_map_entry_t new_entry;
+	int bdry_idx;
 
 	if (!map->system_map)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "%s: map %p entry %p end 0x%jx", __func__, map, entry,
-		    (uintmax_t)end);
+		    (uintmax_t)endaddr);
 
-	if (end >= entry->end)
-		return;
+	if (endaddr >= entry->end)
+		return (KERN_SUCCESS);
 
 	VM_MAP_ASSERT_LOCKED(map);
-	KASSERT(entry->start < end && entry->end > end,
+	KASSERT(entry->start < endaddr && entry->end > endaddr,
 	    ("%s: invalid clip of entry %p", __func__, entry));
 
+	bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+	    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+	if (bdry_idx != 0) {
+		if ((endaddr & (pagesizes[bdry_idx] - 1)) != 0)
+			return (KERN_INVALID_ARGUMENT);
+	}
+
 	new_entry = vm_map_entry_clone(map, entry);
 
 	/*
 	 * Split off the back portion.  Insert the new entry AFTER this one,
 	 * so that this entry has the specified ending address.
 	 */
-	new_entry->start = end;
+	new_entry->start = endaddr;
 	vm_map_entry_link(map, new_entry);
+
+	return (KERN_SUCCESS);
 }
 
 /*
@@ -2469,12 +2507,17 @@ vm_map_submap(
 	if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end &&
 	    (entry->eflags & MAP_ENTRY_COW) == 0 &&
 	    entry->object.vm_object == NULL) {
-		vm_map_clip_start(map, entry, start);
-		vm_map_clip_end(map, entry, end);
+		result = vm_map_clip_start(map, entry, start);
+		if (result != KERN_SUCCESS)
+			goto unlock;
+		result = vm_map_clip_end(map, entry, end);
+		if (result != KERN_SUCCESS)
+			goto unlock;
 		entry->object.sub_map = submap;
 		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 		result = KERN_SUCCESS;
 	}
+unlock:
 	vm_map_unlock(map);
 
 	if (result != KERN_SUCCESS) {
@@ -2661,11 +2704,18 @@ again:
 	 * of this loop early and let the next loop simplify the entries, since
 	 * some may now be mergeable.
 	 */
-	rv = KERN_SUCCESS;
-	vm_map_clip_start(map, first_entry, start);
+	rv = vm_map_clip_start(map, first_entry, start);
+	if (rv != KERN_SUCCESS) {
+		vm_map_unlock(map);
+		return (rv);
+	}
 	for (entry = first_entry; entry->start < end;
 	    entry = vm_map_entry_succ(entry)) {
-		vm_map_clip_end(map, entry, end);
+		rv = vm_map_clip_end(map, entry, end);
+		if (rv != KERN_SUCCESS) {
+			vm_map_unlock(map);
+			return (rv);
+		}
 
 		if (set_max ||
 		    ((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 ||
@@ -2785,6 +2835,7 @@ vm_map_madvise(
 	int behav)
 {
 	vm_map_entry_t entry, prev_entry;
+	int rv;
 	bool modify_map;
 
 	/*
@@ -2830,13 +2881,22 @@ vm_map_madvise(
 		 * We clip the vm_map_entry so that behavioral changes are
 		 * limited to the specified address range.
 		 */
-		for (entry = vm_map_lookup_clip_start(map, start, &prev_entry);
-		    entry->start < end;
-		    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
+		rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry);
+		if (rv != KERN_SUCCESS) {
+			vm_map_unlock(map);
+			return (vm_mmap_to_errno(rv));
+		}
+
+		for (; entry->start < end; prev_entry = entry,
+		    entry = vm_map_entry_succ(entry)) {
 			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
 				continue;
 
-			vm_map_clip_end(map, entry, end);
+			rv = vm_map_clip_end(map, entry, end);
+			if (rv != KERN_SUCCESS) {
+				vm_map_unlock(map);
+				return (vm_mmap_to_errno(rv));
+			}
 
 			switch (behav) {
 			case MADV_NORMAL:
@@ -2969,7 +3029,8 @@ int
 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 	       vm_inherit_t new_inheritance)
 {
-	vm_map_entry_t entry, prev_entry;
+	vm_map_entry_t entry, lentry, prev_entry, start_entry;
+	int rv;
 
 	switch (new_inheritance) {
 	case VM_INHERIT_NONE:
@@ -2984,18 +3045,37 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off
 		return (KERN_SUCCESS);
 	vm_map_lock(map);
 	VM_MAP_RANGE_CHECK(map, start, end);
-	for (entry = vm_map_lookup_clip_start(map, start, &prev_entry);
-	    entry->start < end;
-	    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
-		vm_map_clip_end(map, entry, end);
+	rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry);
+	if (rv != KERN_SUCCESS)
+		goto unlock;
+	if (vm_map_lookup_entry(map, end - 1, &lentry)) {
+		rv = vm_map_clip_end(map, lentry, end);
+		if (rv != KERN_SUCCESS)
+			goto unlock;
+	}
+	if (new_inheritance == VM_INHERIT_COPY) {
+		for (entry = start_entry; entry->start < end;
+		    prev_entry = entry, entry = vm_map_entry_succ(entry)) {
+			if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
+			    != 0) {
+				rv = KERN_INVALID_ARGUMENT;
+				goto unlock;
+			}
+		}
+	}
+	for (entry = start_entry; entry->start < end; prev_entry = entry,
+	    entry = vm_map_entry_succ(entry)) {
+		KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx",
+		    entry, (uintmax_t)entry->end, (uintmax_t)end));
 		if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
 		    new_inheritance != VM_INHERIT_ZERO)
 			entry->inheritance = new_inheritance;
 		vm_map_try_merge_entries(map, prev_entry, entry);
 	}
 	vm_map_try_merge_entries(map, prev_entry, entry);
+unlock:
 	vm_map_unlock(map);
-	return (KERN_SUCCESS);
+	return (rv);
 }
 
 /*
@@ -3094,8 +3174,13 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offs
 			    next_entry : NULL;
 			continue;
 		}
-		vm_map_clip_start(map, entry, start);
-		vm_map_clip_end(map, entry, end);
+		rv = vm_map_clip_start(map, entry, start);
+		if (rv != KERN_SUCCESS)
+			break;
+		rv = vm_map_clip_end(map, entry, end);
+		if (rv != KERN_SUCCESS)
+			break;
+
 		/*
 		 * Mark the entry in case the map lock is released.  (See
 		 * above.)
@@ -3262,8 +3347,8 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
 {
 	vm_map_entry_t entry, first_entry, next_entry, prev_entry;
 	vm_offset_t faddr, saved_end, saved_start;
-	u_long npages;
-	u_int last_timestamp;
+	u_long incr, npages;
+	u_int bidx, last_timestamp;
 	int rv;
 	bool holes_ok, need_wakeup, user_wire;
 	vm_prot_t prot;
@@ -3301,8 +3386,13 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
 			    next_entry : NULL;
 			continue;
 		}
-		vm_map_clip_start(map, entry, start);
-		vm_map_clip_end(map, entry, end);
+		rv = vm_map_clip_start(map, entry, start);
+		if (rv != KERN_SUCCESS)
+			goto done;
+		rv = vm_map_clip_end(map, entry, end);
+		if (rv != KERN_SUCCESS)
+			goto done;
+
 		/*
 		 * Mark the entry in case the map lock is released.  (See
 		 * above.)
@@ -3339,20 +3429,23 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
 			saved_start = entry->start;
 			saved_end = entry->end;
 			last_timestamp = map->timestamp;
+			bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
+			    >> MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+			incr =  pagesizes[bidx];
 			vm_map_busy(map);
 			vm_map_unlock(map);
 
-			faddr = saved_start;
-			do {
+			for (faddr = saved_start; faddr < saved_end;
+			    faddr += incr) {
 				/*
 				 * Simulate a fault to get the page and enter
 				 * it into the physical map.
 				 */
-				if ((rv = vm_fault(map, faddr,
-				    VM_PROT_NONE, VM_FAULT_WIRE, NULL)) !=
-				    KERN_SUCCESS)
+				rv = vm_fault(map, faddr, VM_PROT_NONE,
+				    VM_FAULT_WIRE, NULL);
+				if (rv != KERN_SUCCESS)
 					break;
-			} while ((faddr += PAGE_SIZE) < saved_end);
+			}
 			vm_map_lock(map);
 			vm_map_unbusy(map);
 			if (last_timestamp + 1 != map->timestamp) {
@@ -3427,10 +3520,14 @@ done:
 		 * Moreover, another thread could be simultaneously
 		 * wiring this new mapping entry.  Detect these cases
 		 * and skip any entries marked as in transition not by us.
+		 *
+		 * Another way to get an entry not marked with
+		 * MAP_ENTRY_IN_TRANSITION is after failed clipping,
+		 * which set rv to KERN_INVALID_ARGUMENT.
 		 */
 		if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
 		    entry->wiring_thread != curthread) {
-			KASSERT(holes_ok,
+			KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT,
 			    ("vm_map_wire: !HOLESOK and new/changed entry"));
 			continue;
 		}
@@ -3508,6 +3605,7 @@ vm_map_sync(
 	vm_object_t object;
 	vm_ooffset_t offset;
 	unsigned int last_timestamp;
+	int bdry_idx;
 	boolean_t failed;
 
 	vm_map_lock_read(map);
@@ -3519,14 +3617,26 @@ vm_map_sync(
 		start = first_entry->start;
 		end = first_entry->end;
 	}
+
 	/*
-	 * Make a first pass to check for user-wired memory and holes.
+	 * Make a first pass to check for user-wired memory, holes,
+	 * and partial invalidation of largepage mappings.
 	 */
 	for (entry = first_entry; entry->start < end; entry = next_entry) {
-		if (invalidate &&
-		    (entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
-			vm_map_unlock_read(map);
-			return (KERN_INVALID_ARGUMENT);
+		if (invalidate) {
+			if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
+				vm_map_unlock_read(map);
+				return (KERN_INVALID_ARGUMENT);
+			}
+			bdry_idx = (entry->eflags &
+			    MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+			    MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+			if (bdry_idx != 0 &&
+			    ((start & (pagesizes[bdry_idx] - 1)) != 0 ||
+			    (end & (pagesizes[bdry_idx] - 1)) != 0)) {
+				vm_map_unlock_read(map);
+				return (KERN_INVALID_ARGUMENT);
+			}
 		}
 		next_entry = vm_map_entry_succ(entry);
 		if (end > entry->end &&
@@ -3703,7 +3813,8 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry
 int
 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
 {
-	vm_map_entry_t entry, next_entry;
+	vm_map_entry_t entry, next_entry, scratch_entry;
+	int rv;
 
 	VM_MAP_ASSERT_LOCKED(map);
 
@@ -3714,8 +3825,10 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
 	 * Find the start of the region, and clip it.
 	 * Step through all entries in this region.
 	 */
-	for (entry = vm_map_lookup_clip_start(map, start, &entry);
-	    entry->start < end; entry = next_entry) {
+	rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry);
+	if (rv != KERN_SUCCESS)
+		return (rv);
+	for (; entry->start < end; entry = next_entry) {
 		/*
 		 * Wait for wiring or unwiring of an entry to complete.
 		 * Also wait for any system wirings to disappear on
@@ -3739,13 +3852,19 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
 				 * Specifically, the entry may have been
 				 * clipped, merged, or deleted.
 				 */
-				next_entry = vm_map_lookup_clip_start(map,
-				    saved_start, &next_entry);
+				rv = vm_map_lookup_clip_start(map, saved_start,
+				    &next_entry, &scratch_entry);
+				if (rv != KERN_SUCCESS)
+					break;
 			} else
 				next_entry = entry;
 			continue;
 		}
-		vm_map_clip_end(map, entry, end);
+
+		/* XXXKIB or delete to the upper superpage boundary ? */
+		rv = vm_map_clip_end(map, entry, end);
+		if (rv != KERN_SUCCESS)
+			break;
 		next_entry = vm_map_entry_succ(entry);
 
 		/*
@@ -3775,7 +3894,7 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
 		 */
 		vm_map_entry_delete(map, entry);
 	}
-	return (KERN_SUCCESS);
+	return (rv);
 }
 
 /*
@@ -4219,7 +4338,8 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 			new_entry->end = old_entry->end;
 			new_entry->eflags = old_entry->eflags &
 			    ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
-			    MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC);
+			    MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC |
+			    MAP_ENTRY_SPLIT_BOUNDARY_MASK);
 			new_entry->protection = old_entry->protection;
 			new_entry->max_protection = old_entry->max_protection;
 			new_entry->inheritance = VM_INHERIT_ZERO;

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Wed Sep  9 21:57:55 2020	(r365519)
+++ head/sys/vm/vm_map.h	Wed Sep  9 22:02:30 2020	(r365520)
@@ -149,6 +149,10 @@ struct vm_map_entry {
 #define	MAP_ENTRY_STACK_GAP_UP		0x00040000
 #define	MAP_ENTRY_HEADER		0x00080000
 
+#define	MAP_ENTRY_SPLIT_BOUNDARY_MASK	0x00300000
+
+#define	MAP_ENTRY_SPLIT_BOUNDARY_SHIFT	20
+
 #ifdef	_KERNEL
 static __inline u_char
 vm_map_entry_behavior(vm_map_entry_t entry)
@@ -373,6 +377,9 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define	MAP_CREATE_STACK_GAP_UP	0x00010000
 #define	MAP_CREATE_STACK_GAP_DN	0x00020000
 #define	MAP_VN_EXEC		0x00040000
+#define	MAP_SPLIT_BOUNDARY_MASK	0x00180000
+
+#define	MAP_SPLIT_BOUNDARY_SHIFT 19
 
 /*
  * vm_fault option flags



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202009092202.089M2UbZ007422>