Date: Wed, 9 Sep 2020 22:02:30 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r365520 - head/sys/vm Message-ID: <202009092202.089M2UbZ007422@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Wed Sep 9 22:02:30 2020 New Revision: 365520 URL: https://svnweb.freebsd.org/changeset/base/365520 Log: vm_map: Add a map entry kind that can only be clipped at specific boundary. The entries and their clip boundaries must be aligned on supported superpages sizes from pagesizes[]. vm_map operations return Mach error KERN_INVALID_ARGUMENT, which is usually translated to EINVAL, if it would require clip not at the boundary. In other words, entries force preserving virtual addresses superpage properties. Reviewed by: markj Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D24652 Modified: head/sys/vm/vm_map.c head/sys/vm/vm_map.h Modified: head/sys/vm/vm_map.c ============================================================================== --- head/sys/vm/vm_map.c Wed Sep 9 21:57:55 2020 (r365519) +++ head/sys/vm/vm_map.c Wed Sep 9 22:02:30 2020 (r365520) @@ -1554,13 +1554,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof struct ucred *cred; vm_eflags_t protoeflags; vm_inherit_t inheritance; + u_long bdry; + u_int bidx; VM_MAP_ASSERT_LOCKED(map); KASSERT(object != kernel_object || (cow & MAP_COPY_ON_WRITE) == 0, ("vm_map_insert: kernel object and COW")); - KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0, - ("vm_map_insert: paradoxical MAP_NOFAULT request")); + KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 || + (cow & MAP_SPLIT_BOUNDARY_MASK) != 0, + ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x", + object, cow)); KASSERT((prot & ~max) == 0, ("prot %#x is not subset of max_prot %#x", prot, max)); @@ -1615,6 +1619,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof inheritance = VM_INHERIT_SHARE; else inheritance = VM_INHERIT_DEFAULT; + if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) { + /* This magically ignores index 0, for usual page size. */ + bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >> + MAP_SPLIT_BOUNDARY_SHIFT; + if (bidx >= MAXPAGESIZES) + return (KERN_INVALID_ARGUMENT); + bdry = pagesizes[bidx] - 1; + if ((start & bdry) != 0 || (end & bdry) != 0) + return (KERN_INVALID_ARGUMENT); + protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; + } cred = NULL; if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) @@ -2342,31 +2357,40 @@ vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry) * the specified address; if necessary, * it splits the entry into two. */ -static inline void -vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) +static int +vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr) { vm_map_entry_t new_entry; + int bdry_idx; if (!map->system_map) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: map %p entry %p start 0x%jx", __func__, map, entry, - (uintmax_t)start); + (uintmax_t)startaddr); - if (start <= entry->start) - return; + if (startaddr <= entry->start) + return (KERN_SUCCESS); VM_MAP_ASSERT_LOCKED(map); - KASSERT(entry->end > start && entry->start < start, + KASSERT(entry->end > startaddr && entry->start < startaddr, ("%s: invalid clip of entry %p", __func__, entry)); + bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >> + MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; + if (bdry_idx != 0) { + if ((startaddr & (pagesizes[bdry_idx] - 1)) != 0) + return (KERN_INVALID_ARGUMENT); + } + new_entry = vm_map_entry_clone(map, entry); /* * Split off the front portion. Insert the new entry BEFORE this one, * so that this entry has the specified starting address. */ - new_entry->end = start; + new_entry->end = startaddr; vm_map_entry_link(map, new_entry); + return (KERN_SUCCESS); } /* @@ -2376,11 +2400,12 @@ vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, * the interior of the entry. Return entry after 'start', and in * prev_entry set the entry before 'start'. */ -static inline vm_map_entry_t +static int vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start, - vm_map_entry_t *prev_entry) + vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry) { vm_map_entry_t entry; + int rv; if (!map->system_map) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, @@ -2389,11 +2414,14 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta if (vm_map_lookup_entry(map, start, prev_entry)) { entry = *prev_entry; - vm_map_clip_start(map, entry, start); + rv = vm_map_clip_start(map, entry, start); + if (rv != KERN_SUCCESS) + return (rv); *prev_entry = vm_map_entry_pred(entry); } else entry = vm_map_entry_succ(*prev_entry); - return (entry); + *res_entry = entry; + return (KERN_SUCCESS); } /* @@ -2403,31 +2431,41 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta * the specified address; if necessary, * it splits the entry into two. */ -static inline void -vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) +static int +vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr) { vm_map_entry_t new_entry; + int bdry_idx; if (!map->system_map) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: map %p entry %p end 0x%jx", __func__, map, entry, - (uintmax_t)end); + (uintmax_t)endaddr); - if (end >= entry->end) - return; + if (endaddr >= entry->end) + return (KERN_SUCCESS); VM_MAP_ASSERT_LOCKED(map); - KASSERT(entry->start < end && entry->end > end, + KASSERT(entry->start < endaddr && entry->end > endaddr, ("%s: invalid clip of entry %p", __func__, entry)); + bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >> + MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; + if (bdry_idx != 0) { + if ((endaddr & (pagesizes[bdry_idx] - 1)) != 0) + return (KERN_INVALID_ARGUMENT); + } + new_entry = vm_map_entry_clone(map, entry); /* * Split off the back portion. Insert the new entry AFTER this one, * so that this entry has the specified ending address. */ - new_entry->start = end; + new_entry->start = endaddr; vm_map_entry_link(map, new_entry); + + return (KERN_SUCCESS); } /* @@ -2469,12 +2507,17 @@ vm_map_submap( if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end && (entry->eflags & MAP_ENTRY_COW) == 0 && entry->object.vm_object == NULL) { - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); + result = vm_map_clip_start(map, entry, start); + if (result != KERN_SUCCESS) + goto unlock; + result = vm_map_clip_end(map, entry, end); + if (result != KERN_SUCCESS) + goto unlock; entry->object.sub_map = submap; entry->eflags |= MAP_ENTRY_IS_SUB_MAP; result = KERN_SUCCESS; } +unlock: vm_map_unlock(map); if (result != KERN_SUCCESS) { @@ -2661,11 +2704,18 @@ again: * of this loop early and let the next loop simplify the entries, since * some may now be mergeable. */ - rv = KERN_SUCCESS; - vm_map_clip_start(map, first_entry, start); + rv = vm_map_clip_start(map, first_entry, start); + if (rv != KERN_SUCCESS) { + vm_map_unlock(map); + return (rv); + } for (entry = first_entry; entry->start < end; entry = vm_map_entry_succ(entry)) { - vm_map_clip_end(map, entry, end); + rv = vm_map_clip_end(map, entry, end); + if (rv != KERN_SUCCESS) { + vm_map_unlock(map); + return (rv); + } if (set_max || ((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 || @@ -2785,6 +2835,7 @@ vm_map_madvise( int behav) { vm_map_entry_t entry, prev_entry; + int rv; bool modify_map; /* @@ -2830,13 +2881,22 @@ vm_map_madvise( * We clip the vm_map_entry so that behavioral changes are * limited to the specified address range. */ - for (entry = vm_map_lookup_clip_start(map, start, &prev_entry); - entry->start < end; - prev_entry = entry, entry = vm_map_entry_succ(entry)) { + rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry); + if (rv != KERN_SUCCESS) { + vm_map_unlock(map); + return (vm_mmap_to_errno(rv)); + } + + for (; entry->start < end; prev_entry = entry, + entry = vm_map_entry_succ(entry)) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; - vm_map_clip_end(map, entry, end); + rv = vm_map_clip_end(map, entry, end); + if (rv != KERN_SUCCESS) { + vm_map_unlock(map); + return (vm_mmap_to_errno(rv)); + } switch (behav) { case MADV_NORMAL: @@ -2969,7 +3029,8 @@ int vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, vm_inherit_t new_inheritance) { - vm_map_entry_t entry, prev_entry; + vm_map_entry_t entry, lentry, prev_entry, start_entry; + int rv; switch (new_inheritance) { case VM_INHERIT_NONE: @@ -2984,18 +3045,37 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off return (KERN_SUCCESS); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); - for (entry = vm_map_lookup_clip_start(map, start, &prev_entry); - entry->start < end; - prev_entry = entry, entry = vm_map_entry_succ(entry)) { - vm_map_clip_end(map, entry, end); + rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry); + if (rv != KERN_SUCCESS) + goto unlock; + if (vm_map_lookup_entry(map, end - 1, &lentry)) { + rv = vm_map_clip_end(map, lentry, end); + if (rv != KERN_SUCCESS) + goto unlock; + } + if (new_inheritance == VM_INHERIT_COPY) { + for (entry = start_entry; entry->start < end; + prev_entry = entry, entry = vm_map_entry_succ(entry)) { + if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) + != 0) { + rv = KERN_INVALID_ARGUMENT; + goto unlock; + } + } + } + for (entry = start_entry; entry->start < end; prev_entry = entry, + entry = vm_map_entry_succ(entry)) { + KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx", + entry, (uintmax_t)entry->end, (uintmax_t)end)); if ((entry->eflags & MAP_ENTRY_GUARD) == 0 || new_inheritance != VM_INHERIT_ZERO) entry->inheritance = new_inheritance; vm_map_try_merge_entries(map, prev_entry, entry); } vm_map_try_merge_entries(map, prev_entry, entry); +unlock: vm_map_unlock(map); - return (KERN_SUCCESS); + return (rv); } /* @@ -3094,8 +3174,13 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offs next_entry : NULL; continue; } - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); + rv = vm_map_clip_start(map, entry, start); + if (rv != KERN_SUCCESS) + break; + rv = vm_map_clip_end(map, entry, end); + if (rv != KERN_SUCCESS) + break; + /* * Mark the entry in case the map lock is released. (See * above.) @@ -3262,8 +3347,8 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm { vm_map_entry_t entry, first_entry, next_entry, prev_entry; vm_offset_t faddr, saved_end, saved_start; - u_long npages; - u_int last_timestamp; + u_long incr, npages; + u_int bidx, last_timestamp; int rv; bool holes_ok, need_wakeup, user_wire; vm_prot_t prot; @@ -3301,8 +3386,13 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm next_entry : NULL; continue; } - vm_map_clip_start(map, entry, start); - vm_map_clip_end(map, entry, end); + rv = vm_map_clip_start(map, entry, start); + if (rv != KERN_SUCCESS) + goto done; + rv = vm_map_clip_end(map, entry, end); + if (rv != KERN_SUCCESS) + goto done; + /* * Mark the entry in case the map lock is released. (See * above.) @@ -3339,20 +3429,23 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm saved_start = entry->start; saved_end = entry->end; last_timestamp = map->timestamp; + bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) + >> MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; + incr = pagesizes[bidx]; vm_map_busy(map); vm_map_unlock(map); - faddr = saved_start; - do { + for (faddr = saved_start; faddr < saved_end; + faddr += incr) { /* * Simulate a fault to get the page and enter * it into the physical map. */ - if ((rv = vm_fault(map, faddr, - VM_PROT_NONE, VM_FAULT_WIRE, NULL)) != - KERN_SUCCESS) + rv = vm_fault(map, faddr, VM_PROT_NONE, + VM_FAULT_WIRE, NULL); + if (rv != KERN_SUCCESS) break; - } while ((faddr += PAGE_SIZE) < saved_end); + } vm_map_lock(map); vm_map_unbusy(map); if (last_timestamp + 1 != map->timestamp) { @@ -3427,10 +3520,14 @@ done: * Moreover, another thread could be simultaneously * wiring this new mapping entry. Detect these cases * and skip any entries marked as in transition not by us. + * + * Another way to get an entry not marked with + * MAP_ENTRY_IN_TRANSITION is after failed clipping, + * which set rv to KERN_INVALID_ARGUMENT. */ if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || entry->wiring_thread != curthread) { - KASSERT(holes_ok, + KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT, ("vm_map_wire: !HOLESOK and new/changed entry")); continue; } @@ -3508,6 +3605,7 @@ vm_map_sync( vm_object_t object; vm_ooffset_t offset; unsigned int last_timestamp; + int bdry_idx; boolean_t failed; vm_map_lock_read(map); @@ -3519,14 +3617,26 @@ vm_map_sync( start = first_entry->start; end = first_entry->end; } + /* - * Make a first pass to check for user-wired memory and holes. + * Make a first pass to check for user-wired memory, holes, + * and partial invalidation of largepage mappings. */ for (entry = first_entry; entry->start < end; entry = next_entry) { - if (invalidate && - (entry->eflags & MAP_ENTRY_USER_WIRED) != 0) { - vm_map_unlock_read(map); - return (KERN_INVALID_ARGUMENT); + if (invalidate) { + if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) { + vm_map_unlock_read(map); + return (KERN_INVALID_ARGUMENT); + } + bdry_idx = (entry->eflags & + MAP_ENTRY_SPLIT_BOUNDARY_MASK) >> + MAP_ENTRY_SPLIT_BOUNDARY_SHIFT; + if (bdry_idx != 0 && + ((start & (pagesizes[bdry_idx] - 1)) != 0 || + (end & (pagesizes[bdry_idx] - 1)) != 0)) { + vm_map_unlock_read(map); + return (KERN_INVALID_ARGUMENT); + } } next_entry = vm_map_entry_succ(entry); if (end > entry->end && @@ -3703,7 +3813,8 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry int vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) { - vm_map_entry_t entry, next_entry; + vm_map_entry_t entry, next_entry, scratch_entry; + int rv; VM_MAP_ASSERT_LOCKED(map); @@ -3714,8 +3825,10 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs * Find the start of the region, and clip it. * Step through all entries in this region. */ - for (entry = vm_map_lookup_clip_start(map, start, &entry); - entry->start < end; entry = next_entry) { + rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry); + if (rv != KERN_SUCCESS) + return (rv); + for (; entry->start < end; entry = next_entry) { /* * Wait for wiring or unwiring of an entry to complete. * Also wait for any system wirings to disappear on @@ -3739,13 +3852,19 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs * Specifically, the entry may have been * clipped, merged, or deleted. */ - next_entry = vm_map_lookup_clip_start(map, - saved_start, &next_entry); + rv = vm_map_lookup_clip_start(map, saved_start, + &next_entry, &scratch_entry); + if (rv != KERN_SUCCESS) + break; } else next_entry = entry; continue; } - vm_map_clip_end(map, entry, end); + + /* XXXKIB or delete to the upper superpage boundary ? */ + rv = vm_map_clip_end(map, entry, end); + if (rv != KERN_SUCCESS) + break; next_entry = vm_map_entry_succ(entry); /* @@ -3775,7 +3894,7 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs */ vm_map_entry_delete(map, entry); } - return (KERN_SUCCESS); + return (rv); } /* @@ -4219,7 +4338,8 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c new_entry->end = old_entry->end; new_entry->eflags = old_entry->eflags & ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | - MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC); + MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC | + MAP_ENTRY_SPLIT_BOUNDARY_MASK); new_entry->protection = old_entry->protection; new_entry->max_protection = old_entry->max_protection; new_entry->inheritance = VM_INHERIT_ZERO; Modified: head/sys/vm/vm_map.h ============================================================================== --- head/sys/vm/vm_map.h Wed Sep 9 21:57:55 2020 (r365519) +++ head/sys/vm/vm_map.h Wed Sep 9 22:02:30 2020 (r365520) @@ -149,6 +149,10 @@ struct vm_map_entry { #define MAP_ENTRY_STACK_GAP_UP 0x00040000 #define MAP_ENTRY_HEADER 0x00080000 +#define MAP_ENTRY_SPLIT_BOUNDARY_MASK 0x00300000 + +#define MAP_ENTRY_SPLIT_BOUNDARY_SHIFT 20 + #ifdef _KERNEL static __inline u_char vm_map_entry_behavior(vm_map_entry_t entry) @@ -373,6 +377,9 @@ long vmspace_resident_count(struct vmspace *vmspace); #define MAP_CREATE_STACK_GAP_UP 0x00010000 #define MAP_CREATE_STACK_GAP_DN 0x00020000 #define MAP_VN_EXEC 0x00040000 +#define MAP_SPLIT_BOUNDARY_MASK 0x00180000 + +#define MAP_SPLIT_BOUNDARY_SHIFT 19 /* * vm_fault option flags
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202009092202.089M2UbZ007422>