Date: Wed, 20 Nov 2024 17:56:25 GMT From: Doug Moore <dougm@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 5b78ff830791 - main - vm_page: remove pages with iterators Message-ID: <202411201756.4AKHuPxL000439@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by dougm: URL: https://cgit.FreeBSD.org/src/commit/?id=5b78ff830791633c02a3d906b2c8f5c9b3bb1a91 commit 5b78ff830791633c02a3d906b2c8f5c9b3bb1a91 Author: Doug Moore <dougm@FreeBSD.org> AuthorDate: 2024-11-20 17:54:20 +0000 Commit: Doug Moore <dougm@FreeBSD.org> CommitDate: 2024-11-20 17:54:20 +0000 vm_page: remove pages with iterators Use pctrie iterators for removing some page sequences from radix trees, to avoid repeated searches from the tree root. Rename vm_page_object_remove to vm_page_remove_radixdone, and remove from it the responsibility for removing a page from its radix tree, and pass that responsibility on to its callers. For one of those callers, vm_page_rename, pass a pages pctrie_iter, rather than a page, and use the iterator to remove the page from its radix tree. Define functions vm_page_iter_remove() and vm_page_iter_free() that are like vm_page_remove() and vm_page_free(), respectively, except that they take an iterator as parameter rather than a page, and use the iterator to remove the page from the radix tree instead of searching the radix tree. Function vm_page_iter_free() assumes that the page is associated with an object, and calls vm_page_free_toq_impl() with do_remove set to false, so that vm_page_free_prep does the object-related work but skips the radix tree removal already done through the iterator. In functions vm_object_split and vm_object_collapse_scan, use a pctrie_iter to walk over the pages of the object, and use vm_page_rename and vm_radix_iter_remove to modify the radix tree without searching for pages. In vm_object_page_remove and _kmem_unback, use a pctrie_iter and vm_page_iter_free to remove the page from the radix tree. 
Reviewed by: markj (previous version) Tested by: pho Differential Revision: https://reviews.freebsd.org/D46724 --- sys/vm/vm_kern.c | 12 +++--- sys/vm/vm_object.c | 72 +++++++++++++++---------------- sys/vm/vm_page.c | 124 ++++++++++++++++++++++++++++++++++++++++------------- sys/vm/vm_page.h | 4 +- 4 files changed, 141 insertions(+), 71 deletions(-) diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 22776e2196b0..6343fb66cfa3 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -634,8 +634,9 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) static struct vmem * _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { + struct pctrie_iter pages; struct vmem *arena; - vm_page_t m, next; + vm_page_t m; vm_offset_t end, offset; int domain; @@ -648,17 +649,18 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) offset = addr - VM_MIN_KERNEL_ADDRESS; end = offset + size; VM_OBJECT_WLOCK(object); - m = vm_page_lookup(object, atop(offset)); + vm_page_iter_init(&pages, object); + m = vm_page_iter_lookup(&pages, atop(offset)); domain = vm_page_domain(m); if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0)) arena = vm_dom[domain].vmd_kernel_arena; else arena = vm_dom[domain].vmd_kernel_rwx_arena; - for (; offset < end; offset += PAGE_SIZE, m = next) { - next = vm_page_next(m); + for (; offset < end; offset += PAGE_SIZE, + m = vm_page_iter_lookup(&pages, atop(offset))) { vm_page_xbusy_claim(m); vm_page_unwire_noq(m); - vm_page_free(m); + vm_page_iter_free(&pages); } VM_OBJECT_WUNLOCK(object); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index e6324647e29e..21773318cea0 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1520,9 +1520,10 @@ vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length, void vm_object_split(vm_map_entry_t entry) { - vm_page_t m, m_next; + struct pctrie_iter pages; + vm_page_t m; vm_object_t orig_object, new_object, backing_object; - vm_pindex_t idx, 
offidxstart; + vm_pindex_t offidxstart; vm_size_t size; orig_object = entry->object.vm_object; @@ -1573,17 +1574,11 @@ vm_object_split(vm_map_entry_t entry) * that the object is in transition. */ vm_object_set_flag(orig_object, OBJ_SPLIT); -#ifdef INVARIANTS - idx = 0; -#endif + vm_page_iter_limit_init(&pages, orig_object, offidxstart + size); retry: - m = vm_page_find_least(orig_object, offidxstart); - KASSERT(m == NULL || idx <= m->pindex - offidxstart, - ("%s: object %p was repopulated", __func__, orig_object)); - for (; m != NULL && (idx = m->pindex - offidxstart) < size; - m = m_next) { - m_next = TAILQ_NEXT(m, listq); - + pctrie_iter_reset(&pages); + for (m = vm_page_iter_lookup_ge(&pages, offidxstart); m != NULL; + m = vm_radix_iter_step(&pages)) { /* * We must wait for pending I/O to complete before we can * rename the page. @@ -1604,13 +1599,13 @@ retry: * an incomplete fault. Just remove and ignore. */ if (vm_page_none_valid(m)) { - if (vm_page_remove(m)) + if (vm_page_iter_remove(&pages)) vm_page_free(m); continue; } /* vm_page_rename() will dirty the page. 
*/ - if (vm_page_rename(m, new_object, idx)) { + if (vm_page_rename(&pages, new_object, m->pindex - offidxstart)) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); @@ -1656,7 +1651,8 @@ retry: } static vm_page_t -vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) +vm_object_collapse_scan_wait(struct pctrie_iter *pages, vm_object_t object, + vm_page_t p) { vm_object_t backing_object; @@ -1683,12 +1679,14 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) VM_OBJECT_WLOCK(object); } VM_OBJECT_WLOCK(backing_object); - return (TAILQ_FIRST(&backing_object->memq)); + vm_page_iter_init(pages, backing_object); + return (vm_page_iter_lookup_ge(pages, 0)); } static void vm_object_collapse_scan(vm_object_t object) { + struct pctrie_iter pages; vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; @@ -1702,7 +1700,8 @@ vm_object_collapse_scan(vm_object_t object) /* * Our scan */ - for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { + vm_page_iter_init(&pages, backing_object); + for (p = vm_page_iter_lookup_ge(&pages, 0); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - backing_offset_index; @@ -1710,7 +1709,7 @@ vm_object_collapse_scan(vm_object_t object) * Check for busy page */ if (vm_page_tryxbusy(p) == 0) { - next = vm_object_collapse_scan_wait(object, p); + next = vm_object_collapse_scan_wait(&pages, object, p); continue; } @@ -1727,16 +1726,18 @@ vm_object_collapse_scan(vm_object_t object) KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) + if (vm_page_iter_remove(&pages)) vm_page_free(p); + next = vm_radix_iter_step(&pages); continue; } if (!vm_page_all_valid(p)) { KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) + if (vm_page_iter_remove(&pages)) vm_page_free(p); + next = vm_radix_iter_step(&pages); continue; } @@ -1749,7 +1750,7 @@ 
vm_object_collapse_scan(vm_object_t object) * busy bit owner, we can't tell whether it shadows the * original page. */ - next = vm_object_collapse_scan_wait(object, pp); + next = vm_object_collapse_scan_wait(&pages, object, pp); continue; } @@ -1775,10 +1776,11 @@ vm_object_collapse_scan(vm_object_t object) vm_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); - if (vm_page_remove(p)) - vm_page_free(p); if (pp != NULL) vm_page_xunbusy(pp); + if (vm_page_iter_remove(&pages)) + vm_page_free(p); + next = vm_radix_iter_step(&pages); continue; } @@ -1789,9 +1791,10 @@ vm_object_collapse_scan(vm_object_t object) * If the page was mapped to a process, it can remain mapped * through the rename. vm_page_rename() will dirty the page. */ - if (vm_page_rename(p, object, new_pindex)) { + if (vm_page_rename(&pages, object, new_pindex)) { vm_page_xunbusy(p); - next = vm_object_collapse_scan_wait(object, NULL); + next = vm_object_collapse_scan_wait(&pages, object, + NULL); continue; } @@ -1807,6 +1810,7 @@ vm_object_collapse_scan(vm_object_t object) backing_offset_index); #endif vm_page_xunbusy(p); + next = vm_radix_iter_step(&pages); } return; } @@ -1981,7 +1985,8 @@ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { - vm_page_t p, next; + struct pctrie_iter pages; + vm_page_t p; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || @@ -1990,16 +1995,11 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); + vm_page_iter_limit_init(&pages, object, end); again: - p = vm_page_find_least(object, start); - - /* - * Here, the variable "p" is either (1) the page with the least pindex - * greater than or equal to the parameter "start" or (2) NULL. 
- */ - for (; p != NULL && (p->pindex < end || end == 0); p = next) { - next = TAILQ_NEXT(p, listq); - + pctrie_iter_reset(&pages); + for (p = vm_page_iter_lookup_ge(&pages, start); p != NULL; + p = vm_radix_iter_step(&pages)) { /* * Skip invalid pages if asked to do so. Try to avoid acquiring * the busy lock, as some consumers rely on this to avoid @@ -2060,7 +2060,7 @@ wired: if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_all(p)) goto wired; - vm_page_free(p); + vm_page_iter_free(&pages); } vm_object_pip_wakeup(object); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 0b9b55337b52..7d093579e35d 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -170,8 +170,9 @@ static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(vm_page_t m, uint8_t queue); -static bool vm_page_free_prep(vm_page_t m); +static bool vm_page_free_prep(vm_page_t m, bool do_remove); static void vm_page_free_toq(vm_page_t m); +static void vm_page_free_toq_impl(vm_page_t m, bool do_remove); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); @@ -1386,6 +1387,22 @@ vm_page_free(vm_page_t m) vm_page_free_toq(m); } +/* + * vm_page_iter_free: + * + * Free the current page, as identified by iterator. + */ +void +vm_page_iter_free(struct pctrie_iter *pages) +{ + vm_page_t m; + + m = vm_radix_iter_page(pages); + vm_radix_iter_remove(pages); + m->flags &= ~PG_ZERO; + vm_page_free_toq_impl(m, false); +} + /* * vm_page_free_zero: * @@ -1639,14 +1656,18 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) } /* - * Do the work to remove a page from its object. The caller is responsible for - * updating the page's fields to reflect this removal. 
+ * vm_page_remove_radixdone + * + * Complete page "m" removal from the specified object after the radix trie + * unhooking. + * + * The caller is responsible for updating the page's fields to reflect this + * removal. */ static void -vm_page_object_remove(vm_page_t m) +vm_page_remove_radixdone(vm_page_t m) { vm_object_t object; - vm_page_t mrem __diagused; vm_page_assert_xbusied(m); object = m->object; @@ -1659,10 +1680,7 @@ vm_page_object_remove(vm_page_t m) vm_pager_page_unswapped(m); vm_pager_page_removed(object, m); - m->object = NULL; - mrem = vm_radix_remove(&object->rtree, m->pindex); - KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); /* * Now remove from the object's list of backed pages. @@ -1704,6 +1722,42 @@ vm_page_remove(vm_page_t m) return (dropped); } +/* + * vm_page_iter_remove: + * + * Remove the current page, as identified by iterator, and remove it from the + * radix tree. + */ +bool +vm_page_iter_remove(struct pctrie_iter *pages) +{ + vm_page_t m; + bool dropped; + + m = vm_radix_iter_page(pages); + vm_radix_iter_remove(pages); + vm_page_remove_radixdone(m); + dropped = (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); + vm_page_xunbusy(m); + + return (dropped); +} + +/* + * vm_page_radix_remove + * + * Removes the specified page from the radix tree. + */ +static void +vm_page_radix_remove(vm_page_t m) +{ + vm_page_t mrem __diagused; + + mrem = vm_radix_remove(&m->object->rtree, m->pindex); + KASSERT(mrem == m, + ("removed page %p, expected page %p", mrem, m)); +} + /* * vm_page_remove_xbusy * @@ -1714,7 +1768,8 @@ bool vm_page_remove_xbusy(vm_page_t m) { - vm_page_object_remove(m); + vm_page_radix_remove(m); + vm_page_remove_radixdone(m); return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); } @@ -1985,8 +2040,8 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, /* * vm_page_rename: * - * Move the given memory entry from its - * current object to the specified target object/offset. 
+ * Move the current page, as identified by iterator, from its current + * object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the @@ -2001,13 +2056,15 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, * The objects must be locked. */ int -vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) +vm_page_rename(struct pctrie_iter *pages, + vm_object_t new_object, vm_pindex_t new_pindex) { - vm_page_t mpred; + vm_page_t m, mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); + m = vm_radix_iter_page(pages); KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); /* @@ -2027,7 +2084,8 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) * the listq iterator is tainted. */ m->pindex = opidx; - vm_page_object_remove(m); + vm_radix_iter_remove(pages); + vm_page_remove_radixdone(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; @@ -3122,7 +3180,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_page_dequeue(m); if (vm_page_replace_hold(m_new, object, m->pindex, m) && - vm_page_free_prep(m)) + vm_page_free_prep(m, true)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); @@ -3134,7 +3192,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, } else { m->flags &= ~PG_ZERO; vm_page_dequeue(m); - if (vm_page_free_prep(m)) + if (vm_page_free_prep(m, true)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); KASSERT(m->dirty == 0, @@ -4073,7 +4131,7 @@ vm_page_enqueue(vm_page_t m, uint8_t queue) * page must be unmapped. 
*/ static bool -vm_page_free_prep(vm_page_t m) +vm_page_free_prep(vm_page_t m, bool do_remove) { /* @@ -4120,7 +4178,9 @@ vm_page_free_prep(vm_page_t m) m->ref_count == VPRC_OBJREF, ("vm_page_free_prep: page %p has unexpected ref_count %u", m, m->ref_count)); - vm_page_object_remove(m); + if (do_remove) + vm_page_radix_remove(m); + vm_page_remove_radixdone(m); m->ref_count -= VPRC_OBJREF; } else vm_page_assert_unbusied(m); @@ -4172,22 +4232,13 @@ vm_page_free_prep(vm_page_t m) return (true); } -/* - * vm_page_free_toq: - * - * Returns the given page to the free list, disassociating it - * from any VM object. - * - * The object must be locked. The page must be exclusively busied if it - * belongs to an object. - */ static void -vm_page_free_toq(vm_page_t m) +vm_page_free_toq_impl(vm_page_t m, bool do_remove) { struct vm_domain *vmd; uma_zone_t zone; - if (!vm_page_free_prep(m)) + if (!vm_page_free_prep(m, do_remove)) return; vmd = vm_pagequeue_domain(m); @@ -4202,6 +4253,21 @@ vm_page_free_toq(vm_page_t m) vm_domain_freecnt_inc(vmd, 1); } +/* + * vm_page_free_toq: + * + * Returns the given page to the free list, disassociating it + * from any VM object. + * + * The object must be locked. The page must be exclusively busied if it + * belongs to an object. 
+ */ +static void +vm_page_free_toq(vm_page_t m) +{ + vm_page_free_toq_impl(m, true); +} + /* * vm_page_free_pages_toq: * diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 893608bcacf1..613896e77dd9 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -602,6 +602,7 @@ bool vm_page_busy_sleep(vm_page_t m, const char *msg, int allocflags); void vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, const char *wmesg, int allocflags); void vm_page_free(vm_page_t m); +void vm_page_iter_free(struct pctrie_iter *); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); @@ -679,8 +680,9 @@ void vm_page_release(vm_page_t m, int flags); void vm_page_release_locked(vm_page_t m, int flags); vm_page_t vm_page_relookup(vm_object_t, vm_pindex_t); bool vm_page_remove(vm_page_t); +bool vm_page_iter_remove(struct pctrie_iter *); bool vm_page_remove_xbusy(vm_page_t); -int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t); +int vm_page_rename(struct pctrie_iter *, vm_object_t, vm_pindex_t); void vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, vm_page_t mold); int vm_page_sbusied(vm_page_t m);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202411201756.4AKHuPxL000439>