Date: Mon, 15 Jan 2018 19:02:15 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r328012 - user/jeff/numa/sys/vm
Message-ID: <201801151902.w0FJ2FMD011160@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jeff Date: Mon Jan 15 19:02:15 2018 New Revision: 328012 URL: https://svnweb.freebsd.org/changeset/base/328012 Log: First cut at finer locking for reservations. Modified: user/jeff/numa/sys/vm/vm_page.c user/jeff/numa/sys/vm/vm_page.h user/jeff/numa/sys/vm/vm_phys.c user/jeff/numa/sys/vm/vm_phys.h user/jeff/numa/sys/vm/vm_reserv.c user/jeff/numa/sys/vm/vm_reserv.h Modified: user/jeff/numa/sys/vm/vm_page.c ============================================================================== --- user/jeff/numa/sys/vm/vm_page.c Mon Jan 15 18:37:03 2018 (r328011) +++ user/jeff/numa/sys/vm/vm_page.c Mon Jan 15 19:02:15 2018 (r328012) @@ -174,8 +174,8 @@ static int vm_page_insert_after(vm_page_t m, vm_object vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); -static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, - vm_paddr_t high); +static int vm_page_reclaim_run(int req_class, int domain, u_long npages, + vm_page_t m_run, vm_paddr_t high); static int vm_page_alloc_fail(vm_object_t object, int req); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); @@ -316,7 +316,7 @@ vm_page_blacklist_check(char *list, char *end) vm_paddr_t pa; vm_page_t m; char *next; - int ret; + int ret, domain; next = list; while (next != NULL) { @@ -325,9 +325,10 @@ vm_page_blacklist_check(char *list, char *end) m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; - mtx_lock(&vm_page_queue_free_mtx); + domain = vm_phys_domidx(m); + vm_pagequeue_free_lock(domain); ret = vm_phys_unfree_page(m); - mtx_unlock(&vm_page_queue_free_mtx); + vm_pagequeue_free_unlock(domain); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) @@ -713,10 +714,10 @@ vm_page_startup(vm_offset_t vaddr) m = seg->first_page; pagecount = (u_long)atop(seg->end - seg->start); - mtx_lock(&vm_page_queue_free_mtx); + vm_pagequeue_free_lock(seg->domain); vm_phys_free_contig(m, 
pagecount); - vm_phys_freecnt_adj(m, (int)pagecount); - mtx_unlock(&vm_page_queue_free_mtx); + vm_phys_freecnt_adj(seg->domain, (int)pagecount); + vm_pagequeue_free_unlock(seg->domain); vm_cnt.v_page_count += (u_int)pagecount; vmd = &vm_dom[seg->domain]; @@ -1644,14 +1645,48 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pi return (m); } +/* + * Returns true if the number of free pages exceeds the minimum + * for the request class and false otherwise. + */ +int +vm_page_available(int domain, int req, int npages) +{ + + vm_pagequeue_free_assert_locked(domain); + req = req & VM_ALLOC_CLASS_MASK; + + /* + * The page daemon is allowed to dig deeper into the free page list. + */ + if (curproc == pageproc && req != VM_ALLOC_INTERRUPT) + req = VM_ALLOC_SYSTEM; + + /* XXX Global counts. */ + if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || + (req == VM_ALLOC_SYSTEM && + vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) || + (req == VM_ALLOC_INTERRUPT && + vm_cnt.v_free_count >= npages)) + return (1); + + return (0); +} + vm_page_t vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, int req, vm_page_t mpred) { vm_page_t m; - int flags, req_class; + int flags; u_int free_count; +#if VM_NRESERVLEVEL > 0 + int reserv; + reserv = object != NULL && + (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED; +#endif + KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != @@ -1665,34 +1700,19 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pind if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); - req_class = req & VM_ALLOC_CLASS_MASK; - - /* - * The page daemon is allowed to dig deeper into the free page list. 
- */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - - /* - * Allocate a page if the number of free pages exceeds the minimum - * for the request class. - */ again: m = NULL; + if (reserv && + (m = vm_reserv_extend(req, object, pindex, domain, mpred)) != NULL) + goto found; mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) { + if (vm_page_available(domain, req, 1)) { /* * Can we allocate the page from a reservation? */ #if VM_NRESERVLEVEL > 0 - if (object == NULL || (object->flags & (OBJ_COLORED | - OBJ_FICTITIOUS)) != OBJ_COLORED || (m = - vm_reserv_alloc_page(object, pindex, domain, - mpred)) == NULL) + if (!reserv || (m = vm_reserv_alloc_page(object, pindex, + domain, mpred)) == NULL) #endif { /* @@ -1723,8 +1743,16 @@ again: * At this point we had better have found a good page. */ KASSERT(m != NULL, ("missing page")); - free_count = vm_phys_freecnt_adj(m, -1); + free_count = vm_phys_freecnt_adj(domain, -1); mtx_unlock(&vm_page_queue_free_mtx); + + /* + * Don't wakeup too often - wakeup the pageout daemon when + * we would be nearly out of memory. + */ + if (vm_paging_needed(free_count)) + pagedaemon_wakeup(); +found: vm_page_alloc_check(m); /* @@ -1782,13 +1810,6 @@ again: } else m->pindex = pindex; - /* - * Don't wakeup too often - wakeup the pageout daemon when - * we would be nearly out of memory. 
- */ - if (vm_paging_needed(free_count)) - pagedaemon_wakeup(); - return (m); } @@ -1858,8 +1879,12 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin { vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; - int req_class; +#if VM_NRESERVLEVEL > 0 + int reserv; + reserv = object != NULL && + (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED; +#endif mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && @@ -1876,14 +1901,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); - req_class = req & VM_ALLOC_CLASS_MASK; - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, @@ -1895,19 +1913,21 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin * below the lower bound for the allocation class? */ again: +#if VM_NRESERVLEVEL > 0 + if (reserv && + (m_ret = vm_reserv_extend_contig(req, object, pindex, domain, + npages, low, high, alignment, boundary, mpred)) != NULL) + goto found; +#endif m_ret = NULL; mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count >= npages)) { + if (vm_page_available(domain, req, npages)) { /* * Can we allocate the pages from a reservation? 
*/ #if VM_NRESERVLEVEL > 0 retry: - if (object == NULL || (object->flags & OBJ_COLORED) == 0 || + if (!reserv || (m_ret = vm_reserv_alloc_contig(object, pindex, domain, npages, low, high, alignment, boundary, mpred)) == NULL) #endif @@ -1927,8 +1947,9 @@ retry: goto again; return (NULL); } - vm_phys_freecnt_adj(m_ret, -npages); + vm_phys_freecnt_adj(domain, -npages); mtx_unlock(&vm_page_queue_free_mtx); +found: for (m = m_ret; m < &m_ret[npages]; m++) vm_page_alloc_check(m); @@ -2059,26 +2080,13 @@ vm_page_alloc_freelist_domain(int domain, int freelist { vm_page_t m; u_int flags, free_count; - int req_class; - req_class = req & VM_ALLOC_CLASS_MASK; - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - - /* * Do not allocate reserved pages unless the req has asked for it. */ again: mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) + if (vm_page_available(domain, req, 1)) m = vm_phys_alloc_freelist_pages(domain, freelist, VM_FREEPOOL_DIRECT, 0); if (m == NULL) { @@ -2086,7 +2094,7 @@ again: goto again; return (NULL); } - free_count = vm_phys_freecnt_adj(m, -1); + free_count = vm_phys_freecnt_adj(domain, -1); mtx_unlock(&vm_page_queue_free_mtx); vm_page_alloc_check(m); @@ -2331,7 +2339,7 @@ unlock: * "req_class" must be an allocation class. 
*/ static int -vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, +vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high) { struct mtx *m_mtx; @@ -2483,7 +2491,8 @@ retry: unlock: VM_OBJECT_WUNLOCK(object); } else { - mtx_lock(&vm_page_queue_free_mtx); + MPASS(vm_phys_domidx(m) == domain); + vm_pagequeue_free_lock(domain); order = m->order; if (order < VM_NFREEORDER) { /* @@ -2500,7 +2509,7 @@ unlock: else if (vm_reserv_is_page_free(m)) order = 0; #endif - mtx_unlock(&vm_page_queue_free_mtx); + vm_pagequeue_free_lock(domain); if (order == VM_NFREEORDER) error = EINVAL; } @@ -2508,13 +2517,14 @@ unlock: if (m_mtx != NULL) mtx_unlock(m_mtx); if ((m = SLIST_FIRST(&free)) != NULL) { - mtx_lock(&vm_page_queue_free_mtx); + MPASS(vm_phys_domidx(m) == domain); + vm_pagequeue_free_lock(domain); do { SLIST_REMOVE_HEAD(&free, plinks.s.ss); vm_page_free_phys(m); } while ((m = SLIST_FIRST(&free)) != NULL); vm_page_free_wakeup(); - mtx_unlock(&vm_page_queue_free_mtx); + vm_pagequeue_free_unlock(domain); } return (error); } @@ -2612,8 +2622,8 @@ vm_page_reclaim_contig_domain(int domain, int req, u_l for (i = 0; count > 0 && i < NRUNS; i++) { count--; m_run = m_runs[RUN_INDEX(count)]; - error = vm_page_reclaim_run(req_class, npages, m_run, - high); + error = vm_page_reclaim_run(req_class, domain, npages, + m_run, high); if (error == 0) { reclaimed += npages; if (reclaimed >= MIN_RECLAIM) @@ -3008,9 +3018,9 @@ static void vm_page_free_phys(vm_page_t m) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(vm_phys_domidx(m)); - vm_phys_freecnt_adj(m, 1); + vm_phys_freecnt_adj(vm_phys_domidx(m), 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #endif Modified: user/jeff/numa/sys/vm/vm_page.h ============================================================================== --- user/jeff/numa/sys/vm/vm_page.h Mon Jan 15 18:37:03 2018 (r328011) +++ user/jeff/numa/sys/vm/vm_page.h Mon Jan 15 
19:02:15 2018 (r328012) @@ -248,6 +248,15 @@ extern struct vm_domain vm_dom[MAXMEMDOM]; #define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex) #define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) +#define vm_pagequeue_free_assert_locked(n) \ + mtx_assert(vm_pagequeue_free_lockptr((n)), MA_OWNED) +#define vm_pagequeue_free_lock(n) \ + mtx_lock(vm_pagequeue_free_lockptr((n))) +#define vm_pagequeue_free_lockptr(n) \ + (&vm_page_queue_free_mtx) +#define vm_pagequeue_free_unlock(n) \ + mtx_unlock(vm_pagequeue_free_lockptr((n))) + #ifdef _KERNEL extern vm_page_t bogus_page; @@ -476,6 +485,7 @@ void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); +int vm_page_available(int domain, int req, int npages); vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int); vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t); Modified: user/jeff/numa/sys/vm/vm_phys.c ============================================================================== --- user/jeff/numa/sys/vm/vm_phys.c Mon Jan 15 18:37:03 2018 (r328011) +++ user/jeff/numa/sys/vm/vm_phys.c Mon Jan 15 19:02:15 2018 (r328012) @@ -653,7 +653,7 @@ vm_phys_alloc_freelist_pages(int domain, int freelist, if (flind < 0) return (NULL); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(domain); fl = &vm_phys_free_queues[domain][flind][pool][0]; for (oind = order; oind < VM_NFREEORDER; oind++) { m = TAILQ_FIRST(&fl[oind].pl); @@ -906,8 +906,8 @@ vm_phys_free_pages(vm_page_t m, int order) m, m->pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_free_pages: order %d is out of range", order)); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); seg = &vm_phys_segs[m->segind]; + vm_pagequeue_free_assert_locked(seg->domain); if (order < VM_NFREEORDER - 1) { pa = VM_PAGE_TO_PHYS(m); do { @@ -945,7 +945,7 @@ vm_phys_free_contig(vm_page_t m, u_long npages) * Avoid 
unnecessary coalescing by freeing the pages in the largest * possible power-of-two-sized subsets. */ - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(vm_phys_domidx(m)); for (;; npages -= n) { /* * Unsigned "min" is used here so that "order" is assigned @@ -1051,14 +1051,13 @@ vm_phys_unfree_page(vm_page_t m) vm_page_t m_set, m_tmp; int order; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - /* * First, find the contiguous, power of two-sized set of free * physical pages containing the given physical page "m" and * assign it to "m_set". */ seg = &vm_phys_segs[m->segind]; + vm_pagequeue_free_assert_locked(seg->domain); for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && order < VM_NFREEORDER - 1; ) { order++; @@ -1122,7 +1121,7 @@ vm_phys_alloc_contig(int domain, u_long npages, vm_pad KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(domain); if (low >= high) return (NULL); m_run = NULL; @@ -1167,7 +1166,7 @@ vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_lo KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(seg->domain); /* Compute the queue that is the best fit for npages. */ for (order = 0; (1 << order) < npages; order++); /* Search for a run satisfying the specified conditions. 
*/ Modified: user/jeff/numa/sys/vm/vm_phys.h ============================================================================== --- user/jeff/numa/sys/vm/vm_phys.h Mon Jan 15 18:37:03 2018 (r328011) +++ user/jeff/numa/sys/vm/vm_phys.h Mon Jan 15 19:02:15 2018 (r328012) @@ -130,11 +130,11 @@ vm_phys_domain(vm_page_t m) } static inline u_int -vm_phys_freecnt_adj(vm_page_t m, int adj) +vm_phys_freecnt_adj(int domain, int adj) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - vm_phys_domain(m)->vmd_free_count += adj; + vm_pagequeue_free_assert_locked(domain); + vm_dom[domain].vmd_free_count += adj; return (vm_cnt.v_free_count += adj); } Modified: user/jeff/numa/sys/vm/vm_reserv.c ============================================================================== --- user/jeff/numa/sys/vm/vm_reserv.c Mon Jan 15 18:37:03 2018 (r328011) +++ user/jeff/numa/sys/vm/vm_reserv.c Mon Jan 15 19:02:15 2018 (r328012) @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_param.h> #include <vm/vm_object.h> #include <vm/vm_page.h> +#include <vm/vm_pageout.h> #include <vm/vm_phys.h> #include <vm/vm_radix.h> #include <vm/vm_reserv.h> @@ -163,6 +164,9 @@ popmap_is_set(popmap_t popmap[], int i) * object's list of reservations. * * A partially populated reservation can be broken and reclaimed at any time. + * + * The reservation structure is synchronized by the per-domain pagequeue_free + * lock. The objq is synchronized by the vm_reserv_object lock. */ struct vm_reserv { TAILQ_ENTRY(vm_reserv) partpopq; @@ -170,7 +174,7 @@ struct vm_reserv { vm_object_t object; /* containing object */ vm_pindex_t pindex; /* offset within object */ vm_page_t pages; /* first page of a superpage */ - int domain; /* NUMA domain */ + int domain; /* NUMA domain, constant. 
*/ int popcnt; /* # of pages in use */ char inpartpopq; popmap_t popmap[NPOPMAP]; /* bit vector of used pages */ @@ -234,6 +238,8 @@ static long vm_reserv_reclaimed; SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations"); +static struct mtx vm_reserv_object_mtx; + static void vm_reserv_break(vm_reserv_t rv, vm_page_t m); static void vm_reserv_depopulate(vm_reserv_t rv, int index); static vm_reserv_t vm_reserv_from_page(vm_page_t m); @@ -288,12 +294,12 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS) for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { counter = 0; unused_pages = 0; - mtx_lock(&vm_page_queue_free_mtx); + vm_pagequeue_free_lock(domain); TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { counter++; unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; } - mtx_unlock(&vm_page_queue_free_mtx); + vm_pagequeue_free_unlock(domain); sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n", domain, level, unused_pages * ((int)PAGE_SIZE / 1024), counter); @@ -304,7 +310,53 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS) return (error); } +#define vm_reserv_object_lock(object) mtx_lock(&vm_reserv_object_mtx) +#define vm_reserv_object_unlock(object) mtx_unlock(&vm_reserv_object_mtx) + /* + * Remove a reservation from the object's objq. + */ +static void +vm_reserv_remove(vm_reserv_t rv) +{ + vm_object_t object; + + KASSERT(rv->object != NULL, + ("vm_reserv_remove: reserv %p is free", rv)); + KASSERT(!rv->inpartpopq, + ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv)); + object = rv->object; + vm_reserv_object_lock(object); + LIST_REMOVE(rv, objq); + rv->object = NULL; + vm_reserv_object_unlock(object); +} + +/* + * Insert a new reservation into the object's objq. 
+ */ +static void +vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex) +{ + int i; + + KASSERT(rv->object == NULL, + ("vm_reserv_insert: reserv %p isn't free", rv)); + KASSERT(rv->popcnt == 0, + ("vm_reserv_insert: reserv %p's popcnt is corrupted", rv)); + KASSERT(!rv->inpartpopq, + ("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv)); + for (i = 0; i < NPOPMAP; i++) + KASSERT(rv->popmap[i] == 0, + ("vm_reserv_insert: reserv %p's popmap is corrupted", rv)); + rv->pindex = pindex; + vm_reserv_object_lock(object); + rv->object = object; + LIST_INSERT_HEAD(&object->rvq, rv, objq); + vm_reserv_object_unlock(object); +} + +/* * Reduces the given reservation's population count. If the population count * becomes zero, the reservation is destroyed. Additionally, moves the * reservation to the tail of the partially populated reservation queue if the @@ -316,7 +368,7 @@ static void vm_reserv_depopulate(vm_reserv_t rv, int index) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(rv->domain); KASSERT(rv->object != NULL, ("vm_reserv_depopulate: reserv %p is free", rv)); KASSERT(popmap_is_set(rv->popmap, index), @@ -339,9 +391,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index) popmap_clear(rv->popmap, index); rv->popcnt--; if (rv->popcnt == 0) { - LIST_REMOVE(rv, objq); - rv->object = NULL; - rv->domain = -1; + vm_reserv_remove(rv); vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); vm_reserv_freed++; } else { @@ -361,6 +411,43 @@ vm_reserv_from_page(vm_page_t m) } /* + * Returns an existing reservation or NULL and initialized successor pointer. 
+ */ +static vm_reserv_t +vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex, + vm_page_t mpred, vm_page_t *msuccp) +{ + vm_reserv_t rv; + vm_page_t msucc; + + msucc = NULL; + if (mpred != NULL) { + KASSERT(mpred->object == object, + ("vm_reserv_from_object: object doesn't contain mpred")); + KASSERT(mpred->pindex < pindex, + ("vm_reserv_from_object: mpred doesn't precede pindex")); + rv = vm_reserv_from_page(mpred); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + msucc = TAILQ_NEXT(mpred, listq); + } else + msucc = TAILQ_FIRST(&object->memq); + if (msucc != NULL) { + KASSERT(msucc->pindex > pindex, + ("vm_reserv_from_object: msucc doesn't succeed pindex")); + rv = vm_reserv_from_page(msucc); + if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) + goto found; + } + rv = NULL; + +found: + *msuccp = msucc; + + return (rv); +} + +/* * Returns TRUE if the given reservation contains the given page index and * FALSE otherwise. */ @@ -381,7 +468,7 @@ static void vm_reserv_populate(vm_reserv_t rv, int index) { - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(rv->domain); KASSERT(rv->object != NULL, ("vm_reserv_populate: reserv %p is free", rv)); KASSERT(popmap_is_clear(rv->popmap, index), @@ -423,6 +510,98 @@ vm_reserv_populate(vm_reserv_t rv, int index) * The object and free page queue must be locked. */ vm_page_t +vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex, + int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, vm_page_t mpred) +{ + vm_paddr_t pa, size; + vm_page_t m, msucc; + vm_reserv_t rv; + int i, index; + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); + + /* + * Is a reservation fundamentally impossible? 
+ */ + if (pindex < VM_RESERV_INDEX(object, pindex) || + pindex + npages > object->size || object->resident_page_count == 0) + return (NULL); + + /* + * All reservations of a particular size have the same alignment. + * Assuming that the first page is allocated from a reservation, the + * least significant bits of its physical address can be determined + * from its offset from the beginning of the reservation and the size + * of the reservation. + * + * Could the specified index within a reservation of the smallest + * possible size satisfy the alignment and boundary requirements? + */ + pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT; + if ((pa & (alignment - 1)) != 0) + return (NULL); + size = npages << PAGE_SHIFT; + if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + return (NULL); + + /* + * Look for an existing reservation. + */ + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv == NULL) + return (NULL); + KASSERT(object != kernel_object || rv->domain == domain, + ("vm_reserv_extend_contig: Domain mismatch from reservation.")); + index = VM_RESERV_INDEX(object, pindex); + /* Does the allocation fit within the reservation? */ + if (index + npages > VM_LEVEL_0_NPAGES) + return (NULL); + domain = rv->domain; + vm_pagequeue_free_lock(domain); + if (rv->object != object || !vm_page_available(domain, req, npages)) { + m = NULL; + goto out; + } + m = &rv->pages[index]; + pa = VM_PAGE_TO_PHYS(m); + if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || + ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) { + m = NULL; + goto out; + } + /* Handle vm_page_rename(m, new_object, ...). 
*/ + for (i = 0; i < npages; i++) { + if (popmap_is_set(rv->popmap, index + i)) { + m = NULL; + goto out; + } + } + for (i = 0; i < npages; i++) + vm_reserv_populate(rv, index + i); + vm_phys_freecnt_adj(domain, -npages); +out: + vm_pagequeue_free_unlock(domain); + return (m); +} + +/* + * Allocates a contiguous set of physical pages of the given size "npages" + * from existing or newly created reservations. All of the physical pages + * must be at or above the given physical address "low" and below the given + * physical address "high". The given value "alignment" determines the + * alignment of the first physical page in the set. If the given value + * "boundary" is non-zero, then the set of physical pages cannot cross any + * physical address boundary that is a multiple of that value. Both + * "alignment" and "boundary" must be a power of two. + * + * The page "mpred" must immediately precede the offset "pindex" within the + * specified object. + * + * The object and free page queue must be locked. + */ +vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred) @@ -434,7 +613,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t u_long allocpages, maxpages, minpages; int i, index, n; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(domain); VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); @@ -463,26 +642,13 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t return (NULL); /* - * Look for an existing reservation. + * Callers should've extended an existing reservation prior to + * calling this function. If a reservation exists it is + * incompatible with the allocation. 
*/ - if (mpred != NULL) { - KASSERT(mpred->object == object, - ("vm_reserv_alloc_contig: object doesn't contain mpred")); - KASSERT(mpred->pindex < pindex, - ("vm_reserv_alloc_contig: mpred doesn't precede pindex")); - rv = vm_reserv_from_page(mpred); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - msucc = TAILQ_NEXT(mpred, listq); - } else - msucc = TAILQ_FIRST(&object->memq); - if (msucc != NULL) { - KASSERT(msucc->pindex > pindex, - ("vm_reserv_alloc_contig: msucc doesn't succeed pindex")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - } + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv != NULL) + return (NULL); /* * Could at least one reservation fit between the first index to the @@ -565,22 +731,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t KASSERT(rv->pages == m, ("vm_reserv_alloc_contig: reserv %p's pages is corrupted", rv)); - KASSERT(rv->object == NULL, - ("vm_reserv_alloc_contig: reserv %p isn't free", rv)); - LIST_INSERT_HEAD(&object->rvq, rv, objq); - rv->object = object; - rv->pindex = first; - rv->domain = domain; - KASSERT(rv->popcnt == 0, - ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted", - rv)); - KASSERT(!rv->inpartpopq, - ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE", - rv)); - for (i = 0; i < NPOPMAP; i++) - KASSERT(rv->popmap[i] == 0, - ("vm_reserv_alloc_contig: reserv %p's popmap is corrupted", - rv)); + vm_reserv_insert(rv, object, first); n = ulmin(VM_LEVEL_0_NPAGES - index, npages); for (i = 0; i < n; i++) vm_reserv_populate(rv, index + i); @@ -594,31 +745,66 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t allocpages -= VM_LEVEL_0_NPAGES; } while (allocpages >= VM_LEVEL_0_NPAGES); return (m_ret); +} +/* + * Attempts to extend an existing reservation and allocate the page to the + * object. 
+ * + * The page "mpred" must immediately precede the offset "pindex" within the + * specified object. + * + * The object must be locked. + */ +vm_page_t +vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain, + vm_page_t mpred) +{ + vm_page_t m, msucc; + vm_reserv_t rv; + int index, free_count; + + VM_OBJECT_ASSERT_WLOCKED(object); + /* - * Found a matching reservation. + * Could a reservation currently exist? */ -found: - index = VM_RESERV_INDEX(object, pindex); - /* Does the allocation fit within the reservation? */ - if (index + npages > VM_LEVEL_0_NPAGES) + if (pindex < VM_RESERV_INDEX(object, pindex) || + pindex >= object->size || object->resident_page_count == 0) return (NULL); - m = &rv->pages[index]; - pa = VM_PAGE_TO_PHYS(m); - if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || - ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + + /* + * Look for an existing reservation. + */ + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv == NULL) return (NULL); - /* Handle vm_page_rename(m, new_object, ...). */ - for (i = 0; i < npages; i++) - if (popmap_is_set(rv->popmap, index + i)) - return (NULL); - for (i = 0; i < npages; i++) - vm_reserv_populate(rv, index + i); + + KASSERT(object != kernel_object || rv->domain == domain, + ("vm_reserv_extend: Domain mismatch from reservation.")); + domain = rv->domain; + index = VM_RESERV_INDEX(object, pindex); + m = &rv->pages[index]; + vm_pagequeue_free_lock(domain); + if (vm_page_available(domain, req, 1) == 0 || + /* Handle reclaim race. */ + rv->object != object || + /* Handle vm_page_rename(m, new_object, ...). */ + popmap_is_set(rv->popmap, index)) + m = NULL; + if (m != NULL) + vm_reserv_populate(rv, index); + free_count = vm_phys_freecnt_adj(domain, -1); + vm_pagequeue_free_unlock(domain); + + if (vm_paging_needed(free_count)) + pagedaemon_wakeup(); + return (m); } /* - * Allocates a page from an existing or newly created reservation. 
+ * Allocates a page from an existing reservation. * * The page "mpred" must immediately precede the offset "pindex" within the * specified object. @@ -632,9 +818,9 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p vm_page_t m, msucc; vm_pindex_t first, leftcap, rightcap; vm_reserv_t rv; - int i, index; + int index; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + vm_pagequeue_free_assert_locked(domain); VM_OBJECT_ASSERT_WLOCKED(object); /* @@ -645,26 +831,13 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p return (NULL); /* - * Look for an existing reservation. + * Callers should've extended an existing reservation prior to + * calling this function. If a reservation exists it is + * incompatible with the allocation. */ - if (mpred != NULL) { - KASSERT(mpred->object == object, - ("vm_reserv_alloc_page: object doesn't contain mpred")); - KASSERT(mpred->pindex < pindex, - ("vm_reserv_alloc_page: mpred doesn't precede pindex")); - rv = vm_reserv_from_page(mpred); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - msucc = TAILQ_NEXT(mpred, listq); - } else - msucc = TAILQ_FIRST(&object->memq); - if (msucc != NULL) { - KASSERT(msucc->pindex > pindex, - ("vm_reserv_alloc_page: msucc doesn't succeed pindex")); - rv = vm_reserv_from_page(msucc); - if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) - goto found; - } + rv = vm_reserv_from_object(object, pindex, mpred, &msucc); + if (rv != NULL) + return (NULL); /* * Could a reservation fit between the first index to the left that @@ -712,37 +885,10 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p rv = vm_reserv_from_page(m); KASSERT(rv->pages == m, ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv)); - KASSERT(rv->object == NULL, - ("vm_reserv_alloc_page: reserv %p isn't free", rv)); - LIST_INSERT_HEAD(&object->rvq, rv, objq); - rv->object = object; - rv->pindex = first; - rv->domain = domain; - KASSERT(rv->popcnt == 0, - ("vm_reserv_alloc_page: 
reserv %p's popcnt is corrupted", rv)); - KASSERT(!rv->inpartpopq, - ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv)); - for (i = 0; i < NPOPMAP; i++) - KASSERT(rv->popmap[i] == 0, - ("vm_reserv_alloc_page: reserv %p's popmap is corrupted", - rv)); + vm_reserv_insert(rv, object, first); index = VM_RESERV_INDEX(object, pindex); vm_reserv_populate(rv, index); return (&rv->pages[index]); - - /* - * Found a matching reservation. - */ -found: - index = VM_RESERV_INDEX(object, pindex); - m = &rv->pages[index]; - KASSERT(object != kernel_object || vm_phys_domidx(m) == domain, - ("vm_reserv_alloc_page: Domain mismatch from reservation.")); - /* Handle vm_page_rename(m, new_object, ...). */ - if (popmap_is_set(rv->popmap, index)) - return (NULL); - vm_reserv_populate(rv, index); - return (m); } /* @@ -759,14 +905,8 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m) { int begin_zeroes, hi, i, lo; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - KASSERT(rv->object != NULL, - ("vm_reserv_break: reserv %p is free", rv)); - KASSERT(!rv->inpartpopq, - ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv)); - LIST_REMOVE(rv, objq); - rv->object = NULL; - rv->domain = -1; + vm_pagequeue_free_assert_locked(rv->domain); + vm_reserv_remove(rv); if (m != NULL) { /* * Since the reservation is being broken, there is no harm in @@ -830,9 +970,25 @@ void vm_reserv_break_all(vm_object_t object) { vm_reserv_t rv; + int domain = -1; - mtx_lock(&vm_page_queue_free_mtx); + /* + * This access of object->rvq is unsynchronized so that the + * object rvq lock can nest after the pagequeue_free lock. We + * must check for races in the results. However, the object + * lock prevents new additions, so we are guaranteed that when + * it returns NULL the object is properly empty. + */ while ((rv = LIST_FIRST(&object->rvq)) != NULL) { + if (domain != rv->domain) { + if (domain != -1) + vm_pagequeue_free_unlock(domain); + domain = rv->domain; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201801151902.w0FJ2FMD011160>