Date: Wed, 17 Jan 2018 22:10:58 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r328097 - in user/jeff/numa/sys: arm/arm fs/tmpfs kern powerpc/booke vm
Message-ID: <201801172210.w0HMAwD3006695@repo.freebsd.org>
Author: jeff
Date: Wed Jan 17 22:10:58 2018
New Revision: 328097
URL: https://svnweb.freebsd.org/changeset/base/328097

Log:
  Fix a reservation locking bug spotted by markj.  Make the reservation
  object lock an array of locks.  Fix the paging and laundry targets.
  Make laundry per-domain.  Fix a compile error on powerpc and arm.

Modified:
  user/jeff/numa/sys/arm/arm/pmap-v4.c
  user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c
  user/jeff/numa/sys/kern/subr_witness.c
  user/jeff/numa/sys/powerpc/booke/pmap.c
  user/jeff/numa/sys/vm/vm_meter.c
  user/jeff/numa/sys/vm/vm_page.c
  user/jeff/numa/sys/vm/vm_pageout.c
  user/jeff/numa/sys/vm/vm_pagequeue.h
  user/jeff/numa/sys/vm/vm_reserv.c

Modified: user/jeff/numa/sys/arm/arm/pmap-v4.c
==============================================================================
--- user/jeff/numa/sys/arm/arm/pmap-v4.c        Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/arm/arm/pmap-v4.c        Wed Jan 17 22:10:58 2018        (r328097)
@@ -3817,7 +3817,7 @@ pmap_get_pv_entry(void)
 
         pv_entry_count++;
         if (pv_entry_count > pv_entry_high_water)
-                pagedaemon_wakeup();
+                pagedaemon_wakeup(0); /* XXX ARM NUMA */
         ret_value = uma_zalloc(pvzone, M_NOWAIT);
         return ret_value;
 }

Modified: user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c    Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/fs/tmpfs/tmpfs_subr.c    Wed Jan 17 22:10:58 2018        (r328097)
@@ -106,6 +106,7 @@ tmpfs_mem_avail(void)
 {
         vm_ooffset_t avail;
 
+        /* XXX */
         avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved;
         if (__predict_false(avail < 0))
                 avail = 0;

Modified: user/jeff/numa/sys/kern/subr_witness.c
==============================================================================
--- user/jeff/numa/sys/kern/subr_witness.c      Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/kern/subr_witness.c      Wed Jan 17 22:10:58 2018        (r328097)
@@ -139,7 +139,7 @@ __FBSDID("$FreeBSD$");
 #define WITNESS_COUNT           1536
 #endif
 #define WITNESS_HASH_SIZE       251     /* Prime, gives load factor < 2 */
-#define WITNESS_PENDLIST        (2048 + MAXCPU)
+#define WITNESS_PENDLIST        (2048 + (MAXCPU * 4))
 
 /* Allocate 256 KB of stack data space */
 #define WITNESS_LO_DATA_COUNT   2048

Modified: user/jeff/numa/sys/powerpc/booke/pmap.c
==============================================================================
--- user/jeff/numa/sys/powerpc/booke/pmap.c     Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/powerpc/booke/pmap.c     Wed Jan 17 22:10:58 2018        (r328097)
@@ -1183,7 +1183,7 @@ pv_alloc(void)
 
         pv_entry_count++;
         if (pv_entry_count > pv_entry_high_water)
-                pagedaemon_wakeup();
+                pagedaemon_wakeup(0); /* XXX powerpc NUMA */
         pv = uma_zalloc(pvzone, M_NOWAIT);
 
         return (pv);

Modified: user/jeff/numa/sys/vm/vm_meter.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_meter.c    Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/vm/vm_meter.c    Wed Jan 17 22:10:58 2018        (r328097)
@@ -470,7 +470,13 @@ vm_inactive_count(void)
 
 u_int
 vm_laundry_count(void)
 {
+        u_int v;
+        int i;
 
-        return (vm_dom[0].vmd_pagequeues[PQ_LAUNDRY].pq_cnt);
+        v = 0;
+        for (i = 0; i < vm_ndomains; i++)
+                v += vm_dom[i].vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
+
+        return (v);
 }

Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c     Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/vm/vm_page.c     Wed Jan 17 22:10:58 2018        (r328097)
@@ -484,7 +484,7 @@ vm_page_startup(vm_offset_t vaddr)
         for (i = 0; i < PA_LOCK_COUNT; i++)
                 mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
         for (i = 0; i < vm_ndomains; i++)
-                vm_page_domain_init(&vm_dom[i]);
+                vm_page_domain_init(VM_DOMAIN(i));
 
         /*
          * Almost all of the pages needed for bootstrapping UMA are used
@@ -709,7 +709,7 @@ vm_page_startup(vm_offset_t vaddr)
                 vm_pagequeue_free_unlock(seg->domain);
                 vm_cnt.v_page_count += (u_int)pagecount;
 
-                vmd = &vm_dom[seg->domain];
+                vmd = VM_DOMAIN(seg->domain);
                 vmd->vmd_page_count += (u_int)pagecount;
                 vmd->vmd_segs |= 1UL << m->segind;
                 break;
@@ -1644,7 +1644,7 @@ vm_page_available(int domain, int req, int npages)
         struct vm_domain *vmd;
 
         vm_pagequeue_free_assert_locked(domain);
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
         req = req & VM_ALLOC_CLASS_MASK;
 
         /*
@@ -1745,7 +1745,7 @@ again:
          * Don't wakeup too often - wakeup the pageout daemon when
          * we would be nearly out of memory.
          */
-        if (vm_paging_needed(domain, free_count))
+        if (vm_paging_needed(VM_DOMAIN(domain), free_count))
                 pagedaemon_wakeup(domain);
 #if VM_NRESERVLEVEL > 0
 found:
@@ -1874,6 +1874,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin
     int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr)
 {
+        struct vm_domain *vmd;
         vm_page_t m, m_ret, mpred;
         u_int busy_lock, flags, oflags;
 #if VM_NRESERVLEVEL > 0
@@ -2016,7 +2017,8 @@ found:
                         pmap_page_set_memattr(m, memattr);
                 pindex++;
         }
-        if (vm_paging_needed(domain, vm_dom[domain].vmd_free_count))
+        vmd = VM_DOMAIN(domain);
+        if (vm_paging_needed(vmd, vmd->vmd_free_count))
                 pagedaemon_wakeup(domain);
         return (m_ret);
 }
@@ -2117,7 +2119,7 @@ again:
         }
         /* Unmanaged pages don't use "act_count". */
         m->oflags = VPO_UNMANAGED;
-        if (vm_paging_needed(domain, free_count))
+        if (vm_paging_needed(VM_DOMAIN(domain), free_count))
                 pagedaemon_wakeup(domain);
         return (m);
 }
@@ -2586,7 +2588,7 @@ vm_page_reclaim_contig_domain(int domain, int req, u_l
          * Return if the number of free pages cannot satisfy the requested
          * allocation.
          */
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
         count = vmd->vmd_free_count;
         if (count < npages + vmd->vmd_free_reserved || (count < npages +
             vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
@@ -2679,7 +2681,7 @@ vm_wait_domain(int domain)
         struct vm_domain *vmd;
 
         vm_pagequeue_free_assert_locked(domain);
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
 
         if (curproc == pageproc) {
                 vmd->vmd_pageout_pages_needed = 1;
@@ -2720,7 +2722,7 @@ vm_page_alloc_fail(vm_object_t object, int domain, int
 
         vm_pagequeue_free_assert_locked(domain);
 
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
         atomic_add_int(&vmd->vmd_pageout_deficit,
             max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
         if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) {
@@ -2763,10 +2765,7 @@ struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
 
-        if (vm_page_in_laundry(m))
-                return (&vm_dom[0].vmd_pagequeues[m->queue]);
-        else
-                return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
+        return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
 }
 
 /*
@@ -2828,10 +2827,7 @@ vm_page_enqueue(uint8_t queue, vm_page_t m)
         KASSERT(queue < PQ_COUNT,
             ("vm_page_enqueue: invalid queue %u request for page %p",
             queue, m));
-        if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE)
-                pq = &vm_dom[0].vmd_pagequeues[queue];
-        else
-                pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
+        pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
         vm_pagequeue_lock(pq);
         m->queue = queue;
         TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -2926,7 +2922,7 @@ vm_page_free_wakeup(int domain)
         struct vm_domain *vmd;
 
         vm_pagequeue_free_assert_locked(domain);
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
 
         /*
          * if pageout daemon needs pages, then tell it that there are
@@ -2942,7 +2938,7 @@ vm_page_free_wakeup(int domain)
          * high water mark. And wakeup scheduler process if we have
          * lots of memory. this process will swapin processes.
          */
-        if (vmd->vmd_pages_needed && !vm_page_count_min()) {
+        if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
                 vmd->vmd_pages_needed = false;
                 wakeup(&vmd->vmd_free_count);
         }

Modified: user/jeff/numa/sys/vm/vm_pageout.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_pageout.c  Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/vm/vm_pageout.c  Wed Jan 17 22:10:58 2018        (r328097)
@@ -150,14 +150,6 @@ SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
 
 static int vm_pageout_oom_seq = 12;
 
-/* Pending request for dirty page laundering. */
-static enum {
-        VM_LAUNDRY_IDLE,
-        VM_LAUNDRY_BACKGROUND,
-        VM_LAUNDRY_SHORTFALL
-} vm_laundry_request = VM_LAUNDRY_IDLE;
-static int vm_inactq_scans;
-
 static int vm_pageout_update_period;
 static int disable_swap_pageouts;
 static int lowmem_period = 10;
@@ -958,10 +950,9 @@ vm_pageout_laundry_worker(void *arg)
         u_int inactq_scans, last_launder;
         int domain, last_target, launder, shortfall, shortfall_cycle, target;
         bool in_shortfall;
-        int i;
 
         domain = (uintptr_t)arg;
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
         pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
         KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
         vm_pageout_init_marker(&vmd->vmd_laundry_marker, PQ_LAUNDRY);
@@ -1000,7 +991,7 @@ vm_pageout_laundry_worker(void *arg)
                         target = shortfall;
                 } else if (!in_shortfall)
                         goto trybackground;
-                else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+                else if (shortfall_cycle == 0 || vm_laundry_target(vmd) <= 0) {
                         /*
                          * We recently entered shortfall and began laundering
                          * pages.  If we have completed that laundering run
@@ -1034,12 +1025,9 @@ vm_pageout_laundry_worker(void *arg)
                  * memory pressure required to trigger laundering decreases.
                  */
trybackground:
-                nclean = 0;
-                for (i = 0; i < vm_ndomains; i++) {
-                        nclean += vm_dom[i].vmd_free_count;
-                        nclean += vm_dom[i].vmd_pagequeues[PQ_INACTIVE].pq_cnt;
-                }
-                ndirty = vm_laundry_count();
+                nclean = vmd->vmd_free_count +
+                    vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
+                ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
                 if (target == 0 && inactq_scans != last_launder &&
                     ndirty * isqrt(inactq_scans - last_launder) >= nclean) {
                         target = vm_background_launder_target;
@@ -1085,8 +1073,8 @@ dolaundry:
                  * kicks us.
                  */
                 vm_pagequeue_lock(pq);
-                if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
-                        (void)mtx_sleep(&vm_laundry_request,
+                if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
+                        (void)mtx_sleep(&vmd->vmd_laundry_request,
                             vm_pagequeue_lockptr(pq), PVM, "launds", 0);
 
                 /*
@@ -1094,17 +1082,17 @@ dolaundry:
                  * a shortfall laundering unless we're already in the middle of
                  * one.  This may preempt a background laundering.
                  */
-                if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+                if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
                     (!in_shortfall || shortfall_cycle == 0)) {
-                        shortfall = vm_laundry_target() +
+                        shortfall = vm_laundry_target(vmd) +
                             vmd->vmd_pageout_deficit;
                         target = 0;
                 } else
                         shortfall = 0;
 
                 if (target == 0)
-                        vm_laundry_request = VM_LAUNDRY_IDLE;
-                inactq_scans = vm_inactq_scans;
+                        vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
+                inactq_scans = vmd->vmd_inactq_scans;
                 vm_pagequeue_unlock(pq);
         }
 }
@@ -1133,7 +1121,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
          * If we need to reclaim memory ask kernel caches to return
          * some.  We rate limit to avoid thrashing.
          */
-        if (vmd == &vm_dom[0] && pass > 0 &&
+        if (vmd == VM_DOMAIN(0) && pass > 0 &&
             (time_uptime - lowmem_uptime) >= lowmem_period) {
                 /*
                  * Decrease registered cache sizes.
@@ -1356,18 +1344,20 @@ drop_page:
          * keep count.
          */
         if (starting_page_shortage > 0) {
-                pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+                pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
                 vm_pagequeue_lock(pq);
-                if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+                if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
                     (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
                         if (page_shortage > 0) {
-                                vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+                                vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
                                 VM_CNT_INC(v_pdshortfalls);
-                        } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
-                                vm_laundry_request = VM_LAUNDRY_BACKGROUND;
-                        wakeup(&vm_laundry_request);
+                        } else if (vmd->vmd_laundry_request !=
+                            VM_LAUNDRY_SHORTFALL)
+                                vmd->vmd_laundry_request =
+                                    VM_LAUNDRY_BACKGROUND;
+                        wakeup(&vmd->vmd_laundry_request);
                 }
-                vm_inactq_scans++;
+                vmd->vmd_inactq_scans++;
                 vm_pagequeue_unlock(pq);
         }
 
@@ -1397,7 +1387,7 @@ drop_page:
          * ensuring that they can eventually be reused.
          */
         inactq_shortage = vmd->vmd_inactive_target - (pq->pq_cnt +
-            /* XXX */vm_laundry_count() / act_scan_laundry_weight) +
+            vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight) +
             vm_paging_target(vmd) + deficit + addl_page_shortage;
         inactq_shortage *= act_scan_laundry_weight;
 
@@ -1751,7 +1741,7 @@ vm_pageout_oom(int shortage)
                 _PRELE(bigproc);
                 PROC_UNLOCK(bigproc);
                 for (i = 0; i < vm_ndomains; i++)
-                        wakeup(&vm_dom[i].vmd_free_count);
+                        wakeup(&VM_DOMAIN(i)->vmd_free_count);
         }
 }
 
@@ -1763,7 +1753,7 @@ vm_pageout_worker(void *arg)
         bool target_met;
 
         domain = (uintptr_t)arg;
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
         pass = 0;
         target_met = true;
 
@@ -1798,7 +1788,7 @@ vm_pageout_worker(void *arg)
                  * thread will, nonetheless, wait until another page is freed
                  * or this wakeup is performed.
                  */
-                if (vmd->vmd_pages_needed && !vm_page_count_min() /* XXX */) {
+                if (vmd->vmd_pages_needed && !vm_paging_min(vmd)) {
                         vmd->vmd_pages_needed = false;
                         wakeup(&vmd->vmd_free_count);
                 }
@@ -1861,7 +1851,7 @@ vm_pageout_init_domain(int domain)
 {
         struct vm_domain *vmd;
 
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
         vmd->vmd_interrupt_free_min = 2;
 
         /*
@@ -1909,7 +1899,7 @@ vm_pageout_init(void)
                 struct vm_domain *vmd;
 
                 vm_pageout_init_domain(i);
-                vmd = &vm_dom[i];
+                vmd = VM_DOMAIN(i);
                 vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
                 vm_cnt.v_free_target += vmd->vmd_free_target;
                 vm_cnt.v_free_min += vmd->vmd_free_min;
@@ -1961,6 +1951,12 @@ vm_pageout(void)
                         panic("starting pageout for domain %d, error %d\n",
                             i, error);
                 }
+                error = kthread_add(vm_pageout_laundry_worker,
+                    (void *)(uintptr_t)i, curproc, NULL, 0, 0,
+                    "laundry: dom%d", i);
+                if (error != 0)
+                        panic("starting laundry for domain %d, error %d",
+                            i, error);
         }
         error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, 0, 0,
             "uma");
@@ -1978,7 +1974,7 @@ pagedaemon_wakeup(int domain)
         struct vm_domain *vmd;
 
         vm_pagequeue_free_assert_unlocked(domain);
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
 
         if (!vmd->vmd_pageout_wanted && curthread->td_proc != pageproc) {
                 vmd->vmd_pageout_wanted = true;
@@ -1997,7 +1993,7 @@ pagedaemon_wait(int domain, int pri, const char *wmesg
         struct vm_domain *vmd;
 
         vm_pagequeue_free_assert_locked(domain);
-        vmd = &vm_dom[domain];
+        vmd = VM_DOMAIN(domain);
 
         /*
          * vmd_pageout_wanted may have been set by an advisory wakeup, but if

Modified: user/jeff/numa/sys/vm/vm_pagequeue.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_pagequeue.h        Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/vm/vm_pagequeue.h        Wed Jan 17 22:10:58 2018        (r328097)
@@ -92,9 +92,13 @@ struct vm_domain {
         int vmd_pageout_deficit;        /* Estimated number of pages deficit */
         bool vmd_pages_needed;          /* Are threads waiting for free pages? */
         bool vmd_pageout_wanted;        /* pageout daemon wait channel */
+        int vmd_inactq_scans;
+        enum {
+                VM_LAUNDRY_IDLE = 0,
+                VM_LAUNDRY_BACKGROUND,
+                VM_LAUNDRY_SHORTFALL
+        } vmd_laundry_request;
 
-
         u_int vmd_free_reserved;        /* (c) pages reserved for deadlock */
         u_int vmd_free_target;          /* (c) pages desired free */
         u_int vmd_free_min;             /* (c) pages desired free */
@@ -107,6 +111,8 @@ struct vm_domain {
 
 extern struct vm_domain vm_dom[MAXMEMDOM];
 
+#define VM_DOMAIN(n)    (&vm_dom[(n)])
+
 #define vm_pagequeue_assert_locked(pq)  mtx_assert(&(pq)->pq_mutex, MA_OWNED)
 #define vm_pagequeue_lock(pq)           mtx_lock(&(pq)->pq_mutex)
 #define vm_pagequeue_lockptr(pq)        (&(pq)->pq_mutex)
@@ -119,7 +125,7 @@ extern struct vm_domain vm_dom[MAXMEMDOM];
 #define vm_pagequeue_free_lock(n)                                       \
         mtx_lock(vm_pagequeue_free_lockptr((n)))
 #define vm_pagequeue_free_lockptr(n)                                    \
-        (&vm_dom[(n)].vmd_pagequeue_free_mtx)
+        (&VM_DOMAIN((n))->vmd_pagequeue_free_mtx)
 #define vm_pagequeue_free_unlock(n)                                     \
         mtx_unlock(vm_pagequeue_free_lockptr((n)))
 
@@ -152,7 +158,7 @@ static inline struct vm_domain *
 vm_pagequeue_domain(vm_page_t m)
 {
 
-        return (&vm_dom[vm_phys_domain(m)]);
+        return (VM_DOMAIN(vm_phys_domain(m)));
 }
 
 /*
@@ -170,22 +176,28 @@ vm_paging_target(struct vm_domain *vmd)
  * Returns TRUE if the pagedaemon needs to be woken up.
  */
 static inline int
-vm_paging_needed(int domain, u_int free_count)
+vm_paging_needed(struct vm_domain *vmd, u_int free_count)
 {
 
-        return (free_count < vm_dom[domain].vmd_pageout_wakeup_thresh);
+        return (free_count < vmd->vmd_pageout_wakeup_thresh);
 }
 
+static inline int
+vm_paging_min(struct vm_domain *vmd)
+{
+
+        return (vmd->vmd_free_min > vmd->vmd_free_count);
+}
+
 /*
  * Return the number of pages we need to launder.
  * A positive number indicates that we have a shortfall of clean pages.
  */
 static inline int
-vm_laundry_target(void)
+vm_laundry_target(struct vm_domain *vmd)
 {
 
-        return (0);
-        /* XXX return (vm_paging_target()); */
+        return (vm_paging_target(vmd));
 }
 
 #endif  /* _KERNEL */

Modified: user/jeff/numa/sys/vm/vm_reserv.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_reserv.c   Wed Jan 17 21:52:12 2018        (r328096)
+++ user/jeff/numa/sys/vm/vm_reserv.c   Wed Jan 17 22:10:58 2018        (r328097)
@@ -166,19 +166,20 @@ popmap_is_set(popmap_t popmap[], int i)
  *
  * A partially populated reservation can be broken and reclaimed at any time.
  *
- * The reservation structure is synchronized by the per-domain pagequeue_free
- * lock.  The objq is synchronized by the vm_reserv_object lock.
+ * f - vm_pagequeue_free_lock
+ * o - vm_reserv_object_lock
+ * c - constant after boot
  */
 struct vm_reserv {
-        TAILQ_ENTRY(vm_reserv) partpopq;
-        LIST_ENTRY(vm_reserv) objq;
-        vm_object_t     object;                 /* containing object */
-        vm_pindex_t     pindex;                 /* offset within object */
-        vm_page_t       pages;                  /* first page of a superpage */
-        int             domain;                 /* NUMA domain, constant. */
-        int             popcnt;                 /* # of pages in use */
-        char            inpartpopq;
-        popmap_t        popmap[NPOPMAP];        /* bit vector of used pages */
+        TAILQ_ENTRY(vm_reserv) partpopq;        /* (f) per-domain queue. */
+        LIST_ENTRY(vm_reserv) objq;             /* (o, f) object queue */
+        vm_object_t     object;                 /* (o, f) containing object */
+        vm_pindex_t     pindex;                 /* (o, f) offset in object */
+        vm_page_t       pages;                  /* (c) first page */
+        int             domain;                 /* (c) NUMA domain. */
+        int             popcnt;                 /* (f) # of pages in use */
+        char            inpartpopq;             /* (f) */
+        popmap_t        popmap[NPOPMAP];        /* (f) bit vector, used pages */
 };
 
 /*
@@ -239,8 +240,25 @@ static long vm_reserv_reclaimed;
 SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
     &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
 
-static struct mtx vm_reserv_object_mtx;
+/*
+ * The object lock pool is used to synchronize the rvq.  We cannot use a
+ * pool mutex because it is required before malloc works.
+ *
+ * The "hash" function could be made faster without divide and modulo.
+ */
+#define VM_RESERV_OBJ_LOCK_COUNT        MAXCPU
+struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];
+
+#define vm_reserv_object_lock_idx(object) \
+            (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
+#define vm_reserv_object_lock_ptr(object) \
+            &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
+#define vm_reserv_object_lock(object) \
+            mtx_lock(vm_reserv_object_lock_ptr((object)))
+#define vm_reserv_object_unlock(object) \
+            mtx_unlock(vm_reserv_object_lock_ptr((object)))
+
 static void             vm_reserv_break(vm_reserv_t rv, vm_page_t m);
 static void             vm_reserv_depopulate(vm_reserv_t rv, int index);
 static vm_reserv_t      vm_reserv_from_page(vm_page_t m);
@@ -311,9 +329,6 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
         return (error);
 }
 
-#define vm_reserv_object_lock(object)   mtx_lock(&vm_reserv_object_mtx)
-#define vm_reserv_object_unlock(object) mtx_unlock(&vm_reserv_object_mtx)
-
 /*
  * Remove a reservation from the object's objq.
  */
@@ -350,8 +365,8 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, v
         for (i = 0; i < NPOPMAP; i++)
                 KASSERT(rv->popmap[i] == 0,
                     ("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
-        rv->pindex = pindex;
         vm_reserv_object_lock(object);
+        rv->pindex = pindex;
         rv->object = object;
         LIST_INSERT_HEAD(&object->rvq, rv, objq);
         vm_reserv_object_unlock(object);
@@ -655,29 +670,36 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
         * Could at least one reservation fit between the first index to the
         * left that can be used ("leftcap") and the first index to the right
         * that cannot be used ("rightcap")?
+        *
+        * We must synchronize with the reserv object lock to protect the
+        * pindex/object of the resulting reservations against rename while
+        * we are inspecting.
         */
        first = pindex - VM_RESERV_INDEX(object, pindex);
+       minpages = VM_RESERV_INDEX(object, pindex) + npages;
+       maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
+       allocpages = maxpages;
+       vm_reserv_object_lock(object);
        if (mpred != NULL) {
-               /* XXX unlocked rv access */
                if ((rv = vm_reserv_from_page(mpred))->object != object)
                        leftcap = mpred->pindex + 1;
                else
                        leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
-               if (leftcap > first)
+               if (leftcap > first) {
+                       vm_reserv_object_unlock(object);
                        return (NULL);
+               }
        }
-       minpages = VM_RESERV_INDEX(object, pindex) + npages;
-       maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
-       allocpages = maxpages;
        if (msucc != NULL) {
-               /* XXX unlocked rv access */
                if ((rv = vm_reserv_from_page(msucc))->object != object)
                        rightcap = msucc->pindex;
                else
                        rightcap = rv->pindex;
                if (first + maxpages > rightcap) {
-                       if (maxpages == VM_LEVEL_0_NPAGES)
+                       if (maxpages == VM_LEVEL_0_NPAGES) {
+                               vm_reserv_object_unlock(object);
                                return (NULL);
+                       }
 
                        /*
                         * At least one reservation will fit between "leftcap"
@@ -688,6 +710,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
                        allocpages = minpages;
                }
        }
+       vm_reserv_object_unlock(object);
 
        /*
        * Would the last new reservation extend past the end of the object?
@@ -800,7 +823,7 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde
        free_count = vm_pagequeue_freecnt_adj(domain, -1);
        vm_pagequeue_free_unlock(domain);
 
-       if (vm_paging_needed(domain, free_count))
+       if (vm_paging_needed(VM_DOMAIN(domain), free_count))
                pagedaemon_wakeup(domain);
 
        return (m);
@@ -845,26 +868,34 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
        /*
        * Could a reservation fit between the first index to the left that
        * can be used and the first index to the right that cannot be used?
+        *
+        * We must synchronize with the reserv object lock to protect the
+        * pindex/object of the resulting reservations against rename while
+        * we are inspecting.
        */
       first = pindex - VM_RESERV_INDEX(object, pindex);
+      vm_reserv_object_lock(object);
       if (mpred != NULL) {
-              /* XXX unlocked rv access */
              if ((rv = vm_reserv_from_page(mpred))->object != object)
                      leftcap = mpred->pindex + 1;
              else
                      leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
-              if (leftcap > first)
+              if (leftcap > first) {
+                      vm_reserv_object_unlock(object);
                      return (NULL);
+              }
       }
       if (msucc != NULL) {
-              /* XXX unlocked rv access */
              if ((rv = vm_reserv_from_page(msucc))->object != object)
                      rightcap = msucc->pindex;
              else
                      rightcap = rv->pindex;
-              if (first + VM_LEVEL_0_NPAGES > rightcap)
+              if (first + VM_LEVEL_0_NPAGES > rightcap) {
+                      vm_reserv_object_unlock(object);
                      return (NULL);
+              }
       }
+      vm_reserv_object_unlock(object);
 
       /*
        * Would a new reservation extend past the end of the object?
@@ -1250,18 +1281,15 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object,
        if (rv->object == old_object) {
                vm_pagequeue_free_lock(rv->domain);
                if (rv->object == old_object) {
-                       /*
-                        * XXX Do we need to synchronize them simultaneously?
-                        * or does the pagequeue_free lock protect enough?
-                        */
                        vm_reserv_object_lock(old_object);
+                       rv->object = NULL;
                        LIST_REMOVE(rv, objq);
                        vm_reserv_object_unlock(old_object);
                        vm_reserv_object_lock(new_object);
                        rv->object = new_object;
+                       rv->pindex -= old_object_offset;
                        LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
                        vm_reserv_object_unlock(new_object);
-                       rv->pindex -= old_object_offset;
                }
                vm_pagequeue_free_unlock(rv->domain);
        }
@@ -1293,6 +1321,7 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
 {
        vm_paddr_t new_end;
        size_t size;
+       int i;
 
        /*
        * Calculate the size (in bytes) of the reservation array.  Round up
@@ -1312,7 +1341,9 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
            VM_PROT_READ | VM_PROT_WRITE);
        bzero(vm_reserv_array, size);
 
-       mtx_init(&vm_reserv_object_mtx, "resv obj lock", NULL, MTX_DEF);
+       for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
+               mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
+                   MTX_DEF);
 
        /*
        * Return the next available physical address.
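
The reservation object lock change follows the classic lock-striping pattern: rather than one global vm_reserv_object_mtx, an array of cache-line-padded mutexes is indexed by a hash of the object pointer, so unrelated objects rarely contend, and the array still works before malloc(9) is available, which is what rules out a pool mutex here. The sketch below is a hypothetical userland reduction of the same idea using pthreads, not kernel code; struct obj, OBJ_LOCK_COUNT, and the helper names are illustrative stand-ins for struct vm_object, MAXCPU, and the vm_reserv_object_* macros added above.

    /*
     * Hypothetical userland sketch of the lock striping used for
     * vm_reserv_object_mtx in this commit; pthread_mutex_t stands in
     * for struct mtx_padalign.
     */
    #include <pthread.h>
    #include <stdint.h>

    #define OBJ_LOCK_COUNT  64      /* the commit uses MAXCPU here */

    struct obj {                    /* stand-in for struct vm_object */
            int placeholder;
    };

    static pthread_mutex_t obj_mtx[OBJ_LOCK_COUNT];

    /* Mirrors the initialization loop added to vm_reserv_startup(). */
    static void
    obj_locks_init(void)
    {
            int i;

            for (i = 0; i < OBJ_LOCK_COUNT; i++)
                    pthread_mutex_init(&obj_mtx[i], NULL);
    }

    /*
     * Hash the object's address to a lock index.  Dividing by the
     * object size first discards low bits that are identical for all
     * allocations of this type; as the commit's comment notes, the
     * divide and modulo could be replaced by cheaper bit operations.
     */
    static unsigned
    obj_lock_idx(const struct obj *o)
    {

            return (((uintptr_t)o / sizeof(*o)) % OBJ_LOCK_COUNT);
    }

    static void
    obj_lock(const struct obj *o)
    {

            pthread_mutex_lock(&obj_mtx[obj_lock_idx(o)]);
    }

    static void
    obj_unlock(const struct obj *o)
    {

            pthread_mutex_unlock(&obj_mtx[obj_lock_idx(o)]);
    }

Because two objects can hash to the same stripe, a thread must not hold two object locks at once without an ordering between them; the vm_reserv_rename() path above stays safe by taking and releasing the old object's lock before taking the new object's.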
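On the per-domain laundry change: each domain's laundry thread now weighs only its own dirty page count (PQ_LAUNDRY) against its own clean page supply (free plus inactive) when deciding whether a background laundering run is due, instead of summing those counts across every domain. The following is a self-contained restatement of that trigger arithmetic, illustrative only: the *_cnt parameters are hypothetical stand-ins for the per-domain vmd_* counters, and isqrt() here is a generic integer square root standing in for the kernel helper of the same name.

    #include <stdbool.h>

    /* Classic bit-by-bit integer square root. */
    static unsigned
    isqrt(unsigned num)
    {
            unsigned bit, root, tmp;

            bit = 1u << 30;         /* highest power of four in 32 bits */
            while (bit > num)
                    bit >>= 2;
            root = 0;
            while (bit != 0) {
                    tmp = root + bit;
                    root >>= 1;
                    if (num >= tmp) {
                            num -= tmp;
                            root += bit;
                    }
                    bit >>= 2;
            }
            return (root);
    }

    /*
     * Background laundering starts once the dirty count, weighted by
     * the square root of the number of inactive-queue scans since the
     * last laundering run, overtakes the clean count.  Every input is
     * a single domain's counter; before this commit the totals were
     * summed over all domains.
     */
    static bool
    background_launder_due(unsigned free_cnt, unsigned inactive_cnt,
        unsigned laundry_cnt, unsigned inactq_scans, unsigned last_launder)
    {
            unsigned nclean, ndirty;

            nclean = free_cnt + inactive_cnt;
            ndirty = laundry_cnt;
            return (inactq_scans != last_launder &&
                ndirty * isqrt(inactq_scans - last_launder) >= nclean);
    }

The square-root weighting lowers the effective threshold the longer the inactive queue goes without a laundering run, so sustained light memory pressure eventually triggers background laundering even when dirty pages are a small fraction of the clean total.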