Date: Fri, 23 Mar 2018 18:41:28 +0000 (UTC)
From: Mark Johnston <markj@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r331457 - in user/jeff/numa/sys: amd64/include kern vm
Message-ID: <201803231841.w2NIfS7b067849@repo.freebsd.org>
Author: markj Date: Fri Mar 23 18:41:28 2018 New Revision: 331457 URL: https://svnweb.freebsd.org/changeset/base/331457 Log: Add the WIP batch queue code. Discussed with: jeff Modified: user/jeff/numa/sys/amd64/include/vmparam.h user/jeff/numa/sys/kern/subr_witness.c user/jeff/numa/sys/vm/vm_object.c user/jeff/numa/sys/vm/vm_page.c user/jeff/numa/sys/vm/vm_page.h user/jeff/numa/sys/vm/vm_pageout.c user/jeff/numa/sys/vm/vm_pagequeue.h user/jeff/numa/sys/vm/vm_phys.c Modified: user/jeff/numa/sys/amd64/include/vmparam.h ============================================================================== --- user/jeff/numa/sys/amd64/include/vmparam.h Fri Mar 23 18:38:12 2018 (r331456) +++ user/jeff/numa/sys/amd64/include/vmparam.h Fri Mar 23 18:41:28 2018 (r331457) @@ -227,4 +227,10 @@ #define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */ +/* + * Use a fairly large batch size since we expect amd64 systems to have + * lots of memory. + */ +#define VM_BATCHQUEUE_SIZE 31 + #endif /* _MACHINE_VMPARAM_H_ */ Modified: user/jeff/numa/sys/kern/subr_witness.c ============================================================================== --- user/jeff/numa/sys/kern/subr_witness.c Fri Mar 23 18:38:12 2018 (r331456) +++ user/jeff/numa/sys/kern/subr_witness.c Fri Mar 23 18:41:28 2018 (r331457) @@ -601,7 +601,6 @@ static struct witness_order_list_entry order_lists[] = * CDEV */ { "vm map (system)", &lock_class_mtx_sleep }, - { "vm pagequeue", &lock_class_mtx_sleep }, { "vnode interlock", &lock_class_mtx_sleep }, { "cdev", &lock_class_mtx_sleep }, { NULL, NULL }, @@ -611,11 +610,11 @@ static struct witness_order_list_entry order_lists[] = { "vm map (user)", &lock_class_sx }, { "vm object", &lock_class_rw }, { "vm page", &lock_class_mtx_sleep }, - { "vm pagequeue", &lock_class_mtx_sleep }, { "pmap pv global", &lock_class_rw }, { "pmap", &lock_class_mtx_sleep }, { "pmap pv list", &lock_class_rw }, { "vm page free queue", &lock_class_mtx_sleep }, + { "vm pagequeue", &lock_class_mtx_sleep }, { NULL, NULL }, /* * kqueue/VFS interaction Modified: user/jeff/numa/sys/vm/vm_object.c ============================================================================== --- user/jeff/numa/sys/vm/vm_object.c Fri Mar 23 18:38:12 2018 (r331456) +++ user/jeff/numa/sys/vm/vm_object.c Fri Mar 23 18:41:28 2018 (r331457) @@ -721,13 +721,11 @@ static void vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; - struct mtx *mtx, *mtx1; - struct vm_pagequeue *pq, *pq1; + struct mtx *mtx; VM_OBJECT_ASSERT_WLOCKED(object); mtx = NULL; - pq = NULL; /* * Free any remaining pageable pages. This also removes them from the @@ -737,51 +735,23 @@ vm_object_terminate_pages(vm_object_t object) */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); - if ((object->flags & OBJ_UNMANAGED) == 0) { + if ((object->flags & OBJ_UNMANAGED) == 0) /* * vm_page_free_prep() only needs the page * lock for managed pages. 
*/ - mtx1 = vm_page_lockptr(p); - if (mtx1 != mtx) { - if (mtx != NULL) - mtx_unlock(mtx); - if (pq != NULL) { - vm_pagequeue_unlock(pq); - pq = NULL; - } - mtx = mtx1; - mtx_lock(mtx); - } - } + vm_page_change_lock(p, &mtx); p->object = NULL; if (p->wire_count != 0) - goto unlist; + continue; VM_CNT_INC(v_pfree); p->flags &= ~PG_ZERO; - if (p->queue != PQ_NONE) { - KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: " - "page %p is not queued", p)); - pq1 = vm_page_pagequeue(p); - if (pq != pq1) { - if (pq != NULL) - vm_pagequeue_unlock(pq); - pq = pq1; - vm_pagequeue_lock(pq); - } - } - if (vm_page_free_prep(p, true)) - continue; -unlist: - TAILQ_REMOVE(&object->memq, p, listq); + + vm_page_free(p); } - if (pq != NULL) - vm_pagequeue_unlock(pq); if (mtx != NULL) mtx_unlock(mtx); - vm_page_free_phys_pglist(&object->memq); - /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were @@ -1964,7 +1934,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t { vm_page_t p, next; struct mtx *mtx; - struct pglist pgl; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || @@ -1973,7 +1942,6 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); - TAILQ_INIT(&pgl); again: p = vm_page_find_least(object, start); mtx = NULL; @@ -2028,12 +1996,10 @@ again: if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); p->flags &= ~PG_ZERO; - if (vm_page_free_prep(p, false)) - TAILQ_INSERT_TAIL(&pgl, p, listq); + vm_page_free(p); } if (mtx != NULL) mtx_unlock(mtx); - vm_page_free_phys_pglist(&pgl); vm_object_pip_wakeup(object); } Modified: user/jeff/numa/sys/vm/vm_page.c ============================================================================== --- user/jeff/numa/sys/vm/vm_page.c Fri Mar 23 18:38:12 2018 (r331456) +++ user/jeff/numa/sys/vm/vm_page.c Fri Mar 23 18:41:28 2018 (r331457) @@ -74,13 +74,6 @@ * * The page daemon can acquire and hold any pair of page queue * locks in any order. * - * * Batch queues are used to defer insertions of pages into the - * main paging queues. The aim is to reduce contention at the - * entry point of the queue by inserting multiple pages in an - * O(1) operation. This comes at the expense of strict LRU. - * Only a page lock is required to insert a page into a batch - * queue. - * * - The object lock is required when inserting or removing * pages from an object (vm_page_insert() or vm_page_remove()). * @@ -138,13 +131,11 @@ extern int uma_startup_count(int); extern void uma_startup(void *, int); extern int vmem_startup_count(void); -/* - * Associated with page of user-allocatable memory is a - * page structure. 
- */ - struct vm_domain vm_dom[MAXMEMDOM]; +static DPCPU_DEFINE(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]); +static DPCPU_DEFINE(struct vm_batchqueue, freeqbatch[MAXMEMDOM]); + struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; struct mtx_padalign __exclusive_cache_line vm_domainset_lock; @@ -183,7 +174,7 @@ static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); -static void vm_page_enqueue(uint8_t queue, vm_page_t m); +static void vm_page_enqueue_lazy(vm_page_t m, uint8_t queue); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); @@ -442,7 +433,7 @@ vm_page_domain_init(int domain) { struct vm_domain *vmd; struct vm_pagequeue *pq; - int i, j; + int i; vmd = VM_DOMAIN(domain); bzero(vmd, sizeof(*vmd)); @@ -464,15 +455,6 @@ vm_page_domain_init(int domain) TAILQ_INIT(&pq->pq_pl); mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); - - /* - * The batch queue limits are set in vm_pageout_init() once - * we've set the paging targets. - */ - for (j = 0; j < BPQ_COUNT; j++) { - TAILQ_INIT(&pq->pq_bpqs[j].bpq_pl); - pq->pq_bpqs[j].bpq_lim = 1; - } } mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF); mtx_init(&vmd->vmd_pageout_mtx, "vm pageout lock", NULL, MTX_DEF); @@ -1830,6 +1812,7 @@ again: KASSERT(m != NULL, ("missing page")); found: + vm_page_dequeue(m); vm_page_alloc_check(m); /* @@ -2026,8 +2009,10 @@ again: #if VM_NRESERVLEVEL > 0 found: #endif - for (m = m_ret; m < &m_ret[npages]; m++) + for (m = m_ret; m < &m_ret[npages]; m++) { + vm_page_dequeue(m); vm_page_alloc_check(m); + } /* * Initialize the pages. Only the PG_ZERO flag is inherited. @@ -2102,6 +2087,8 @@ vm_page_alloc_check(vm_page_t m) KASSERT(m->object == NULL, ("page %p has object", m)); KASSERT(m->queue == PQ_NONE, ("page %p has unexpected queue %d", m, m->queue)); + KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0, + ("page %p has unexpected queue state", m)); KASSERT(!vm_page_held(m), ("page %p is held", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); @@ -2171,6 +2158,7 @@ again: goto again; return (NULL); } + vm_page_dequeue(m); vm_page_alloc_check(m); /* @@ -2363,7 +2351,8 @@ retry: vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && - m->queue != PQ_NONE && !vm_page_busied(m)) { + m->queue != PQ_NONE && + (m->aflags & PGA_DEQUEUE) == 0 && !vm_page_busied(m)) { /* * The page is allocated but eligible for * relocation. 
Extend the current run by one @@ -2514,7 +2503,9 @@ retry: error = EINVAL; else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; - else if (m->queue != PQ_NONE && !vm_page_busied(m)) { + else if (m->queue != PQ_NONE && + (m->aflags & PGA_DEQUEUE) == 0 && + !vm_page_busied(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); @@ -2574,7 +2565,8 @@ retry: */ if (object->ref_count != 0) pmap_remove_all(m); - m_new->aflags = m->aflags; + m_new->aflags = m->aflags & + ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, ("page %p is managed", m)); m_new->oflags = m->oflags & VPO_NOSYNC; @@ -2586,7 +2578,7 @@ retry: vm_page_remque(m); vm_page_replace_checked(m_new, object, m->pindex, m); - if (vm_page_free_prep(m, false)) + if (vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); @@ -2600,7 +2592,7 @@ retry: m->flags &= ~PG_ZERO; vm_page_remque(m); vm_page_remove(m); - if (vm_page_free_prep(m, false)) + if (vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); KASSERT(m->dirty == 0, @@ -3043,146 +3035,288 @@ vm_page_pagequeue(vm_page_t m) return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]); } -/* - * vm_page_enqueue_batch: - * - * Concatenate the pages in a batch queue to their corresponding paging - * queue. - * - * The pagequeue must be locked. - */ +static struct mtx * +vm_page_pagequeue_lockptr(vm_page_t m) +{ + + if (m->queue == PQ_NONE) + return (NULL); + return (&vm_page_pagequeue(m)->pq_mutex); +} + static void -vm_page_enqueue_batch(struct vm_pagequeue *pq, u_int idx) +vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq, + uint8_t queue) { - struct vm_batchqueue *bpq; + vm_page_t m; + int delta; + uint8_t aflags; - KASSERT(idx < BPQ_COUNT, ("invalid batch queue index %u", idx)); vm_pagequeue_assert_locked(pq); - bpq = &pq->pq_bpqs[idx]; - if (bpq->bpq_cnt != 0) { - TAILQ_CONCAT(&pq->pq_pl, &bpq->bpq_pl, plinks.q); - vm_pagequeue_cnt_add(pq, bpq->bpq_cnt); - bpq->bpq_cnt = 0; + delta = 0; + VM_BATCHQ_FOREACH(bq, m) { + if (__predict_false(m->queue != queue)) + continue; + + aflags = m->aflags; + if ((aflags & PGA_DEQUEUE) != 0) { + if (__predict_true((aflags & PGA_ENQUEUED) != 0)) { + TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); + delta--; + } + + /* + * Synchronize with the page daemon, which may be + * simultaneously scanning this page with only the page + * lock held. We must be careful to avoid leaving the + * page in a state where it appears to belong to a page + * queue. + */ + m->queue = PQ_NONE; + atomic_thread_fence_rel(); + vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK); + } else if ((aflags & PGA_ENQUEUED) == 0) { + TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); + delta++; + vm_page_aflag_set(m, PGA_ENQUEUED); + if (__predict_false((aflags & PGA_REQUEUE) != 0)) + vm_page_aflag_clear(m, PGA_REQUEUE); + } else if ((aflags & PGA_REQUEUE) != 0) { + TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); + TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); + vm_page_aflag_clear(m, PGA_REQUEUE); + } } + vm_batchqueue_init(bq); + vm_pagequeue_cnt_add(pq, delta); } /* - * vm_page_dequeue: + * vm_page_dequeue_lazy: * - * Remove the given page from its current page queue. + * Request removal of the given page from its current page + * queue. Physical removal from the queue may be deferred + * arbitrarily, and may be cancelled by later queue operations + * on that page. * * The page must be locked. 
*/ -void -vm_page_dequeue(vm_page_t m) +static void +vm_page_dequeue_lazy(vm_page_t m) { + struct vm_batchqueue *bq; struct vm_pagequeue *pq; + int domain, queue; vm_page_assert_locked(m); - KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued", - m)); - pq = vm_page_pagequeue(m); - vm_pagequeue_lock(pq); - m->queue = PQ_NONE; - vm_page_enqueue_batch(pq, BPQ_IDX(m)); - TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); - vm_pagequeue_cnt_dec(pq); + + queue = m->queue; + if (queue == PQ_NONE) + return; + domain = vm_phys_domain(m); + pq = &VM_DOMAIN(domain)->vmd_pagequeues[queue]; + + vm_page_aflag_set(m, PGA_DEQUEUE); + + critical_enter(); + bq = DPCPU_PTR(pqbatch[domain][queue]); + if (vm_batchqueue_insert(bq, m)) { + critical_exit(); + return; + } + if (!vm_pagequeue_trylock(pq)) { + critical_exit(); + vm_pagequeue_lock(pq); + critical_enter(); + bq = DPCPU_PTR(pqbatch[domain][queue]); + } + vm_pqbatch_process(pq, bq, queue); + + /* + * The page may have been dequeued by another thread before we + * acquired the page queue lock. However, since we hold the + * page lock, the page's queue field cannot change a second + * time and we can safely clear PGA_DEQUEUE. + */ + KASSERT(m->queue == queue || m->queue == PQ_NONE, + ("%s: page %p migrated between queues", __func__, m)); + if (m->queue == queue) { + (void)vm_batchqueue_insert(bq, m); + vm_pqbatch_process(pq, bq, queue); + } else + vm_page_aflag_clear(m, PGA_DEQUEUE); vm_pagequeue_unlock(pq); + critical_exit(); } /* * vm_page_dequeue_locked: * - * Remove the given page from its current page queue. + * Remove the page from its page queue, which must be locked. + * If the page lock is not held, there is no guarantee that the + * page will not be enqueued by another thread before this function + * returns. In this case, it is up to the caller to ensure that + * no other threads hold a reference to the page. * - * The page and page queue must be locked. + * The page queue lock must be held. If the page is not already + * logically dequeued, the page lock must be held as well. */ void vm_page_dequeue_locked(vm_page_t m) { struct vm_pagequeue *pq; - vm_page_lock_assert(m, MA_OWNED); - pq = vm_page_pagequeue(m); - vm_pagequeue_assert_locked(pq); - vm_page_enqueue_batch(pq, BPQ_IDX(m)); + KASSERT(m->queue != PQ_NONE, + ("%s: page %p queue field is PQ_NONE", __func__, m)); + vm_pagequeue_assert_locked(vm_page_pagequeue(m)); + KASSERT((m->aflags & PGA_DEQUEUE) != 0 || + mtx_owned(vm_page_lockptr(m)), + ("%s: queued unlocked page %p", __func__, m)); + + if ((m->aflags & PGA_ENQUEUED) != 0) { + pq = vm_page_pagequeue(m); + TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); + vm_pagequeue_cnt_dec(pq); + } + + /* + * Synchronize with the page daemon, which may be simultaneously + * scanning this page with only the page lock held. We must be careful + * to avoid leaving the page in a state where it appears to belong to a + * page queue. + */ m->queue = PQ_NONE; - TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); - vm_pagequeue_cnt_dec(pq); + atomic_thread_fence_rel(); + vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK); } /* - * vm_page_enqueue: + * vm_page_dequeue: * - * Add the given page to the specified page queue. + * Remove the page from whichever page queue it's in, if any. + * If the page lock is not held, there is no guarantee that the + * page will not be enqueued by another thread before this function + * returns. In this case, it is up to the caller to ensure that + * no other threads hold a reference to the page. 
+ */ +void +vm_page_dequeue(vm_page_t m) +{ + struct mtx *lock, *lock1; + + lock = vm_page_pagequeue_lockptr(m); + for (;;) { + if (lock == NULL) + return; + mtx_lock(lock); + if ((lock1 = vm_page_pagequeue_lockptr(m)) == lock) + break; + mtx_unlock(lock); + lock = lock1; + } + KASSERT(lock == vm_page_pagequeue_lockptr(m), + ("%s: page %p migrated directly between queues", __func__, m)); + vm_page_dequeue_locked(m); + mtx_unlock(lock); +} + +/* + * vm_page_enqueue_lazy: * + * Schedule the given page for insertion into the specified page queue. + * Physical insertion of the page may be deferred indefinitely. + * * The page must be locked. */ static void -vm_page_enqueue(uint8_t queue, vm_page_t m) +vm_page_enqueue_lazy(vm_page_t m, uint8_t queue) { - struct vm_batchqueue *bpq; + struct vm_batchqueue *bq; struct vm_pagequeue *pq; + int domain; - vm_page_lock_assert(m, MA_OWNED); - KASSERT(queue < PQ_COUNT, - ("vm_page_enqueue: invalid queue %u request for page %p", - queue, m)); + vm_page_assert_locked(m); + KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, + ("%s: page %p is already enqueued", __func__, m)); + + domain = vm_phys_domain(m); pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue]; + + /* + * The queue field might be changed back to PQ_NONE by a concurrent + * call to vm_page_dequeue(). In that case the batch queue entry will + * be a no-op. + */ m->queue = queue; - bpq = &pq->pq_bpqs[BPQ_IDX(m)]; - TAILQ_INSERT_TAIL(&bpq->bpq_pl, m, plinks.q); - if (bpq->bpq_cnt++ >= bpq->bpq_lim) { + + critical_enter(); + bq = DPCPU_PTR(pqbatch[domain][queue]); + if (__predict_true(vm_batchqueue_insert(bq, m))) { + critical_exit(); + return; + } + if (!vm_pagequeue_trylock(pq)) { + critical_exit(); vm_pagequeue_lock(pq); - vm_page_enqueue_batch(pq, BPQ_IDX(m)); - vm_pagequeue_unlock(pq); + critical_enter(); + bq = DPCPU_PTR(pqbatch[domain][queue]); } + vm_pqbatch_process(pq, bq, queue); + (void)vm_batchqueue_insert(bq, m); + vm_pqbatch_process(pq, bq, queue); + vm_pagequeue_unlock(pq); + critical_exit(); } /* * vm_page_requeue: * - * Move the given page to the tail of its current page queue. + * Schedule a requeue of the given page. * * The page must be locked. */ void vm_page_requeue(vm_page_t m) { + struct vm_batchqueue *bq; struct vm_pagequeue *pq; + int domain, queue; vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, - ("vm_page_requeue: page %p is not queued", m)); + ("%s: page %p is not enqueued", __func__, m)); + + domain = vm_phys_domain(m); + queue = m->queue; pq = vm_page_pagequeue(m); - vm_pagequeue_lock(pq); - vm_page_enqueue_batch(pq, BPQ_IDX(m)); - TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); - TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); - vm_pagequeue_unlock(pq); -} -/* - * vm_page_requeue_locked: - * - * Move the given page to the tail of its current page queue. - * - * The page queue must be locked. 
- */ -void -vm_page_requeue_locked(vm_page_t m) -{ - struct vm_pagequeue *pq; + if (queue == PQ_NONE) + return; - vm_page_lock_assert(m, MA_OWNED); - KASSERT(m->queue != PQ_NONE, - ("vm_page_requeue_locked: page %p is not queued", m)); - pq = vm_page_pagequeue(m); - vm_pagequeue_assert_locked(pq); - vm_page_enqueue_batch(pq, BPQ_IDX(m)); - TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); - TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); + vm_page_aflag_set(m, PGA_REQUEUE); + critical_enter(); + bq = DPCPU_PTR(pqbatch[domain][queue]); + if (__predict_true(vm_batchqueue_insert(bq, m))) { + critical_exit(); + return; + } + if (!vm_pagequeue_trylock(pq)) { + critical_exit(); + vm_pagequeue_lock(pq); + critical_enter(); + bq = DPCPU_PTR(pqbatch[domain][queue]); + } + vm_pqbatch_process(pq, bq, queue); + KASSERT(m->queue == queue || m->queue == PQ_NONE, + ("%s: page %p migrated between queues", __func__, m)); + if (m->queue == queue) { + (void)vm_batchqueue_insert(bq, m); + vm_pqbatch_process(pq, bq, queue); + } else + vm_page_aflag_clear(m, PGA_REQUEUE); + vm_pagequeue_unlock(pq); + critical_exit(); } /* @@ -3200,18 +3334,18 @@ vm_page_activate(vm_page_t m) int queue; vm_page_lock_assert(m, MA_OWNED); - if ((queue = m->queue) != PQ_ACTIVE) { - if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - if (queue != PQ_NONE) - vm_page_dequeue(m); - vm_page_enqueue(PQ_ACTIVE, m); - } - } else { - if (m->act_count < ACT_INIT) + + if ((queue = m->queue) == PQ_ACTIVE || m->wire_count > 0 || + (m->oflags & VPO_UNMANAGED) != 0) { + if (queue == PQ_ACTIVE && m->act_count < ACT_INIT) m->act_count = ACT_INIT; + return; } + + vm_page_remque(m); + if (m->act_count < ACT_INIT) + m->act_count = ACT_INIT; + vm_page_enqueue_lazy(m, PQ_ACTIVE); } /* @@ -3222,11 +3356,10 @@ vm_page_activate(vm_page_t m) * the page to the free list only if this function returns true. * * The object must be locked. The page must be locked if it is - * managed. For a queued managed page, the pagequeue_locked - * argument specifies whether the page queue is already locked. + * managed. */ bool -vm_page_free_prep(vm_page_t m, bool pagequeue_locked) +vm_page_free_prep(vm_page_t m) { #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) @@ -3242,14 +3375,14 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked) if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), - ("vm_page_free_toq: freeing mapped page %p", m)); + ("vm_page_free_prep: freeing mapped page %p", m)); } else KASSERT(m->queue == PQ_NONE, - ("vm_page_free_toq: unmanaged page %p is queued", m)); + ("vm_page_free_prep: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); if (vm_page_sbusied(m)) - panic("vm_page_free: freeing busy page %p", m); + panic("vm_page_free_prep: freeing busy page %p", m); vm_page_remove(m); @@ -3265,21 +3398,23 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked) return (false); } - if (m->queue != PQ_NONE) { - if (pagequeue_locked) - vm_page_dequeue_locked(m); - else - vm_page_dequeue(m); - } + /* + * Pages need not be dequeued before they are returned to the physical + * memory allocator, but they must at least be marked for a deferred + * dequeue. 
+ */ + if ((m->oflags & VPO_UNMANAGED) == 0) + vm_page_dequeue_lazy(m); + m->valid = 0; vm_page_undirty(m); if (m->wire_count != 0) - panic("vm_page_free: freeing wired page %p", m); + panic("vm_page_free_prep: freeing wired page %p", m); if (m->hold_count != 0) { m->flags &= ~PG_ZERO; KASSERT((m->flags & PG_UNHOLDFREE) == 0, - ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); + ("vm_page_free_prep: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; return (false); } @@ -3340,15 +3475,35 @@ vm_page_free_phys_pglist(struct pglist *tq) void vm_page_free_toq(vm_page_t m) { + struct vm_batchqueue *cpubq, bq; struct vm_domain *vmd; + int domain, freed; - if (!vm_page_free_prep(m, false)) + if (!vm_page_free_prep(m)) return; - vmd = vm_pagequeue_domain(m); + + domain = vm_phys_domain(m); + vmd = VM_DOMAIN(domain); + + critical_enter(); + cpubq = DPCPU_PTR(freeqbatch[domain]); + if (vm_batchqueue_insert(cpubq, m)) { + critical_exit(); + return; + } + memcpy(&bq, cpubq, sizeof(bq)); + vm_batchqueue_init(cpubq); + critical_exit(); + vm_domain_free_lock(vmd); vm_phys_free_pages(m, 0); + freed = 1; + VM_BATCHQ_FOREACH(&bq, m) { + vm_phys_free_pages(m, 0); + freed++; + } vm_domain_free_unlock(vmd); - vm_domain_freecnt_inc(vmd, 1); + vm_domain_freecnt_inc(vmd, freed); } /* @@ -3365,23 +3520,18 @@ void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count) { vm_page_t m; - struct pglist pgl; int count; if (SLIST_EMPTY(free)) return; count = 0; - TAILQ_INIT(&pgl); while ((m = SLIST_FIRST(free)) != NULL) { count++; SLIST_REMOVE_HEAD(free, plinks.s.ss); - if (vm_page_free_prep(m, false)) - TAILQ_INSERT_TAIL(&pgl, m, listq); + vm_page_free_toq(m); } - vm_page_free_phys_pglist(&pgl); - if (update_wire_count) vm_wire_sub(count); } @@ -3440,22 +3590,25 @@ vm_page_unwire(vm_page_t m, uint8_t queue) KASSERT(queue < PQ_COUNT || queue == PQ_NONE, ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); + if ((m->oflags & VPO_UNMANAGED) == 0) + vm_page_assert_locked(m); unwired = vm_page_unwire_noq(m); - if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) { - if (m->queue == queue) { + if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL) + return (unwired); + + if (m->queue == queue) { + if (queue == PQ_ACTIVE) + vm_page_reference(m); + else if (queue != PQ_NONE) + vm_page_requeue(m); + } else { + vm_page_dequeue(m); + if (queue != PQ_NONE) { + vm_page_enqueue_lazy(m, queue); if (queue == PQ_ACTIVE) - vm_page_reference(m); - else if (queue != PQ_NONE) - vm_page_requeue(m); - } else { - vm_page_remque(m); - if (queue != PQ_NONE) { - vm_page_enqueue(queue, m); - if (queue == PQ_ACTIVE) - /* Initialize act_count. */ - vm_page_activate(m); - } + /* Initialize act_count. */ + vm_page_activate(m); } } return (unwired); @@ -3503,44 +3656,33 @@ vm_page_unwire_noq(vm_page_t m) * The page must be locked. */ static inline void -_vm_page_deactivate(vm_page_t m, boolean_t noreuse) +_vm_page_deactivate(vm_page_t m, bool noreuse) { - struct vm_batchqueue *bpq; struct vm_pagequeue *pq; - int queue; vm_page_assert_locked(m); - if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { + if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0) + return; + + if (noreuse) { + /* This is slower than it could be. */ + vm_page_remque(m); pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE]; - /* Avoid multiple acquisitions of the inactive queue lock. 
*/ - queue = m->queue; - if (queue == PQ_INACTIVE) { - vm_pagequeue_lock(pq); - vm_page_dequeue_locked(m); - } else { - if (queue != PQ_NONE) - vm_page_dequeue(m); - bpq = &pq->pq_bpqs[BPQ_IDX(m)]; - if (bpq->bpq_cnt < bpq->bpq_lim) { - bpq->bpq_cnt++; - m->queue = PQ_INACTIVE; - TAILQ_INSERT_TAIL(&bpq->bpq_pl, m, plinks.q); - return; - } - vm_pagequeue_lock(pq); - } + vm_pagequeue_lock(pq); m->queue = PQ_INACTIVE; - vm_page_enqueue_batch(pq, BPQ_IDX(m)); - if (noreuse) - TAILQ_INSERT_BEFORE( - &vm_pagequeue_domain(m)->vmd_inacthead, m, - plinks.q); - else - TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); + TAILQ_INSERT_BEFORE(&vm_pagequeue_domain(m)->vmd_inacthead, m, + plinks.q); vm_pagequeue_cnt_inc(pq); + vm_page_aflag_set(m, PGA_ENQUEUED); + if ((m->aflags & PGA_REQUEUE) != 0) + vm_page_aflag_clear(m, PGA_REQUEUE); vm_pagequeue_unlock(pq); - } + } else if (!vm_page_inactive(m)) { + vm_page_remque(m); + vm_page_enqueue_lazy(m, PQ_INACTIVE); + } else + vm_page_requeue(m); } /* @@ -3553,7 +3695,7 @@ void vm_page_deactivate(vm_page_t m) { - _vm_page_deactivate(m, FALSE); + _vm_page_deactivate(m, false); } /* @@ -3566,7 +3708,7 @@ void vm_page_deactivate_noreuse(vm_page_t m) { - _vm_page_deactivate(m, TRUE); + _vm_page_deactivate(m, true); } /* @@ -3579,13 +3721,14 @@ vm_page_launder(vm_page_t m) { vm_page_assert_locked(m); - if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { - if (m->queue == PQ_LAUNDRY) - vm_page_requeue(m); - else { - vm_page_remque(m); - vm_page_enqueue(PQ_LAUNDRY, m); - } + if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0) + return; + + if (m->queue == PQ_LAUNDRY) + vm_page_requeue(m); + else { + vm_page_remque(m); + vm_page_enqueue_lazy(m, PQ_LAUNDRY); } } @@ -3601,9 +3744,9 @@ vm_page_unswappable(vm_page_t m) vm_page_assert_locked(m); KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0, ("page %p already unswappable", m)); - if (m->queue != PQ_NONE) - vm_page_dequeue(m); - vm_page_enqueue(PQ_UNSWAPPABLE, m); + + vm_page_remque(m); + vm_page_enqueue_lazy(m, PQ_UNSWAPPABLE); } /* Modified: user/jeff/numa/sys/vm/vm_page.h ============================================================================== --- user/jeff/numa/sys/vm/vm_page.h Fri Mar 23 18:38:12 2018 (r331456) +++ user/jeff/numa/sys/vm/vm_page.h Fri Mar 23 18:41:28 2018 (r331457) @@ -94,7 +94,9 @@ * In general, operations on this structure's mutable fields are * synchronized using either one of or a combination of the lock on the * object that the page belongs to (O), the pool lock for the page (P), - * or the lock for either the free or paging queue (Q). If a field is + * the per-domain lock for the free queues (F), or the page's queue + * lock (Q). The queue lock for a page depends on the value of its + * queue field and described in detail below. If a field is * annotated below with two of these locks, then holding either lock is * sufficient for read access, but both locks are required for write * access. An annotation of (C) indicates that the field is immutable. @@ -143,6 +145,28 @@ * causing the thread to block. vm_page_sleep_if_busy() can be used to * sleep until the page's busy state changes, after which the caller * must re-lookup the page and re-evaluate its state. + * + * The queue field is the index of the page queue containing the + * page, or PQ_NONE if the page is not enqueued. The queue lock of a + * page is the page queue lock corresponding to the page queue index, + * or the page lock (P) for the page. 
To modify the queue field, the + * queue lock for the old value of the field must be held. It is + * invalid for a page's queue field to transition between two distinct + * page queue indices. That is, when updating the queue field, either + * the new value or the old value must be PQ_NONE. + * + * To avoid contention on page queue locks, page queue operations + * (enqueue, dequeue, requeue) are batched using per-CPU queues. + * A deferred operation is requested by inserting an entry into a *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
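[The following sketch is not part of the commit; it is an illustration of the batching idea described in the log and the vm_page.h comment above. It models the scheme in plain userspace C, assuming a pthread mutex and sys/queue.h lists in place of the kernel's DPCPU per-CPU storage, critical sections, trylock, and atomic page-flag handshakes (PGA_ENQUEUED/PGA_DEQUEUE/PGA_REQUEUE). The structure name, the insert helper, and VM_BATCHQUEUE_SIZE follow the diff; everything else is simplified.]

/*
 * Illustrative sketch only -- not the committed kernel code.  Each CPU keeps
 * a small fixed-size buffer of page pointers; the shared page queue lock is
 * taken only when that buffer fills, so up to VM_BATCHQUEUE_SIZE insertions
 * cost a single lock acquisition instead of one apiece.
 */
#include <sys/queue.h>
#include <pthread.h>
#include <stdbool.h>

#define	VM_BATCHQUEUE_SIZE	31	/* value the diff sets for amd64 */

struct vm_page {
	TAILQ_ENTRY(vm_page) plinks;
};

struct vm_pagequeue {
	pthread_mutex_t		pq_mutex;
	TAILQ_HEAD(, vm_page)	pq_pl;
	int			pq_cnt;
};

struct vm_batchqueue {
	struct vm_page	*bq_pa[VM_BATCHQUEUE_SIZE];
	int		 bq_cnt;
};

/* Fast path: O(1), lock-free.  Returns false when the batch is full. */
static bool
vm_batchqueue_insert(struct vm_batchqueue *bq, struct vm_page *m)
{

	if (bq->bq_cnt < VM_BATCHQUEUE_SIZE) {
		bq->bq_pa[bq->bq_cnt++] = m;
		return (true);
	}
	return (false);
}

/* Slow path: append the whole batch to the shared queue under one lock. */
static void
vm_pqbatch_flush(struct vm_pagequeue *pq, struct vm_batchqueue *bq)
{
	int i;

	pthread_mutex_lock(&pq->pq_mutex);
	for (i = 0; i < bq->bq_cnt; i++) {
		TAILQ_INSERT_TAIL(&pq->pq_pl, bq->bq_pa[i], plinks);
		pq->pq_cnt++;
	}
	pthread_mutex_unlock(&pq->pq_mutex);
	bq->bq_cnt = 0;
}

/* Deferred enqueue: only the overflow case touches the queue lock. */
static void
page_enqueue_lazy(struct vm_pagequeue *pq, struct vm_batchqueue *cpubq,
    struct vm_page *m)
{

	if (vm_batchqueue_insert(cpubq, m))
		return;
	vm_pqbatch_flush(pq, cpubq);
	(void)vm_batchqueue_insert(cpubq, m);
}

[The real vm_pqbatch_process() in the diff additionally honors the per-page atomic flags so that a deferred dequeue or requeue recorded after the batch entry was made is not undone when the batch is drained; that handshake is what the atomic_thread_fence_rel() and PGA_QUEUE_STATE_MASK manipulation above implement, and it is omitted from this sketch.]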