From owner-svn-src-head@FreeBSD.ORG Tue Nov 13 02:50:40 2012 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 379614F8; Tue, 13 Nov 2012 02:50:40 +0000 (UTC) (envelope-from alc@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 1DD208FC14; Tue, 13 Nov 2012 02:50:40 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id qAD2oe7W065273; Tue, 13 Nov 2012 02:50:40 GMT (envelope-from alc@svn.freebsd.org) Received: (from alc@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id qAD2odEw065268; Tue, 13 Nov 2012 02:50:39 GMT (envelope-from alc@svn.freebsd.org) Message-Id: <201211130250.qAD2odEw065268@svn.freebsd.org> From: Alan Cox Date: Tue, 13 Nov 2012 02:50:39 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r242941 - in head/sys: sys vm X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 13 Nov 2012 02:50:40 -0000 Author: alc Date: Tue Nov 13 02:50:39 2012 New Revision: 242941 URL: http://svnweb.freebsd.org/changeset/base/242941 Log: Replace the single, global page queues lock with per-queue locks on the active and inactive paging queues. Reviewed by: kib Modified: head/sys/sys/vmmeter.h head/sys/vm/vm_fault.c head/sys/vm/vm_page.c head/sys/vm/vm_page.h head/sys/vm/vm_pageout.c Modified: head/sys/sys/vmmeter.h ============================================================================== --- head/sys/sys/vmmeter.h Tue Nov 13 02:42:31 2012 (r242940) +++ head/sys/sys/vmmeter.h Tue Nov 13 02:50:39 2012 (r242941) @@ -46,7 +46,7 @@ * c - constant after initialization * f - locked by vm_page_queue_free_mtx * p - locked by being in the PCPU and atomicity respect to interrupts - * q - locked by vm_page_queue_mtx + * q - changes are synchronized by the corresponding vm_pagequeue lock */ struct vmmeter { /* @@ -76,7 +76,7 @@ struct vmmeter { u_int v_intrans; /* (p) intransit blocking page faults */ u_int v_reactivated; /* (f) pages reactivated from free list */ u_int v_pdwakeups; /* (f) times daemon has awaken from sleep */ - u_int v_pdpages; /* (q) pages analyzed by daemon */ + u_int v_pdpages; /* (p) pages analyzed by daemon */ u_int v_tcached; /* (p) total pages cached */ u_int v_dfree; /* (p) pages freed by daemon */ Modified: head/sys/vm/vm_fault.c ============================================================================== --- head/sys/vm/vm_fault.c Tue Nov 13 02:42:31 2012 (r242940) +++ head/sys/vm/vm_fault.c Tue Nov 13 02:50:39 2012 (r242941) @@ -388,7 +388,7 @@ RetryFault:; vm_object_deallocate(fs.first_object); goto RetryFault; } - vm_pageq_remove(fs.m); + vm_page_remque(fs.m); vm_page_unlock(fs.m); /* Modified: head/sys/vm/vm_page.c ============================================================================== --- head/sys/vm/vm_page.c Tue Nov 13 02:42:31 2012 (r242940) +++ head/sys/vm/vm_page.c Tue Nov 13 02:50:39 2012 (r242941) @@ -63,10 +63,16 @@ /* * GENERAL RULES ON VM_PAGE MANIPULATION * - * - a pageq mutex is required when adding or removing a page from a - * page 
queue (vm_page_queue[]), regardless of other mutexes or the + * - A page queue lock is required when adding or removing a page from a + * page queue (vm_pagequeues[]), regardless of other locks or the * busy state of a page. * + * * In general, no thread besides the page daemon can acquire or + * hold more than one page queue lock at a time. + * + * * The page daemon can acquire and hold any pair of page queue + * locks in any order. + * * - The object mutex is held when inserting or removing * pages from an object (vm_page_insert() or vm_page_remove()). * @@ -115,8 +121,20 @@ __FBSDID("$FreeBSD$"); * page structure. */ -struct vpgqueues vm_page_queues[PQ_COUNT]; -struct mtx_padalign vm_page_queue_mtx; +struct vm_pagequeue vm_pagequeues[PQ_COUNT] = { + [PQ_INACTIVE] = { + .pq_pl = TAILQ_HEAD_INITIALIZER( + vm_pagequeues[PQ_INACTIVE].pq_pl), + .pq_cnt = &cnt.v_inactive_count, + .pq_name = "vm inactive pagequeue" + }, + [PQ_ACTIVE] = { + .pq_pl = TAILQ_HEAD_INITIALIZER( + vm_pagequeues[PQ_ACTIVE].pq_pl), + .pq_cnt = &cnt.v_active_count, + .pq_name = "vm active pagequeue" + } +}; struct mtx_padalign vm_page_queue_free_mtx; struct mtx_padalign pa_lock[PA_LOCK_COUNT]; @@ -139,7 +157,6 @@ static uma_zone_t fakepg_zone; static struct vnode *vm_page_alloc_init(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); -static void vm_page_queue_remove(int queue, vm_page_t m); static void vm_page_enqueue(int queue, vm_page_t m); static void vm_page_init_fakepg(void *dummy); @@ -294,20 +311,11 @@ vm_page_startup(vm_offset_t vaddr) /* * Initialize the page and queue locks. */ - mtx_init(&vm_page_queue_mtx, "vm page queue", NULL, MTX_DEF | - MTX_RECURSE); mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); - - /* - * Initialize the queue headers for the hold queue, the active queue, - * and the inactive queue. - */ for (i = 0; i < PQ_COUNT; i++) - TAILQ_INIT(&vm_page_queues[i].pl); - vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; - vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; + vm_pagequeue_init_lock(&vm_pagequeues[i]); /* * Allocate memory for use when boot strapping the kernel memory @@ -1867,61 +1875,109 @@ vm_waitpfault(void) } /* - * vm_page_queue_remove: + * vm_page_dequeue: * - * Remove the given page from the specified queue. + * Remove the given page from its current page queue. * - * The page and page queues must be locked. + * The page must be locked. */ -static __inline void -vm_page_queue_remove(int queue, vm_page_t m) +void +vm_page_dequeue(vm_page_t m) { - struct vpgqueues *pq; + struct vm_pagequeue *pq; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); vm_page_lock_assert(m, MA_OWNED); - pq = &vm_page_queues[queue]; - TAILQ_REMOVE(&pq->pl, m, pageq); - (*pq->cnt)--; + KASSERT(m->queue != PQ_NONE, + ("vm_page_dequeue: page %p is not queued", m)); + pq = &vm_pagequeues[m->queue]; + vm_pagequeue_lock(pq); + m->queue = PQ_NONE; + TAILQ_REMOVE(&pq->pq_pl, m, pageq); + (*pq->pq_cnt)--; + vm_pagequeue_unlock(pq); } /* - * vm_pageq_remove: + * vm_page_dequeue_locked: * - * Remove a page from its queue. + * Remove the given page from its current page queue. * - * The given page must be locked. + * The page and page queue must be locked. 
*/ void -vm_pageq_remove(vm_page_t m) +vm_page_dequeue_locked(vm_page_t m) { - int queue; + struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); - if ((queue = m->queue) != PQ_NONE) { - vm_page_lock_queues(); - m->queue = PQ_NONE; - vm_page_queue_remove(queue, m); - vm_page_unlock_queues(); - } + pq = &vm_pagequeues[m->queue]; + vm_pagequeue_assert_locked(pq); + m->queue = PQ_NONE; + TAILQ_REMOVE(&pq->pq_pl, m, pageq); + (*pq->pq_cnt)--; } /* * vm_page_enqueue: * - * Add the given page to the specified queue. + * Add the given page to the specified page queue. * - * The page queues must be locked. + * The page must be locked. */ static void vm_page_enqueue(int queue, vm_page_t m) { - struct vpgqueues *vpq; + struct vm_pagequeue *pq; - vpq = &vm_page_queues[queue]; + vm_page_lock_assert(m, MA_OWNED); + pq = &vm_pagequeues[queue]; + vm_pagequeue_lock(pq); m->queue = queue; - TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); - ++*vpq->cnt; + TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); + ++*pq->pq_cnt; + vm_pagequeue_unlock(pq); +} + +/* + * vm_page_requeue: + * + * Move the given page to the tail of its current page queue. + * + * The page must be locked. + */ +void +vm_page_requeue(vm_page_t m) +{ + struct vm_pagequeue *pq; + + vm_page_lock_assert(m, MA_OWNED); + KASSERT(m->queue != PQ_NONE, + ("vm_page_requeue: page %p is not queued", m)); + pq = &vm_pagequeues[m->queue]; + vm_pagequeue_lock(pq); + TAILQ_REMOVE(&pq->pq_pl, m, pageq); + TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); + vm_pagequeue_unlock(pq); +} + +/* + * vm_page_requeue_locked: + * + * Move the given page to the tail of its current page queue. + * + * The page queue must be locked. + */ +void +vm_page_requeue_locked(vm_page_t m) +{ + struct vm_pagequeue *pq; + + KASSERT(m->queue != PQ_NONE, + ("vm_page_requeue_locked: page %p is not queued", m)); + pq = &vm_pagequeues[m->queue]; + vm_pagequeue_assert_locked(pq); + TAILQ_REMOVE(&pq->pq_pl, m, pageq); + TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); } /* @@ -1944,11 +2000,9 @@ vm_page_activate(vm_page_t m) if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; - vm_page_lock_queues(); if (queue != PQ_NONE) - vm_page_queue_remove(queue, m); + vm_page_dequeue(m); vm_page_enqueue(PQ_ACTIVE, m); - vm_page_unlock_queues(); } else KASSERT(queue == PQ_NONE, ("vm_page_activate: wired page %p is queued", m)); @@ -2008,7 +2062,9 @@ vm_page_free_toq(vm_page_t m) vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_toq: freeing mapped page %p", m)); - } + } else + KASSERT(m->queue == PQ_NONE, + ("vm_page_free_toq: unmanaged page %p is queued", m)); PCPU_INC(cnt.v_tfree); if (VM_PAGE_IS_FREE(m)) @@ -2022,8 +2078,7 @@ vm_page_free_toq(vm_page_t m) * callback routine until after we've put the page on the * appropriate free queue. */ - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_pageq_remove(m); + vm_page_remque(m); vm_page_remove(m); /* @@ -2101,8 +2156,10 @@ vm_page_wire(vm_page_t m) return; } if (m->wire_count == 0) { - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_pageq_remove(m); + KASSERT((m->oflags & VPO_UNMANAGED) == 0 || + m->queue == PQ_NONE, + ("vm_page_wire: unmanaged page %p is queued", m)); + vm_page_remque(m); atomic_add_int(&cnt.v_wire_count, 1); } m->wire_count++; @@ -2145,9 +2202,7 @@ vm_page_unwire(vm_page_t m, int activate return; if (!activate) m->flags &= ~PG_WINATCFLS; - vm_page_lock_queues(); vm_page_enqueue(activate ? 
PQ_ACTIVE : PQ_INACTIVE, m); - vm_page_unlock_queues(); } } else panic("vm_page_unwire: page %p's wire count is zero", m); @@ -2176,6 +2231,7 @@ vm_page_unwire(vm_page_t m, int activate static inline void _vm_page_deactivate(vm_page_t m, int athead) { + struct vm_pagequeue *pq; int queue; vm_page_lock_assert(m, MA_OWNED); @@ -2186,19 +2242,18 @@ _vm_page_deactivate(vm_page_t m, int ath if ((queue = m->queue) == PQ_INACTIVE) return; if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { - m->flags &= ~PG_WINATCFLS; - vm_page_lock_queues(); if (queue != PQ_NONE) - vm_page_queue_remove(queue, m); + vm_page_dequeue(m); + m->flags &= ~PG_WINATCFLS; + pq = &vm_pagequeues[PQ_INACTIVE]; + vm_pagequeue_lock(pq); + m->queue = PQ_INACTIVE; if (athead) - TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, - pageq); + TAILQ_INSERT_HEAD(&pq->pq_pl, m, pageq); else - TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, - pageq); - m->queue = PQ_INACTIVE; + TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq); cnt.v_inactive_count++; - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); } } @@ -2298,7 +2353,7 @@ vm_page_cache(vm_page_t m) /* * Remove the page from the paging queues. */ - vm_pageq_remove(m); + vm_page_remque(m); /* * Remove the page from the object's collection of resident @@ -3039,7 +3094,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pag db_printf("\n"); db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", - *vm_page_queues[PQ_ACTIVE].cnt, - *vm_page_queues[PQ_INACTIVE].cnt); + *vm_pagequeues[PQ_ACTIVE].pq_cnt, + *vm_pagequeues[PQ_INACTIVE].pq_cnt); } #endif /* DDB */ Modified: head/sys/vm/vm_page.h ============================================================================== --- head/sys/vm/vm_page.h Tue Nov 13 02:42:31 2012 (r242940) +++ head/sys/vm/vm_page.h Tue Nov 13 02:50:39 2012 (r242941) @@ -92,7 +92,7 @@ * In general, operations on this structure's mutable fields are * synchronized using either one of or a combination of the lock on the * object that the page belongs to (O), the pool lock for the page (P), - * or the lock for either the free or paging queues (Q). If a field is + * or the lock for either the free or paging queue (Q). If a field is * annotated below with two of these locks, then holding either lock is * sufficient for read access, but both locks are required for write * access. @@ -111,8 +111,6 @@ * field is encapsulated in vm_page_clear_dirty_mask(). 
*/ -TAILQ_HEAD(pglist, vm_page); - #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xffu typedef uint8_t vm_page_bits_t; @@ -128,7 +126,7 @@ typedef uint64_t vm_page_bits_t; #endif struct vm_page { - TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ + TAILQ_ENTRY(vm_page) pageq; /* page queue or free list (Q) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ struct vm_page *left; /* splay tree link (O) */ struct vm_page *right; /* splay tree link (O) */ @@ -180,12 +178,22 @@ struct vm_page { #define PQ_ACTIVE 1 #define PQ_COUNT 2 -struct vpgqueues { - struct pglist pl; - int *cnt; -}; +TAILQ_HEAD(pglist, vm_page); -extern struct vpgqueues vm_page_queues[PQ_COUNT]; +struct vm_pagequeue { + struct mtx pq_mutex; + struct pglist pq_pl; + int *const pq_cnt; + const char *const pq_name; +} __aligned(CACHE_LINE_SIZE); + +extern struct vm_pagequeue vm_pagequeues[PQ_COUNT]; + +#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) +#define vm_pagequeue_init_lock(pq) mtx_init(&(pq)->pq_mutex, \ + (pq)->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); +#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) +#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) extern struct mtx_padalign vm_page_queue_free_mtx; extern struct mtx_padalign pa_lock[]; @@ -320,11 +328,6 @@ vm_page_t vm_phys_paddr_to_vm_page(vm_pa vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); -extern struct mtx_padalign vm_page_queue_mtx; - -#define vm_page_lock_queues() mtx_lock(&vm_page_queue_mtx) -#define vm_page_unlock_queues() mtx_unlock(&vm_page_queue_mtx) - /* page allocation classes: */ #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 @@ -354,8 +357,6 @@ void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); void vm_page_wakeup(vm_page_t m); -void vm_pageq_remove(vm_page_t m); - void vm_page_activate (vm_page_t); vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, @@ -370,6 +371,8 @@ int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_dontneed(vm_page_t); void vm_page_deactivate (vm_page_t); +void vm_page_dequeue(vm_page_t m); +void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); @@ -384,6 +387,8 @@ void vm_page_readahead_finish(vm_page_t void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); +void vm_page_requeue(vm_page_t m); +void vm_page_requeue_locked(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); void vm_page_sleep(vm_page_t m, const char *msg); vm_page_t vm_page_splay(vm_pindex_t, vm_page_t); @@ -512,6 +517,22 @@ vm_page_dirty(vm_page_t m) } /* + * vm_page_remque: + * + * If the given page is in a page queue, then remove it from that page + * queue. + * + * The page must be locked. 
+ */ +static inline void +vm_page_remque(vm_page_t m) +{ + + if (m->queue != PQ_NONE) + vm_page_dequeue(m); +} + +/* * vm_page_sleep_if_busy: * * Sleep and release the page queues lock if VPO_BUSY is set or, Modified: head/sys/vm/vm_pageout.c ============================================================================== --- head/sys/vm/vm_pageout.c Tue Nov 13 02:42:31 2012 (r242940) +++ head/sys/vm/vm_pageout.c Tue Nov 13 02:50:39 2012 (r242941) @@ -218,7 +218,6 @@ static void vm_req_vmdaemon(int req); #endif static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); static void vm_pageout_page_stats(void); -static void vm_pageout_requeue(vm_page_t m); /* * Initialize a dummy page for marking the caller's place in the specified @@ -255,29 +254,29 @@ static boolean_t vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; + struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_object_t object; queue = m->queue; vm_pageout_init_marker(&marker, queue); + pq = &vm_pagequeues[queue]; object = m->object; - TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, - m, &marker, pageq); - vm_page_unlock_queues(); + TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq); + vm_pagequeue_unlock(pq); vm_page_unlock(m); VM_OBJECT_LOCK(object); vm_page_lock(m); - vm_page_lock_queues(); + vm_pagequeue_lock(pq); /* Page queue might have changed. */ *next = TAILQ_NEXT(&marker, pageq); unchanged = (m->queue == queue && m->object == object && &marker == TAILQ_NEXT(m, pageq)); - TAILQ_REMOVE(&vm_page_queues[queue].pl, - &marker, pageq); + TAILQ_REMOVE(&pq->pq_pl, &marker, pageq); return (unchanged); } @@ -294,27 +293,27 @@ static boolean_t vm_pageout_page_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; + struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_page_lock_assert(m, MA_NOTOWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (vm_page_trylock(m)) return (TRUE); queue = m->queue; vm_pageout_init_marker(&marker, queue); + pq = &vm_pagequeues[queue]; - TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); - vm_page_unlock_queues(); + TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq); + vm_pagequeue_unlock(pq); vm_page_lock(m); - vm_page_lock_queues(); + vm_pagequeue_lock(pq); /* Page queue might have changed. 
*/ *next = TAILQ_NEXT(&marker, pageq); unchanged = (m->queue == queue && &marker == TAILQ_NEXT(m, pageq)); - TAILQ_REMOVE(&vm_page_queues[queue].pl, &marker, pageq); + TAILQ_REMOVE(&pq->pq_pl, &marker, pageq); return (unchanged); } @@ -565,13 +564,15 @@ static boolean_t vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high) { struct mount *mp; + struct vm_pagequeue *pq; struct vnode *vp; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_tmp, next; - vm_page_lock_queues(); - TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) { + pq = &vm_pagequeues[queue]; + vm_pagequeue_lock(pq); + TAILQ_FOREACH_SAFE(m, &pq->pq_pl, pageq, next) { KASSERT(m->queue == queue, ("vm_pageout_launder: page %p's queue is not %d", m, queue)); @@ -603,7 +604,7 @@ vm_pageout_launder(int queue, int tries, continue; } if (object->type == OBJT_VNODE) { - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); vp = object->handle; vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); @@ -618,7 +619,7 @@ vm_pageout_launder(int queue, int tries, return (TRUE); } else if (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT) { - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); m_tmp = m; vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, NULL, NULL); @@ -626,12 +627,17 @@ vm_pageout_launder(int queue, int tries, return (TRUE); } } else { + /* + * Dequeue here to prevent lock recursion in + * vm_page_cache(). + */ + vm_page_dequeue_locked(m); vm_page_cache(m); vm_page_unlock(m); } VM_OBJECT_UNLOCK(object); } - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); return (FALSE); } @@ -745,19 +751,14 @@ vm_pageout_object_deactivate_pages(pmap_ p->act_count == 0)) { pmap_remove_all(p); vm_page_deactivate(p); - } else { - vm_page_lock_queues(); - vm_pageout_requeue(p); - vm_page_unlock_queues(); - } + } else + vm_page_requeue(p); } else { vm_page_activate(p); if (p->act_count < ACT_MAX - ACT_ADVANCE) p->act_count += ACT_ADVANCE; - vm_page_lock_queues(); - vm_pageout_requeue(p); - vm_page_unlock_queues(); + vm_page_requeue(p); } } else if (p->queue == PQ_INACTIVE) pmap_remove_all(p); @@ -853,26 +854,6 @@ vm_pageout_map_deactivate_pages(map, des #endif /* !defined(NO_SWAPPING) */ /* - * vm_pageout_requeue: - * - * Move the specified page to the tail of its present page queue. - * - * The page queues must be locked. - */ -static void -vm_pageout_requeue(vm_page_t m) -{ - struct vpgqueues *vpq; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - KASSERT(m->queue != PQ_NONE, - ("vm_pageout_requeue: page %p is not queued", m)); - vpq = &vm_page_queues[m->queue]; - TAILQ_REMOVE(&vpq->pl, m, pageq); - TAILQ_INSERT_TAIL(&vpq->pl, m, pageq); -} - -/* * vm_pageout_scan does the dirty work for the pageout daemon. */ static void @@ -880,6 +861,7 @@ vm_pageout_scan(int pass) { vm_page_t m, next; struct vm_page marker; + struct vm_pagequeue *pq; int page_shortage, maxscan, pcount; int addl_page_shortage; vm_object_t object; @@ -888,6 +870,8 @@ vm_pageout_scan(int pass) int maxlaunder; boolean_t queues_locked; + vm_pageout_init_marker(&marker, PQ_INACTIVE); + /* * Decrease registered cache sizes. */ @@ -911,15 +895,7 @@ vm_pageout_scan(int pass) */ page_shortage = vm_paging_target() + addl_page_shortage; - vm_pageout_init_marker(&marker, PQ_INACTIVE); - /* - * Start scanning the inactive queue for pages we can move to the - * cache or free. The scan will stop when the target is reached or - * we have scanned the entire inactive queue. 
Note that m->act_count - * is not used to form decisions for the inactive queue, only for the - * active queue. - * * maxlaunder limits the number of dirty pages we flush per scan. * For most systems a smaller value (16 or 32) is more robust under * extreme memory and disk pressure because any unnecessary writes @@ -933,18 +909,27 @@ vm_pageout_scan(int pass) maxlaunder = 1; if (pass) maxlaunder = 10000; - vm_page_lock_queues(); - queues_locked = TRUE; + maxscan = cnt.v_inactive_count; - for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl); + /* + * Start scanning the inactive queue for pages we can move to the + * cache or free. The scan will stop when the target is reached or + * we have scanned the entire inactive queue. Note that m->act_count + * is not used to form decisions for the inactive queue, only for the + * active queue. + */ + pq = &vm_pagequeues[PQ_INACTIVE]; + vm_pagequeue_lock(pq); + queues_locked = TRUE; + for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && page_shortage > 0; m = next) { + vm_pagequeue_assert_locked(pq); KASSERT(queues_locked, ("unlocked queues")); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m)); - cnt.v_pdpages++; + PCPU_INC(cnt.v_pdpages); next = TAILQ_NEXT(m, pageq); /* @@ -991,13 +976,12 @@ vm_pageout_scan(int pass) } /* - * We unlock vm_page_queue_mtx, invalidating the + * We unlock the inactive page queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ - TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, - m, &marker, pageq); - vm_page_unlock_queues(); + TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq); + vm_pagequeue_unlock(pq); queues_locked = FALSE; /* @@ -1098,9 +1082,9 @@ vm_pageout_scan(int pass) * the thrash point for a heavily loaded machine. */ m->flags |= PG_WINATCFLS; - vm_page_lock_queues(); + vm_pagequeue_lock(pq); queues_locked = TRUE; - vm_pageout_requeue(m); + vm_page_requeue_locked(m); } else if (maxlaunder > 0) { /* * We always want to try to flush some dirty pages if @@ -1127,11 +1111,11 @@ vm_pageout_scan(int pass) * Those objects are in a "rundown" state. 
*/ if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) { - vm_page_lock_queues(); + vm_pagequeue_lock(pq); vm_page_unlock(m); VM_OBJECT_UNLOCK(object); queues_locked = TRUE; - vm_pageout_requeue(m); + vm_page_requeue_locked(m); goto relock_queues; } @@ -1184,7 +1168,7 @@ vm_pageout_scan(int pass) } VM_OBJECT_LOCK(object); vm_page_lock(m); - vm_page_lock_queues(); + vm_pagequeue_lock(pq); queues_locked = TRUE; /* * The page might have been moved to another @@ -1218,12 +1202,12 @@ vm_pageout_scan(int pass) */ if (m->hold_count) { vm_page_unlock(m); - vm_pageout_requeue(m); + vm_page_requeue_locked(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; } - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); queues_locked = FALSE; } @@ -1246,7 +1230,7 @@ unlock_and_continue: VM_OBJECT_UNLOCK(object); if (mp != NULL) { if (queues_locked) { - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); queues_locked = FALSE; } if (vp != NULL) @@ -1261,13 +1245,13 @@ unlock_and_continue: VM_OBJECT_UNLOCK(object); relock_queues: if (!queues_locked) { - vm_page_lock_queues(); + vm_pagequeue_lock(pq); queues_locked = TRUE; } next = TAILQ_NEXT(&marker, pageq); - TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, - &marker, pageq); + TAILQ_REMOVE(&pq->pq_pl, &marker, pageq); } + vm_pagequeue_unlock(pq); /* * Compute the number of pages we want to try to move from the @@ -1283,9 +1267,9 @@ relock_queues: * deactivation candidates. */ pcount = cnt.v_active_count; - m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - + pq = &vm_pagequeues[PQ_ACTIVE]; + vm_pagequeue_lock(pq); + m = TAILQ_FIRST(&pq->pq_pl); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { KASSERT(m->queue == PQ_ACTIVE, @@ -1322,7 +1306,7 @@ relock_queues: (m->hold_count != 0)) { vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - vm_pageout_requeue(m); + vm_page_requeue_locked(m); m = next; continue; } @@ -1331,7 +1315,7 @@ relock_queues: * The count for pagedaemon pages is done after checking the * page for eligibility... */ - cnt.v_pdpages++; + PCPU_INC(cnt.v_pdpages); /* * Check to see "how much" the page has been used. @@ -1358,14 +1342,16 @@ relock_queues: * Only if an object is currently being used, do we use the * page activation count stats. */ - if (actcount && (object->ref_count != 0)) { - vm_pageout_requeue(m); - } else { + if (actcount != 0 && object->ref_count != 0) + vm_page_requeue_locked(m); + else { m->act_count -= min(m->act_count, ACT_DECLINE); if (vm_pageout_algorithm || object->ref_count == 0 || m->act_count == 0) { page_shortage--; + /* Dequeue to avoid later lock recursion. */ + vm_page_dequeue_locked(m); if (object->ref_count == 0) { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); @@ -1376,15 +1362,14 @@ relock_queues: } else { vm_page_deactivate(m); } - } else { - vm_pageout_requeue(m); - } + } else + vm_page_requeue_locked(m); } vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* * Idle process swapout -- run once per second. 
@@ -1529,6 +1514,7 @@ vm_pageout_oom(int shortage) static void vm_pageout_page_stats() { + struct vm_pagequeue *pq; vm_object_t object; vm_page_t m,next; int pcount,tpcount; /* Number of pages to check */ @@ -1542,7 +1528,6 @@ vm_pageout_page_stats() if (page_shortage <= 0) return; - vm_page_lock_queues(); pcount = cnt.v_active_count; fullintervalcount += vm_pageout_stats_interval; if (fullintervalcount < vm_pageout_full_stats_interval) { @@ -1554,7 +1539,9 @@ vm_pageout_page_stats() fullintervalcount = 0; } - m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); + pq = &vm_pagequeues[PQ_ACTIVE]; + vm_pagequeue_lock(pq); + m = TAILQ_FIRST(&pq->pq_pl); while ((m != NULL) && (pcount-- > 0)) { int actcount; @@ -1589,7 +1576,7 @@ vm_pageout_page_stats() (m->hold_count != 0)) { vm_page_unlock(m); VM_OBJECT_UNLOCK(object); - vm_pageout_requeue(m); + vm_page_requeue_locked(m); m = next; continue; } @@ -1605,7 +1592,7 @@ vm_pageout_page_stats() m->act_count += ACT_ADVANCE + actcount; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; - vm_pageout_requeue(m); + vm_page_requeue_locked(m); } else { if (m->act_count == 0) { /* @@ -1618,17 +1605,19 @@ vm_pageout_page_stats() * of doing the operation. */ pmap_remove_all(m); + /* Dequeue to avoid later lock recursion. */ + vm_page_dequeue_locked(m); vm_page_deactivate(m); } else { m->act_count -= min(m->act_count, ACT_DECLINE); - vm_pageout_requeue(m); + vm_page_requeue_locked(m); } } vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } - vm_page_unlock_queues(); + vm_pagequeue_unlock(pq); } /*
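The net effect of the vm_page.h and vm_page.c changes above is the locking pattern sketched below. This fragment is illustrative only and is not part of the commit: it restates how a managed, unwired page m would be moved to the tail of the inactive queue using the interfaces introduced by this revision, with the page lock held throughout and each page queue lock held only while that queue's list and counter are updated.

        struct vm_pagequeue *pq;

        vm_page_lock(m);
        vm_page_remque(m);              /* locks and unlocks the old queue, if any */
        pq = &vm_pagequeues[PQ_INACTIVE];
        vm_pagequeue_lock(pq);
        m->queue = PQ_INACTIVE;
        TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
        (*pq->pq_cnt)++;                /* i.e., cnt.v_inactive_count++ */
        vm_pagequeue_unlock(pq);
        vm_page_unlock(m);

Ordinary threads hold at most one page queue lock at a time this way; per the rules added at the top of vm_page.c, only the page daemon may acquire and hold a pair of page queue locks, in any order.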
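The "Dequeue to avoid later lock recursion" changes in vm_pageout.c follow from the same rules. A simplified sketch of that pattern, again not taken verbatim from the diff: while the page daemon scans a queue with that queue's (non-recursive) lock held, it removes the page from that queue itself before calling vm_page_deactivate() or vm_page_cache(), because those routines would otherwise try to reacquire the same lock through vm_page_dequeue() or vm_page_remque().

        vm_pagequeue_lock(pq);          /* pq: the queue being scanned */
        ...                             /* page lock obtained, e.g. via
                                           vm_pageout_page_lock() */
        vm_page_dequeue_locked(m);      /* take m off pq while pq is locked */
        vm_page_deactivate(m);          /* m->queue is now PQ_NONE, so only the
                                           inactive queue lock is taken here */
        vm_page_unlock(m);

In the active-queue scan this does mean the daemon briefly holds the active and inactive queue locks together, which the new rules explicitly permit.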