From: Mark Johnston <markj@FreeBSD.org>
Date: Tue, 27 Mar 2018 21:37:58 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r331655 - user/jeff/numa/sys/vm

Author: markj
Date: Tue Mar 27 21:37:58 2018
New Revision: 331655
URL: https://svnweb.freebsd.org/changeset/base/331655

Log:
  Refactor some page queue scanning code.

  - Use a common subroutine to collect a batch of pages for processing.
  - Use a common subroutine to detect pages that were logically dequeued
    after being added to a batch.
  - Use batching in the laundry queue scan.  Remove
    vm_pageout_fallback_object_lock() and vm_pageout_page_lock().
  - Remove a racy assert from vm_page_alloc_check(). [1]

  Reported by:	pho [1]

Modified:
  user/jeff/numa/sys/vm/vm_page.c
  user/jeff/numa/sys/vm/vm_pageout.c

Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c	Tue Mar 27 21:14:39 2018	(r331654)
+++ user/jeff/numa/sys/vm/vm_page.c	Tue Mar 27 21:37:58 2018	(r331655)
@@ -2087,8 +2087,6 @@ vm_page_alloc_check(vm_page_t m)
 	KASSERT(m->object == NULL, ("page %p has object", m));
 	KASSERT(m->queue == PQ_NONE,
 	    ("page %p has unexpected queue %d", m, m->queue));
-	KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0,
-	    ("page %p has unexpected queue state", m));
 	KASSERT(!vm_page_held(m), ("page %p is held", m));
 	KASSERT(!vm_page_busied(m), ("page %p is busy", m));
 	KASSERT(m->dirty == 0, ("page %p is dirty", m));

Modified: user/jeff/numa/sys/vm/vm_pageout.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_pageout.c	Tue Mar 27 21:14:39 2018	(r331654)
+++ user/jeff/numa/sys/vm/vm_pageout.c	Tue Mar 27 21:37:58 2018	(r331655)
@@ -201,11 +201,9 @@ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW,
 	&vm_page_max_wired, 0, "System-wide limit to wired page count");
 
 static u_int isqrt(u_int num);
-static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
 static int vm_pageout_launder(struct vm_domain *vmd, int launder,
     bool in_shortfall);
 static void vm_pageout_laundry_worker(void *arg);
-static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
 
 /*
  * Initialize a dummy page for marking the caller's place in the specified
@@ -226,97 +224,64 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue
 	marker->aflags = PGA_ENQUEUED;
 }
 
-/*
- * vm_pageout_fallback_object_lock:
- *
- *	Lock vm object currently associated with `m'.  VM_OBJECT_TRYWLOCK is
- *	known to have failed and page queue must be either PQ_ACTIVE or
- *	PQ_INACTIVE.  To avoid lock order violation, unlock the page queue
- *	while locking the vm object.  Use marker page to detect page queue
- *	changes and maintain notion of next page on page queue.  Return
- *	TRUE if no changes were detected, FALSE otherwise.  vm object is
- *	locked on return.
- *
- *	This function depends on both the lock portion of struct vm_object
- *	and normal struct vm_page being type stable.
- */
-static boolean_t
-vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
+static inline bool
+vm_pageout_page_queued(vm_page_t m, int queue)
 {
-	struct vm_page marker;
-	struct vm_pagequeue *pq;
-	boolean_t unchanged;
-	u_short queue;
-	vm_object_t object;
 
-	queue = m->queue;
-	vm_pageout_init_marker(&marker, queue);
-	pq = vm_page_pagequeue(m);
-	object = m->object;
-
-	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
-	vm_pagequeue_unlock(pq);
-	vm_page_unlock(m);
-	VM_OBJECT_WLOCK(object);
-	vm_page_lock(m);
-	vm_pagequeue_lock(pq);
+	vm_page_assert_locked(m);
 
-	/*
-	 * The page's object might have changed, and/or the page might
-	 * have moved from its original position in the queue.  If the
-	 * page's object has changed, then the caller should abandon
-	 * processing the page because the wrong object lock was
-	 * acquired.  Use the marker's plinks.q, not the page's, to
-	 * determine if the page has been moved.  The state of the
-	 * page's plinks.q can be indeterminate; whereas, the marker's
-	 * plinks.q must be valid.
-	 */
-	*next = TAILQ_NEXT(&marker, plinks.q);
-	unchanged = m->object == object &&
-	    m == TAILQ_PREV(&marker, pglist, plinks.q);
-	KASSERT(!unchanged || m->queue == queue,
-	    ("page %p queue %d %d", m, queue, m->queue));
-	TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
-	return (unchanged);
+	if ((m->aflags & PGA_DEQUEUE) != 0)
+		return (false);
+	atomic_thread_fence_acq();
+	return (m->queue == queue);
 }
 
 /*
- * Lock the page while holding the page queue lock.  Use marker page
- * to detect page queue changes and maintain notion of next page on
- * page queue.  Return TRUE if no changes were detected, FALSE
- * otherwise.  The page is locked on return.  The page queue lock might
- * be dropped and reacquired.
+ * Add a small number of queued pages to a batch queue for later processing
+ * without the corresponding queue lock held.  The caller must have enqueued a
+ * marker page at the desired start point for the scan.
  *
- * This function depends on normal struct vm_page being type stable.
+ * When processing the batch queue, vm_pageout_page_queued() must be used to
+ * determine whether the page was logically dequeued by another thread.  Once
+ * this check is performed, the page lock guarantees that the page will not be
+ * disassociated from the queue.
  */
-static boolean_t
-vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
+static inline void
+vm_pageout_collect_batch(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
+    vm_page_t marker, int maxscan, const bool dequeue)
 {
-	struct vm_page marker;
-	struct vm_pagequeue *pq;
-	boolean_t unchanged;
-	u_short queue;
+	vm_page_t m;
 
-	vm_page_lock_assert(m, MA_NOTOWNED);
-	if (vm_page_trylock(m))
-		return (TRUE);
+	vm_pagequeue_assert_locked(pq);
 
-	queue = m->queue;
-	vm_pageout_init_marker(&marker, queue);
-	pq = vm_page_pagequeue(m);
+	vm_batchqueue_init(bq);
+	for (m = TAILQ_NEXT(marker, plinks.q); m != NULL && maxscan > 0;
+	    m = TAILQ_NEXT(m, plinks.q), maxscan--) {
+		VM_CNT_INC(v_pdpages);
+		if (__predict_false((m->flags & PG_MARKER) != 0))
+			continue;
 
-	TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
-	vm_pagequeue_unlock(pq);
-	vm_page_lock(m);
-	vm_pagequeue_lock(pq);
+		KASSERT((m->aflags & PGA_ENQUEUED) != 0,
+		    ("page %p not enqueued", m));
+		KASSERT((m->flags & PG_FICTITIOUS) == 0,
+		    ("Fictitious page %p cannot be in page queue", m));
+		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+		    ("Unmanaged page %p cannot be in page queue", m));
 
-	/* Page queue might have changed. */
-	*next = TAILQ_NEXT(&marker, plinks.q);
-	unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q);
-	KASSERT(!unchanged || m->queue == queue,
-	    ("page %p queue %d %d", m, queue, m->queue));
-	TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
-	return (unchanged);
+		if (!vm_batchqueue_insert(bq, m))
+			break;
+		if (dequeue) {
+			TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+			vm_page_aflag_clear(m, PGA_ENQUEUED);
+		}
+	}
+	TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
+	if (__predict_true(m != NULL))
+		TAILQ_INSERT_BEFORE(m, marker, plinks.q);
+	else
+		TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q);
+	if (dequeue)
+		vm_pagequeue_cnt_add(pq, -bq->bq_cnt);
 }
 
 /*
@@ -694,12 +659,14 @@ unlock_mp:
 static int
 vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
 {
+	struct vm_batchqueue bq, rq;
 	struct vm_pagequeue *pq;
+	struct mtx *mtx;
 	vm_object_t object;
-	vm_page_t m, next;
+	vm_page_t m;
 	int act_delta, error, maxscan, numpagedout, queue, starting_target;
 	int vnodes_skipped;
-	bool pageout_ok, queue_locked;
+	bool obj_locked, pageout_ok;
 
 	starting_target = launder;
 	vnodes_skipped = 0;
@@ -717,217 +684,227 @@ vm_pageout_launder(struct vm_domain *vmd, int launder,
 	 * As an optimization, we avoid laundering from PQ_UNSWAPPABLE when no
 	 * swap devices are configured.
 	 */
-	if (atomic_load_acq_int(&swapdev_enabled)) {
+	if (atomic_load_acq_int(&swapdev_enabled))
 		queue = PQ_UNSWAPPABLE;
-		pq = &vmd->vmd_pagequeues[queue];
-	} else {
+	else
 		queue = PQ_LAUNDRY;
-		pq = &vmd->vmd_pagequeues[queue];
-	}
+	pq = &vmd->vmd_pagequeues[queue];
 
scan:
 	vm_pagequeue_lock(pq);
-	maxscan = pq->pq_cnt;
-	queue_locked = true;
-	for (m = TAILQ_FIRST(&pq->pq_pl);
-	    m != NULL && maxscan-- > 0 && launder > 0;
-	    m = next) {
-		vm_pagequeue_assert_locked(pq);
-		KASSERT(queue_locked, ("unlocked laundry queue"));
-		KASSERT(vm_page_in_laundry(m),
-		    ("page %p has an inconsistent queue", m));
-		KASSERT((m->aflags & PGA_ENQUEUED) != 0,
-		    ("page %p not enqueued", m));
+	TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+	for (maxscan = pq->pq_cnt; maxscan > 0 && launder > 0 &&
+	    TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q) != NULL;
+	    maxscan -= bq.bq_cnt) {
+		vm_pageout_collect_batch(pq, &bq, &vmd->vmd_laundry_marker,
+		    min(maxscan, launder), false);
+		vm_pagequeue_unlock(pq);
 
-		VM_CNT_INC(v_pdpages);
-		next = TAILQ_NEXT(m, plinks.q);
-		if ((m->flags & PG_MARKER) != 0)
-			continue;
+		mtx = NULL;
+		obj_locked = false;
+		object = NULL;
+		vm_batchqueue_init(&rq);
+		VM_BATCHQ_FOREACH(&bq, m) {
+			vm_page_change_lock(m, &mtx);
 
-		KASSERT((m->flags & PG_FICTITIOUS) == 0,
-		    ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
-		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
-		    ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
+recheck:
+			/*
+			 * The page may have been disassociated from the queue
+			 * while locks were dropped.
+			 */
+			if (!vm_pageout_page_queued(m, queue))
+				continue;
 
-		if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
-			vm_page_unlock(m);
-			continue;
-		}
-		if (m->wire_count != 0 || (m->aflags & PGA_DEQUEUE) != 0) {
-			vm_page_dequeue_locked(m);
-			vm_page_unlock(m);
-			continue;
-		}
-		object = m->object;
-		if ((!VM_OBJECT_TRYWLOCK(object) &&
-		    (!vm_pageout_fallback_object_lock(m, &next) ||
-		    vm_page_held(m))) || vm_page_busied(m)) {
-			VM_OBJECT_WUNLOCK(object);
-			if ((m->wire_count != 0 ||
-			    (m->aflags & PGA_DEQUEUE) != 0) &&
-			    vm_page_pagequeue(m) == pq)
-				vm_page_dequeue_locked(m);
-			vm_page_unlock(m);
-			continue;
-		}
+			/*
+			 * A requeue was requested, so this page gets a second
+			 * chance.
+			 */
+			if ((m->aflags & PGA_REQUEUE) != 0)
+				goto reenqueue;
 
-		/*
-		 * Unlock the laundry queue, invalidating the 'next' pointer.
-		 * Use a marker to remember our place in the laundry queue.
-		 * After this point we have to handle a concurrent dequeue of
-		 * the page.  The page will not be re-enqueued while we hold
-		 * the page lock, however.
-		 */
-		TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
-		    plinks.q);
-		vm_pagequeue_unlock(pq);
-		queue_locked = false;
+			/*
+			 * Wired pages may not be freed.  Complete their removal
+			 * from the queue now to avoid needless revisits during
+			 * future scans.
+			 */
+			if (m->hold_count != 0)
+				goto reenqueue;
+			if (m->wire_count != 0) {
+				vm_page_dequeue(m);
+				continue;
+			}
 
-		/*
-		 * Invalid pages can be easily freed.  They cannot be
-		 * mapped; vm_page_free() asserts this.
-		 */
-		if (m->valid == 0)
-			goto free_page;
+			if (object != m->object) {
+				if (obj_locked) {
+					VM_OBJECT_WUNLOCK(object);
+					obj_locked = false;
+				}
+				object = m->object;
+			}
+			if (!obj_locked) {
+				if (!VM_OBJECT_TRYWLOCK(object)) {
+					mtx_unlock(mtx);
+					VM_OBJECT_WLOCK(object);
+					obj_locked = true;
+					mtx_lock(mtx);
+					goto recheck;
+				} else
+					obj_locked = true;
+			}
 
-		/*
-		 * If the page has been referenced and the object is not dead,
-		 * reactivate or requeue the page depending on whether the
-		 * object is mapped.
-		 */
-		if ((m->aflags & PGA_REFERENCED) != 0) {
-			vm_page_aflag_clear(m, PGA_REFERENCED);
-			act_delta = 1;
-		} else
-			act_delta = 0;
-		if (object->ref_count != 0)
-			act_delta += pmap_ts_referenced(m);
-		else {
-			KASSERT(!pmap_page_is_mapped(m),
-			    ("page %p is mapped", m));
-		}
-		if (act_delta != 0) {
-			if (object->ref_count != 0) {
-				VM_CNT_INC(v_reactivated);
-				vm_page_activate(m);
+			if (vm_page_busied(m))
+				goto reenqueue;
 
-				/*
-				 * Increase the activation count if the page
-				 * was referenced while in the laundry queue.
-				 * This makes it less likely that the page will
-				 * be returned prematurely to the inactive
-				 * queue.
-				 */
-				m->act_count += act_delta + ACT_ADVANCE;
+			/*
+			 * Invalid pages can be easily freed.  They cannot be
+			 * mapped; vm_page_free() asserts this.
+			 */
+			if (m->valid == 0)
+				goto free_page;
 
-				/*
-				 * If this was a background laundering, count
-				 * activated pages towards our target.  The
-				 * purpose of background laundering is to ensure
-				 * that pages are eventually cycled through the
-				 * laundry queue, and an activation is a valid
-				 * way out.
-				 */
-				if (!in_shortfall)
-					launder--;
-				goto drop_page;
-			} else if ((object->flags & OBJ_DEAD) == 0)
-				goto requeue_page;
-		}
+			/*
+			 * If the page has been referenced and the object is not
+			 * dead, reactivate or requeue the page depending on
+			 * whether the object is mapped.
+			 */
+			if ((m->aflags & PGA_REFERENCED) != 0) {
+				vm_page_aflag_clear(m, PGA_REFERENCED);
+				act_delta = 1;
+			} else
+				act_delta = 0;
+			if (object->ref_count != 0)
+				act_delta += pmap_ts_referenced(m);
+			else {
+				KASSERT(!pmap_page_is_mapped(m),
+				    ("page %p is mapped", m));
+			}
+			if (act_delta != 0) {
+				if (object->ref_count != 0) {
+					VM_CNT_INC(v_reactivated);
+					vm_page_activate(m);
 
-		/*
-		 * If the page appears to be clean at the machine-independent
-		 * layer, then remove all of its mappings from the pmap in
-		 * anticipation of freeing it.  If, however, any of the page's
-		 * mappings allow write access, then the page may still be
-		 * modified until the last of those mappings are removed.
-		 */
-		if (object->ref_count != 0) {
-			vm_page_test_dirty(m);
-			if (m->dirty == 0)
-				pmap_remove_all(m);
-		}
+					/*
+					 * Increase the activation count if the
+					 * page was referenced while in the
+					 * laundry queue.  This makes it less
+					 * likely that the page will be returned
+					 * prematurely to the inactive queue.
+					 */
+					m->act_count += act_delta + ACT_ADVANCE;
 
-		/*
-		 * Clean pages are freed, and dirty pages are paged out unless
-		 * they belong to a dead object.  Requeueing dirty pages from
-		 * dead objects is pointless, as they are being paged out and
-		 * freed by the thread that destroyed the object.
-		 */
-		if (m->dirty == 0) {
-free_page:
-			vm_page_free(m);
-			VM_CNT_INC(v_dfree);
-		} else if ((object->flags & OBJ_DEAD) == 0) {
-			if (object->type != OBJT_SWAP &&
-			    object->type != OBJT_DEFAULT)
-				pageout_ok = true;
-			else if (disable_swap_pageouts)
-				pageout_ok = false;
-			else
-				pageout_ok = true;
-			if (!pageout_ok) {
-requeue_page:
-				vm_pagequeue_lock(pq);
-				queue_locked = true;
-				KASSERT(m->queue == queue ||
-				    m->queue == PQ_NONE,
-				    ("%s: page %p migrated between queues",
-				    __func__, m));
-				if (m->queue == queue) {
-					TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-					TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-				} else {
-					m->queue = queue;
-					TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-					vm_pagequeue_cnt_add(pq, 1);
-					vm_page_aflag_set(m, PGA_ENQUEUED);
-					if (__predict_false((m->aflags &
-					    PGA_REQUEUE) != 0))
-						vm_page_aflag_clear(m,
-						    PGA_REQUEUE);
+					/*
+					 * If this was a background laundering,
+					 * count activated pages towards our
+					 * target.  The purpose of background
+					 * laundering is to ensure that pages
+					 * are eventually cycled through the
+					 * laundry queue, and an activation is a
+					 * valid way out.
+					 */
+					if (!in_shortfall)
+						launder--;
+					continue;
+				} else if ((object->flags & OBJ_DEAD) == 0) {
+					vm_page_aflag_set(m, PGA_REQUEUE);
+					goto reenqueue;
 				}
 			}
 
 			/*
-			 * Form a cluster with adjacent, dirty pages from the
-			 * same object, and page out that entire cluster.
-			 *
-			 * The adjacent, dirty pages must also be in the
-			 * laundry.  However, their mappings are not checked
-			 * for new references.  Consequently, a recently
-			 * referenced page may be paged out.  However, that
-			 * page will not be prematurely reclaimed.  After page
-			 * out, the page will be placed in the inactive queue,
-			 * where any new references will be detected and the
-			 * page reactivated.
+			 * If the page appears to be clean at the
+			 * machine-independent layer, then remove all of its
+			 * mappings from the pmap in anticipation of freeing it.
+			 * If, however, any of the page's mappings allow write
+			 * access, then the page may still be modified until the
+			 * last of those mappings are removed.
+			 */
-		error = vm_pageout_clean(m, &numpagedout);
-		if (error == 0) {
-			launder -= numpagedout;
-			maxscan -= numpagedout - 1;
-		} else if (error == EDEADLK) {
-			pageout_lock_miss++;
-			vnodes_skipped++;
+			if (object->ref_count != 0) {
+				vm_page_test_dirty(m);
+				if (m->dirty == 0)
+					pmap_remove_all(m);
 			}
-		goto relock_queue;
+
+			/*
+			 * Clean pages are freed, and dirty pages are paged out
+			 * unless they belong to a dead object.  Requeueing
+			 * dirty pages from dead objects is pointless, as they
+			 * are being paged out and freed by the thread that
+			 * destroyed the object.
+			 */
+			if (m->dirty == 0) {
+free_page:
+				vm_page_free(m);
+				VM_CNT_INC(v_dfree);
+			} else if ((object->flags & OBJ_DEAD) == 0) {
+				if (object->type != OBJT_SWAP &&
+				    object->type != OBJT_DEFAULT)
+					pageout_ok = true;
+				else if (disable_swap_pageouts)
+					pageout_ok = false;
+				else
+					pageout_ok = true;
+				if (!pageout_ok) {
+					vm_page_aflag_set(m, PGA_REQUEUE);
+					goto reenqueue;
+				}
+
+				/*
+				 * Form a cluster with adjacent, dirty pages from the
+				 * same object, and page out that entire cluster.
+				 *
+				 * The adjacent, dirty pages must also be in the
+				 * laundry.  However, their mappings are not checked
+				 * for new references.  Consequently, a recently
+				 * referenced page may be paged out.  However, that
+				 * page will not be prematurely reclaimed.  After page
+				 * out, the page will be placed in the inactive queue,
+				 * where any new references will be detected and the
+				 * page reactivated.
+				 */
+				error = vm_pageout_clean(m, &numpagedout);
+				if (error == 0) {
+					launder -= numpagedout;
+					maxscan -= numpagedout - 1;
+				} else if (error == EDEADLK) {
+					pageout_lock_miss++;
+					vnodes_skipped++;
+				}
+
+				mtx = NULL;
+				obj_locked = false;
+			}
+			continue;
+reenqueue:
+			if (!vm_batchqueue_insert(&rq, m))
+				panic("failed to requeue page %p", m);
 		}
-drop_page:
-		vm_page_unlock(m);
-		VM_OBJECT_WUNLOCK(object);
-relock_queue:
-		if (!queue_locked) {
-			vm_pagequeue_lock(pq);
-			queue_locked = true;
+		if (mtx != NULL)
+			mtx_unlock(mtx);
+		if (obj_locked)
+			VM_OBJECT_WUNLOCK(object);
+
+		vm_pagequeue_lock(pq);
+		VM_BATCHQ_FOREACH(&rq, m) {
+			if (m->queue == queue &&
+			    (m->aflags & PGA_ENQUEUED) != 0) {
+				TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+				if ((m->aflags & PGA_REQUEUE) != 0) {
+					TAILQ_INSERT_TAIL(&pq->pq_pl, m,
+					    plinks.q);
+					vm_page_aflag_clear(m, PGA_REQUEUE);
+				} else
+					TAILQ_INSERT_BEFORE(&vmd->vmd_marker, m,
+					    plinks.q);
+				vm_pagequeue_cnt_inc(pq);
+			}
 		}
-		next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
-		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
 	}
+	TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 
 	if (launder > 0 && pq == &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]) {
-		pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+		queue = PQ_LAUNDRY;
+		pq = &vmd->vmd_pagequeues[queue];
 		goto scan;
 	}
 
@@ -1199,7 +1176,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass, int s
 {
 	struct vm_batchqueue bq, rq;
 	struct mtx *mtx;
-	vm_page_t m, next;
+	vm_page_t m;
 	struct vm_pagequeue *pq;
 	vm_object_t object;
 	long min_scan;
@@ -1246,8 +1223,6 @@ vm_pageout_scan(struct vm_domain *vmd, int pass, int s
 	page_shortage = deficit = 0;
 	starting_page_shortage = page_shortage;
 
-	vm_batchqueue_init(&bq);
-
 	/*
 	 * Start scanning the inactive queue for pages that we can free.  The
 	 * scan will stop when we reach the target or we have scanned the
 	 * entire queue.  (Note that m->act_count is not used to make
 	 * decisions for the inactive queue, only for the active queue.)
 	 */
 	pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
-	maxscan = pq->pq_cnt;
 	vm_pagequeue_lock(pq);
-	for (m = TAILQ_FIRST(&pq->pq_pl);
-	    m != NULL && maxscan-- > 0 && page_shortage > 0;
-	    m = next) {
-		vm_pagequeue_assert_locked(pq);
-		KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
-		KASSERT((m->aflags & PGA_ENQUEUED) != 0,
-		    ("page %p not enqueued", m));
-
-		VM_CNT_INC(v_pdpages);
-		next = TAILQ_NEXT(m, plinks.q);
-		if ((m->flags & PG_MARKER) != 0)
-			continue;
-
-		KASSERT((m->flags & PG_FICTITIOUS) == 0,
-		    ("Fictitious page %p cannot be in inactive queue", m));
-		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
-		    ("Unmanaged page %p cannot be in inactive queue", m));
-
-		/*
-		 * Dequeue pages to be processed without the page queue lock
-		 * held.
-		 */
-		if (vm_batchqueue_insert(&bq, m)) {
-			TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-			vm_page_aflag_clear(m, PGA_ENQUEUED);
-			vm_pagequeue_cnt_dec(pq);
-			continue;
-		}
-
-		TAILQ_INSERT_BEFORE(m, &vmd->vmd_marker, plinks.q);
+	TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
+	for (maxscan = pq->pq_cnt; maxscan > 0 && page_shortage > 0 &&
+	    TAILQ_NEXT(&vmd->vmd_marker, plinks.q) != NULL;
+	    maxscan -= bq.bq_cnt) {
+		vm_pageout_collect_batch(pq, &bq, &vmd->vmd_marker,
+		    min(maxscan, page_shortage), true);
 		vm_pagequeue_unlock(pq);
 
 		mtx = NULL;
@@ -1298,14 +1248,11 @@ vm_pageout_scan(struct vm_domain *vmd, int pass, int s
 
recheck:
 		/*
-		 * The page may have been dequeued while locks were
-		 * dropped.
+		 * The page may have been disassociated from the queue
+		 * while locks were dropped.
 		 */
-		if ((m->aflags & PGA_DEQUEUE) != 0)
+		if (!vm_pageout_page_queued(m, PQ_INACTIVE))
 			continue;
-		atomic_thread_fence_acq();
-		if (m->queue != PQ_INACTIVE)
-			continue;
 
 		/*
 		 * A requeue was requested, so this page gets a second
@@ -1395,8 +1342,8 @@ recheck:
 				m->act_count += act_delta + ACT_ADVANCE;
 				continue;
 			} else if ((object->flags & OBJ_DEAD) == 0) {
-				vm_page_requeue(m);
-				continue;
+				vm_page_aflag_set(m, PGA_REQUEUE);
+				goto reenqueue;
 			}
 		}
 
@@ -1436,7 +1383,6 @@ reenqueue:
 			mtx_unlock(mtx);
 		if (obj_locked)
 			VM_OBJECT_WUNLOCK(object);
-		vm_batchqueue_init(&bq);
 
 		vm_pagequeue_lock(pq);
 		VM_BATCHQ_FOREACH(&rq, m) {
@@ -1453,19 +1399,8 @@ reenqueue:
 				vm_pagequeue_cnt_inc(pq);
 			}
 		}
-
-		/* Pick up where we left off. */
-		next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
-		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	}
-
-	/* Requeue batched pages for next time. */
-	VM_BATCHQ_FOREACH(&bq, m) {
-		if (vm_page_inactive(m) && (m->aflags & PGA_ENQUEUED) == 0) {
-			TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q);
-			vm_page_aflag_set(m, PGA_ENQUEUED);
-		}
-	}
+	TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 
 	/*
@@ -1529,7 +1464,6 @@ reenqueue:
 
 	pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
 	vm_pagequeue_lock(pq);
-	maxscan = pq->pq_cnt;
 
 	/*
 	 * If we're just idle polling attempt to visit every
@@ -1550,29 +1484,14 @@ reenqueue:
 	 * the per-page activity counter and use it to identify deactivation
 	 * candidates.  Held pages may be deactivated.
 	 */
-	vm_batchqueue_init(&bq);
-	for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
-	    min_scan || (inactq_shortage > 0 && scanned < maxscan)); m = next,
-	    scanned++) {
-		KASSERT(m->queue == PQ_ACTIVE,
-		    ("vm_pageout_scan: page %p isn't active", m));
-		KASSERT((m->aflags & PGA_ENQUEUED) != 0,
-		    ("page %p not enqueued", m));
-
-		VM_CNT_INC(v_pdpages);
-		next = TAILQ_NEXT(m, plinks.q);
-		if ((m->flags & PG_MARKER) != 0)
-			continue;
-
-		KASSERT((m->flags & PG_FICTITIOUS) == 0,
-		    ("Fictitious page %p cannot be in active queue", m));
-		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
-		    ("Unmanaged page %p cannot be in active queue", m));
-
-		if (vm_batchqueue_insert(&bq, m))
-			continue;
-
-		TAILQ_INSERT_BEFORE(m, &vmd->vmd_marker, plinks.q);
+	TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
+	for (maxscan = pq->pq_cnt, scanned = 0;
+	    TAILQ_NEXT(&vmd->vmd_marker, plinks.q) != NULL &&
+	    (scanned < min_scan || (inactq_shortage > 0 && scanned < maxscan));
+	    scanned += bq.bq_cnt) {
+		vm_pageout_collect_batch(pq, &bq, &vmd->vmd_marker,
+		    (inactq_shortage > 0 ? maxscan : min_scan) - scanned,
+		    false);
 		vm_pagequeue_unlock(pq);
 
 		mtx = NULL;
@@ -1581,14 +1500,11 @@ reenqueue:
 			vm_page_change_lock(m, &mtx);
 
 			/*
-			 * The page may have been dequeued while locks were
-			 * dropped.
+			 * The page may have been disassociated from the queue
+			 * while locks were dropped.
 			 */
-			if ((m->aflags & PGA_DEQUEUE) != 0)
+			if (!vm_pageout_page_queued(m, PQ_ACTIVE))
 				continue;
-			atomic_thread_fence_acq();
-			if (m->queue != PQ_ACTIVE)
-				continue;
 
 			/*
 			 * Perform lazy dequeues.
@@ -1678,26 +1594,23 @@ reenqueue:
 		}
 		if (mtx != NULL)
 			mtx_unlock(mtx);
-		vm_batchqueue_init(&bq);
 
 		vm_pagequeue_lock(pq);
 
 		/*
 		 * XXXMJ this step could be avoided if we used a CLOCK scan for
-		 * the active queue
+		 * the active queue.  This would involve modifying
+		 * vm_page_enqueue() to insert after a marker page rather than
+		 * at the tail of the queue.
 		 */
 		VM_BATCHQ_FOREACH(&rq, m) {
-			if (vm_page_active(m)) {
-				if ((m->aflags & PGA_ENQUEUED) != 0)
-					TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+			if (vm_page_active(m) &&
+			    (m->aflags & PGA_ENQUEUED) != 0) {
+				TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 				TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-				vm_page_aflag_set(m, PGA_ENQUEUED);
 			}
 		}
-
-		/* Pick up where we left off. */
-		next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
-		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	}
+	TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	vm_pagequeue_unlock(pq);
 	if (pass > 0)
 		vm_swapout_run_idle();
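
For readers who want to see the scan structure introduced above in isolation, here is a minimal, self-contained userland sketch of the marker-based batching pattern: a marker element records the scan position, a small batch is collected while the "queue lock" is held, and the lock is dropped while the batch is processed.  It is an illustration only; every name in it (struct page, collect_batch, BATCH_SIZE, the pthread mutex standing in for the page queue lock) is hypothetical and is not part of the kernel interfaces touched by this commit.

/*
 * Illustrative userland model of a marker-based batched queue scan.
 * Build with: cc -o batchscan batchscan.c -lpthread (names are made up).
 */
#include <sys/queue.h>

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define	BATCH_SIZE	4

struct page {
	int		id;		/* payload; -1 identifies the marker */
	bool		enqueued;	/* models PGA_ENQUEUED */
	TAILQ_ENTRY(page) link;
};

TAILQ_HEAD(pagelist, page);

static struct pagelist	queue = TAILQ_HEAD_INITIALIZER(queue);
static pthread_mutex_t	queue_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Collect up to BATCH_SIZE entries following the marker and advance the
 * marker past them.  Called with the queue lock held.
 */
static int
collect_batch(struct page *marker, struct page **batch)
{
	struct page *p;
	int n;

	n = 0;
	for (p = TAILQ_NEXT(marker, link); p != NULL && n < BATCH_SIZE;
	    p = TAILQ_NEXT(p, link)) {
		if (p->id == -1)	/* skip other markers */
			continue;
		batch[n++] = p;
	}

	/* Reposition the marker so that the next call resumes after us. */
	TAILQ_REMOVE(&queue, marker, link);
	if (p != NULL)
		TAILQ_INSERT_BEFORE(p, marker, link);
	else
		TAILQ_INSERT_TAIL(&queue, marker, link);
	return (n);
}

int
main(void)
{
	struct page marker = { .id = -1 };
	struct page *batch[BATCH_SIZE], *p;
	int i, n;

	/* Build a small queue of ten "pages". */
	for (i = 0; i < 10; i++) {
		p = calloc(1, sizeof(*p));
		p->id = i;
		p->enqueued = true;
		TAILQ_INSERT_TAIL(&queue, p, link);
	}

	pthread_mutex_lock(&queue_lock);
	TAILQ_INSERT_HEAD(&queue, &marker, link);
	while (TAILQ_NEXT(&marker, link) != NULL) {
		n = collect_batch(&marker, batch);
		pthread_mutex_unlock(&queue_lock);

		/*
		 * Process the batch without the queue lock.  The kernel code
		 * rechecks each page here (vm_pageout_page_queued()) because
		 * another thread may have dequeued it in the meantime; the
		 * "enqueued" flag stands in for that check.
		 */
		for (i = 0; i < n; i++)
			if (batch[i]->enqueued)
				printf("processing page %d\n", batch[i]->id);

		pthread_mutex_lock(&queue_lock);
	}
	TAILQ_REMOVE(&queue, &marker, link);
	pthread_mutex_unlock(&queue_lock);

	while ((p = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, p, link);
		free(p);
	}
	return (0);
}

The kernel versions in the diff differ in the details: vm_pageout_collect_batch() can also dequeue pages as it collects them (the inactive queue scan passes dequeue == true), and pages that survive processing are re-inserted near the marker under the queue lock rather than simply skipped.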