Date:      Thu, 23 Aug 2018 20:34:23 +0000 (UTC)
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r338276 - head/sys/vm
Message-ID:  <201808232034.w7NKYNrB046886@repo.freebsd.org>

Author: markj
Date: Thu Aug 23 20:34:22 2018
New Revision: 338276
URL: https://svnweb.freebsd.org/changeset/base/338276

Log:
  Ensure that queue state is cleared when vm_page_dequeue() returns.
  
  Per-page queue state is updated non-atomically, with either the page
  lock or the page queue lock held.  When vm_page_dequeue() is called
  without the page lock, in rare cases a different thread may be
  concurrently dequeuing the page with the page queue lock held.
  Because the update is non-atomic, vm_page_dequeue() might return
  before the queue state is completely cleared, which can lead to
  race conditions.
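
  To make the window concrete, here is a minimal userland model of the
  two-step update.  This is an illustrative sketch only: the field
  names mirror struct vm_page, but the types, flag values and helpers
  are made up and are not the kernel's.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define	PQ_NONE			255	/* illustrative values; */
	#define	PGA_ENQUEUED		0x01	/* the real definitions */
	#define	PGA_DEQUEUE		0x02	/* live in vm/vm_page.h */
	#define	PGA_REQUEUE		0x04
	#define	PGA_QUEUE_STATE_MASK	\
		(PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE)

	struct page {
		_Atomic uint8_t queue;
		_Atomic uint8_t aflags;
	};

	/*
	 * Dequeuing thread, page queue lock held: the two stores are
	 * separate instructions, so there is a window in which queue
	 * reads as PQ_NONE while queue-state flags are still set.
	 */
	static void
	dequeue_complete(struct page *m)
	{

		atomic_store(&m->queue, PQ_NONE);
		atomic_fetch_and(&m->aflags,
		    (uint8_t)~PGA_QUEUE_STATE_MASK);
	}

	/*
	 * Lockless reader: treating queue == PQ_NONE as "no queue
	 * state at all" is exactly the assumption that the window
	 * above violates.
	 */
	static bool
	racy_is_dequeued(struct page *m)
	{

		return (atomic_load(&m->queue) == PQ_NONE);
	}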
  
  Restrict the vm_page_dequeue() interface so that it must be called
  either with the page lock held or on a free page, and make it
  busy-wait when a different thread is concurrently updating the
  queue state; such updates must take place within a critical
  section.
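
  Continuing the sketch above, the fixed lockless path (modelling only
  the branch in which the queue field already reads PQ_NONE) spins
  until the concurrent updater has cleared the flags; since the
  updater runs in a critical section, it cannot be preempted
  mid-update, so the wait is short:

	/*
	 * Wait out a concurrent dequeue_complete().  Once the
	 * queue-state flags are clear, the dequeue is finished and
	 * stays that way, because this caller holds the page lock
	 * or owns a free page.
	 */
	static void
	wait_for_dequeue(struct page *m)
	{

		while (atomic_load(&m->queue) == PQ_NONE &&
		    (atomic_load(&m->aflags) &
		    PGA_QUEUE_STATE_MASK) != 0)
			;	/* the kernel calls cpu_spinwait() */
	}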
  
  While here, do some related cleanup: inline vm_page_dequeue_locked()
  into its only caller and delete a prototype for the unimplemented
  vm_page_requeue_locked().  Replace the volatile qualifier for "queue"
  added in r333703 with explicit uses of atomic_load_8() where required.
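
  With the volatile qualifier gone, each lockless read site has to
  announce itself.  In the model above the field is _Atomic, so the
  explicit load looks like this; in the kernel the field stays a
  plain uint8_t and atomic_load_8() from machine/atomic.h performs
  the access, as in the hunks below:

	/* Sketch of an explicitly-marked lockless read. */
	static uint8_t
	load_queue(struct page *m)
	{

		/* The kernel spells this atomic_load_8(&m->queue). */
		return (atomic_load(&m->queue));
	}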
  
  Reported and tested by:	pho
  Reviewed by:	alc
  Differential Revision:	https://reviews.freebsd.org/D15980

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Thu Aug 23 20:25:27 2018	(r338275)
+++ head/sys/vm/vm_page.c	Thu Aug 23 20:34:22 2018	(r338276)
@@ -521,7 +521,7 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segi
 	m->wire_count = 0;
 	m->busy_lock = VPB_UNBUSIED;
 	m->hold_count = 0;
-	m->flags = 0;
+	m->flags = m->aflags = 0;
 	m->phys_addr = pa;
 	m->queue = PQ_NONE;
 	m->psind = 0;
@@ -2148,8 +2148,9 @@ vm_page_alloc_check(vm_page_t m)
 {
 
 	KASSERT(m->object == NULL, ("page %p has object", m));
-	KASSERT(m->queue == PQ_NONE,
-	    ("page %p has unexpected queue %d", m, m->queue));
+	KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+	    ("page %p has unexpected queue %d, flags %#x",
+	    m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK)));
 	KASSERT(!vm_page_held(m), ("page %p is held", m));
 	KASSERT(!vm_page_busied(m), ("page %p is busy", m));
 	KASSERT(m->dirty == 0, ("page %p is dirty", m));
@@ -3090,7 +3091,7 @@ vm_page_pagequeue_lockptr(vm_page_t m)
 {
 	uint8_t queue;
 
-	if ((queue = m->queue) == PQ_NONE)
+	if ((queue = atomic_load_8(&m->queue)) == PQ_NONE)
 		return (NULL);
 	return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue].pq_mutex);
 }
@@ -3101,6 +3102,7 @@ vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_pa
 	struct vm_domain *vmd;
 	uint8_t aflags;
 
+	CRITICAL_ASSERT(curthread);
 	vm_pagequeue_assert_locked(pq);
 	KASSERT(pq == vm_page_pagequeue(m),
 	    ("page %p doesn't belong to %p", m, pq));
@@ -3267,7 +3269,7 @@ vm_page_dequeue_deferred(vm_page_t m)
 
 	vm_page_assert_locked(m);
 
-	queue = m->queue;
+	queue = atomic_load_8(&m->queue);
 	if (queue == PQ_NONE) {
 		KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0,
 		    ("page %p has queue state", m));
@@ -3279,56 +3281,46 @@ vm_page_dequeue_deferred(vm_page_t m)
 }
 
 /*
- *	vm_page_dequeue_locked:
- *
- *	Remove the page from its page queue, which must be locked.
- *	If the page lock is not held, there is no guarantee that the
- *	page will not be enqueued by another thread before this function
- *	returns.  In this case, it is up to the caller to ensure that
- *	no other threads hold a reference to the page.
- *
- *	The page queue lock must be held.  If the page is not already
- *	logically dequeued, the page lock must be held as well.
- */
-void
-vm_page_dequeue_locked(vm_page_t m)
-{
-	struct vm_pagequeue *pq;
-
-	pq = vm_page_pagequeue(m);
-
-	KASSERT(m->queue != PQ_NONE,
-	    ("%s: page %p queue field is PQ_NONE", __func__, m));
-	vm_pagequeue_assert_locked(pq);
-	KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
-	    mtx_owned(vm_page_lockptr(m)),
-	    ("%s: queued unlocked page %p", __func__, m));
-
-	if ((m->aflags & PGA_ENQUEUED) != 0) {
-		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-		vm_pagequeue_cnt_dec(pq);
-	}
-	vm_page_dequeue_complete(m);
-}
-
-/*
  *	vm_page_dequeue:
  *
  *	Remove the page from whichever page queue it's in, if any.
- *	If the page lock is not held, there is no guarantee that the
- *	page will not be enqueued by another thread before this function
- *	returns.  In this case, it is up to the caller to ensure that
- *	no other threads hold a reference to the page.
+ *	The page must either be locked or unallocated.  This constraint
+ *	ensures that the queue state of the page will remain consistent
+ *	after this function returns.
  */
 void
 vm_page_dequeue(vm_page_t m)
 {
 	struct mtx *lock, *lock1;
+	struct vm_pagequeue *pq;
+	uint8_t aflags;
 
-	lock = vm_page_pagequeue_lockptr(m);
+	KASSERT(mtx_owned(vm_page_lockptr(m)) || m->order == VM_NFREEORDER,
+	    ("page %p is allocated and unlocked", m));
+
 	for (;;) {
-		if (lock == NULL)
-			return;
+		lock = vm_page_pagequeue_lockptr(m);
+		if (lock == NULL) {
+			/*
+			 * A thread may be concurrently executing
+			 * vm_page_dequeue_complete().  Ensure that all queue
+			 * state is cleared before we return.
+			 */
+			aflags = atomic_load_8(&m->aflags);
+			if ((aflags & PGA_QUEUE_STATE_MASK) == 0)
+				return;
+			KASSERT((aflags & PGA_DEQUEUE) != 0,
+			    ("page %p has unexpected queue state flags %#x",
+			    m, aflags));
+
+			/*
+			 * Busy wait until the thread updating queue state is
+			 * finished.  Such a thread must be executing in a
+			 * critical section.
+			 */
+			cpu_spinwait();
+			continue;
+		}
 		mtx_lock(lock);
 		if ((lock1 = vm_page_pagequeue_lockptr(m)) == lock)
 			break;
@@ -3337,7 +3329,16 @@ vm_page_dequeue(vm_page_t m)
 	}
 	KASSERT(lock == vm_page_pagequeue_lockptr(m),
 	    ("%s: page %p migrated directly between queues", __func__, m));
-	vm_page_dequeue_locked(m);
+	KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
+	    mtx_owned(vm_page_lockptr(m)),
+	    ("%s: queued unlocked page %p", __func__, m));
+
+	if ((m->aflags & PGA_ENQUEUED) != 0) {
+		pq = vm_page_pagequeue(m);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+	}
+	vm_page_dequeue_complete(m);
 	mtx_unlock(lock);
 }
 
@@ -3376,7 +3377,7 @@ vm_page_requeue(vm_page_t m)
 
 	if ((m->aflags & PGA_REQUEUE) == 0)
 		vm_page_aflag_set(m, PGA_REQUEUE);
-	vm_pqbatch_submit_page(m, m->queue);
+	vm_pqbatch_submit_page(m, atomic_load_8(&m->queue));
 }
 
 /*

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Thu Aug 23 20:25:27 2018	(r338275)
+++ head/sys/vm/vm_page.h	Thu Aug 23 20:34:22 2018	(r338276)
@@ -208,7 +208,7 @@ struct vm_page {
 	uint16_t flags;			/* page PG_* flags (P) */
 	uint8_t aflags;			/* access is atomic */
 	uint8_t oflags;			/* page VPO_* flags (O) */
-	volatile uint8_t queue;		/* page queue index (Q) */
+	uint8_t queue;			/* page queue index (Q) */
 	int8_t psind;			/* pagesizes[] index (O) */
 	int8_t segind;			/* vm_phys segment index (C) */
 	uint8_t	order;			/* index of the buddy queue (F) */
@@ -539,7 +539,6 @@ void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_deferred(vm_page_t m);
-void vm_page_dequeue_locked(vm_page_t m);
 void vm_page_drain_pqbatch(void);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 bool vm_page_free_prep(vm_page_t m);
@@ -565,7 +564,6 @@ int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_
 vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object,
     vm_pindex_t pindex);
 void vm_page_requeue(vm_page_t m);
-void vm_page_requeue_locked(vm_page_t m);
 int vm_page_sbusied(vm_page_t m);
 vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
     vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
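
With vm_page_dequeue_locked() folded into its only caller and the
stale vm_page_requeue_locked() prototype removed, vm_page_dequeue()
is the single remaining entry point.  A hedged caller-side sketch of
the narrowed contract follows; the wrapper function is hypothetical,
while the lock macros and includes are the kernel's usual ones:

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>

	/*
	 * Hypothetical caller: hold the page lock across the call,
	 * satisfying the new KASSERT.  (The other legal case is a
	 * free page, which no other thread may reference.)  The call
	 * may spin briefly while a racing thread finishes a dequeue.
	 */
	static void
	example_dequeue(vm_page_t m)
	{

		vm_page_lock(m);
		vm_page_dequeue(m);
		vm_page_unlock(m);
	}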


