Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 16 Oct 2017 15:16:24 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r324665 - in head/sys/amd64: amd64 include
Message-ID:  <201710161516.v9GFGO9Z069895@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Mon Oct 16 15:16:24 2017
New Revision: 324665
URL: https://svnweb.freebsd.org/changeset/base/324665

Log:
  Fix the pv_chunks pc_lru tailq handling in reclaim_pv_chunk().
  
  For processing, reclaim_pv_chunk() removes the pv_chunk from the lru
  list, which makes pc_lru linkage invalid.  Then the pmap lock is
  released, which allows for other thread to free the last pv entry
  allocated from the chunk and call free_pv_chunk(), which tries to
  modify the invalid linkage.
  
  Similarly, the chunk is inserted into the private tailq new_tail
  temporary.  Again, free_pv_chunk() might be run and corrupt the
  linkage for the new_tail after the pmap lock is dropped.
  
  This is a consequence of r299788 elimination of pvh_global_lock, which
  allowed for reclaim to run in parallel with other pmap calls which
  free pv chunks.
  
  As a fix, do not remove the chunk from pc_lru queue, use a marker to
  remember the position in the queue iteration.  We can safely operate
  on the chunks after the chunk's pmap is locked, we fetched the chunk
  after the marker, and we checked that chunk pmap is same as we have
  locked, because chunk removal from pc_lru requires both pv_chunk_mutex
  and the pmap mutex owned.
  
  Note that the fix lost an optimization which was present in the
  previous algorithm.  Namely, new_tail requeueing rotated the pv chunks
  list so that reclaim didn't scan the same pv chunks that couldn't be
  freed (because they contained a wired and/or superpage mapping) on
  every invocation.  An additional change is planned which would improve
  this.
  
  Reported and tested by:	pho
  Reviewed by:	alc
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week

Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/pmap.h

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Mon Oct 16 15:05:32 2017	(r324664)
+++ head/sys/amd64/amd64/pmap.c	Mon Oct 16 15:16:24 2017	(r324665)
@@ -2888,11 +2888,11 @@ reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked
 static vm_page_t
 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
 {
-	struct pch new_tail;
-	struct pv_chunk *pc;
+	struct pv_chunk *pc, *pc_marker;
+	struct pv_chunk_header pc_marker_b;
 	struct md_page *pvh;
 	pd_entry_t *pde;
-	pmap_t pmap;
+	pmap_t next_pmap, pmap;
 	pt_entry_t *pte, tpte;
 	pt_entry_t PG_G, PG_A, PG_M, PG_RW;
 	pv_entry_t pv;
@@ -2909,7 +2909,8 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
 	m_pc = NULL;
 	PG_G = PG_A = PG_M = PG_RW = 0;
 	SLIST_INIT(&free);
-	TAILQ_INIT(&new_tail);
+	bzero(&pc_marker_b, sizeof(pc_marker_b));
+	pc_marker = (struct pv_chunk *)&pc_marker_b;
 
 	/*
 	 * A delayed invalidation block should already be active if
@@ -2919,30 +2920,52 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
 	start_di = pmap_not_in_di();
 
 	mtx_lock(&pv_chunks_mutex);
-	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
-		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+	TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
+	while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != NULL &&
+	    SLIST_EMPTY(&free)) {
+		next_pmap = pc->pc_pmap;
+		if (next_pmap == NULL)		/* marker */
+			goto next_chunk;
 		mtx_unlock(&pv_chunks_mutex);
-		if (pmap != pc->pc_pmap) {
+
+		/*
+		 * A pv_chunk can only be removed from the pc_lru list
+		 * when both pc_chunks_mutex is owned and the
+		 * corresponding pmap is locked.
+		 */
+		if (pmap != next_pmap) {
 			reclaim_pv_chunk_leave_pmap(pmap, locked_pmap,
 			    start_di);
-			pmap = pc->pc_pmap;
+			pmap = next_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap) {
 				RELEASE_PV_LIST_LOCK(lockp);
 				PMAP_LOCK(pmap);
-			} else if (pmap != locked_pmap &&
-			    !PMAP_TRYLOCK(pmap)) {
-				pmap = NULL;
-				TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+				if (start_di)
+					pmap_delayed_invl_started();
 				mtx_lock(&pv_chunks_mutex);
 				continue;
-			}
+			} else if (pmap != locked_pmap) {
+				if (PMAP_TRYLOCK(pmap)) {
+					if (start_di)
+						pmap_delayed_invl_started();
+					mtx_lock(&pv_chunks_mutex);
+					continue;
+				} else {
+					pmap = NULL; /* pmap is not locked */
+					mtx_lock(&pv_chunks_mutex);
+					pc = TAILQ_NEXT(pc_marker, pc_lru);
+					if (pc == NULL ||
+					    pc->pc_pmap != next_pmap)
+						continue;
+					goto next_chunk;
+				}
+			} else if (start_di)
+				pmap_delayed_invl_started();
 			PG_G = pmap_global_bit(pmap);
 			PG_A = pmap_accessed_bit(pmap);
 			PG_M = pmap_modified_bit(pmap);
 			PG_RW = pmap_rw_bit(pmap);
-			if (start_di)
-				pmap_delayed_invl_started();
 		}
 
 		/*
@@ -2987,9 +3010,8 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
 			}
 		}
 		if (freed == 0) {
-			TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 			mtx_lock(&pv_chunks_mutex);
-			continue;
+			goto next_chunk;
 		}
 		/* Every freed mapping is for a 4 KB page. */
 		pmap_resident_count_dec(pmap, freed);
@@ -3006,16 +3028,19 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
 			m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 			dump_drop_page(m_pc->phys_addr);
 			mtx_lock(&pv_chunks_mutex);
+			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 			break;
 		}
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
-		TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 		mtx_lock(&pv_chunks_mutex);
 		/* One freed pv entry in locked_pmap is sufficient. */
 		if (pmap == locked_pmap)
 			break;
+next_chunk:
+		TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
+		TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
 	}
-	TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+	TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
 	mtx_unlock(&pv_chunks_mutex);
 	reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
 	if (m_pc == NULL && !SLIST_EMPTY(&free)) {

Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h	Mon Oct 16 15:05:32 2017	(r324664)
+++ head/sys/amd64/include/pmap.h	Mon Oct 16 15:16:24 2017	(r324665)
@@ -366,11 +366,18 @@ typedef struct pv_entry {
  */
 #define	_NPCM	3
 #define	_NPCPV	168
-struct pv_chunk {
-	pmap_t			pc_pmap;
-	TAILQ_ENTRY(pv_chunk)	pc_list;
-	uint64_t		pc_map[_NPCM];	/* bitmap; 1 = free */
+#define	PV_CHUNK_HEADER							\
+	pmap_t			pc_pmap;				\
+	TAILQ_ENTRY(pv_chunk)	pc_list;				\
+	uint64_t		pc_map[_NPCM];	/* bitmap; 1 = free */	\
 	TAILQ_ENTRY(pv_chunk)	pc_lru;
+
+struct pv_chunk_header {
+	PV_CHUNK_HEADER
+};
+
+struct pv_chunk {
+	PV_CHUNK_HEADER
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201710161516.v9GFGO9Z069895>