Date:      Mon, 16 Sep 2019 02:31:58 +0000 (UTC)
From:      Alan Cox <alc@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r352373 - stable/12/sys/arm64/arm64
Message-ID:  <201909160231.x8G2Vwev063983@repo.freebsd.org>

Author: alc
Date: Mon Sep 16 02:31:58 2019
New Revision: 352373
URL: https://svnweb.freebsd.org/changeset/base/352373

Log:
  MFC r349003, r349031, r349042, r349129, r349290, r349618, r349798
    Change pmap_demote_l2_locked() so that it removes the superpage mapping on
    a demotion failure.  Otherwise, some callers to pmap_demote_l2_locked(),
    such as pmap_protect(), may leave an incorrect mapping in place on a
    demotion failure.
  
    Change pmap_demote_l2_locked() so that it handles addresses that are not
    superpage aligned.  Some callers to pmap_demote_l2_locked(), such as
    pmap_protect(), may not pass a superpage aligned address.
  
    Optimize TLB invalidation in pmap_remove_l2().
  
    Change the arm64 pmap so that updates to the global count of wired pages
    are not performed directly by the pmap.  Instead, they are performed by
    vm_page_free_pages_toq().
  
    Batch the TLB invalidations that are performed by pmap_protect() rather
    than performing them one at a time.
  
    Eliminate a redundant call to pmap_invalidate_page() from
    pmap_ts_referenced().
  
    Introduce pmap_remove_l3_range() and use it in two places: (1)
    pmap_remove(), where it eliminates redundant TLB invalidations by
    pmap_remove() and pmap_remove_l3(), and (2) pmap_enter_l2(), where it may
    optimize the TLB invalidations by batching them.
  
    Implement pmap_copy().
  
    Three changes to pmap_enter():
  
    1. Use _pmap_alloc_l3() instead of pmap_alloc_l3() in order to handle the
    possibility that a superpage mapping for "va" was created while we slept.
  
    2. Eliminate code for allocating kernel page table pages.  Kernel page
    table pages are preallocated by pmap_growkernel().
  
    3. Eliminate duplicated unlock operations when KERN_RESOURCE_SHORTAGE is
    returned.

Modified:
  stable/12/sys/arm64/arm64/pmap.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/arm64/arm64/pmap.c
==============================================================================
--- stable/12/sys/arm64/arm64/pmap.c	Mon Sep 16 00:59:10 2019	(r352372)
+++ stable/12/sys/arm64/arm64/pmap.c	Mon Sep 16 02:31:58 2019	(r352373)
@@ -1410,8 +1410,6 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t
 	}
 	pmap_invalidate_page(pmap, va);
 
-	vm_wire_sub(1);
-
 	/*
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
@@ -2030,9 +2028,8 @@ next_chunk:
 		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 		/* Recycle a freed page table page. */
 		m_pc->wire_count = 1;
-		vm_wire_add(1);
 	}
-	vm_page_free_pages_toq(&free, false);
+	vm_page_free_pages_toq(&free, true);
 	return (m_pc);
 }
 
@@ -2267,6 +2264,8 @@ pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_padd
 	int bit, field;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT((va & L2_OFFSET) == 0,
+	    ("pmap_pv_demote_l2: va is not 2mpage aligned"));
 	KASSERT((pa & L2_OFFSET) == 0,
 	    ("pmap_pv_demote_l2: pa is not 2mpage aligned"));
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
@@ -2277,7 +2276,6 @@ pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_padd
 	 * must not be released until the last pv entry is reinstantiated.
 	 */
 	pvh = pa_to_pvh(pa);
-	va = va & ~L2_OFFSET;
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
@@ -2433,7 +2431,13 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_
 	old_l2 = pmap_load_clear(l2);
 	KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
 	    ("pmap_remove_l2: L2e %lx is not a block mapping", old_l2));
-	pmap_invalidate_range(pmap, sva, sva + L2_SIZE);
+
+	/*
+	 * Since a promotion must break the 4KB page mappings before making
+	 * the 2MB page mapping, a pmap_invalidate_page() suffices.
+	 */
+	pmap_invalidate_page(pmap, sva);
+
 	if (old_l2 & ATTR_SW_WIRED)
 		pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
 	pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
@@ -2463,8 +2467,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_
 			pmap_resident_count_dec(pmap, 1);
 			KASSERT(ml3->wire_count == NL3PG,
 			    ("pmap_remove_l2: l3 page wire count error"));
-			ml3->wire_count = 1;
-			vm_page_unwire_noq(ml3);
+			ml3->wire_count = 0;
 			pmap_add_delayed_free_list(ml3, free, FALSE);
 		}
 	}
@@ -2507,6 +2510,82 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_
 }
 
 /*
+ * Remove the specified range of addresses from the L3 page table that is
+ * identified by the given L2 entry.
+ */
+static void
+pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva,
+    vm_offset_t eva, struct spglist *free, struct rwlock **lockp)
+{
+	struct md_page *pvh;
+	struct rwlock *new_lock;
+	pt_entry_t *l3, old_l3;
+	vm_offset_t va;
+	vm_page_t m;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE),
+	    ("pmap_remove_l3_range: range crosses an L3 page table boundary"));
+	va = eva;
+	for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) {
+		if (!pmap_l3_valid(pmap_load(l3))) {
+			if (va != eva) {
+				pmap_invalidate_range(pmap, va, sva);
+				va = eva;
+			}
+			continue;
+		}
+		old_l3 = pmap_load_clear(l3);
+		if ((old_l3 & ATTR_SW_WIRED) != 0)
+			pmap->pm_stats.wired_count--;
+		pmap_resident_count_dec(pmap, 1);
+		if ((old_l3 & ATTR_SW_MANAGED) != 0) {
+			m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
+			if (pmap_page_dirty(old_l3))
+				vm_page_dirty(m);
+			if ((old_l3 & ATTR_AF) != 0)
+				vm_page_aflag_set(m, PGA_REFERENCED);
+			new_lock = PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m));
+			if (new_lock != *lockp) {
+				if (*lockp != NULL) {
+					/*
+					 * Pending TLB invalidations must be
+					 * performed before the PV list lock is
+					 * released.  Otherwise, a concurrent
+					 * pmap_remove_all() on a physical page
+					 * could return while a stale TLB entry
+					 * still provides access to that page. 
+					 */
+					if (va != eva) {
+						pmap_invalidate_range(pmap, va,
+						    sva);
+						va = eva;
+					}
+					rw_wunlock(*lockp);
+				}
+				*lockp = new_lock;
+				rw_wlock(*lockp);
+			}
+			pmap_pvh_free(&m->md, pmap, sva);
+			if (TAILQ_EMPTY(&m->md.pv_list) &&
+			    (m->flags & PG_FICTITIOUS) == 0) {
+				pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+				if (TAILQ_EMPTY(&pvh->pv_list))
+					vm_page_aflag_clear(m, PGA_WRITEABLE);
+			}
+		}
+		if (va == eva)
+			va = sva;
+		if (pmap_unuse_pt(pmap, sva, l2e, free)) {
+			sva += L3_SIZE;
+			break;
+		}
+	}
+	if (va != eva)
+		pmap_invalidate_range(pmap, va, sva);
+}
+
+/*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
@@ -2516,9 +2595,9 @@ void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	struct rwlock *lock;
-	vm_offset_t va, va_next;
+	vm_offset_t va_next;
 	pd_entry_t *l0, *l1, *l2;
-	pt_entry_t l3_paddr, *l3;
+	pt_entry_t l3_paddr;
 	struct spglist free;
 
 	/*
@@ -2571,8 +2650,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t 
 				pmap_remove_l2(pmap, l2, sva, pmap_load(l1),
 				    &free, &lock);
 				continue;
-			} else if (pmap_demote_l2_locked(pmap, l2,
-			    sva &~L2_OFFSET, &lock) == NULL)
+			} else if (pmap_demote_l2_locked(pmap, l2, sva,
+			    &lock) == NULL)
 				continue;
 			l3_paddr = pmap_load(l2);
 		}
@@ -2591,33 +2670,13 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t 
 		if (va_next > eva)
 			va_next = eva;
 
-		va = va_next;
-		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
-		    sva += L3_SIZE) {
-			if (l3 == NULL)
-				panic("l3 == NULL");
-			if (pmap_load(l3) == 0) {
-				if (va != va_next) {
-					pmap_invalidate_range(pmap, va, sva);
-					va = va_next;
-				}
-				continue;
-			}
-			if (va == va_next)
-				va = sva;
-			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
-			    &lock)) {
-				sva += L3_SIZE;
-				break;
-			}
-		}
-		if (va != va_next)
-			pmap_invalidate_range(pmap, va, sva);
+		pmap_remove_l3_range(pmap, l3_paddr, sva, va_next, &free,
+		    &lock);
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(pmap);
-	vm_page_free_pages_toq(&free, false);
+	vm_page_free_pages_toq(&free, true);
 }
 
 /*
@@ -2722,7 +2781,7 @@ retry:
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(lock);
-	vm_page_free_pages_toq(&free, false);
+	vm_page_free_pages_toq(&free, true);
 }
 
 /*
@@ -2788,8 +2847,15 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
 		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
 		    sva += L3_SIZE) {
 			l3 = pmap_load(l3p);
-			if (!pmap_l3_valid(l3))
+			if (!pmap_l3_valid(l3)) {
+				if (va != va_next) {
+					pmap_invalidate_range(pmap, va, sva);
+					va = va_next;
+				}
 				continue;
+			}
+			if (va == va_next)
+				va = sva;
 
 			nbits = 0;
 			if ((prot & VM_PROT_WRITE) == 0) {
@@ -2804,9 +2870,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
 				nbits |= ATTR_XN;
 
 			pmap_set(l3p, nbits);
-			/* XXX: Use pmap_invalidate_range */
-			pmap_invalidate_page(pmap, sva);
 		}
+		if (va != va_next)
+			pmap_invalidate_range(pmap, va, sva);
 	}
 	PMAP_UNLOCK(pmap);
 }
@@ -3011,8 +3077,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 	pt_entry_t new_l3, orig_l3;
 	pt_entry_t *l2, *l3;
 	pv_entry_t pv;
-	vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
-	vm_page_t mpte, om, l1_m, l2_m, l3_m;
+	vm_paddr_t opa, pa;
+	vm_page_t mpte, om;
 	boolean_t nosleep;
 	int lvl, rv;
 
@@ -3036,7 +3102,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
 
 	lock = NULL;
-	mpte = NULL;
 	PMAP_LOCK(pmap);
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
@@ -3046,13 +3111,25 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 		    flags, m, &lock);
 		goto out;
 	}
+	mpte = NULL;
 
+	/*
+	 * In the case that a page table page is not
+	 * resident, we are creating it here.
+	 */
+retry:
 	pde = pmap_pde(pmap, va, &lvl);
-	if (pde != NULL && lvl == 1) {
+	if (pde != NULL && lvl == 2) {
+		l3 = pmap_l2_to_l3(pde, va);
+		if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
+			mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
+			mpte->wire_count++;
+		}
+		goto havel3;
+	} else if (pde != NULL && lvl == 1) {
 		l2 = pmap_l1_to_l2(pde, va);
 		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
-		    (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET,
-		    &lock)) != NULL) {
+		    (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) {
 			l3 = &l3[pmap_l3_index(va)];
 			if (va < VM_MAXUSER_ADDRESS) {
 				mpte = PHYS_TO_VM_PAGE(
@@ -3061,84 +3138,27 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v
 			}
 			goto havel3;
 		}
+		/* We need to allocate an L3 table. */
 	}
-
 	if (va < VM_MAXUSER_ADDRESS) {
 		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
-		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
+
+		/*
+		 * We use _pmap_alloc_l3() instead of pmap_alloc_l3() in order
+		 * to handle the possibility that a superpage mapping for "va"
+		 * was created while we slept.
+		 */
+		mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va),
+		    nosleep ? NULL : &lock);
 		if (mpte == NULL && nosleep) {
 			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
-			if (lock != NULL)
-				rw_wunlock(lock);
-			PMAP_UNLOCK(pmap);
-			return (KERN_RESOURCE_SHORTAGE);
+			rv = KERN_RESOURCE_SHORTAGE;
+			goto out;
 		}
-		pde = pmap_pde(pmap, va, &lvl);
-		KASSERT(pde != NULL,
-		    ("pmap_enter: Invalid page entry, va: 0x%lx", va));
-		KASSERT(lvl == 2,
-		    ("pmap_enter: Invalid level %d", lvl));
-	} else {
-		/*
-		 * If we get a level 2 pde it must point to a level 3 entry
-		 * otherwise we will need to create the intermediate tables
-		 */
-		if (lvl < 2) {
-			switch (lvl) {
-			default:
-			case -1:
-				/* Get the l0 pde to update */
-				pde = pmap_l0(pmap, va);
-				KASSERT(pde != NULL, ("..."));
+		goto retry;
+	} else
+		panic("pmap_enter: missing L3 table for kernel va %#lx", va);
 
-				l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
-				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
-				    VM_ALLOC_ZERO);
-				if (l1_m == NULL)
-					panic("pmap_enter: l1 pte_m == NULL");
-				if ((l1_m->flags & PG_ZERO) == 0)
-					pmap_zero_page(l1_m);
-
-				l1_pa = VM_PAGE_TO_PHYS(l1_m);
-				pmap_load_store(pde, l1_pa | L0_TABLE);
-				/* FALLTHROUGH */
-			case 0:
-				/* Get the l1 pde to update */
-				pde = pmap_l1_to_l2(pde, va);
-				KASSERT(pde != NULL, ("..."));
-
-				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
-				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
-				    VM_ALLOC_ZERO);
-				if (l2_m == NULL)
-					panic("pmap_enter: l2 pte_m == NULL");
-				if ((l2_m->flags & PG_ZERO) == 0)
-					pmap_zero_page(l2_m);
-
-				l2_pa = VM_PAGE_TO_PHYS(l2_m);
-				pmap_load_store(pde, l2_pa | L1_TABLE);
-				/* FALLTHROUGH */
-			case 1:
-				/* Get the l2 pde to update */
-				pde = pmap_l1_to_l2(pde, va);
-				KASSERT(pde != NULL, ("..."));
-
-				l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
-				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
-				    VM_ALLOC_ZERO);
-				if (l3_m == NULL)
-					panic("pmap_enter: l3 pte_m == NULL");
-				if ((l3_m->flags & PG_ZERO) == 0)
-					pmap_zero_page(l3_m);
-
-				l3_pa = VM_PAGE_TO_PHYS(l3_m);
-				pmap_load_store(pde, l3_pa | L2_TABLE);
-				break;
-			}
-		}
-	}
-	l3 = pmap_l2_to_l3(pde, va);
-
 havel3:
 	orig_l3 = pmap_load(l3);
 	opa = orig_l3 & ~ATTR_MASK;
@@ -3352,8 +3372,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
     vm_page_t m, struct rwlock **lockp)
 {
 	struct spglist free;
-	pd_entry_t *l2, *l3, old_l2;
-	vm_offset_t sva;
+	pd_entry_t *l2, old_l2;
 	vm_page_t l2pg, mt;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -3382,13 +3401,8 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
 			(void)pmap_remove_l2(pmap, l2, va,
 			    pmap_load(pmap_l1(pmap, va)), &free, lockp);
 		else
-			for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) {
-				l3 = pmap_l2_to_l3(l2, sva);
-				if (pmap_l3_valid(pmap_load(l3)) &&
-				    pmap_remove_l3(pmap, l3, sva, old_l2, &free,
-				    lockp) != 0)
-					break;
-			}
+			pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE,
+			    &free, lockp);
 		vm_page_free_pages_toq(&free, true);
 		if (va >= VM_MAXUSER_ADDRESS) {
 			/*
@@ -3605,7 +3619,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, v
 			SLIST_INIT(&free);
 			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
 				pmap_invalidate_page(pmap, va);
-				vm_page_free_pages_toq(&free, false);
+				vm_page_free_pages_toq(&free, true);
 			}
 			mpte = NULL;
 		}
@@ -3734,12 +3748,153 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t 
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
+ *
+ *	Because the executable mappings created by this routine are copied,
+ *	it should not have to flush the instruction cache.
  */
-
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
+	struct rwlock *lock;
+	struct spglist free;
+	pd_entry_t *l0, *l1, *l2, srcptepaddr;
+	pt_entry_t *dst_pte, ptetemp, *src_pte;
+	vm_offset_t addr, end_addr, va_next;
+	vm_page_t dst_l2pg, dstmpte, srcmpte;
+
+	if (dst_addr != src_addr)
+		return;
+	end_addr = src_addr + len;
+	lock = NULL;
+	if (dst_pmap < src_pmap) {
+		PMAP_LOCK(dst_pmap);
+		PMAP_LOCK(src_pmap);
+	} else {
+		PMAP_LOCK(src_pmap);
+		PMAP_LOCK(dst_pmap);
+	}
+	for (addr = src_addr; addr < end_addr; addr = va_next) {
+		l0 = pmap_l0(src_pmap, addr);
+		if (pmap_load(l0) == 0) {
+			va_next = (addr + L0_SIZE) & ~L0_OFFSET;
+			if (va_next < addr)
+				va_next = end_addr;
+			continue;
+		}
+		l1 = pmap_l0_to_l1(l0, addr);
+		if (pmap_load(l1) == 0) {
+			va_next = (addr + L1_SIZE) & ~L1_OFFSET;
+			if (va_next < addr)
+				va_next = end_addr;
+			continue;
+		}
+		va_next = (addr + L2_SIZE) & ~L2_OFFSET;
+		if (va_next < addr)
+			va_next = end_addr;
+		l2 = pmap_l1_to_l2(l1, addr);
+		srcptepaddr = pmap_load(l2);
+		if (srcptepaddr == 0)
+			continue;
+		if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) {
+			if ((addr & L2_OFFSET) != 0 ||
+			    addr + L2_SIZE > end_addr)
+				continue;
+			dst_l2pg = pmap_alloc_l2(dst_pmap, addr, NULL);
+			if (dst_l2pg == NULL)
+				break;
+			l2 = (pd_entry_t *)
+			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_l2pg));
+			l2 = &l2[pmap_l2_index(addr)];
+			if (pmap_load(l2) == 0 &&
+			    ((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
+			    pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
+			    PMAP_ENTER_NORECLAIM, &lock))) {
+				(void)pmap_load_store(l2, srcptepaddr &
+				    ~ATTR_SW_WIRED);
+				pmap_resident_count_inc(dst_pmap, L2_SIZE /
+				    PAGE_SIZE);
+				atomic_add_long(&pmap_l2_mappings, 1);
+			} else
+				dst_l2pg->wire_count--;
+			continue;
+		}
+		KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
+		    ("pmap_copy: invalid L2 entry"));
+		srcptepaddr &= ~ATTR_MASK;
+		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
+		KASSERT(srcmpte->wire_count > 0,
+		    ("pmap_copy: source page table page is unused"));
+		if (va_next > end_addr)
+			va_next = end_addr;
+		src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr);
+		src_pte = &src_pte[pmap_l3_index(addr)];
+		dstmpte = NULL;
+		for (; addr < va_next; addr += PAGE_SIZE, src_pte++) {
+			ptetemp = pmap_load(src_pte);
+
+			/*
+			 * We only virtual copy managed pages.
+			 */
+			if ((ptetemp & ATTR_SW_MANAGED) == 0)
+				continue;
+
+			if (dstmpte != NULL) {
+				KASSERT(dstmpte->pindex == pmap_l2_pindex(addr),
+				    ("dstmpte pindex/addr mismatch"));
+				dstmpte->wire_count++;
+			} else if ((dstmpte = pmap_alloc_l3(dst_pmap, addr,
+			    NULL)) == NULL)
+				goto out;
+			dst_pte = (pt_entry_t *)
+			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
+			dst_pte = &dst_pte[pmap_l3_index(addr)];
+			if (pmap_load(dst_pte) == 0 &&
+			    pmap_try_insert_pv_entry(dst_pmap, addr,
+			    PHYS_TO_VM_PAGE(ptetemp & ~ATTR_MASK), &lock)) {
+				/*
+				 * Clear the wired, modified, and accessed
+				 * (referenced) bits during the copy.
+				 *
+				 * XXX not yet
+				 */
+				(void)pmap_load_store(dst_pte, ptetemp &
+				    ~ATTR_SW_WIRED);
+				pmap_resident_count_inc(dst_pmap, 1);
+			} else {
+				SLIST_INIT(&free);
+				if (pmap_unwire_l3(dst_pmap, addr, dstmpte,
+				    &free)) {
+					/*
+					 * Although "addr" is not mapped,
+					 * paging-structure caches could
+					 * nonetheless have entries that refer
+					 * to the freed page table pages.
+					 * Invalidate those entries.
+					 *
+					 * XXX redundant invalidation
+					 */
+					pmap_invalidate_page(dst_pmap, addr);
+					vm_page_free_pages_toq(&free, true);
+				}
+				goto out;
+			}
+			/* Have we copied all of the valid mappings? */ 
+			if (dstmpte->wire_count >= srcmpte->wire_count)
+				break;
+		}
+	}
+out:
+	/*
+	 * XXX This barrier may not be needed because the destination pmap is
+	 * not active.
+	 */
+	dsb(ishst);
+
+	if (lock != NULL)
+		rw_wunlock(lock);
+	PMAP_UNLOCK(src_pmap);
+	PMAP_UNLOCK(dst_pmap);
 }
 
 /*
@@ -4088,8 +4243,7 @@ pmap_remove_pages(pmap_t pmap)
 						pmap_resident_count_dec(pmap,1);
 						KASSERT(ml3->wire_count == NL3PG,
 						    ("pmap_remove_pages: l3 page wire count error"));
-						ml3->wire_count = 1;
-						vm_page_unwire_noq(ml3);
+						ml3->wire_count = 0;
 						pmap_add_delayed_free_list(ml3,
 						    &free, FALSE);
 					}
@@ -4127,7 +4281,7 @@ pmap_remove_pages(pmap_t pmap)
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(pmap);
-	vm_page_free_pages_toq(&free, false);
+	vm_page_free_pages_toq(&free, true);
 }
 
 /*
@@ -4328,8 +4482,7 @@ retry_pv_loop:
 		va = pv->pv_va;
 		pte = pmap_pte(pmap, pv->pv_va, &lvl);
 		if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
-			pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET,
-			    &lock);
+			(void)pmap_demote_l2_locked(pmap, pte, va, &lock);
 		KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 		    ("inconsistent pv lock %p %p for page %p",
 		    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
@@ -4566,7 +4719,6 @@ small_mappings:
 				 */
 				pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
 				    &free, &lock);
-				pmap_invalidate_page(pmap, pv->pv_va);
 				cleared++;
 				if (pvf == pv)
 					pvf = NULL;
@@ -4588,7 +4740,7 @@ small_mappings:
 	    not_cleared < PMAP_TS_REFERENCED_MAX);
 out:
 	rw_wunlock(lock);
-	vm_page_free_pages_toq(&free, false);
+	vm_page_free_pages_toq(&free, true);
 	return (cleared + not_cleared);
 }
 
@@ -4891,7 +5043,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size
 				pte = pmap_l1_to_l2(pte, tmpva);
 			case 2:
 				newpte = pmap_demote_l2(kernel_pmap, pte,
-				    tmpva & ~L2_OFFSET);
+				    tmpva);
 				if (newpte == NULL)
 					return (EINVAL);
 				pte = pmap_l2_to_l3(pte, tmpva);
@@ -4990,6 +5142,18 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_
 	return (l2);
 }
 
+static void
+pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2,
+    struct rwlock **lockp)
+{
+	struct spglist free;
+
+	SLIST_INIT(&free);
+	(void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free,
+	    lockp);
+	vm_page_free_pages_toq(&free, true);
+}
+
 /*
  * Create an L3 table to map all addresses within an L2 mapping.
  */
@@ -5008,8 +5172,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_
 	oldl2 = pmap_load(l2);
 	KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
 	    ("pmap_demote_l2: Demoting a non-block entry"));
-	KASSERT((va & L2_OFFSET) == 0,
-	    ("pmap_demote_l2: Invalid virtual address %#lx", va));
+	va &= ~L2_OFFSET;
 
 	tmpl2 = 0;
 	if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
@@ -5018,15 +5181,57 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_
 			return (NULL);
 	}
 
+	/*
+	 * Invalidate the 2MB page mapping and return "failure" if the
+	 * mapping was never accessed.
+	 */
+	if ((oldl2 & ATTR_AF) == 0) {
+		KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
+		    ("pmap_demote_l2: a wired mapping is missing ATTR_AF"));
+		pmap_demote_l2_abort(pmap, va, l2, lockp);
+		CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx in pmap %p",
+		    va, pmap);
+		goto fail;
+	}
+
 	if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
+		KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
+		    ("pmap_demote_l2: page table page for a wired mapping"
+		    " is missing"));
+
+		/*
+		 * If the page table page is missing and the mapping
+		 * is for a kernel address, the mapping must belong to
+		 * the direct map.  Page table pages are preallocated
+		 * for every other part of the kernel address space,
+		 * so the direct map region is the only part of the
+		 * kernel address space that must be handled here.
+		 */
+		KASSERT(va < VM_MAXUSER_ADDRESS || VIRT_IN_DMAP(va),
+		    ("pmap_demote_l2: No saved mpte for va %#lx", va));
+
+		/*
+		 * If the 2MB page mapping belongs to the direct map
+		 * region of the kernel's address space, then the page
+		 * allocation request specifies the highest possible
+		 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
+		 * priority is normal.
+		 */
 		ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
 		    (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+
+		/*
+		 * If the allocation of the new page table page fails,
+		 * invalidate the 2MB page mapping and return "failure".
+		 */
 		if (ml3 == NULL) {
+			pmap_demote_l2_abort(pmap, va, l2, lockp);
 			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			goto fail;
 		}
+
 		if (va < VM_MAXUSER_ADDRESS) {
 			ml3->wire_count = NL3PG;
 			pmap_resident_count_inc(pmap, 1);
@@ -5075,6 +5280,10 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_
 	if ((oldl2 & ATTR_SW_MANAGED) != 0)
 		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
 
+	/*
+	 * Pass PAGE_SIZE so that a single TLB invalidation is performed on
+	 * the 2MB page mapping.
+	 */
 	pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
 
 	/*


