Date:      Tue, 26 Jun 2012 06:02:43 +0000 (UTC)
From:      Alan Cox <alc@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r237592 - head/sys/amd64/amd64
Message-ID:  <201206260602.q5Q62hSg018729@svn.freebsd.org>

Author: alc
Date: Tue Jun 26 06:02:43 2012
New Revision: 237592
URL: http://svn.freebsd.org/changeset/base/237592

Log:
  Add PV list locking to pmap_enter().  Its execution is no longer serialized
  by the pvh global lock.
  
  Add a needed atomic operation to pmap_object_init_pt().
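  
  The page table page allocation routines (_pmap_allocpte(), pmap_allocpte(),
  and pmap_allocpde()) now take a "struct rwlock **lockp" in place of the old
  M_NOWAIT/M_WAITOK flags argument.  Passing NULL requests the old M_NOWAIT
  behavior: fail without sleeping.  Passing a pointer to the caller's PV
  list lock variable requests the old M_WAITOK behavior: before sleeping in
  VM_WAIT, the callee releases *lockp (if held), the pmap lock, and the
  read-held pvh global lock, so the caller must retry from the top.  A
  minimal sketch of the two calling modes, condensed from the pmap_enter()
  and pmap_enter_pde() hunks below:
  
  	struct rwlock *lock;
  	vm_page_t mpde, mpte;
  
  	lock = NULL;
  	rw_rlock(&pvh_global_lock);	/* read, not write: concurrent enters */
  	PMAP_LOCK(pmap);
  	/* Blocking mode: may sleep, and may drop and switch *lockp. */
  	mpte = pmap_allocpte(pmap, va, &lock);
  	/* ... install the mapping ... */
  	if (lock != NULL)
  		rw_wunlock(lock);	/* whichever PV list lock is held */
  	rw_runlock(&pvh_global_lock);
  	PMAP_UNLOCK(pmap);
  
  	/* Nonblocking mode: NULL lockp; a NULL return is final. */
  	mpde = pmap_allocpde(pmap, va, NULL);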

Modified:
  head/sys/amd64/amd64/pmap.c

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Jun 26 05:34:31 2012	(r237591)
+++ head/sys/amd64/amd64/pmap.c	Tue Jun 26 06:02:43 2012	(r237592)
@@ -264,7 +264,8 @@ static void	pmap_pv_demote_pde(pmap_t pm
 		    struct rwlock **lockp);
 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 		    struct rwlock **lockp);
-static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
+static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+		    struct rwlock **lockp);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
@@ -287,7 +288,8 @@ static boolean_t pmap_is_referenced_pvh(
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+    struct rwlock **lockp);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
 static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
@@ -305,10 +307,13 @@ static void pmap_update_pde(pmap_t pmap,
     pd_entry_t newpde);
 static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
 
-static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
+		struct rwlock **lockp);
+static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va,
+		struct rwlock **lockp);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
+		struct rwlock **lockp);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
                 vm_page_t* free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
@@ -1686,8 +1691,10 @@ pmap_pinit(pmap_t pmap)
 }
 
 /*
- * this routine is called if the page table page is not
- * mapped correctly.
+ * This routine is called if the desired page table page does not exist.
+ *
+ * If page table page allocation fails, this routine may sleep before
+ * returning NULL.  It sleeps only if a lock pointer was given.
  *
  * Note: If a page allocation fails at page table level two or three,
  * one or two pages may be held during the wait, only to be released
@@ -1695,25 +1702,26 @@ pmap_pinit(pmap_t pmap)
  * race conditions.
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
 	vm_page_t m, pdppg, pdpg;
 
-	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
-	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
-	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
-		if (flags & M_WAITOK) {
+		if (lockp != NULL) {
+			if (*lockp != NULL) {
+				rw_wunlock(*lockp);
+				*lockp = NULL;
+			}
 			PMAP_UNLOCK(pmap);
-			rw_wunlock(&pvh_global_lock);
+			rw_runlock(&pvh_global_lock);
 			VM_WAIT;
-			rw_wlock(&pvh_global_lock);
+			rw_rlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
 
@@ -1754,7 +1762,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 		if ((*pml4 & PG_V) == 0) {
 			/* Have to allocate a new pdp, recurse */
 			if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
-			    flags) == NULL) {
+			    lockp) == NULL) {
 				--m->wire_count;
 				atomic_subtract_int(&cnt.v_wire_count, 1);
 				vm_page_free_zero(m);
@@ -1787,7 +1795,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 		if ((*pml4 & PG_V) == 0) {
 			/* Have to allocate a new pd, recurse */
 			if (_pmap_allocpte(pmap, NUPDE + pdpindex,
-			    flags) == NULL) {
+			    lockp) == NULL) {
 				--m->wire_count;
 				atomic_subtract_int(&cnt.v_wire_count, 1);
 				vm_page_free_zero(m);
@@ -1801,7 +1809,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 			if ((*pdp & PG_V) == 0) {
 				/* Have to allocate a new pd, recurse */
 				if (_pmap_allocpte(pmap, NUPDE + pdpindex,
-				    flags) == NULL) {
+				    lockp) == NULL) {
 					--m->wire_count;
 					atomic_subtract_int(&cnt.v_wire_count,
 					    1);
@@ -1827,15 +1835,12 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t 
 }
 
 static vm_page_t
-pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t pdpindex, ptepindex;
 	pdp_entry_t *pdpe;
 	vm_page_t pdpg;
 
-	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
-	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
-	    ("pmap_allocpde: flags is neither M_NOWAIT nor M_WAITOK"));
 retry:
 	pdpe = pmap_pdpe(pmap, va);
 	if (pdpe != NULL && (*pdpe & PG_V) != 0) {
@@ -1846,24 +1851,20 @@ retry:
 		/* Allocate a pd page. */
 		ptepindex = pmap_pde_pindex(va);
 		pdpindex = ptepindex >> NPDPEPGSHIFT;
-		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags);
-		if (pdpg == NULL && (flags & M_WAITOK))
+		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp);
+		if (pdpg == NULL && lockp != NULL)
 			goto retry;
 	}
 	return (pdpg);
 }
 
 static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t ptepindex;
 	pd_entry_t *pd;
 	vm_page_t m;
 
-	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
-	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
-	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
-
 	/*
 	 * Calculate pagetable page index
 	 */
@@ -1879,7 +1880,7 @@ retry:
 	 * normal 4K page.
 	 */
 	if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
-		if (!pmap_demote_pde(pmap, pd, va)) {
+		if (!pmap_demote_pde_locked(pmap, pd, va, lockp)) {
 			/*
 			 * Invalidation of the 2MB page mapping may have caused
 			 * the deallocation of the underlying PD page.
@@ -1900,8 +1901,8 @@ retry:
 		 * Here if the pte page isn't mapped, or if it has been
 		 * deallocated.
 		 */
-		m = _pmap_allocpte(pmap, ptepindex, flags);
-		if (m == NULL && (flags & M_WAITOK))
+		m = _pmap_allocpte(pmap, ptepindex, lockp);
+		if (m == NULL && lockp != NULL)
 			goto retry;
 	}
 	return (m);
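
Note on the retry loops above: with a non-NULL lockp, a NULL return from
_pmap_allocpte() means the callee slept in VM_WAIT after dropping every
lock, and another thread may have created or freed page table pages in the
meantime, so the caller must re-evaluate the paging structures from the
top ("goto retry") rather than trust its earlier lookup.  With a NULL
lockp the callee never sleeps, and a NULL return is a final failure for
the caller to handle (see pmap_enter_pde() and pmap_copy() below).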
@@ -2504,16 +2505,18 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offse
  * for the 2MB page mapping.
  */
 static void
-pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
+pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+    struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
-	rw_assert(&pvh_global_lock, RA_WLOCKED);
+	rw_assert(&pvh_global_lock, RA_LOCKED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_promote_pde: pa is not 2mpage aligned"));
+	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 
 	/*
 	 * Transfer the first page's pv entry for this mapping to the 2mpage's
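
The CHANGE_PV_LIST_LOCK_TO_PHYS() macro used above (and its
CHANGE_PV_LIST_LOCK_TO_VM_PAGE() sibling, which applies it to
VM_PAGE_TO_PHYS(m)) is defined earlier in pmap.c and is not part of this
diff.  Roughly, it hashes the physical address into a fixed-size array of
PV list locks and, if that lock differs from the one currently named by
*lockp, drops the old lock and write-locks the new one; a sketch of its
shape:

	#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
		struct rwlock **_lockp = (lockp);		\
		struct rwlock *_new_lock;			\
								\
		_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
		if (_new_lock != *_lockp) {			\
			if (*_lockp != NULL)			\
				rw_wunlock(*_lockp);		\
			*_lockp = _new_lock;			\
			rw_wlock(*_lockp);			\
		}						\
	} while (0)

Because every lock switch funnels through this helper, a code path holds
at most one PV list lock at a time, which avoids lock-order concerns
between the PV list locks themselves.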
@@ -3255,7 +3258,8 @@ retry:
  * identical characteristics. 
  */
 static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+    struct rwlock **lockp)
 {
 	pd_entry_t newpde;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
@@ -3273,7 +3277,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t
 setpde:
 	newpde = *firstpte;
 	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
-		pmap_pde_p_failures++;
+		atomic_add_long(&pmap_pde_p_failures, 1);
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return;
@@ -3298,7 +3302,7 @@ setpde:
 setpte:
 		oldpte = *pte;
 		if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
-			pmap_pde_p_failures++;
+			atomic_add_long(&pmap_pde_p_failures, 1);
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return;
@@ -3317,7 +3321,7 @@ setpte:
 			    " in pmap %p", oldpteva, pmap);
 		}
 		if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
-			pmap_pde_p_failures++;
+			atomic_add_long(&pmap_pde_p_failures, 1);
 			CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return;
@@ -3342,7 +3346,7 @@ setpte:
 	 * Promote the pv entries.
 	 */
 	if ((newpde & PG_MANAGED) != 0)
-		pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
+		pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp);
 
 	/*
 	 * Propagate the PAT index to its proper position.
@@ -3358,7 +3362,7 @@ setpte:
 	else
 		pde_store(pde, PG_PS | newpde);
 
-	pmap_pde_promotions++;
+	atomic_add_long(&pmap_pde_promotions, 1);
 	CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx"
 	    " in pmap %p", va, pmap);
 }
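
The counter changes are a consequence of dropping the write serialization:
with pmap_enter() callers holding the pvh global lock only for reading,
two CPUs can attempt promotions concurrently, and a plain "counter++" is a
load/add/store sequence that can lose updates.  A minimal illustration of
the race and the fix:

	#include <sys/types.h>
	#include <machine/atomic.h>

	static u_long pmap_pde_p_failures;	/* sysctl statistic */

	static void
	count_failure(void)
	{

		/*
		 * Racy: "pmap_pde_p_failures++" is load/add/store, so two
		 * CPUs can both read N and both store N + 1, losing one
		 * increment.
		 */

		/* Safe: one indivisible read-modify-write (LOCK ADD on amd64). */
		atomic_add_long(&pmap_pde_p_failures, 1);
	}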
@@ -3379,6 +3383,7 @@ void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
     vm_prot_t prot, boolean_t wired)
 {
+	struct rwlock *lock;
 	pd_entry_t *pde;
 	pt_entry_t *pte;
 	pt_entry_t newpte, origpte;
@@ -3398,7 +3403,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 
 	mpte = NULL;
 
-	rw_wlock(&pvh_global_lock);
+	lock = NULL;
+	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 
 	/*
@@ -3406,7 +3412,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS)
-		mpte = pmap_allocpte(pmap, va, M_WAITOK);
+		mpte = pmap_allocpte(pmap, va, &lock);
 
 	pde = pmap_pde(pmap, va);
 	if (pde != NULL && (*pde & PG_V) != 0) {
@@ -3460,6 +3466,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 			pmap->pm_stats.wired_count--;
 		if (origpte & PG_MANAGED) {
 			om = PHYS_TO_VM_PAGE(opa);
+			CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, om);
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 		}
 		if (mpte != NULL) {
@@ -3479,6 +3486,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 		    ("pmap_enter: managed mapping within the clean submap"));
 		if (pv == NULL)
 			pv = get_pv_entry(pmap, FALSE);
+		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 		pv->pv_va = va;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 		pa |= PG_MANAGED;
@@ -3534,11 +3542,13 @@ validate:
 				if ((newpte & PG_RW) == 0)
 					invlva = TRUE;
 			}
-			if ((origpte & PG_MANAGED) != 0 &&
-			    TAILQ_EMPTY(&om->md.pv_list) &&
-			    ((om->flags & PG_FICTITIOUS) != 0 ||
-			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
-				vm_page_aflag_clear(om, PGA_WRITEABLE);
+			if ((origpte & PG_MANAGED) != 0) {
+				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, om);
+				if (TAILQ_EMPTY(&om->md.pv_list) &&
+				    ((om->flags & PG_FICTITIOUS) != 0 ||
+				    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
+					vm_page_aflag_clear(om, PGA_WRITEABLE);
+			}
 			if (invlva)
 				pmap_invalidate_page(pmap, va);
 		} else
@@ -3552,9 +3562,11 @@ validate:
 	if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
 	    pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0)
-		pmap_promote_pde(pmap, pde, va);
+		pmap_promote_pde(pmap, pde, va, &lock);
 
-	rw_wunlock(&pvh_global_lock);
+	if (lock != NULL)
+		rw_wunlock(lock);
+	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
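
After this change, concurrent pmap_enter() calls serialize only on the
per-pmap lock and on the PV list lock covering the page being mapped; the
lock variable starts out NULL and is pointed at the right PV list lock on
demand (by the allocation and demotion paths, by
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(), or by pmap_promote_pde()) as the
function touches the old and new mappings.  This is also why
pmap_pv_promote_pde() above now asserts RA_LOCKED rather than RA_WLOCKED
on the pvh global lock: it can be reached from a reader.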
 
@@ -3573,7 +3585,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t 
 
 	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if ((mpde = pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
+	if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (FALSE);
@@ -3735,7 +3747,9 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 
 			/*
 			 * If the page table page is mapped, we just increment
-			 * the hold count, and activate it.
+			 * the hold count, and activate it.  Otherwise, we
+			 * attempt to allocate a page table page.  If this
+			 * attempt fails, we don't retry.  Instead, we give up.
 			 */
 			if (ptepa && (*ptepa & PG_V) != 0) {
 				if (*ptepa & PG_PS)
@@ -3743,8 +3757,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 				mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME);
 				mpte->wire_count++;
 			} else {
-				mpte = _pmap_allocpte(pmap, ptepindex,
-				    M_NOWAIT);
+				mpte = _pmap_allocpte(pmap, ptepindex, NULL);
 				if (mpte == NULL)
 					return (mpte);
 			}
@@ -3870,7 +3883,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
 		PMAP_LOCK(pmap);
 		for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
 		    size; pa += NBPDR) {
-			pdpg = pmap_allocpde(pmap, addr, M_NOWAIT);
+			pdpg = pmap_allocpde(pmap, addr, NULL);
 			if (pdpg == NULL) {
 				/*
 				 * The creation of mappings below is only an
@@ -3888,7 +3901,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
 				pde_store(pde, pa | PG_PS | PG_M | PG_A |
 				    PG_U | PG_RW | PG_V);
 				pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE);
-				pmap_pde_mappings++;
+				atomic_add_long(&pmap_pde_mappings, 1);
 			} else {
 				/* Continue on if the PDE is already valid. */
 				pdpg->wire_count--;
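
This is the "needed atomic operation" from the log message:
pmap_object_init_pt() runs here under only the pmap lock, so its update of
pmap_pde_mappings can race with other large-mapping paths that bump the
same statistic, and it gets the same lost-update protection as the
promotion counters above.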
@@ -4020,7 +4033,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 			continue;
 			
 		if (srcptepaddr & PG_PS) {
-			dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT);
+			dstmpde = pmap_allocpde(dst_pmap, addr, NULL);
 			if (dstmpde == NULL)
 				break;
 			pde = (pd_entry_t *)
@@ -4058,7 +4071,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 				    dstmpte->pindex == pmap_pde_pindex(addr))
 					dstmpte->wire_count++;
 				else if ((dstmpte = pmap_allocpte(dst_pmap,
-				    addr, M_NOWAIT)) == NULL)
+				    addr, NULL)) == NULL)
 					goto out;
 				dst_pte = (pt_entry_t *)
 				    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
