Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 2 Oct 2005 04:54:56 GMT
From:      Alan Cox <alc@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 84641 for review
Message-ID:  <200510020454.j924suAU029218@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=84641

Change 84641 by alc@alc_home on 2005/10/02 04:54:30

	Integrate changes from my private branch.

Affected files ...

.. //depot/projects/superpages/src/sys/alpha/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/amd64/amd64/pmap.c#2 integrate
.. //depot/projects/superpages/src/sys/amd64/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/arm/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/conf/NOTES#2 integrate
.. //depot/projects/superpages/src/sys/conf/files#2 integrate
.. //depot/projects/superpages/src/sys/conf/options#2 integrate
.. //depot/projects/superpages/src/sys/i386/i386/pmap.c#2 integrate
.. //depot/projects/superpages/src/sys/i386/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/ia64/include/param.h#2 integrate
.. //depot/projects/superpages/src/sys/kern/vfs_bio.c#2 integrate
.. //depot/projects/superpages/src/sys/modules/linux/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/modules/nwfs/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/modules/smbfs/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/modules/svr4/Makefile#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm.h#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_buddy.c#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_buddy.h#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_contig.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_fault.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_map.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_object.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_object.h#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_page.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_page.h#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_pageout.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_pageq.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vm_reserve.c#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_reserve.h#1 branch
.. //depot/projects/superpages/src/sys/vm/vm_zeroidle.c#2 integrate
.. //depot/projects/superpages/src/sys/vm/vnode_pager.c#2 integrate

Differences ...

==== //depot/projects/superpages/src/sys/alpha/include/param.h#2 (text+ko) ====

@@ -103,6 +103,23 @@
 #define PAGE_MASK	(PAGE_SIZE-1)
 #define NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
 
+#define	BUDDY_QUEUES	10	/* Number of queues in the buddy allocator */
+
+/*
+ * SP_LEVELS is the number of superpage sizes.
+ */
+#define	SP_LEVELS	3
+
+/*
+ * SP_SMALL_SHIFT is LOG2("The Smallest Superpage Size" / PAGE_SIZE).
+ */
+#define	SP_SMALL_SHIFT	3
+
+/*
+ * XXX Presumably LOG2(ratio between successive superpage sizes); confirm.
+ */
+#define	SP_FACTOR_SHIFT	3
+
 #define	KERNBASE	0xfffffc0000300000LL	/* start of kernel virtual */
 #define	BTOPKERNBASE	((u_long)KERNBASE >> PGSHIFT)
 

==== //depot/projects/superpages/src/sys/amd64/amd64/pmap.c#2 (text+ko) ====

@@ -133,6 +133,7 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_reserve.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
@@ -205,6 +206,8 @@
 static pv_entry_t get_pv_entry(void);
 static void	pmap_clear_ptes(vm_page_t m, long bit);
 
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva);
+static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
 		vm_offset_t sva, pd_entry_t ptepde);
 static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde);
@@ -220,6 +223,9 @@
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 
+static void mach_promote(pmap_t pmap, pd_entry_t *pde, reservation_t reserv);
+static boolean_t pmap_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
@@ -358,21 +364,6 @@
 }
 
 
-static __inline pt_entry_t *
-pmap_pte_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *ptepde)
-{
-	pd_entry_t *pde;
-
-	pde = pmap_pde(pmap, va);
-	if (pde == NULL || (*pde & PG_V) == 0)
-		return NULL;
-	*ptepde = *pde;
-	if ((*pde & PG_PS) != 0)	/* compat with i386 pmap_pte() */
-		return ((pt_entry_t *)pde);
-	return (pmap_pde_to_pte(pde, va));
-}
-
-
 PMAP_INLINE pt_entry_t *
 vtopte(vm_offset_t va)
 {
@@ -1297,11 +1288,13 @@
 	 * normal 4K page.
 	 */
 	if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
-		*pd = 0;
-		pd = 0;
-		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
-		pmap_unuse_pt(pmap, va, *pmap_pdpe(pmap, va));
-		pmap_invalidate_all(kernel_pmap);
+		if (!pmap_demote(pmap, pd, va)) {
+			/*
+			 * Invalidation of the 2MB page mapping may have caused
+			 * the deallocation of the underlying PD page.
+			 */
+			pd = NULL;
+		}
 	}
 
 	/*
@@ -1519,6 +1512,53 @@
 }
 
 /*
+ * pmap_remove_pde: do the things to unmap a superpage in a process
+ */
+static int
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva)
+{
+	pd_entry_t oldpde;
+	vm_offset_t eva, va;
+	vm_page_t m;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT((sva & PDRMASK) == 0,
+	    ("pmap_remove_pde: sva is not 2mpage aligned"));
+	oldpde = pte_load_clear(pdq);
+	if (oldpde & PG_W)
+		pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+
+	/*
+	 * Machines that don't support invlpg, also don't support
+	 * PG_G.
+	 */
+	if (oldpde & PG_G)
+		pmap_invalidate_page(kernel_pmap, sva);
+	pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+	if (oldpde & PG_MANAGED) {
+		eva = sva + NBPDR;
+		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+		    va < eva; va += PAGE_SIZE, m++) {
+			if (oldpde & PG_M) {
+#if defined(PMAP_DIAGNOSTIC)
+				if (pmap_nw_modified((pt_entry_t) oldpde)) {
+					printf(
+	"pmap_remove_pde: modified 2mpage not writable: va: 0x%lx, pde: 0x%lx\n",
+					    va, oldpde);
+				}
+#endif
+				if (pmap_track_modified(va))
+					vm_page_dirty(m);
+			}
+			if (oldpde & PG_A)
+				vm_page_flag_set(m, PG_REFERENCED);
+			pmap_remove_entry(pmap, m, va);
+		}
+	}
+	return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva)));
+}
+
+/*
  * pmap_remove_pte: do the things to unmap a page in a process
  */
 static int
@@ -1651,11 +1691,25 @@
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
-			*pde = 0;
-			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
-			pmap_unuse_pt(pmap, sva, *pdpe);
-			anyvalid = 1;
-			continue;
+			if (sva + NBPDR == va_next && eva >= va_next) {
+#ifdef	INVARIANTS
+				printf("pmap_remove: superpage at %lx to destroy.\n",
+				    sva);
+#endif
+				pmap_remove_pde(pmap, pde, sva);
+				anyvalid = 1;
+				continue;
+			} else {
+#ifdef	INVARIANTS
+				printf("pmap_remove: superpage at %lx to demote !!!\n",
+				    sva);
+#endif
+				if (!pmap_demote(pmap, pde, sva)) {
+					anyvalid = 1;	/* XXX */
+					continue;
+				}
+				ptpaddr = *pde;
+			}
 		}
 
 		/*
@@ -1698,9 +1752,10 @@
 void
 pmap_remove_all(vm_page_t m)
 {
+	pmap_t pmap;
 	register pv_entry_t pv;
 	pt_entry_t *pte, tpte;
-	pd_entry_t ptepde;
+	pd_entry_t *pde;
 
 #if defined(PMAP_DIAGNOSTIC)
 	/*
@@ -1713,12 +1768,25 @@
 #endif
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-		PMAP_LOCK(pv->pv_pmap);
-		pv->pv_pmap->pm_stats.resident_count--;
-		pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde);
+		pmap = pv->pv_pmap;
+		PMAP_LOCK(pmap);
+		pmap->pm_stats.resident_count--;
+		pde = pmap_pde(pmap, pv->pv_va);
+		if (*pde & PG_PS) {
+			printf("pmap_remove_all: superpage to demote !!!\n");
+			if (!pmap_demote(pmap, pde, pv->pv_va)) {
+				/*
+				 * All mappings within the same 2mpage were
+				 * destroyed and pv was freed.
+				 */
+				PMAP_UNLOCK(pmap);
+				continue;
+			}
+		}
+		pte = pmap_pde_to_pte(pde, pv->pv_va);
 		tpte = pte_load_clear(pte);
 		if (tpte & PG_W)
-			pv->pv_pmap->pm_stats.wired_count--;
+			pmap->pm_stats.wired_count--;
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
@@ -1736,18 +1804,60 @@
 			if (pmap_track_modified(pv->pv_va))
 				vm_page_dirty(m);
 		}
-		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
-		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+		pmap_invalidate_page(pmap, pv->pv_va);
+		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count--;
-		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde);
-		PMAP_UNLOCK(pv->pv_pmap);
+		pmap_unuse_pt(pmap, pv->pv_va, *pde);
+		PMAP_UNLOCK(pmap);
 		free_pv_entry(pv);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
 }
 
 /*
+ * pmap_protect_pde: do the things to protect a 2mpage in a process
+ */
+static boolean_t
+pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva)
+{
+	pd_entry_t newpde, oldpde;
+	vm_offset_t eva, va;
+	vm_page_t m;
+	boolean_t anychanged;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT((sva & PDRMASK) == 0,
+	    ("pmap_protect_pde: sva is not 2mpage aligned"));
+	anychanged = FALSE;
+retry:
+	oldpde = newpde = *pde;
+	if (oldpde & PG_MANAGED) {
+		eva = sva + NBPDR;
+		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+		    va < eva; va += PAGE_SIZE, m++) {
+			if (oldpde & PG_A) {
+				vm_page_flag_set(m, PG_REFERENCED);
+				newpde &= ~PG_A;
+			}
+			if ((oldpde & PG_M) != 0 &&
+			    pmap_track_modified(va))
+				vm_page_dirty(m);
+		}
+	}
+	newpde &= ~(PG_RW | PG_M);
+	if (newpde != oldpde) {
+		if (!atomic_cmpset_long(pde, oldpde, newpde))
+			goto retry;
+		if (oldpde & PG_G)
+			pmap_invalidate_page(pmap, sva);
+		else
+			anychanged = TRUE;
+	}
+	return (anychanged);
+}
+
+/*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  */
@@ -1802,9 +1912,16 @@
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
-			*pde &= ~(PG_M|PG_RW);
-			anychanged = 1;
-			continue;
+			if (sva + NBPDR == va_next && eva >= va_next) {
+				if (pmap_protect_pde(pmap, pde, sva))
+					anychanged = 1;
+				continue;
+			} else {
+				if (!pmap_demote(pmap, pde, sva)) {
+					anychanged = 1;	/* XXX */
+					continue;
+				}
+			}
 		}
 
 		if (va_next > eva)
@@ -2032,6 +2149,24 @@
 		} else
 			pte_store(pte, newpte | PG_A);
 	}
+
+	/*
+	 * Promotion condition:
+	 * 1) Page has to be part of a fully populated reservation
+	 * 2) Virtual address corresponding to the reservation has to
+	 *    be superpage aligned
+	 */
+	if (((mpte != NULL && mpte->wire_count == NPTEPG) ||
+	     m->object == kernel_object || (m->object == kmem_object && FALSE)) &&
+	    m->reserv != NULL &&
+	    m->reserv->refcnt == NBPDR / PAGE_SIZE) {
+#ifdef	INVARIANTS
+		printf("%s: pmap %p va %lx XXX\n", __func__, pmap, va);
+#endif
+		KASSERT(m->object->flags & OBJ_SUPERPAGES, ("pmap_enter: xxx"));
+		mach_promote(pmap, pmap_pde(pmap, va), m->reserv);
+	}
+
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
@@ -2146,6 +2281,23 @@
 		pte_store(pte, pa | PG_V | PG_U);
 	else
 		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+
+	/*
+	 * Promotion condition:
+	 * 1) Page has to be part of a fully populated reservation
+	 * 2) Virtual address corresponding to the reservation has to
+	 *    be superpage aligned
+	 */
+	if (m->reserv != NULL &&
+	    m->reserv->refcnt == NBPDR / PAGE_SIZE &&
+	    mpte->wire_count == NPTEPG) {
+#ifdef	INVARIANTS
+		printf("%s: pmap %p va %lx XXX\n", __func__, pmap, va);
+#endif
+		KASSERT(m->object->flags & OBJ_SUPERPAGES,
+		    ("pmap_enter_quick: xxx"));
+		mach_promote(pmap, pmap_pde(pmap, va), m->reserv);
+	}
 out:
 	PMAP_UNLOCK(pmap);
 	return mpte;
@@ -2372,6 +2524,13 @@
 				*pde = srcptepaddr;
 				dst_pmap->pm_stats.resident_count +=
 				    NBPDR / PAGE_SIZE;
+				if (srcptepaddr & PG_MANAGED) {
+					m = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
+					for (; addr < va_next; addr += PAGE_SIZE) {
+						pmap_insert_entry(dst_pmap, addr, m);
+						m++;
+					}
+				}
 			} else
 				pmap_unwire_pte_hold(dst_pmap, addr, dstmpde);
 			continue;
@@ -2530,6 +2689,7 @@
 	pmap_t pmap;
 	vm_offset_t sva, eva;
 {
+	pd_entry_t *pde;
 	pt_entry_t *pte, tpte;
 	vm_page_t m;
 	pv_entry_t pv, npv;
@@ -2550,9 +2710,43 @@
 		}
 
 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
+		pde = vtopde(pv->pv_va);
+#else
+		pde = pmap_pde(pmap, pv->pv_va);
+#endif
+		if (*pde & PG_PS) {
+			if (*pde & PG_W) {
+				npv = TAILQ_NEXT(pv, pv_plist);
+				continue;
+			}
+			if (sva <= trunc_2mpage(pv->pv_va) &&
+			    eva >= round_2mpage(pv->pv_va + 1)) {
+#ifdef	INVARIANTS
+				printf("pmap_remove_pages: superpage at %lx to destroy.\n",
+				    trunc_2mpage(pv->pv_va));
+#endif
+				pmap_remove_pde(pmap, pde, trunc_2mpage(pv->pv_va));
+				npv = TAILQ_FIRST(&pmap->pm_pvlist);
+				continue;
+			}
+#ifdef	INVARIANTS
+			printf("pmap_remove_pages: superpage at %lx to demote !!!\n",
+			    pv->pv_va);
+#endif
+			if (!pmap_demote(pmap, pde, pv->pv_va)) {
+				/*
+				 * All mappings within the same 2mpage were
+				 * destroyed and pv was freed.
+				 */
+				npv = TAILQ_FIRST(&pmap->pm_pvlist);
+				continue;
+			}
+		}
+
+#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 		pte = vtopte(pv->pv_va);
 #else
-		pte = pmap_pte(pmap, pv->pv_va);
+		pte = pmap_pde_to_pte(pde, pv->pv_va);
 #endif
 		tpte = *pte;
 
@@ -2597,7 +2791,7 @@
 		if (TAILQ_EMPTY(&m->md.pv_list))
 			vm_page_flag_clear(m, PG_WRITEABLE);
 
-		pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va));
+		pmap_unuse_pt(pmap, pv->pv_va, *pde);
 		free_pv_entry(pv);
 	}
 	pmap_invalidate_all(pmap);
@@ -2657,7 +2851,7 @@
 	rv = FALSE;
 	PMAP_LOCK(pmap);
 	pde = pmap_pde(pmap, addr);
-	if (pde != NULL && (*pde & PG_V)) {
+	if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) {
 		pte = vtopte(addr);
 		rv = (*pte & PG_V) == 0;
 	}
@@ -2671,7 +2865,9 @@
 static __inline void
 pmap_clear_ptes(vm_page_t m, long bit)
 {
-	register pv_entry_t pv;
+	pmap_t pmap;
+	pv_entry_t npv, pv;
+	pd_entry_t *pde;
 	pt_entry_t pbits, *pte;
 
 	if ((m->flags & PG_FICTITIOUS) ||
@@ -2683,7 +2879,7 @@
 	 * Loop over all current mappings setting/clearing as appropos If
 	 * setting RO do we need to clear the VAC?
 	 */
-	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+	TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, npv) {
 		/*
 		 * don't write protect pager mappings
 		 */
@@ -2692,8 +2888,22 @@
 				continue;
 		}
 
-		PMAP_LOCK(pv->pv_pmap);
-		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+		pmap = pv->pv_pmap;
+		PMAP_LOCK(pmap);
+		pde = pmap_pde(pmap, pv->pv_va);
+		if (*pde & PG_PS) {
+			printf("pmap_clear_ptes: superpage to demote !!!\n");
+			if ((*pde & bit) == 0 ||
+			    !pmap_demote(pmap, pde, pv->pv_va)) {
+				/*
+				 * Nothing to clear, or demotion failed and
+				 * the 2mpage's mappings and pv were freed.
+				 */
+				PMAP_UNLOCK(pmap);
+				continue;
+			}
+		}
+		pte = pmap_pde_to_pte(pde, pv->pv_va);
 retry:
 		pbits = *pte;
 		if (pbits & bit) {
@@ -2707,9 +2917,9 @@
 			} else {
 				atomic_clear_long(pte, bit);
 			}
-			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va);
 		}
-		PMAP_UNLOCK(pv->pv_pmap);
+		PMAP_UNLOCK(pmap);
 	}
 	if (bit == PG_RW)
 		vm_page_flag_clear(m, PG_WRITEABLE);
@@ -2747,6 +2957,7 @@
 int
 pmap_ts_referenced(vm_page_t m)
 {
+	pmap_t pmap;
 	register pv_entry_t pv, pvf, pvn;
 	pt_entry_t *pte;
 	pt_entry_t v;
@@ -2770,20 +2981,21 @@
 			if (!pmap_track_modified(pv->pv_va))
 				continue;
 
-			PMAP_LOCK(pv->pv_pmap);
-			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+			pmap = pv->pv_pmap;
+			PMAP_LOCK(pmap);
+			pte = pmap_pte(pmap, pv->pv_va);
 
 			if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
 				atomic_clear_long(pte, PG_A);
-				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+				pmap_invalidate_page(pmap, pv->pv_va);
 
 				rtval++;
 				if (rtval > 4) {
-					PMAP_UNLOCK(pv->pv_pmap);
+					PMAP_UNLOCK(pmap);
 					break;
 				}
 			}
-			PMAP_UNLOCK(pv->pv_pmap);
+			PMAP_UNLOCK(pmap);
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
 
@@ -2873,24 +3085,35 @@
 	pmap_t pmap;
 	vm_offset_t addr;
 {
-	pt_entry_t *ptep, pte;
+	pd_entry_t *pdep;
+	pt_entry_t pte;
+	vm_paddr_t pa;
 	vm_page_t m;
 	int val = 0;
 	
 	PMAP_LOCK(pmap);
-	ptep = pmap_pte(pmap, addr);
-	pte = (ptep != NULL) ? *ptep : 0;
+	pdep = pmap_pde(pmap, addr);
+	if (pdep != NULL && (*pdep & PG_V)) {
+		if (*pdep & PG_PS) {
+			KASSERT((*pdep & PG_FRAME & PDRMASK) == 0,
+			    ("pmap_mincore: bad pde"));
+			pte = *pdep;
+			pa = (*pdep & PG_FRAME) | (addr & PDRMASK);
+		} else {
+			pte = *pmap_pde_to_pte(pdep, addr);
+			pa = pte & PG_FRAME;
+		}
+	} else {
+		pte = 0;
+		pa = 0;
+	}
 	PMAP_UNLOCK(pmap);
 
 	if (pte != 0) {
-		vm_paddr_t pa;
-
 		val = MINCORE_INCORE;
 		if ((pte & PG_MANAGED) == 0)
 			return val;
 
-		pa = pte & PG_FRAME;
-
 		m = PHYS_TO_VM_PAGE(pa);
 
 		/*
@@ -2975,3 +3198,131 @@
 	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 	return addr;
 }
+
+#define COMPATIBLE_PTE_MASK (PG_NX | PG_U | PG_RW)
+#define COMPATIBLE_PTE(a,b) ((a & COMPATIBLE_PTE_MASK) == (b & COMPATIBLE_PTE_MASK))
+
+static void
+mach_promote(pmap_t pmap, pd_entry_t *pde, reservation_t reserv)
+{
+	vm_paddr_t pa;
+	pt_entry_t *pte, *first_pte, flags;
+	vm_page_t page_pa;
+	vm_page_t tofree = PHYS_TO_VM_PAGE(*pde & PG_FRAME); /*pte page to free after promotion*/
+
+	first_pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
+	flags = *first_pte;
+	pa = VM_PAGE_TO_PHYS(reserv->first_page);
+
+	/* 
+	 * Check all the ptes before promotion
+	 */
+	for (pte = first_pte; pte < first_pte + NPTEPG; pte++) {
+		if (pa != (*pte & PG_FRAME))
+			return;
+		pa += PAGE_SIZE;
+
+		page_pa = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+		KASSERT(page_pa->reserv,("mach_promote: page has no reservation"));
+		KASSERT(page_pa->reserv == reserv,("mach_promote: reservation mismatch"));
+	
+		if ((*pte & PG_V) == 0 || !COMPATIBLE_PTE(*pte, flags))
+			return;
+
+		/* Add dirty and accessed bits to the flags, if necessary */
+		flags |= *pte & (PG_A | PG_M);
+	}
+
+	/* Create a superpage: add PG_PS bit to the pde */ 
+	*pde = PG_PS | flags;
+
+	if (pmap != kernel_pmap)
+		pmap->pm_stats.resident_count--;
+	
+	/* Invalidate old TLB entries  */
+	pmap_invalidate_all(pmap);
+
+	/*
+	 * XXX
+	 *
+	 * File system corruption occurs if pte pages belonging to the
+	 * kernel pmap are freed.
+	 */
+	if (pmap != kernel_pmap) {
+		KASSERT(tofree->wire_count == NPTEPG,
+		    ("pmap_promote: pte page wire count error"));
+		tofree->wire_count = 0;	
+		vm_page_free(tofree);
+		atomic_subtract_int(&cnt.v_wire_count, 1);
+	}
+
+#ifdef	INVARIANTS
+	printf("Promotion successful XXX\n");
+#endif
+}
+
+static boolean_t
+pmap_demote(pmap_t pmap, pd_entry_t *pde0, vm_offset_t va)
+{
+	pd_entry_t save_pde_value, new_pte_value ;
+	pt_entry_t *pte_page_va, *new_pte_va;
+	vm_paddr_t pte_page_pa;
+	vm_page_t pte_page;
+
+	KASSERT((*pde0 & PG_PS) != 0,
+	    ("pmap_demote: not a superpage, impossible to demote"));
+
+	/* STEP 1
+	 * Allocate the PTE page
+	 */
+	if ((pte_page = vm_page_alloc(NULL, pmap_pde_pindex(va),
+	    VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
+		pmap_remove_pde(pmap, pde0, trunc_2mpage(va));
+		pmap_invalidate_all(pmap);
+		return (FALSE);
+	}
+	pte_page->wire_count += NPTEPG - 1;
+	KASSERT(pte_page->wire_count == NPTEPG,
+	    ("pmap_demote: page table page %p has wire count %d",
+	    pte_page, pte_page->wire_count));
+	if (pmap != kernel_pmap)
+		pmap->pm_stats.resident_count++;
+
+	pte_page_pa = VM_PAGE_TO_PHYS(pte_page);
+	pte_page_va = (vm_offset_t *) PHYS_TO_DMAP(pte_page_pa);
+	pte_page_pa |= PG_U | PG_RW | PG_V | PG_A | PG_M;
+
+repeat:
+
+	/* STEP 2
+	 * Save the value of the pde entry
+	 * Define the value of the first pte entry
+	 */
+	save_pde_value = *pde0;
+
+	/* STEP 3
+	 * Fill the PTE page with the physical address of the base pages 
+	 */ 
+	for (	new_pte_va = pte_page_va, new_pte_value = save_pde_value & ~PG_PS;
+		new_pte_va < pte_page_va + NPTEPG;
+	 	new_pte_va++ , new_pte_value += PAGE_SIZE) {
+		
+		*new_pte_va = new_pte_value ;	
+
+	} 
+	
+	/* STEP 4:
+	 * Check if pde value has changed
+	 * If not, assign the new pde value.
+	 * If yes, repeat the pte assignment loop.
+	 */
+	if (!atomic_cmpset_long(pde0, save_pde_value, pte_page_pa))
+		goto repeat;	
+
+	/*
+	 * Some implementations of the amd64 architecture prefetch TLB
+	 * entries.
+	 */ 
+	pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
+	return (TRUE);
+}

==== //depot/projects/superpages/src/sys/amd64/include/param.h#2 (text+ko) ====

@@ -119,6 +119,23 @@
 #define	NBPML4		(1ul<<PML4SHIFT)/* bytes/page map lev4 table */
 #define	PML4MASK	(NBPML4-1)
 
+#define	BUDDY_QUEUES	10	/* Number of queues in the buddy allocator */
+
+/*
+ * SP_LEVELS is the number of superpage sizes.
+ */
+#define	SP_LEVELS	1
+
+/*
+ * SP_SMALL_SHIFT is LOG2(NBPDR / PAGE_SIZE).
+ */
+#define	SP_SMALL_SHIFT	9
+
+/*
+ * If there is only one superpage size, then SP_FACTOR_SHIFT is zero.
+ */
+#define	SP_FACTOR_SHIFT	0
+
 #define IOPAGES	2		/* pages of i/o permission bitmap */
 
 #ifndef	KSTACK_PAGES

==== //depot/projects/superpages/src/sys/arm/include/param.h#2 (text+ko) ====

@@ -94,6 +94,23 @@
 #define NBPDR		(1 << PDR_SHIFT)
 #define NPDEPG          (1 << (32 - PDR_SHIFT))
 
+#define	BUDDY_QUEUES	9	/* Number of queues in the buddy allocator */
+
+/*
+ * SP_LEVELS is the number of superpage sizes.
+ */
+#define	SP_LEVELS	2
+
+/*
+ * SP_SMALL_SHIFT is LOG2("The Smallest Superpage Size" / PAGE_SIZE).
+ */
+#define	SP_SMALL_SHIFT	4
+
+/*
+ * XXX Presumably LOG2(ratio between successive superpage sizes); confirm.
+ */
+#define	SP_FACTOR_SHIFT	4
+
 #ifndef KSTACK_PAGES
 #define KSTACK_PAGES    2
 #endif /* !KSTACK_PAGES */

==== //depot/projects/superpages/src/sys/conf/NOTES#2 (text+ko) ====

@@ -110,16 +110,6 @@
 #
 options 	BLKDEV_IOSIZE=8192
 
-# Options for the VM subsystem
-# L2 cache size (in KB) can be specified in PQ_CACHESIZE
-options 	PQ_CACHESIZE=512	# color for 512k cache
-# Deprecated options supported for backwards compatibility
-#options 	PQ_NOOPT		# No coloring
-#options 	PQ_LARGECACHE		# color for 512k cache
-#options 	PQ_HUGECACHE		# color for 1024k cache
-#options 	PQ_MEDIUMCACHE		# color for 256k cache
-#options 	PQ_NORMALCACHE		# color for 64k cache
-
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
 #    strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL

==== //depot/projects/superpages/src/sys/conf/files#2 (text+ko) ====

@@ -1890,6 +1890,7 @@
 vm/swap_pager.c			standard
 vm/uma_core.c			standard
 vm/uma_dbg.c			standard
+vm/vm_buddy.c			standard
 vm/vm_contig.c			standard
 vm/memguard.c			optional DEBUG_MEMGUARD
 vm/vm_fault.c			standard
@@ -1904,6 +1905,7 @@
 vm/vm_pageout.c			standard
 vm/vm_pageq.c			standard
 vm/vm_pager.c			standard
+vm/vm_reserve.c			standard
 vm/vm_unix.c			standard
 vm/vm_zeroidle.c		standard
 vm/vnode_pager.c		standard

==== //depot/projects/superpages/src/sys/conf/options#2 (text+ko) ====

@@ -515,12 +515,6 @@
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE		opt_vm.h
-PQ_NOOPT		opt_vmpage.h
-PQ_NORMALCACHE		opt_vmpage.h
-PQ_MEDIUMCACHE		opt_vmpage.h
-PQ_LARGECACHE		opt_vmpage.h
-PQ_HUGECACHE		opt_vmpage.h
-PQ_CACHESIZE		opt_vmpage.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD		opt_vm.h

==== //depot/projects/superpages/src/sys/i386/i386/pmap.c#2 (text+ko) ====

@@ -133,6 +133,7 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/vm_reserve.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
@@ -255,6 +256,8 @@
 static pv_entry_t get_pv_entry(void);
 static void	pmap_clear_ptes(vm_page_t m, int bit);
 
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva);
+static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
@@ -273,6 +276,9 @@
 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
 #endif
 
+static void mach_promote(pmap_t pmap, vm_offset_t va, reservation_t reserv);
+static boolean_t pmap_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
@@ -1199,10 +1205,8 @@
 	 * normal 4K page.
 	 */
 	if (ptepa & PG_PS) {
-		pmap->pm_pdir[ptepindex] = 0;
-		ptepa = 0;
-		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
-		pmap_invalidate_all(kernel_pmap);
+		pmap_demote(pmap, &pmap->pm_pdir[ptepindex], va);
+		ptepa = pmap->pm_pdir[ptepindex];
 	}
 
 	/*
@@ -1521,6 +1525,52 @@
 }
 
 /*
+ * pmap_remove_pde: do the things to unmap a superpage in a process
+ */
+static void
+pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva)
+{
+	pd_entry_t oldpde;
+	vm_offset_t eva, va;
+	vm_page_t m;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT((sva & PDRMASK) == 0,
+	    ("pmap_remove_pde: sva is not 4mpage aligned"));
+	oldpde = pte_load_clear(pdq);
+	if (oldpde & PG_W)
+		pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
+
+	/*
+	 * Machines that don't support invlpg, also don't support
+	 * PG_G.
+	 */
+	if (oldpde & PG_G)
+		pmap_invalidate_page(kernel_pmap, sva);
+	pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+	if (oldpde & PG_MANAGED) {
+		eva = sva + NBPDR;
+		for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME);
+		    va < eva; va += PAGE_SIZE, m++) {
+			if (oldpde & PG_M) {
+#if defined(PMAP_DIAGNOSTIC)
+				if (pmap_nw_modified((pt_entry_t) oldpde)) {
+					printf(
+	"pmap_remove_pde: modified 4mpage not writable: va: 0x%x, pde: 0x%x\n",
+					    va, oldpde);
+				}
+#endif
+				if (pmap_track_modified(va))
+					vm_page_dirty(m);
+			}
+			if (oldpde & PG_A)
+				vm_page_flag_set(m, PG_REFERENCED);
+			pmap_remove_entry(pmap, m, va);
+		}
+	}
+}
+
+/*
  * pmap_remove_pte: do the things to unmap a page in a process
  */
 static int
@@ -1639,10 +1689,24 @@
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
-			pmap->pm_pdir[pdirindex] = 0;
-			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
-			anyvalid = 1;
-			continue;
+			if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+#ifdef	INVARIANTS
+				printf("pmap_remove: superpage at %x to destroy.\n",
+				    sva);
+#endif
+				pmap_remove_pde(pmap, pmap_pde(pmap, sva), sva);
+				anyvalid = 1;
+				continue;
+			} else {
+#ifdef	INVARIANTS
+				printf("pmap_remove: superpage at %x to demote !!!\n",
+				    sva);
+#endif
+				if (!pmap_demote(pmap, pmap_pde(pmap, sva), sva)) {
+					anyvalid = 1;	/* XXX */
+					continue;
+				}
+			}
 		}
 
 		/*
@@ -1686,8 +1750,10 @@
 void
 pmap_remove_all(vm_page_t m)
 {
+	pmap_t pmap;
 	register pv_entry_t pv;
 	pt_entry_t *pte, tpte;
+	pd_entry_t *pde;
 
 #if defined(PMAP_DIAGNOSTIC)
 	/*
@@ -1701,12 +1767,25 @@
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	sched_pin();
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-		PMAP_LOCK(pv->pv_pmap);

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200510020454.j924suAU029218>