Date: Sun, 2 Oct 2005 04:54:56 GMT From: Alan Cox <alc@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 84641 for review Message-ID: <200510020454.j924suAU029218@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=84641 Change 84641 by alc@alc_home on 2005/10/02 04:54:30 Integrate changes from my private branch. Affected files ... .. //depot/projects/superpages/src/sys/alpha/include/param.h#2 integrate .. //depot/projects/superpages/src/sys/amd64/amd64/pmap.c#2 integrate .. //depot/projects/superpages/src/sys/amd64/include/param.h#2 integrate .. //depot/projects/superpages/src/sys/arm/include/param.h#2 integrate .. //depot/projects/superpages/src/sys/conf/NOTES#2 integrate .. //depot/projects/superpages/src/sys/conf/files#2 integrate .. //depot/projects/superpages/src/sys/conf/options#2 integrate .. //depot/projects/superpages/src/sys/i386/i386/pmap.c#2 integrate .. //depot/projects/superpages/src/sys/i386/include/param.h#2 integrate .. //depot/projects/superpages/src/sys/ia64/include/param.h#2 integrate .. //depot/projects/superpages/src/sys/kern/vfs_bio.c#2 integrate .. //depot/projects/superpages/src/sys/modules/linux/Makefile#2 integrate .. //depot/projects/superpages/src/sys/modules/nwfs/Makefile#2 integrate .. //depot/projects/superpages/src/sys/modules/smbfs/Makefile#2 integrate .. //depot/projects/superpages/src/sys/modules/svr4/Makefile#2 integrate .. //depot/projects/superpages/src/sys/vm/vm.h#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_buddy.c#1 branch .. //depot/projects/superpages/src/sys/vm/vm_buddy.h#1 branch .. //depot/projects/superpages/src/sys/vm/vm_contig.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_fault.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_map.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_object.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_object.h#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_page.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_page.h#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_pageout.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vm_pageq.c#2 integrate .. 
//depot/projects/superpages/src/sys/vm/vm_reserve.c#1 branch .. //depot/projects/superpages/src/sys/vm/vm_reserve.h#1 branch .. //depot/projects/superpages/src/sys/vm/vm_zeroidle.c#2 integrate .. //depot/projects/superpages/src/sys/vm/vnode_pager.c#2 integrate Differences ... ==== //depot/projects/superpages/src/sys/alpha/include/param.h#2 (text+ko) ==== @@ -103,6 +103,23 @@ #define PAGE_MASK (PAGE_SIZE-1) #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define BUDDY_QUEUES 10 /* Number of queues in the buddy allocator */ + +/* + * SP_LEVELS is the number of superpage sizes. + */ +#define SP_LEVELS 3 + +/* + * SP_SMALL_SHIFT is LOG2("The Smallest Superpage Size" / PAGE_SIZE). + */ +#define SP_SMALL_SHIFT 3 + +/* + * XXX + */ +#define SP_FACTOR_SHIFT 3 + #define KERNBASE 0xfffffc0000300000LL /* start of kernel virtual */ #define BTOPKERNBASE ((u_long)KERNBASE >> PGSHIFT) ==== //depot/projects/superpages/src/sys/amd64/amd64/pmap.c#2 (text+ko) ==== @@ -133,6 +133,7 @@ #include <vm/vm_extern.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> +#include <vm/vm_reserve.h> #include <vm/uma.h> #include <machine/cpu.h> @@ -205,6 +206,8 @@ static pv_entry_t get_pv_entry(void); static void pmap_clear_ptes(vm_page_t m, long bit); +static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva); +static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde); @@ -220,6 +223,9 @@ static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +static void mach_promote(pmap_t pmap, pd_entry_t *pde, reservation_t reserv); +static boolean_t pmap_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); + CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); @@ -358,21 +364,6 @@ } -static __inline 
pt_entry_t * -pmap_pte_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *ptepde) -{ - pd_entry_t *pde; - - pde = pmap_pde(pmap, va); - if (pde == NULL || (*pde & PG_V) == 0) - return NULL; - *ptepde = *pde; - if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ - return ((pt_entry_t *)pde); - return (pmap_pde_to_pte(pde, va)); -} - - PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) { @@ -1297,11 +1288,13 @@ * normal 4K page. */ if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { - *pd = 0; - pd = 0; - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - pmap_unuse_pt(pmap, va, *pmap_pdpe(pmap, va)); - pmap_invalidate_all(kernel_pmap); + if (!pmap_demote(pmap, pd, va)) { + /* + * Invalidation of the 2MB page mapping may have caused + * the deallocation of the underlying PD page. + */ + pd = NULL; + } } /* @@ -1519,6 +1512,53 @@ } /* + * pmap_remove_pde: do the things to unmap a superpage in a process + */ +static int +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva) +{ + pd_entry_t oldpde; + vm_offset_t eva, va; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & PDRMASK) == 0, + ("pmap_remove_pde: sva is not 2mpage aligned")); + oldpde = pte_load_clear(pdq); + if (oldpde & PG_W) + pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; + + /* + * Machines that don't support invlpg, also don't support + * PG_G. 
+ */ + if (oldpde & PG_G) + pmap_invalidate_page(kernel_pmap, sva); + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + if (oldpde & PG_MANAGED) { + eva = sva + NBPDR; + for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); + va < eva; va += PAGE_SIZE, m++) { + if (oldpde & PG_M) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) oldpde)) { + printf( + "pmap_remove_pde: modified 2mpage not writable: va: 0x%lx, pde: 0x%lx\n", + va, oldpde); + } +#endif + if (pmap_track_modified(va)) + vm_page_dirty(m); + } + if (oldpde & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + pmap_remove_entry(pmap, m, va); + } + } + return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva))); +} + +/* * pmap_remove_pte: do the things to unmap a page in a process */ static int @@ -1651,11 +1691,25 @@ * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { - *pde = 0; - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - pmap_unuse_pt(pmap, sva, *pdpe); - anyvalid = 1; - continue; + if (sva + NBPDR == va_next && eva >= va_next) { +#ifdef INVARIANTS + printf("pmap_remove: superpage at %lx to destroy.\n", + sva); +#endif + pmap_remove_pde(pmap, pde, sva); + anyvalid = 1; + continue; + } else { +#ifdef INVARIANTS + printf("pmap_remove: superpage at %lx to demote !!!\n", + sva); +#endif + if (!pmap_demote(pmap, pde, sva)) { + anyvalid = 1; /* XXX */ + continue; + } + ptpaddr = *pde; + } } /* @@ -1698,9 +1752,10 @@ void pmap_remove_all(vm_page_t m) { + pmap_t pmap; register pv_entry_t pv; pt_entry_t *pte, tpte; - pd_entry_t ptepde; + pd_entry_t *pde; #if defined(PMAP_DIAGNOSTIC) /* @@ -1713,12 +1768,25 @@ #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - PMAP_LOCK(pv->pv_pmap); - pv->pv_pmap->pm_stats.resident_count--; - pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde); + pmap = pv->pv_pmap; + PMAP_LOCK(pmap); + pmap->pm_stats.resident_count--; + pde = pmap_pde(pmap, pv->pv_va); + if (*pde & PG_PS) { + 
printf("pmap_remove_all: superpage to demote !!!\n"); + if (!pmap_demote(pmap, pde, pv->pv_va)) { + /* + * All mappings within the same 2mpage were + * destroyed and pv was freed. + */ + PMAP_UNLOCK(pmap); + continue; + } + } + pte = pmap_pde_to_pte(pde, pv->pv_va); tpte = pte_load_clear(pte); if (tpte & PG_W) - pv->pv_pmap->pm_stats.wired_count--; + pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); @@ -1736,18 +1804,60 @@ if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } - pmap_invalidate_page(pv->pv_pmap, pv->pv_va); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + pmap_invalidate_page(pmap, pv->pv_va); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde); - PMAP_UNLOCK(pv->pv_pmap); + pmap_unuse_pt(pmap, pv->pv_va, *pde); + PMAP_UNLOCK(pmap); free_pv_entry(pv); } vm_page_flag_clear(m, PG_WRITEABLE); } /* + * pmap_protect_pde: do the things to protect a 2mpage in a process + */ +static boolean_t +pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva) +{ + pd_entry_t newpde, oldpde; + vm_offset_t eva, va; + vm_page_t m; + boolean_t anychanged; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & PDRMASK) == 0, + ("pmap_protect_pde: sva is not 2mpage aligned")); + anychanged = FALSE; +retry: + oldpde = newpde = *pde; + if (oldpde & PG_MANAGED) { + eva = sva + NBPDR; + for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); + va < eva; va += PAGE_SIZE, m++) { + if (oldpde & PG_A) { + vm_page_flag_set(m, PG_REFERENCED); + newpde &= ~PG_A; + } + if ((oldpde & PG_M) != 0 && + pmap_track_modified(va)) + vm_page_dirty(m); + } + } + newpde &= ~(PG_RW | PG_M); + if (newpde != oldpde) { + if (!atomic_cmpset_long(pde, oldpde, newpde)) + goto retry; + if (oldpde & PG_G) + pmap_invalidate_page(pmap, sva); + else + anychanged = TRUE; + } + return (anychanged); +} + +/* * Set the physical protection on the * 
specified range of this map as requested. */ @@ -1802,9 +1912,16 @@ * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { - *pde &= ~(PG_M|PG_RW); - anychanged = 1; - continue; + if (sva + NBPDR == va_next && eva >= va_next) { + if (pmap_protect_pde(pmap, pde, sva)) + anychanged = 1; + continue; + } else { + if (!pmap_demote(pmap, pde, sva)) { + anychanged = 1; /* XXX */ + continue; + } + } } if (va_next > eva) @@ -2032,6 +2149,24 @@ } else pte_store(pte, newpte | PG_A); } + + /* + * Promotion condition: + * 1) Page has to be part of a fully populated reservation + * 2) Virtual address corresponding to the reservation has to + * be superpage aligned + */ + if (((mpte != NULL && mpte->wire_count == NPTEPG) || + m->object == kernel_object || (m->object == kmem_object && FALSE)) && + m->reserv != NULL && + m->reserv->refcnt == NBPDR / PAGE_SIZE) { +#ifdef INVARIANTS + printf("%s: pmap %p va %lx XXX\n", __func__, pmap, va); +#endif + KASSERT(m->object->flags & OBJ_SUPERPAGES, ("pmap_enter: xxx")); + mach_promote(pmap, pmap_pde(pmap, va), m->reserv); + } + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -2146,6 +2281,23 @@ pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); + + /* + * Promotion condition: + * 1) Page has to be part of a fully populated reservation + * 2) Virtual address corresponding to the reservation has to + * be superpage aligned + */ + if (m->reserv != NULL && + m->reserv->refcnt == NBPDR / PAGE_SIZE && + mpte->wire_count == NPTEPG) { +#ifdef INVARIANTS + printf("%s: pmap %p va %lx XXX\n", __func__, pmap, va); +#endif + KASSERT(m->object->flags & OBJ_SUPERPAGES, + ("pmap_enter_quick: xxx")); + mach_promote(pmap, pmap_pde(pmap, va), m->reserv); + } out: PMAP_UNLOCK(pmap); return mpte; @@ -2372,6 +2524,13 @@ *pde = srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; + if (srcptepaddr & PG_MANAGED) { + m = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); + for (; addr < va_next; addr += PAGE_SIZE) { + 
pmap_insert_entry(dst_pmap, addr, m); + m++; + } + } } else pmap_unwire_pte_hold(dst_pmap, addr, dstmpde); continue; @@ -2530,6 +2689,7 @@ pmap_t pmap; vm_offset_t sva, eva; { + pd_entry_t *pde; pt_entry_t *pte, tpte; vm_page_t m; pv_entry_t pv, npv; @@ -2550,9 +2710,43 @@ } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + pde = vtopde(pv->pv_va); +#else + pde = pmap_pde(pmap, pv->pv_va); +#endif + if (*pde & PG_PS) { + if (*pde & PG_W) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + if (sva <= trunc_2mpage(pv->pv_va) && + eva >= round_2mpage(pv->pv_va + 1)) { +#ifdef INVARIANTS + printf("pmap_remove_pages: superpage at %lx to destroy.\n", + trunc_2mpage(pv->pv_va)); +#endif + pmap_remove_pde(pmap, pde, trunc_2mpage(pv->pv_va)); + npv = TAILQ_FIRST(&pmap->pm_pvlist); + continue; + } +#ifdef INVARIANTS + printf("pmap_remove_pages: superpage at %lx to demote !!!\n", + pv->pv_va); +#endif + if (!pmap_demote(pmap, pde, pv->pv_va)) { + /* + * All mappings within the same 2mpage were + * destroyed and pv was freed. 
+ */ + npv = TAILQ_FIRST(&pmap->pm_pvlist); + continue; + } + } + +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = vtopte(pv->pv_va); #else - pte = pmap_pte(pmap, pv->pv_va); + pte = pmap_pde_to_pte(pde, pv->pv_va); #endif tpte = *pte; @@ -2597,7 +2791,7 @@ if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); - pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va)); + pmap_unuse_pt(pmap, pv->pv_va, *pde); free_pv_entry(pv); } pmap_invalidate_all(pmap); @@ -2657,7 +2851,7 @@ rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); - if (pde != NULL && (*pde & PG_V)) { + if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) { pte = vtopte(addr); rv = (*pte & PG_V) == 0; } @@ -2671,7 +2865,9 @@ static __inline void pmap_clear_ptes(vm_page_t m, long bit) { - register pv_entry_t pv; + pmap_t pmap; + pv_entry_t npv, pv; + pd_entry_t *pde; pt_entry_t pbits, *pte; if ((m->flags & PG_FICTITIOUS) || @@ -2683,7 +2879,7 @@ * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? */ - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, npv) { /* * don't write protect pager mappings */ @@ -2692,8 +2888,22 @@ continue; } - PMAP_LOCK(pv->pv_pmap); - pte = pmap_pte(pv->pv_pmap, pv->pv_va); + pmap = pv->pv_pmap; + PMAP_LOCK(pmap); + pde = pmap_pde(pmap, pv->pv_va); + if (*pde & PG_PS) { + printf("pmap_clear_ptes: superpage to demote !!!\n"); + if ((*pde & bit) == 0 || + !pmap_demote(pmap, pde, pv->pv_va)) { + /* + * All mappings within the same 2mpage were + * destroyed and pv was freed. 
+ */ + PMAP_UNLOCK(pmap); + continue; + } + } + pte = pmap_pde_to_pte(pde, pv->pv_va); retry: pbits = *pte; if (pbits & bit) { @@ -2707,9 +2917,9 @@ } else { atomic_clear_long(pte, bit); } - pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + pmap_invalidate_page(pmap, pv->pv_va); } - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); } if (bit == PG_RW) vm_page_flag_clear(m, PG_WRITEABLE); @@ -2747,6 +2957,7 @@ int pmap_ts_referenced(vm_page_t m) { + pmap_t pmap; register pv_entry_t pv, pvf, pvn; pt_entry_t *pte; pt_entry_t v; @@ -2770,20 +2981,21 @@ if (!pmap_track_modified(pv->pv_va)) continue; - PMAP_LOCK(pv->pv_pmap); - pte = pmap_pte(pv->pv_pmap, pv->pv_va); + pmap = pv->pv_pmap; + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, pv->pv_va); if (pte && ((v = pte_load(pte)) & PG_A) != 0) { atomic_clear_long(pte, PG_A); - pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + pmap_invalidate_page(pmap, pv->pv_va); rtval++; if (rtval > 4) { - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); break; } } - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); } while ((pv = pvn) != NULL && pv != pvf); } @@ -2873,24 +3085,35 @@ pmap_t pmap; vm_offset_t addr; { - pt_entry_t *ptep, pte; + pd_entry_t *pdep; + pt_entry_t pte; + vm_paddr_t pa; vm_page_t m; int val = 0; PMAP_LOCK(pmap); - ptep = pmap_pte(pmap, addr); - pte = (ptep != NULL) ? 
*ptep : 0; + pdep = pmap_pde(pmap, addr); + if (pdep != NULL && (*pdep & PG_V)) { + if (*pdep & PG_PS) { + KASSERT((*pdep & PG_FRAME & PDRMASK) == 0, + ("pmap_mincore: bad pde")); + pte = *pdep; + pa = (*pdep & PG_FRAME) | (addr & PDRMASK); + } else { + pte = *pmap_pde_to_pte(pdep, addr); + pa = pte & PG_FRAME; + } + } else { + pte = 0; + pa = 0; + } PMAP_UNLOCK(pmap); if (pte != 0) { - vm_paddr_t pa; - val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; - pa = pte & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); /* @@ -2975,3 +3198,131 @@ addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } + +#define COMPATIBLE_PTE_MASK (PG_NX | PG_U | PG_RW) +#define COMPATIBLE_PTE(a,b) ((a & COMPATIBLE_PTE_MASK) == (b & COMPATIBLE_PTE_MASK)) + +static void +mach_promote(pmap_t pmap, pd_entry_t *pde, reservation_t reserv) +{ + vm_paddr_t pa; + pt_entry_t *pte, *first_pte, flags; + vm_page_t page_pa; + vm_page_t tofree = PHYS_TO_VM_PAGE(*pde & PG_FRAME); /*pte page to free after promotion*/ + + first_pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); + flags = *first_pte; + pa = VM_PAGE_TO_PHYS(reserv->first_page); + + /* + * Check all the ptes before promotion + */ + for (pte = first_pte; pte < first_pte + NPTEPG; pte++) { + if (pa != (*pte & PG_FRAME)) + return; + pa += PAGE_SIZE; + + page_pa = PHYS_TO_VM_PAGE(*pte & PG_FRAME); + KASSERT(page_pa->reserv,("mach_promote: page has no reservation")); + KASSERT(page_pa->reserv == reserv,("mach_promote: reservation mismatch")); + + if ((*pte & PG_V) == 0 || !COMPATIBLE_PTE(*pte, flags)) + return; + + /* Add dirty bit and accsd bit to the flags, if necessary */ + flags |= *pte & (PG_A | PG_M); + } + + /* Create a superpage: add PG_PS bit to the pde */ + *pde = PG_PS | flags; + + if (pmap != kernel_pmap) + pmap->pm_stats.resident_count--; + + /* Invalidate old TLB entries */ + pmap_invalidate_all(pmap); + + /* + * XXX + * + * File system corruption occurs if pte pages belonging to the + * kernel pmap are freed. 
+ */ + if (pmap != kernel_pmap) { + KASSERT(tofree->wire_count == NPTEPG, + ("pmap_promote: pte page wire count error")); + tofree->wire_count = 0; + vm_page_free(tofree); + atomic_subtract_int(&cnt.v_wire_count, 1); + } + +#ifdef INVARIANTS + printf("Promotion successful XXX\n"); +#endif +} + +static boolean_t +pmap_demote(pmap_t pmap, pd_entry_t *pde0, vm_offset_t va) +{ + pd_entry_t save_pde_value, new_pte_value ; + pt_entry_t *pte_page_va, *new_pte_va; + vm_paddr_t pte_page_pa; + vm_page_t pte_page; + + KASSERT((*pde0 & PG_PS) != 0, + ("pmap_demote: not a superpage, impossible to demote")); + + /* STEP 1 + * Allocate the PTE page + */ + if ((pte_page = vm_page_alloc(NULL, pmap_pde_pindex(va), + VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { + pmap_remove_pde(pmap, pde0, trunc_2mpage(va)); + pmap_invalidate_all(pmap); + return (FALSE); + } + pte_page->wire_count += NPTEPG - 1; + KASSERT(pte_page->wire_count == NPTEPG, + ("pmap_demote: page table page %p has wire count %d", + pte_page, pte_page->wire_count)); + if (pmap != kernel_pmap) + pmap->pm_stats.resident_count++; + + pte_page_pa = VM_PAGE_TO_PHYS(pte_page); + pte_page_va = (vm_offset_t *) PHYS_TO_DMAP(pte_page_pa); + pte_page_pa |= PG_U | PG_RW | PG_V | PG_A | PG_M; + +repeat: + + /* STEP 2 + * Save the value of the pde entry + * Define the value of the first pte entry + */ + save_pde_value = *pde0; + + /* STEP 3 + * Fill the PTE page with the physical address of the base pages + */ + for ( new_pte_va = pte_page_va, new_pte_value = save_pde_value & ~PG_PS; + new_pte_va < pte_page_va + NPTEPG; + new_pte_va++ , new_pte_value += PAGE_SIZE) { + + *new_pte_va = new_pte_value ; + + } + + /* STEP 4: + * Check if pde value has changed + * If not, assign the new pde value. + * If yes, repeat the pte assignment loop. + */ + if (!atomic_cmpset_long(pde0, save_pde_value, pte_page_pa)) + goto repeat; + + /* + * Some implementations of the amd64 architecture prefetch TLB + * entries. 
+ */ + pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); + return (TRUE); +} ==== //depot/projects/superpages/src/sys/amd64/include/param.h#2 (text+ko) ==== @@ -119,6 +119,23 @@ #define NBPML4 (1ul<<PML4SHIFT)/* bytes/page map lev4 table */ #define PML4MASK (NBPML4-1) +#define BUDDY_QUEUES 10 /* Number of queues in the buddy allocator */ + +/* + * SP_LEVELS is the number of superpage sizes. + */ +#define SP_LEVELS 1 + +/* + * SP_SMALL_SHIFT is LOG2(NBPDR / PAGE_SIZE). + */ +#define SP_SMALL_SHIFT 9 + +/* + * If there is only one superpage size, then SP_FACTOR_SHIFT is zero. + */ +#define SP_FACTOR_SHIFT 0 + #define IOPAGES 2 /* pages of i/o permission bitmap */ #ifndef KSTACK_PAGES ==== //depot/projects/superpages/src/sys/arm/include/param.h#2 (text+ko) ==== @@ -94,6 +94,23 @@ #define NBPDR (1 << PDR_SHIFT) #define NPDEPG (1 << (32 - PDR_SHIFT)) +#define BUDDY_QUEUES 9 /* Number of queues in the buddy allocator */ + +/* + * SP_LEVELS is the number of superpage sizes. + */ +#define SP_LEVELS 2 + +/* + * SP_SMALL_SHIFT is LOG2("The Smallest Superpage Size" / PAGE_SIZE). 
+ */ +#define SP_SMALL_SHIFT 4 + +/* + * XXX + */ +#define SP_FACTOR_SHIFT 4 + #ifndef KSTACK_PAGES #define KSTACK_PAGES 2 #endif /* !KSTACK_PAGES */ ==== //depot/projects/superpages/src/sys/conf/NOTES#2 (text+ko) ==== @@ -110,16 +110,6 @@ # options BLKDEV_IOSIZE=8192 -# Options for the VM subsystem -# L2 cache size (in KB) can be specified in PQ_CACHESIZE -options PQ_CACHESIZE=512 # color for 512k cache -# Deprecated options supported for backwards compatibility -#options PQ_NOOPT # No coloring -#options PQ_LARGECACHE # color for 512k cache -#options PQ_HUGECACHE # color for 1024k cache -#options PQ_MEDIUMCACHE # color for 256k cache -#options PQ_NORMALCACHE # color for 64k cache - # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL ==== //depot/projects/superpages/src/sys/conf/files#2 (text+ko) ==== @@ -1890,6 +1890,7 @@ vm/swap_pager.c standard vm/uma_core.c standard vm/uma_dbg.c standard +vm/vm_buddy.c standard vm/vm_contig.c standard vm/memguard.c optional DEBUG_MEMGUARD vm/vm_fault.c standard @@ -1904,6 +1905,7 @@ vm/vm_pageout.c standard vm/vm_pageq.c standard vm/vm_pager.c standard +vm/vm_reserve.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard vm/vnode_pager.c standard ==== //depot/projects/superpages/src/sys/conf/options#2 (text+ko) ==== @@ -515,12 +515,6 @@ NO_SWAPPING opt_vm.h MALLOC_MAKE_FAILURES opt_vm.h MALLOC_PROFILE opt_vm.h -PQ_NOOPT opt_vmpage.h -PQ_NORMALCACHE opt_vmpage.h -PQ_MEDIUMCACHE opt_vmpage.h -PQ_LARGECACHE opt_vmpage.h -PQ_HUGECACHE opt_vmpage.h -PQ_CACHESIZE opt_vmpage.h # The MemGuard replacement allocator used for tamper-after-free detection DEBUG_MEMGUARD opt_vm.h ==== //depot/projects/superpages/src/sys/i386/i386/pmap.c#2 (text+ko) ==== @@ -133,6 +133,7 @@ #include <vm/vm_extern.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> +#include <vm/vm_reserve.h> #include 
<vm/uma.h> #include <machine/cpu.h> @@ -255,6 +256,8 @@ static pv_entry_t get_pv_entry(void); static void pmap_clear_ptes(vm_page_t m, int bit); +static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva); +static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, @@ -273,6 +276,9 @@ static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); #endif +static void mach_promote(pmap_t pmap, vm_offset_t va, reservation_t reserv); +static boolean_t pmap_demote(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); + CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); @@ -1199,10 +1205,8 @@ * normal 4K page. */ if (ptepa & PG_PS) { - pmap->pm_pdir[ptepindex] = 0; - ptepa = 0; - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - pmap_invalidate_all(kernel_pmap); + pmap_demote(pmap, &pmap->pm_pdir[ptepindex], va); + ptepa = pmap->pm_pdir[ptepindex]; } /* @@ -1521,6 +1525,52 @@ } /* + * pmap_remove_pde: do the things to unmap a superpage in a process + */ +static void +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva) +{ + pd_entry_t oldpde; + vm_offset_t eva, va; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & PDRMASK) == 0, + ("pmap_remove_pde: sva is not 4mpage aligned")); + oldpde = pte_load_clear(pdq); + if (oldpde & PG_W) + pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; + + /* + * Machines that don't support invlpg, also don't support + * PG_G. 
+ */ + if (oldpde & PG_G) + pmap_invalidate_page(kernel_pmap, sva); + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + if (oldpde & PG_MANAGED) { + eva = sva + NBPDR; + for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); + va < eva; va += PAGE_SIZE, m++) { + if (oldpde & PG_M) { +#if defined(PMAP_DIAGNOSTIC) + if (pmap_nw_modified((pt_entry_t) oldpde)) { + printf( + "pmap_remove_pde: modified 4mpage not writable: va: 0x%x, pde: 0x%x\n", + va, oldpde); + } +#endif + if (pmap_track_modified(va)) + vm_page_dirty(m); + } + if (oldpde & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + pmap_remove_entry(pmap, m, va); + } + } +} + +/* * pmap_remove_pte: do the things to unmap a page in a process */ static int @@ -1639,10 +1689,24 @@ * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { - pmap->pm_pdir[pdirindex] = 0; - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid = 1; - continue; + if (sva + NBPDR == pdnxt && eva >= pdnxt) { +#ifdef INVARIANTS + printf("pmap_remove: superpage at %x to destroy.\n", + sva); +#endif + pmap_remove_pde(pmap, pmap_pde(pmap, sva), sva); + anyvalid = 1; + continue; + } else { +#ifdef INVARIANTS + printf("pmap_remove: superpage at %x to demote !!!\n", + sva); +#endif + if (!pmap_demote(pmap, pmap_pde(pmap, sva), sva)) { + anyvalid = 1; /* XXX */ + continue; + } + } } /* @@ -1686,8 +1750,10 @@ void pmap_remove_all(vm_page_t m) { + pmap_t pmap; register pv_entry_t pv; pt_entry_t *pte, tpte; + pd_entry_t *pde; #if defined(PMAP_DIAGNOSTIC) /* @@ -1701,12 +1767,25 @@ mtx_assert(&vm_page_queue_mtx, MA_OWNED); sched_pin(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - PMAP_LOCK(pv->pv_pmap); >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200510020454.j924suAU029218>