Date: Tue, 26 May 2009 05:30:21 +0000 (UTC)
From: Kip Macy <kmacy@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r192789 - in user/kmacy/releng_7_2_fcs/sys: amd64/amd64 amd64/include cddl/contrib/opensolaris/uts/common/fs/zfs dev/md kern nfsclient ufs/ffs vm
Message-ID: <200905260530.n4Q5ULOK047475@svn.freebsd.org>
Author: kmacy
Date: Tue May 26 05:30:21 2009
New Revision: 192789

URL: http://svn.freebsd.org/changeset/base/192789

Log:
  incorporate Jeffr's patch breaking down the vm_page_queue mutex, for
  operations which don't actually manipulate the page queues, into a
  hashed set of locks

Modified:
  user/kmacy/releng_7_2_fcs/sys/amd64/amd64/pmap.c
  user/kmacy/releng_7_2_fcs/sys/amd64/include/pmap.h
  user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  user/kmacy/releng_7_2_fcs/sys/dev/md/md.c
  user/kmacy/releng_7_2_fcs/sys/kern/kern_exec.c
  user/kmacy/releng_7_2_fcs/sys/kern/kern_subr.c
  user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
  user/kmacy/releng_7_2_fcs/sys/kern/sys_process.c
  user/kmacy/releng_7_2_fcs/sys/kern/uipc_cow.c
  user/kmacy/releng_7_2_fcs/sys/kern/uipc_syscalls.c
  user/kmacy/releng_7_2_fcs/sys/kern/vfs_bio.c
  user/kmacy/releng_7_2_fcs/sys/nfsclient/nfs_bio.c
  user/kmacy/releng_7_2_fcs/sys/ufs/ffs/ffs_vnops.c
  user/kmacy/releng_7_2_fcs/sys/vm/device_pager.c
  user/kmacy/releng_7_2_fcs/sys/vm/pmap.h
  user/kmacy/releng_7_2_fcs/sys/vm/swap_pager.c
  user/kmacy/releng_7_2_fcs/sys/vm/uma_core.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_contig.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_fault.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_glue.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_kern.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_map.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_mmap.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_object.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_page.c
  user/kmacy/releng_7_2_fcs/sys/vm/vm_page.h
  user/kmacy/releng_7_2_fcs/sys/vm/vm_pageout.c
  user/kmacy/releng_7_2_fcs/sys/vm/vnode_pager.c

Modified: user/kmacy/releng_7_2_fcs/sys/amd64/amd64/pmap.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/amd64/amd64/pmap.c	Tue May 26 04:51:56 2009	(r192788)
+++ user/kmacy/releng_7_2_fcs/sys/amd64/amd64/pmap.c	Tue May 26 05:30:21 2009	(r192789)
@@ -168,6 +168,17 @@ __FBSDID("$FreeBSD$");
 #define	pa_index(pa)	((pa) >> PDRSHIFT)
 #define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
 
+#define	PA_LOCKPTR(pa)	&pa_lock[pa_index((pa)) % PA_LOCK_COUNT]
+#define	PA_LOCK(pa)	mtx_lock(PA_LOCKPTR(pa))
+#define	PA_TRYLOCK(pa)	mtx_trylock(PA_LOCKPTR(pa))
+#define	PA_UNLOCK(pa)	mtx_unlock(PA_LOCKPTR(pa))
+#define	PA_LOCK_ASSERT(pa, a)	mtx_assert(PA_LOCKPTR(pa), (a))
+
+#define	PA_LOCK_COUNT	64
+
+struct mtx pa_lock[PA_LOCK_COUNT];
+struct mtx pv_lock;
+
 struct pmap kernel_pmap_store;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
@@ -184,6 +195,14 @@ static int pg_ps_enabled;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RD, &pg_ps_enabled, 0,
     "Are large page mappings enabled?");
 
+static int pmap_tryrelock_calls;
+SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_calls, CTLFLAG_RD,
+    &pmap_tryrelock_calls, 0, "Number of tryrelock calls");
+
+static int pmap_tryrelock_restart;
+SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
+    &pmap_tryrelock_restart, 0, "Number of tryrelock restarts");
+
 static u_int64_t	KPTphys;	/* phys addr of kernel level 1 */
 static u_int64_t	KPDphys;	/* phys addr of kernel level 2 */
 u_int64_t		KPDPphys;	/* phys addr of kernel level 3 */
@@ -244,10 +263,13 @@ static void pmap_insert_entry(pmap_t pma
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m);
 
-static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+static vm_page_t pmap_allocpde(pmap_t pmap, vm_paddr_t pa, vm_offset_t va,
+    int flags);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_offset_t va,
+    int flags);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, vm_paddr_t pa,
+    vm_pindex_t ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_page_t* free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *);
@@ -405,6 +427,37 @@ vtopde(vm_offset_t va)
 	return (PDmap + ((va >> PDRSHIFT) & mask));
 }
 
+/*
+ * Try to acquire a physical address lock while a pmap is locked.  If we
+ * fail to trylock we unlock and lock the pmap directly and cache the
+ * locked pa in *locked.  The caller should then restart their loop in case
+ * the virtual to physical mapping has changed.
+ */
+static int
+pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
+{
+	vm_paddr_t lockpa;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	atomic_add_int((volatile int *)&pmap_tryrelock_calls, 1);
+	lockpa = *locked;
+	*locked = pa;
+	if (lockpa) {
+		PA_LOCK_ASSERT(lockpa, MA_OWNED);
+		if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
+			return (0);
+		PA_UNLOCK(lockpa);
+	}
+	if (PA_TRYLOCK(pa))
+		return 0;
+	PMAP_UNLOCK(pmap);
+	PA_LOCK(pa);
+	PMAP_LOCK(pmap);
+	atomic_add_int((volatile int *)&pmap_tryrelock_restart, 1);
+
+	return (EAGAIN);
+}
+
 static u_int64_t
 allocpages(vm_paddr_t *firstaddr, int n)
 {
@@ -502,6 +555,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 {
 	vm_offset_t va;
 	pt_entry_t *pte, *unused;
+	int i;
 
 	/*
 	 * Create an initial set of page tables to run the kernel in.
@@ -524,6 +578,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
 	kernel_pmap->pm_root = NULL;
+	kernel_pmap->pm_free = NULL;
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
@@ -560,6 +615,11 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 
 	/* Initialize the PAT MSR. */
 	pmap_init_pat();
+
+	/* Setup page locks. */
+	for (i = 0; i < PA_LOCK_COUNT; i++)
+		mtx_init(&pa_lock[i], "page lock", NULL, MTX_DEF | MTX_RECURSE);
+	mtx_init(&pv_lock, "pv list lock", NULL, MTX_DEF);
 }
 
 /*
@@ -614,6 +674,13 @@ pmap_page_init(vm_page_t m)
 
 	TAILQ_INIT(&m->md.pv_list);
 }
 
+struct mtx *
+pmap_page_lockptr(vm_page_t m)
+{
+	KASSERT(m != NULL, ("pmap_page_lockptr: NULL page"));
+	return (PA_LOCKPTR(VM_PAGE_TO_PHYS(m)));
+}
+
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
@@ -996,29 +1063,35 @@ pmap_extract_and_hold(pmap_t pmap, vm_of
 {
 	pd_entry_t pde, *pdep;
 	pt_entry_t pte;
+	vm_paddr_t pa;
 	vm_page_t m;
 
+	pa = 0;
 	m = NULL;
-	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
+retry:
 	pdep = pmap_pde(pmap, va);
 	if (pdep != NULL && (pde = *pdep)) {
 		if (pde & PG_PS) {
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
-				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
-				    (va & PDRMASK));
+				if (pa_tryrelock(pmap, pde & PG_PS_FRAME, &pa))
+					goto retry;
+				m = PHYS_TO_VM_PAGE(pa | (va & PDRMASK));
 				vm_page_hold(m);
 			}
 		} else {
 			pte = *pmap_pde_to_pte(pdep, va);
 			if ((pte & PG_V) &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
-				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
+				if (pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+					goto retry;
+				m = PHYS_TO_VM_PAGE(pa);
 				vm_page_hold(m);
 			}
 		}
 	}
-	vm_page_unlock_queues();
+	if (pa)
+		PA_UNLOCK(pa);
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
@@ -1371,6 +1444,7 @@ pmap_pinit0(pmap_t pmap)
 	PMAP_LOCK_INIT(pmap);
 	pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
 	pmap->pm_root = NULL;
+	pmap->pm_free = NULL;
 	pmap->pm_active = 0;
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1408,6 +1482,7 @@ pmap_pinit(pmap_t pmap)
 	pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW |
 	    PG_A | PG_M;
 	pmap->pm_root = NULL;
+	pmap->pm_free = NULL;
 	pmap->pm_active = 0;
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1425,7 +1500,7 @@ pmap_pinit(pmap_t pmap)
  * race conditions.
 */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_pindex_t ptepindex, int flags)
 {
 	vm_page_t m, pdppg, pdpg;
 
@@ -1440,9 +1515,9 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 		if (flags & M_WAITOK) {
 			PMAP_UNLOCK(pmap);
-			vm_page_unlock_queues();
+			PA_UNLOCK(pa);
 			VM_WAIT;
-			vm_page_lock_queues();
+			PA_LOCK(pa);
 			PMAP_LOCK(pmap);
 		}
 
@@ -1484,7 +1559,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
 		pml4 = &pmap->pm_pml4[pml4index];
 		if ((*pml4 & PG_V) == 0) {
 			/* Have to allocate a new pdp, recurse */
-			if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
+			if (_pmap_allocpte(pmap, pa, NUPDE + NUPDPE + pml4index,
 			    flags) == NULL) {
 				--m->wire_count;
 				vm_page_free(m);
@@ -1516,7 +1591,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
 			pml4 = &pmap->pm_pml4[pml4index];
 			if ((*pml4 & PG_V) == 0) {
 				/* Have to allocate a new pd, recurse */
-				if (_pmap_allocpte(pmap, NUPDE + pdpindex,
+				if (_pmap_allocpte(pmap, pa, NUPDE + pdpindex,
 				    flags) == NULL) {
 					--m->wire_count;
 					vm_page_free(m);
@@ -1529,7 +1604,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
 			pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 			if ((*pdp & PG_V) == 0) {
 				/* Have to allocate a new pd, recurse */
-				if (_pmap_allocpte(pmap, NUPDE + pdpindex,
+				if (_pmap_allocpte(pmap, pa, NUPDE + pdpindex,
 				    flags) == NULL) {
 					--m->wire_count;
 					vm_page_free(m);
@@ -1552,7 +1627,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t
 }
 
 static vm_page_t
-pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpde(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags)
 {
 	vm_pindex_t pdpindex, ptepindex;
 	pdp_entry_t *pdpe;
@@ -1571,7 +1646,7 @@ retry:
 		/* Allocate a pd page. */
 		ptepindex = pmap_pde_pindex(va);
 		pdpindex = ptepindex >> NPDPEPGSHIFT;
-		pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags);
+		pdpg = _pmap_allocpte(pmap, pa, NUPDE + pdpindex, flags);
 		if (pdpg == NULL && (flags & M_WAITOK))
 			goto retry;
 	}
@@ -1579,10 +1654,11 @@ retry:
 }
 
 static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+pmap_allocpte(pmap_t pmap, vm_paddr_t pa, vm_offset_t va, int flags)
 {
 	vm_pindex_t ptepindex;
 	pd_entry_t *pd;
+	vm_paddr_t lockedpa;
 	vm_page_t m;
 
 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
@@ -1592,6 +1668,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t v
 	/*
 	 * Calculate pagetable page index
 	 */
+	lockedpa = pa;
 	ptepindex = pmap_pde_pindex(va);
retry:
 	/*
@@ -1604,6 +1681,8 @@ retry:
 	 * normal 4K page.
 	 */
 	if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+		if (pa_tryrelock(pmap, *pd & PG_FRAME, &lockedpa))
+			goto retry;
 		if (!pmap_demote_pde(pmap, pd, va)) {
 			/*
 			 * Invalidation of the 2MB page mapping may have caused
@@ -1612,6 +1691,13 @@ retry:
 			pd = NULL;
 		}
 	}
+	if (pa) {
+		if (pa_tryrelock(pmap, pa, &lockedpa))
+			goto retry;
+	} else if (lockedpa) {
+		PA_UNLOCK(lockedpa);
+		lockedpa = 0;
+	}
 
 	/*
 	 * If the page table page is mapped, we just increment the
@@ -1625,7 +1711,7 @@ retry:
 		 * Here if the pte page isn't mapped, or if it has been
 		 * deallocated.
 		 */
-		m = _pmap_allocpte(pmap, ptepindex, flags);
+		m = _pmap_allocpte(pmap, pa, ptepindex, flags);
 		if (m == NULL && (flags & M_WAITOK))
 			goto retry;
 	}
@@ -1842,9 +1928,14 @@ pmap_collect(pmap_t locked_pmap, struct
 	vm_offset_t va;
 	vm_page_t m, free;
 
+	vm_page_lock_queues();
 	TAILQ_FOREACH(m, &vpq->pl, pageq) {
-		if (m->hold_count || m->busy)
+		if (m->hold_count || m->busy || vm_page_trylock(m) == 0)
+			continue;
+		if (m->hold_count || m->busy) {
+			vm_page_unlock(m);
 			continue;
+		}
 		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 			va = pv->pv_va;
 			pmap = PV_PMAP(pv);
@@ -1879,7 +1970,9 @@ pmap_collect(pmap_t locked_pmap, struct
 			if (pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 		}
+		vm_page_unlock(m);
 	}
+	vm_page_unlock_queues();
 }
 
 
@@ -1893,8 +1986,8 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	mtx_lock(&pv_lock);
 	PV_STAT(pv_entry_frees++);
 	PV_STAT(pv_entry_spare++);
 	pv_entry_count--;
@@ -1908,6 +2001,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
 	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
 	    pc->pc_map[2] != PC_FREE2) {
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+		mtx_unlock(&pv_lock);
 		return;
 	}
 	PV_STAT(pv_entry_spare -= _NPCPV);
@@ -1916,7 +2010,8 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 	dump_drop_page(m->phys_addr);
-	vm_page_unwire(m, 0);
+	m->wire_count--;
+	mtx_unlock(&pv_lock);
 	vm_page_free(m);
 }
 
@@ -1937,7 +2032,7 @@ get_pv_entry(pmap_t pmap, int try)
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	mtx_lock(&pv_lock);
 	PV_STAT(pv_entry_allocs++);
 	pv_entry_count++;
 	if (pv_entry_count > pv_entry_high_water)
@@ -1965,6 +2060,7 @@ retry:
 			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 		}
 		PV_STAT(pv_entry_spare--);
+		mtx_unlock(&pv_lock);
 		return (pv);
 	}
 }
@@ -1976,6 +2072,7 @@ retry:
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
+			mtx_unlock(&pv_lock);
 			return (NULL);
 		}
 		/*
@@ -2006,6 +2103,7 @@ retry:
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
+	mtx_unlock(&pv_lock);
 	return (pv);
 }
 
@@ -2020,7 +2118,6 @@ pmap_pvh_remove(struct md_page *pvh, pma
 {
 	pv_entry_t pv;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
@@ -2043,7 +2140,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offse
 	vm_offset_t va_last;
 	vm_page_t m;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PA_LOCK_ASSERT(pa, MA_OWNED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_demote_pde: pa is not 2mpage aligned"));
 
@@ -2081,7 +2178,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offs
 	vm_offset_t va_last;
 	vm_page_t m;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PA_LOCK_ASSERT(pa, MA_OWNED);
 	KASSERT((pa & PDRMASK) == 0,
 	    ("pmap_pv_promote_pde: pa is not 2mpage aligned"));
 
@@ -2127,7 +2224,7 @@ pmap_remove_entry(pmap_t pmap, vm_page_t
 {
 	struct md_page *pvh;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	pmap_pvh_free(&m->md, pmap, va);
 	if (TAILQ_EMPTY(&m->md.pv_list)) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
@@ -2146,7 +2243,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset
 	pv_entry_t pv;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	pv = get_pv_entry(pmap, FALSE);
 	pv->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
@@ -2161,7 +2258,7 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm
 	pv_entry_t pv;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	if (pv_entry_count < pv_entry_high_water &&
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 		pv->pv_va = va;
@@ -2180,7 +2277,7 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
 	struct md_page *pvh;
 	pv_entry_t pv;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PA_LOCK_ASSERT(pa, MA_OWNED);
 	if (pv_entry_count < pv_entry_high_water &&
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 		pv->pv_va = va;
@@ -2304,6 +2401,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t
 		pmap_invalidate_page(kernel_pmap, sva);
 	pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 	if (oldpde & PG_MANAGED) {
+		PA_LOCK_ASSERT(oldpde & PG_PS_FRAME, MA_OWNED);
 		pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + NBPDR;
@@ -2336,6 +2434,9 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t
 	return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free));
 }
 
+#define	PMAP_REMOVE_LAST	0x1
+#define	PMAP_REMOVE_UNLOCKED	0x2
+
 /*
  * pmap_remove_pte: do the things to unmap a page in a process
 */
@@ -2345,8 +2446,10 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t
 {
 	pt_entry_t oldpte;
 	vm_page_t m;
+	int ret;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	ret = 0;
 	oldpte = pte_load_clear(ptq);
 	if (oldpte & PG_W)
 		pmap->pm_stats.wired_count -= 1;
@@ -2357,15 +2460,23 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t
 	if (oldpte & PG_G)
 		pmap_invalidate_page(kernel_pmap, va);
 	pmap->pm_stats.resident_count -= 1;
 
+	if (pmap_unuse_pt(pmap, va, ptepde, free))
+		ret = PMAP_REMOVE_LAST;
 	if (oldpte & PG_MANAGED) {
 		m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
+		if (vm_page_trylock(m) == 0) {
+			PMAP_UNLOCK(pmap);
+			vm_page_lock(m);
+			PMAP_LOCK(pmap);
+		}
 		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 			vm_page_dirty(m);
 		if (oldpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 		pmap_remove_entry(pmap, m, va);
+		vm_page_unlock(m);
 	}
-	return (pmap_unuse_pt(pmap, va, ptepde, free));
+	return (ret);
 }
 
@@ -2401,6 +2512,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 	pd_entry_t ptpaddr, *pde;
 	pt_entry_t *pte;
 	vm_page_t free = NULL;
+	vm_paddr_t pa;
 	int anyvalid;
 
 	/*
@@ -2409,11 +2521,12 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
+	pa = 0;
 	anyvalid = 0;
 
-	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
+restart:
 
 	/*
 	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
@@ -2468,6 +2581,10 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 		 * Check for large page.
 		 */
 		if ((ptpaddr & PG_PS) != 0) {
+			if (pa_tryrelock(pmap, ptpaddr & PG_FRAME, &pa)) {
+				va_next = sva;
+				continue;
+			}
 			/*
 			 * Are we removing the entire large page?  If not,
 			 * demote the mapping and fall through.
@@ -2484,8 +2601,12 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 			} else if (!pmap_demote_pde(pmap, pde, sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
-			} else
-				ptpaddr = *pde;
+			}
+			ptpaddr = *pde;
+		}
+		if (pa) {
+			PA_UNLOCK(pa);
+			pa = 0;
 		}
 		/*
 		 * If the page table page is mapped, we just increment the
@@ -2498,6 +2619,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 
 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
 		    sva += PAGE_SIZE) {
+			int ret;
 
 			if (*pte == 0)
 				continue;
@@ -2507,14 +2629,20 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 			 */
 			if ((*pte & PG_G) == 0)
 				anyvalid = 1;
-			if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free))
+			ret = pmap_remove_pte(pmap, pte, sva, ptpaddr, &free);
+			if (ret & PMAP_REMOVE_LAST)
 				break;
+			if (ret & PMAP_REMOVE_UNLOCKED) {
+				va_next = sva + PAGE_SIZE;
+				goto restart;
+			}
 		}
 	}
+	if (pa)
+		PA_UNLOCK(pa);
out:
 	if (anyvalid)
 		pmap_invalidate_all(pmap);
-	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 	pmap_free_zero_pages(free);
 }
@@ -2545,7 +2673,7 @@ pmap_remove_all(vm_page_t m)
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_remove_all: page %p is fictitious", m));
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		va = pv->pv_va;
@@ -2646,6 +2774,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
 	pd_entry_t ptpaddr, *pde;
 	pt_entry_t *pte;
 	int anychanged;
+	vm_paddr_t pa;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
@@ -2656,10 +2785,10 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
 	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
 		return;
 
+	pa = 0;
 	anychanged = 0;
-
-	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
+restart:
 	for (; sva < eva; sva = va_next) {
 
 		pml4e = pmap_pml4e(pmap, sva);
@@ -2727,6 +2856,8 @@ retry:
 				continue;
 			if (pbits & PG_MANAGED) {
 				m = NULL;
+				if (pa_tryrelock(pmap, pbits & PG_FRAME, &pa))
+					goto restart;
 				if (pbits & PG_A) {
 					m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 					vm_page_flag_set(m, PG_REFERENCED);
@@ -2755,9 +2886,10 @@ retry:
 			}
 		}
 	}
+	if (pa)
+		PA_UNLOCK(pa);
 	if (anychanged)
 		pmap_invalidate_all(pmap);
-	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
 }
 
@@ -2893,7 +3025,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
 	vm_paddr_t pa;
 	pd_entry_t *pde;
 	pt_entry_t *pte;
-	vm_paddr_t opa;
+	vm_paddr_t opa, lockedpa;
 	pt_entry_t origpte, newpte;
 	vm_page_t mpte, om;
 	boolean_t invlva;
@@ -2905,16 +3037,16 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
 
 	mpte = NULL;
 
-	vm_page_lock_queues();
+	lockedpa = pa = VM_PAGE_TO_PHYS(m);
+	PA_LOCK(pa);
 	PMAP_LOCK(pmap);
-
+restart:
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
-	if (va < VM_MAXUSER_ADDRESS) {
-		mpte = pmap_allocpte(pmap, va, M_WAITOK);
-	}
+	if (va < VM_MAXUSER_ADDRESS)
+		mpte = pmap_allocpte(pmap, lockedpa, va, M_WAITOK);
 
 	pde = pmap_pde(pmap, va);
 	if (pde != NULL && (*pde & PG_V) != 0) {
@@ -2924,10 +3056,11 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
 	} else
 		panic("pmap_enter: invalid page directory va=%#lx", va);
 
-	pa = VM_PAGE_TO_PHYS(m);
 	om = NULL;
 	origpte = *pte;
 	opa = origpte & PG_FRAME;
+	if (pa_tryrelock(pmap, opa ? opa : pa, &lockedpa))
+		goto restart;
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
@@ -2965,11 +3098,18 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
 	 * handle validating new mapping.
 	 */
 	if (opa) {
+		origpte = pte_load_clear(pte);
 		if (origpte & PG_W)
 			pmap->pm_stats.wired_count--;
 		if (origpte & PG_MANAGED) {
 			om = PHYS_TO_VM_PAGE(opa);
+			vm_page_lock_assert(om, MA_OWNED);
 			pmap_remove_entry(pmap, om, va);
+			if (origpte & PG_A)
+				vm_page_flag_set(om, PG_REFERENCED);
+			if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+				vm_page_dirty(om);
+			om = NULL;
 		}
 		if (mpte != NULL) {
 			mpte->wire_count--;
@@ -2977,9 +3117,15 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
 			    ("pmap_enter: missing reference to page table page,"
 			    " va: 0x%lx", va));
 		}
+		if (origpte & PG_A ||
+		    (origpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+			pmap_invalidate_page(pmap, va);
+		origpte = 0;
 	} else
 		pmap->pm_stats.resident_count++;
 
+	if (pa_tryrelock(pmap, pa, &lockedpa))
+		goto restart;
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
@@ -2997,6 +3143,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
 		pmap->pm_stats.wired_count++;
 
validate:
+	vm_page_lock_assert(m, MA_OWNED);
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
@@ -3027,14 +3174,14 @@ validate:
 		origpte = pte_load_store(pte, newpte);
 		if (origpte & PG_A) {
 			if (origpte & PG_MANAGED)
-				vm_page_flag_set(om, PG_REFERENCED);
-			if (opa != VM_PAGE_TO_PHYS(m) || ((origpte &
-			    PG_NX) == 0 && (newpte & PG_NX)))
+				vm_page_flag_set(m, PG_REFERENCED);
+			if (((origpte & PG_NX) == 0 &&
+			    (newpte & PG_NX)))
 				invlva = TRUE;
 		}
 		if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 			if ((origpte & PG_MANAGED) != 0)
-				vm_page_dirty(om);
+				vm_page_dirty(m);
 			if ((newpte & PG_RW) == 0)
 				invlva = TRUE;
 		}
@@ -3052,7 +3199,7 @@ validate:
 	    pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
 		pmap_promote_pde(pmap, pde, va);
 
-	vm_page_unlock_queues();
+	PA_UNLOCK(pa);
 	PMAP_UNLOCK(pmap);
 }
 
@@ -3067,10 +3214,12 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t
 {
 	pd_entry_t *pde, newpde;
 	vm_page_t free, mpde;
+	vm_paddr_t pa;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if ((mpde = pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
+	pa = VM_PAGE_TO_PHYS(m);
+	if ((mpde = pmap_allocpde(pmap, pa, va, M_NOWAIT)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (FALSE);
@@ -3148,9 +3297,10 @@ pmap_enter_object(pmap_t pmap, vm_offset
 	psize = atop(end - start);
 	mpte = NULL;
 	m = m_start;
-	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
+		vm_page_lock(m);
+		PMAP_LOCK(pmap);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && pg_ps_enabled &&
 		    vm_reserv_level_iffullpop(m) == 0 &&
@@ -3159,9 +3309,10 @@ pmap_enter_object(pmap_t pmap, vm_offset
 		else
 			mpte = pmap_enter_quick_locked(pmap, va, m, prot,
 			    mpte);
+		PMAP_UNLOCK(pmap);
+		vm_page_unlock(m);
 		m = TAILQ_NEXT(m, listq);
 	}
-	PMAP_UNLOCK(pmap);
 }
 
 /*
@@ -3193,7 +3344,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
@@ -3226,7 +3377,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 			mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME);
 			mpte->wire_count++;
 		} else {
-			mpte = _pmap_allocpte(pmap, ptepindex,
+			pa = VM_PAGE_TO_PHYS(m);
+			mpte = _pmap_allocpte(pmap, pa, ptepindex,
 			    M_NOWAIT);
 			if (mpte == NULL)
 				return (mpte);
@@ -3312,7 +3464,8 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
 {
 	vm_offset_t va;
 	vm_page_t p, pdpg;
-
+	vm_paddr_t pa;
+	
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE,
 	    ("pmap_object_init_pt: non-device object"));
@@ -3337,14 +3490,16 @@ retry:
 
 			m[0] = p;
 			if (vm_pager_get_pages(object, m, 1, 0) !=
 			    VM_PAGER_OK) {
-				vm_page_lock_queues();
+				vm_page_lock(p);
 				vm_page_free(p);
-				vm_page_unlock_queues();
+				vm_page_unlock(p);
 				return;
 			}
 			p = vm_page_lookup(object, pindex);
+			vm_page_lock(p);
 			vm_page_wakeup(p);
+			vm_page_unlock(p);
 		}
 
 		ptepa = VM_PAGE_TO_PHYS(p);
@@ -3355,14 +3510,19 @@ retry:
 
 	PMAP_LOCK(pmap);
 	for (va = addr; va < addr + size; va += NBPDR) {
+		pa = VM_PAGE_TO_PHYS(p);
 		while ((pdpg =
-		    pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
+		    pmap_allocpde(pmap, pa, va, M_NOWAIT)) == NULL) {
 			PMAP_UNLOCK(pmap);
+			vm_page_lock(p);
 			vm_page_busy(p);
+			vm_page_unlock(p);
 			VM_OBJECT_UNLOCK(object);
 			VM_WAIT;
 			VM_OBJECT_LOCK(object);
+			vm_page_lock(p);
 			vm_page_wakeup(p);
+			vm_page_unlock(p);
 			PMAP_LOCK(pmap);
 		}
 		pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
@@ -3398,27 +3558,20 @@ pmap_change_wiring(pmap_t pmap, vm_offse
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte;
-	boolean_t are_queues_locked;
-
-	are_queues_locked = FALSE;
+	vm_paddr_t pa;
 
 	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
 	 */
-retry:
+	pa = 0;
 	PMAP_LOCK(pmap);
+retry:
 	pde = pmap_pde(pmap, va);
 	if ((*pde & PG_PS) != 0) {
 		if (!wired != ((*pde & PG_W) == 0)) {
-			if (!are_queues_locked) {
-				are_queues_locked = TRUE;
-				if (!mtx_trylock(&vm_page_queue_mtx)) {
-					PMAP_UNLOCK(pmap);
-					vm_page_lock_queues();
-					goto retry;
-				}
-			}
+			if (pa_tryrelock(pmap, *pde & PG_FRAME, &pa))
+				goto retry;
 			if (!pmap_demote_pde(pmap, pde, va))
 				panic("pmap_change_wiring: demotion failed");
 		} else
@@ -3433,8 +3586,8 @@ retry:
 		atomic_clear_long(pte, PG_W);
 	}
out:
-	if (are_queues_locked)
-		vm_page_unlock_queues();
+	if (pa)
+		PA_UNLOCK(pa);
 	PMAP_UNLOCK(pmap);
 }
 
@@ -3456,6 +3609,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t va_next;
+	vm_paddr_t pa;
 
 	if (dst_addr != src_addr)
 		return;
@@ -3463,7 +3617,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 	if (!pmap_is_current(src_pmap))
 		return;
 
-	vm_page_lock_queues();
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
@@ -3507,20 +3660,23 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 			continue;
 
 		if (srcptepaddr & PG_PS) {
-			dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT);
+			pa = srcptepaddr & PG_PS_FRAME;
+			if (PA_TRYLOCK(pa) == 0)
+				continue;
+			dstmpde = pmap_allocpde(dst_pmap, pa, addr, M_NOWAIT);
 			if (dstmpde == NULL)
 				break;
 			pde = (pd_entry_t *)
 			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde));
 			pde = &pde[pmap_pde_index(addr)];
 			if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
-			    pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
-			    PG_PS_FRAME))) {
+			    pmap_pv_insert_pde(dst_pmap, addr, pa))) {
 				*pde = srcptepaddr & ~PG_W;
 				dst_pmap->pm_stats.resident_count +=
 				    NBPDR / PAGE_SIZE;
 			} else
 				dstmpde->wire_count--;
+			PA_UNLOCK(pa);
 			continue;
 		}
 
@@ -3539,7 +3695,13 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 			 * we only virtual copy managed pages
 			 */
 			if ((ptetemp & PG_MANAGED) != 0) {
-				dstmpte = pmap_allocpte(dst_pmap, addr,
+				vm_page_t p;
+
+				pa = ptetemp & PG_FRAME;
+				if (PA_TRYLOCK(pa) == 0)
+					break;
+				p = PHYS_TO_VM_PAGE(pa);
+				dstmpte = pmap_allocpte(dst_pmap, pa, addr,
 				    M_NOWAIT);
 				if (dstmpte == NULL)
 					break;
@@ -3548,7 +3710,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 				dst_pte = &dst_pte[pmap_pte_index(addr)];
 				if (*dst_pte == 0 &&
 				    pmap_try_insert_pv_entry(dst_pmap, addr,
-				    PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
+				    p)) {
 					/*
 					 * Clear the wired, modified, and
					 * accessed (referenced) bits
@@ -3566,6 +3728,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 					pmap_free_zero_pages(free);
 				}
 			}
+			PA_UNLOCK(pa);
 			if (dstmpte->wire_count >= srcmpte->wire_count)
 				break;
 		}
@@ -3573,7 +3736,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 			src_pte++;
 		}
 	}
-	vm_page_unlock_queues();
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }
@@ -3653,7 +3815,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_p
 	if (m->flags & PG_FICTITIOUS)
 		return FALSE;
 
-	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	vm_page_lock_assert(m, MA_OWNED);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 		if (PV_PMAP(pv) == pmap) {
 			return TRUE;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
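
For readers skimming the change: the heart of the patch is the PA_LOCKPTR()
family added at the top of pmap.c.  Instead of serializing on the global
vm_page_queue_mtx, a page's lock is found by hashing its physical address
into a small fixed array of mutexes; every page within the same 2MB frame
hashes to the same lock, which is why the 2MB-page paths can assert
PA_LOCK_ASSERT() on the superpage frame address.  Below is a minimal
userland sketch of that scheme, with pthreads standing in for kernel
mutexes; PDRSHIFT, PA_LOCK_COUNT, and the hash mirror the diff, while the
pthread scaffolding and function names are illustrative assumptions, not
part of the commit.

	#include <pthread.h>
	#include <stdint.h>

	#define PA_LOCK_COUNT	64
	#define PDRSHIFT	21	/* 2MB superpage shift, as on amd64 */
	#define pa_index(pa)	((pa) >> PDRSHIFT)

	static pthread_mutex_t pa_lock[PA_LOCK_COUNT];

	/* Same hash as PA_LOCKPTR() in the diff: superpage index mod table size. */
	static pthread_mutex_t *
	pa_lockptr(uint64_t pa)
	{
		return (&pa_lock[pa_index(pa) % PA_LOCK_COUNT]);
	}

	/* Userland counterpart of the mtx_init() loop added to pmap_bootstrap(). */
	static void
	pa_lock_init(void)
	{
		int i;

		for (i = 0; i < PA_LOCK_COUNT; i++)
			pthread_mutex_init(&pa_lock[i], NULL);
	}

The PA_LOCK()/PA_UNLOCK() macros in the diff are then just lock/unlock on
pa_lockptr(pa); contention is spread across 64 locks instead of one.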
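The other recurring pattern is pa_tryrelock().  The lock order it
preserves: while the pmap lock is held, a page lock may only be
try-acquired; on failure the pmap lock is dropped before blocking, and
EAGAIN tells the caller to restart its lookup, since the virtual-to-physical
mapping may have changed while the pmap was unlocked.  A sketch of the same
protocol under the assumptions above (pmap_lock is a hypothetical stand-in
for PMAP_LOCK/PMAP_UNLOCK, and pa_lockptr() is repeated from the previous
sketch so this fragment stands alone; pa_lock[] must still be initialized
as shown there):

	#include <errno.h>
	#include <pthread.h>
	#include <stdint.h>

	#define PA_LOCK_COUNT	64
	#define pa_index(pa)	((pa) >> 21)

	static pthread_mutex_t pa_lock[PA_LOCK_COUNT];
	static pthread_mutex_t pmap_lock = PTHREAD_MUTEX_INITIALIZER;

	static pthread_mutex_t *
	pa_lockptr(uint64_t pa)
	{
		return (&pa_lock[pa_index(pa) % PA_LOCK_COUNT]);
	}

	/*
	 * Call with pmap_lock held and *locked naming the pa whose lock is
	 * currently owned (0 if none).  On return the lock for "pa" is held;
	 * the result is 0 if pmap_lock was never released, or EAGAIN if it
	 * was dropped and reacquired, in which case any state derived from
	 * the pmap is stale and the caller must restart its lookup.
	 */
	static int
	pa_tryrelock(uint64_t pa, uint64_t *locked)
	{
		uint64_t lockpa = *locked;

		*locked = pa;
		if (lockpa != 0) {
			if (pa_lockptr(pa) == pa_lockptr(lockpa))
				return (0);	/* hashes to the lock already held */
			pthread_mutex_unlock(pa_lockptr(lockpa));
		}
		if (pthread_mutex_trylock(pa_lockptr(pa)) == 0)
			return (0);
		/* Contended: give up pmap_lock before sleeping on the page lock. */
		pthread_mutex_unlock(&pmap_lock);
		pthread_mutex_lock(pa_lockptr(pa));
		pthread_mutex_lock(&pmap_lock);
		return (EAGAIN);
	}

Callers then take the shape seen throughout the diff: a retry (or restart)
label above the lookup, and "if (pa_tryrelock(...)) goto retry;" after each
translation is read.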