Date: Tue, 24 Feb 2015 21:37:20 +0000 (UTC)
From: Nathan Whitehorn <nwhitehorn@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r279252 - in head/sys/powerpc: aim include ps3 pseries
Message-ID: <201502242137.t1OLbKAu048428@svn.freebsd.org>
Author: nwhitehorn
Date: Tue Feb 24 21:37:20 2015
New Revision: 279252
URL: https://svnweb.freebsd.org/changeset/base/279252

Log:
  New pmap implementation for 64-bit PowerPC processors. The main focus of
  this change is to improve concurrency:
  - Drop global state stored in the shadow overflow page table (and all
    other global state)
  - Remove all global locks
  - Use per-PTE lock bits to allow parallel page insertion
  - Reconstruct state when requested for evicted PTEs instead of buffering
    it during overflow

  This drops total wall time for make buildworld on a 32-thread POWER8
  system by a factor of two and system time by a factor of three, providing
  performance 20% better than similarly clocked Core i7 Xeons per-core.
  Performance on smaller SMP systems, where PMAP lock contention was not as
  much of an issue, is nearly unchanged.

  Tested on: POWER8, POWER5+, G5 UP, G5 SMP (64-bit and 32-bit kernels)
  Merged from: user/nwhitehorn/ppc64-pmap-rework
  Looked over by: jhibbits, andreast
  MFC after: 3 months
  Relnotes: yes
  Sponsored by: FreeBSD Foundation

Modified:
  head/sys/powerpc/aim/mmu_oea64.c
  head/sys/powerpc/aim/mmu_oea64.h
  head/sys/powerpc/aim/moea64_if.m
  head/sys/powerpc/aim/moea64_native.c
  head/sys/powerpc/include/pmap.h
  head/sys/powerpc/ps3/mmu_ps3.c
  head/sys/powerpc/pseries/mmu_phyp.c

Modified: head/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- head/sys/powerpc/aim/mmu_oea64.c Tue Feb 24 21:31:13 2015 (r279251)
+++ head/sys/powerpc/aim/mmu_oea64.c Tue Feb 24 21:37:20 2015 (r279252)
@@ -1,86 +1,27 @@
/*- - * Copyright (c) 2001 The NetBSD Foundation, Inc. + * Copyright (c) 2008-2015 Nathan Whitehorn * All rights reserved. * - * This code is derived from software contributed to The NetBSD Foundation - * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -/*- - * Copyright (C) 1995, 1996 Wolfgang Solfrank. - * Copyright (C) 1995, 1996 TooLs GmbH. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by TooLs GmbH. - * 4. The name of TooLs GmbH may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ - */ -/*- - * Copyright (C) 2001 Benno Rice. - * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> @@ -166,18 +107,25 @@ uintptr_t moea64_get_unique_vsid(void); /* * Locking semantics: - * -- Read lock: if no modifications are being made to either the PVO lists - * or page table or if any modifications being made result in internal - * changes (e.g. wiring, protection) such that the existence of the PVOs - * is unchanged and they remain associated with the same pmap (in which - * case the changes should be protected by the pmap lock) - * -- Write lock: required if PTEs/PVOs are being inserted or removed. + * + * There are two locks of interest: the page locks and the pmap locks, which + * protect their individual PVO lists and are locked in that order. The contents + * of all PVO entries are protected by the locks of their respective pmaps. + * The pmap of any PVO is guaranteed not to change so long as the PVO is linked + * into any list. + * */ -#define LOCK_TABLE_RD() rw_rlock(&moea64_table_lock) -#define UNLOCK_TABLE_RD() rw_runlock(&moea64_table_lock) -#define LOCK_TABLE_WR() rw_wlock(&moea64_table_lock) -#define UNLOCK_TABLE_WR() rw_wunlock(&moea64_table_lock) +#define PV_LOCK_COUNT PA_LOCK_COUNT*3 +static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; + +#define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])) +#define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) +#define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) +#define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) +#define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) +#define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) +#define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) struct ofw_map { cell_t om_va; @@ -202,9 +150,8 @@ static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); /* - * Lock for the pteg and pvo tables. + * Lock for the SLB tables. */ -struct rwlock moea64_table_lock; struct mtx moea64_slb_mutex; /* @@ -216,10 +163,8 @@ u_int moea64_pteg_mask; /* * PVO data. */ -struct pvo_head *moea64_pvo_table; /* pvo entries by pteg index */ -uma_zone_t moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */ -uma_zone_t moea64_mpvo_zone; /* zone for pvo entries for managed pages */ +uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; @@ -261,7 +206,6 @@ SYSCTL_INT(_machdep, OID_AUTO, moea64_pv vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; -uintptr_t moea64_scratchpage_pte[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; @@ -271,16 +215,17 @@ int moea64_large_page_shift = 0; /* * PVO calls. */ -static int moea64_pvo_enter(mmu_t, pmap_t, uma_zone_t, struct pvo_head *, - vm_offset_t, vm_offset_t, uint64_t, int, int8_t); -static void moea64_pvo_remove(mmu_t, struct pvo_entry *); +static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, + struct pvo_head *pvo_head); +static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); +static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. 
*/ -static boolean_t moea64_query_bit(mmu_t, vm_page_t, u_int64_t); -static u_int moea64_clear_bit(mmu_t, vm_page_t, u_int64_t); +static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); +static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz); @@ -388,43 +333,91 @@ static mmu_method_t moea64_methods[] = { MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); -static __inline u_int -va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) +static struct pvo_head * +vm_page_to_pvoh(vm_page_t m) { + + mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); + return (&m->md.mdpg_pvoh); +} + +static struct pvo_entry * +alloc_pvo_entry(int bootstrap) +{ + struct pvo_entry *pvo; + + if (!moea64_initialized || bootstrap) { + if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { + panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", + moea64_bpvo_pool_index, moea64_bpvo_pool_size, + moea64_bpvo_pool_size * sizeof(struct pvo_entry)); + } + pvo = &moea64_bpvo_pool[ + atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; + bzero(pvo, sizeof(*pvo)); + pvo->pvo_vaddr = PVO_BOOTSTRAP; + } else { + pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT); + bzero(pvo, sizeof(*pvo)); + } + + return (pvo); +} + + +static void +init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) +{ + uint64_t vsid; uint64_t hash; int shift; - shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT; - hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> - shift); - return (hash & moea64_pteg_mask); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + pvo->pvo_pmap = pmap; + va &= ~ADDR_POFF; + pvo->pvo_vaddr |= va; + vsid = va_to_vsid(pmap, va); + pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) + | (vsid << 16); + + shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift : + ADDR_PIDX_SHFT; + hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); + pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; } -static __inline struct pvo_head * -vm_page_to_pvoh(vm_page_t m) +static void +free_pvo_entry(struct pvo_entry *pvo) { - return (&m->md.mdpg_pvoh); + if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) + uma_zfree(moea64_pvo_zone, pvo); } -static __inline void -moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va, - uint64_t pte_lo, int flags) +void +moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) { - /* - * Construct a PTE. Default to IMB initially. Valid bit only gets - * set when the real pte is set in memory. - * - * Note: Don't set the valid bit for correct operation of tlb update. 
- */ - pt->pte_hi = (vsid << LPTE_VSID_SHIFT) | - (((uint64_t)(va & ADDR_PIDX) >> ADDR_API_SHFT64) & LPTE_API); - - if (flags & PVO_LARGE) - pt->pte_hi |= LPTE_BIG; + lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & + LPTE_AVPN_MASK; + lpte->pte_hi |= LPTE_VALID; + + if (pvo->pvo_vaddr & PVO_LARGE) + lpte->pte_hi |= LPTE_BIG; + if (pvo->pvo_vaddr & PVO_WIRED) + lpte->pte_hi |= LPTE_WIRED; + if (pvo->pvo_vaddr & PVO_HID) + lpte->pte_hi |= LPTE_HID; + + lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ + if (pvo->pvo_pte.prot & VM_PROT_WRITE) + lpte->pte_lo |= LPTE_BW; + else + lpte->pte_lo |= LPTE_BR; - pt->pte_lo = pte_lo; + if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) + lpte->pte_lo |= LPTE_NOEXEC; } static __inline uint64_t @@ -489,6 +482,7 @@ moea64_add_ofw_mappings(mmu_t mmup, phan { struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ pcell_t acells, trans_cells[sz/sizeof(cell_t)]; + struct pvo_entry *pvo; register_t msr; vm_offset_t off; vm_paddr_t pa_base; @@ -542,8 +536,11 @@ moea64_add_ofw_mappings(mmu_t mmup, phan moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) continue; - if (moea64_pvo_find_va(kernel_pmap, - translations[i].om_va + off) != NULL) + PMAP_LOCK(kernel_pmap); + pvo = moea64_pvo_find_va(kernel_pmap, + translations[i].om_va + off); + PMAP_UNLOCK(kernel_pmap); + if (pvo != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, @@ -606,6 +603,7 @@ static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { + struct pvo_entry *pvo; register_t msr; vm_paddr_t pa; vm_offset_t size, off; @@ -617,13 +615,16 @@ moea64_setup_direct_map(mmu_t mmup, vm_o DISABLE_TRANS(msr); if (hw_direct_map) { - LOCK_TABLE_WR(); PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; + pvo = alloc_pvo_entry(1 /* bootstrap */); + pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE; + init_pvo_entry(pvo, kernel_pmap, pa); + /* * Set memory access as guarded if prefetch within * the page could exit the available physmem area. @@ -636,18 +637,14 @@ moea64_setup_direct_map(mmu_t mmup, vm_o pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; - moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone, - NULL, pa, pa, pte_lo, - PVO_WIRED | PVO_LARGE, 0); + pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | + VM_PROT_EXECUTE; + pvo->pvo_pte.pa = pa | pte_lo; + moea64_pvo_enter(mmup, pvo, NULL); } } PMAP_UNLOCK(kernel_pmap); - UNLOCK_TABLE_WR(); } else { - size = sizeof(struct pvo_head) * moea64_pteg_count; - off = (vm_offset_t)(moea64_pvo_table); - for (pa = off; pa < off + size; pa += PAGE_SIZE) - moea64_kenter(mmup, pa, pa); size = moea64_bpvo_pool_size*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) @@ -782,8 +779,6 @@ moea64_early_bootstrap(mmu_t mmup, vm_of void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { - vm_size_t size; - register_t msr; int i; /* @@ -792,28 +787,14 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offs moea64_pteg_mask = moea64_pteg_count - 1; /* - * Allocate pv/overflow lists. 
+ * Initialize SLB table lock and page locks */ - size = sizeof(struct pvo_head) * moea64_pteg_count; - - moea64_pvo_table = (struct pvo_head *)moea64_bootstrap_alloc(size, - PAGE_SIZE); - CTR1(KTR_PMAP, "moea64_bootstrap: PVO table at %p", moea64_pvo_table); - - DISABLE_TRANS(msr); - for (i = 0; i < moea64_pteg_count; i++) - LIST_INIT(&moea64_pvo_table[i]); - ENABLE_TRANS(msr); - - /* - * Initialize the lock that synchronizes access to the pteg and pvo - * tables. - */ - rw_init_flags(&moea64_table_lock, "pmap tables", RW_RECURSE); mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); + for (i = 0; i < PV_LOCK_COUNT; i++) + mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); /* - * Initialise the unmanaged pvo pool. + * Initialise the bootstrap pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0); @@ -974,7 +955,7 @@ moea64_late_bootstrap(mmu_t mmup, vm_off /* * Allocate some things for page zeroing. We put this directly - * in the page table, marked with LPTE_LOCKED, to avoid any + * in the page table and use MOEA64_PTE_REPLACE to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ @@ -988,24 +969,17 @@ moea64_late_bootstrap(mmu_t mmup, vm_off moea64_kenter(mmup, moea64_scratchpage_va[i], 0); + PMAP_LOCK(kernel_pmap); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); - LOCK_TABLE_RD(); - moea64_scratchpage_pte[i] = MOEA64_PVO_TO_PTE( - mmup, moea64_scratchpage_pvo[i]); - moea64_scratchpage_pvo[i]->pvo_pte.lpte.pte_hi - |= LPTE_LOCKED; - MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[i], - &moea64_scratchpage_pvo[i]->pvo_pte.lpte, - moea64_scratchpage_pvo[i]->pvo_vpn); - UNLOCK_TABLE_RD(); + PMAP_UNLOCK(kernel_pmap); } } } /* - * Activate a user pmap. The pmap must be activated before its address - * space can be accessed in any way. + * Activate a user pmap. This mostly involves setting some non-CPU + * state. */ void moea64_activate(mmu_t mmu, struct thread *td) @@ -1040,35 +1014,33 @@ void moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry key, *pvo; - uintptr_t pt; + vm_page_t m; + int64_t refchg; - LOCK_TABLE_RD(); - PMAP_LOCK(pm); key.pvo_vaddr = sva; + PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { - pt = MOEA64_PVO_TO_PTE(mmu, pvo); if ((pvo->pvo_vaddr & PVO_WIRED) == 0) panic("moea64_unwire: pvo %p is missing PVO_WIRED", pvo); pvo->pvo_vaddr &= ~PVO_WIRED; - if ((pvo->pvo_pte.lpte.pte_hi & LPTE_WIRED) == 0) - panic("moea64_unwire: pte %p is missing LPTE_WIRED", - &pvo->pvo_pte.lpte); - pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; - if (pt != -1) { - /* - * The PTE's wired attribute is not a hardware - * feature, so there is no need to invalidate any TLB - * entries. 
- */ - MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, - pvo->pvo_vpn); + refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); + if ((pvo->pvo_vaddr & PVO_MANAGED) && + (pvo->pvo_pte.prot & VM_PROT_WRITE)) { + if (refchg < 0) + refchg = LPTE_CHG; + m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); + + refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); + if (refchg & LPTE_CHG) + vm_page_dirty(m); + if (refchg & LPTE_REF) + vm_page_aflag_set(m, PGA_REFERENCED); } pm->pm_stats.wired_count--; } - UNLOCK_TABLE_RD(); PMAP_UNLOCK(pm); } @@ -1085,13 +1057,10 @@ void moea64_set_scratchpage_pa(mmu_t mmu KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); - moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo &= - ~(LPTE_WIMG | LPTE_RPGN); - moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo |= + moea64_scratchpage_pvo[which]->pvo_pte.pa = moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; - MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[which], - &moea64_scratchpage_pvo[which]->pvo_pte.lpte, - moea64_scratchpage_pvo[which]->pvo_vpn); + MOEA64_PTE_REPLACE(mmup, moea64_scratchpage_pvo[which], + MOEA64_PTE_INVALIDATE); isync(); } @@ -1245,48 +1214,79 @@ int moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { + struct pvo_entry *pvo, *oldpvo; struct pvo_head *pvo_head; - uma_zone_t zone; uint64_t pte_lo; - u_int pvo_flags; int error; if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); + pvo = alloc_pvo_entry(0); + pvo->pvo_pmap = NULL; /* to be filled in later */ + pvo->pvo_pte.prot = prot; + + pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); + pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; + + if ((flags & PMAP_ENTER_WIRED) != 0) + pvo->pvo_vaddr |= PVO_WIRED; + if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { pvo_head = NULL; - zone = moea64_upvo_zone; - pvo_flags = 0; } else { - pvo_head = vm_page_to_pvoh(m); - zone = moea64_mpvo_zone; - pvo_flags = PVO_MANAGED; + pvo_head = &m->md.mdpg_pvoh; + pvo->pvo_vaddr |= PVO_MANAGED; } + + for (;;) { + PV_PAGE_LOCK(m); + PMAP_LOCK(pmap); + if (pvo->pvo_pmap == NULL) + init_pvo_entry(pvo, pmap, va); + if (prot & VM_PROT_WRITE) + if (pmap_bootstrapped && + (m->oflags & VPO_UNMANAGED) == 0) + vm_page_aflag_set(m, PGA_WRITEABLE); + + oldpvo = moea64_pvo_find_va(pmap, va); + if (oldpvo != NULL) { + if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && + oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && + oldpvo->pvo_pte.prot == prot) { + /* Identical mapping already exists */ + error = 0; - pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); + /* If not in page table, reinsert it */ + if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { + moea64_pte_overflow--; + MOEA64_PTE_INSERT(mmu, oldpvo); + } - if (prot & VM_PROT_WRITE) { - pte_lo |= LPTE_BW; - if (pmap_bootstrapped && - (m->oflags & VPO_UNMANAGED) == 0) - vm_page_aflag_set(m, PGA_WRITEABLE); - } else - pte_lo |= LPTE_BR; + /* Then just clean up and go home */ + PV_PAGE_UNLOCK(m); + PMAP_UNLOCK(pmap); + free_pvo_entry(pvo); + break; + } - if ((prot & VM_PROT_EXECUTE) == 0) - pte_lo |= LPTE_NOEXEC; + /* Otherwise, need to kill it first */ + KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " + "mapping does not match new mapping")); + moea64_pvo_remove_from_pmap(mmu, oldpvo); + } + error = moea64_pvo_enter(mmu, pvo, pvo_head); + PV_PAGE_UNLOCK(m); + PMAP_UNLOCK(pmap); - if ((flags & PMAP_ENTER_WIRED) 
!= 0) - pvo_flags |= PVO_WIRED; + /* Free any dead pages */ + if (oldpvo != NULL) { + PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); + moea64_pvo_remove_from_page(mmu, oldpvo); + PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); + free_pvo_entry(oldpvo); + } - for (;;) { - LOCK_TABLE_WR(); - PMAP_LOCK(pmap); - error = moea64_pvo_enter(mmu, pmap, zone, pvo_head, va, - VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags, psind); - PMAP_UNLOCK(pmap); - UNLOCK_TABLE_WR(); if (error != ENOMEM) break; if ((flags & PMAP_ENTER_NOSLEEP) != 0) @@ -1394,9 +1394,9 @@ moea64_extract(mmu_t mmu, pmap_t pm, vm_ if (pvo == NULL) pa = 0; else - pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | - (va - PVO_VADDR(pvo)); + pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); + return (pa); } @@ -1417,13 +1417,11 @@ moea64_extract_and_hold(mmu_t mmu, pmap_ PMAP_LOCK(pmap); retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); - if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && - ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || - (prot & VM_PROT_WRITE) == 0)) { + if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { if (vm_page_pa_tryrelock(pmap, - pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) + pvo->pvo_pte.pa & LPTE_RPGN, &pa)) goto retry; - m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); + m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); @@ -1436,16 +1434,17 @@ static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { + struct pvo_entry *pvo; + vm_offset_t va; + vm_page_t m; + int pflags, needed_lock; + /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. 
*/ - vm_offset_t va; - - vm_page_t m; - int pflags, needed_lock; *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); @@ -1463,17 +1462,21 @@ moea64_uma_page_alloc(uma_zone_t zone, i va = VM_PAGE_TO_PHYS(m); - LOCK_TABLE_WR(); + pvo = alloc_pvo_entry(1 /* bootstrap */); + + pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; + pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; + if (needed_lock) PMAP_LOCK(kernel_pmap); - moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone, - NULL, va, VM_PAGE_TO_PHYS(m), LPTE_M, PVO_WIRED | PVO_BOOTSTRAP, - 0); + init_pvo_entry(pvo, kernel_pmap, va); + pvo->pvo_vaddr |= PVO_WIRED; + + moea64_pvo_enter(installed_mmu, pvo, NULL); if (needed_lock) PMAP_UNLOCK(kernel_pmap); - UNLOCK_TABLE_WR(); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); @@ -1489,17 +1492,13 @@ moea64_init(mmu_t mmu) CTR0(KTR_PMAP, "moea64_init"); - moea64_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, - UMA_ZONE_VM | UMA_ZONE_NOFREE); - moea64_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), + moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; - uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc); - uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc); + uma_zone_set_allocf(moea64_pvo_zone,moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 @@ -1515,7 +1514,8 @@ moea64_is_referenced(mmu_t mmu, vm_page_ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); - return (moea64_query_bit(mmu, m, PTE_REF)); + + return (moea64_query_bit(mmu, m, LPTE_REF)); } boolean_t @@ -1540,11 +1540,12 @@ boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; - boolean_t rv; + boolean_t rv = TRUE; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); - rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0; + if (pvo != NULL) + rv = FALSE; PMAP_UNLOCK(pmap); return (rv); } @@ -1576,9 +1577,8 @@ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; - uintptr_t pt; + int64_t refchg, ret; pmap_t pmap; - uint64_t lo = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); @@ -1592,30 +1592,28 @@ moea64_remove_write(mmu_t mmu, vm_page_t if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); - LOCK_TABLE_RD(); + PV_PAGE_LOCK(m); + refchg = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); - if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { - pt = MOEA64_PVO_TO_PTE(mmu, pvo); - pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; - pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; - if (pt != -1) { - MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); - lo |= pvo->pvo_pte.lpte.pte_lo; - pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG; - MOEA64_PTE_CHANGE(mmu, pt, - &pvo->pvo_pte.lpte, pvo->pvo_vpn); - if (pvo->pvo_pmap == kernel_pmap) - isync(); - } + if (!(pvo->pvo_vaddr & PVO_DEAD) && + (pvo->pvo_pte.prot & VM_PROT_WRITE)) { + pvo->pvo_pte.prot &= ~VM_PROT_WRITE; + ret = MOEA64_PTE_REPLACE(mmu, pvo, + MOEA64_PTE_PROT_UPDATE); + if (ret < 0) + ret = LPTE_CHG; + refchg |= ret; + if (pvo->pvo_pmap == kernel_pmap) + isync(); } - if ((lo & LPTE_CHG) != 0) - vm_page_dirty(m); PMAP_UNLOCK(pmap); } - UNLOCK_TABLE_RD(); + if ((refchg | 
atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) + vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); + PV_PAGE_UNLOCK(m); } /* @@ -1646,8 +1644,7 @@ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; - struct pvo_head *pvo_head; - uintptr_t pt; + int64_t refchg; pmap_t pmap; uint64_t lo; @@ -1656,25 +1653,36 @@ moea64_page_set_memattr(mmu_t mmu, vm_pa return; } - pvo_head = vm_page_to_pvoh(m); lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); - LOCK_TABLE_RD(); - LIST_FOREACH(pvo, pvo_head, pvo_vlink) { + + PV_PAGE_LOCK(m); + LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); - pt = MOEA64_PVO_TO_PTE(mmu, pvo); - pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG; - pvo->pvo_pte.lpte.pte_lo |= lo; - if (pt != -1) { - MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, - pvo->pvo_vpn); + if (!(pvo->pvo_vaddr & PVO_DEAD)) { + pvo->pvo_pte.pa &= ~LPTE_WIMG; + pvo->pvo_pte.pa |= lo; + refchg = MOEA64_PTE_REPLACE(mmu, pvo, + MOEA64_PTE_INVALIDATE); + if (refchg < 0) + refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? + LPTE_CHG : 0; + if ((pvo->pvo_vaddr & PVO_MANAGED) && + (pvo->pvo_pte.prot & VM_PROT_WRITE)) { + refchg |= + atomic_readandclear_32(&m->md.mdpg_attrs); + if (refchg & LPTE_CHG) + vm_page_dirty(m); + if (refchg & LPTE_REF) + vm_page_aflag_set(m, PGA_REFERENCED); + } if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } - UNLOCK_TABLE_RD(); m->md.mdpg_cache_attrs = ma; + PV_PAGE_UNLOCK(m); } /* @@ -1683,17 +1691,29 @@ moea64_page_set_memattr(mmu_t mmu, vm_pa void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) { - uint64_t pte_lo; int error; + struct pvo_entry *pvo, *oldpvo; - pte_lo = moea64_calc_wimg(pa, ma); + pvo = alloc_pvo_entry(0); + pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; + pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); + pvo->pvo_vaddr |= PVO_WIRED; - LOCK_TABLE_WR(); PMAP_LOCK(kernel_pmap); - error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone, - NULL, va, pa, pte_lo, PVO_WIRED, 0); + oldpvo = moea64_pvo_find_va(kernel_pmap, va); + if (oldpvo != NULL) + moea64_pvo_remove_from_pmap(mmu, oldpvo); + init_pvo_entry(pvo, kernel_pmap, va); + error = moea64_pvo_enter(mmu, pvo, NULL); PMAP_UNLOCK(kernel_pmap); - UNLOCK_TABLE_WR(); + + /* Free any dead pages */ + if (oldpvo != NULL) { + PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); + moea64_pvo_remove_from_page(mmu, oldpvo); + PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); + free_pvo_entry(oldpvo); + } if (error != 0 && error != ENOENT) panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, @@ -1728,7 +1748,7 @@ moea64_kextract(mmu_t mmu, vm_offset_t v pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); - pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); + pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } @@ -1748,8 +1768,8 @@ moea64_kremove(mmu_t mmu, vm_offset_t va * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' - * unchanged. We cannot and therefore do not; *virt is updated with the - * first usable address after the mapped region. + * unchanged. 
Other architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, @@ -1757,8 +1777,22 @@ moea64_map(mmu_t mmu, vm_offset_t *virt, { vm_offset_t sva, va; + if (hw_direct_map) { + /* + * Check if every page in the region is covered by the direct + * map. The direct map covers all of physical memory. Use + * moea64_calc_wimg() as a shortcut to see if the page is in + * physical memory as a way to see if the direct map covers it. + */ + for (va = pa_start; va < pa_end; va += PAGE_SIZE) + if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M) + break; + if (va == pa_end) + return (pa_start); + } sva = *virt; va = sva; + /* XXX respect prot argument */ for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; @@ -1784,16 +1818,16 @@ moea64_page_exists_quick(mmu_t mmu, pmap ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; - LOCK_TABLE_RD(); + PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { - if (pvo->pvo_pmap == pmap) { + if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } - UNLOCK_TABLE_RD(); + PV_PAGE_UNLOCK(m); return (rv); } @@ -1810,11 +1844,11 @@ moea64_page_wired_mappings(mmu_t mmu, vm count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); - LOCK_TABLE_RD(); + PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) - if ((pvo->pvo_vaddr & PVO_WIRED) != 0) + if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED) count++; - UNLOCK_TABLE_RD(); + PV_PAGE_UNLOCK(m); return (count); } @@ -1926,59 +1960,45 @@ moea64_pinit0(mmu_t mmu, pmap_t pm) static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { - uintptr_t pt; - struct vm_page *pg; - uint64_t oldlo; + struct vm_page *pg; + vm_prot_t oldprot; + int32_t refchg; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* - * Grab the PTE pointer before we diddle with the cached PTE - * copy. - */ - pt = MOEA64_PVO_TO_PTE(mmu, pvo); - - /* *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
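To make the new locking scheme in the diff above concrete: instead of one global table lock, each physical page's PVO list is protected by one mutex out of a small fixed array, chosen by hashing the physical address, so operations on unrelated pages no longer serialize. The following is a minimal userland C sketch of that pattern only; PAGE_SHIFT, PV_LOCK_COUNT, pa_index() and the pthread mutexes below are stand-ins for the kernel's definitions (the kernel sizes the array as PA_LOCK_COUNT*3 and uses padded kernel mutexes), not the kernel code itself.

/*
 * Sketch: a hashed array of per-page locks, keyed by physical address.
 * Stand-in values and pthread mutexes; not the kernel implementation.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PV_LOCK_COUNT	64

static pthread_mutex_t pv_lock[PV_LOCK_COUNT];

static inline uint64_t
pa_index(uint64_t pa)
{
	return (pa >> PAGE_SHIFT);	/* one index per physical page */
}

#define PV_LOCKPTR(pa)	(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])
#define PV_LOCK(pa)	pthread_mutex_lock(PV_LOCKPTR(pa))
#define PV_UNLOCK(pa)	pthread_mutex_unlock(PV_LOCKPTR(pa))

int
main(void)
{
	uint64_t pa = 0x200000;		/* arbitrary physical address */
	int i;

	for (i = 0; i < PV_LOCK_COUNT; i++)
		pthread_mutex_init(&pv_lock[i], NULL);

	PV_LOCK(pa);			/* guards this page's PVO list */
	/* ... walk or modify the page's PVO list here ... */
	PV_UNLOCK(pa);

	printf("pa %#jx -> lock slot %ju of %d\n", (uintmax_t)pa,
	    (uintmax_t)(pa_index(pa) % PV_LOCK_COUNT), PV_LOCK_COUNT);
	return (0);
}

As the new "Locking semantics" comment states, the lock order is page lock first, then pmap lock, which is why moea64_enter() in the diff takes PV_PAGE_LOCK(m) before PMAP_LOCK(pmap).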
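Another pattern the diff repeats (in moea64_unwire(), moea64_remove_write() and moea64_page_set_memattr()) is how referenced/changed state is handled now that there is no shadow overflow table: MOEA64_PTE_REPLACE() hands back the old PTE's LPTE_REF/LPTE_CHG bits, or a negative value when the PTE had been evicted and the bits must be reconstructed conservatively, and the caller folds the result into the page together with the software-recorded md.mdpg_attrs. The sketch below models just that folding step with plain C types; the struct, helper name and bit positions are illustrative, and the kernel clears mdpg_attrs with atomic_readandclear_32() rather than a plain assignment.

#include <stdint.h>
#include <stdio.h>

#define LPTE_REF	0x100UL		/* illustrative bit positions */
#define LPTE_CHG	0x080UL

struct page_md {
	uint32_t mdpg_attrs;		/* software-accumulated REF/CHG bits */
	int	 dirty;
	int	 referenced;
};

static void
fold_refchg(struct page_md *md, int64_t refchg, int writable)
{
	/*
	 * A negative result means the PTE had been evicted from the hash
	 * table, so its REF/CHG state is gone; if the mapping was writable,
	 * conservatively assume the page was modified.
	 */
	if (refchg < 0)
		refchg = writable ? LPTE_CHG : 0;

	/* Merge with (and clear) anything recorded earlier for this page. */
	refchg |= md->mdpg_attrs;
	md->mdpg_attrs = 0;

	if (refchg & LPTE_CHG)
		md->dirty = 1;
	if (refchg & LPTE_REF)
		md->referenced = 1;
}

int
main(void)
{
	struct page_md md = { 0, 0, 0 };

	fold_refchg(&md, -1, 1);	/* evicted PTE, writable mapping */
	printf("dirty=%d referenced=%d\n", md.dirty, md.referenced);
	return (0);
}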