From owner-svn-src-stable@freebsd.org Fri Sep 16 12:36:12 2016 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 3EC87BDC6AC; Fri, 16 Sep 2016 12:36:12 +0000 (UTC) (envelope-from andrew@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 040DE6E0; Fri, 16 Sep 2016 12:36:11 +0000 (UTC) (envelope-from andrew@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u8GCaBRu016577; Fri, 16 Sep 2016 12:36:11 GMT (envelope-from andrew@FreeBSD.org) Received: (from andrew@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u8GCaB2u016575; Fri, 16 Sep 2016 12:36:11 GMT (envelope-from andrew@FreeBSD.org) Message-Id: <201609161236.u8GCaB2u016575@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: andrew set sender to andrew@FreeBSD.org using -f From: Andrew Turner Date: Fri, 16 Sep 2016 12:36:11 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r305882 - in stable/11/sys/arm64: arm64 include X-SVN-Group: stable-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 16 Sep 2016 12:36:12 -0000 Author: andrew Date: Fri Sep 16 12:36:11 2016 New Revision: 305882 URL: https://svnweb.freebsd.org/changeset/base/305882 Log: MFC 304004, 304596, 304598, 304599, 304600, 304604, 304620, 304685, 304687, 304688, 304689, 304746, 304749, 304750, 304806, 305071, 305191: Merge arm64 superpage support, however leave it disabled by default. MFC after: ABT Systems Ltd Sponsored by: The FreeBSD Foundation Modified: stable/11/sys/arm64/arm64/pmap.c stable/11/sys/arm64/arm64/trap.c stable/11/sys/arm64/include/pmap.h Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/arm64/arm64/pmap.c ============================================================================== --- stable/11/sys/arm64/arm64/pmap.c Fri Sep 16 12:20:42 2016 (r305881) +++ stable/11/sys/arm64/arm64/pmap.c Fri Sep 16 12:36:11 2016 (r305882) @@ -106,6 +106,7 @@ __FBSDID("$FreeBSD$"); */ #include +#include #include #include #include @@ -134,6 +135,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -176,6 +178,7 @@ __FBSDID("$FreeBSD$"); #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) +#define pa_to_pvh(pa) (&pv_table[pmap_l2_pindex(pa)]) #define NPV_LIST_LOCKS MAXCPU @@ -218,6 +221,14 @@ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; +/* + * Data for the pv entry allocation mechanism. + * Updates to pv_invl_gen are protected by the pv_list_locks[] + * elements, but reads are not. + */ +static struct md_page *pv_table; +static struct md_page pv_dummy; + vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ @@ -229,6 +240,13 @@ CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) extern pt_entry_t pagetable_dmap[]; +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +static int superpages_enabled = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, + "Are large page mappings enabled?"); + /* * Data for the pv entry allocation mechanism */ @@ -243,6 +261,13 @@ static vm_page_t reclaim_pv_chunk(pmap_t static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); + +static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode); +static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); +static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); +static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, + vm_offset_t va, struct rwlock **lockp); +static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, @@ -422,6 +447,13 @@ pmap_pte(pmap_t pmap, vm_offset_t va, in return (l3); } +static inline bool +pmap_superpages_enabled(void) +{ + + return (superpages_enabled != 0); +} + bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) @@ -477,6 +509,17 @@ pmap_l3_valid(pt_entry_t l3) return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); } + +/* Is a level 1 or 2entry a valid block and cacheable */ +CTASSERT(L1_BLOCK == L2_BLOCK); +static __inline int +pmap_pte_valid_cacheable(pt_entry_t pte) +{ + + return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) && + ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); +} + static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { @@ -834,7 +877,13 @@ pmap_page_init(vm_page_t m) void pmap_init(void) { - int i; + vm_size_t s; + int i, pv_npg; + + /* + * Are large page mappings enabled? + */ + TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); /* * Initialize the pv chunk list mutex. @@ -846,7 +895,38 @@ pmap_init(void) */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); -} + + /* + * Calculate the size of the pv head table for superpages. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, + M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); +} + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, + "2MB page mapping counters"); + +static u_long pmap_l2_demotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_l2_demotions, 0, "2MB page demotions"); + +static u_long pmap_l2_p_failures; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_l2_p_failures, 0, "2MB page promotion failures"); + +static u_long pmap_l2_promotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_l2_promotions, 0, "2MB page promotions"); /* * Invalidate a single TLB entry. @@ -954,6 +1034,7 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; + vm_offset_t off; vm_paddr_t pa; vm_page_t m; int lvl; @@ -975,9 +1056,21 @@ retry: tpte & ATTR_DESCR_MASK)); if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) + switch(lvl) { + case 1: + off = va & L1_OFFSET; + break; + case 2: + off = va & L2_OFFSET; + break; + case 3: + default: + off = 0; + } + if (vm_page_pa_tryrelock(pmap, + (tpte & ~ATTR_MASK) | off, &pa)) goto retry; - m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); + m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); vm_page_hold(m); } } @@ -1345,6 +1438,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l0 = kernel_pmap->pm_l0; + pmap->pm_root.rt_root = 0; } int @@ -1366,6 +1460,7 @@ pmap_pinit(pmap_t pmap) if ((l0pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l0); + pmap->pm_root.rt_root = 0; bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); @@ -1511,6 +1606,9 @@ pmap_alloc_l3(pmap_t pmap, vm_offset_t v { vm_pindex_t ptepindex; pd_entry_t *pde, tpde; +#ifdef INVARIANTS + pt_entry_t *pte; +#endif vm_page_t m; int lvl; @@ -1529,13 +1627,33 @@ retry: * and activate it. If we get a level 2 pde it will point to a level 3 * table. */ - if (lvl == 2) { + switch (lvl) { + case -1: + break; + case 0: +#ifdef INVARIANTS + pte = pmap_l0_to_l1(pde, va); + KASSERT(pmap_load(pte) == 0, + ("pmap_alloc_l3: TODO: l0 superpages")); +#endif + break; + case 1: +#ifdef INVARIANTS + pte = pmap_l1_to_l2(pde, va); + KASSERT(pmap_load(pte) == 0, + ("pmap_alloc_l3: TODO: l1 superpages")); +#endif + break; + case 2: tpde = pmap_load(pde); if (tpde != 0) { m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); m->wire_count++; return (m); } + break; + default: + panic("pmap_alloc_l3: Invalid level %d", lvl); } /* @@ -1566,6 +1684,8 @@ pmap_release(pmap_t pmap) KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); + KASSERT(vm_radix_is_empty(&pmap->pm_root), + ("pmap_release: pmap has reserved page table page(s)")); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); @@ -1850,6 +1970,68 @@ retry: } /* + * Ensure that the number of spare PV entries in the specified pmap meets or + * exceeds the given count, "needed". + * + * The given PV list lock may be released. + */ +static void +reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) +{ + struct pch new_tail; + struct pv_chunk *pc; + int avail, free; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); + + /* + * Newly allocated PV chunks must be stored in a private list until + * the required number of PV chunks have been allocated. Otherwise, + * reclaim_pv_chunk() could recycle one of these chunks. In + * contrast, these chunks must be added to the pmap upon allocation. + */ + TAILQ_INIT(&new_tail); +retry: + avail = 0; + TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { + bit_count((bitstr_t *)pc->pc_map, 0, + sizeof(pc->pc_map) * NBBY, &free); + if (free == 0) + break; + avail += free; + if (avail >= needed) + break; + } + for (; avail < needed; avail += _NPCPV) { + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + m = reclaim_pv_chunk(pmap, lockp); + if (m == NULL) + goto retry; + } + PV_STAT(atomic_add_int(&pc_chunk_count, 1)); + PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); + dump_add_page(m->phys_addr); + pc = (void *)PHYS_TO_DMAP(m->phys_addr); + pc->pc_pmap = pmap; + pc->pc_map[0] = PC_FREE0; + pc->pc_map[1] = PC_FREE1; + pc->pc_map[2] = PC_FREE2; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); + PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); + } + if (!TAILQ_EMPTY(&new_tail)) { + mtx_lock(&pv_chunks_mutex); + TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); + mtx_unlock(&pv_chunks_mutex); + } +} + +/* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or @@ -1871,6 +2053,74 @@ pmap_pvh_remove(struct md_page *pvh, pma } /* + * After demotion from a 2MB page mapping to 512 4KB page mappings, + * destroy the pv entry for the 2MB page mapping and reinstantiate the pv + * entries for each of the 4KB page mappings. + */ +static void +pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + struct pv_chunk *pc; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + int bit, field; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((pa & L2_OFFSET) == 0, + ("pmap_pv_demote_l2: pa is not 2mpage aligned")); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the 2mpage's pv entry for this mapping to the first + * page's pv list. Once this transfer begins, the pv list lock + * must not be released until the last pv entry is reinstantiated. + */ + pvh = pa_to_pvh(pa); + va = va & ~L2_OFFSET; + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */ + PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); + va_last = va + L2_SIZE - PAGE_SIZE; + for (;;) { + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || + pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); + for (field = 0; field < _NPCM; field++) { + while (pc->pc_map[field]) { + bit = ffsl(pc->pc_map[field]) - 1; + pc->pc_map[field] &= ~(1ul << bit); + pv = &pc->pc_pventry[field * 64 + bit]; + va += PAGE_SIZE; + pv->pv_va = va; + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_l2: page %p is not managed", m)); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (va == va_last) + goto out; + } + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } +out: + if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } + PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1)); + PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); +} + +/* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. @@ -1914,6 +2164,7 @@ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { + struct md_page *pvh; pt_entry_t old_l3; vm_page_t m; @@ -1934,6 +2185,12 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t * vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } } return (pmap_unuse_l3(pmap, va, l2e, free)); } @@ -2000,6 +2257,14 @@ pmap_remove(pmap_t pmap, vm_offset_t sva l3_paddr = pmap_load(l2); + if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { + /* TODO: Add pmap_remove_l2 */ + if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET, + &lock) == NULL) + continue; + l3_paddr = pmap_load(l2); + } + /* * Weed out invalid mappings. */ @@ -2061,28 +2326,56 @@ pmap_remove(pmap_t pmap, vm_offset_t sva void pmap_remove_all(vm_page_t m) { + struct md_page *pvh; pv_entry_t pv; pmap_t pmap; struct rwlock *lock; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; + vm_offset_t va; struct spglist free; - int lvl, md_gen; + int lvl, pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: rw_wlock(lock); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + pte = pmap_pte(pmap, va, &lvl); + KASSERT(pte != NULL, + ("pmap_remove_all: no page table entry found")); + KASSERT(lvl == 2, + ("pmap_remove_all: invalid pte level %d", lvl)); + + pmap_demote_l2_locked(pmap, pte, va, &lock); + PMAP_UNLOCK(pmap); + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; @@ -2169,9 +2462,17 @@ pmap_protect(pmap_t pmap, vm_offset_t sv va_next = eva; l2 = pmap_l1_to_l2(l1, sva); - if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) + if (pmap_load(l2) == 0) continue; + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { + l3p = pmap_demote_l2(pmap, l2, sva); + if (l3p == NULL) + continue; + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_protect: Invalid L2 entry after demotion")); + if (va_next > eva) va_next = eva; @@ -2194,6 +2495,194 @@ pmap_protect(pmap_t pmap, vm_offset_t sv } /* + * Inserts the specified page table page into the specified pmap's collection + * of idle page table pages. Each of a pmap's page table pages is responsible + * for mapping a distinct range of virtual addresses. The pmap's collection is + * ordered by this virtual address range. + */ +static __inline int +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_insert(&pmap->pm_root, mpte)); +} + +/* + * Looks for a page table page mapping the specified virtual address in the + * specified pmap's collection of idle page table pages. Returns NULL if there + * is no page table page corresponding to the specified virtual address. + */ +static __inline vm_page_t +pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(va))); +} + +/* + * Removes the specified page table page from the specified pmap's collection + * of idle page table pages. The specified page table page must be a member of + * the pmap's collection. + */ +static __inline void +pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + vm_radix_remove(&pmap->pm_root, mpte->pindex); +} + +/* + * Performs a break-before-make update of a pmap entry. This is needed when + * either promoting or demoting pages to ensure the TLB doesn't get into an + * inconsistent state. + */ +static void +pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, + vm_offset_t va, vm_size_t size) +{ + register_t intr; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * Ensure we don't get switched out with the page table in an + * inconsistent state. We also need to ensure no interrupts fire + * as they may make use of an address we are about to invalidate. + */ + intr = intr_disable(); + critical_enter(); + + /* Clear the old mapping */ + pmap_load_clear(pte); + PTE_SYNC(pte); + pmap_invalidate_range(pmap, va, va + size); + + /* Create the new mapping */ + pmap_load_store(pte, newpte); + PTE_SYNC(pte); + + critical_exit(); + intr_restore(intr); +} + +/* + * After promotion from 512 4KB page mappings to a single 2MB page mapping, + * replace the many pv entries for the 4KB page mappings by a single pv entry + * for the 2MB page mapping. + */ +static void +pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + + KASSERT((pa & L2_OFFSET) == 0, + ("pmap_pv_promote_l2: pa is not 2mpage aligned")); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the first page's pv entry for this mapping to the 2mpage's + * pv list. Aside from avoiding the cost of a call to get_pv_entry(), + * a transfer avoids the possibility that get_pv_entry() calls + * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the + * mappings that is being promoted. + */ + m = PHYS_TO_VM_PAGE(pa); + va = va & ~L2_OFFSET; + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + /* Free the remaining NPTEPG - 1 pv entries. */ + va_last = va + L2_SIZE - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} + +/* + * Tries to promote the 512, contiguous 4KB page mappings that are within a + * single level 2 table entry to a single 2MB page mapping. For promotion + * to occur, two conditions must be met: (1) the 4KB page mappings must map + * aligned, contiguous physical memory and (2) the 4KB page mappings must have + * identical characteristics. + */ +static void +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *firstl3, *l3, newl2, oldl3, pa; + vm_page_t mpte; + vm_offset_t sva; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + sva = va & ~L2_OFFSET; + firstl3 = pmap_l2_to_l3(l2, sva); + newl2 = pmap_load(firstl3); + + /* Check the alingment is valid */ + if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return; + } + + pa = newl2 + L2_SIZE - PAGE_SIZE; + for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { + oldl3 = pmap_load(l3); + if (oldl3 != pa) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return; + } + pa -= PAGE_SIZE; + } + + /* + * Save the page table page in its current state until the L2 + * mapping the superpage is demoted by pmap_demote_l2() or + * destroyed by pmap_remove_l3(). + */ + mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + KASSERT(mpte >= vm_page_array && + mpte < &vm_page_array[vm_page_array_size], + ("pmap_promote_l2: page table page is out of range")); + KASSERT(mpte->pindex == pmap_l2_pindex(va), + ("pmap_promote_l2: page table page's pindex is wrong")); + if (pmap_insert_pt_page(pmap, mpte)) { + atomic_add_long(&pmap_l2_p_failures, 1); + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx in pmap %p", va, + pmap); + return; + } + + if ((newl2 & ATTR_SW_MANAGED) != 0) + pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp); + + newl2 &= ~ATTR_DESCR_MASK; + newl2 |= L2_BLOCK; + + pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE); + + atomic_add_long(&pmap_l2_promotions, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, + pmap); +} + +/* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. @@ -2212,7 +2701,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, struct rwlock *lock; pd_entry_t *pde; pt_entry_t new_l3, orig_l3; - pt_entry_t *l3; + pt_entry_t *l2, *l3; pv_entry_t pv; vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; vm_page_t mpte, om, l1_m, l2_m, l3_m; @@ -2239,6 +2728,22 @@ pmap_enter(pmap_t pmap, vm_offset_t va, lock = NULL; PMAP_LOCK(pmap); + pde = pmap_pde(pmap, va, &lvl); + if (pde != NULL && lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && + (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET, + &lock)) != NULL) { + l3 = &l3[pmap_l3_index(va)]; + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE( + pmap_load(l2) & ~ATTR_MASK); + mpte->wire_count++; + } + goto havel3; + } + } + if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); @@ -2257,7 +2762,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, l3 = pmap_l2_to_l3(pde, va); } else { - pde = pmap_pde(pmap, va, &lvl); /* * If we get a level 2 pde it must point to a level 3 entry * otherwise we will need to create the intermediate tables @@ -2320,6 +2824,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } +havel3: om = NULL; orig_l3 = pmap_load(l3); @@ -2399,11 +2904,11 @@ pmap_enter(pmap_t pmap, vm_offset_t va, */ if (orig_l3 != 0) { validate: - orig_l3 = pmap_load_store(l3, new_l3); - PTE_SYNC(l3); + orig_l3 = pmap_load(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { + pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE); if ((orig_l3 & ATTR_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) @@ -2412,18 +2917,38 @@ validate: vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); + if ((om->aflags & PGA_WRITEABLE) != 0 && + TAILQ_EMPTY(&om->md.pv_list) && + ((om->flags & PG_FICTITIOUS) != 0 || + TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) + vm_page_aflag_clear(om, PGA_WRITEABLE); } - } else if (pmap_page_dirty(orig_l3)) { - if ((orig_l3 & ATTR_SW_MANAGED) != 0) + } else { + pmap_load_store(l3, new_l3); + PTE_SYNC(l3); + pmap_invalidate_page(pmap, va); + if (pmap_page_dirty(orig_l3) && + (orig_l3 & ATTR_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); - PTE_SYNC(l3); } + + PTE_SYNC(l3); pmap_invalidate_page(pmap, va); - if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) - cpu_icache_sync_range(va, PAGE_SIZE); + + if (pmap != pmap_kernel()) { + if (pmap == &curproc->p_vmspace->vm_pmap) + cpu_icache_sync_range(va, PAGE_SIZE); + + if ((mpte == NULL || mpte->wire_count == NL3PG) && + pmap_superpages_enabled() && + (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) { + pmap_promote_l2(pmap, pde, va, &lock); + } + } if (lock != NULL) rw_wunlock(lock); @@ -2497,7 +3022,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_ { struct spglist free; pd_entry_t *pde; - pt_entry_t *l3; + pt_entry_t *l2, *l3; vm_paddr_t pa; int lvl; @@ -2532,6 +3057,12 @@ pmap_enter_quick_locked(pmap_t pmap, vm_ * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ + if (lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == + L2_BLOCK) + return (NULL); + } if (lvl == 2 && pmap_load(pde) != 0) { mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); @@ -2589,7 +3120,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | - ATTR_AP(ATTR_AP_RW) | L3_PAGE; + ATTR_AP(ATTR_AP_RO) | L3_PAGE; /* * Now validate mapping with RO protection @@ -2659,6 +3190,14 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva if (pmap_load(l2) == 0) continue; + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { + l3 = pmap_demote_l2(pmap, l2, sva); + if (l3 == NULL) + continue; + } + KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_unwire: Invalid l2 entry after demotion")); + if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, @@ -2813,6 +3352,7 @@ pmap_quick_remove_page(vm_offset_t addr) boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { + struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; @@ -2832,6 +3372,18 @@ pmap_page_exists_quick(pmap_t pmap, vm_p if (loops >= 16) break; } + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } rw_runlock(lock); return (rv); } @@ -2846,10 +3398,11 @@ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; + struct md_page *pvh; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; - int count, lvl, md_gen; + int count, lvl, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); @@ -2874,6 +3427,29 @@ restart: count++; PMAP_UNLOCK(pmap); } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + pte = pmap_pte(pmap, pv->pv_va, &lvl); + if (pte != NULL && + (pmap_load(pte) & ATTR_SW_WIRED) != 0) + count++; + PMAP_UNLOCK(pmap); + } + } rw_runlock(lock); return (count); } @@ -2900,8 +3476,9 @@ pmap_remove_pages(pmap_t pmap) pd_entry_t *pde; pt_entry_t *pte, tpte; struct spglist free; - vm_page_t m; + vm_page_t m, ml3, mt; pv_entry_t pv; + struct md_page *pvh; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; @@ -2928,14 +3505,30 @@ pmap_remove_pages(pmap_t pmap) pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("Attempting to remove an unmapped page")); - KASSERT(lvl == 2, - ("Invalid page directory level: %d", lvl)); - pte = pmap_l2_to_l3(pde, pv->pv_va); - KASSERT(pte != NULL, - ("Attempting to remove an unmapped page")); - - tpte = pmap_load(pte); + switch(lvl) { + case 1: + pte = pmap_l1_to_l2(pde, pv->pv_va); + tpte = pmap_load(pte); + KASSERT((tpte & ATTR_DESCR_MASK) == + L2_BLOCK, + ("Attempting to remove an invalid " + "block: %lx", tpte)); + tpte = pmap_load(pte); + break; + case 2: + pte = pmap_l2_to_l3(pde, pv->pv_va); + tpte = pmap_load(pte); + KASSERT((tpte & ATTR_DESCR_MASK) == + L3_PAGE, + ("Attempting to remove an invalid " + "page: %lx", tpte)); + break; + default: + panic( + "Invalid page directory level: %d", + lvl); + } /* * We cannot remove wired pages from a process' mapping at this time @@ -2958,10 +3551,17 @@ pmap_remove_pages(pmap_t pmap) ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); - /* XXX: assumes tpte is level 3 */ - if (pmap_is_current(pmap) && - pmap_l3_valid_cacheable(tpte)) - cpu_dcache_wb_range(pv->pv_va, L3_SIZE); + if (pmap_is_current(pmap)) { + if (lvl == 2 && + pmap_l3_valid_cacheable(tpte)) { + cpu_dcache_wb_range(pv->pv_va, + L3_SIZE); + } else if (lvl == 1 && + pmap_pte_valid_cacheable(tpte)) { + cpu_dcache_wb_range(pv->pv_va, + L2_SIZE); + } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***