Date: Sat, 7 Oct 2017 18:36:42 +0000 (UTC) From: Alan Cox <alc@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r324390 - in stable/11/sys: amd64/amd64 amd64/include vm Message-ID: <201710071836.v97IagWH020550@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: alc Date: Sat Oct 7 18:36:42 2017 New Revision: 324390 URL: https://svnweb.freebsd.org/changeset/base/324390 Log: MFC r319542,321003,321378 Eliminate duplication of the pmap and pv list unlock operations in pmap_enter() by implementing a single return path. Otherwise, the duplication will only increase with the upcoming support for psind == 1. Extract the innermost loop of pmap_remove() out into its own function, pmap_remove_ptes(). (This new function will also be used by an upcoming change to pmap_enter() that adds support for psind == 1 mappings.) Add support for pmap_enter(..., psind=1) to the amd64 pmap. In other words, add support for explicitly requesting that pmap_enter() create a 2MB page mapping. (Essentially, this feature allows the machine-independent layer to create superpage mappings preemptively, and not wait for automatic promotion to occur.) Export pmap_ps_enabled() to the machine-independent layer. Add a flag to pmap_pv_insert_pde() that specifies whether it should fail or reclaim a PV entry when one is not available. Refactor pmap_enter_pde() into two functions, one by the same name, that is a general-purpose function for creating PDE PG_PS mappings, and another, pmap_enter_2mpage(), that is used to prefault 2MB read- and/or execute-only mappings for execve(2), mmap(2), and shmat(2). Modified: stable/11/sys/amd64/amd64/pmap.c stable/11/sys/amd64/include/pmap.h stable/11/sys/vm/pmap.h Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/amd64/amd64/pmap.c ============================================================================== --- stable/11/sys/amd64/amd64/pmap.c Sat Oct 7 18:08:37 2017 (r324389) +++ stable/11/sys/amd64/amd64/pmap.c Sat Oct 7 18:36:42 2017 (r324390) @@ -591,6 +591,12 @@ pmap_delayed_invl_page(vm_page_t m) */ static caddr_t crashdumpmap; +/* + * Internal flags for pmap_enter()'s helper functions. + */ +#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ +#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ + static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); @@ -600,8 +606,8 @@ static void reserve_pv_entries(pmap_t pmap, int needed struct rwlock **lockp); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); -static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, - struct rwlock **lockp); +static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, + u_int flags, struct rwlock **lockp); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); @@ -614,8 +620,10 @@ static boolean_t pmap_demote_pde_locked(pmap_t pmap, p vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); -static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, - vm_prot_t prot, struct rwlock **lockp); +static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, struct rwlock **lockp); +static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, + u_int flags, vm_page_t m, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); @@ -636,6 +644,9 @@ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *pt static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free); +static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + pd_entry_t *pde, struct spglist *free, + struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, @@ -1395,7 +1406,7 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde) return (mask); } -static __inline boolean_t +bool pmap_ps_enabled(pmap_t pmap) { @@ -3406,27 +3417,30 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, } /* - * Conditionally create the PV entry for a 2MB page mapping if the required - * memory can be allocated without resorting to reclamation. + * Create the PV entry for a 2MB page mapping. Always returns true unless the + * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns + * false if the PV entry cannot be allocated without resorting to reclamation. */ -static boolean_t -pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, +static bool +pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; + vm_paddr_t pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ - if ((pv = get_pv_entry(pmap, NULL)) != NULL) { - pv->pv_va = va; - CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); - pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); - pvh->pv_gen++; - return (TRUE); - } else - return (FALSE); + if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? + NULL : lockp)) == NULL) + return (false); + pv->pv_va = va; + pa = pde & PG_PS_FRAME; + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + return (true); } /* @@ -3749,6 +3763,44 @@ pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry } /* + * Removes the specified range of addresses from the page table page. + */ +static bool +pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + pd_entry_t *pde, struct spglist *free, struct rwlock **lockp) +{ + pt_entry_t PG_G, *pte; + vm_offset_t va; + bool anyvalid; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PG_G = pmap_global_bit(pmap); + anyvalid = false; + va = eva; + for (pte = pmap_pde_to_pte(pde, sva); sva != eva; pte++, + sva += PAGE_SIZE) { + if (*pte == 0) { + if (va != eva) { + pmap_invalidate_range(pmap, va, sva); + va = eva; + } + continue; + } + if ((*pte & PG_G) == 0) + anyvalid = true; + else if (va == eva) + va = sva; + if (pmap_remove_pte(pmap, pte, sva, *pde, free, lockp)) { + sva += PAGE_SIZE; + break; + } + } + if (va != eva) + pmap_invalidate_range(pmap, va, sva); + return (anyvalid); +} + +/* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly @@ -3758,11 +3810,11 @@ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; - vm_offset_t va, va_next; + vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; - pt_entry_t *pte, PG_G, PG_V; + pt_entry_t PG_G, PG_V; struct spglist free; int anyvalid; @@ -3865,28 +3917,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t if (va_next > eva) va_next = eva; - va = va_next; - for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, - sva += PAGE_SIZE) { - if (*pte == 0) { - if (va != va_next) { - pmap_invalidate_range(pmap, va, sva); - va = va_next; - } - continue; - } - if ((*pte & PG_G) == 0) - anyvalid = 1; - else if (va == va_next) - va = sva; - if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free, - &lock)) { - sva += PAGE_SIZE; - break; - } - } - if (va != va_next) - pmap_invalidate_range(pmap, va, sva); + if (pmap_remove_ptes(pmap, sva, va_next, pde, &free, &lock)) + anyvalid = 1; } if (lock != NULL) rw_wunlock(lock); @@ -4337,7 +4369,7 @@ setpte: */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - u_int flags, int8_t psind __unused) + u_int flags, int8_t psind) { struct rwlock *lock; pd_entry_t *pde; @@ -4346,6 +4378,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; + int rv; boolean_t nosleep; PG_A = pmap_accessed_bit(pmap); @@ -4364,6 +4397,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v ("pmap_enter: managed mapping within the clean submap")); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); + KASSERT((flags & PMAP_ENTER_RESERVED) == 0, + ("pmap_enter: flags %u has reserved bits set", flags)); pa = VM_PAGE_TO_PHYS(m); newpte = (pt_entry_t)(pa | PG_A | PG_V); if ((flags & VM_PROT_WRITE) != 0) @@ -4380,7 +4415,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v newpte |= PG_U; if (pmap == kernel_pmap) newpte |= PG_G; - newpte |= pmap_cache_bits(pmap, m->md.pat_mode, 0); + newpte |= pmap_cache_bits(pmap, m->md.pat_mode, psind > 0); /* * Set modified bit gratuitously for writeable mappings if @@ -4393,10 +4428,16 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, v } else newpte |= PG_MANAGED; - mpte = NULL; - lock = NULL; PMAP_LOCK(pmap); + if (psind == 1) { + /* Assert the required virtual and physical alignment. */ + KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); + KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); + rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock); + goto out; + } + mpte = NULL; /* * In the case that a page table page is not @@ -4420,10 +4461,8 @@ retry: mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { - if (lock != NULL) - rw_wunlock(lock); - PMAP_UNLOCK(pmap); - return (KERN_RESOURCE_SHORTAGE); + rv = KERN_RESOURCE_SHORTAGE; + goto out; } goto retry; } else @@ -4549,55 +4588,127 @@ unchanged: vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va, &lock); + rv = KERN_SUCCESS; +out: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); - return (KERN_SUCCESS); + return (rv); } /* - * Tries to create a 2MB page mapping. Returns TRUE if successful and FALSE - * otherwise. Fails if (1) a page table page cannot be allocated without - * blocking, (2) a mapping already exists at the specified virtual address, or - * (3) a pv entry cannot be allocated without reclaiming another pv entry. + * Tries to create a read- and/or execute-only 2MB page mapping. Returns true + * if successful. Returns false if (1) a page table page cannot be allocated + * without sleeping, (2) a mapping already exists at the specified virtual + * address, or (3) a PV entry cannot be allocated without reclaiming another + * PV entry. */ -static boolean_t -pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, +static bool +pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { - pd_entry_t *pde, newpde; + pd_entry_t newpde; pt_entry_t PG_V; - vm_page_t pdpg; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PG_V = pmap_valid_bit(pmap); + newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) | + PG_PS | PG_V; + if ((m->oflags & VPO_UNMANAGED) == 0) + newpde |= PG_MANAGED; + if ((prot & VM_PROT_EXECUTE) == 0) + newpde |= pg_nx; + if (va < VM_MAXUSER_ADDRESS) + newpde |= PG_U; + return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP | + PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == + KERN_SUCCESS); +} + +/* + * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if + * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE + * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and + * a mapping already exists at the specified virtual address. Returns + * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table + * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if + * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. + * + * The parameter "m" is only used when creating a managed, writeable mapping. + */ +static int +pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, + vm_page_t m, struct rwlock **lockp) +{ struct spglist free; + pd_entry_t oldpde, *pde; + pt_entry_t PG_G, PG_RW, PG_V; + vm_page_t mt, pdpg; + PG_G = pmap_global_bit(pmap); + PG_RW = pmap_rw_bit(pmap); + KASSERT((newpde & (pmap_modified_bit(pmap) | PG_RW)) != PG_RW, + ("pmap_enter_pde: newpde is missing PG_M")); PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((pdpg = pmap_allocpde(pmap, va, NULL)) == NULL) { + if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ? + NULL : lockp)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); - return (FALSE); + return (KERN_RESOURCE_SHORTAGE); } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); pde = &pde[pmap_pde_index(va)]; - if ((*pde & PG_V) != 0) { + oldpde = *pde; + if ((oldpde & PG_V) != 0) { KASSERT(pdpg->wire_count > 1, ("pmap_enter_pde: pdpg's wire count is too low")); - pdpg->wire_count--; - CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" - " in pmap %p", va, pmap); - return (FALSE); + if ((flags & PMAP_ENTER_NOREPLACE) != 0) { + pdpg->wire_count--; + CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" + " in pmap %p", va, pmap); + return (KERN_FAILURE); + } + /* Break the existing mapping(s). */ + SLIST_INIT(&free); + if ((oldpde & PG_PS) != 0) { + /* + * The reference to the PD page that was acquired by + * pmap_allocpde() ensures that it won't be freed. + * However, if the PDE resulted from a promotion, then + * a reserved PT page could be freed. + */ + (void)pmap_remove_pde(pmap, pde, va, &free, lockp); + if ((oldpde & PG_G) == 0) + pmap_invalidate_pde_page(pmap, va, oldpde); + } else { + pmap_delayed_invl_started(); + if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free, + lockp)) + pmap_invalidate_all(pmap); + pmap_delayed_invl_finished(); + } + pmap_free_zero_pages(&free); + if (va >= VM_MAXUSER_ADDRESS) { + mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt)) { + /* + * XXX Currently, this can't happen because + * we do not perform pmap_enter(psind == 1) + * on the kernel pmap. + */ + panic("pmap_enter_pde: trie insert failed"); + } + } else + KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p", + pde)); } - newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) | - PG_PS | PG_V; - if ((m->oflags & VPO_UNMANAGED) == 0) { - newpde |= PG_MANAGED; - + if ((newpde & PG_MANAGED) != 0) { /* * Abort this mapping if its PV entry could not be created. */ - if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m), - lockp)) { + if (!pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, pdpg, &free)) { /* @@ -4611,17 +4722,19 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t } CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); - return (FALSE); + return (KERN_RESOURCE_SHORTAGE); } + if ((newpde & PG_RW) != 0) { + for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) + vm_page_aflag_set(mt, PGA_WRITEABLE); + } } - if ((prot & VM_PROT_EXECUTE) == 0) - newpde |= pg_nx; - if (va < VM_MAXUSER_ADDRESS) - newpde |= PG_U; /* * Increment counters. */ + if ((newpde & PG_W) != 0) + pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE; pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* @@ -4633,7 +4746,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t atomic_add_long(&pmap_pde_mappings, 1); CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); - return (TRUE); + return (KERN_SUCCESS); } /* @@ -4668,7 +4781,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_o va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pmap_ps_enabled(pmap) && - pmap_enter_pde(pmap, va, m, prot, &lock)) + pmap_enter_2mpage(pmap, va, m, prot, &lock)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, @@ -5101,8 +5214,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_ PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_pdpg)); pde = &pde[pmap_pde_index(addr)]; if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || - pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & - PG_PS_FRAME, &lock))) { + pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr, + PMAP_ENTER_NORECLAIM, &lock))) { *pde = srcptepaddr & ~PG_W; pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); Modified: stable/11/sys/amd64/include/pmap.h ============================================================================== --- stable/11/sys/amd64/include/pmap.h Sat Oct 7 18:08:37 2017 (r324389) +++ stable/11/sys/amd64/include/pmap.h Sat Oct 7 18:36:42 2017 (r324390) @@ -408,6 +408,7 @@ void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); void pmap_pinit_pml4(vm_page_t); +bool pmap_ps_enabled(pmap_t pmap); void pmap_unmapdev(vm_offset_t, vm_size_t); void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); Modified: stable/11/sys/vm/pmap.h ============================================================================== --- stable/11/sys/vm/pmap.h Sat Oct 7 18:08:37 2017 (r324389) +++ stable/11/sys/vm/pmap.h Sat Oct 7 18:36:42 2017 (r324390) @@ -100,9 +100,11 @@ extern vm_offset_t kernel_vm_end; /* * Flags for pmap_enter(). The bits in the low-order byte are reserved * for the protection code (vm_prot_t) that describes the fault type. + * Bits 24 through 31 are reserved for the pmap's internal use. */ -#define PMAP_ENTER_NOSLEEP 0x0100 -#define PMAP_ENTER_WIRED 0x0200 +#define PMAP_ENTER_NOSLEEP 0x00000100 +#define PMAP_ENTER_WIRED 0x00000200 +#define PMAP_ENTER_RESERVED 0xFF000000 void pmap_activate(struct thread *td); void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201710071836.v97IagWH020550>