Date: Sun, 15 Sep 2019 17:22:30 +0000 (UTC) From: Alan Cox <alc@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r352358 - in stable/12/sys: amd64/amd64 arm64/arm64 i386/i386 riscv/riscv Message-ID: <201909151722.x8FHMU5e032148@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: alc Date: Sun Sep 15 17:22:29 2019 New Revision: 352358 URL: https://svnweb.freebsd.org/changeset/base/352358 Log: MFC r348828 Implement an alternative solution to the amd64 and i386 pmap problem that we previously addressed in r348246 (and MFCed in r348479). This pmap problem also exists on arm64 and riscv. However, the original solution developed for amd64 and i386 cannot be used on arm64 and riscv. In particular, arm64 and riscv do not define a PG_PROMOTED flag in their level 2 PTEs. (A PG_PROMOTED flag makes no sense on arm64, where unlike x86 or riscv we are required to break the old 4KB mappings before making the 2MB mapping; and on riscv there are no unused bits in the PTE to define a PG_PROMOTED flag.) This commit implements an alternative solution that can be used on all four architectures. Moreover, this solution has two other advantages. First, on older AMD processors that required the Erratum 383 workaround, it is less costly. Specifically, it avoids unnecessary calls to pmap_fill_ptp() on a superpage demotion. Second, it enables the elimination of some calls to pagezero() in pmap_kernel_remove_{l2,pde}(). In addition, remove a related stale comment from pmap_enter_{l2,pde}(). Modified: stable/12/sys/amd64/amd64/pmap.c stable/12/sys/arm64/arm64/pmap.c stable/12/sys/i386/i386/pmap.c stable/12/sys/riscv/riscv/pmap.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/amd64/amd64/pmap.c ============================================================================== --- stable/12/sys/amd64/amd64/pmap.c Sun Sep 15 16:53:02 2019 (r352357) +++ stable/12/sys/amd64/amd64/pmap.c Sun Sep 15 17:22:29 2019 (r352358) @@ -1142,7 +1142,7 @@ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); -static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); +static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted); static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva); static void pmap_invalidate_cache_range_all(vm_offset_t sva, @@ -1846,8 +1846,13 @@ pmap_init(void) mpte->pindex = pmap_pde_pindex(KERNBASE) + i; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); mpte->wire_count = 1; + + /* + * Collect the page table pages that were replaced by a 2MB + * page in create_pagetables(). They are zero filled. + */ if (i << PDRSHIFT < KERNend && - pmap_insert_pt_page(kernel_pmap, mpte)) + pmap_insert_pt_page(kernel_pmap, mpte, false)) panic("pmap_init: pmap_insert_pt_page failed"); } PMAP_UNLOCK(kernel_pmap); @@ -3219,12 +3224,15 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. + * + * If "promoted" is false, then the page table page "mpte" must be zero filled. */ static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0; return (vm_radix_insert(&pmap->pm_root, mpte)); } @@ -4716,7 +4724,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, v * If the page table page is not leftover from an earlier promotion, * initialize it. */ - if ((oldpde & PG_PROMOTED) == 0) + if (mpte->valid == 0) pmap_fill_ptp(firstpte, newpte); pmap_demote_pde_check(firstpte, newpte); @@ -4789,9 +4797,11 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, v newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; /* - * Initialize the page table page. + * If this page table page was unmapped by a promotion, then it + * contains valid mappings. Zero it to invalidate those mappings. */ - pagezero((void *)PHYS_TO_DMAP(mptepa)); + if (mpte->valid != 0) + pagezero((void *)PHYS_TO_DMAP(mptepa)); /* * Demote the mapping. @@ -4856,6 +4866,8 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offse } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { + KASSERT(mpte->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pde: pte page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); @@ -5489,7 +5501,7 @@ setpte: ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == pmap_pde_pindex(va), ("pmap_promote_pde: page table page's pindex is wrong")); - if (pmap_insert_pt_page(pmap, mpte)) { + if (pmap_insert_pt_page(pmap, mpte, true)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx in pmap %p", va, @@ -5914,15 +5926,13 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t } vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { + /* + * Both pmap_remove_pde() and pmap_remove_ptes() will + * leave the kernel page table page zero filled. + */ mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt)) { - /* - * XXX Currently, this can't happen because - * we do not perform pmap_enter(psind == 1) - * on the kernel pmap. - */ + if (pmap_insert_pt_page(pmap, mt, false)) panic("pmap_enter_pde: trie insert failed"); - } } else KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p", pde)); @@ -6912,6 +6922,8 @@ pmap_remove_pages(pmap_t pmap) } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { + KASSERT(mpte->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pages: pte page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); Modified: stable/12/sys/arm64/arm64/pmap.c ============================================================================== --- stable/12/sys/arm64/arm64/pmap.c Sun Sep 15 16:53:02 2019 (r352357) +++ stable/12/sys/arm64/arm64/pmap.c Sun Sep 15 17:22:29 2019 (r352358) @@ -2401,9 +2401,11 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_ newl2 = ml3pa | L2_TABLE; /* - * Initialize the page table page. + * If this page table page was unmapped by a promotion, then it + * contains valid mappings. Zero it to invalidate those mappings. */ - pagezero((void *)PHYS_TO_DMAP(ml3pa)); + if (ml3->valid != 0) + pagezero((void *)PHYS_TO_DMAP(ml3pa)); /* * Demote the mapping. The caller must have already invalidated the @@ -2456,6 +2458,8 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_ } else { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { + KASSERT(ml3->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_l2: l3 page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(ml3->wire_count == NL3PG, ("pmap_remove_l2: l3 page wire count error")); @@ -2812,12 +2816,15 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. + * + * If "promoted" is false, then the page table page "mpte" must be zero filled. */ static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0; return (vm_radix_insert(&pmap->pm_root, mpte)); } @@ -2961,7 +2968,7 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset ("pmap_promote_l2: page table page is out of range")); KASSERT(mpte->pindex == pmap_l2_pindex(va), ("pmap_promote_l2: page table page's pindex is wrong")); - if (pmap_insert_pt_page(pmap, mpte)) { + if (pmap_insert_pt_page(pmap, mpte, true)) { atomic_add_long(&pmap_l2_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx in pmap %p", va, @@ -3384,15 +3391,13 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t } vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { + /* + * Both pmap_remove_l2() and pmap_remove_l3() will + * leave the kernel page table page zero filled. + */ mt = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); - if (pmap_insert_pt_page(pmap, mt)) { - /* - * XXX Currently, this can't happen bacuse - * we do not perform pmap_enter(psind == 1) - * on the kernel pmap. - */ + if (pmap_insert_pt_page(pmap, mt, false)) panic("pmap_enter_l2: trie insert failed"); - } } else KASSERT(pmap_load(l2) == 0, ("pmap_enter_l2: non-zero L2 entry %p", l2)); @@ -4078,6 +4083,8 @@ pmap_remove_pages(pmap_t pmap) ml3 = pmap_remove_pt_page(pmap, pv->pv_va); if (ml3 != NULL) { + KASSERT(ml3->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pages: l3 page not promoted")); pmap_resident_count_dec(pmap,1); KASSERT(ml3->wire_count == NL3PG, ("pmap_remove_pages: l3 page wire count error")); @@ -5020,8 +5027,10 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_ " in pmap %p", va, pmap); goto fail; } - if (va < VM_MAXUSER_ADDRESS) + if (va < VM_MAXUSER_ADDRESS) { + ml3->wire_count = NL3PG; pmap_resident_count_inc(pmap, 1); + } } l3phys = VM_PAGE_TO_PHYS(ml3); @@ -5033,10 +5042,10 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_ newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; /* - * If the page table page is new, initialize it. + * If the page table page is not leftover from an earlier promotion, + * initialize it. */ - if (ml3->wire_count == 1) { - ml3->wire_count = NL3PG; + if (ml3->valid == 0) { for (i = 0; i < Ln_ENTRIES; i++) { l3[i] = newl3 | phys; phys += L3_SIZE; Modified: stable/12/sys/i386/i386/pmap.c ============================================================================== --- stable/12/sys/i386/i386/pmap.c Sun Sep 15 16:53:02 2019 (r352357) +++ stable/12/sys/i386/i386/pmap.c Sun Sep 15 17:22:29 2019 (r352358) @@ -319,7 +319,7 @@ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_flush_page(vm_page_t m); -static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); +static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted); static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva); static void pmap_invalidate_cache_range_all(vm_offset_t sva, @@ -956,9 +956,14 @@ pmap_init(void) mpte->pindex = i + KPTDI; mpte->phys_addr = KPTphys + ptoa(i); mpte->wire_count = 1; + + /* + * Collect the page table pages that were replaced by a 2/4MB + * page. They are filled with equivalent 4KB page mappings. + */ if (pseflag != 0 && KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend && - pmap_insert_pt_page(kernel_pmap, mpte)) + pmap_insert_pt_page(kernel_pmap, mpte, true)) panic("pmap_init: pmap_insert_pt_page failed"); } PMAP_UNLOCK(kernel_pmap); @@ -1945,12 +1950,15 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. + * + * If "promoted" is false, then the page table page "mpte" must be zero filled. */ static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0; return (vm_radix_insert(&pmap->pm_root, mpte)); } @@ -2904,7 +2912,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offse * If the page table page is not leftover from an earlier promotion, * initialize it. */ - if ((oldpde & PG_PROMOTED) == 0) + if (mpte->valid == 0) pmap_fill_ptp(firstpte, newpte); KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), @@ -2976,9 +2984,11 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, v newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; /* - * Initialize the page table page. + * If this page table page was unmapped by a promotion, then it + * contains valid mappings. Zero it to invalidate those mappings. */ - pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); + if (mpte->valid != 0) + pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); /* * Remove the mapping. @@ -3041,6 +3051,8 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offse } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { + KASSERT(mpte->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pde: pte page not promoted")); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); @@ -3613,7 +3625,7 @@ setpte: ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == va >> PDRSHIFT, ("pmap_promote_pde: page table page's pindex is wrong")); - if (pmap_insert_pt_page(pmap, mpte)) { + if (pmap_insert_pt_page(pmap, mpte, true)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x in pmap %p", va, @@ -3989,15 +4001,13 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t } vm_page_free_pages_toq(&free, true); if (pmap == kernel_pmap) { + /* + * Both pmap_remove_pde() and pmap_remove_ptes() will + * leave the kernel page table page zero filled. + */ mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt)) { - /* - * XXX Currently, this can't happen because - * we do not perform pmap_enter(psind == 1) - * on the kernel pmap. - */ + if (pmap_insert_pt_page(pmap, mt, false)) panic("pmap_enter_pde: trie insert failed"); - } } else KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p", pde)); @@ -4876,6 +4886,8 @@ pmap_remove_pages(pmap_t pmap) } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { + KASSERT(mpte->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pages: pte page not promoted")); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); Modified: stable/12/sys/riscv/riscv/pmap.c ============================================================================== --- stable/12/sys/riscv/riscv/pmap.c Sun Sep 15 16:53:02 2019 (r352357) +++ stable/12/sys/riscv/riscv/pmap.c Sun Sep 15 17:22:29 2019 (r352358) @@ -1105,12 +1105,15 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. + * + * If "promoted" is false, then the page table page "ml3" must be zero filled. */ static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3) +pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3, bool promoted) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); + ml3->valid = promoted ? VM_PAGE_BITS_ALL : 0; return (vm_radix_insert(&pmap->pm_root, ml3)); } @@ -2003,9 +2006,11 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_ newl2 = ml3pa | PTE_V; /* - * Initialize the page table page. + * If this page table page was unmapped by a promotion, then it + * contains valid mappings. Zero it to invalidate those mappings. */ - pagezero((void *)PHYS_TO_DMAP(ml3pa)); + if (ml3->valid != 0) + pagezero((void *)PHYS_TO_DMAP(ml3pa)); /* * Demote the mapping. @@ -2065,6 +2070,8 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_ } else { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { + KASSERT(ml3->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_l2: l3 page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(ml3->wire_count == Ln_ENTRIES, ("pmap_remove_l2: l3 page wire count error")); @@ -2483,8 +2490,10 @@ pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_ "failure for va %#lx in pmap %p", va, pmap); return (false); } - if (va < VM_MAXUSER_ADDRESS) + if (va < VM_MAXUSER_ADDRESS) { + mpte->wire_count = Ln_ENTRIES; pmap_resident_count_inc(pmap, 1); + } } mptepa = VM_PAGE_TO_PHYS(mpte); firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa); @@ -2496,10 +2505,10 @@ pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_ newl3 = oldl2; /* - * If the page table page is new, initialize it. + * If the page table page is not leftover from an earlier promotion, + * initialize it. */ - if (mpte->wire_count == 1) { - mpte->wire_count = Ln_ENTRIES; + if (mpte->valid == 0) { for (i = 0; i < Ln_ENTRIES; i++) pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); } @@ -2590,7 +2599,7 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); KASSERT(ml3->pindex == pmap_l2_pindex(va), ("pmap_promote_l2: page table page's pindex is wrong")); - if (pmap_insert_pt_page(pmap, ml3)) { + if (pmap_insert_pt_page(pmap, ml3, true)) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); @@ -2973,15 +2982,13 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t } vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { + /* + * Both pmap_remove_l2() and pmap_remove_l3() will + * leave the kernel page table page zero filled. + */ mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); - if (pmap_insert_pt_page(pmap, mt)) { - /* - * XXX Currently, this can't happen bacuse - * we do not perform pmap_enter(psind == 1) - * on the kernel pmap. - */ + if (pmap_insert_pt_page(pmap, mt, false)) panic("pmap_enter_l2: trie insert failed"); - } } else KASSERT(pmap_load(l2) == 0, ("pmap_enter_l2: non-zero L2 entry %p", l2)); @@ -3558,6 +3565,8 @@ pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entr } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { + KASSERT(ml3->valid == VM_PAGE_BITS_ALL, + ("pmap_remove_pages: l3 page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == Ln_ENTRIES, ("pmap_remove_pages: pte page wire count error"));
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201909151722.x8FHMU5e032148>