Date: Fri, 24 Apr 2020 21:21:23 +0000 (UTC)
From: Mark Johnston <markj@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r360280 - head/sys/mips/mips
Message-ID: <202004242121.03OLLNgp087773@repo.freebsd.org>
Author: markj
Date: Fri Apr 24 21:21:23 2020
New Revision: 360280
URL: https://svnweb.freebsd.org/changeset/base/360280

Log:
  Fix a race between _pmap_unwire_ptp() and MipsDoTLBMiss().

  MipsDoTLBMiss() will load a segmap entry or pde, check that it isn't
  zero, and then chase that pointer to a physical page.  If that page
  has been freed in the interim, it will read garbage and go on to
  populate the TLB with it.

  This can happen because pmap_unwire_ptp zeros out the pde and
  vm_page_free_zero()s the ptp (or, recursively, zeros out the segmap
  entry and vm_page_free_zero()s the pdp) without interlocking against
  MipsDoTLBMiss().  The pmap is locked, and pvh_global_lock may or may
  not be held, but this is not enough.

  Solve this issue by inserting TLB shootdowns within
  _pmap_unwire_ptp(); as MipsDoTLBMiss() runs with IRQs deferred, the
  IPIs involved in TLB shootdown are sufficient to ensure that
  MipsDoTLBMiss() sees either a zero segmap entry / pde or a non-zero
  entry and the pointed-to page still not freed.

  Submitted by:	Nathaniel Filardo <nwf20@cl.cam.ac.uk>
  Reviewed by:	kib
  MFC after:	2 weeks
  Differential Revision:	https://reviews.freebsd.org/D24491

Modified:
  head/sys/mips/mips/pmap.c

Modified: head/sys/mips/mips/pmap.c
==============================================================================
--- head/sys/mips/mips/pmap.c	Fri Apr 24 19:43:07 2020	(r360279)
+++ head/sys/mips/mips/pmap.c	Fri Apr 24 21:21:23 2020	(r360280)
@@ -1004,18 +1004,26 @@ static void
 _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pd_entry_t *pde;
+	vm_offset_t sva, eva;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/*
 	 * unmap the page table page
 	 */
 #ifdef __mips_n64
-	if (m->pindex < NUPDE)
+	if (m->pindex < NUPDE) {
 		pde = pmap_pde(pmap, va);
-	else
+		sva = va & ~PDRMASK;
+		eva = sva + NBPDR;
+	} else {
 		pde = pmap_segmap(pmap, va);
+		sva = va & ~SEGMASK;
+		eva = sva + NBSEG;
+	}
 #else
 	pde = pmap_pde(pmap, va);
+	sva = va & ~SEGMASK;
+	eva = sva + NBSEG;
 #endif
 	*pde = 0;
 	pmap->pm_stats.resident_count--;
@@ -1026,12 +1034,22 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_
 		vm_page_t pdpg;
 
 		/*
-		 * Recursively decrement next level pagetable refcount
+		 * Recursively decrement next level pagetable refcount.
+		 * Either that shoots down a larger range from TLBs (below)
+		 * or we're to shoot down just the page in question.
 		 */
 		pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
 		pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
-		pmap_unwire_ptp(pmap, va, pdpg);
+		if (!pmap_unwire_ptp(pmap, va, pdpg)) {
+			pmap_invalidate_range(pmap, sva, eva);
+		}
+	} else {
+		/* Segmap entry shootdown */
+		pmap_invalidate_range(pmap, sva, eva);
 	}
+#else
+	/* Segmap entry shootdown */
+	pmap_invalidate_range(pmap, sva, eva);
 #endif
 
 	/*
@@ -1485,7 +1503,15 @@ pmap_pv_reclaim(pmap_t locked_pmap)
 				if (TAILQ_EMPTY(&m->md.pv_list))
 					vm_page_aflag_clear(m, PGA_WRITEABLE);
 				pc->pc_map[field] |= 1UL << bit;
-				pmap_unuse_pt(pmap, va, *pde);
+
+				/*
+				 * For simplicity, we will unconditionally shoot
+				 * down TLBs either at the end of this function
+				 * or at the top of the loop above if we switch
+				 * to a different pmap.
+				 */
+				(void)pmap_unuse_pt(pmap, va, *pde);
+
 				freed++;
 			}
 		}
@@ -1714,6 +1740,23 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
 
 /*
  * pmap_remove_pte: do the things to unmap a page in a process
+ *
+ * Returns true if this was the last PTE in the PT (and possibly the last PT in
+ * the PD, and possibly the last PD in the segmap), in which case...
+ *
+ * 1) the TLB has been invalidated for the whole PT's span (at least),
+ *    already, to ensure that MipsDoTLBMiss does not attempt to follow a
+ *    dangling pointer into a freed page.  No additional TLB shootdown is
+ *    required.
+ *
+ * 2) if this removal was part of a sweep to remove PTEs, it is safe to jump
+ *    to the PT span boundary and continue.
+ *
+ * 3) The given pde may now point onto a freed page and must not be
+ *    dereferenced
+ *
+ * If the return value is false, the TLB has not been shot down (and the segmap
+ * entry, PD, and PT all remain in place).
  */
 static int
 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
@@ -1782,8 +1825,12 @@ pmap_remove_page(struct pmap *pmap, vm_offset_t va)
 	if (!pte_test(ptq, PTE_V))
 		return;
 
-	(void)pmap_remove_pte(pmap, ptq, va, *pde);
-	pmap_invalidate_page(pmap, va);
+	/*
+	 * Remove this PTE from the PT.  If this is the last one, then
+	 * the TLB has already been shot down, so don't bother again
+	 */
+	if (!pmap_remove_pte(pmap, ptq, va, *pde))
+		pmap_invalidate_page(pmap, va);
 }
 
 /*
@@ -1797,7 +1844,9 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
 {
 	pd_entry_t *pde, *pdpe;
 	pt_entry_t *pte;
-	vm_offset_t va, va_next;
+	vm_offset_t va_next;
+	vm_offset_t va_init, va_fini;
+	bool need_tlb_shootdown;
 
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
@@ -1826,6 +1875,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
 			continue;
 		}
 #endif
+
+		/* Scan up to the end of the page table pointed to by pde */
 		va_next = (sva + NBPDR) & ~PDRMASK;
 		if (va_next < sva)
 			va_next = eva;
@@ -1842,25 +1893,44 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
 		if (va_next > eva)
 			va_next = eva;
 
-		va = va_next;
+		need_tlb_shootdown = false;
+		va_init = sva;
+		va_fini = va_next;
 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next;
 		    pte++, sva += PAGE_SIZE) {
+
+			/* Skip over invalid entries; no need to shootdown */
 			if (!pte_test(pte, PTE_V)) {
-				if (va != va_next) {
-					pmap_invalidate_range(pmap, va, sva);
-					va = va_next;
-				}
+				/*
+				 * If we have not yet found a valid entry, then
+				 * we can move the lower edge of the region to
+				 * invalidate to the next PTE.
+				 */
+				if (!need_tlb_shootdown)
+					va_init = sva + PAGE_SIZE;
 				continue;
 			}
-			if (va == va_next)
-				va = sva;
+
+			/*
+			 * A valid entry; the range we are shooting down must
+			 * include this page.  va_fini is used instead of sva
+			 * so that if the range ends with a run of !PTE_V PTEs,
+			 * but doesn't clear out so much that pmap_remove_pte
+			 * removes the entire PT, we won't include these !PTE_V
+			 * entries in the region to be shot down.
+			 */
+			va_fini = sva + PAGE_SIZE;
+
 			if (pmap_remove_pte(pmap, pte, sva, *pde)) {
-				sva += PAGE_SIZE;
+				/* Entire PT removed and TLBs shot down. */
+				need_tlb_shootdown = false;
 				break;
+			} else {
+				need_tlb_shootdown = true;
 			}
 		}
-		if (va != va_next)
-			pmap_invalidate_range(pmap, va, sva);
+		if (need_tlb_shootdown)
+			pmap_invalidate_range(pmap, va_init, va_fini);
 	}
 out:
 	rw_wunlock(&pvh_global_lock);
@@ -1930,10 +2000,11 @@ pmap_remove_all(vm_page_t m)
 			    __func__, (void *)pv->pv_va,
 			    (uintmax_t)tpte));
 			vm_page_dirty(m);
 		}
-		pmap_invalidate_page(pmap, pv->pv_va);
+		if (!pmap_unuse_pt(pmap, pv->pv_va, *pde))
+			pmap_invalidate_page(pmap, pv->pv_va);
+
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-		pmap_unuse_pt(pmap, pv->pv_va, *pde);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
@@ -2812,7 +2883,12 @@ pmap_remove_pages(pmap_t pmap)
 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 				if (TAILQ_EMPTY(&m->md.pv_list))
 					vm_page_aflag_clear(m, PGA_WRITEABLE);
-				pmap_unuse_pt(pmap, pv->pv_va, *pde);
+
+				/*
+				 * For simplicity, unconditionally call
+				 * pmap_invalidate_all(), below.
+				 */
+				(void)pmap_unuse_pt(pmap, pv->pv_va, *pde);
 			}
 		}
 		if (allfree) {
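To make the ordering argument in the log message concrete, here is a minimal
user-space sketch of the fix (hypothetical names throughout; illustrative C,
not the committed kernel code).  A pthread mutex stands in both for the
"IRQs deferred" window of MipsDoTLBMiss() and for the IPI rendezvous done by
pmap_invalidate_range(): the teardown side first clears the shared entry and
only then rendezvouses, which guarantees that any refill still holding the
stale pointer has finished dereferencing it before the page is freed.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the segmap entry / pde that the refill handler chases. */
static _Atomic(int *) pde;
/* Stand-in for "IRQs deferred" plus the TLB-shootdown IPI rendezvous. */
static pthread_mutex_t ipi_lock = PTHREAD_MUTEX_INITIALIZER;

/* MipsDoTLBMiss() analogue: load the entry, test it, chase the pointer. */
static void *
refill(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		pthread_mutex_lock(&ipi_lock);	/* IPIs deferred from here... */
		int *p = atomic_load(&pde);
		if (p != NULL && *p != 42)	/* deref: page must still be live */
			abort();
		pthread_mutex_unlock(&ipi_lock); /* ...to here. */
	}
	return (NULL);
}

/* _pmap_unwire_ptp() analogue: clear the entry, shoot down, then free. */
static void *
teardown(void *arg)
{
	(void)arg;
	int *p = atomic_exchange(&pde, NULL);	/* "*pde = 0" */
	/*
	 * pmap_invalidate_range() analogue: once the lock is acquired, no
	 * refill section that loaded the old pointer can still be running,
	 * and any later refill observes the zeroed entry.
	 */
	pthread_mutex_lock(&ipi_lock);
	pthread_mutex_unlock(&ipi_lock);
	free(p);				/* vm_page_free_zero() analogue */
	return (NULL);
}

int
main(void)
{
	pthread_t r, t;
	int *page = malloc(sizeof(*page));

	*page = 42;
	atomic_store(&pde, page);
	pthread_create(&r, NULL, refill, NULL);
	pthread_create(&t, NULL, teardown, NULL);
	pthread_join(r, NULL);
	pthread_join(t, NULL);
	printf("no use-after-free\n");
	return (0);
}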
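And here is a stand-alone sketch of the va_init/va_fini bookkeeping performed
by the reworked pmap_remove() loop (simplified; pte_valid(), remove_pte(), and
invalidate_range() are hypothetical stand-ins for the pmap.c primitives).  The
flushed range starts at the first valid PTE and ends just past the last one,
so leading and trailing runs of invalid PTEs are excluded, and no flush is
issued at all when remove_pte() reports that the whole page table was torn
down, since _pmap_unwire_ptp() has already shot the TLB down in that case.

#include <stdbool.h>
#include <stdio.h>

#define	PAGE_SIZE	4096UL

/* Hypothetical stand-ins for pte_test(PTE_V), pmap_remove_pte(), and
 * pmap_invalidate_range(). */
static bool pte_valid(const int *pte) { return (*pte != 0); }
static bool remove_pte(int *pte) { *pte = 0; return (false); } /* PT survives */
static void
invalidate_range(unsigned long sva, unsigned long eva)
{
	printf("flush [%#lx, %#lx)\n", sva, eva);
}

/* Remove the PTEs covering [sva, va_next) and flush only what was removed. */
static void
remove_span(int *ptes, unsigned long sva, unsigned long va_next)
{
	unsigned long va_init = sva, va_fini = va_next;
	bool need_shootdown = false;

	for (int *pte = ptes; sva != va_next; pte++, sva += PAGE_SIZE) {
		if (!pte_valid(pte)) {
			/* Trim leading invalid PTEs from the flush range. */
			if (!need_shootdown)
				va_init = sva + PAGE_SIZE;
			continue;
		}
		/*
		 * The flush range must cover this page; trailing invalid
		 * PTEs after the last valid one stay excluded.
		 */
		va_fini = sva + PAGE_SIZE;
		if (remove_pte(pte)) {
			/* Whole PT freed: the TLB was already shot down. */
			need_shootdown = false;
			break;
		}
		need_shootdown = true;
	}
	if (need_shootdown)
		invalidate_range(va_init, va_fini);
}

int
main(void)
{
	int ptes[8] = { 0, 0, 1, 1, 0, 1, 0, 0 };

	/* Expect a flush of pages 2..5 only: [0x2000, 0x6000). */
	remove_span(ptes, 0, 8 * PAGE_SIZE);
	return (0);
}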