Date: Wed, 27 Nov 2019 19:34:33 +0000 (UTC) From: Alan Cox <alc@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r355136 - in stable/12/sys/arm64: arm64 include Message-ID: <201911271934.xARJYXw3077603@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: alc Date: Wed Nov 27 19:34:33 2019 New Revision: 355136 URL: https://svnweb.freebsd.org/changeset/base/355136 Log: MFC r350579,r350741,r352584 Enable superpage promotion within the kernel pmap. Ordinarily, during a superpage promotion or demotion within a pmap, the pmap's lock ensures that other operations on the pmap don't observe the old mapping being broken before the new mapping is established. However, pmap_kextract() doesn't acquire the kernel pmap's lock, so it may observe the broken mapping. And, if it does, it returns an incorrect result. This revision implements a lock-free solution to this problem in pmap_update_entry() and pmap_kextract() because pmap_kextract() can't acquire the kernel pmap's lock. In case a translation fault on the kernel address space occurs from within a critical section, we must perform a lock-free check on the faulting address. Modified: stable/12/sys/arm64/arm64/pmap.c stable/12/sys/arm64/include/pte.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/arm64/arm64/pmap.c ============================================================================== --- stable/12/sys/arm64/arm64/pmap.c Wed Nov 27 19:32:29 2019 (r355135) +++ stable/12/sys/arm64/arm64/pmap.c Wed Nov 27 19:34:33 2019 (r355136) @@ -1128,40 +1128,35 @@ vm_paddr_t pmap_kextract(vm_offset_t va) { pt_entry_t *pte, tpte; - vm_paddr_t pa; - int lvl; - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { - pa = DMAP_TO_PHYS(va); - } else { - pa = 0; - pte = pmap_pte(kernel_pmap, va, &lvl); - if (pte != NULL) { - tpte = pmap_load(pte); - pa = tpte & ~ATTR_MASK; - switch(lvl) { - case 1: - KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, - ("pmap_kextract: Invalid L1 pte found: %lx", - tpte & ATTR_DESCR_MASK)); - pa |= (va & L1_OFFSET); - break; - case 2: - KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, - ("pmap_kextract: Invalid L2 pte found: %lx", - tpte & ATTR_DESCR_MASK)); - pa |= (va & L2_OFFSET); - break; - case 3: - KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, - ("pmap_kextract: Invalid L3 pte found: %lx", - tpte & ATTR_DESCR_MASK)); - pa |= (va & L3_OFFSET); - break; - } - } - } - return (pa); + if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) + return (DMAP_TO_PHYS(va)); + pte = pmap_l1(kernel_pmap, va); + if (pte == NULL) + return (0); + + /* + * A concurrent pmap_update_entry() will clear the entry's valid bit + * but leave the rest of the entry unchanged. Therefore, we treat a + * non-zero entry as being valid, and we ignore the valid bit when + * determining whether the entry maps a block, page, or table. + */ + tpte = pmap_load(pte); + if (tpte == 0) + return (0); + if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) + return ((tpte & ~ATTR_MASK) | (va & L1_OFFSET)); + pte = pmap_l1_to_l2(&tpte, va); + tpte = pmap_load(pte); + if (tpte == 0) + return (0); + if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) + return ((tpte & ~ATTR_MASK) | (va & L2_OFFSET)); + pte = pmap_l2_to_l3(&tpte, va); + tpte = pmap_load(pte); + if (tpte == 0) + return (0); + return ((tpte & ~ATTR_MASK) | (va & L3_OFFSET)); } /*************************************************** @@ -3020,8 +3015,12 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_ent */ intr = intr_disable(); - /* Clear the old mapping */ - pmap_clear(pte); + /* + * Clear the old mapping's valid bit, but leave the rest of the entry + * unchanged, so that a lockless, concurrent pmap_kextract() can still + * lookup the physical address. + */ + pmap_clear_bits(pte, ATTR_DESCR_VALID); pmap_invalidate_range_nopin(pmap, va, va + size); /* Create the new mapping */ @@ -3423,8 +3422,7 @@ validate: } #if VM_NRESERVLEVEL > 0 - if (pmap != pmap_kernel() && - (mpte == NULL || mpte->wire_count == NL3PG) && + if ((mpte == NULL || mpte->wire_count == NL3PG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { @@ -5835,23 +5833,33 @@ pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) case ISS_DATA_DFSC_TF_L1: case ISS_DATA_DFSC_TF_L2: case ISS_DATA_DFSC_TF_L3: - PMAP_LOCK(pmap); - /* Ask the MMU to check the address */ - intr = intr_disable(); - if (pmap == kernel_pmap) - par = arm64_address_translate_s1e1r(far); - else - par = arm64_address_translate_s1e0r(far); - intr_restore(intr); - PMAP_UNLOCK(pmap); - /* - * If the translation was successful the address was invalid - * due to a break-before-make sequence. We can unlock and - * return success to the trap handler. + * Retry the translation. A break-before-make sequence can + * produce a transient fault. */ - if (PAR_SUCCESS(par)) - rv = KERN_SUCCESS; + if (pmap == kernel_pmap) { + /* + * The translation fault may have occurred within a + * critical section. Therefore, we must check the + * address without acquiring the kernel pmap's lock. + */ + if (pmap_kextract(far) != 0) + rv = KERN_SUCCESS; + } else { + PMAP_LOCK(pmap); + /* Ask the MMU to check the address. */ + intr = intr_disable(); + par = arm64_address_translate_s1e0r(far); + intr_restore(intr); + PMAP_UNLOCK(pmap); + + /* + * If the translation was successful, then we can + * return success to the trap handler. + */ + if (PAR_SUCCESS(par)) + rv = KERN_SUCCESS; + } break; } Modified: stable/12/sys/arm64/include/pte.h ============================================================================== --- stable/12/sys/arm64/include/pte.h Wed Nov 27 19:32:29 2019 (r355135) +++ stable/12/sys/arm64/include/pte.h Wed Nov 27 19:34:33 2019 (r355136) @@ -71,7 +71,12 @@ typedef uint64_t pt_entry_t; /* page table entry */ #define ATTR_DEFAULT (ATTR_AF | ATTR_SH(ATTR_SH_IS)) -#define ATTR_DESCR_MASK 3 +#define ATTR_DESCR_MASK 3 +#define ATTR_DESCR_VALID 1 +#define ATTR_DESCR_TYPE_MASK 2 +#define ATTR_DESCR_TYPE_TABLE 2 +#define ATTR_DESCR_TYPE_PAGE 2 +#define ATTR_DESCR_TYPE_BLOCK 0 /* Level 0 table, 512GiB per entry */ #define L0_SHIFT 39
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201911271934.xARJYXw3077603>