Date: Sat, 21 Feb 2015 21:59:05 +0000 (UTC) From: Nathan Whitehorn <nwhitehorn@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r279112 - user/nwhitehorn/ppc64-pmap-rework/aim Message-ID: <201502212159.t1LLx5QP096159@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: nwhitehorn Date: Sat Feb 21 21:59:04 2015 New Revision: 279112 URL: https://svnweb.freebsd.org/changeset/base/279112 Log: Simplicity and correctness improvements to native page table access. Modified: user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c Modified: user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c ============================================================================== --- user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c Sat Feb 21 21:28:45 2015 (r279111) +++ user/nwhitehorn/ppc64-pmap-rework/aim/moea64_native.c Sat Feb 21 21:59:04 2015 (r279112) @@ -196,10 +196,10 @@ static int64_t moea64_pte_unset_native(m /* * Utility routines. */ -static void moea64_bootstrap_native(mmu_t mmup, - vm_offset_t kernelstart, vm_offset_t kernelend); -static void moea64_cpu_bootstrap_native(mmu_t, int ap); -static void tlbia(void); +static void moea64_bootstrap_native(mmu_t mmup, + vm_offset_t kernelstart, vm_offset_t kernelend); +static void moea64_cpu_bootstrap_native(mmu_t, int ap); +static void tlbia(void); static mmu_method_t moea64_native_methods[] = { /* Internal interfaces */ @@ -256,8 +256,6 @@ moea64_pte_clear_native(mmu_t mmu, struc moea64_pte_from_pvo(pvo, &properpt); - /* See "Resetting the Reference Bit" in arch manual */ - rw_rlock(&moea64_eviction_lock); if ((pt->pte_hi & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { @@ -266,52 +264,29 @@ moea64_pte_clear_native(mmu_t mmu, struc return (-1); } - /* - * As shown in Section 7.6.3.2.3 - */ - PTESYNC(); - ptelo = atomic_swap_32((volatile int32_t *)(&pt->pte_lo) + 1, - (pt->pte_lo & ~ptebit) & 0xffffffff); - ptelo |= pt->pte_lo; - rw_runlock(&moea64_eviction_lock); + if (ptebit == LPTE_REF) { + /* See "Resetting the Reference Bit" in arch manual */ + PTESYNC(); + /* 2-step here safe: precision is not guaranteed */ + ptelo |= pt->pte_lo; + + /* One-byte store to avoid touching the C bit */ + ((volatile uint8_t *)(&pt->pte_lo))[6] = + ((uint8_t *)(&properpt.pte_lo))[6]; + 
rw_runlock(&moea64_eviction_lock); - critical_enter(); - TLBIE(pvo->pvo_vpn); - critical_exit(); + critical_enter(); + TLBIE(pvo->pvo_vpn); + critical_exit(); + } else { + rw_runlock(&moea64_eviction_lock); + ptelo = moea64_pte_unset_native(mmu, pvo); + moea64_pte_insert_native(mmu, pvo); + } return (ptelo & (LPTE_REF | LPTE_CHG)); } -static int -moea64_pte_set_native(struct lpte *pvo_pt, uintptr_t offset) -{ - volatile struct lpte *pt = moea64_pteg_table + offset; - int result; - - result = atomic_cmpset_32((volatile int32_t *)(&pt->pte_hi) + 1, - pt->pte_hi & ~(LPTE_LOCKED | LPTE_VALID), LPTE_LOCKED); - if (!result) - return (-1); - - /* - * Update the PTE as defined in section 7.6.3.1. - */ - pt->pte_lo = pvo_pt->pte_lo; - EIEIO(); -#ifdef __powerpc64__ - pt->pte_hi = pvo_pt->pte_hi; -#else - *(volatile int32_t *)(&pt->pte_hi) = pvo_pt->pte_hi >> 32; - EIEIO(); - *((volatile int32_t *)(&pt->pte_hi) + 1) = pvo_pt->pte_hi & 0xffffffff; -#endif - PTESYNC(); - - /* Keep statistics */ - moea64_pte_valid++; - return (0); -} - static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *pvo) { @@ -332,7 +307,8 @@ moea64_pte_unset_native(mmu_t mmu, struc } /* - * Invalidate the pte. + * Invalidate the pte, briefly locking it to collect RC bits. No + * atomics needed since this is protected against eviction by the lock. */ isync(); critical_enter(); @@ -355,41 +331,30 @@ moea64_pte_replace_native(mmu_t mmu, str { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; - int result; - volatile int32_t lowbits; + int64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); - moea64_pte_from_pvo(pvo, &properpt); - - result = atomic_cmpset_32((volatile int32_t *)(&pt->pte_hi) + 1, - properpt.pte_hi & 0xffffffff, (properpt.pte_hi | LPTE_LOCKED) - & 0xffffffff); - - if (result && flags == 0) { - /* Just some software bits changing. The order in which this is - * done is important on 32-bit systems. 
*/ - *(volatile int32_t *)(&pt->pte_hi) = properpt.pte_hi >> 32; - *(volatile int32_t *)(&pt->pte_lo) = properpt.pte_lo >> 32; - pt->pte_lo = properpt.pte_lo & 0xffffffff; - lowbits = atomic_swap_32((volatile int32_t *)(&pt->pte_lo) + 1, - *((int32_t *)(&properpt.pte_lo) + 1)); - EIEIO(); - *((volatile int32_t *)(&pt->pte_hi) + 1) = - properpt.pte_hi & 0xffffffff; - return (lowbits & (LPTE_REF | LPTE_CHG)); + if (flags == 0) { + /* Just some software bits changing. */ + moea64_pte_from_pvo(pvo, &properpt); + + rw_rlock(&moea64_eviction_lock); + if ((pt->pte_hi & LPTE_AVPN_MASK) != + (properpt.pte_hi & LPTE_AVPN_MASK)) { + rw_runlock(&moea64_eviction_lock); + return (-1); + } + pt->pte_hi = properpt.pte_hi; + ptelo = pt->pte_lo; + rw_runlock(&moea64_eviction_lock); + } else { + /* Otherwise, need reinsertion and deletion */ + ptelo = moea64_pte_unset_native(mmu, pvo); + moea64_pte_insert_native(mmu, pvo); } - /* - * Anything else requires invalidation and reinsertion, so just do that - */ - - lowbits = -1; - if (result) - lowbits = moea64_pte_unset_native(mmu, pvo); - moea64_pte_insert_native(mmu, pvo); - - return (lowbits); + return (ptelo); } static void @@ -524,38 +489,106 @@ tlbia(void) TLBSYNC(); } -static __inline int -moea64_pte_spillable_ident(uintptr_t slotbase) +static int +atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi) +{ + int ret; + uint32_t oldhihalf; + + /* + * Note: in principle, if just the locked bit were set here, we + * could avoid needing the eviction lock. However, eviction occurs + * so rarely that it isn't worth bothering about in practice. + */ + + __asm __volatile ( + "1:\tlwarx %1, 0, %3\n\t" /* load old value */ + "and. %0,%1,%4\n\t" /* check if any bits set */ + "bne 2f\n\t" /* exit if any set */ + "stwcx. %5, 0, %3\n\t" /* attempt to store */ + "bne- 1b\n\t" /* spin if failed */ + "li %0, 1\n\t" /* success - retval = 1 */ + "b 3f\n\t" /* we've succeeded */ + "2:\n\t" + "stwcx. 
%1, 0, %3\n\t" /* clear reservation (74xx) */ + "li %0, 0\n\t" /* failure - retval = 0 */ + "3:\n\t" + : "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi) + : "r" ((volatile char *)&pte->pte_hi + 4), "r" (bitmask), + "r" (LPTE_LOCKED), "m" (pte->pte_hi) + : "cr0", "cr1", "cr2", "memory"); + + *oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf; + + return (ret); +} + +static uintptr_t +moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase, + uint64_t mask) { - volatile struct lpte *pt; - int i, j; + volatile struct lpte *pt; + uint64_t oldptehi, va; uintptr_t k; + int i, j; /* Start at a random slot */ i = mftb() % 8; - k = -1; for (j = 0; j < 8; j++) { - pt = &moea64_pteg_table[slotbase + (i + j) % 8]; - if (pt->pte_hi & LPTE_WIRED) - continue; - - /* This is a candidate, so remember it */ - k = (i + j) % 8; - - /* Try to get a page that has not been used lately */ - if (!(pt->pte_lo & LPTE_REF) || !(pt->pte_hi & LPTE_VALID)) - return (k); + k = slotbase + (i + j) % 8; + pt = &moea64_pteg_table[k]; + /* Invalidate and seize lock only if no bits in mask set */ + if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */ + break; } - + + if (j == 8) + return (-1); + + if (oldptehi & LPTE_VALID) { + KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry")); + /* + * Need to invalidate old entry completely: see + * "Modifying a Page Table Entry". Need to reconstruct + * the virtual address for the outgoing entry to do that. + */ + if (oldptehi & LPTE_BIG) + va = oldptehi >> moea64_large_page_shift; + else + va = oldptehi >> ADDR_PIDX_SHFT; + if (oldptehi & LPTE_HID) + va = (((k >> 3) ^ moea64_pteg_mask) ^ va) & + VSID_HASH_MASK; + else + va = ((k >> 3) ^ va) & VSID_HASH_MASK; + va |= (oldptehi & LPTE_AVPN_MASK) << + (ADDR_API_SHFT64 - ADDR_PIDX_SHFT); + PTESYNC(); + TLBIE(va); + moea64_pte_valid--; + moea64_pte_overflow++; + } + + /* + * Update the PTE as per "Adding a Page Table Entry". Lock is released + * by setting the high doubleword. 
+ */ + pt->pte_lo = pvo_pt->pte_lo; + EIEIO(); + pt->pte_hi = pvo_pt->pte_hi; + PTESYNC(); + + /* Keep statistics */ + moea64_pte_valid++; + return (k); } static int moea64_pte_insert_native(mmu_t mmu, struct pvo_entry *pvo) { - volatile struct lpte *pt; - struct lpte insertpt; - int i; + struct lpte insertpt; + uintptr_t slot; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); @@ -568,17 +601,13 @@ moea64_pte_insert_native(mmu_t mmu, stru /* * First try primary hash. */ - pvo->pvo_pte.slot &= ~7UL; /* Base slot address */ - for (pt = &moea64_pteg_table[pvo->pvo_pte.slot], i = 0; i < 8; - i++, pt++) { - if (!(pt->pte_hi & LPTE_VALID)) { - if (moea64_pte_set_native(&insertpt, - pvo->pvo_pte.slot + i) == 0) { - rw_runlock(&moea64_eviction_lock); - pvo->pvo_pte.slot += i; - return (0); - } - } + pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ + slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, + LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); + if (slot != -1) { + rw_runlock(&moea64_eviction_lock); + pvo->pvo_pte.slot = slot; + return (0); } /* @@ -587,17 +616,12 @@ moea64_pte_insert_native(mmu_t mmu, stru pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); - - for (pt = &moea64_pteg_table[pvo->pvo_pte.slot], i = 0; i < 8; - i++, pt++) { - if (!(pt->pte_hi & LPTE_VALID)) { - if (moea64_pte_set_native(&insertpt, - pvo->pvo_pte.slot + i) == 0) { - rw_runlock(&moea64_eviction_lock); - pvo->pvo_pte.slot += i; - return (0); - } - } + slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, + LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); + if (slot != -1) { + rw_runlock(&moea64_eviction_lock); + pvo->pvo_pte.slot = slot; + return (0); } /* @@ -610,51 +634,29 @@ moea64_pte_insert_native(mmu_t mmu, stru rw_wlock(&moea64_eviction_lock); } - i = moea64_pte_spillable_ident(pvo->pvo_pte.slot); - if (i < 0) { - /* Try other hash table? 
*/ - pvo->pvo_vaddr ^= PVO_HID; - insertpt.pte_hi ^= LPTE_HID; - pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); - i = moea64_pte_spillable_ident(pvo->pvo_pte.slot); + slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, + LPTE_WIRED | LPTE_LOCKED); + if (slot != -1) { + rw_wunlock(&moea64_eviction_lock); + pvo->pvo_pte.slot = slot; + return (0); } - if (i < 0) { - /* No freeable slots in either PTEG? We're hosed. */ + /* Try other hash table. Now we're getting desperate... */ + pvo->pvo_vaddr ^= PVO_HID; + insertpt.pte_hi ^= LPTE_HID; + pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); + slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, + LPTE_WIRED | LPTE_LOCKED); + if (slot != -1) { rw_wunlock(&moea64_eviction_lock); - panic("moea64_pte_insert: overflow"); - return (-1); + pvo->pvo_pte.slot = slot; + return (0); } - pvo->pvo_pte.slot += i; - - /* - * Collect ref/changed bits from the victim and replace it. - */ - pt = &moea64_pteg_table[pvo->pvo_pte.slot]; - - /* - * Invalidate the pte. - */ - isync(); - critical_enter(); - pt->pte_hi &= ~LPTE_VALID; - PTESYNC(); - TLBIE((pt->pte_hi & LPTE_AVPN_MASK) << - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)); - critical_exit(); - - /* New translation */ - pt->pte_lo = insertpt.pte_lo; - EIEIO(); - pt->pte_hi = insertpt.pte_hi; - PTESYNC(); - + /* No freeable slots in either PTEG? We're hosed. */ rw_wunlock(&moea64_eviction_lock); - - /* Keep statistics */ - moea64_pte_valid++; - - return (0); + panic("moea64_pte_insert: overflow"); + return (-1); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201502212159.t1LLx5QP096159>