Date: Wed, 19 Mar 2014 12:48:31 -0600 From: Ian Lepore <ian@FreeBSD.org> To: Wojciech Macek <wma@semihalf.com> Cc: freebsd-arm@FreeBSD.org Subject: Re: arm SMP on Cortex-A15 Message-ID: <1395254911.80941.9.camel@revolution.hippie.lan> In-Reply-To: <1395149146.1149.586.camel@revolution.hippie.lan> References: <CANsEV8euHTsfviiCMP_aet3qYiK2T-oK%2B-37eay7zAPH2S2vaA@mail.gmail.com> <20131220125638.GA5132@mail.bsdpad.com> <20131222092913.GA89153@mail.bsdpad.com> <CANsEV8fSoygoSUyQqKoEQ7tRxjqDOwrPD8dU7O2V2PXRj35j4A@mail.gmail.com> <20131222123636.GA61193@ci0.org> <CANsEV8fWvUkFHi8DP6Nr807RwPDB1iZrO39fpfa44qOkJPidZA@mail.gmail.com> <1395149146.1149.586.camel@revolution.hippie.lan>
next in thread | previous in thread | raw e-mail | index | archive | help
--=-YwiFRevDQYDNGbo/Gkof Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit On Tue, 2014-03-18 at 07:25 -0600, Ian Lepore wrote: > On Mon, 2014-03-17 at 09:29 +0100, Wojciech Macek wrote: > > Hi, > > > > Finally I've found some time to continue SMP hacking. It seems that I > > isolated the tlb/pmap failures and developed two simple patches that help. > > There are still some pmap changes and TEX remap left, but I don't want to > > use them now. > > https://drive.google.com/folderview?id=0B-7yTLrPxaWtSzZPUGgtM3pnUjg&usp=sharing > > * 01 - ensure that TTB is set before TLB invalidation and flush BTB to > > comply the specs > > * 02 - add missing TLB invalidations to pmap and fix invalidation order > > > > I chose buildworld -j4 as a stresstest, and run it on Arndale (USB rootfs) > > and a different 4-core a15 chip (SATA rootfs). On both setups test passed > > and was significantly faster than the one with previous patchset. > > > > I'd like to submit these changes to FreeBSD tree (with some help from our > > local committers), so any comments and testing are really appreciated. > > > > Best regards, > > Wojtek > > The first patch looks fine and is working without any problems for me. > > For the second patch, I propose the attached similar patch which > combines your changes with some I got from Olivier. The main > differences are moving the tlb flush outside the loop when propagating a > change to all L1s, and moving the tlb flush (rather than adding another) > in pmap_kenter_internal(). > > I believe even with the second patch there may still be some missing tlb > flushes. > > -- Ian Following up with a third version of the pmap-v6.c patch. On top of the previous versions, this: * ensures that cpu_cpwait() is consistently used after every tlb flush (sometimes it's a single wait after flushes that happen in a loop). 
* adds a tlb flush to pmap_free_l2_bucket() * adds a tlb flush to pmap_bootstrap() * adds a tlb flush to pmap_grow_map() * adds a tlb flush to pmap_grow_l2_bucket() * adds a tlb flush to pmap_kenter_section() I'm not sure there's any armv6/7 platform that needs the cpu_cpwait(), but if it's going to appear in the code at all, it should at least be consistent. :) -- Ian --=-YwiFRevDQYDNGbo/Gkof Content-Disposition: inline; filename="smp_patch_02b.patch" Content-Type: text/x-patch; name="smp_patch_02b.patch"; charset="us-ascii" Content-Transfer-Encoding: 7bit Index: sys/arm/arm/pmap-v6.c =================================================================== --- sys/arm/arm/pmap-v6.c (revision 263112) +++ sys/arm/arm/pmap-v6.c (working copy) @@ -844,6 +844,8 @@ pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) { *pl1pd = 0; PTE_SYNC(pl1pd); + cpu_tlb_flushD_SE((vm_offset_t)ptep); + cpu_cpwait(); } /* @@ -1047,6 +1049,7 @@ small_mappings: cpu_tlb_flushID_SE(pv->pv_va); else if (PTE_BEEN_REFD(opte)) cpu_tlb_flushD_SE(pv->pv_va); + cpu_cpwait(); } PMAP_UNLOCK(pmap); @@ -1644,7 +1647,7 @@ pmap_postinit(void) *ptep = pte; PTE_SYNC(ptep); cpu_tlb_flushD_SE(va); - + cpu_cpwait(); va += PAGE_SIZE; } pmap_init_l1(l1, pl1pt); @@ -1948,6 +1951,8 @@ pmap_bootstrap(vm_offset_t firstaddr, struct pv_ad pmap_init_l1(l1, kernel_l1pt); cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); + cpu_tlb_flushID(); + cpu_cpwait(); virtual_avail = round_page(virtual_avail); virtual_end = vm_max_kernel_address; @@ -2034,7 +2039,8 @@ pmap_grow_map(vm_offset_t va, pt_entry_t cache_mod *ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF; pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0); PTE_SYNC(ptep); - + cpu_tlb_flushD_SE(va); + cpu_cpwait(); return (0); } @@ -2130,6 +2136,8 @@ pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va) L1_C_PROTO; PTE_SYNC(pl1pd); } + cpu_tlb_flushID_SE(va); + cpu_cpwait(); return (l2b); } @@ -2348,6 +2356,8 @@ 
pmap_kenter_section(vm_offset_t va, vm_offset_t pa l1->l1_kva[L1_IDX(va)] = pd; PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); } + cpu_tlb_flushD_SE(va); + cpu_cpwait(); } /* @@ -2387,14 +2397,6 @@ pmap_kenter_internal(vm_offset_t va, vm_offset_t p ptep = &l2b->l2b_kva[l2pte_index(va)]; opte = *ptep; - if (l2pte_valid(opte)) { - cpu_tlb_flushD_SE(va); - cpu_cpwait(); - } else { - if (opte == 0) - l2b->l2b_occupancy++; - } - if (flags & KENTER_CACHE) { *ptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, @@ -2405,10 +2407,17 @@ pmap_kenter_internal(vm_offset_t va, vm_offset_t p 0); } + PTE_SYNC(ptep); + if (l2pte_valid(opte)) { + cpu_tlb_flushD_SE(va); + } else { + if (opte == 0) + l2b->l2b_occupancy++; + } + cpu_cpwait(); + PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", (uint32_t) ptep, opte, *ptep)); - PTE_SYNC(ptep); - cpu_cpwait(); } void @@ -2474,10 +2483,10 @@ pmap_kremove(vm_offset_t va) opte = *ptep; if (l2pte_valid(opte)) { va = va & ~PAGE_MASK; + *ptep = 0; + PTE_SYNC(ptep); cpu_tlb_flushD_SE(va); cpu_cpwait(); - *ptep = 0; - PTE_SYNC(ptep); } } @@ -2710,6 +2719,7 @@ small_mappings: cpu_tlb_flushID(); else cpu_tlb_flushD(); + cpu_cpwait(); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); @@ -2763,6 +2773,7 @@ pmap_change_attr(vm_offset_t sva, vm_size_t len, i pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE); *ptep = pte; cpu_tlb_flushID_SE(tmpva); + cpu_cpwait(); dprintf("%s: for va:%x ptep:%x pte:%x\n", __func__, tmpva, (uint32_t)ptep, pte); @@ -2900,6 +2911,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offs else if (is_refd) cpu_tlb_flushD(); + cpu_cpwait(); } rw_wunlock(&pvh_global_lock); @@ -3166,6 +3178,7 @@ validate: cpu_tlb_flushID_SE(va); else if (is_refd) cpu_tlb_flushD_SE(va); + cpu_cpwait(); } if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) @@ -3713,6 +3726,7 @@ pmap_remove_section(pmap_t pmap, vm_offset_t sva) 
cpu_tlb_flushID_SE(sva); else cpu_tlb_flushD_SE(sva); + cpu_cpwait(); } /* @@ -3885,6 +3899,7 @@ pmap_promote_section(pmap_t pmap, vm_offset_t va) cpu_tlb_flushID(); else cpu_tlb_flushD(); + cpu_cpwait(); pmap_section_promotions++; CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x" @@ -4009,6 +4024,7 @@ pmap_demote_section(pmap_t pmap, vm_offset_t va) cpu_tlb_flushID_SE(va); else if (L1_S_REFERENCED(l1pd)) cpu_tlb_flushD_SE(va); + cpu_cpwait(); pmap_section_demotions++; CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x" @@ -4380,6 +4396,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse } } + *ptep = 0; + PTE_SYNC(ptep); if (pmap_is_current(pmap)) { total++; if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { @@ -4390,8 +4408,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) flushall = 1; } - *ptep = 0; - PTE_SYNC(ptep); sva += PAGE_SIZE; ptep++; @@ -4404,6 +4420,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offse rw_wunlock(&pvh_global_lock); if (flushall) cpu_tlb_flushID(); + cpu_cpwait(); PMAP_UNLOCK(pmap); } @@ -4923,6 +4940,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offse } } } + cpu_cpwait(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } --=-YwiFRevDQYDNGbo/Gkof--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1395254911.80941.9.camel>