Date: Mon, 10 Jun 2013 00:56:16 +0200 From: Olivier Houchard <cognet@ci0.org> To: Andrew Turner <andrew@fubar.geek.nz> Cc: freebsd-arm@freebsd.org Subject: Re: RFC: Patches with AXP support and pmap&smp fixes. Message-ID: <20130609225615.GA42548@ci0.org> In-Reply-To: <20130602130713.70b0b9f0@bender.Home> References: <517E8610.5050204@semihalf.com> <20130430142701.5bbfec2b@bender.lan> <20130430143311.GA71966@ci0.org> <518775B4.1010308@semihalf.com> <20130506134129.GA60131@ci0.org> <20130602130713.70b0b9f0@bender.Home>
next in thread | previous in thread | raw e-mail | index | archive | help
--7JfCtLOvnd9MIVvH Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Sun, Jun 02, 2013 at 01:07:13PM +0100, Andrew Turner wrote: > On Mon, 6 May 2013 15:41:29 +0200 > Olivier Houchard <cognet@ci0.org> wrote: > > > On Mon, May 06, 2013 at 11:19:48AM +0200, Grzegorz Bernacki wrote: > > > > > > Hi, > > > > > > Our patch fixes only initialization of pcpu in pcpu_init(). In a 4 > > > core setup it is possible that at least two cores are simultaneously > > > updating the queue of pcpus, which causes corruption. > > > I am not aware of any other problems with SMP. Let us try a setup > > > with WITNESS enabled and we'll see if we have the same problem. > > > Olivier, could you share your patch? In case we have the problem we > > > would like to have it. > > > > > > > Hi, > > > > Sure, I will dust it off and send it. Maybe not before Sunday though, > > I'll be mostly MIA for the week. I'm quite interested in you guys > > having a look at it. > > Did you manage to extract the patch? I don't remember seeing it. > > Andrew Hi Andrew, Sorry for the long delay. I finally took the time to update it to the latest -CURRENT and check that it boots. 
Regards, Olivier --7JfCtLOvnd9MIVvH Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="pcpu-patch.diff" Index: arm/arm/cpufunc_asm_armv7.S =================================================================== --- arm/arm/cpufunc_asm_armv7.S (revision 251584) +++ arm/arm/cpufunc_asm_armv7.S (working copy) @@ -57,9 +57,9 @@ #define PT_OUTER_WBWA (1 << 3) #ifdef SMP -#define PT_ATTR (PT_S|PT_INNER_WT|PT_OUTER_WT|PT_NOS) +#define PT_ATTR (PT_S|PT_INNER_WBWA|PT_OUTER_WBWA|PT_NOS) #else -#define PT_ATTR (PT_INNER_WT|PT_OUTER_WT) +#define PT_ATTR (PT_INNER_WBWA|PT_OUTER_WBWA) #endif ENTRY(armv7_setttb) @@ -98,7 +98,7 @@ ldr r1, .Lpage_mask bic r0, r0, r1 #ifdef SMP - mcr p15, 0, r0, c8, c3, 1 /* flush D tlb single entry Inner Shareable*/ + mcr p15, 0, r0, c8, c3, 3 /* flush D tlb single entry Inner Shareable*/ mcr p15, 0, r0, c7, c1, 6 /* flush BTB Inner Shareable */ #else mcr p15, 0, r0, c8, c7, 1 /* flush D tlb single entry */ @@ -113,6 +113,7 @@ ENTRY(armv7_dcache_wbinv_all) stmdb sp!, {r4, r5, r6, r7, r8, r9} + dsb /* Get cache level */ ldr r0, .Lcoherency_level ldr r3, [r0] @@ -188,6 +189,7 @@ and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 + dsb .Larmv7_wb_next: mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip @@ -203,6 +205,7 @@ and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 + dsb .Larmv7_wbinv_next: mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip @@ -222,6 +225,7 @@ and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 + dsb .Larmv7_inv_next: mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip @@ -237,6 +241,7 @@ and r2, r0, r3 add r1, r1, r2 bic r0, r0, r3 + dsb .Larmv7_id_wbinv_next: mcr p15, 0, r0, c7, c5, 1 /* Invalidate I cache SE with VA */ mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ Index: arm/arm/copystr.S =================================================================== --- arm/arm/copystr.S (revision 251584) +++ arm/arm/copystr.S (working copy) 
@@ -51,14 +51,12 @@ .align 0 #ifdef _ARM_ARCH_6 -#define GET_PCB(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) +KSTACK_LOCALS #else .Lpcb: .word _C_LABEL(__pcpu) + PC_CURPCB -#define GET_PCB(tmp) \ +#define GET_PCB(tmp, tmp2) \ ldr tmp, .Lpcb #endif @@ -114,8 +112,7 @@ moveq r0, #ENAMETOOLONG beq 2f - GET_PCB(r4) - ldr r4, [r4] + GET_PCB(r4, r5) #ifdef DIAGNOSTIC teq r4, #0x00000000 @@ -162,8 +159,7 @@ moveq r0, #ENAMETOOLONG beq 2f - GET_PCB(r4) - ldr r4, [r4] + GET_PCB(r4, r5) #ifdef DIAGNOSTIC teq r4, #0x00000000 Index: arm/arm/genassym.c =================================================================== --- arm/arm/genassym.c (revision 251584) +++ arm/arm/genassym.c (working copy) @@ -25,6 +25,8 @@ * */ +#include "opt_kstack_max_pages.h" + #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> @@ -69,6 +71,7 @@ ASSYM(PCB_R12, offsetof(struct pcb, un_32.pcb32_r12)); ASSYM(PCB_PC, offsetof(struct pcb, un_32.pcb32_pc)); ASSYM(PCB_SP, offsetof(struct pcb, un_32.pcb32_sp)); +ASSYM(PCB_CURTHREAD, offsetof(struct pcb, un_32.pcb32_curthread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); @@ -138,3 +141,6 @@ ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(NIRQ, NIRQ); + +ASSYM(KSTACK_ADDR_MASK, KSTACK_MAX_PAGES * PAGE_SIZE - 1); +ASSYM(KSTACK_PCB_ADDR, KSTACK_MAX_PAGES * PAGE_SIZE - sizeof(struct pcb)); Index: arm/arm/mp_machdep.c =================================================================== --- arm/arm/mp_machdep.c (revision 251584) +++ arm/arm/mp_machdep.c (working copy) @@ -160,12 +160,13 @@ } extern vm_paddr_t pmap_pa; + +void init_secondary_finish(void); + void init_secondary(int cpu) { struct pcpu *pc; - uint32_t loop_counter; - int start = 0, end = 0; cpu_setup(NULL); setttb(pmap_pa); @@ -195,7 +196,6 @@ ; /* Initialize curthread */ - KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pc->pc_curthread = pc->pc_idlethread; pc->pc_curpcb = 
pc->pc_idlethread->td_pcb; #ifdef ARM_VFP_SUPPORT @@ -203,7 +203,16 @@ vfp_init(); #endif + __asm __volatile("mov sp, %0\n" + "b init_secondary_finish\n" : : "r" (pc->pc_idlethread->td_pcb->un_32.pcb32_sp)); +} +void +init_secondary_finish(void) +{ + int start = 0, end = 0; + uint32_t loop_counter; + mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); Index: arm/arm/machdep.c =================================================================== --- arm/arm/machdep.c (revision 251584) +++ arm/arm/machdep.c (working copy) @@ -42,6 +42,7 @@ * Updated : 18/04/01 updated for new wscons */ +#include "opt_kstack_max_pages.h" #include "opt_compat.h" #include "opt_ddb.h" #include "opt_platform.h" @@ -106,6 +107,7 @@ #include <dev/ofw/openfirm.h> #endif +#define DEBUG #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else @@ -398,6 +400,7 @@ USPACE_UNDEF_STACK_TOP; pcb->un_32.pcb32_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; + pcb->un_32.pcb32_curthread = &thread0; vector_page_setprot(VM_PROT_READ); pmap_set_pcb_pagedir(pmap_kernel(), pcb); pmap_postinit(); @@ -874,9 +877,9 @@ set_pcpu(pcpup); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); - PCPU_SET(curthread, &thread0); + pcpup->pc_curthread = &thread0; #ifdef ARM_VFP_SUPPORT - PCPU_SET(cpu, 0); + pcpup->pc_cpu = 0; #endif } @@ -1028,6 +1031,7 @@ thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; + thread0.td_pcb->un_32.pcb32_curthread = &thread0; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } @@ -1181,6 +1185,12 @@ phys_avail[j + 1] = 0; } +void do_putc(char); +void do_putc(char c) +{ + *(volatile char *)0x48020000 = c; +} + void * initarm(struct arm_boot_params *abp) { @@ -1232,8 +1242,10 @@ /* Grab physical memory regions information from device tree. 
*/ if (fdt_get_mem_regions(memory_regions, &memory_regions_sz, - &memsize) != 0) + &memsize) != 0) { + do_putc('a'); while(1); + } /* Grab physical memory regions information from device tree. */ if (fdt_get_reserved_regions(reserved_regions, &reserved_regions_sz) != 0) @@ -1364,8 +1376,20 @@ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); - valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); + if ((KSTACK_MAX_PAGES * PAGE_SIZE) - + (freemempos & (KSTACK_MAX_MASK)) >= KSTACK_PAGES) + freemempos = (freemempos &~ KSTACK_MAX_MASK) + + (KSTACK_MAX_PAGES * PAGE_SIZE) - + (KSTACK_PAGES * PAGE_SIZE); + + else + freemempos = (freemempos &~ KSTACK_MAX_MASK) + + (KSTACK_MAX_PAGES * PAGE_SIZE) + + 2 * (KSTACK_MAX_PAGES * PAGE_SIZE) - + (KSTACK_PAGES * PAGE_SIZE); + + valloc_pages(kernelstack, KSTACK_PAGES); /* * Now we start construction of the L1 page table @@ -1486,12 +1510,14 @@ init_proc0(kernelstack.pv_va); arm_intrnames_init(); + printf("hoho\n"); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); + printf("hehe\n"); arm_dump_avail_init(memsize, sizeof(dump_avail) / sizeof(dump_avail[0])); + printf("plop\n"); pmap_bootstrap(freemempos, &kernel_l1pt); + printf("unplop\n"); msgbufp = (void *)msgbufpv.pv_va; - msgbufinit(msgbufp, msgbufsize); - mutex_init(); /* * Prepare map of physical memory regions available to vm subsystem. 
@@ -1500,6 +1526,7 @@ init_param2(physmem); kdb_init(); + msgbufinit(msgbufp, msgbufsize); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); Index: arm/arm/fusu.S =================================================================== --- arm/arm/fusu.S (revision 251584) +++ arm/arm/fusu.S (working copy) @@ -40,14 +40,13 @@ __FBSDID("$FreeBSD$"); #ifdef _ARM_ARCH_6 -#define GET_PCB(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) +KSTACK_LOCALS #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB -#define GET_PCB(tmp) \ - ldr tmp, .Lcurpcb +#define GET_PCB(tmp, tmp2) \ + ldr tmp, .Lcurpcb; \ + ldr tmp, [tmp] #endif /* @@ -57,21 +56,28 @@ ENTRY_NP(casuword32) ENTRY(casuword) - GET_PCB(r3) - ldr r3, [r3] - -#ifdef DIAGNOSTIC - teq r3, #0x00000000 - beq .Lfusupcbfault -#endif stmfd sp!, {r4, r5} + GET_PCB(r3, r4) + adr r4, .Lcasuwordfault str r4, [r3, #PCB_ONFAULT] +#ifdef _ARM_ARCH_6 +1: + ldrex r5, [r0] + cmp r5, r1 + movne r0, r5 + bne 2f + strex r5, r2, [r0] + cmp r5, #0 + bne 1b +#else ldrt r5, [r0] cmp r5, r1 movne r0, r5 streqt r2, [r0] +#endif moveq r0, r1 +2: ldmfd sp!, {r4, r5} mov r1, #0x00000000 str r1, [r3, #PCB_ONFAULT] @@ -97,14 +103,8 @@ ENTRY_NP(fuword32) ENTRY(fuword) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] @@ -123,14 +123,8 @@ */ ENTRY(fusword) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] @@ -159,14 +153,8 @@ mvnne r0, #0x00000000 RETne - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r1, _C_LABEL(fusubailout) str r1, [r2, #PCB_ONFAULT] @@ -199,14 +187,8 @@ */ ENTRY(fubyte) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif 
- adr r1, .Lfusufault str r1, [r2, #PCB_ONFAULT] @@ -242,36 +224,15 @@ mvn r0, #0x00000000 RET -#ifdef DIAGNOSTIC /* - * Handle earlier faults from [fs]u*(), due to no pcb - */ - -.Lfusupcbfault: - mov r1, r0 - adr r0, fusupcbfaulttext - b _C_LABEL(panic) - -fusupcbfaulttext: - .asciz "Yikes - no valid PCB during fusuxxx() addr=%08x\n" - .align 0 -#endif - -/* * suword(caddr_t uaddr, int x); * Store an int in the user's address space. */ ENTRY_NP(suword32) ENTRY(suword) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] @@ -296,14 +257,8 @@ mvnne r0, #0x00000000 RETne - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r3, _C_LABEL(fusubailout) str r3, [r2, #PCB_ONFAULT] @@ -327,14 +282,8 @@ */ ENTRY(susword) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] @@ -358,15 +307,8 @@ */ ENTRY(subyte) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) - -#ifdef DIAGNOSTIC - teq r2, #0x00000000 - beq .Lfusupcbfault -#endif - adr r3, .Lfusufault str r3, [r2, #PCB_ONFAULT] Index: arm/arm/gic.c =================================================================== --- arm/arm/gic.c (revision 251584) +++ arm/arm/gic.c (working copy) @@ -253,7 +253,7 @@ active_irq &= 0x3FF; if (active_irq == 0x3FF) { - if (last_irq == -1) + if (last_irq == -1 && 0) printf("Spurious interrupt detected [0x%08x]\n", active_irq); return -1; } Index: arm/arm/swtch.S =================================================================== --- arm/arm/swtch.S (revision 251584) +++ arm/arm/swtch.S (working copy) @@ -192,7 +192,7 @@ #endif /* We have a new curthread now so make a note it */ - GET_CURTHREAD_PTR(r6) + GET_PCPU_CURTHREAD_PTR(r6) str r5, [r6] /* Set the new tp */ @@ -226,7 +226,7 @@ /* Process is now on a 
processor. */ /* We have a new curthread now so make a note it */ - GET_CURTHREAD_PTR(r7) + GET_PCPU_CURTHREAD_PTR(r7) str r1, [r7] /* Hook in a new pcb */ @@ -434,7 +434,11 @@ str r6, [r4, #TD_LOCK] #if defined(SCHED_ULE) && defined(SMP) ldr r6, .Lblocked_lock - GET_CURTHREAD_PTR(r3) + /* + * It's fine to use GET_PCPU_CURTHREAD_PTR here, because interrupts + * are disabled, so it will be atomic. + */ + GET_PCPU_CURTHREAD_PTR(r3) 1: ldr r4, [r3, #TD_LOCK] Index: arm/arm/bcopyinout.S =================================================================== --- arm/arm/bcopyinout.S (revision 251584) +++ arm/arm/bcopyinout.S (working copy) @@ -39,6 +39,7 @@ #include "assym.s" #include <machine/asm.h> +#include <machine/asmacros.h> #include <sys/errno.h> .L_arm_memcpy: @@ -55,14 +56,12 @@ .align 0 #ifdef _ARM_ARCH_6 -#define GET_PCB(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) +KSTACK_LOCALS #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB -#define GET_PCB(tmp) \ +#define GET_PCB(tmp, tmp2) \ ldr tmp, .Lcurpcb #endif @@ -116,8 +115,7 @@ .Lnormal: SAVE_REGS - GET_PCB(r4) - ldr r4, [r4] + GET_PCB(r4, r5) ldr r5, [r4, #PCB_ONFAULT] @@ -354,8 +352,7 @@ .Lnormale: SAVE_REGS - GET_PCB(r4) - ldr r4, [r4] + GET_PCB(r4, r5) ldr r5, [r4, #PCB_ONFAULT] adr r3, .Lcopyfault @@ -549,8 +546,7 @@ * else EFAULT if a page fault occurred. */ ENTRY(badaddr_read_1) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) ldr ip, [r2, #PCB_ONFAULT] adr r3, 1f @@ -575,8 +571,7 @@ * else EFAULT if a page fault occurred. */ ENTRY(badaddr_read_2) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) ldr ip, [r2, #PCB_ONFAULT] adr r3, 1f @@ -601,8 +596,7 @@ * else EFAULT if a page fault occurred. 
*/ ENTRY(badaddr_read_4) - GET_PCB(r2) - ldr r2, [r2] + GET_PCB(r2, r3) ldr ip, [r2, #PCB_ONFAULT] adr r3, 1f Index: arm/arm/pmap-v6.c =================================================================== --- arm/arm/pmap-v6.c (revision 251584) +++ arm/arm/pmap-v6.c (working copy) @@ -1818,7 +1818,7 @@ cpu_tlb_flushID(); cpu_cpwait(); if (vector_page < KERNBASE) { - struct pcb *curpcb = PCPU_GET(curpcb); + struct pcb *_curpcb = curpcb; pcb = thread0.td_pcb; if (pmap_is_current(pmap)) { /* @@ -1838,10 +1838,10 @@ * Make sure cpu_switch(), et al, DTRT. This is safe to do * since this process has no remaining mappings of its own. */ - curpcb->pcb_pl1vec = pcb->pcb_pl1vec; - curpcb->pcb_l1vec = pcb->pcb_l1vec; - curpcb->pcb_dacr = pcb->pcb_dacr; - curpcb->pcb_pagedir = pcb->pcb_pagedir; + _curpcb->pcb_pl1vec = pcb->pcb_pl1vec; + _curpcb->pcb_l1vec = pcb->pcb_l1vec; + _curpcb->pcb_dacr = pcb->pcb_dacr; + _curpcb->pcb_pagedir = pcb->pcb_pagedir; } pmap_free_l1(pmap); @@ -2229,6 +2229,8 @@ pmap_kextract(vm_offset_t va) { + if (kernel_vm_end == 0) + return (0); return (pmap_extract_locked(kernel_pmap, va)); } Index: arm/arm/vm_machdep.c =================================================================== --- arm/arm/vm_machdep.c (revision 251584) +++ arm/arm/vm_machdep.c (working copy) @@ -131,6 +131,7 @@ #endif td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + pcb2->un_32.pcb32_curthread = td2; mdp2 = &p2->p_md; bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2)); pcb2->un_32.pcb32_und_sp = td2->td_kstack + USPACE_UNDEF_STACK_TOP; @@ -344,6 +345,7 @@ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); + td->td_pcb->un_32.pcb32_curthread = td; tf = td->td_frame; sf = (struct switchframe *)tf - 1; sf->sf_r4 = (u_int)fork_return; @@ -406,6 +408,7 @@ { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; + td->td_pcb->un_32.pcb32_curthread = td; /* * Ensure td_frame is aligned to 
an 8 byte boundary as it will be * placed into the stack pointer which must be 8 byte aligned in Index: arm/arm/cpufunc.c =================================================================== --- arm/arm/cpufunc.c (revision 251584) +++ arm/arm/cpufunc.c (working copy) @@ -1116,10 +1116,10 @@ armv7_tlb_flushID, /* tlb_flushID */ armv7_tlb_flushID_SE, /* tlb_flushID_SE */ - arm11_tlb_flushI, /* tlb_flushI */ - arm11_tlb_flushI_SE, /* tlb_flushI_SE */ - arm11_tlb_flushD, /* tlb_flushD */ - arm11_tlb_flushD_SE, /* tlb_flushD_SE */ + armv7_tlb_flushID, /* tlb_flushI */ + armv7_tlb_flushID_SE, /* tlb_flushI_SE */ + armv7_tlb_flushID, /* tlb_flushD */ + armv7_tlb_flushID_SE, /* tlb_flushD_SE */ /* Cache operations */ Index: arm/arm/pmap.c =================================================================== --- arm/arm/pmap.c (revision 251584) +++ arm/arm/pmap.c (working copy) @@ -2452,7 +2452,7 @@ pmap_tlb_flushID(pmap); cpu_cpwait(); if (vector_page < KERNBASE) { - struct pcb *curpcb = PCPU_GET(curpcb); + struct pcb *_curpcb = curpcb; pcb = thread0.td_pcb; if (pmap_is_current(pmap)) { /* @@ -2472,10 +2472,10 @@ * Make sure cpu_switch(), et al, DTRT. This is safe to do * since this process has no remaining mappings of its own. */ - curpcb->pcb_pl1vec = pcb->pcb_pl1vec; - curpcb->pcb_l1vec = pcb->pcb_l1vec; - curpcb->pcb_dacr = pcb->pcb_dacr; - curpcb->pcb_pagedir = pcb->pcb_pagedir; + _curpcb->pcb_pl1vec = pcb->pcb_pl1vec; + _curpcb->pcb_l1vec = pcb->pcb_l1vec; + _curpcb->pcb_dacr = pcb->pcb_dacr; + _curpcb->pcb_pagedir = pcb->pcb_pagedir; } pmap_free_l1(pmap); Index: arm/arm/locore.S =================================================================== --- arm/arm/locore.S (revision 251584) +++ arm/arm/locore.S (working copy) @@ -225,6 +225,7 @@ /* init arm will return the new stack pointer. */ mov sp, r0 + bl _C_LABEL(mutex_init) bl _C_LABEL(mi_startup) /* call mi_startup()! 
*/ @@ -261,6 +262,7 @@ MMU_INIT(PHYSADDR, PHYSADDR , 64, L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)) /* map VA 0xc0000000..0xc3ffffff to PA */ MMU_INIT(KERNBASE, PHYSADDR, 64, L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)) + MMU_INIT(0x48000000, 0x48000000, 1, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW)) #else MMU_INIT(PHYSADDR, PHYSADDR , 64, L1_TYPE_S|L1_SHARED|L1_S_C|L1_S_AP(AP_KRW)) /* map VA 0xc0000000..0xc3ffffff to PA */ Index: arm/arm/bcopyinout_xscale.S =================================================================== --- arm/arm/bcopyinout_xscale.S (revision 251584) +++ arm/arm/bcopyinout_xscale.S (working copy) @@ -42,13 +42,11 @@ .align 0 #ifdef _ARM_ARCH_6 -#define GET_PCB(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) +KSTACK_LOCALS #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB -#define GET_PCB(tmp) \ +#define GET_PCB(tmp, tmp2) \ ldr tmp, .Lcurpcb #endif @@ -88,8 +86,7 @@ .Lnormal: stmfd sp!, {r10-r11, lr} - GET_PCB(r10) - ldr r10, [r10] + GET_PCB(r10, r11) mov r3, #0x00 adr ip, .Lcopyin_fault @@ -530,8 +527,7 @@ .Lnormale: stmfd sp!, {r10-r11, lr} - GET_PCB(r10) - ldr r10, [r10] + GET_PCB(r10, r11) mov r3, #0x00 adr ip, .Lcopyout_fault Index: arm/include/param.h =================================================================== --- arm/include/param.h (revision 251584) +++ arm/include/param.h (working copy) @@ -124,6 +124,12 @@ #define KSTACK_GUARD_PAGES 1 #endif /* !KSTACK_GUARD_PAGES */ +#ifndef KSTACK_MAX_PAGES +#define KSTACK_MAX_PAGES 32 +#endif /* !KSTACK_MAX_PAGES */ + +#define KSTACK_MAX_MASK ((KSTACK_MAX_PAGES * PAGE_SIZE) - 1) + #define USPACE_SVC_STACK_TOP KSTACK_PAGES * PAGE_SIZE #define USPACE_SVC_STACK_BOTTOM (USPACE_SVC_STACK_TOP - 0x1000) #define USPACE_UNDEF_STACK_TOP (USPACE_SVC_STACK_BOTTOM - 0x10) Index: arm/include/pcb.h =================================================================== --- arm/include/pcb.h (revision 251584) +++ arm/include/pcb.h (working copy) @@ -63,6 +63,7 @@ u_int pcb32_lr; u_int pcb32_pc; u_int 
pcb32_und_sp; + void * pcb32_curthread; }; #define pcb_pagedir un_32.pcb32_pagedir #define pcb_pl1vec un_32.pcb32_pl1vec Index: arm/include/asmacros.h =================================================================== --- arm/include/asmacros.h (revision 251584) +++ arm/include/asmacros.h (working copy) @@ -216,8 +216,23 @@ name: #ifdef _ARM_ARCH_6 -#define AST_LOCALS -#define GET_CURTHREAD_PTR(tmp) \ +#define KSTACK_LOCALS \ +.Lkstack_consts: \ + .word KSTACK_ADDR_MASK; \ + .word KSTACK_PCB_ADDR; +#define AST_LOCALS KSTACK_LOCALS + +#define GET_PCB(tmp, tmp2) \ + ldr tmp2, .Lkstack_consts; \ + bic tmp, sp, tmp2; \ + ldr tmp2, .Lkstack_consts + 4; \ + add tmp, tmp, tmp2 + +#define GET_CURTHREAD(tmp, tmp2) \ + GET_PCB(tmp, tmp2); \ + ldr tmp, [tmp, #PCB_CURTHREAD] + +#define GET_PCPU_CURTHREAD_PTR(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ add tmp, tmp, #(PC_CURTHREAD) #else @@ -225,8 +240,12 @@ .Lcurthread: ;\ .word _C_LABEL(__pcpu) + PC_CURTHREAD -#define GET_CURTHREAD_PTR(tmp) \ +#define GET_PCPU_CURTHREAD_PTR(tmp) \ ldr tmp, .Lcurthread + +#define GET_CURTHREAD(tmp, tmp2) \ + ldr tmp, .Lcurthread; \ + ldr tmp, [tmp] #endif #define DO_AST \ @@ -238,8 +257,7 @@ teq r0, #(PSR_USR32_MODE) ;\ bne 2f /* Nope, get out now */ ;\ bic r4, r4, #(I32_bit|F32_bit) ;\ -1: GET_CURTHREAD_PTR(r5) ;\ - ldr r5, [r5] ;\ +1: GET_CURTHREAD(r5, r6) ;\ ldr r1, [r5, #(TD_FLAGS)] ;\ and r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED) ;\ teq r1, #0x00000000 ;\ Index: arm/include/pcpu.h =================================================================== --- arm/include/pcpu.h (revision 251584) +++ arm/include/pcpu.h (working copy) @@ -33,8 +33,14 @@ #ifdef _KERNEL #include <machine/cpuconf.h> +#include <machine/cpufunc.h> #include <machine/frame.h> +#include <machine/pcb.h> +#include <machine/atomic.h> +#include <sys/types.h> +#include <sys/systm.h> + #define ALT_STACK_SIZE 128 struct vmspace; @@ -94,15 +100,97 @@ __asm __volatile("mcr p15, 0, %0, c13, c0, 3" : : "r" (tls)); } + +/* + * Evaluates to 
the byte offset of the per-cpu variable name. + */ +#define __pcpu_offset(name) \ + __offsetof(struct pcpu, name) + +/* + * Evaluates to the type of the per-cpu variable name. + */ +#define __pcpu_type(name) \ + __typeof(((struct pcpu *)0)->name) + +/* + * Evaluates to the address of the per-cpu variable name. + */ +#define __PCPU_PTR(name) __extension__ ({ \ + __pcpu_type(name) *__p; \ + int __s; \ + \ + __s = disable_interrupts(I32_bit | F32_bit); \ + __p = &get_pcpu()->name; \ + restore_interrupts(__s); \ + __p; \ +}) + +/* + * Evaluates to the value of the per-cpu variable name. + */ +#define __PCPU_GET(name) __extension__ ({ \ + int __s; \ + __pcpu_type(name) __res; \ + __s = disable_interrupts(I32_bit | F32_bit); \ + __res = get_pcpu()->name; \ + restore_interrupts(__s); \ + __res; \ +}) + +/* + * Adds the value to the per-cpu counter name. The implementation + * must be atomic with respect to interrupts. + */ +#define __PCPU_ADD(name, val) do { \ + int __s ; \ + __s = disable_interrupts(I32_bit | F32_bit); \ + get_pcpu()->name += (val); \ + restore_interrupts(__s); \ +} while (0) + +/* + * Sets the value of the per-cpu variable name to value val. 
+ */ +#define __PCPU_SET(name, val) { \ + int __s; \ + __s = disable_interrupts(I32_bit | F32_bit); \ + get_pcpu()->name = val; \ + restore_interrupts(__s); \ +} + +#define PCPU_GET(member) __PCPU_GET(pc_ ## member) +#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) +#define PCPU_INC(member) PCPU_ADD(member, 1) +#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) +#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) + +static __inline struct pcb * +__curpcb(void) +{ + register_t sp; + + __asm __volatile("mov %0, sp" : "=r" (sp)); + return ((struct pcb *)((sp &~ KSTACK_MAX_MASK) + + (KSTACK_MAX_PAGES * PAGE_SIZE - sizeof(struct pcb)))); +} + + +#define curpcb (__curpcb()) +#if 0 +#define curthread ((struct thread *)(curpcb->un_32.pcb32_curthread)) +#endif + + #else #define get_pcpu() pcpup -#endif #define PCPU_GET(member) (get_pcpu()->pc_ ## member) #define PCPU_ADD(member, value) (get_pcpu()->pc_ ## member += (value)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&get_pcpu()->pc_ ## member) #define PCPU_SET(member,value) (get_pcpu()->pc_ ## member = (value)) +#endif void pcpu0_init(void); #endif /* _KERNEL */ Index: arm/include/pmap.h =================================================================== --- arm/include/pmap.h (revision 251584) +++ arm/include/pmap.h (working copy) @@ -63,7 +63,7 @@ #endif #define PTE_CACHE 6 #define PTE_DEVICE 2 -#define PTE_PAGETABLE 4 +#define PTE_PAGETABLE 6 #else #define PTE_NOCACHE 1 #define PTE_CACHE 2 @@ -463,7 +463,7 @@ #if (ARM_MMU_SA1 == 1) && (ARM_NMMUS == 1) #define PMAP_NEEDS_PTE_SYNC 1 #define PMAP_INCLUDE_PTE_SYNC -#elif defined(CPU_XSCALE_81342) +#elif defined(CPU_XSCALE_81342) || defined(ARM_ARCH_7A) #define PMAP_NEEDS_PTE_SYNC 1 #define PMAP_INCLUDE_PTE_SYNC #elif (ARM_MMU_SA1 == 0) @@ -529,11 +529,17 @@ #define PMAP_INCLUDE_PTE_SYNC #endif +#ifdef ARM_L2_PIPT +#define _sync_l2(pte, size) cpu_l2cache_wb_range(vtophys(pte), size) +#else +#define _sync_l2(pte, size) 
cpu_l2_cache_wb_range(pte, size) +#endif + #define PTE_SYNC(pte) \ do { \ if (PMAP_NEEDS_PTE_SYNC) { \ cpu_dcache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\ - cpu_l2cache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\ + _sync_l2((vm_offset_t)(pte), sizeof(pt_entry_t));\ } else \ cpu_drain_writebuf(); \ } while (/*CONSTCOND*/0) @@ -543,7 +549,7 @@ if (PMAP_NEEDS_PTE_SYNC) { \ cpu_dcache_wb_range((vm_offset_t)(pte), \ (cnt) << 2); /* * sizeof(pt_entry_t) */ \ - cpu_l2cache_wb_range((vm_offset_t)(pte), \ + _sync_l2((vm_offset_t)(pte), \ (cnt) << 2); /* * sizeof(pt_entry_t) */ \ } else \ cpu_drain_writebuf(); \ @@ -703,6 +709,21 @@ void arm_unmap_nocache(void *, vm_size_t); extern vm_paddr_t dump_avail[]; + +/* + * We want to be able to determine the end address of the kstack, to be able + * to get the PCB address just using the SP. + */ + +static __inline void +pmap_align_stack(vm_offset_t *addr, vm_size_t length) +{ + if (((*addr + length) & (KSTACK_MAX_PAGES * PAGE_SIZE - 1)) == 0) + return; + *addr += (KSTACK_MAX_PAGES * PAGE_SIZE) - ((*addr + length) & + (KSTACK_MAX_PAGES * PAGE_SIZE - 1)); +} + #endif /* _KERNEL */ #endif /* !LOCORE */ Index: arm/ti/omap4/omap4_mp.c =================================================================== --- arm/ti/omap4/omap4_mp.c (revision 251584) +++ arm/ti/omap4/omap4_mp.c (working copy) @@ -71,7 +71,7 @@ panic("Couldn't map the SCU\n"); /* Enable the SCU */ *(volatile unsigned int *)scu_addr |= 1; - //*(volatile unsigned int *)(scu_addr + 0x30) |= 1; + *(volatile unsigned int *)(scu_addr + 0x30) |= 1; cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); ti_smc0(0x200, 0xfffffdff, MODIFY_AUX_CORE_0); Index: vm/vm_map.c =================================================================== --- vm/vm_map.c (revision 251584) +++ vm/vm_map.c (working copy) @@ -1465,6 +1465,11 @@ pmap_align_tlb(addr); break; #endif +#ifdef VMFS_STACK_ALIGNED_SPACE + case VMFS_STACK_ALIGNED_SPACE: + pmap_align_stack(addr, length); + break; +#endif 
default: break; } @@ -1477,6 +1482,9 @@ #ifdef VMFS_TLB_ALIGNED_SPACE || find_space == VMFS_TLB_ALIGNED_SPACE #endif +#ifdef VMFS_STACK_ALIGNED_SPACE + || find_space == VMFS_STACK_ALIGNED_SPACE +#endif )); vm_map_unlock(map); return (result); Index: vm/vm_map.h =================================================================== --- vm/vm_map.h (revision 251584) +++ vm/vm_map.h (working copy) @@ -346,6 +346,9 @@ #if defined(__mips__) #define VMFS_TLB_ALIGNED_SPACE 3 /* find a TLB entry aligned range */ #endif +#if defined(__arm__) +#define VMFS_STACK_ALIGNED_SPACE 4 /* Align stack so that the last address is predictable */ +#endif /* * vm_map_wire and vm_map_unwire option flags Index: vm/vm_glue.c =================================================================== --- vm/vm_glue.c (revision 251584) +++ vm/vm_glue.c (working copy) @@ -371,6 +371,9 @@ */ ks = kmem_alloc_nofault_space(kernel_map, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, VMFS_TLB_ALIGNED_SPACE); +#elif defined(__arm__) + ks = kmem_alloc_nofault_space(kernel_map, + (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, VMFS_STACK_ALIGNED_SPACE); #else ks = kmem_alloc_nofault(kernel_map, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); --7JfCtLOvnd9MIVvH--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130609225615.GA42548>