Date: Fri, 22 Nov 2013 00:04:57 +0000 From: Olivier Houchard <cognet@FreeBSD.ORG> To: arm@freebsd.org Subject: arm SMP fix Message-ID: <20131122000457.GA44039@freebsd.org>
next in thread | raw e-mail | index | archive | help
--ZGiS0Q5IWpPtfppv Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hi all, Attached is the first draft to a patch that fixes SMP for me, it seems to work fine on my Pandaboard. The problem is, as it is currently implemented, curthread is basically defined as get_pcpu()->pc_curthread. If we get interrupted between the moment we got the struct pcpu, and the moment we derefence it to get the curthread pointer, and migrated to another core, we would get the wrong curthread, and that's not good. The proposed fix does the following : - use the register we used to use for pcpu for curthread - get the pcpu address by reading the cpu id from the CPUID register, and just use it as an index for the __pcpu array. It breaks the KBI, which is unfortunate, but I think it is no big deal for arm right now. Any review, comment, and testing, even on UP boards, would be very welcome. Regards, Olivier --ZGiS0Q5IWpPtfppv Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="arm_smp_fix.diff" Index: include/asmacros.h =================================================================== --- include/asmacros.h (revision 258395) +++ include/asmacros.h (working copy) @@ -236,15 +236,15 @@ #ifdef _ARM_ARCH_6 #define AST_LOCALS #define GET_CURTHREAD_PTR(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURTHREAD) + mrc p15, 0, tmp, c13, c0, 4 #else #define AST_LOCALS ;\ .Lcurthread: ;\ .word _C_LABEL(__pcpu) + PC_CURTHREAD #define GET_CURTHREAD_PTR(tmp) \ - ldr tmp, .Lcurthread + ldr tmp, .Lcurthread \ + ldr tmp, [tmp] #endif #define DO_AST \ @@ -257,7 +257,6 @@ bne 2f /* Nope, get out now */ ;\ bic r4, r4, #(I32_bit|F32_bit) ;\ 1: GET_CURTHREAD_PTR(r5) ;\ - ldr r5, [r5] ;\ ldr r1, [r5, #(TD_FLAGS)] ;\ and r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED) ;\ teq r1, #0x00000000 ;\ Index: include/pcpu.h =================================================================== --- include/pcpu.h (revision 258395) +++ include/pcpu.h (working copy) @@ -62,22 +62,32 @@ extern struct pcpu *pcpup; #if ARM_ARCH_6 || ARM_ARCH_7A /* or ARM_TP_ADDRESS mark REMOVE ME NOTE */ -static inline struct pcpu * -get_pcpu(void) + +#define CPU_MASK (0x15) + +#define get_pcpu() __extension__ ({ \ + int id; \ + __asm __volatile("mrc p15, 0, %0, c0, c0, 5" : "=r" (id)); \ + (pcpup + (id & CPU_MASK)); \ + }) + +static inline struct thread * +get_curthread(void) { - void *pcpu; + void *ret; - __asm __volatile("mrc p15, 0, %0, c13, c0, 4" : "=r" (pcpu)); - return (pcpu); + __asm __volatile("mrc p15, 0, %0, c13, c0, 4" : "=r" (ret)); + return (ret); } static inline void -set_pcpu(void *pcpu) +set_curthread(struct thread *td) { - __asm __volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (pcpu)); + __asm __volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (td)); } + static inline void * get_tls(void) { @@ -93,6 +103,9 @@ __asm __volatile("mcr p15, 0, %0, c13, c0, 3" : : "r" (tls)); } + +#define curthread get_curthread() + #else #define get_pcpu() pcpup #endif Index: arm/copystr.S =================================================================== --- arm/copystr.S (revision 258395) +++ arm/copystr.S (working copy) @@ -53,7 +53,7 @@ #ifdef _ARM_ARCH_6 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) + add tmp, tmp, #(TD_PCB) #else .Lpcb: .word _C_LABEL(__pcpu) + PC_CURPCB Index: arm/genassym.c =================================================================== --- arm/genassym.c (revision 258395) +++ arm/genassym.c (working copy) @@ -139,3 +139,4 @@ ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(NIRQ, NIRQ); +ASSYM(PCPU_SIZE, sizeof(struct pcpu)); Index: arm/mp_machdep.c =================================================================== --- arm/mp_machdep.c (revision 258395) +++ arm/mp_machdep.c (working copy) @@ -177,7 +177,6 @@ cpu_tlb_flushID(); pc = &__pcpu[cpu]; - set_pcpu(pc); /* * pcpu_init() updates queue, so it should not be executed in parallel @@ -203,6 +202,7 @@ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pc->pc_curthread = pc->pc_idlethread; pc->pc_curpcb = pc->pc_idlethread->td_pcb; + set_curthread(pc->pc_idlethread); #ifdef VFP pc->pc_cpu = cpu; Index: arm/machdep.c =================================================================== --- arm/machdep.c (revision 258395) +++ arm/machdep.c (working copy) @@ -870,7 +870,7 @@ pcpu0_init(void) { #if ARM_ARCH_6 || ARM_ARCH_7A || defined(CPU_MV_PJ4B) - set_pcpu(pcpup); + set_curthread(&thread0); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); Index: arm/fusu.S =================================================================== --- arm/fusu.S (revision 258395) +++ arm/fusu.S (working copy) @@ -42,7 +42,7 @@ #ifdef _ARM_ARCH_6 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) + add tmp, tmp, #(TD_PCB) #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB Index: arm/swtch.S =================================================================== --- arm/swtch.S (revision 258395) +++ arm/swtch.S (working copy) @@ -89,16 +89,22 @@ #define DOMAIN_CLIENT 0x01 #ifdef _ARM_ARCH_6 -#define GET_PCPU(tmp) \ - mrc p15, 0, tmp, c13, c0, 4; +#define GET_PCPU(tmp, tmp2) \ + mrc p15, 0, tmp, c0, c0, 5; \ + and tmp, tmp, #0xf; \ + ldr tmp2, .Lcurpcpu+4; \ + mul tmp, tmp, tmp2; \ + ldr tmp2, .Lcurpcpu; \ + add tmp, tmp, tmp2; #else -.Lcurpcpu: - .word _C_LABEL(__pcpu) -#define GET_PCPU(tmp) \ +#define GET_PCPU(tmp, tmp2) \ ldr tmp, .Lcurpcpu #endif +.Lcurpcpu: + .word _C_LABEL(__pcpu) + .word PCPU_SIZE .Lcpufuncs: .word _C_LABEL(cpufuncs) .Lblocked_lock: @@ -112,7 +118,7 @@ * r5 = newtd */ - GET_PCPU(r7) + GET_PCPU(r7, r9) #ifdef VFP /* @@ -191,10 +197,15 @@ ldr r13, [r7, #(PCB_SP)] #endif + GET_PCPU(r6, r4) + /* Hook in a new pcb */ + str r7, [r6, #PC_CURPCB] /* We have a new curthread now so make a note it */ - GET_CURTHREAD_PTR(r6) + add r6, r6, #PC_CURTHREAD str r5, [r6] - +#ifndef ARM_TP_ADDRESS + mcr p15, 0, r5, c13, c0, 4 +#endif /* Set the new tp */ ldr r6, [r5, #(TD_MD + MD_TP)] #ifdef ARM_TP_ADDRESS @@ -207,9 +218,6 @@ #else mcr p15, 0, r6, c13, c0, 3 #endif - /* Hook in a new pcb */ - GET_PCPU(r6) - str r7, [r6, #PC_CURPCB] add sp, sp, #4; ldmfd sp!, {r4-r7, pc} @@ -231,11 +239,14 @@ /* Process is now on a processor. */ /* We have a new curthread now so make a note it */ - GET_CURTHREAD_PTR(r7) + GET_PCPU(r7, r2) + add r7, r7, #PC_CURTHREAD str r1, [r7] +#ifndef ARM_TP_ADDRESS + mcr p15, 0, r1, c13, c0, 4 +#endif /* Hook in a new pcb */ - GET_PCPU(r7) ldr r2, [r1, #TD_PCB] str r2, [r7, #PC_CURPCB] @@ -315,7 +326,7 @@ * a future exception will bounce the backup settings in the fp unit. * XXX vfp_store can't change r4 */ - GET_PCPU(r7) + GET_PCPU(r7, r8) ldr r8, [r7, #(PC_VFPCTHREAD)] cmp r4, r8 /* old thread used vfp? */ bne 1f /* no, don't save */ @@ -439,7 +450,7 @@ str r6, [r4, #TD_LOCK] #if defined(SCHED_ULE) && defined(SMP) ldr r6, .Lblocked_lock - GET_CURTHREAD_PTR(r3) + mrc p15, 0, r3, c13, c0, 4 1: ldr r4, [r3, #TD_LOCK] @@ -516,7 +527,7 @@ * registers and state, and modify the control as needed. * a future exception will bounce the backup settings in the fp unit. */ - GET_PCPU(r7) + GET_PCPU(r7, r4) ldr r4, [r7, #(PC_VFPCTHREAD)] /* vfp thread */ ldr r2, [r7, #(PC_CURTHREAD)] /* current thread */ cmp r4, r2 Index: arm/bcopyinout.S =================================================================== --- arm/bcopyinout.S (revision 258395) +++ arm/bcopyinout.S (working copy) @@ -57,7 +57,7 @@ #ifdef _ARM_ARCH_6 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) + add tmp, tmp, #(TD_PCB) #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB Index: arm/pmap-v6.c =================================================================== --- arm/pmap-v6.c (revision 258395) +++ arm/pmap-v6.c (working copy) @@ -1978,7 +1978,7 @@ cpu_tlb_flushID(); cpu_cpwait(); if (vector_page < KERNBASE) { - struct pcb *curpcb = PCPU_GET(curpcb); + struct pcb *curpcb = curthread->td_pcb; pcb = thread0.td_pcb; if (pmap_is_current(pmap)) { /* Index: arm/vfp.c =================================================================== --- arm/vfp.c (revision 258395) +++ arm/vfp.c (working copy) @@ -140,12 +140,15 @@ u_int fpexc; struct pcb *curpcb; struct thread *vfptd; + int i; if (!vfp_exists) return 1; /* vfp does not exist */ fpexc = fmrx(VFPEXC); /* read the vfp exception reg */ if (fpexc & VFPEXC_EN) { + i = disable_interrupts(I32_bit|F32_bit); vfptd = PCPU_GET(vfpcthread); + restore_interrupts(i); /* did the kernel call the vfp or exception that expect us * to emulate the command. Newer hardware does not require * emulation, so we don't emulate yet. @@ -173,7 +176,7 @@ } fpexc |= VFPEXC_EN; fmxr(VFPEXC, fpexc); /* enable the vfp and repeat command */ - curpcb = PCPU_GET(curpcb); + curpcb = curthread->td_pcb; /* If we were the last process to use the VFP, the process did not * use a VFP on another processor, then the registers in the VFP * will still be ours and are current. Eventually, we will make the @@ -183,7 +186,9 @@ #ifdef SMP curpcb->pcb_vfpcpu = PCPU_GET(cpu); #endif - PCPU_SET(vfpcthread, PCPU_GET(curthread)); + i = disable_interrupts(I32_bit|F32_bit); + PCPU_SET(vfpcthread, curthread); + restore_interrupts(i); return 0; } @@ -218,7 +223,6 @@ "ldr %0, [%1]\n" /* set old vfpscr */ "mcr p10, 7, %0, cr1, c0, 0\n" : "=&r" (vfpscr) : "r" (vfpsave), "r" (is_d32) : "cc"); - PCPU_SET(vfpcthread, PCPU_GET(curthread)); } } @@ -265,6 +269,12 @@ { u_int tmp = 0; + /* + * No need to protect the access to vfpcthread by disabling + * interrupts, since it's called from cpu_throw(), who is called + * with interrupts disabled. + */ + PCPU_SET(vfpcthread, 0); /* permanent forget about reg */ tmp = fmrx(VFPEXC); tmp &= ~VFPEXC_EN; /* turn off VFP hardware */ Index: arm/bcopyinout_xscale.S =================================================================== --- arm/bcopyinout_xscale.S (revision 258395) +++ arm/bcopyinout_xscale.S (working copy) @@ -44,7 +44,7 @@ #ifdef _ARM_ARCH_6 #define GET_PCB(tmp) \ mrc p15, 0, tmp, c13, c0, 4; \ - add tmp, tmp, #(PC_CURPCB) + add tmp, tmp, #(TD_PCB) #else .Lcurpcb: .word _C_LABEL(__pcpu) + PC_CURPCB --ZGiS0Q5IWpPtfppv--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20131122000457.GA44039>