Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 22 Nov 2013 00:04:57 +0000
From:      Olivier Houchard <cognet@FreeBSD.ORG>
To:        arm@freebsd.org
Subject:   arm SMP fix
Message-ID:  <20131122000457.GA44039@freebsd.org>

next in thread | raw e-mail | index | archive | help

--ZGiS0Q5IWpPtfppv
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi all,                                                                         

Attached is the first draft to a patch that fixes SMP for me, it seems to       
work fine on my Pandaboard.                                                     
The problem is, as it is currently implemented, curthread is basically          
defined as get_pcpu()->pc_curthread. If we get interrupted between the moment   
we got the struct pcpu, and the moment we derefence it to get the curthread 
pointer, and migrated to another core, we would get the wrong curthread, and    
that's not good.                                                                
The proposed fix does the following :                                           
- use the register we used to use for pcpu for curthread                        
- get the pcpu address by reading the cpu id from the CPUID register, and       
just use it as an index for the __pcpu array.                                   
                                                                                
It breaks the KBI, which is unfortunate, but I think it is no big deal for      
arm right now.                                                                  
                                                                                
Any review, comment, and testing, even on UP boards, would be very welcome.     
                                                                                
Regards,                                                                        

Olivier 

--ZGiS0Q5IWpPtfppv
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="arm_smp_fix.diff"

Index: include/asmacros.h
===================================================================
--- include/asmacros.h	(revision 258395)
+++ include/asmacros.h	(working copy)
@@ -236,15 +236,15 @@
 #ifdef _ARM_ARCH_6
 #define	AST_LOCALS
 #define GET_CURTHREAD_PTR(tmp) \
-	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURTHREAD)
+    	mrc	p15, 0, tmp, c13, c0, 4
 #else
 #define	AST_LOCALS							;\
 .Lcurthread:								;\
 	.word	_C_LABEL(__pcpu) + PC_CURTHREAD
 
 #define GET_CURTHREAD_PTR(tmp) \
-	ldr	tmp, .Lcurthread
+	ldr	tmp, .Lcurthread     \
+	ldr	tmp, [tmp]
 #endif
 
 #define	DO_AST								\
@@ -257,7 +257,6 @@
 	bne	2f			/* Nope, get out now */		;\
 	bic	r4, r4, #(I32_bit|F32_bit)				;\
 1:	GET_CURTHREAD_PTR(r5)						;\
-	ldr	r5, [r5]						;\
 	ldr	r1, [r5, #(TD_FLAGS)]					;\
 	and	r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED)		;\
 	teq	r1, #0x00000000						;\
Index: include/pcpu.h
===================================================================
--- include/pcpu.h	(revision 258395)
+++ include/pcpu.h	(working copy)
@@ -62,22 +62,32 @@
 extern struct pcpu *pcpup;
 #if ARM_ARCH_6 || ARM_ARCH_7A
 /* or ARM_TP_ADDRESS 	mark REMOVE ME NOTE */
-static inline struct pcpu *
-get_pcpu(void)
+
+#define CPU_MASK (0x15)
+
+#define get_pcpu() __extension__ ({			  		\
+    	int id;								\
+        __asm __volatile("mrc p15, 0, %0, c0, c0, 5" : "=r" (id));	\
+    	(pcpup + (id & CPU_MASK));					\
+    })
+	
+static inline struct thread *
+get_curthread(void)
 {
-	void *pcpu;
+	void *ret;
 
-	__asm __volatile("mrc p15, 0, %0, c13, c0, 4" : "=r" (pcpu));
-	return (pcpu);
+	__asm __volatile("mrc p15, 0, %0, c13, c0, 4" : "=r" (ret));
+	return (ret);
 }
 
 static inline void
-set_pcpu(void *pcpu)
+set_curthread(struct thread *td)
 {
 
-	__asm __volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (pcpu));
+	__asm __volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (td));
 }
 
+
 static inline void *
 get_tls(void)
 {
@@ -93,6 +103,9 @@
 
 	__asm __volatile("mcr p15, 0, %0, c13, c0, 3" : : "r" (tls));
 }
+
+#define curthread get_curthread()
+
 #else
 #define get_pcpu()	pcpup
 #endif
Index: arm/copystr.S
===================================================================
--- arm/copystr.S	(revision 258395)
+++ arm/copystr.S	(working copy)
@@ -53,7 +53,7 @@
 #ifdef _ARM_ARCH_6
 #define GET_PCB(tmp) \
 	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+	add	tmp, tmp, #(TD_PCB)
 #else
 .Lpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
Index: arm/genassym.c
===================================================================
--- arm/genassym.c	(revision 258395)
+++ arm/genassym.c	(working copy)
@@ -139,3 +139,4 @@
 
 ASSYM(MAXCOMLEN, MAXCOMLEN);
 ASSYM(NIRQ, NIRQ);
+ASSYM(PCPU_SIZE, sizeof(struct pcpu));
Index: arm/mp_machdep.c
===================================================================
--- arm/mp_machdep.c	(revision 258395)
+++ arm/mp_machdep.c	(working copy)
@@ -177,7 +177,6 @@
 	cpu_tlb_flushID();
 
 	pc = &__pcpu[cpu];
-	set_pcpu(pc);
 
 	/*
 	 * pcpu_init() updates queue, so it should not be executed in parallel
@@ -203,6 +202,7 @@
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pc->pc_curthread = pc->pc_idlethread;
 	pc->pc_curpcb = pc->pc_idlethread->td_pcb;
+	set_curthread(pc->pc_idlethread);
 #ifdef VFP
 	pc->pc_cpu = cpu;
 
Index: arm/machdep.c
===================================================================
--- arm/machdep.c	(revision 258395)
+++ arm/machdep.c	(working copy)
@@ -870,7 +870,7 @@
 pcpu0_init(void)
 {
 #if ARM_ARCH_6 || ARM_ARCH_7A || defined(CPU_MV_PJ4B)
-	set_pcpu(pcpup);
+	set_curthread(&thread0);
 #endif
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
Index: arm/fusu.S
===================================================================
--- arm/fusu.S	(revision 258395)
+++ arm/fusu.S	(working copy)
@@ -42,7 +42,7 @@
 #ifdef _ARM_ARCH_6
 #define GET_PCB(tmp) \
 	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+	add	tmp, tmp, #(TD_PCB)
 #else
 .Lcurpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
Index: arm/swtch.S
===================================================================
--- arm/swtch.S	(revision 258395)
+++ arm/swtch.S	(working copy)
@@ -89,16 +89,22 @@
 #define DOMAIN_CLIENT	0x01
 
 #ifdef _ARM_ARCH_6
-#define GET_PCPU(tmp) \
-	mrc p15, 0, tmp, c13, c0, 4;
+#define GET_PCPU(tmp, tmp2) \
+	mrc 	p15, 0, tmp, c0, c0, 5;	\
+	and	tmp, tmp, #0xf;		\
+	ldr 	tmp2, .Lcurpcpu+4;	\
+	mul 	tmp, tmp, tmp2;		\
+	ldr	tmp2, .Lcurpcpu;	\
+	add	tmp, tmp, tmp2;
 #else
-.Lcurpcpu:
-	.word	_C_LABEL(__pcpu)
 
-#define GET_PCPU(tmp) \
+#define GET_PCPU(tmp, tmp2) \
 	ldr	tmp, .Lcurpcpu
 #endif
 
+.Lcurpcpu:
+        .word   _C_LABEL(__pcpu)
+	.word	PCPU_SIZE
 .Lcpufuncs:	
 	.word	_C_LABEL(cpufuncs)
 .Lblocked_lock:
@@ -112,7 +118,7 @@
 	 * r5 = newtd
 	 */
 
-	GET_PCPU(r7)
+	GET_PCPU(r7, r9)
 
 #ifdef VFP
 	/*
@@ -191,10 +197,15 @@
 	ldr	r13, [r7, #(PCB_SP)]
 #endif
 
+	GET_PCPU(r6, r4)
+	/* Hook in a new pcb */
+	str	r7, [r6, #PC_CURPCB]
 	/* We have a new curthread now so make a note it */
-	GET_CURTHREAD_PTR(r6)
+	add	r6, r6, #PC_CURTHREAD
 	str	r5, [r6]
-
+#ifndef ARM_TP_ADDRESS
+	mcr	p15, 0, r5, c13, c0, 4
+#endif
 	/* Set the new tp */
 	ldr	r6, [r5, #(TD_MD + MD_TP)]
 #ifdef ARM_TP_ADDRESS
@@ -207,9 +218,6 @@
 #else
 	mcr p15, 0, r6, c13, c0, 3
 #endif
-	/* Hook in a new pcb */
-	GET_PCPU(r6)
-	str	r7, [r6, #PC_CURPCB]
 
 	add	sp, sp, #4;
 	ldmfd	sp!, {r4-r7, pc}
@@ -231,11 +239,14 @@
 
 	/* Process is now on a processor. */
 	/* We have a new curthread now so make a note it */
-	GET_CURTHREAD_PTR(r7)
+	GET_PCPU(r7, r2)
+	add	r7, r7, #PC_CURTHREAD
 	str	r1, [r7]
+#ifndef ARM_TP_ADDRESS
+	mcr	p15, 0, r1, c13, c0, 4
+#endif
 
 	/* Hook in a new pcb */
-	GET_PCPU(r7)
 	ldr	r2, [r1, #TD_PCB]
 	str	r2, [r7, #PC_CURPCB]
 
@@ -315,7 +326,7 @@
 	 * a future exception will bounce the backup settings in the fp unit.
 	 * XXX vfp_store can't change r4
 	 */
-	GET_PCPU(r7)
+	GET_PCPU(r7, r8)
 	ldr	r8, [r7, #(PC_VFPCTHREAD)]
 	cmp	r4, r8				/* old thread used vfp? */
 	bne	1f				/* no, don't save */
@@ -439,7 +450,7 @@
 	str	r6, [r4, #TD_LOCK]
 #if defined(SCHED_ULE) && defined(SMP)
 	ldr	r6, .Lblocked_lock
-	GET_CURTHREAD_PTR(r3)
+	mrc	p15, 0, r3, c13, c0, 4
 
 1:
 	ldr	r4, [r3, #TD_LOCK]
@@ -516,7 +527,7 @@
 	 * registers and state, and modify the control as needed.
 	 * a future exception will bounce the backup settings in the fp unit.
 	 */
-	GET_PCPU(r7)
+	GET_PCPU(r7, r4)
 	ldr	r4, [r7, #(PC_VFPCTHREAD)]      /* vfp thread */
 	ldr	r2, [r7, #(PC_CURTHREAD)]       /* current thread */
 	cmp	r4, r2
Index: arm/bcopyinout.S
===================================================================
--- arm/bcopyinout.S	(revision 258395)
+++ arm/bcopyinout.S	(working copy)
@@ -57,7 +57,7 @@
 #ifdef _ARM_ARCH_6
 #define GET_PCB(tmp) \
 	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+	add	tmp, tmp, #(TD_PCB)
 #else
 .Lcurpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB
Index: arm/pmap-v6.c
===================================================================
--- arm/pmap-v6.c	(revision 258395)
+++ arm/pmap-v6.c	(working copy)
@@ -1978,7 +1978,7 @@
 	cpu_tlb_flushID();
 	cpu_cpwait();
 	if (vector_page < KERNBASE) {
-		struct pcb *curpcb = PCPU_GET(curpcb);
+		struct pcb *curpcb = curthread->td_pcb;
 		pcb = thread0.td_pcb;
 		if (pmap_is_current(pmap)) {
 			/*
Index: arm/vfp.c
===================================================================
--- arm/vfp.c	(revision 258395)
+++ arm/vfp.c	(working copy)
@@ -140,12 +140,15 @@
 	u_int fpexc;
 	struct pcb *curpcb;
 	struct thread *vfptd;
+	int i;
 
 	if (!vfp_exists)
 		return 1;		/* vfp does not exist */
 	fpexc = fmrx(VFPEXC);		/* read the vfp exception reg */
 	if (fpexc & VFPEXC_EN) {
+		i = disable_interrupts(I32_bit|F32_bit);
 		vfptd = PCPU_GET(vfpcthread);
+		restore_interrupts(i);
 		/* did the kernel call the vfp or exception that expect us
 		 * to emulate the command. Newer hardware does not require
 		 * emulation, so we don't emulate yet.
@@ -173,7 +176,7 @@
 	}
 	fpexc |= VFPEXC_EN;
 	fmxr(VFPEXC, fpexc);	/* enable the vfp and repeat command */
-	curpcb = PCPU_GET(curpcb);
+	curpcb = curthread->td_pcb;
 	/* If we were the last process to use the VFP, the process did not
 	 * use a VFP on another processor, then the registers in the VFP
 	 * will still be ours and are current. Eventually, we will make the
@@ -183,7 +186,9 @@
 #ifdef SMP
 	curpcb->pcb_vfpcpu = PCPU_GET(cpu);
 #endif
-	PCPU_SET(vfpcthread, PCPU_GET(curthread));
+	i = disable_interrupts(I32_bit|F32_bit);
+	PCPU_SET(vfpcthread, curthread);
+	restore_interrupts(i);
 	return 0;
 }
 
@@ -218,7 +223,6 @@
 			"ldr	%0, [%1]\n"		/* set old vfpscr */
 			"mcr	p10, 7, %0, cr1, c0, 0\n"
 			: "=&r" (vfpscr) : "r" (vfpsave), "r" (is_d32) : "cc");
-		PCPU_SET(vfpcthread, PCPU_GET(curthread));
 	}
 }
 
@@ -265,6 +269,12 @@
 {
 	u_int tmp = 0;
 
+	/*
+	 * No need to protect the access to vfpcthread by disabling
+	 * interrupts, since it's called from cpu_throw(), who is called
+	 * with interrupts disabled.
+	 */
+	  
 	PCPU_SET(vfpcthread, 0);	/* permanent forget about reg */
 	tmp = fmrx(VFPEXC);
 	tmp &= ~VFPEXC_EN;		/* turn off VFP hardware */
Index: arm/bcopyinout_xscale.S
===================================================================
--- arm/bcopyinout_xscale.S	(revision 258395)
+++ arm/bcopyinout_xscale.S	(working copy)
@@ -44,7 +44,7 @@
 #ifdef _ARM_ARCH_6
 #define GET_PCB(tmp) \
 	mrc p15, 0, tmp, c13, c0, 4; \
-	add	tmp, tmp, #(PC_CURPCB)
+	add	tmp, tmp, #(TD_PCB)
 #else
 .Lcurpcb:
 	.word	_C_LABEL(__pcpu) + PC_CURPCB

--ZGiS0Q5IWpPtfppv--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20131122000457.GA44039>