Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 29 Jun 2006 02:56:53 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 100248 for review
Message-ID:  <200606290256.k5T2ur0a093977@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=100248

Change 100248 by kmacy@kmacy_storage:sun4v_work_stable on 2006/06/29 02:56:39

	integrate right up prior to bike_sched integration

Affected files ...

.. //depot/projects/kmacy_sun4v_stable/src/contrib/gcc/config/sparc/sparc.h#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/kern/kern_idle.c#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/asi.h#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/asm.h#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/frame.h#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/pcpu.h#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/exception.S#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/machdep.c#2 integrate
.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/pmap.c#2 integrate

Differences ...

==== //depot/projects/kmacy_sun4v_stable/src/contrib/gcc/config/sparc/sparc.h#2 (text+ko) ====

@@ -797,7 +797,7 @@
 /* ALIGN FRAMES on double word boundaries */
 
 #define SPARC_STACK_ALIGN(LOC) \
-  (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
+  (TARGET_ARCH64 ? (((LOC)+63) & ~63) : (((LOC)+7) & ~7))
 
 /* Allocation boundary (in *bits*) for the code of a function.  */
 #define FUNCTION_BOUNDARY 32

==== //depot/projects/kmacy_sun4v_stable/src/sys/kern/kern_idle.c#2 (text+ko) ====

@@ -78,7 +78,7 @@
 		mtx_lock_spin(&sched_lock);
 		td = FIRST_THREAD_IN_PROC(p);
 		TD_SET_CAN_RUN(td);
-		td->td_flags |= TDF_IDLETD;
+		atomic_set_int(&td->td_flags, TDF_IDLETD);
 		sched_class(td->td_ksegrp, PRI_IDLE);
 		sched_prio(td, PRI_MAX_IDLE);
 		mtx_unlock_spin(&sched_lock);
@@ -118,7 +118,8 @@
 #ifdef SMP
 		idle_cpus_mask &= ~mycpu;
 #endif
-		mi_switch(SW_VOL, NULL);
+		if ((td = choosethread()) != curthread)
+			sched_switch(curthread, td, SW_VOL);
 #ifdef SMP
 		idle_cpus_mask |= mycpu;
 #endif

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/asi.h#2 (text+ko) ====

@@ -53,13 +53,14 @@
 #define ASI_SCRATCHPAD                0x20
 #define ASI_MMU_CONTEXTID             0x21
 #define ASI_LDTD_AIUP                 0x22  /* ASI_LOAD_TWIN_DW_AS_IF_USER_PRIMARY   */
+#define ASI_LDSTBI_AIUP               0x22
 #define ASI_LDTD_AIUS                 0x23  /* ASI_LOAD_TWIN_DW_AS_IF_USER_SECONDARY */
 #define ASI_LDSTBI_AIUS               0x23
 #define ASI_QUEUE                     0x25
 #define ASI_LDTD_REAL                 0x26  /* ASI_LOAD_TWIN_DW_REAL                 */
 #define ASI_STBI_REAL                 0x26
 #define ASI_LDTD_N                    0x27  /* ASI_LOAD_TWIN_DW_NUCLEUS              */
-#define ASI_STBI_N                    0x27
+#define ASI_LDSTBI_N                  0x27
 
 #define ASI_LDTD_AIUPL                0x2A  /* ASI_LD_TWIN_DW_AS_IF_USER_PRIMARY_LITTLE   */
 #define ASI_LDTD_AIUSL                0x2B  /* ASI_LD_TWIN_DW_AS_IF_USER_SECONDARY_LITTLE */

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/asm.h#2 (text+ko) ====

@@ -95,11 +95,11 @@
 #define	ENTRY(x)	_ENTRY(x)
 #define	END(x)		.size x, . - x
 
-#define STACK_ALIGN     16
+#define STACK_ALIGN     64
 #define SET_SIZE(x)     END(x)
 #define SA(X)           (((X)+(STACK_ALIGN-1)) & ~(STACK_ALIGN-1))  
 #define WINDOWSIZE64    (16*8)
-#define MINFRAME64      (WINDOWSIZE64 + 48)
+#define MINFRAME64      (WINDOWSIZE64 + 64)
 #define MINFRAME        MINFRAME64
 #define REGOFF          SA(MINFRAME)
 

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/frame.h#2 (text+ko) ====

@@ -52,11 +52,11 @@
 	uint64_t tf_wstate;
 	uint64_t tf_asi;
 	uint64_t tf_pad3[1];
-};
+} __aligned(64);
 /* extra padding can go away once we re-shuffle user-land mcontext
  */
 
-#define	tf_sp	tf_out[6]
+#define	tf_sp	        tf_out[6]
  
 #define	TF_DONE(tf) do { \
 	tf->tf_tpc = tf->tf_tnpc; \

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/include/pcpu.h#2 (text+ko) ====

@@ -78,7 +78,8 @@
 	u_int   pc_kwbuf_full;                                          \
 	struct rwindow pc_tsbwbuf[2];                                   \
 	u_int   pc_node;                                                \
-        uint16_t pc_cpulist[MAXCPU]
+        uint16_t pc_cpulist[MAXCPU];                                    \
+        uint64_t pad[6];
 
 	/* XXX SUN4V_FIXME - as we access the *_ra and *_size fields in quick
 	 * succession we _really_ want them to be L1 cache line size aligned

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/exception.S#2 (text+ko) ====

@@ -140,6 +140,26 @@
 	storer	%i6, [%g5 + %g3]asi             ;\
 	storer	%i7, [%g5 + %g4]asi  
 
+/* 16 instructions */
+#define	SPILL_ASI_64 \
+	stxa	%l0, [%g1 + 0x0]%asi             ;\
+	stxa	%i0, [%g1 + 0x40]%asi             ;\
+	stxa	%l1, [%g1 + 0x8]%asi              ;\
+	stxa	%l2, [%g1 + 0x10]%asi             ;\
+	stxa	%l3, [%g1 + 0x18]%asi             ;\
+	stxa	%l4, [%g1 + 0x20]%asi             ;\
+	stxa	%l5, [%g1 + 0x28]%asi             ;\
+	stxa	%l6, [%g1 + 0x30]%asi             ;\
+	stxa	%l7, [%g1 + 0x38]%asi             ;\
+	stxa	%i1, [%g1 + 0x48]%asi             ;\
+	stxa	%i2, [%g1 + 0x50]%asi             ;\
+	stxa	%i3, [%g1 + 0x58]%asi             ;\
+	stxa	%i4, [%g1 + 0x60]%asi             ;\
+	stxa	%i5, [%g1 + 0x68]%asi             ;\
+	stxa	%i6, [%g1 + 0x70]%asi             ;\
+	stxa	%i7, [%g1 + 0x78]%asi  
+
+/* 23 instructions */
 #define	FILL(loader, bias, size, asi) \
 	mov	0 + bias, %g1			;\
 	loader	[%sp + %g1]asi, %l0		;\
@@ -165,20 +185,54 @@
 	loader	[%g5 + %g3]asi, %i6		;\
 	loader	[%g5 + %g4]asi, %i7
 
-#define	FILL_DW(asi) \
-	mov	0 + SPOFF, %g1			;\
-	ldda	[%sp + %g1]asi, %l0		;\
-	mov	16 + SPOFF, %g2			;\
-	ldda	[%sp + %g2]asi, %l2		;\
-	add	%sp, 32, %g3			;\
-	ldda	[%g3 + %g1]asi, %l4		;\
-	ldda	[%g3 + %g2]asi, %l6		;\
-	add	%g3, 32, %g3			;\
-	ldda	[%g3 + %g1]asi, %i0		;\
-	ldda	[%g3 + %g2]asi, %i2		;\
-	add	%g3, 32, %g3			;\
-	ldda	[%g3 + %g1]asi, %i4		;\
-	ldda	[%g3 + %g2]asi, %i6		
+#define	SPILL_ASI_SET(storer, size) \
+	storer	%l0, [%g1 + (0 * size)]%asi     ;\
+	storer	%l1, [%g1 + (1 * size)]%asi     ;\
+	storer	%l2, [%g1 + (2 * size)]%asi     ;\
+	storer	%l3, [%g1 + (3 * size)]%asi     ;\
+	storer	%l4, [%g1 + (4 * size)]%asi     ;\
+	storer	%l5, [%g1 + (5 * size)]%asi     ;\
+	storer	%l6, [%g1 + (6 * size)]%asi     ;\
+	storer	%l7, [%g1 + (7 * size)]%asi     ;\
+	storer	%i0, [%g1 + (8 * size)]%asi     ;\
+	storer	%i1, [%g1 + (9 * size)]%asi     ;\
+	storer	%i2, [%g1 + (10 * size)]%asi     ;\
+	storer	%i3, [%g1 + (11 * size)]%asi     ;\
+	storer	%i4, [%g1 + (12 * size)]%asi     ;\
+	storer	%i5, [%g1 + (13 * size)]%asi     ;\
+	storer	%i6, [%g1 + (14 * size)]%asi     ;\
+	storer	%i7, [%g1 + (15 * size)]%asi 
+
+/* 16 instructions */
+#define	FILL_ASI_SET(loader, size) \
+	loader	[%g1 + 0x0]%asi, %l0		;\
+	loader	[%g1 + (size * 1)]%asi, %l1	;\
+	loader	[%g1 + (size * 2)]%asi, %l2	;\
+	loader	[%g1 + (size * 3)]%asi, %l3	;\
+	loader	[%g1 + (size * 4)]%asi, %l4	;\
+	loader	[%g1 + (size * 5)]%asi, %l5	;\
+	loader	[%g1 + (size * 6)]%asi, %l6	;\
+	loader	[%g1 + (size * 7)]%asi, %l7	;\
+	loader	[%g1 + (size * 8)]%asi, %i0	;\
+	loader	[%g1 + (size * 9)]%asi, %i1	;\
+	loader	[%g1 + (size * 10)]%asi, %i2	;\
+	loader	[%g1 + (size * 11)]%asi, %i3	;\
+	loader	[%g1 + (size * 12)]%asi, %i4	;\
+	loader	[%g1 + (size * 13)]%asi, %i5	;\
+	loader	[%g1 + (size * 14)]%asi, %i6	;\
+	loader	[%g1 + (size * 15)]%asi, %i7	
+	
+/* 9 instructions */
+#define	FILL_DW \
+	prefetch [%g1 + 0x40], #one_read        ;\
+	ldda	[%g1 + 0]%asi, %l0		;\
+	ldda	[%g1 + 0x10]%asi, %l2		;\
+	ldda	[%g1 + 0x20]%asi, %l4		;\
+	ldda	[%g1 + 0x30]%asi, %l6		;\
+	ldda	[%g1 + 0x40]%asi, %i0		;\
+	ldda	[%g1 + 0x50]%asi, %i2		;\
+	ldda	[%g1 + 0x60]%asi, %i4		;\
+	ldda	[%g1 + 0x70]%asi, %i6		
 
 #include <sun4v/sun4v/wbuf.S>	
 	/*
@@ -372,8 +426,76 @@
 	.endm
 
 
-#define ALIGN_128   .align  128
+#define ALIGN_128	.align  128
+#define SYNC		#Sync
+#define LOOKASIDE	#Lookaside
+
+#define USE_FAST_SPILLFILL
 	
+#ifdef USE_FAST_SPILLFILL
+#define spill_64bit_asi(asival, asival_unaligned, target)	\
+	wr	%g0, asival, %asi  ;            \
+	add	%sp, SPOFF, %g1	   ;            \
+	SPILL_ASI_64               ;	        \
+	membar	LOOKASIDE          ;            \
+	saved			   ;		\
+	retry			   ;		\
+	.skip (31-21)*4		   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+
+#define	spill_64clean(asival, asival_unaligned, target)		\
+	wr	%g0, asival, %asi  ;            \
+	add	%sp, SPOFF, %g1	   ;            \
+	SPILL_ASI_64               ; 	        \
+	membar	LOOKASIDE          ;            \
+	b	spill_clean	   ;		\
+	  mov	WSTATE_USER64, %g7 ; 		\
+	.skip (31-21)*4		   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; 	\
+	ALIGN_128	
+
+#define fill_64bit_asi(asival, asival_unaligned, target)	\
+	add	%sp, SPOFF, %g1	   ;            \
+	wr	%g0, asival, %asi  ;            \
+	FILL_DW                    ; 		\
+	restored		   ;		\
+	retry			   ;		\
+	.skip (31-13)*4		   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+#else
+#define spill_64bit_asi(asival, asival_unaligned, target)	\
+	wr	%g0, asival_unaligned, %asi ;   \
+	add	%sp, SPOFF, %g1	  ;             \
+	SPILL_ASI_SET(stxa, 8)     ;	        \
+	saved			   ;		\
+	retry			   ;		\
+	.skip (31-20)*4		   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+
+#define	spill_64clean(asival, asival_unaligned, target)		\
+	wr	%g0, asival_unaligned, %asi  ;            \
+	add	%sp, SPOFF, %g1	   ;            \
+	SPILL_ASI_SET(stxa, 8)     ; 	        \
+	b	spill_clean	   ;		\
+	  mov	WSTATE_USER64, %g7 ; 		\
+	.skip (31-20)*4		   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; 	\
+	ALIGN_128	
+
+#define fill_64bit_asi(asival, asival_unaligned, target)	\
+	wr	%g0, asival_unaligned, %asi  ;  \
+	add	%sp, SPOFF, %g1	  ;             \
+	FILL_ASI_SET(ldxa, 8)      ; 		\
+	restored		   ;		\
+	retry			   ;		\
+	.skip (31-20)*4		   ;		\
+	ba,a,pt %xcc, fault_64bit_##target ; \
+	ALIGN_128	
+#endif
+
 #define spill_32bit_asi(asi, target)		\
 	srl	%sp, 0, %sp	;		\
 	SPILL_FILL_MAGIC_TRAP_ON;               \
@@ -385,16 +507,6 @@
 	ba,a,pt %xcc, fault_32bit_##target ; \
 	ALIGN_128
 
-#define spill_64bit_asi(asi, target)		\
-	SPILL_FILL_MAGIC_TRAP_ON   ;            \
-	SPILL_ASI(stxa, SPOFF, 8, asi) ;	\
-	saved			   ;		\
-	SPILL_FILL_MAGIC_TRAP_OFF  ;		\
-	retry			   ;		\
-	.skip (31-27)*4		   ;		\
-	ba,a,pt %xcc, fault_64bit_##target ; \
-	ALIGN_128	
-
 #define	spill_32clean(asi, target)		\
 	srl	%sp, 0, %sp	; 		\
 	SPILL_FILL_MAGIC_TRAP_ON;               \
@@ -405,15 +517,6 @@
 	ba,a,pt    %xcc, fault_32bit_##target ; \
 	ALIGN_128	
 	
-#define	spill_64clean(asi, target)		\
-	SPILL_FILL_MAGIC_TRAP_ON;               \
-	SPILL_ASI(stxa, SPOFF, 8, asi) ; 	\
-	b	spill_clean	   ;		\
-	  mov	WSTATE_USER64, %g7 ; 		\
-	.skip (31-26)*4		   ;		\
-	ba,a,pt %xcc, fault_64bit_##target ; 	\
-	ALIGN_128	
-
 #define fill_32bit_asi(asi, target)		\
 	srl	%sp, 0, %sp	;		\
 	SPILL_FILL_MAGIC_TRAP_ON;               \
@@ -424,22 +527,32 @@
 	ba,a,pt %xcc, fault_32bit_##target ; \
 	ALIGN_128	
 
-#define fill_64bit_asi(asi, target)		\
-	SPILL_FILL_MAGIC_TRAP_ON;               \
-	FILL(ldxa, SPOFF, 8, asi)  ; 		\
-	restored		   ;		\
-	SPILL_FILL_MAGIC_TRAP_OFF  ;		\
-	retry			   ;		\
-	.skip (31-27)*4		   ;		\
-	ba,a,pt %xcc, fault_64bit_##target ; \
-	.align 128
+.align 128
+ENTRY(fill_64bit_slow_fn0)                      
+fill_slow_start:		
+	FILL_ASI_SET(ldxa, 8);                  
+	restored		;               
+	retry 			;               
+	.skip (31-18)*4		   ;		
+	ba,a,pt %xcc, fault_64bit_fn0 ;
+	.align 128	
+END(fill_64bit_slow_fn0)
+ENTRY(fill_64bit_slow_not)                      
+	FILL_ASI_SET(ldxa, 8);                  
+	restored		;               
+	retry 			;               
+	.skip (31-18)*4		   ;		
+	ba,a,pt %xcc, fault_64bit_not ; 
+	.align 128	
+END(fill_64bit_slow_not)
+fill_slow_end:	
 		
 	.macro	spill_32bit_primary_sn0
 	spill_32bit_asi(ASI_AIUP, sn0)
 	.endm
 
 	.macro	spill_64bit_primary_sn0
-	spill_64bit_asi(ASI_AIUP, sn0)
+	spill_64bit_asi(ASI_LDSTBI_AIUP, ASI_AIUP, sn0)
 	.endm
 
 	.macro spill_32clean_primary_sn0
@@ -447,7 +560,7 @@
 	.endm
 			
 	.macro spill_64clean_primary_sn0
-	spill_64clean(ASI_AIUP, sn0)
+	spill_64clean(ASI_LDSTBI_AIUP, ASI_AIUP, sn0)
 	.endm
 
 	.macro spill_32bit_nucleus_not
@@ -455,7 +568,7 @@
 	.endm
 
 	.macro spill_64bit_nucleus_not
-	spill_64bit_asi(ASI_N,not)
+	spill_64bit_asi(ASI_LDSTBI_N, ASI_N, not)
 	.endm
 
 	.macro	spill_32bit_secondary_so0
@@ -463,7 +576,7 @@
 	.endm
 
 	.macro	spill_64bit_secondary_so0
-	spill_64bit_asi(ASI_AIUS, so0)
+	spill_64bit_asi(ASI_LDSTBI_AIUS, ASI_AIUS, so0)
 	.endm
 	
 	.macro	fill_32bit_primary_fn0
@@ -471,7 +584,7 @@
 	.endm
 
 	.macro	fill_64bit_primary_fn0
-	fill_64bit_asi(ASI_AIUP, fn0)
+	fill_64bit_asi(ASI_LDSTBI_AIUP, ASI_AIUP, fn0)
 	.endm
 
 	.macro fill_32bit_nucleus_not
@@ -479,7 +592,7 @@
 	.endm
 
 	.macro fill_64bit_nucleus_not
-	fill_64bit_asi(ASI_N, not)
+	fill_64bit_asi(ASI_LDSTBI_N, ASI_N, not)
 	.endm
 
 	.macro	spill_32bit_tt1_primary_sn1
@@ -748,12 +861,19 @@
 	tl0_reserved		4			! 0xbc
 tl0_fill_n_normal:
 	tl0_reserved		4			! 0xc0
+tl0_fill_1_normal:	
 	fill_32bit_primary_fn0				! 0xc4 
+tl0_fill_2_normal:	
 	fill_64bit_primary_fn0				! 0xc8
+tl0_fill_3_normal:	
 	fill_32bit_primary_fn0				! 0xcc
+tl0_fill_4_normal:	
 	fill_64bit_primary_fn0				! 0xd0
+tl0_fill_5_normal:	
 	fill_32bit_nucleus_not				! 0xd4
+tl0_fill_6_normal:	
 	fill_64bit_nucleus_not				! 0xd8
+tl0_fill_7_normal:	
 	fill_mixed					! 0xdc
 tl0_fill_n_other_e0:
 	tl0_reserved		32			! 0xe0-0xff
@@ -965,7 +1085,7 @@
 	  mov	%l7, %o0
 	call	spinlock_exit
 	  nop
-	b	user_rtt
+	ba,pt	%xcc, user_rtt
 	  nop
 common_utrap:
 	jmpl	%l3, %o7			! call trap handler / syscall
@@ -976,6 +1096,7 @@
 	sub	%g6, TF_SIZEOF, %sp
 	add	%sp, REGOFF + SPOFF, %l7
 ENTRY(user_rtt)
+	nop
 	! pil handling needs to be re-visited
 	wrpr	%g0, PIL_TICK, %pil
 	ldx	[PCPU(CURTHREAD)], %l4
@@ -990,7 +1111,7 @@
 	wrpr	%g0, %l0, %pil
 	call	ast
 	  mov   %l7, %o0
-	ba,a,pt	%xcc, user_rtt
+	ba,pt	%xcc, user_rtt
 	 nop
 
 1:	
@@ -1002,7 +1123,7 @@
 	mov	T_SPILL, %o1
 	call	trap
 	  mov	%l7, %o0
-	ba,a	%xcc, user_rtt
+	ba,pt	%xcc, user_rtt
 	 nop
 2:
 
@@ -1073,6 +1194,7 @@
 	ba,a	3f
 #endif
 4:
+	membar	#Lookaside
 	fill_64bit_rtt(ASI_AIUP)
 	.global	rtt_fill_end
 rtt_fill_end:
@@ -1342,6 +1464,8 @@
 #define LOADSTORE #LoadStore 
 #define STORESTORE #StoreStore
 
+
+#define WORKING
 #ifdef WORKING
 #define ENTER LOADLOAD
 #define EXIT LOADSTORE|STORESTORE
@@ -1740,8 +1864,17 @@
 	bgeu,pn %xcc, 1f
 	 nop
 	set	fault_rtt_fn1, %g7
-	ba,a	2f
+	ba,a	4f
 1:
+	set	fill_slow_start, %g6
+	cmp	%g7, %g6
+	bleu,a,pn %xcc, 2f
+	  nop
+	set	fill_slow_end, %g6
+	cmp	%g7, %g6
+	blu,a,pn %xcc, 3f
+	  nop
+2:		
 	set	tl1_end, %g6
 	cmp	%g7, %g6
 	bgeu,a,pn %xcc, ptl1_panic
@@ -1755,22 +1888,26 @@
 	cmp	%g6, WTRAP_TYPE
 	bne,a,pn %xcc, ptl1_panic
 	  mov	PTL1_BAD_TRAP, %g1
+3:		
 	andn	%g7, WTRAP_ALIGN, %g7
 	add	%g7, WTRAP_FAULTOFF, %g7
-2:	
+4:	
 	wrpr	%g0, %g7, %tnpc
 	wrpr	%g0, 1, %gl
 	rdpr	%tt, %g5
 	GET_MMFSA_SCRATCH(%g7)
-	cmp	%g5, T_ALIGNMENT
-	be,pn	%xcc, 3f
-	  wr	%g0, ASI_REAL, %asi
+	wr	%g0, ASI_REAL, %asi
 	ldxa	[%g7 + MMFSA_D_ADDR]%asi, %g6
 	ldxa	[%g7 + MMFSA_D_CTX]%asi, %g7
+	cmp	%g5, T_ALIGNMENT
+	be,pn	%xcc, 5f
+	  nop
 	srlx	%g6, PAGE_SHIFT, %g6 
 	sllx	%g6, PAGE_SHIFT, %g6	! mask off bottom
 	or	%g6, %g7, %g6
-
-3:	
-	done	
+	done
+5:
+	sllx	%g7, TRAP_CTX_SHIFT, %g7
+	or	%g7, %g5, %g5
+	done
 END(tl1_trap)	

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/machdep.c#2 (text+ko) ====

@@ -181,6 +181,7 @@
 CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8));
 
 CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2));
+CTASSERT((sizeof(struct pcpu) & ((1<<6)-1)) == 0);
 
 static void
 cpu_startup(void *arg)
@@ -377,7 +378,9 @@
 	proc0.p_md.md_utrap = NULL;
 	frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV;
 	thread0.td_frame = &frame0;
-
+	if ((u_long)thread0.td_frame & 0x3f) {
+		panic("unaligned frame0");
+	}
 	/*
 	 * Prime our per-cpu data page for use.  Note, we are using it for our
 	 * stack, so don't pass the real size (PAGE_SIZE) to pcpu_init or
@@ -412,6 +415,11 @@
 	thread0.td_pcb = (struct pcb *)TLB_PHYS_TO_DIRECT(vtophys((vm_offset_t)thread0.td_pcb));
 	pc->pc_curpcb = thread0.td_pcb;
 
+	if (((thread0.td_pcb->pcb_kstack + SPOFF) & 0x3f) != 0) {
+		printf("unaligned stack pcb_kstack & 0x3f == 0x%lx\n", 
+		       ((thread0.td_pcb->pcb_kstack + SPOFF) & 0x3f));
+	}
+
 	/*
 	 * Update PCPU_REG to point to direct address
 	 * to support easy phys <-> virt translation in trap handler
@@ -697,10 +705,7 @@
 #ifdef SMP
 	cpu_mp_shutdown();
 #endif
-#ifdef notyet
-/* XXX SUN4V_FIXME */
-	openfirmware_exit(args);
-#endif
+	OF_exit();
 }
 
 /* Get current clock frequency for the given cpu id. */

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/pmap.c#2 (text+ko) ====

@@ -1197,15 +1197,16 @@
 	while (ackmask != curactive) {
 		DELAY(1);
 		i++;
-		if (i > 100000000) 
+		if (i > 1000000) 
 			panic(" ackmask=0x%x active=0x%x\n", ackmask, curactive);
 
 	}
 
 	active_total |= curactive;
-	if ((active = (pmap->pm_tlbactive & ~(active_total|cpumask))) != 0) 
+	if ((active = ((pmap->pm_tlbactive & all_cpus) & ~(active_total|cpumask))) != 0) {
+		printf("pmap_ipi: retrying");
 		goto retry;
-
+	}
  done:
 	return (active_total);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200606290256.k5T2ur0a093977>