Date: Thu, 2 May 2019 16:10:37 -0700 From: Mark Millard <marklmi@yahoo.com> To: FreeBSD PowerPC ML <freebsd-ppc@freebsd.org>, Justin Hibbits <chmeeedalf@gmail.com> Subject: slbmte, slbie, slbia isync paranoia for -r347003 investigation: does it change any behavior as it stands? (yes) Message-ID: <A103C0BF-0329-4915-BF50-86B289B6DB62@yahoo.com>
next in thread | raw e-mail | index | archive | help
I extended my -r347003 patches to add context synchronizations (isync's), possibly more than necessary, just to see if the behavior I've reported elsewhere changed. It does change some: vt_upgrade(&vt_consdev). . .: seems the same, sometimes hanging-up here. cpu_mp_unleash(0). . .: seems different, it still hangs-up, but: "SMP: 4 CPUs found; 4 CPUs usable; 3 CPUs woken" is now rare. Instead it is normally silent after the ". . .". I've never had cpu_mp_unleash finish with the isync's in place. (So no successful boots.) Of course, the testing has probabilistic results, but the cpu_mp_unleash behavior differences suggest that, without the isync's, there is code that was picking up old context and using it. I show the patch set below, for reference, including the strcmp.S patch just to be complete about the changes: # svnlite diff /mnt/usr/src/ Index: /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S (revision = 347003) +++ /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S (working copy) @@ -88,9 +88,16 @@ .Lstrcmp_compare_by_word: ld %r5,0(%r3) /* Load double words. */ ld %r6,0(%r4) - xor %r8,%r8,%r8 /* %r8 <- Zero. */ + lis %r8,32639 /* 0x7f7f */ + ori %r8,%r8,32639 /* 0x7f7f7f7f */ + rldimi %r8,%r8,32,0 /* 0x7f7f7f7f'7f7f7f7f */ xor %r0,%r5,%r6 /* Check if double words are different. = */ - cmpb %r7,%r5,%r8 /* Check if double words contain zero. = */ + /* Check for zero vs. 
not bytes: */ + and %r9,%r5,%r8 /* 0x00->0x00, 0x80->0x00, = other->ms-bit-in-byte=3D=3D0 */ + add %r9,%r9,%r8 /* ->0x7f, ->0x7f, = ->ms-bit-in-byte=3D=3D1 */ + nor %r7,%r9,%r5 /* ->0x80, ->0x00, = ->ms-bit-in-byte=3D=3D0 */ + andc %r7,%r7,%r8 /* ->0x80, ->0x00, ->0x00 = */ + /* sort of like cmpb %r7,%r5,%r8 for %r8 = being zero */ =20 /* * If double words are different or contain zero, @@ -104,7 +111,12 @@ ldu %r5,8(%r3) /* Load double words. */ ldu %r6,8(%r4) xor %r0,%r5,%r6 /* Check if double words are different. = */ - cmpb %r7,%r5,%r8 /* Check if double words contain zero. = */ + /* Check for zero vs. not bytes: */ + and %r9,%r5,%r8 /* 0x00->0x00, 0x80->0x00, = other->ms-bit-in-byte=3D=3D0 */ + add %r9,%r9,%r8 /* ->0x7f, ->0x7f, = ->ms-bit-in-byte=3D=3D1 */ + nor %r7,%r9,%r5 /* ->0x80, ->0x00, = ->ms-bit-in-byte=3D=3D0 */ + andc %r7,%r7,%r8 /* ->0x80, ->0x00, ->0x00 = */ + /* sort of like cmpb %r7,%r5,%r8 for %r8 = being zero */ =20 /* * If double words are different or contain zero, Index: /mnt/usr/src/sys/powerpc/aim/mmu_oea64.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /mnt/usr/src/sys/powerpc/aim/mmu_oea64.c (revision 347003) +++ /mnt/usr/src/sys/powerpc/aim/mmu_oea64.c (working copy) @@ -959,7 +959,8 @@ * Map the entire KVA range into the SLB. We must not fault = there. */ #ifdef __powerpc64__ - for (va =3D virtual_avail; va < virtual_end; va +=3D = SEGMENT_LENGTH) + i =3D 0; + for (va =3D virtual_avail; va < virtual_end && i<n_slbs-1; va +=3D= SEGMENT_LENGTH, i++) moea64_bootstrap_slb_prefault(va, 0); #endif =20 @@ -1090,8 +1091,8 @@ CPU_SET(PCPU_GET(cpuid), &pm->pm_active); =20 #ifdef __powerpc64__ - PCPU_SET(aim.userslb, pm->pm_slb); - __asm __volatile("slbmte %0, %1; isync" :: + PCPU_SET(aim.userslb, pm->pm_slb); // no slbie needed? 
+ __asm __volatile("isync; slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #else PCPU_SET(curpmap, pm->pmap_phys); @@ -1104,7 +1105,7 @@ { pmap_t pm; =20 - __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); + __asm __volatile("isync; slbie %0; isync" :: "r"(USER_ADDR)); =20 pm =3D &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); @@ -1956,7 +1957,7 @@ (uintptr_t)uaddr >> ADDR_SR_SHFT; curthread->td_pcb->pcb_cpu.aim.usr_vsid =3D slbv; #ifdef __powerpc64__ - __asm __volatile ("slbie %0; slbmte %1, %2; isync" :: + __asm __volatile ("isync; slbie %0; slbmte %1, %2; isync" :: "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE)); #else __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), = "r"(slbv)); Index: /mnt/usr/src/sys/powerpc/aim/moea64_native.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /mnt/usr/src/sys/powerpc/aim/moea64_native.c (revision = 347003) +++ /mnt/usr/src/sys/powerpc/aim/moea64_native.c (working copy) @@ -406,7 +406,7 @@ */ =20 #ifdef __powerpc64__ - __asm __volatile ("slbia"); + __asm __volatile ("isync; slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : = "=3Dr"(seg0) : "r"(0)); =20 @@ -417,6 +417,7 @@ __asm __volatile ("slbmte %0, %1" ::=20 "r"(slb[i].slbv), "r"(slb[i].slbe));=20 } + __asm __volatile ("isync"); #else for (i =3D 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, = kernel_pmap->pm_sr[i]); Index: /mnt/usr/src/sys/powerpc/aim/slb.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /mnt/usr/src/sys/powerpc/aim/slb.c (revision 347003) +++ /mnt/usr/src/sys/powerpc/aim/slb.c (working copy) @@ -457,7 +457,7 @@ /* If it is for this CPU, put it in the SLB right 
away */ if (pmap_bootstrapped) { /* slbie not required */ - __asm __volatile ("slbmte %0, %1" ::=20 + __asm __volatile ("isync; slbmte %0, %1; isync" ::=20 "r"(slbcache[i].slbv), "r"(slbcache[i].slbe));=20 } =20 Index: /mnt/usr/src/sys/powerpc/aim/trap_subr64.S =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /mnt/usr/src/sys/powerpc/aim/trap_subr64.S (revision 347003) +++ /mnt/usr/src/sys/powerpc/aim/trap_subr64.S (working copy) @@ -65,6 +65,7 @@ =20 li %r29, 0 /* Set the counter to zero */ =20 + isync slbia slbmfee %r31,%r29 =20 clrrdi %r31,%r31,28 @@ -71,6 +72,7 @@ slbie %r31 1: ld %r31, 0(%r28) /* Load SLB entry pointer */ cmpdi %r31, 0 /* If NULL, stop */ + isync beqlr =20 ld %r30, 0(%r31) /* Load SLBV */ @@ -96,6 +98,7 @@ /* Otherwise, set up SLBs */ li %r29, 0 /* Set the counter to zero */ =20 + isync slbia slbmfee %r31,%r29 =20 clrrdi %r31,%r31,28 @@ -105,6 +108,7 @@ =20 ld %r31, 8(%r28) /* Load SLBE */ cmpdi %r31, 0 /* If SLBE is not valid, stop */ + isync beqlr ld %r30, 0(%r28) /* Load SLBV */ slbmte %r30, %r31 /* Install SLB entry */ @@ -113,6 +117,7 @@ addi %r29, %r29, 1 cmpdi %r29, 64 /* Repeat if we are not at the = end */ blt 1b=20 + isync blr =20 /* Index: /mnt/usr/src/sys/powerpc/powerpc/trap.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /mnt/usr/src/sys/powerpc/powerpc/trap.c (revision 347003) +++ /mnt/usr/src/sys/powerpc/powerpc/trap.c (working copy) @@ -453,8 +453,8 @@ #if defined(__powerpc64__) && defined(AIM) case EXC_DSE: if (td->td_pcb->pcb_cpu.aim.usr_vsid !=3D 0 && - (frame->dar & SEGMENT_MASK) =3D=3D = USER_ADDR) { - __asm __volatile ("slbmte %0, %1" :: + (frame->dar & SEGMENT_MASK) =3D=3D = USER_ADDR) { // no slbie 
needed? + __asm __volatile ("isync; slbmte %0, %1; = isync" :: = "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); return; @@ -712,8 +712,8 @@ * Speculatively restore last user SLB segment, which we know is * invalid already, since we are likely to do = copyin()/copyout(). */ - if (td->td_pcb->pcb_cpu.aim.usr_vsid !=3D 0) - __asm __volatile ("slbmte %0, %1; isync" :: + if (td->td_pcb->pcb_cpu.aim.usr_vsid !=3D 0) // no slbie needed? + __asm __volatile ("isync; slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), = "r"(USER_SLB_SLBE)); #endif =20 =3D=3D=3D Mark Millard marklmi at yahoo.com ( dsl-only.net went away in early 2018-Mar)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?A103C0BF-0329-4915-BF50-86B289B6DB62>