Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 2 May 2019 16:10:37 -0700
From:      Mark Millard <marklmi@yahoo.com>
To:        FreeBSD PowerPC ML <freebsd-ppc@freebsd.org>, Justin Hibbits <chmeeedalf@gmail.com>
Subject:   slbmte, slbie, slbia isync paranoia for -r347003 investigation: does it change any behavior as stands? (yes)
Message-ID:  <A103C0BF-0329-4915-BF50-86B289B6DB62@yahoo.com>

next in thread | raw e-mail | index | archive | help
I extended my -r347003 patches to add context synchronizations
(isync's), possibly more than necessary, just to see if the
behavior I've reported elsewhere changed. It does change some:

vt_upgrade(&vt_consdev). . .: seems the same, sometimes hanging-up here.

cpu_mp_unleash(0). . .: seems different, it still hangs-up, but:

        "SMP: 4 CPUs found; 4 CPUs usable; 3 CPUs woken" is now rare.
	Instead it is normally silent after the ". . .".

	I've never had cpu_mp_unleash finish with the isync's in place.
        (So no successful boots.)

Of course, the testing has probabilistic results, but the
cpu_mp_unleash behavior differences suggest that, without
the isync's, there is code that was picking up old context
and using it.



I show the patch set below, for reference, including the strcmp.S
patch just to be complete about the changes:

# svnlite diff /mnt/usr/src/
Index: /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S
===================================================================
--- /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S     (revision 347003)
+++ /mnt/usr/src/lib/libc/powerpc64/string/strcmp.S     (working copy)
@@ -88,9 +88,16 @@
 .Lstrcmp_compare_by_word:
        ld      %r5,0(%r3)      /* Load double words. */
        ld      %r6,0(%r4)
-       xor     %r8,%r8,%r8     /* %r8 <- Zero. */
+       lis     %r8,32639       /* 0x7f7f */
+       ori     %r8,%r8,32639   /* 0x7f7f7f7f */
+       rldimi  %r8,%r8,32,0    /* 0x7f7f7f7f'7f7f7f7f */
        xor     %r0,%r5,%r6     /* Check if double words are different. */
-       cmpb    %r7,%r5,%r8     /* Check if double words contain zero. */
+                               /* Check for zero vs. not bytes: */
+       and     %r9,%r5,%r8     /* 0x00->0x00, 0x80->0x00, other->ms-bit-in-byte==0 */
+       add     %r9,%r9,%r8     /*     ->0x7f,     ->0x7f,      ->ms-bit-in-byte==1 */
+       nor     %r7,%r9,%r5     /*     ->0x80,     ->0x00,      ->ms-bit-in-byte==0 */
+       andc    %r7,%r7,%r8     /*     ->0x80,     ->0x00,      ->0x00 */
+                               /* sort of like cmpb %r7,%r5,%r8 for %r8 being zero */
 
        /*
         * If double words are different or contain zero,
@@ -104,7 +111,12 @@
        ldu     %r5,8(%r3)      /* Load double words. */
        ldu     %r6,8(%r4)
        xor     %r0,%r5,%r6     /* Check if double words are different. */
-       cmpb    %r7,%r5,%r8     /* Check if double words contain zero. */
+                               /* Check for zero vs. not bytes: */
+       and     %r9,%r5,%r8     /* 0x00->0x00, 0x80->0x00, other->ms-bit-in-byte==0 */
+       add     %r9,%r9,%r8     /*     ->0x7f,     ->0x7f,      ->ms-bit-in-byte==1 */
+       nor     %r7,%r9,%r5     /*     ->0x80,     ->0x00,      ->ms-bit-in-byte==0 */
+       andc    %r7,%r7,%r8     /*     ->0x80,     ->0x00,      ->0x00 */
+                               /* sort of like cmpb %r7,%r5,%r8 for %r8 being zero */
 
        /*
         * If double words are different or contain zero,
Index: /mnt/usr/src/sys/powerpc/aim/mmu_oea64.c
===================================================================
--- /mnt/usr/src/sys/powerpc/aim/mmu_oea64.c    (revision 347003)
+++ /mnt/usr/src/sys/powerpc/aim/mmu_oea64.c    (working copy)
@@ -959,7 +959,8 @@
         * Map the entire KVA range into the SLB. We must not fault there.
         */
        #ifdef __powerpc64__
-       for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
+       i = 0;
+       for (va = virtual_avail; va < virtual_end && i<n_slbs-1; va += SEGMENT_LENGTH, i++)
                moea64_bootstrap_slb_prefault(va, 0);
        #endif
 
@@ -1090,8 +1091,8 @@
        CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
 
        #ifdef __powerpc64__
-       PCPU_SET(aim.userslb, pm->pm_slb);
-       __asm __volatile("slbmte %0, %1; isync" ::
+       PCPU_SET(aim.userslb, pm->pm_slb); // no slbie needed?
+       __asm __volatile("isync; slbmte %0, %1; isync" ::
            "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
        #else
        PCPU_SET(curpmap, pm->pmap_phys);
@@ -1104,7 +1105,7 @@
 {
        pmap_t  pm;
 
-       __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR));
+       __asm __volatile("isync; slbie %0; isync" :: "r"(USER_ADDR));
 
        pm = &td->td_proc->p_vmspace->vm_pmap;
        CPU_CLR(PCPU_GET(cpuid), &pm->pm_active);
@@ -1956,7 +1957,7 @@
            (uintptr_t)uaddr >> ADDR_SR_SHFT;
        curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv;
 #ifdef __powerpc64__
-       __asm __volatile ("slbie %0; slbmte %1, %2; isync" ::
+       __asm __volatile ("isync; slbie %0; slbmte %1, %2; isync" ::
            "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE));
 #else
        __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv));
Index: /mnt/usr/src/sys/powerpc/aim/moea64_native.c
===================================================================
--- /mnt/usr/src/sys/powerpc/aim/moea64_native.c        (revision 347003)
+++ /mnt/usr/src/sys/powerpc/aim/moea64_native.c        (working copy)
@@ -406,7 +406,7 @@
         */
 
        #ifdef __powerpc64__
-               __asm __volatile ("slbia");
+               __asm __volatile ("isync; slbia");
                __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) :
                    "r"(0));
 
@@ -417,6 +417,7 @@
                        __asm __volatile ("slbmte %0, %1" :: 
                            "r"(slb[i].slbv), "r"(slb[i].slbe)); 
                }
+               __asm __volatile ("isync");
        #else
                for (i = 0; i < 16; i++)
                        mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]);
Index: /mnt/usr/src/sys/powerpc/aim/slb.c
===================================================================
--- /mnt/usr/src/sys/powerpc/aim/slb.c  (revision 347003)
+++ /mnt/usr/src/sys/powerpc/aim/slb.c  (working copy)
@@ -457,7 +457,7 @@
        /* If it is for this CPU, put it in the SLB right away */
        if (pmap_bootstrapped) {
                /* slbie not required */
-               __asm __volatile ("slbmte %0, %1" :: 
+               __asm __volatile ("isync; slbmte %0, %1; isync" :: 
                    "r"(slbcache[i].slbv), "r"(slbcache[i].slbe)); 
        }
 
Index: /mnt/usr/src/sys/powerpc/aim/trap_subr64.S
===================================================================
--- /mnt/usr/src/sys/powerpc/aim/trap_subr64.S  (revision 347003)
+++ /mnt/usr/src/sys/powerpc/aim/trap_subr64.S  (working copy)
@@ -65,6 +65,7 @@
=20
        li      %r29, 0                 /* Set the counter to zero */
=20
+       isync
        slbia
        slbmfee %r31,%r29              =20
        clrrdi  %r31,%r31,28
@@ -71,6 +72,7 @@
        slbie   %r31
 1:     ld      %r31, 0(%r28)           /* Load SLB entry pointer */
        cmpdi   %r31, 0                 /* If NULL, stop */
+       isync
        beqlr
=20
        ld      %r30, 0(%r31)           /* Load SLBV */
@@ -96,6 +98,7 @@
        /* Otherwise, set up SLBs */
        li      %r29, 0                 /* Set the counter to zero */
=20
+       isync
        slbia
        slbmfee %r31,%r29              =20
        clrrdi  %r31,%r31,28
@@ -105,6 +108,7 @@
=20
        ld      %r31, 8(%r28)           /* Load SLBE */
        cmpdi   %r31, 0                 /* If SLBE is not valid, stop */
+       isync
        beqlr
        ld      %r30, 0(%r28)           /* Load SLBV  */
        slbmte  %r30, %r31              /* Install SLB entry */
@@ -113,6 +117,7 @@
        addi    %r29, %r29, 1
        cmpdi   %r29, 64                /* Repeat if we are not at the end */
        blt     1b 
+       isync
        blr
=20
 /*
Index: /mnt/usr/src/sys/powerpc/powerpc/trap.c
===================================================================
--- /mnt/usr/src/sys/powerpc/powerpc/trap.c     (revision 347003)
+++ /mnt/usr/src/sys/powerpc/powerpc/trap.c     (working copy)
@@ -453,8 +453,8 @@
 #if defined(__powerpc64__) && defined(AIM)
                case EXC_DSE:
                        if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0 &&
-                           (frame->dar & SEGMENT_MASK) == USER_ADDR) {
-                               __asm __volatile ("slbmte %0, %1" ::
+                           (frame->dar & SEGMENT_MASK) == USER_ADDR) { // no slbie needed?
+                               __asm __volatile ("isync; slbmte %0, %1; isync" ::
                                        "r"(td->td_pcb->pcb_cpu.aim.usr_vsid),
                                        "r"(USER_SLB_SLBE));
                                return;
@@ -712,8 +712,8 @@
         * Speculatively restore last user SLB segment, which we know is
         * invalid already, since we are likely to do copyin()/copyout().
         */
-       if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0)
-               __asm __volatile ("slbmte %0, %1; isync" ::
+       if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0) // no slbie needed?
+               __asm __volatile ("isync; slbmte %0, %1; isync" ::
                    "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
 #endif
 



===
Mark Millard
marklmi at yahoo.com
( dsl-only.net went
away in early 2018-Mar)




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?A103C0BF-0329-4915-BF50-86B289B6DB62>