Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 14 May 2019 17:42:15 -0700
From:      Mark Millard <marklmi@yahoo.com>
To:        Justin Hibbits <chmeeedalf@gmail.com>, FreeBSD PowerPC ML <freebsd-ppc@freebsd.org>
Subject:   Re: An experiment in PowerMac G5 multi-socket/multi-core having better matching mftb() values
Message-ID:  <48525401-7C75-4AE7-98D9-AD7CC7F53DE8@yahoo.com>
In-Reply-To: <C7CD77C3-8A0F-44A0-926E-BE7C28FB69A3@yahoo.com>
References:  <C7CD77C3-8A0F-44A0-926E-BE7C28FB69A3@yahoo.com>

next in thread | previous in thread | raw e-mail | index | archive | help
[Switching to code not using 64-bit atomics.]

On 2019-May-13, at 03:23, Mark Millard <marklmi at yahoo.com> wrote:

> I've been experimenting with a alternate
> technique of dealing with boot-time 970 family
> PowerMac G5 tbr value synchronization across
> sockets/cores. So far it has narrowed the
> range significantly. I've reverted my hack for
> tolerating the mismatches in order to see how
> it goes.
>=20
> . . .
>=20
>=20
> # svnlite diff /usr/src/sys/powerpc/powermac/platform_powermac.c =
/usr/src/sys/powerpc/powerpc/mp_machdep.c | more
> . . .

So far the experiment has gone well.

But the original code used 64-bit atomic types
and so was inappropriate for multi-socket
PowerMac G4's.

So I've been experimenting with an alternate
coding that is not powerpc64 specific.

I present the updated code below, still only
enabling the new technique for PowerMac's.

The example code does show my use of volatile
for the ap_pcpu pointer value and so would not
match the svn code in that respect:

/usr/src/sys/powerpc/aim/aim_machdep.c:extern void * volatile ap_pcpu;
/usr/src/sys/powerpc/aim/mp_cpudep.c:void * volatile ap_pcpu;
/usr/src/sys/powerpc/pseries/platform_chrp.c:extern void *ap_pcpu;
/usr/src/sys/powerpc/powermac/platform_powermac.c:extern void * volatile =
ap_pcpu;

(booke has an example volatile for what is pointed to.
But I've  not dealt with examples that I do not have to test
and so, thus far, I only changed the above for the issue.)

(Whitespace details may not survive such e-mail
based handling.)

Index: /usr/src/sys/powerpc/powermac/platform_powermac.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- /usr/src/sys/powerpc/powermac/platform_powermac.c   (revision =
347549)
+++ /usr/src/sys/powerpc/powermac/platform_powermac.c   (working copy)
@@ -55,7 +55,7 @@
=20
 #include "platform_if.h"
=20
-extern void *ap_pcpu;
+extern void * volatile ap_pcpu;
=20
 static int powermac_probe(platform_t);
 static int powermac_attach(platform_t);
@@ -333,6 +333,9 @@
        return (powermac_smp_fill_cpuref(cpuref, bsp));
 }
=20
+// platform_powermac.c is implicitly an AIM context: no explicit AIM =
test.
+extern volatile int alternate_timebase_sync_style; // 0 indicates old =
style; 1 indicates new style
+
 static int
 powermac_smp_start_cpu(platform_t plat, struct pcpu *pc)
 {
@@ -367,6 +370,13 @@
=20
        ap_pcpu =3D pc;
=20
+       // platform_powermac.c is implicitly an AIM context: no explicit =
AIM test.
+       // Part of: Attempt a better-than-historical approximately
+       //          equal timebase value for ap vs. bsp
+       alternate_timebase_sync_style=3D 1; // So: new style for =
PowerMacs
+
+       powerpc_sync(); // for ap_pcpu and alternate_timebase_sync_style
+
        if (rstvec_virtbase =3D=3D NULL)
                rstvec_virtbase =3D pmap_mapdev(0x80000000, PAGE_SIZE);
=20

Index: /usr/src/sys/powerpc/powerpc/mp_machdep.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- /usr/src/sys/powerpc/powerpc/mp_machdep.c   (revision 347549)
+++ /usr/src/sys/powerpc/powerpc/mp_machdep.c   (working copy)
@@ -70,6 +70,20 @@
 static struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
=20
+#if defined(AIM)
+// Part of: Attempt a better-than-historical approximately
+//          equal timebase value for ap vs. bsp
+
+volatile int alternate_timebase_sync_style=3D 0; // 0 indicates old =
style; 1 indicates new style.
+volatile uint64_t bsp_timebase_sample=3D 0u;
+
+volatile unsigned int from_bsp_status_flag=3D 0u;
+// stages: 0u, 1u (bsp ready to start), 2u (bsp tbr value available to =
ap)
+
+volatile unsigned int from_ap_status_flag=3D 0u;
+// stages: 0u, 1u (ap ready for bsp tbr value to be found and sent)
+#endif
+
 void
 machdep_ap_bootstrap(void)
 {
@@ -77,19 +91,71 @@
        PCPU_SET(awake, 1);
        __asm __volatile("msync; isync");
=20
+#if defined(AIM)
+       powerpc_sync();
+       isync();
+       if (1=3D=3Dalternate_timebase_sync_style)
+       {
+               // Part of: Attempt a better-than-historical =
approximately
+               //          equal timebase value for ap vs. bsp
+
+               register_t oldmsr=3D intr_disable();
+
+               while (1u!=3Dfrom_bsp_status_flag)
+                       ; // spin waiting for bsp to flag that its ready =
to start.
+
+               //  Start to measure a round trip:: to the bsp and back.
+
+               isync(); // Be sure below mftb() result is not from =
earlier speculative execution.
+               register_t const start_round_trip_time_on_ap=3D mftb();
+               atomic_store_rel_int(&from_ap_status_flag, 1u); // bsp =
waits for such before its mftb().
+
+               while (2u!=3Dfrom_bsp_status_flag)
+                       ; // spin waiting for bsp's tbr value
+
+               // Mid-point of ap round trip and the bsp timebase value =
should be approximately equal
+               // when the tbr's are well matched, absent interruptions =
on both sides.
+
+               isync(); // Be sure below mftb() result is not from =
earlier speculative execution.
+               register_t const end_round_trip_time_on_ap=3D mftb();
+
+               int64_t const approx_round_trip_tbr_detla_on_ap
+                               =3D end_round_trip_time_on_ap - =
start_round_trip_time_on_ap;
+               int64_t const ap_midpoint_tbr_value
+                               =3D start_round_trip_time_on_ap + =
(approx_round_trip_tbr_detla_on_ap+1)/2;
+
+               // Establish delta_to_match_bsp_example such that:
+               //           =
ap_midpoint_tbr_value+delta_to_match_bsp_example=3D=3Dbsp_timebase_sample
+               int64_t const delta_to_match_bsp_tbr_example=3D =
bsp_timebase_sample-ap_midpoint_tbr_value;
+
+               isync(); // Be sure below mftb() result is not from =
earlier speculative execution.
+               mttb((int64_t)mftb()+delta_to_match_bsp_tbr_example); // =
Make the ap tbr adjustment.
+
+               atomic_store_rel_int(&from_bsp_status_flag, 0u); // Get =
ready for next ap in bsp loop
+               atomic_store_rel_int(&from_ap_status_flag, 0u);  // also =
flaging bsp that this ap is done
+
+               mtmsr(oldmsr);
+       }
+#endif
+
        while (ap_letgo =3D=3D 0)
                nop_prio_vlow();
        nop_prio_medium();
=20
-       /*
-        * Set timebase as soon as possible to meet an implicit =
rendezvous
-        * from cpu_mp_unleash(), which sets ap_letgo and then =
immediately
-        * sets timebase.
-        *
-        * Note that this is instrinsically racy and is only relevant on
-        * platforms that do not support better mechanisms.
-        */
-       platform_smp_timebase_sync(ap_timebase, 1);
+#if defined(AIM)
+       if (0=3D=3Dalternate_timebase_sync_style)
+#endif
+       {
+               /*
+                * Set timebase as soon as possible to meet an implicit =
rendezvous
+                * from cpu_mp_unleash(), which sets ap_letgo and then =
immediately
+                * sets timebase.
+                *
+                * Note that this is instrinsically racy and is only =
relevant on
+                * platforms that do not support better mechanisms.
+                */
+               platform_smp_timebase_sync(ap_timebase, 1);
+       }
=20
        /* Give platform code a chance to do anything else necessary */
        platform_smp_ap_init();
@@ -260,6 +326,34 @@
                                    pc->pc_cpuid, =
(uintmax_t)pc->pc_hwref,
                                    pc->pc_awake);
                        smp_cpus++;
+
+#if defined(AIM)
+                       // Part of: Attempt a better-than-historical =
approximately
+                       //          equal timebase value for ap vs. bsp
+                       powerpc_sync();
+                       isync();
+                       if (1=3D=3Dalternate_timebase_sync_style)
+                       {
+                               register_t oldmsr=3D intr_disable();
+
+                               =
atomic_store_rel_int(&from_bsp_status_flag, 1u); // bsp ready to start.
+
+                               while (1u!=3Dfrom_ap_status_flag)
+                                       ; // spin waiting for ap to =
flag: time to send a tbr.
+
+                               isync(); // Be sure below mftb() result =
is not from earlier.
+                               bsp_timebase_sample=3D mftb();
+                               =
atomic_store_rel_int(&from_bsp_status_flag, 2u); // bsp tbr available.
+
+                               // Most of the rest of the usage is in =
machdep_ap_bootstrap,
+                               // other than controling =
alternate_timebase_sync_style value.
+
+                               while (0u!=3Dfrom_ap_status_flag)
+                                       ; // spin waiting for ap to be =
done with the sample.
+
+                               mtmsr(oldmsr);
+                       }
+#endif
                } else
                        CPU_SET(pc->pc_cpuid, &stopped_cpus);
        }
@@ -266,14 +360,22 @@
=20
        ap_awake =3D 1;
=20
-       /* Provide our current DEC and TB values for APs */
-       ap_timebase =3D mftb() + 10;
-       __asm __volatile("msync; isync");
+#if defined(AIM)
+       if (0=3D=3Dalternate_timebase_sync_style)
+#endif
+       {
+               /* Provide our current DEC and TB values for APs */
+               ap_timebase =3D mftb() + 10;
+               __asm __volatile("msync; isync");
+       }
       =20
        /* Let APs continue */
        atomic_store_rel_int(&ap_letgo, 1);
=20
-       platform_smp_timebase_sync(ap_timebase, 0);
+#if defined(AIM)
+       if (0=3D=3Dalternate_timebase_sync_style)
+#endif
+               platform_smp_timebase_sync(ap_timebase, 0);
=20
        while (ap_awake < smp_cpus)
                ;

=3D=3D=3D
Mark Millard
marklmi at yahoo.com
( dsl-only.net went
away in early 2018-Mar)




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?48525401-7C75-4AE7-98D9-AD7CC7F53DE8>