Date: Tue, 14 May 2019 17:42:15 -0700 From: Mark Millard <marklmi@yahoo.com> To: Justin Hibbits <chmeeedalf@gmail.com>, FreeBSD PowerPC ML <freebsd-ppc@freebsd.org> Subject: Re: An experiment in PowerMac G5 multi-socket/multi-core having better matching mftb() values Message-ID: <48525401-7C75-4AE7-98D9-AD7CC7F53DE8@yahoo.com> In-Reply-To: <C7CD77C3-8A0F-44A0-926E-BE7C28FB69A3@yahoo.com>
index | next in thread | previous in thread | raw e-mail
[Switching to code not using 64-bit atomics.]
On 2019-May-13, at 03:23, Mark Millard <marklmi at yahoo.com> wrote:
> I've been experimenting with a alternate
> technique of dealing with boot-time 970 family
> PowerMac G5 tbr value synchronization across
> sockets/cores. So far it has narrowed the
> range significantly. I've reverted my hack for
> tolerating the mismatches in order to see how
> it goes.
>
> . . .
>
>
> # svnlite diff /usr/src/sys/powerpc/powermac/platform_powermac.c /usr/src/sys/powerpc/powerpc/mp_machdep.c | more
> . . .
So far the experiment has gone well.
But the original code used 64-bit atomic types
and so was inappropriate for multi-socket
PowerMac G4's.
So I've been experimenting with an alternate
coding that is not powerpc64 specific.
I present the updated code below, still only
enabling the new technique for PowerMac's.
The example code does show my use of volatile
for the ap_pcpu pointer value and so would not
match the svn code in that respect:
/usr/src/sys/powerpc/aim/aim_machdep.c:extern void * volatile ap_pcpu;
/usr/src/sys/powerpc/aim/mp_cpudep.c:void * volatile ap_pcpu;
/usr/src/sys/powerpc/pseries/platform_chrp.c:extern void *ap_pcpu;
/usr/src/sys/powerpc/powermac/platform_powermac.c:extern void * volatile ap_pcpu;
(booke has an example volatile for what is pointed to.
But I've not dealt with examples that I do not have to test
and so, thus far, I only changed the above for the issue.)
(Whitespace details may not survive such e-mail
based handling.)
Index: /usr/src/sys/powerpc/powermac/platform_powermac.c
===================================================================
--- /usr/src/sys/powerpc/powermac/platform_powermac.c (revision 347549)
+++ /usr/src/sys/powerpc/powermac/platform_powermac.c (working copy)
@@ -55,7 +55,7 @@
#include "platform_if.h"
-extern void *ap_pcpu;
+extern void * volatile ap_pcpu;
static int powermac_probe(platform_t);
static int powermac_attach(platform_t);
@@ -333,6 +333,9 @@
return (powermac_smp_fill_cpuref(cpuref, bsp));
}
+// platform_powermac.c is implicitly an AIM context: no explicit AIM test.
+extern volatile int alternate_timebase_sync_style; // 0 indicates old style; 1 indicates new style
+
static int
powermac_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
@@ -367,6 +370,13 @@
ap_pcpu = pc;
+ // platform_powermac.c is implicitly an AIM context: no explicit AIM test.
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+ alternate_timebase_sync_style= 1; // So: new style for PowerMacs
+
+ powerpc_sync(); // for ap_pcpu and alternate_timebase_sync_style
+
if (rstvec_virtbase == NULL)
rstvec_virtbase = pmap_mapdev(0x80000000, PAGE_SIZE);
Index: /usr/src/sys/powerpc/powerpc/mp_machdep.c
===================================================================
--- /usr/src/sys/powerpc/powerpc/mp_machdep.c (revision 347549)
+++ /usr/src/sys/powerpc/powerpc/mp_machdep.c (working copy)
@@ -70,6 +70,20 @@
static struct mtx ap_boot_mtx;
struct pcb stoppcbs[MAXCPU];
+#if defined(AIM)
+// Part of: Attempt a better-than-historical approximately
+// equal timebase value for ap vs. bsp
+
+volatile int alternate_timebase_sync_style= 0; // 0 indicates old style; 1 indicates new style.
+volatile uint64_t bsp_timebase_sample= 0u;
+
+volatile unsigned int from_bsp_status_flag= 0u;
+// stages: 0u, 1u (bsp ready to start), 2u (bsp tbr value available to ap)
+
+volatile unsigned int from_ap_status_flag= 0u;
+// stages: 0u, 1u (ap ready for bsp tbr value to be found and sent)
+#endif
+
void
machdep_ap_bootstrap(void)
{
@@ -77,19 +91,71 @@
PCPU_SET(awake, 1);
__asm __volatile("msync; isync");
+#if defined(AIM)
+ powerpc_sync();
+ isync();
+ if (1==alternate_timebase_sync_style)
+ {
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+
+ register_t oldmsr= intr_disable();
+
+ while (1u!=from_bsp_status_flag)
+ ; // spin waiting for bsp to flag that its ready to start.
+
+ // Start to measure a round trip:: to the bsp and back.
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ register_t const start_round_trip_time_on_ap= mftb();
+ atomic_store_rel_int(&from_ap_status_flag, 1u); // bsp waits for such before its mftb().
+
+ while (2u!=from_bsp_status_flag)
+ ; // spin waiting for bsp's tbr value
+
+ // Mid-point of ap round trip and the bsp timebase value should be approximately equal
+ // when the tbr's are well matched, absent interruptions on both sides.
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ register_t const end_round_trip_time_on_ap= mftb();
+
+ int64_t const approx_round_trip_tbr_detla_on_ap
+ = end_round_trip_time_on_ap - start_round_trip_time_on_ap;
+ int64_t const ap_midpoint_tbr_value
+ = start_round_trip_time_on_ap + (approx_round_trip_tbr_detla_on_ap+1)/2;
+
+ // Establish delta_to_match_bsp_example such that:
+ // ap_midpoint_tbr_value+delta_to_match_bsp_example==bsp_timebase_sample
+ int64_t const delta_to_match_bsp_tbr_example= bsp_timebase_sample-ap_midpoint_tbr_value;
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ mttb((int64_t)mftb()+delta_to_match_bsp_tbr_example); // Make the ap tbr adjustment.
+
+ atomic_store_rel_int(&from_bsp_status_flag, 0u); // Get ready for next ap in bsp loop
+ atomic_store_rel_int(&from_ap_status_flag, 0u); // also flaging bsp that this ap is done
+
+ mtmsr(oldmsr);
+ }
+#endif
+
while (ap_letgo == 0)
nop_prio_vlow();
nop_prio_medium();
- /*
- * Set timebase as soon as possible to meet an implicit rendezvous
- * from cpu_mp_unleash(), which sets ap_letgo and then immediately
- * sets timebase.
- *
- * Note that this is instrinsically racy and is only relevant on
- * platforms that do not support better mechanisms.
- */
- platform_smp_timebase_sync(ap_timebase, 1);
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ {
+ /*
+ * Set timebase as soon as possible to meet an implicit rendezvous
+ * from cpu_mp_unleash(), which sets ap_letgo and then immediately
+ * sets timebase.
+ *
+ * Note that this is instrinsically racy and is only relevant on
+ * platforms that do not support better mechanisms.
+ */
+ platform_smp_timebase_sync(ap_timebase, 1);
+ }
/* Give platform code a chance to do anything else necessary */
platform_smp_ap_init();
@@ -260,6 +326,34 @@
pc->pc_cpuid, (uintmax_t)pc->pc_hwref,
pc->pc_awake);
smp_cpus++;
+
+#if defined(AIM)
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+ powerpc_sync();
+ isync();
+ if (1==alternate_timebase_sync_style)
+ {
+ register_t oldmsr= intr_disable();
+
+ atomic_store_rel_int(&from_bsp_status_flag, 1u); // bsp ready to start.
+
+ while (1u!=from_ap_status_flag)
+ ; // spin waiting for ap to flag: time to send a tbr.
+
+ isync(); // Be sure below mftb() result is not from earlier.
+ bsp_timebase_sample= mftb();
+ atomic_store_rel_int(&from_bsp_status_flag, 2u); // bsp tbr available.
+
+ // Most of the rest of the usage is in machdep_ap_bootstrap,
+ // other than controling alternate_timebase_sync_style value.
+
+ while (0u!=from_ap_status_flag)
+ ; // spin waiting for ap to be done with the sample.
+
+ mtmsr(oldmsr);
+ }
+#endif
} else
CPU_SET(pc->pc_cpuid, &stopped_cpus);
}
@@ -266,14 +360,22 @@
ap_awake = 1;
- /* Provide our current DEC and TB values for APs */
- ap_timebase = mftb() + 10;
- __asm __volatile("msync; isync");
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ {
+ /* Provide our current DEC and TB values for APs */
+ ap_timebase = mftb() + 10;
+ __asm __volatile("msync; isync");
+ }
/* Let APs continue */
atomic_store_rel_int(&ap_letgo, 1);
- platform_smp_timebase_sync(ap_timebase, 0);
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ platform_smp_timebase_sync(ap_timebase, 0);
while (ap_awake < smp_cpus)
;
===
Mark Millard
marklmi at yahoo.com
( dsl-only.net went
away in early 2018-Mar)
home |
help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?48525401-7C75-4AE7-98D9-AD7CC7F53DE8>
