Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 21 Jul 2025 03:04:16 GMT
From:      Justin Hibbits <jhibbits@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 90a9ce456740 - stable/14 - powerpc: Fix multiple issues with FP/VSX save/restore
Message-ID:  <202507210304.56L34GHi009768@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/14 has been updated by jhibbits:

URL: https://cgit.FreeBSD.org/src/commit/?id=90a9ce456740d9e2ef20e781e5a57f5d611d1c83

commit 90a9ce456740d9e2ef20e781e5a57f5d611d1c83
Author:     Timothy Pearson <tpearson@raptorengineering.com>
AuthorDate: 2025-07-06 19:28:30 +0000
Commit:     Justin Hibbits <jhibbits@FreeBSD.org>
CommitDate: 2025-07-21 02:56:59 +0000

    powerpc: Fix multiple issues with FP/VSX save/restore
    
    Multiple issues existed within the powerpc FP/VSX save/restore functionality,
    leading to register corruption and loss of register contents in specific
    scenarios involving high signal load and use of both floating point and VSX
    instructions.
    
    Issue #1
    
    On little endian systems the PCB used the wrong location for the shadowed
    FP register within the larger VSX register.  This appears to have been an
    attempt to correct issue #2 without understanding how the vector load/store
    instructions actually operate.
    
    Issue #2
    
    On little endian systems, the VSX state save/restore routines swapped 32-bit
    words within the 64-bit aliased double word for the associated floating point
    register.  This was due to the use of a word-oriented load/store vs. doubleword
    oriented load/store.
    
    Issue #3
    
    The FPU was turned off in the PCB but not in hardware, leading to a potential
    race condition if the same thread was scheduled immediately after sigreturn.
    
    The triggering codebase for this is Go, which makes heavy use of signals and
    and generates an unusual mix of floating point and VSX assembler.  As a result,
    when combined with th powerpc lazy FPU restore, a condition was repeatedly hit
    whereby the thread was interrupted in FP+VSX mode, then restored in FP only
    mode, thus reliably triggering the issues above.
    
    Also clean up the associated asm() style issue flagged by GitHub Actions.
    
    Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
    
    MFC after:      1 week
    Pull Request:   https://github.com/freebsd/freebsd-src/pull/1756
    
    (cherry picked from commit 077e30e61d7e1c90af7df31989bb976a3ace0c69)
---
 sys/powerpc/include/pcb.h          | 10 +---------
 sys/powerpc/include/ucontext.h     |  2 ++
 sys/powerpc/powerpc/exec_machdep.c | 33 ++++++++++++++++++++++++---------
 sys/powerpc/powerpc/fpu.c          | 30 ++++++++++++++++++++++++++----
 4 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h
index 050ada6b0f64..0230cf78aba7 100644
--- a/sys/powerpc/include/pcb.h
+++ b/sys/powerpc/include/pcb.h
@@ -66,16 +66,8 @@ struct pcb {
 #define	PCB_VECREGS     0x200	/* Process had Altivec registers initialized */
 	struct fpu {
 		union {
-#if _BYTE_ORDER == _BIG_ENDIAN
-			double fpr;
-			uint32_t vsr[4];
-#else
 			uint32_t vsr[4];
-			struct {
-				double padding;
-				double fpr;
-			};
-#endif
+			double fpr;
 		} fpr[32];
 		double	fpscr;	/* FPSCR stored as double for easier access */
 	} pcb_fpu;		/* Floating point processor */
diff --git a/sys/powerpc/include/ucontext.h b/sys/powerpc/include/ucontext.h
index d35c6c773fe0..dc87edd578bc 100644
--- a/sys/powerpc/include/ucontext.h
+++ b/sys/powerpc/include/ucontext.h
@@ -41,6 +41,7 @@ typedef struct __mcontext {
 	int		mc_flags;
 #define _MC_FP_VALID	0x01
 #define _MC_AV_VALID	0x02
+#define _MC_VS_VALID	0x04
 	int		mc_onstack;	  	/* saved onstack flag */
 	int		mc_len;			/* sizeof(__mcontext) */
 	__uint64_t	mc_avec[32*2];		/* vector register file */
@@ -56,6 +57,7 @@ typedef struct __mcontext32 {
 	int		mc_flags;
 #define _MC_FP_VALID	0x01
 #define _MC_AV_VALID	0x02
+#define _MC_VS_VALID	0x04
 	int		mc_onstack;	  	/* saved onstack flag */
 	int		mc_len;			/* sizeof(__mcontext) */
 	uint64_t	mc_avec[32*2];		/* vector register file */
diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c
index bdd1da9c9cbd..073266f87c46 100644
--- a/sys/powerpc/powerpc/exec_machdep.c
+++ b/sys/powerpc/powerpc/exec_machdep.c
@@ -349,13 +349,6 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 	if (error != 0)
 		return (error);
 
-	/*
-	 * Save FPU state if needed. User may have changed it on
-	 * signal handler
-	 */
-	if (uc.uc_mcontext.mc_srr1 & PSL_FP)
-		save_fpu(td);
-
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x",
@@ -432,6 +425,7 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
 	}
 
 	if (pcb->pcb_flags & PCB_VSX) {
+		mcp->mc_flags |= _MC_VS_VALID;
 		for (i = 0; i < 32; i++)
 			memcpy(&mcp->mc_vsxfpreg[i],
 			    &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double));
@@ -481,6 +475,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 	struct pcb *pcb;
 	struct trapframe *tf;
 	register_t tls;
+	register_t msr;
 	int i;
 
 	pcb = td->td_pcb;
@@ -531,6 +526,22 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 	tf->srr1 &= ~(PSL_FP | PSL_VSX | PSL_VEC);
 	pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX | PCB_VEC);
 
+	/*
+	 * Ensure the FPU is also disabled in hardware.
+	 *
+	 * Without this, it's possible for the register reload to fail if we
+	 * don't switch to a FPU disabled context before resuming the original
+	 * thread.  Specifically, if the FPU/VSX unavailable exception is never
+	 * hit, then whatever data is still in the FP/VSX registers when
+	 * sigresume is callled will used by the resumed thread, instead of the
+	 * previously saved data from the mcontext.
+	 */
+	critical_enter();
+	msr = mfmsr() & ~(PSL_FP | PSL_VSX | PSL_VEC);
+	isync();
+	mtmsr(msr);
+	critical_exit();
+
 	if (mcp->mc_flags & _MC_FP_VALID) {
 		/* enable_fpu() will happen lazily on a fault */
 		pcb->pcb_flags |= PCB_FPREGS;
@@ -539,8 +550,12 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 		for (i = 0; i < 32; i++) {
 			memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i],
 			    sizeof(double));
-			memcpy(&pcb->pcb_fpu.fpr[i].vsr[2],
-			    &mcp->mc_vsxfpreg[i], sizeof(double));
+		}
+		if (mcp->mc_flags & _MC_VS_VALID) {
+			for (i = 0; i < 32; i++) {
+				memcpy(&pcb->pcb_fpu.fpr[i].vsr[2],
+				    &mcp->mc_vsxfpreg[i], sizeof(double));
+			}
 		}
 	}
 
diff --git a/sys/powerpc/powerpc/fpu.c b/sys/powerpc/powerpc/fpu.c
index 8f5df2f7d576..a66dcf2ae3be 100644
--- a/sys/powerpc/powerpc/fpu.c
+++ b/sys/powerpc/powerpc/fpu.c
@@ -65,8 +65,19 @@ save_fpu_int(struct thread *td)
 	 * Save the floating-point registers and FPSCR to the PCB
 	 */
 	if (pcb->pcb_flags & PCB_VSX) {
-	#define SFP(n)   __asm ("stxvw4x " #n ", 0,%0" \
+#if _BYTE_ORDER == _BIG_ENDIAN
+	#define SFP(n)   __asm("stxvw4x " #n ", 0,%0" \
 			:: "b"(&pcb->pcb_fpu.fpr[n]));
+#else
+	/*
+	 * stxvw2x will swap words within the FP double word on LE systems,
+	 * leading to corruption if VSX is used to store state and FP is
+	 * subsequently used to restore state.
+	 * Use stxvd2x instead.
+	 */
+	#define SFP(n)   __asm("stxvd2x " #n ", 0,%0" \
+			:: "b"(&pcb->pcb_fpu.fpr[n]));
+#endif
 		SFP(0);		SFP(1);		SFP(2);		SFP(3);
 		SFP(4);		SFP(5);		SFP(6);		SFP(7);
 		SFP(8);		SFP(9);		SFP(10);	SFP(11);
@@ -77,7 +88,7 @@ save_fpu_int(struct thread *td)
 		SFP(28);	SFP(29);	SFP(30);	SFP(31);
 	#undef SFP
 	} else {
-	#define SFP(n)   __asm ("stfd " #n ", 0(%0)" \
+	#define SFP(n)   __asm("stfd " #n ", 0(%0)" \
 			:: "b"(&pcb->pcb_fpu.fpr[n].fpr));
 		SFP(0);		SFP(1);		SFP(2);		SFP(3);
 		SFP(4);		SFP(5);		SFP(6);		SFP(7);
@@ -150,8 +161,19 @@ enable_fpu(struct thread *td)
 			  :: "b"(&pcb->pcb_fpu.fpscr));
 
 	if (pcb->pcb_flags & PCB_VSX) {
-	#define LFP(n)   __asm ("lxvw4x " #n ", 0,%0" \
+#if _BYTE_ORDER == _BIG_ENDIAN
+	#define LFP(n)   __asm("lxvw4x " #n ", 0,%0" \
+			:: "b"(&pcb->pcb_fpu.fpr[n]));
+#else
+	/*
+	 * lxvw4x will swap words within the FP double word on LE systems,
+	 * leading to corruption if FP is used to store state and VSX is
+	 * subsequently used to restore state.
+	 * Use lxvd2x instead.
+	 */
+	#define LFP(n)   __asm("lxvd2x " #n ", 0,%0" \
 			:: "b"(&pcb->pcb_fpu.fpr[n]));
+#endif
 		LFP(0);		LFP(1);		LFP(2);		LFP(3);
 		LFP(4);		LFP(5);		LFP(6);		LFP(7);
 		LFP(8);		LFP(9);		LFP(10);	LFP(11);
@@ -162,7 +184,7 @@ enable_fpu(struct thread *td)
 		LFP(28);	LFP(29);	LFP(30);	LFP(31);
 	#undef LFP
 	} else {
-	#define LFP(n)   __asm ("lfd " #n ", 0(%0)" \
+	#define LFP(n)   __asm("lfd " #n ", 0(%0)" \
 			:: "b"(&pcb->pcb_fpu.fpr[n].fpr));
 		LFP(0);		LFP(1);		LFP(2);		LFP(3);
 		LFP(4);		LFP(5);		LFP(6);		LFP(7);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202507210304.56L34GHi009768>