Date: Tue, 17 Jul 2012 20:09:15 +0300 From: Konstantin Belousov <kostikbel@gmail.com> To: Bruce Evans <brde@optusnet.com.au> Cc: freebsd-amd64@freebsd.org Subject: Re: amd64/169927: siginfo, si_code for fpe errors when error occurs using the SSE math processor Message-ID: <20120717170915.GL2676@deviant.kiev.zoral.com.ua> In-Reply-To: <20120718014222.V7761@besplex.bde.org> References: <201207171350.q6HDoAJS033797@freefall.freebsd.org> <20120717235622.C7417@besplex.bde.org> <20120718011942.D7642@besplex.bde.org> <20120718014222.V7761@besplex.bde.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--EEx6GiKZGZ1wKUra Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Wed, Jul 18, 2012 at 02:03:58AM +1000, Bruce Evans wrote: > On Wed, 18 Jul 2012, Bruce Evans wrote: > > >On Wed, 18 Jul 2012, Bruce Evans wrote: > >>.. > >>So I still want a single kernel exception handler that merges the statuses. > > > >Merge the independent statuses modified by their independent controls: > > > > return (fpetable[(fpsw & ((~fpcw & 0x3f) | 0x40)) | > > ((mxcsr & (mxcsr >> 16)) & 0x3f)]); > > > >Use the same trap handler that reads all these statuses and controls. > > Changed my mind again. Need sleep. Merging the traps breaks the rule > that i387 traps occur on the first non-control instruction after the > one that causes the exception. There may be mixed code like this: > > fldz > fld1 > fdiv %st,%st(1) # i387 exception now; i387 trap pending > load $0 into %xmm0 > load $1 into %xmm1 > divsd %xmm0,%xmm1 # SSE exception now; SSE trap now > > Debuggers can see both exception states including the i387 trap pending, > provided the i387 trap is not bogusly cleared, either by never clearing > it in the kernel trap handler or by using a separate trap handler that > doesn't clear it for T_XMMFLT. They can even figure out that an SSE > trap occurred, because the i387 trap is still pending. > > ... > fnop # i387 trap on first non-control FP instr... > > Apart from doing the bogus fnclex for T_XMMFLT and the delayed effect of > i387 status bits, merging or not merging the statuses makes little > difference, since if a status bit is set and is not masked according > to its control word, then it will generate a trap soon if it didn't > generate the current one. The trap number is available to SA_SIGINFO-type handlers through the si_trapno member of siginfo_t. I think this is the final argument for having separate fputrap_{x87,sse} functions. For amd64, the SSE hardware is the FPU, so I do not see much wrong with the name.
I changed fputrap_sse() according to your suggestion. diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index a7812b7..356b3ac 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) +#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) @@ -105,6 +106,7 @@ void fnstsw(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); +void stmxcsr(u_int csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); @@ -113,9 +115,6 @@ void xsave(char *addr, uint64_t mask); #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() -#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) -#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) - CTASSERT(sizeof(struct savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); @@ -514,11 +513,15 @@ static char fpetable[128] = { }; /* - * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. + * Preserve the FP status word, clear FP exceptions for x87, then + * generate a SIGFPE. + * + * Clearing exceptions was necessary mainly to avoid IRQ13 bugs and is + * engraved in our i386 ABI. We now depend on longjmp() restoring a + * usable state. Restoring the state or examining it might fail if we + * didn't clear exceptions. * - * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now - * depend on longjmp() restoring a usable state. Restoring the state - * or examining it might fail if we didn't clear exceptions. + * For SSE exceptions, the exceptions are not cleared.
* * The error code chosen will be one of the FPE_... macros. It will be * sent as the second argument to old BSD-style signal handlers and as @@ -531,8 +534,9 @@ static char fpetable[128] = { * solution for signals other than SIGFPE. */ int -fputrap() +fputrap_x87(void) { + struct savefpu *pcb_save; u_short control, status; critical_enter(); @@ -543,19 +547,40 @@ fputrap() * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { - control = GET_FPU_CW(curthread); - status = GET_FPU_SW(curthread); + pcb_save = curthread->td_pcb->pcb_save; + control = pcb_save->sv_env.en_cw; + status = pcb_save->sv_env.en_sw; } else { fnstcw(&control); fnstsw(&status); + fnclex(); } - if (PCPU_GET(fpcurthread) == curthread) - fnclex(); critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } +int +fputrap_sse(void) +{ + u_int mxcsr; + + critical_enter(); + + /* + * Comparing with the x87 #MF handler, we do not clear + * exceptions from the mxcsr. + */ + if (PCPU_GET(fpcurthread) != curthread) + mxcsr = curthread->td_pcb->pcb_save->sv_env.en_mxcsr; + else + stmxcsr(&mxcsr); + + critical_exit(); + + return (fpetable[(mxcsr & (mxcsr >> 16)) & 0x3f]); +} + /* * Implement device not available (DNA) exception * diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 75e15e0..57d1cc2 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -328,7 +328,7 @@ trap(struct trapframe *frame) break; case T_ARITHTRAP: /* arithmetic trap */ - ucode = fputrap(); + ucode = fputrap_x87(); if (ucode == -1) goto userout; i = SIGFPE; @@ -442,7 +442,9 @@ trap(struct trapframe *frame) break; case T_XMMFLT: /* SIMD floating-point exception */ - ucode = 0; /* XXX */ + ucode = fputrap_sse(); + if (ucode == -1) + goto userout; i = SIGFPE; break; } diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index 98a016b..7d0f0ea 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@
-62,7 +62,8 @@ int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size); int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size); -int fputrap(void); +int fputrap_sse(void); +int fputrap_x87(void); void fpuuserinited(struct thread *td); struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); --EEx6GiKZGZ1wKUra Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (FreeBSD) iEYEARECAAYFAlAFnDoACgkQC3+MBN1Mb4ieJACgndEzfeGT+kCl//cGsh38AbgU ReEAn3p16o10AVdF+k4b9xFRDZaEYkOm =HxFX -----END PGP SIGNATURE----- --EEx6GiKZGZ1wKUra--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120717170915.GL2676>