Date: Wed, 10 Mar 2010 08:18:07 +1100 From: Peter Jeremy <peterjeremy@acm.org> To: Marius Strobl <marius@alchemy.franken.de> Cc: freebsd-sparc64@freebsd.org Subject: Re: gcc code generation problems Message-ID: <20100309211807.GA34125@server.vk2pj.dyndns.org> In-Reply-To: <20100309205048.GB18466@alchemy.franken.de> References: <20100228192329.GA68252@server.vk2pj.dyndns.org> <20100308190301.GA69938@server.vk2pj.dyndns.org> <20100309102753.GC3978@server.vk2pj.dyndns.org> <20100309205048.GB18466@alchemy.franken.de>
next in thread | previous in thread | raw e-mail | index | archive | help
--GRPZ8SYKNexpdSJ7 Content-Type: multipart/mixed; boundary="Qxx1br4bt0+wmkIi" Content-Disposition: inline --Qxx1br4bt0+wmkIi Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On 2010-Mar-09 21:50:48 +0100, Marius Strobl <marius@alchemy.franken.de> wrote: >Apparently you're right about these. What do you think about the >following patch? >http://people.freebsd.org/~marius/fpu.c.diff I've found the problem with over-writing registers as well: There's a macro OPSZ() which is documented to return "Operand size in 32-bit registers". In reality, it's only correct for int32 and int64 but never used for int32 or float. Thus double and extended double are consistently broken. Since extended double winds up being the only special case, it was easier to get rid of the macro completely. My patch to fix all three problems is attached - it's slightly different to yours (and doesn't include any of the constification). We both seem to basically agree. I think my approach to RN_DECODE() should be somewhat faster (since it does less testing), though it meant re-defining FTYPE_LNG. Note that I haven't had a chance to test this yet (and have been thinking about how to create regression tests for the emulator). I was also going to have a closer look at some of the other __fpu_execute() cases to check that the register assumptions were correct but ran out of time on the train. (My FreeBSD/SPARC box is turned off so I can't test it until I get home tonight). 
--=20 Peter Jeremy --Qxx1br4bt0+wmkIi Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="fpu.diff" Content-Transfer-Encoding: quoted-printable Index: fpu.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/lib/libc/sparc64/fpu/fpu.c,v retrieving revision 1.9.10.1 diff -u -r1.9.10.1 fpu.c --- fpu.c 3 Aug 2009 08:13:06 -0000 1.9.10.1 +++ fpu.c 9 Mar 2010 20:17:03 -0000 @@ -181,15 +181,11 @@ } #endif =20 -static int opmask[] =3D {0, 0, 1, 3}; +static int opmask[] =3D {0, 0, 1, 3, 1}; =20 /* Decode 5 bit register field depending on the type. */ #define RN_DECODE(tp, rn) \ - ((tp =3D=3D FTYPE_DBL || tp =3D=3D FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) &= \ - ~opmask[tp]) - -/* Operand size in 32-bit registers. */ -#define OPSZ(tp) ((tp) =3D=3D FTYPE_LNG ? 2 : (1 << (tp))) + ((tp) >=3D FTYPE_DBL ? INSFPdq_RN(rn) & ~opmask[tp] : (rn)) =20 /* * Helper for forming the below case statements. Build only the op3 and opf @@ -219,12 +215,13 @@ * Need to use the double versions to be able to access * the upper 32 fp registers. */ - for (i =3D 0; i < OPSZ(type); i +=3D 2, rd +=3D 2, rs2 +=3D 2) { - tmp64 =3D __fpu_getreg64(rs2); - if (i =3D=3D 0) - tmp64 =3D (tmp64 & ~((u_int64_t)nand << 32)) ^ - ((u_int64_t)xor << 32); - __fpu_setreg64(rd, tmp64); + tmp64 =3D __fpu_getreg64(rs2); + tmp64 =3D (tmp64 & ~((u_int64_t)nand << 32)) ^ + ((u_int64_t)xor << 32); + __fpu_setreg64(rd, tmp64); + if (type =3D=3D FTYPE_EXT) { + tmp64 =3D __fpu_getreg64(rs2+2); + __fpu_setreg64(rd+2, tmp64); } } } @@ -409,17 +406,22 @@ case FOP(INS2_FPop1, INSFP1_FxTOs): case FOP(INS2_FPop1, INSFP1_FxTOd): case FOP(INS2_FPop1, INSFP1_FxTOq): + /* + * Source operand is really long, though low two bits are + * zero. 
Override the implied type and re-calculate rs2 + */ type =3D FTYPE_LNG; + rs2 =3D RN_DECODE(type, IF_F3_RS2(insn)); __fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2); - /* sneaky; depends on instruction encoding */ + /* Target operand size is encoded in bits 3:2, not 1:0 */ type =3D (IF_F3_OPF(insn) >> 2) & 3; rd =3D RN_DECODE(type, IF_F3_RD(insn)); break; case FOP(INS2_FPop1, INSFP1_FTOx): __fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2); + /* Target operand is always long */ type =3D FTYPE_LNG; - mask =3D 1; /* needs 2 registers */ - rd =3D IF_F3_RD(insn) & ~mask; + rd =3D RN_DECODE(type, IF_F3_RD(insn)); break; case FOP(INS2_FPop1, INSFP1_FTOs): case FOP(INS2_FPop1, INSFP1_FTOd): @@ -457,10 +459,10 @@ if (type =3D=3D FTYPE_INT || type =3D=3D FTYPE_SNG) __fpu_setreg(rd, space[0]); else { - for (i =3D 0; i < OPSZ(type); i +=3D 2) { - __fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) | - space[i + 1]); - } + __fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]); + if (type =3D=3D FTYPE_EXT) + __fpu_setreg64(rd + 2, ((u_int64_t)space[2] << 32) | + space[3]); } return (0); /* success */ } Index: fpu_emu.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/lib/libc/sparc64/fpu/fpu_emu.h,v retrieving revision 1.6.10.1 diff -u -r1.6.10.1 fpu_emu.h --- fpu_emu.h 3 Aug 2009 08:13:06 -0000 1.6.10.1 +++ fpu_emu.h 9 Mar 2010 20:17:04 -0000 @@ -134,13 +134,14 @@ =20 /* * Floating point operand types. FTYPE_LNG is syntethic (it does not occur= in - * instructions). + * instructions). Note that the code relies on the numeric values of these + * constants in some places. 
*/ -#define FTYPE_INT INSFP_i -#define FTYPE_SNG INSFP_s -#define FTYPE_DBL INSFP_d -#define FTYPE_EXT INSFP_q -#define FTYPE_LNG -1 +#define FTYPE_INT INSFP_i /* 0 */ +#define FTYPE_SNG INSFP_s /* 1 */ +#define FTYPE_DBL INSFP_d /* 2 */ +#define FTYPE_EXT INSFP_q /* 3 */ +#define FTYPE_LNG 4 =20 /* * Emulator state. --Qxx1br4bt0+wmkIi-- --GRPZ8SYKNexpdSJ7 Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (FreeBSD) iEYEARECAAYFAkuWuw8ACgkQ/opHv/APuIdbnwCfZkzsGM2gtfQ/9SAqo32aHZqK LIoAoIKDUSNHqrAiiNaT6I2W3w/GCeor =ZzlO -----END PGP SIGNATURE----- --GRPZ8SYKNexpdSJ7--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100309211807.GA34125>