Date: Wed, 10 Mar 2010 21:14:20 +1100 From: Peter Jeremy <peterjeremy@acm.org> To: Marius Strobl <marius@alchemy.franken.de> Cc: freebsd-sparc64@freebsd.org Subject: Re: gcc code generation problems Message-ID: <20100310101420.GB37825@server.vk2pj.dyndns.org> In-Reply-To: <20100309211807.GA34125@server.vk2pj.dyndns.org> References: <20100228192329.GA68252@server.vk2pj.dyndns.org> <20100308190301.GA69938@server.vk2pj.dyndns.org> <20100309102753.GC3978@server.vk2pj.dyndns.org> <20100309205048.GB18466@alchemy.franken.de> <20100309211807.GA34125@server.vk2pj.dyndns.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--ghzN8eJ9Qlbqn3iT Content-Type: multipart/mixed; boundary="oC1+HKm2/end4ao3" Content-Disposition: inline --oC1+HKm2/end4ao3 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On 2010-Mar-10 08:18:07 +1100, Peter Jeremy <peterjeremy@acm.org> wrote: >My patch to fix all three problems is attached - it's slightly >different to yours (and doesn't include any of the constification). >We both seem to basically agree. I think my approach to RN_DECODE() >should be somewhat faster (since it does less testing), though it >meant re-defining FTYPE_LNG. I've revised my previous patch to remove two now-unused variables as well as include the constification in your patch. After rebuilding libc, the eval.s program I posted a few days ago now works correctly as posted and with the following modifications to the two !!! blocks: ((1 << 53) - 1) => %f34; fxtos %f34,%f20 ((1 << 53) - 1) => %f34; fxtod %f34,%f40 ((1 << 53) - 1) => %f34; fxtoq %f34,%f40 0x5a000000 => %f31; fstox %f31,%f32 0x433fffffffffffff => %f34; fdtox %f34,%f40 (The above are all emulated according to the US-IIIi databook). Unfortunately, the following code: 0x4033ffffffffffff => %f40; 0xf000000000000000 => %f42; fqtox %f40,%f30 gives a result of 0x001ffffffffffff0 instead of 0x001fffffffffffff. The low order bits (those in %f42) appear to be ignored. A similar problem occurs with fqtod. faddq %f40,%f40,%f32 has the low destination (%f34) shifted one bit right - which implies that the low order register is ignored in only one argument. I don't see any obvious issue anywhere and have elected to leave this problem for now. fmovq does work but uses a different code-path. > (and have been >thinking about how to create regression tests for the emulator). Two possibilities here are UCBTEST and TestFloat (note that SoftFloat is already in the FreeBSD tree). I have copies of both but haven't actually converted either into a suitable test harness. 
--=20 Peter Jeremy --oC1+HKm2/end4ao3 Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="fpu.diff" Content-Transfer-Encoding: quoted-printable Index: fpu.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/lib/libc/sparc64/fpu/fpu.c,v retrieving revision 1.9.10.1 diff -u -r1.9.10.1 fpu.c --- fpu.c 3 Aug 2009 08:13:06 -0000 1.9.10.1 +++ fpu.c 10 Mar 2010 08:35:00 -0000 @@ -97,7 +97,7 @@ #define X8(x) X4(x),X4(x) #define X16(x) X8(x),X8(x) =20 -static char cx_to_trapx[] =3D { +static const char cx_to_trapx[] =3D { X1(FSR_NX), X2(FSR_DZ), X4(FSR_UF), @@ -169,7 +169,7 @@ void __fpu_dumpfpn(struct fpn *fp) { - static char *class[] =3D { + static const char *class[] =3D { "SNAN", "QNAN", "ZERO", "NUM", "INF" }; =20 @@ -181,15 +181,11 @@ } #endif =20 -static int opmask[] =3D {0, 0, 1, 3}; +static const int opmask[] =3D {0, 0, 1, 3, 1}; =20 /* Decode 5 bit register field depending on the type. */ #define RN_DECODE(tp, rn) \ - ((tp =3D=3D FTYPE_DBL || tp =3D=3D FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) &= \ - ~opmask[tp]) - -/* Operand size in 32-bit registers. */ -#define OPSZ(tp) ((tp) =3D=3D FTYPE_LNG ? 2 : (1 << (tp))) + ((tp) >=3D FTYPE_DBL ? INSFPdq_RN(rn) & ~opmask[tp] : (rn)) =20 /* * Helper for forming the below case statements. Build only the op3 and opf @@ -210,7 +206,6 @@ u_int32_t xor) { u_int64_t tmp64; - int i; =20 if (type =3D=3D FTYPE_INT || type =3D=3D FTYPE_SNG) __fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor); @@ -219,12 +214,13 @@ * Need to use the double versions to be able to access * the upper 32 fp registers. 
*/ - for (i =3D 0; i < OPSZ(type); i +=3D 2, rd +=3D 2, rs2 +=3D 2) { - tmp64 =3D __fpu_getreg64(rs2); - if (i =3D=3D 0) - tmp64 =3D (tmp64 & ~((u_int64_t)nand << 32)) ^ - ((u_int64_t)xor << 32); - __fpu_setreg64(rd, tmp64); + tmp64 =3D __fpu_getreg64(rs2); + tmp64 =3D (tmp64 & ~((u_int64_t)nand << 32)) ^ + ((u_int64_t)xor << 32); + __fpu_setreg64(rd, tmp64); + if (type =3D=3D FTYPE_EXT) { + tmp64 =3D __fpu_getreg64(rs2+2); + __fpu_setreg64(rd+2, tmp64); } } } @@ -277,7 +273,6 @@ int opf, rs1, rs2, rd, type, mask, cx, cond; u_long reg, fsr; u_int space[4]; - int i; =20 /* * `Decode' and execute instruction. Start with no exceptions. @@ -409,24 +404,29 @@ case FOP(INS2_FPop1, INSFP1_FxTOs): case FOP(INS2_FPop1, INSFP1_FxTOd): case FOP(INS2_FPop1, INSFP1_FxTOq): + /* + * Source operand is really long, though low two bits are + * zero. Override the implied type and re-calculate rs2 + */ type =3D FTYPE_LNG; + rs2 =3D RN_DECODE(type, IF_F3_RS2(insn)); __fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2); - /* sneaky; depends on instruction encoding */ + /* Target operand size is encoded in bits 3:2, not 1:0 */ type =3D (IF_F3_OPF(insn) >> 2) & 3; rd =3D RN_DECODE(type, IF_F3_RD(insn)); break; case FOP(INS2_FPop1, INSFP1_FTOx): __fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2); + /* Target operand is always long */ type =3D FTYPE_LNG; - mask =3D 1; /* needs 2 registers */ - rd =3D IF_F3_RD(insn) & ~mask; + rd =3D RN_DECODE(type, IF_F3_RD(insn)); break; case FOP(INS2_FPop1, INSFP1_FTOs): case FOP(INS2_FPop1, INSFP1_FTOd): case FOP(INS2_FPop1, INSFP1_FTOq): case FOP(INS2_FPop1, INSFP1_FTOi): __fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2); - /* sneaky; depends on instruction encoding */ + /* Target operand size is encoded in bits 3:2, not 1:0 */ type =3D (IF_F3_OPF(insn) >> 2) & 3; rd =3D RN_DECODE(type, IF_F3_RD(insn)); break; @@ -457,10 +457,10 @@ if (type =3D=3D FTYPE_INT || type =3D=3D FTYPE_SNG) __fpu_setreg(rd, space[0]); else { - for (i =3D 0; i < OPSZ(type); i +=3D 2) { - 
__fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) | - space[i + 1]); - } + __fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]); + if (type =3D=3D FTYPE_EXT) + __fpu_setreg64(rd + 2, ((u_int64_t)space[2] << 32) | + space[3]); } return (0); /* success */ } Index: fpu_emu.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /usr/ncvs/src/lib/libc/sparc64/fpu/fpu_emu.h,v retrieving revision 1.6.10.1 diff -u -r1.6.10.1 fpu_emu.h --- fpu_emu.h 3 Aug 2009 08:13:06 -0000 1.6.10.1 +++ fpu_emu.h 10 Mar 2010 08:33:26 -0000 @@ -134,13 +134,14 @@ =20 /* * Floating point operand types. FTYPE_LNG is syntethic (it does not occur= in - * instructions). + * instructions). Note that the code relies on the numeric values of these + * constants in some places. */ -#define FTYPE_INT INSFP_i -#define FTYPE_SNG INSFP_s -#define FTYPE_DBL INSFP_d -#define FTYPE_EXT INSFP_q -#define FTYPE_LNG -1 +#define FTYPE_INT INSFP_i /* 0 */ +#define FTYPE_SNG INSFP_s /* 1 */ +#define FTYPE_DBL INSFP_d /* 2 */ +#define FTYPE_EXT INSFP_q /* 3 */ +#define FTYPE_LNG 4 =20 /* * Emulator state. --oC1+HKm2/end4ao3-- --ghzN8eJ9Qlbqn3iT Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (FreeBSD) iEYEARECAAYFAkuXcPwACgkQ/opHv/APuIdmZgCgu8DDl/u+J2qmfKCZe78RZx1t Ss0AnA/kp7D1cG/XoUt+4v/ifMxB/Ytg =mq4R -----END PGP SIGNATURE----- --ghzN8eJ9Qlbqn3iT--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100310101420.GB37825>