Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 10 Mar 2010 08:18:07 +1100
From:      Peter Jeremy <peterjeremy@acm.org>
To:        Marius Strobl <marius@alchemy.franken.de>
Cc:        freebsd-sparc64@freebsd.org
Subject:   Re: gcc code generation problems
Message-ID:  <20100309211807.GA34125@server.vk2pj.dyndns.org>
In-Reply-To: <20100309205048.GB18466@alchemy.franken.de>
References:  <20100228192329.GA68252@server.vk2pj.dyndns.org> <20100308190301.GA69938@server.vk2pj.dyndns.org> <20100309102753.GC3978@server.vk2pj.dyndns.org> <20100309205048.GB18466@alchemy.franken.de>

next in thread | previous in thread | raw e-mail | index | archive | help

--GRPZ8SYKNexpdSJ7
Content-Type: multipart/mixed; boundary="Qxx1br4bt0+wmkIi"
Content-Disposition: inline


--Qxx1br4bt0+wmkIi
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On 2010-Mar-09 21:50:48 +0100, Marius Strobl <marius@alchemy.franken.de> wrote:
>Apparently you're right about these. What do you think about the
>following patch?
>http://people.freebsd.org/~marius/fpu.c.diff

I've found the problem with overwriting registers as well: There's a
macro OPSZ() which is documented to return "Operand size in 32-bit
registers".  In reality, it's only correct for int32 and int64, but it
is never used for int32 or float.  Thus double and extended double are
consistently broken.  Since extended double winds up being the only
special case, it was easier to get rid of the macro completely.

My patch to fix all three problems is attached - it's slightly
different to yours (and doesn't include any of the constification).
We both seem to basically agree.  I think my approach to RN_DECODE()
should be somewhat faster (since it does less testing), though it
meant re-defining FTYPE_LNG.

Note that I haven't had a chance to test this yet (and have been
thinking about how to create regression tests for the emulator).  I
was also going to have a closer look at some of the other
__fpu_execute() cases to check that the register assumptions were
correct but ran out of time on the train.  (My FreeBSD/SPARC box is
turned off so I can't test it until I get home tonight).

--=20
Peter Jeremy

--Qxx1br4bt0+wmkIi
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="fpu.diff"
Content-Transfer-Encoding: quoted-printable

Index: fpu.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /usr/ncvs/src/lib/libc/sparc64/fpu/fpu.c,v
retrieving revision 1.9.10.1
diff -u -r1.9.10.1 fpu.c
--- fpu.c	3 Aug 2009 08:13:06 -0000	1.9.10.1
+++ fpu.c	9 Mar 2010 20:17:03 -0000
@@ -181,15 +181,11 @@
 }
 #endif
=20
-static int opmask[] =3D {0, 0, 1, 3};
+static int opmask[] =3D {0, 0, 1, 3, 1};
=20
 /* Decode 5 bit register field depending on the type. */
 #define	RN_DECODE(tp, rn) \
-	((tp =3D=3D FTYPE_DBL || tp =3D=3D FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) &=
 \
-	    ~opmask[tp])
-
-/* Operand size in 32-bit registers. */
-#define	OPSZ(tp)	((tp) =3D=3D FTYPE_LNG ? 2 : (1 << (tp)))
+	((tp) >=3D FTYPE_DBL ? INSFPdq_RN(rn) & ~opmask[tp] : (rn))
=20
 /*
  * Helper for forming the below case statements. Build only the op3 and opf
@@ -219,12 +215,13 @@
 		 * Need to use the double versions to be able to access
 		 * the upper 32 fp registers.
 		 */
-		for (i =3D 0; i < OPSZ(type); i +=3D 2, rd +=3D 2, rs2 +=3D 2) {
-			tmp64 =3D __fpu_getreg64(rs2);
-			if (i =3D=3D 0)
-				tmp64 =3D (tmp64 & ~((u_int64_t)nand << 32)) ^
-				    ((u_int64_t)xor << 32);
-			__fpu_setreg64(rd, tmp64);
+		tmp64 =3D __fpu_getreg64(rs2);
+		tmp64 =3D (tmp64 & ~((u_int64_t)nand << 32)) ^
+			((u_int64_t)xor << 32);
+		__fpu_setreg64(rd, tmp64);
+		if (type =3D=3D FTYPE_EXT) {
+			tmp64 =3D __fpu_getreg64(rs2+2);
+			__fpu_setreg64(rd+2, tmp64);
 		}
 	}
 }
@@ -409,17 +406,22 @@
 	case FOP(INS2_FPop1, INSFP1_FxTOs):
 	case FOP(INS2_FPop1, INSFP1_FxTOd):
 	case FOP(INS2_FPop1, INSFP1_FxTOq):
+		/*
+		 * Source operand is really long, though low two bits are
+		 * zero.  Override the implied type and re-calculate rs2
+		 */
 		type =3D FTYPE_LNG;
+		rs2 =3D RN_DECODE(type, IF_F3_RS2(insn));
 		__fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2);
-		/* sneaky; depends on instruction encoding */
+		/* Target operand size is encoded in bits 3:2, not 1:0 */
 		type =3D (IF_F3_OPF(insn) >> 2) & 3;
 		rd =3D RN_DECODE(type, IF_F3_RD(insn));
 		break;
 	case FOP(INS2_FPop1, INSFP1_FTOx):
 		__fpu_explode(fe, fp =3D &fe->fe_f1, type, rs2);
+		/* Target operand is always long */
 		type =3D FTYPE_LNG;
-		mask =3D 1;	/* needs 2 registers */
-		rd =3D IF_F3_RD(insn) & ~mask;
+		rd =3D RN_DECODE(type, IF_F3_RD(insn));
 		break;
 	case FOP(INS2_FPop1, INSFP1_FTOs):
 	case FOP(INS2_FPop1, INSFP1_FTOd):
@@ -457,10 +459,10 @@
 	if (type =3D=3D FTYPE_INT || type =3D=3D FTYPE_SNG)
 		__fpu_setreg(rd, space[0]);
 	else {
-		for (i =3D 0; i < OPSZ(type); i +=3D 2) {
-			__fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) |
-			    space[i + 1]);
-		}
+		__fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]);
+		if (type =3D=3D FTYPE_EXT)
+			__fpu_setreg64(rd + 2, ((u_int64_t)space[2] << 32) |
+				space[3]);
 	}
 	return (0);	/* success */
 }
Index: fpu_emu.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /usr/ncvs/src/lib/libc/sparc64/fpu/fpu_emu.h,v
retrieving revision 1.6.10.1
diff -u -r1.6.10.1 fpu_emu.h
--- fpu_emu.h	3 Aug 2009 08:13:06 -0000	1.6.10.1
+++ fpu_emu.h	9 Mar 2010 20:17:04 -0000
@@ -134,13 +134,14 @@
=20
 /*
  * Floating point operand types. FTYPE_LNG is syntethic (it does not occur=
 in
- * instructions).
+ * instructions).  Note that the code relies on the numeric values of these
+ * constants in some places.
  */
-#define	FTYPE_INT	INSFP_i
-#define	FTYPE_SNG	INSFP_s
-#define	FTYPE_DBL	INSFP_d
-#define	FTYPE_EXT	INSFP_q
-#define	FTYPE_LNG	-1
+#define	FTYPE_INT	INSFP_i		/* 0 */
+#define	FTYPE_SNG	INSFP_s		/* 1 */
+#define	FTYPE_DBL	INSFP_d		/* 2 */
+#define	FTYPE_EXT	INSFP_q		/* 3 */
+#define	FTYPE_LNG	4
=20
 /*
  * Emulator state.

--Qxx1br4bt0+wmkIi--

--GRPZ8SYKNexpdSJ7
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.14 (FreeBSD)

iEYEARECAAYFAkuWuw8ACgkQ/opHv/APuIdbnwCfZkzsGM2gtfQ/9SAqo32aHZqK
LIoAoIKDUSNHqrAiiNaT6I2W3w/GCeor
=ZzlO
-----END PGP SIGNATURE-----

--GRPZ8SYKNexpdSJ7--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100309211807.GA34125>