From owner-svn-src-stable@FreeBSD.ORG Mon Jan 9 04:55:53 2012 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6AD18106564A; Mon, 9 Jan 2012 04:55:53 +0000 (UTC) (envelope-from das@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 577A78FC15; Mon, 9 Jan 2012 04:55:53 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q094treI023351; Mon, 9 Jan 2012 04:55:53 GMT (envelope-from das@svn.freebsd.org) Received: (from das@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q094trfY023346; Mon, 9 Jan 2012 04:55:53 GMT (envelope-from das@svn.freebsd.org) Message-Id: <201201090455.q094trfY023346@svn.freebsd.org> From: David Schultz Date: Mon, 9 Jan 2012 04:55:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r229839 - stable/9/lib/msun/src X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 09 Jan 2012 04:55:53 -0000 Author: das Date: Mon Jan 9 04:55:52 2012 New Revision: 229839 URL: http://svn.freebsd.org/changeset/base/229839 Log: MFC various fma{,f,l} improvements: r226245 - refactoring r226371 - fix double-rounding bug r226373 - new math_private.h macros r226601 - fix nit in r226371 Modified: stable/9/lib/msun/src/math_private.h stable/9/lib/msun/src/s_fma.c stable/9/lib/msun/src/s_fmaf.c stable/9/lib/msun/src/s_fmal.c Directory Properties: stable/9/lib/msun/ (props changed) Modified: stable/9/lib/msun/src/math_private.h ============================================================================== --- stable/9/lib/msun/src/math_private.h Mon Jan 9 04:27:32 2012 (r229838) +++ stable/9/lib/msun/src/math_private.h Mon Jan 9 04:55:52 2012 (r229839) @@ -58,6 +58,10 @@ typedef union u_int32_t msw; u_int32_t lsw; } parts; + struct + { + u_int64_t w; + } xparts; } ieee_double_shape_type; #endif @@ -72,6 +76,10 @@ typedef union u_int32_t lsw; u_int32_t msw; } parts; + struct + { + u_int64_t w; + } xparts; } ieee_double_shape_type; #endif @@ -86,6 +94,14 @@ do { \ (ix1) = ew_u.parts.lsw; \ } while (0) +/* Get a 64-bit int from a double. */ +#define EXTRACT_WORD64(ix,d) \ +do { \ + ieee_double_shape_type ew_u; \ + ew_u.value = (d); \ + (ix) = ew_u.xparts.w; \ +} while (0) + /* Get the more significant 32 bit int from a double. */ #define GET_HIGH_WORD(i,d) \ @@ -114,6 +130,14 @@ do { \ (d) = iw_u.value; \ } while (0) +/* Set a double from a 64-bit int. */ +#define INSERT_WORD64(d,ix) \ +do { \ + ieee_double_shape_type iw_u; \ + iw_u.xparts.w = (ix); \ + (d) = iw_u.value; \ +} while (0) + /* Set the more significant 32 bits of a double from an int. */ #define SET_HIGH_WORD(d,v) \ Modified: stable/9/lib/msun/src/s_fma.c ============================================================================== --- stable/9/lib/msun/src/s_fma.c Mon Jan 9 04:27:32 2012 (r229838) +++ stable/9/lib/msun/src/s_fma.c Mon Jan 9 04:55:52 2012 (r229839) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz + * Copyright (c) 2005-2011 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,132 @@ __FBSDID("$FreeBSD$"); #include #include +#include "math_private.h" + +/* + * A struct dd represents a floating-point number with twice the precision + * of a double. We maintain the invariant that "hi" stores the 53 high-order + * bits of the result. + */ +struct dd { + double hi; + double lo; +}; + +/* + * Compute a+b exactly, returning the exact result in a struct dd. We assume + * that both a and b are finite, but make no assumptions about their relative + * magnitudes. + */ +static inline struct dd +dd_add(double a, double b) +{ + struct dd ret; + double s; + + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + return (ret); +} + +/* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline double +add_adjusted(double a, double b) +{ + struct dd sum; + uint64_t hibits, lobits; + + sum = dd_add(a, b); + if (sum.lo != 0) { + EXTRACT_WORD64(hibits, sum.hi); + if ((hibits & 1) == 0) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + EXTRACT_WORD64(lobits, sum.lo); + hibits += 1 - ((hibits ^ lobits) >> 62); + INSERT_WORD64(sum.hi, hibits); + } + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline double +add_and_denormalize(double a, double b, int scale) +{ + struct dd sum; + uint64_t hibits, lobits; + int bits_lost; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + EXTRACT_WORD64(hibits, sum.hi); + bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; + if (bits_lost != 1 ^ (int)(hibits & 1)) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + EXTRACT_WORD64(lobits, sum.lo); + hibits += 1 - (((hibits ^ lobits) >> 62) & 2); + INSERT_WORD64(sum.hi, hibits); + } + } + return (ldexp(sum.hi, scale)); +} + +/* + * Compute a*b exactly, returning the exact result in a struct dd. We assume + * that both a and b are normalized, so no underflow or overflow will occur. + * The current rounding mode must be round-to-nearest. + */ +static inline struct dd +dd_mul(double a, double b) +{ + static const double split = 0x1p27 + 1.0; + struct dd ret; + double ha, hb, la, lb, p, q; + + p = a * split; + ha = a - p; + ha += p; + la = a - ha; + + p = b * split; + hb = b - p; + hb += p; + lb = b - hb; + + p = ha * hb; + q = ha * lb + la * hb; + + ret.hi = p + q; + ret.lo = p - ret.hi + q + la * lb; + return (ret); +} + /* * Fused multiply-add: Compute x * y + z with a single rounding error. * @@ -48,14 +174,11 @@ __FBSDID("$FreeBSD$"); * Hardware instructions should be used on architectures that support it, * since this implementation will likely be several times slower. */ -#if LDBL_MANT_DIG != 113 double fma(double x, double y, double z) { - static const double split = 0x1p27 + 1.0; - double xs, ys, zs; - double c, cc, hx, hy, p, q, tx, ty; - double r, rr, s; + double xs, ys, zs, adj; + struct dd xy, r; int oround; int ex, ey, ez; int spread; @@ -85,41 +208,6 @@ fma(double x, double y, double z) * will overflow, so we handle these cases specially. Rounding * modes other than FE_TONEAREST are painful. */ - if (spread > DBL_MANT_DIG * 2) { - fenv_t env; - feraiseexcept(FE_INEXACT); - switch(oround) { - case FE_TONEAREST: - return (x * y); - case FE_TOWARDZERO: - if (x > 0.0 ^ y < 0.0 ^ z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafter(r, 0); - feupdateenv(&env); - return (r); - case FE_DOWNWARD: - if (z > 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafter(r, -INFINITY); - feupdateenv(&env); - return (r); - default: /* FE_UPWARD */ - if (z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafter(r, INFINITY); - feupdateenv(&env); - return (r); - } - } if (spread < -DBL_MANT_DIG) { feraiseexcept(FE_INEXACT); if (!isnormal(z)) @@ -144,63 +232,52 @@ fma(double x, double y, double z) return (z); } } + if (spread <= DBL_MANT_DIG * 2) + zs = ldexp(zs, -spread); + else + zs = copysign(DBL_MIN, zs); - /* - * Use Dekker's algorithm to perform the multiplication and - * subsequent addition in twice the machine precision. - * Arrange so that x * y = c + cc, and x * y + z = r + rr. - */ fesetround(FE_TONEAREST); - p = xs * split; - hx = xs - p; - hx += p; - tx = xs - hx; - - p = ys * split; - hy = ys - p; - hy += p; - ty = ys - hy; - - p = hx * hy; - q = hx * ty + tx * hy; - c = p + q; - cc = p - c + q + tx * ty; - - zs = ldexp(zs, -spread); - r = c + zs; - s = r - c; - rr = (c - (r - s)) + (zs - s) + cc; + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ + xy = dd_mul(xs, ys); + r = dd_add(xy.hi, zs); spread = ex + ey; - if (spread + ilogb(r) > -1023) { + + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ fesetround(oround); - r = r + rr; - } else { + volatile double vzs = zs; /* XXX gcc CSE bug workaround */ + return (xy.hi + vzs + ldexp(xy.lo, spread)); + } + + if (oround != FE_TONEAREST) { /* - * The result is subnormal, so we round before scaling to - * avoid double rounding. + * There is no need to worry about double rounding in directed + * rounding modes. */ - p = ldexp(copysign(0x1p-1022, r), -spread); - c = r + p; - s = c - r; - cc = (r - (c - s)) + (p - s) + rr; fesetround(oround); - r = (c + cc) - p; + adj = r.lo + xy.lo; + return (ldexp(r.hi + adj, spread)); } - return (ldexp(r, spread)); -} -#else /* LDBL_MANT_DIG == 113 */ -/* - * 113 bits of precision is more than twice the precision of a double, - * so it is enough to represent the intermediate product exactly. - */ -double -fma(double x, double y, double z) -{ - return ((long double)x * y + z); + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogb(r.hi) > -1023) + return (ldexp(r.hi + adj, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); } -#endif /* LDBL_MANT_DIG != 113 */ #if (LDBL_MANT_DIG == 53) __weak_reference(fma, fmal); Modified: stable/9/lib/msun/src/s_fmaf.c ============================================================================== --- stable/9/lib/msun/src/s_fmaf.c Mon Jan 9 04:27:32 2012 (r229838) +++ stable/9/lib/msun/src/s_fmaf.c Mon Jan 9 04:55:52 2012 (r229839) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz + * Copyright (c) 2005-2011 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,23 +27,43 @@ #include __FBSDID("$FreeBSD$"); +#include + #include "math.h" +#include "math_private.h" /* * Fused multiply-add: Compute x * y + z with a single rounding error. * * A double has more than twice as much precision than a float, so - * direct double-precision arithmetic suffices. - * - * XXX We are relying on the compiler to convert from double to float - * using the current rounding mode and with the appropriate - * side-effects. But on at least one platform (gcc 3.4.2/sparc64), - * this appears to be too much to ask for. The precision - * reduction should be done manually. + * direct double-precision arithmetic suffices, except where double + * rounding occurs. */ float fmaf(float x, float y, float z) { + double xy, result; + uint32_t hr, lr; + + xy = (double)x * y; + result = xy + z; + EXTRACT_WORDS(hr, lr, result); + /* Common case: The double precision result is fine. */ + if ((lr & 0x1fffffff) != 0x10000000 || /* not a halfway case */ + (hr & 0x7ff00000) == 0x7ff00000 || /* NaN */ + result - xy == z || /* exact */ + fegetround() != FE_TONEAREST) /* not round-to-nearest */ + return (result); - return ((double)x * y + z); + /* + * If result is inexact, and exactly halfway between two float values, + * we need to adjust the low-order bit in the direction of the error. + */ + fesetround(FE_TOWARDZERO); + volatile double vxy = xy; /* XXX work around gcc CSE bug */ + double adjusted_result = vxy + z; + fesetround(FE_TONEAREST); + if (result == adjusted_result) + SET_LOW_WORD(adjusted_result, lr + 1); + return (adjusted_result); } Modified: stable/9/lib/msun/src/s_fmal.c ============================================================================== --- stable/9/lib/msun/src/s_fmal.c Mon Jan 9 04:27:32 2012 (r229838) +++ stable/9/lib/msun/src/s_fmal.c Mon Jan 9 04:55:52 2012 (r229839) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz + * Copyright (c) 2005-2011 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,128 @@ __FBSDID("$FreeBSD$"); #include #include +#include "fpmath.h" + +/* + * A struct dd represents a floating-point number with twice the precision + * of a long double. We maintain the invariant that "hi" stores the high-order + * bits of the result. + */ +struct dd { + long double hi; + long double lo; +}; + +/* + * Compute a+b exactly, returning the exact result in a struct dd. We assume + * that both a and b are finite, but make no assumptions about their relative + * magnitudes. + */ +static inline struct dd +dd_add(long double a, long double b) +{ + struct dd ret; + long double s; + + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + return (ret); +} + +/* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline long double +add_adjusted(long double a, long double b) +{ + struct dd sum; + union IEEEl2bits u; + + sum = dd_add(a, b); + if (sum.lo != 0) { + u.e = sum.hi; + if ((u.bits.manl & 1) == 0) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline long double +add_and_denormalize(long double a, long double b, int scale) +{ + struct dd sum; + int bits_lost; + union IEEEl2bits u; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + u.e = sum.hi; + bits_lost = -u.bits.exp - scale + 1; + if (bits_lost != 1 ^ (int)(u.bits.manl & 1)) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (ldexp(sum.hi, scale)); +} + +/* + * Compute a*b exactly, returning the exact result in a struct dd. We assume + * that both a and b are normalized, so no underflow or overflow will occur. + * The current rounding mode must be round-to-nearest. + */ +static inline struct dd +dd_mul(long double a, long double b) +{ +#if LDBL_MANT_DIG == 64 + static const long double split = 0x1p32L + 1.0; +#elif LDBL_MANT_DIG == 113 + static const long double split = 0x1p57L + 1.0; +#endif + struct dd ret; + long double ha, hb, la, lb, p, q; + + p = a * split; + ha = a - p; + ha += p; + la = a - ha; + + p = b * split; + hb = b - p; + hb += p; + lb = b - hb; + + p = ha * hb; + q = ha * lb + la * hb; + + ret.hi = p + q; + ret.lo = p - ret.hi + q + la * lb; + return (ret); +} + /* * Fused multiply-add: Compute x * y + z with a single rounding error. * @@ -43,14 +165,8 @@ __FBSDID("$FreeBSD$"); long double fmal(long double x, long double y, long double z) { -#if LDBL_MANT_DIG == 64 - static const long double split = 0x1p32L + 1.0; -#elif LDBL_MANT_DIG == 113 - static const long double split = 0x1p57L + 1.0; -#endif - long double xs, ys, zs; - long double c, cc, hx, hy, p, q, tx, ty; - long double r, rr, s; + long double xs, ys, zs, adj; + struct dd xy, r; int oround; int ex, ey, ez; int spread; @@ -80,41 +196,6 @@ fmal(long double x, long double y, long * will overflow, so we handle these cases specially. Rounding * modes other than FE_TONEAREST are painful. */ - if (spread > LDBL_MANT_DIG * 2) { - fenv_t env; - feraiseexcept(FE_INEXACT); - switch(oround) { - case FE_TONEAREST: - return (x * y); - case FE_TOWARDZERO: - if (x > 0.0 ^ y < 0.0 ^ z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafterl(r, 0); - feupdateenv(&env); - return (r); - case FE_DOWNWARD: - if (z > 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafterl(r, -INFINITY); - feupdateenv(&env); - return (r); - default: /* FE_UPWARD */ - if (z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafterl(r, INFINITY); - feupdateenv(&env); - return (r); - } - } if (spread < -LDBL_MANT_DIG) { feraiseexcept(FE_INEXACT); if (!isnormal(z)) @@ -139,49 +220,49 @@ fmal(long double x, long double y, long return (z); } } + if (spread <= LDBL_MANT_DIG * 2) + zs = ldexpl(zs, -spread); + else + zs = copysignl(LDBL_MIN, zs); - /* - * Use Dekker's algorithm to perform the multiplication and - * subsequent addition in twice the machine precision. - * Arrange so that x * y = c + cc, and x * y + z = r + rr. - */ fesetround(FE_TONEAREST); - p = xs * split; - hx = xs - p; - hx += p; - tx = xs - hx; - - p = ys * split; - hy = ys - p; - hy += p; - ty = ys - hy; - - p = hx * hy; - q = hx * ty + tx * hy; - c = p + q; - cc = p - c + q + tx * ty; - - zs = ldexpl(zs, -spread); - r = c + zs; - s = r - c; - rr = (c - (r - s)) + (zs - s) + cc; + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ + xy = dd_mul(xs, ys); + r = dd_add(xy.hi, zs); spread = ex + ey; - if (spread + ilogbl(r) > -16383) { + + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ fesetround(oround); - r = r + rr; - } else { + volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ + return (xy.hi + vzs + ldexpl(xy.lo, spread)); + } + + if (oround != FE_TONEAREST) { /* - * The result is subnormal, so we round before scaling to - * avoid double rounding. + * There is no need to worry about double rounding in directed + * rounding modes. */ - p = ldexpl(copysignl(0x1p-16382L, r), -spread); - c = r + p; - s = c - r; - cc = (r - (c - s)) + (p - s) + rr; fesetround(oround); - r = (c + cc) - p; + adj = r.lo + xy.lo; + return (ldexpl(r.hi + adj, spread)); } - return (ldexpl(r, spread)); + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogbl(r.hi) > -16383) + return (ldexpl(r.hi + adj, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); }