From owner-freebsd-numerics@FreeBSD.ORG Tue Sep 25 15:46:08 2012 Return-Path: Delivered-To: numerics@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id F3CC61065670 for ; Tue, 25 Sep 2012 15:46:07 +0000 (UTC) (envelope-from sgk@troutmask.apl.washington.edu) Received: from troutmask.apl.washington.edu (troutmask.apl.washington.edu [128.95.76.21]) by mx1.freebsd.org (Postfix) with ESMTP id BEEE28FC0A for ; Tue, 25 Sep 2012 15:46:07 +0000 (UTC) Received: from troutmask.apl.washington.edu (localhost.apl.washington.edu [127.0.0.1]) by troutmask.apl.washington.edu (8.14.5/8.14.5) with ESMTP id q8PFk6lU028993 for ; Tue, 25 Sep 2012 08:46:06 -0700 (PDT) (envelope-from sgk@troutmask.apl.washington.edu) Received: (from sgk@localhost) by troutmask.apl.washington.edu (8.14.5/8.14.5/Submit) id q8PFk66t028992 for numerics@freebsd.org; Tue, 25 Sep 2012 08:46:06 -0700 (PDT) (envelope-from sgk) Date: Tue, 25 Sep 2012 08:46:06 -0700 From: Steve Kargl To: numerics@freebsd.org Message-ID: <20120925154606.GA28919@troutmask.apl.washington.edu> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Cc: Subject: [PATCH] implementation for expm1l() X-BeenThere: freebsd-numerics@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "Discussions of high quality implementation of libm functions." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 25 Sep 2012 15:46:08 -0000 The patch following my .sig implements expm1l(). An example of testing 10M values uniformly distributed in each interval gives amd64: % ./testl -n 10 Interval tested: [-11355.000000:-0.250000] Max ULP: 0.50325 x Max ULP: -5.21316172131617162e+00, -0x1.4da4710f73f83p+2 Interval tested: [-0.250000:0.250000] Max ULP: 0.53926 x Max ULP: -1.15709936570993657e-01, -0x1.d9f2a996507ec308p-4 Interval tested: [0.250000:11356.000000] Max ULP: 0.50584 x Max ULP: 8.79177827642782764e+03, 0x1.12be39e8fde623b2p+13 sparc64 (only very limited testing due to speed of flame): % ./testl -n 10 Interval tested: [-11355.000000:-0.287682] Max ULP: 0.50165 x Max ULP: -5.34961332997700764996445801944829055e+00, -0x1.566010969fcd47a521247b4128p+2 Interval tested: [-0.287682:0.223144] Max ULP: 0.54843 x Max ULP: -2.86792265208762197924244858751372070e-01, -0x1.25acdf1f45027985d7c04fa34168p-2 Interval tested: [0.223144:11356.000000] Max ULP: 0.50577 x Max ULP: 7.83174792710817081424055325173740399e+03, 0x1.e97bf7826a562b03aafc33340393p+12 Now, the problem! On i386 in the interval [-0.287682:0.223144], I see up to 15.5 ULP for the ld80 patch. Note, if I don't use ENTERI and RETURNI in the ld80 code, I see ULP that are consistent with the amd64 results. This of course leads back to the 53- versus 64-bit precision settings on i386. I have tried re-arranging the order of evaluation of the polynomial and some of the intermediate results with little improvement. I've also increased the order of the polynomial. Everything I've tried seems to fail on i386. So, anyone got a suggestion? PS: the relevant section of code is in the 'if (T1 < x && x < T2)' block of code. -- Steve Index: ld80/s_expl.c =================================================================== --- ld80/s_expl.c (revision 240889) +++ ld80/s_expl.c (working copy) @@ -301,3 +301,149 @@ RETURNI(t * twopkp10000 * twom10000); } } + +/*- + * Implementation of expm1l(x) in Intel 80-bit long double format. + * The significand is 64 bits. This is based on + * + * PTP Tang, "Table-driven implementation of the Expm1 function + * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 18, + * 211-222 (1992). + */ + +#if 0 +static const long double +T1 = -0x1.269621134db92786p-2L, /* log(1-1/4) = -2.876820724517809275e-01L */ +T2 = 0x1.c8ff7c79a9a21ac4p-3L; /* log(1+1/4) = 2.231435513142097558e-01L */ +#else +static const long double +T1 = -0.25L, +T2 = 0.25L; +#endif + +/* + * Remes polynomial coefficients on the interval [T1:T2] with an error + * estimate of log2(|expm1(x)-p(x)|) ~ -74. These are the coefficients + * rounded to long double precision where B5 through B14 can be used with + * a double type. + */ +static const long double +B3 = 0x1.5555555555555556p-3L, /* 1.66666666666666666671e-01 */ +B4 = 0x1.5555555555555556p-5L; /* 4.16666666666666666678e-02 */ + +static const double __aligned(64) +B5 = 0x1.1111111111111118p-7, /* 8.33333333333333333627e-03 */ +B6 = 0x1.6c16c16c16c147cp-10, /* 1.38888888888888839641e-03 */ +B7 = 0x1.a01a01a019ffd3b6p-13, /* 1.98412698412697632766e-04 */ +B8 = 0x1.a01a01a01db26a7cp-16, /* 2.48015873016385187861e-05 */ +B9 = 0x1.71de3a55a0c1ee4ep-19, /* 2.75573192248932803421e-06 */ +B10 = 0x1.27e4fb50209391bep-22, /* 2.75573190052072576834e-07 */ +B11 = 0x1.ae645165ce954702p-26, /* 2.50521038560011694434e-08 */ +B12 = 0x1.1eef018ecbb0486cp-29, /* 2.08771683916037414477e-09 */ +B13 = 0x1.615bbeecd7fc9bd8p-33, /* 1.60688788144810303816e-10 */ +B14 = 0x1.89b55fd4d237b3a2p-37; /* 1.11898684031101176243e-11 */ + +long double +expm1l(long double x) +{ + union IEEEl2bits u, v; + + long double fn, q, r, r1, r2, t, t23, t45, twomk, twopk, z; + int k, n, n2; + uint16_t hx, ix; + + /* Filter out exceptional cases. */ + u.e = x; + hx = u.xbits.expsign; + ix = hx & 0x7fff; + if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ + if (ix == BIAS + LDBL_MAX_EXP) { + if (hx & 0x8000 && u.xbits.man == 1ULL << 63) + return (-1.0L); /* x is -Inf */ + return (x + x); /* x is +Inf, NaN or unsupported */ + } + if (x > o_threshold.e) + return (huge * huge); + if (x < -50) /* XXX s.b. ~66ln2, maybe in bits */ + return (tiny - 1.0L); + + } else if (ix < BIAS - 66) { /* |x| < 0x1p-66 */ + if ((int)x == 0) + return (x); /* x with inexact if x != 0 */ + } + + ENTERI(); + + if (T1 < x && x < T2) { + + long double t1, t2, t3, t4, u, v, x3, y; + + u = (0x1.0p40 * x + x) - 0x1.0p40 * x; + v = x - u; + y = u * u * 0.5L; + z = v * (x + u) * 0.5L; + t1 = B3 + x * (B4 + B5 * x); + t2 = B6 + x * (B7 + B8 * x); + t3 = B9 + x * (B10 + B11 * x); + t4 = B12 + x * (B13 + B14 * x); + x3 = x * x * x; + q = x3 * (t1 + x3 * (t2 + x3 * (t3 + x3 * t4))); + + if (y >= 0x1.p-7) + RETURNI((u + y) + (q + (v + z))); + else + RETURNI(x + (y + (q + z))); + } + + /* Reduce x to (k*ln2 + midpoint[n2] + r1 + r2). */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + fn = x * INV_L + 0x1.8p63 - 0x1.8p63; + r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */ +#if defined(HAVE_EFFICIENT_IRINTL) + n = irintl(fn); +#elif defined(HAVE_EFFICIENT_IRINT) + n = irint(fn); +#else + n = (int)fn; +#endif + n2 = (unsigned)n % INTERVALS; + k = (n - n2) / INTERVALS; + r1 = x - fn * L1; + r2 = -fn * L2; + + /* Prepare scale factor. */ + v.xbits.man = 1ULL << 63; + v.xbits.expsign = BIAS + k; + twopk = v.e; + + /* Evaluate expl(midpoint[n2] + r1 + r2) = s[n2] * expl(r1 + r2). */ + /* Here q = q(r), not q(r1), since r1 is lopped like L1. */ + t45 = r * P5 + P4; + z = r * r; + t23 = r * P3 + P2; + q = r2 + z * t23 + z * z * t45 + z * z * z * P6; + t = (long double)s[n2].lo + s[n2].hi; + + if (k < -7) { + t = s[n2].lo + t * (q + r1) + s[n2].hi; + RETURNI(t * twopk - 1.L); + } + + if (k > 2 * LDBL_MANT_DIG - 1) { + t = s[n2].lo + t * (q + r1) + s[n2].hi; + if (k == LDBL_MAX_EXP) + RETURNI(t * 2.L * 0x1p16383L); + RETURNI(t * twopk - 1.L); + } + + v.xbits.man = 1ULL << 63; + v.xbits.expsign = BIAS - k; + twomk = v.e; + + if (k > LDBL_MANT_DIG - 1) + t = s[n2].hi + (t * (q + r1) + (s[n2].lo - twomk)); + else + t = (s[n2].hi - twomk) + (t * (q + r1) + s[n2].lo); + + RETURNI(t * twopk); +} Index: ld128/s_expl.c =================================================================== --- ld128/s_expl.c (revision 240889) +++ ld128/s_expl.c (working copy) @@ -259,3 +259,132 @@ return (t * twopkp10000 * twom10000); } } + +static const long double +T1 = -2.87682072451780927439219005993827491e-01L, /* log(1-1/4) */ +T2 = 2.23143551314209755766295090309834524e-01L; /* log(1+1/4) */ + +/* + * Remes polynomial coefficients on the interval [T1:T2] with an error + * estimate of log2(|expm1(x)-p(x)|) ~ -118. + */ +static const long double +B3 = 1.6666666666666666666666666666666666467e-01L, +B4 = 4.1666666666666666666666666666666496958e-02L, +B5 = 8.3333333333333333333333333333383308238e-03L, +B6 = 1.3888888888888888888888888890337273186e-03L, +B7 = 1.9841269841269841269841269642813099020e-04L, +B8 = 2.4801587301587301587301550362968625647e-05L, +B9 = 2.7557319223985890652560213443426485839e-06L, +B10 = 2.7557319223985890652989939489574826910e-07L, +B11 = 2.5052108385441718755414747818597290671e-08L, +B12 = 2.0876756987868096239671566018121112987e-09L, +B13 = 1.6059043836821679450153540085716850723e-10L, +B14 = 1.1470745597739719945035140610973825690e-11L, +B15 = 7.6471637317367933536887863417862982136e-13L, +B16 = 4.7794773113757503628361216897593430644e-14L, +B17 = 2.8114571923374448979404436413403060597e-15L, +B18 = 1.5619443087615951176479734695316869815e-16L, +B19 = 8.2233234109330560523187524072569835812e-18L, +B20 = 4.0010880199748106995049690682517608541e-19L; + +long double +expm1l(long double x) +{ + union IEEEl2bits u, v; + + long double fn, q, r, r1, r2, t, t1, t2, t3, twomk, twopk, z; + int k, n, n2; + uint32_t hx, ix; + + /* Filter out exceptional cases. */ + u.e = x; + hx = u.xbits.expsign; + ix = hx & 0x7fff; + if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ + if (ix == BIAS + LDBL_MAX_EXP) { + if (hx & 0x8000 && u.xbits.manh == 0 && + u.xbits.manl == 0) + return (-1.0L); /* x is -Inf */ + return (x + x); /* x is +Inf or NaN */ + } + if (x > o_threshold) + return (huge * huge); + if (x < -84) /* XXX s.b. ~115ln2, maybe in bits */ + return (tiny - 1.0L); + } else if (ix < BIAS - 115) { /* |x| < 0x1p-115 */ + if ((int)x == 0) + return (x); /* x with inexact if x != 0 */ + } + + if (T1 < x && x < T2) { + + long double t1, t2, t3, t4, t5, t6, u, v, x3, y; + + u = (0x1.0p80 * x + x) - 0x1.0p80 * x; + v = x - u; + y = u * u * 0.5L; + z = v * (x + u) * 0.5L; + t1 = B3 + x * (B4 + B5 * x); + t2 = B6 + x * (B7 + B8 * x); + t3 = B9 + x * (B10 + B11 * x); + t4 = B12 + x * (B13 + B14 * x); + t5 = B15 + x * (B16 + B17 * x); + t6 = B18 + x * (B19 + B20 * x); + x3 = x * x * x; + t4 += x3 * (t5 + x3 * t6); + q = x3 * (t1 + x3 * (t2 + x3 * (t3 + x3 * t4))); + + if (y >= 0x1.p-7) + return ((u + y) + (q + (v + z))); + else + return (x + (y + (q + z))); + } + + /* Reduce x to (k*ln2 + midpoint[n2] + r1 + r2). */ + fn = x * INV_L + 0x1.8p112 - 0x1.8p112; + r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */ + n = (int)fn; + n2 = (unsigned)n % INTERVALS; + k = (n - n2) / INTERVALS; + r1 = x - fn * L1; + r2 = -fn * L2; + + /* Prepare scale factors. */ + v.xbits.manh = 0; + v.xbits.manl = 0; + v.xbits.expsign = BIAS + k; + twopk = v.e; + + z = r * r; + t1 = P2 + P3 * r + P4 * z; + t2 = P5 + P6 * r + P7 * z; + t3 = P8 + P9 * r + P10 * z; + q = r2 + z * (t1 + r * (t2 + r * (t3 + r * P11 * z) * z) * z); + t = s[n2].lo + s[n2].hi; + + if (k < -7) { + t = s[n2].lo + t * (q + r1) + s[n2].hi; + return (t * twopk - 1.L); + } + + if (k > 2 * LDBL_MANT_DIG - 1) { + t = s[n2].lo + t * (q + r1) + s[n2].hi; + if (k == LDBL_MAX_EXP) + return (t * 2.L * 0x1p16383L); + return (t * twopk - 1.L); + } + + /* Prepare scale factors. */ + v.xbits.manh = 0; + v.xbits.manl = 0; + v.xbits.expsign = BIAS - k; + twomk = v.e; + + if (k > LDBL_MANT_DIG - 1) + t = s[n2].hi + (t * (q + r1) + (s[n2].lo - twomk)); + else + t = (s[n2].hi - twomk) + (t * (q + r1) + s[n2].lo); + + return (t * twopk); +}