Date: Fri, 19 May 2017 16:52:02 +1000 (EST) From: Bruce Evans <brde@optusnet.com.au> To: Steve Kargl <sgk@troutmask.apl.washington.edu> Cc: freebsd-numerics@freebsd.org Subject: Re: Implementation of half-cycle trignometric functions Message-ID: <20170519153326.A20099@besplex.bde.org> In-Reply-To: <20170518234722.GB77471@troutmask.apl.washington.edu> References: <20170427231411.GA11346@troutmask.apl.washington.edu> <20170428175851.A1386@besplex.bde.org> <20170516224618.GA40855@troutmask.apl.washington.edu> <20170517094848.A52247@besplex.bde.org> <20170517180927.GA54431@troutmask.apl.washington.edu> <20170518072636.E951@besplex.bde.org> <20170518014226.GA63819@troutmask.apl.washington.edu> <20170518154820.A8280@besplex.bde.org> <20170518175434.GA74453@troutmask.apl.washington.edu> <20170519074512.M884@besplex.bde.org> <20170518234722.GB77471@troutmask.apl.washington.edu>
next in thread | previous in thread | raw e-mail | index | archive | help
I fixed all bugs found by my tests: - sinpif() and cospif() were unnecessarily inaccurate. Now they are have the same maximum error of 0.5009 ulps as sinf() and cosf(), and more accuracy on average. - sinpi(), cospi() and tanpi() were broken on i386 with extra precision. Now they have a smaller error than sin(), etc. (about 0.55 ulps; the bug gave 1.5 ulps). - fix cospi*(odd integer) returning +1 for odd integers. Clean up related but accidentally working code for sinpi*() and tanpi*() (12 copies altogether). - fix broken threshold for Infs and NaNs in sinpi(), cosp() and tanpi() - try to fix off-by-1 error in translation of floorl() for ld80. This seems to work. The bug showed up as 2**62 being treated as 2**63, so args above 2**62 and below 2**63 only worked accidentally for sinl(), cosl() and tanl(). - fix the return value for NaNs. XX diff -u2 k_cospi.c~ k_cospi.c XX --- k_cospi.c~ Fri May 19 09:05:14 2017 XX +++ k_cospi.c Fri May 19 12:02:24 2017 XX @@ -35,5 +35,8 @@ XX __kernel_cospi(double x) XX { XX - double hi, lo; XX + /* XXX: volatile hack to abuse _2sumF(): */ XX + volatile double hi; XX + double lo; XX + XX hi = (float)x; XX lo = x - hi; This hack is sufficent to get strict assignment to hi. It is a bit slow. It costs about 30% on i386 (70 cycles increased to 90) in my version. XX diff -u2 k_sinpi.c~ k_sinpi.c XX --- k_sinpi.c~ Fri May 19 09:05:14 2017 XX +++ k_sinpi.c Fri May 19 12:02:56 2017 XX @@ -35,5 +35,8 @@ XX __kernel_sinpi(double x) XX { XX - double hi, lo; XX + /* XXX: volatile hack to abuse _2sumF(): */ XX + volatile double hi; XX + double lo; XX + XX hi = (float)x; XX lo = x - hi; XX diff -u2 ld128-s_cospil.c~ ld128-s_cospil.c XX --- ld128-s_cospil.c~ Fri May 19 11:45:43 2017 XX +++ ld128-s_cospil.c Fri May 19 12:30:56 2017 XX @@ -45,5 +45,4 @@ XX cospil(long double x) XX { XX - volatile static const double vzero = 0; XX long double ax, c, xf; XX uint32_t ix; This variable was only used to return wrong results for NaNs. XX @@ -73,5 +72,5 @@ XX if (ax < 0x1p112) { XX XX - xf = floorl(ax); XX + xf = ax - remainderl(ax, 1); /* floorl() without inexact */ XX XX ax -= xf; Try to fix spurious inexact for half-integers (quarter-integers for tanpil*()). I didn't test ld128. I tested ld80 enough to find inconsisties with double. They were much the same as inconsistencies with float. XX @@ -98,12 +97,6 @@ XX XX if (isinf(x) || isnan(x)) XX - return (vzero / vzero); XX + return (x - x); Use the standard convention and method for producing NaNs (+-Inf -> default NaN; NaN -> quiet(same NaN). XX XX - /* XX - * |x| >= 0x1p112 is always an even integer, so return 1. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = 1; XX - return (ax); XX + return (ix >= 0x403f || (lx & 1) == 0 ? 1 : -1); XX } Remove lots of style bugs and make this work. ax + 1 > 1 was always true, so this always returned +1, but it must return -1 at odd integers. The details are intentionally only given in s_cospi.c. XX diff -u2 ld128-s_sinpil.c~ ld128-s_sinpil.c XX --- ld128-s_sinpil.c~ Fri May 19 09:19:00 2017 XX +++ ld128-s_sinpil.c Fri May 19 12:34:02 2017 XX @@ -45,5 +45,4 @@ XX sinpil(long double x) XX { XX - volatile static const double vzero = 0; XX long double ax, s, xf, xhi, xlo; XX uint32_t ix; XX @@ -78,5 +77,5 @@ XX if (ax < 0x1p112) { XX XX - xf = floorl(ax); XX + xf = ax - remainderl(ax, 1); /* floorl() without inexact */ XX XX ax -= xf; XX @@ -106,12 +105,6 @@ XX XX if (isinf(x) || isnan(x)) XX - return (vzero / vzero); XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p112 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = copysignl(0, x); XX - return (ax); XX + return (x < 0 ? -0.0 : 0); XX } The sign wasn't even copied for cospil(). As usual, ax + 1 > 1 is always true, so this returned the correct value of 0 with the sign of x in all cases. I only fixed this because the bug was more serious for cospi*() so I had to edit that, and I wanted to remove the nearly-duplicated style bugs. XX diff -u2 ld128-s_tanpil.c~ ld128-s_tanpil.c XX --- ld128-s_tanpil.c~ Fri May 19 09:15:52 2017 XX +++ ld128-s_tanpil.c Fri May 19 12:34:41 2017 XX @@ -70,5 +70,4 @@ XX tanpil(long double x) XX { XX - volatile static const double vzero = 0; XX long double ax, xf; XX uint32_t ix; XX @@ -97,5 +96,5 @@ XX if (ix < 0x1p112) { XX XX - xf = floorl(ax); XX + xf = ax - remainderl(ax, 1); /* floorl() without inexact */ XX XX ax -= xf; XX @@ -112,12 +111,6 @@ XX /* x = +-inf or nan. */ XX if (isinf(x) || isnan(x)) XX - return (vzero / vzero); XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p53 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = copysignl(0, x); XX - return (ax); XX + return (x < 0 ? -0.0 : 0); XX } XX diff -u2 s_cospi.c~ s_cospi.c XX --- s_cospi.c~ Fri May 19 09:05:16 2017 XX +++ s_cospi.c Fri May 19 12:31:08 2017 XX @@ -74,5 +74,4 @@ XX cospi(double x) XX { XX - volatile static const double vzero = 0; XX double ax, c; XX uint32_t hx, ix, j0, lx; XX @@ -136,14 +135,12 @@ XX } XX XX - if (ix >= 0x7f800000) XX - return (vzero / vzero); XX + if (ix >= 0x7ff00000) XX + return (x - x); The wrong threshold was copied from the float case. XX XX /* XX - * |x| >= 0x1p52 is always an even integer, so return 1. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID? XX + * 0x1p53 <= |x|, then x is an even integer. Otherwise, XX + * 0x1p52 <= |x| < 0x1p53 and is odd iff lx is odd. XX */ XX - if (ax + 1 > 1) XX - ax = 1; XX - return (ax); XX + return (ix >= 0x43400000 || (lx & 1) == 0 ? 1 : -1); XX } XX This gives details of how to decide if an value is an even or odd integer. XX diff -u2 s_cospif.c~ s_cospif.c XX --- s_cospif.c~ Fri May 19 09:05:16 2017 XX +++ s_cospif.c Fri May 19 12:56:24 2017 XX @@ -42,5 +42,4 @@ XX cospif(float x) XX { XX - volatile static const float vzero = 0; XX float ax, c; XX uint32_t ix, j0; XX @@ -100,12 +99,6 @@ XX /* x = +-inf or nan. */ XX if (ix >= 0x7f800000) XX - return (vzero / vzero); XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p23 is always an even integer, so return 1. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = 1; XX - return (ax); XX + return (ix >= 0x4b800000 || (ix & 1) == 0 ? 1 : -1); XX } XX diff -u2 s_cospil.c~ s_cospil.c XX --- s_cospil.c~ Fri May 19 09:22:48 2017 XX +++ s_cospil.c Fri May 19 12:31:28 2017 XX @@ -48,5 +48,4 @@ XX cospil(long double x) XX { XX - volatile static const double vzero = 0; XX long double ax, c; XX uint32_t j0; XX @@ -87,5 +86,5 @@ XX lx &= ~(((lx << 32)-1) >> j0); XX } else { XX - uint64_t m = (uint64_t)-1 >> (j0 + 1); XX + uint64_t m = (uint64_t)-1 >> j0; XX if (lx & m) lx &= ~m; XX } XX @@ -118,12 +117,6 @@ XX XX if (ix >= 0x7fff) XX - RETURNI(vzero / vzero); XX + RETURNI(x - x); XX XX - /* XX - * |x| >= 0x1p63 is always an even integer, so return 1. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = 1; XX - RETURNI(ax); XX + RETURNI(ix >= 0x403f || (lx & 1) == 0 ? 1 : -1); XX } XX diff -u2 s_sinpi.c~ s_sinpi.c XX --- s_sinpi.c~ Fri May 19 09:05:16 2017 XX +++ s_sinpi.c Fri May 19 13:44:44 2017 XX @@ -77,5 +77,4 @@ XX sinpi(double x) XX { XX - volatile static const double vzero = 0; XX double ax, s; XX uint32_t hx, ix, j0, lx; XX @@ -87,5 +86,5 @@ XX if (ix < 0x3ff00000) { /* |x| < 1 */ XX if (ix < 0x3fd00000) { /* |x| < 0.25 */ XX - if (ix < 0x3e200000) { /* |x| < 0x1p-29 */ XX + if (ix < 0x3de00000) { /* |x| < 2**-33 */ XX if (x == 0) XX return (x); Reduce this threshold by a large factor for sinpi() and tanpi(), and by an even larger factor for sinpif() and tanpif(). For some reason, the x for sin(x), etc. is just as accurate as the poly up to a much higher threshold than pi*x for sinpi(x), etc. This reduces the maximum error for sinpif() from ~0.54 ulps to the same ~0.5009 ulps as for sinf(). For tanpif(), it only reduces the error like that for small x (tanpif()'s max error is large elswhere). For sinpi() and tanpi(), similarly except sinpi()'s maximum error is smaller than tanpi()'s (it seems to be smaller than sin()'s too -- I haven't seen it larger than 0.5312 ulps, where .0312 is from the poly error of 2**-5). For sinpil() and tanpil(), I didn't change this since I can't test it. This might depend on extra precision. Accuracy was thrown away for some other functions by increasing the threshold to what works without extra precision. It is unusual to even test with extra precision for doubles. Start fixing style bugs -- use the Fortran spelling of powers and not C hex FP values when the code doesn't use the latter. XX @@ -147,14 +146,8 @@ XX } XX XX - if (ix >= 0x7f800000) XX - return (vzero / vzero); XX + if (ix >= 0x7ff00000) XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p52 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID? XX - */ XX - if (ax + 1 > 1) XX - ax = copysign(0, x); XX - return (ax); XX + return (x < 0 ? -0.0 : 0); XX } XX XX diff -u2 s_sinpif.c~ s_sinpif.c XX --- s_sinpif.c~ Fri May 19 09:05:16 2017 XX +++ s_sinpif.c Fri May 19 13:22:55 2017 XX @@ -46,5 +46,4 @@ XX sinpif(float x) XX { XX - volatile static const float vzero = 0; XX float ax, s; XX uint32_t hx, ix, j0; XX @@ -56,5 +55,5 @@ XX if (ix < 0x3f800000) { /* |x| < 1 */ XX if (ix < 0x3e800000) { /* |x| < 0.25 */ XX - if (ix < 0x38800000) { /* |x| < 0x1p-14 */ XX + if (ix < 0x34800000) { /* |x| < 2**-22 */ XX if (x == 0) XX return (x); XX @@ -111,12 +110,6 @@ XX /* x = +-inf or nan. */ XX if (ix >= 0x7f800000) XX - return (vzero / vzero); XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p23 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID? XX - */ XX - if (ax + 1 > 1) XX - ax = copysignf(0, x); XX - return (ax); XX + return (x < 0 ? -0.0F : 0); XX } XX diff -u2 s_sinpil.c~ s_sinpil.c XX --- s_sinpil.c~ Fri May 19 09:23:00 2017 XX +++ s_sinpil.c Fri May 19 12:34:28 2017 XX @@ -48,5 +48,4 @@ XX sinpil(long double x) XX { XX - volatile static const double vzero = 0; XX long double ax, s; XX uint32_t j0; XX @@ -91,5 +90,5 @@ XX lx &= ~(((lx << 32)-1) >> j0); XX } else { XX - uint64_t m = (uint64_t)-1 >> (j0 + 1); XX + uint64_t m = (uint64_t)-1 >> j0; XX if (lx & m) lx &= ~m; XX } XX @@ -125,12 +124,6 @@ XX /* x = +-inf or nan. */ XX if (ix >= 0x7fff) XX - RETURNI(vzero / vzero); XX + RETURNI(x - x); XX XX - /* XX - * |x| >= 0x1p63 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = copysignl(0, x); XX - RETURNI(ax); XX + RETURNI(x < 0 ? -0.0 : 0); XX } XX diff -u2 s_tanpi.c~ s_tanpi.c XX --- s_tanpi.c~ Fri May 19 09:05:16 2017 XX +++ s_tanpi.c Fri May 19 14:02:34 2017 XX @@ -78,5 +78,7 @@ XX __kernel_tanpi(double x) XX { XX - double hi, lo, t; XX + /* XXX: volatile hack to abuse _2sumF(): */ XX + volatile double hi; XX + double lo, t; XX XX if (x < 0.25) { XX @@ -104,5 +106,4 @@ XX tanpi(double x) XX { XX - volatile static const double vzero = 0; XX double ax, t; XX uint32_t hx, ix, j0, lx; XX @@ -114,5 +115,5 @@ XX if (ix < 0x3ff00000) { /* |x| < 1 */ XX if (ix < 0x3fe00000) { /* |x| < 0.5 */ XX - if (ix < 0x3e200000) { /* |x| < 0x1p-29 */ XX + if (ix < 0x3df00000) { /* |x| < 0x1p-32 */ XX if (x == 0) XX return (x); XX @@ -156,14 +157,8 @@ XX XX /* x = +-inf or nan. */ XX - if (ix >= 0x7f800000) XX - return (vzero / vzero); XX + if (ix >= 0x7ff00000) XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p52 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID? XX - */ XX - if (ax + 1 > 1) XX - ax = copysign(0, x); XX - return (ax); XX + return (x < 0 ? -0.0 : 0); XX } XX XX diff -u2 s_tanpif.c~ s_tanpif.c XX --- s_tanpif.c~ Fri May 19 09:05:16 2017 XX +++ s_tanpif.c Fri May 19 13:20:29 2017 XX @@ -57,5 +57,4 @@ XX tanpif(float x) XX { XX - volatile static const float vzero = 0; XX float ax, t; XX uint32_t hx, ix, j0; XX @@ -67,5 +66,5 @@ XX if (ix < 0x3f800000) { /* |x| < 1 */ XX if (ix < 0x3f000000) { /* |x| < 0.5 */ XX - if (ix < 0x38800000) { /* |x| < 0x1p-14 */ XX + if (ix < 0x34800000) { /* |x| < 2**-22 */ XX if (ix == 0) XX return (x); XX @@ -104,12 +103,6 @@ XX /* x = +-inf or nan. */ XX if (ix >= 0x7f800000) XX - return (vzero / vzero); XX + return (x - x); XX XX - /* XX - * |x| >= 0x1p23 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = copysignf(0, x); XX - return (ax); XX + return (x < 0 ? -0.0F : 0); XX } XX diff -u2 s_tanpil.c~ s_tanpil.c XX --- s_tanpil.c~ Fri May 19 09:15:55 2017 XX +++ s_tanpil.c Fri May 19 12:35:05 2017 XX @@ -71,5 +71,4 @@ XX tanpil(long double x) XX { XX - volatile static const double vzero = 0; XX long double ax, t; XX uint32_t j0; XX @@ -109,5 +108,5 @@ XX lx &= ~(((lx << 32)-1) >> j0); XX } else { XX - uint64_t m = (uint64_t)-1 >> (j0 + 1); XX + uint64_t m = (uint64_t)-1 >> j0; XX if (lx & m) lx &= ~m; XX } XX @@ -128,12 +127,6 @@ XX /* x = +-inf or nan. */ XX if (ix >= 0x7fff) XX - RETURNI(vzero / vzero); XX + RETURNI(x - x); XX XX - /* XX - * |x| >= 0x1p63 is always an integer, so return +-0. XX - * FIXME: should this raise FE_INEXACT or FE_INVALID. XX - */ XX - if (ax + 1 > 1) XX - ax = copysignl(0, x); XX - RETURNI(ax); XX + RETURNI(x < 0 ? -0.0 : 0); XX } I now have complete and debugged versions of sinpif() and sinpil(). They are still about twice as fast, and competitive with sinf() etc. instead of more than twice as slow. They needed further fixes for classifications of half-integers and quarter-integers near the threshold. lgamma*() was broken (probably by us) by not doing this right, but for lgamma() these errors are hidden in the noise of other large errors. Here my a complete sinpinf(). It grew larger and slower than I like to become correct: YY #include <float.h> YY YY #include "math.h" YY #include "math_private.h" YY YY float YY ysinpif(float x) YY { YY float s,y,z; YY int32_t hx,ix; YY int n; YY YY GET_FLOAT_WORD(hx,x); YY ix = hx & 0x7fffffff; YY YY if (ix < 0x3e800000) { /* |x| < 0.25 */ YY if (ix < 0x34800000) /* |x| < 2**-22 */ YY if(((int)x)==0) return M_PI*x; /* pi*x with inexact if x != 0 */ YY return __kernel_sindf(M_PI*x); YY } YY YY if (ix>=0x7f800000) return x-x; /* Inf or NaN -> NaN */ YY YY if (ix >= 0x4b000000) return (x < 0 ? -0.0F : 0); /* integer -> +-0 */ YY YY y = fabs(x); YY YY if (ix >= 0x4a800000) { /* half-integer */ YY n = (ix & 3) * 2; YY y = 0; YY } else if (ix >= 0x4a000000) { /* quarter-integer */ YY n = ix & 7; YY y = 0; The half-integer and quarter-intger classification is new. Adding 0x1p21F to classify only works below 0x1p21F. The commments should translate the thresholds. The version for lgamma*() in -current is broken too, but the error is reduced by a preliminary classification of all integers in the above ranges. Adding 0x1p21F doesn't work for these either. So lgamma*() only gives wrong results for half-integers and quarter-integers in the above range. I think gamma*() overflows on these values, and the errors are relatively not so enormous for lgamma*(). YY } else { YY STRICT_ASSIGN(float,z,y+0x1p21F); YY GET_FLOAT_WORD(n,z); /* bits for rounded y (units 0.25) */ YY z -= 0x1p21F; /* y rounded to a multiple of 0.25 */ YY if (z > y) { YY z -= 0.25F; /* adjust to round down */ YY n--; YY } YY n &= 7; /* octant of y mod 2 */ YY y -= z; /* y mod 0.25 */ YY } YY YY if (n >= 3 && n < 7) YY s = -1; YY else YY s = 1; YY if (x < 0) YY s = -s; YY n &= 3; YY if (y == 0 && n == 0) YY return (x < 0 ? -0.0F : 0); YY if (n == 0) YY return s*__kernel_sindf(M_PI*y); YY else if (n == 1) YY return s*__kernel_cosdf(M_PI*(y-0.25F)); YY else if (n == 2) YY return s*__kernel_cosdf(M_PI*y); YY else YY return s*__kernel_sindf(M_PI*(y-0.25F)); YY } This still has style bugs suitable for copying back to lgamma*. Bruce
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20170519153326.A20099>