From owner-freebsd-numerics@FreeBSD.ORG Fri Sep 14 12:06:35 2012 Return-Path: Delivered-To: freebsd-numerics@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 4DC74106564A for ; Fri, 14 Sep 2012 12:06:35 +0000 (UTC) (envelope-from brde@optusnet.com.au) Received: from mail02.syd.optusnet.com.au (mail02.syd.optusnet.com.au [211.29.132.183]) by mx1.freebsd.org (Postfix) with ESMTP id 91B2D8FC14 for ; Fri, 14 Sep 2012 12:06:34 +0000 (UTC) Received: from c122-106-157-84.carlnfd1.nsw.optusnet.com.au (c122-106-157-84.carlnfd1.nsw.optusnet.com.au [122.106.157.84]) by mail02.syd.optusnet.com.au (8.13.1/8.13.1) with ESMTP id q8EC6Uvc007136 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Fri, 14 Sep 2012 22:06:32 +1000 Date: Fri, 14 Sep 2012 22:06:30 +1000 (EST) From: Bruce Evans X-X-Sender: bde@besplex.bde.org To: Stephen Montgomery-Smith In-Reply-To: <50526050.2070303@missouri.edu> Message-ID: <20120914212403.H1983@besplex.bde.org> References: <5017111E.6060003@missouri.edu> <501EC015.3000808@missouri.edu> <20120805191954.GA50379@troutmask.apl.washington.edu> <20120807205725.GA10572@server.rulingia.com> <20120809025220.N4114@besplex.bde.org> <5027F07E.9060409@missouri.edu> <20120814003614.H3692@besplex.bde.org> <50295F5C.6010800@missouri.edu> <20120814072946.S5260@besplex.bde.org> <50297CA5.5010900@missouri.edu> <50297E43.7090309@missouri.edu> <20120814201105.T934@besplex.bde.org> <502A780B.2010106@missouri.edu> <20120815223631.N1751@besplex.bde.org> <502C0CF8.8040003@missouri.edu> <20120906221028.O1542@besplex.bde.org> <5048D00B.8010401@missouri.edu> <504D3CCD.2050006@missouri.edu> <504FF726.9060001@missouri.edu> <20120912191556.F1078@besplex.bde.org> <20120912225847.J1771@besplex.bde.org> <50511B40.3070009@missouri.edu> <20120913204808.T1964@besplex.bde.org> <5051F59C.6000603@missouri.edu> <20120914014208.I2862@besplex.bde.org> <50526050.2070303@missouri.edu> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed Cc: freebsd-numerics@freebsd.org Subject: Re: Complex arg-trig functions X-BeenThere: freebsd-numerics@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "Discussions of high quality implementation of libm functions." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 14 Sep 2012 12:06:35 -0000 On Thu, 13 Sep 2012, Stephen Montgomery-Smith wrote: > OK, I think I have implemented all your suggestions now. Tell me what you > think. (Today I am a little more spacey than usual, but I think I got it > all.) Not quite. After merging them, I get: % diff -u2 catrig.c~ catrig.c % --- catrig.c~ 2012-09-14 01:49:04.000000000 +0000 % +++ catrig.c 2012-09-14 11:15:42.305034000 +0000 % @@ -27,5 +27,6 @@ % #include % #include % -#include % + % +#include "math.h" % #include "math_private.h" % Start cleaning up includes. % @@ -37,13 +38,14 @@ % /* We need that DBL_EPSILON^2 is larger than FOUR_SQRT_MIN. */ % static const double % -A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better. */ % -B_crossover = 0.6417, /* suggested by Hull et al. */ % +A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better */ % +B_crossover = 0.6417, /* suggested by Hull et al */ Old changes. % FOUR_SQRT_MIN = 0x1p-509, /* greater than 4 * sqrt(DBL_MIN) */ % -QUARTER_SQRT_MAX = 0x1p509, /* less than 0.25 * sqrt(DBL_MAX) */ % + /* XXX this equal, not greater */ % +QUARTER_SQRT_MAX = 0x1p509, /* less than sqrt(DBL_MAX) / 4 */ Change halfs and quarters to division by 2 and 4 in comments too. This gives large diffs. QUARTER_SQRT_MAX satisfies the fixed comment (it is smaller by a factor of about sqrt(2), since DBL_MAX is close to 2**1023 and not close to 2**1022), but FOUR_SQRT_MIN doesn't satisify the fixed comment (it is exact since DBL_MIN is exactly 2**-1022 and I calculated the sqrt() to accurately by halving the exponent). This seems to be harmless, but the comment should agree. % m_e = 2.7182818284590452e0, /* 0x15bf0a8b145769.0p-51 */ % m_ln2 = 6.9314718055994531e-1, /* 0x162e42fefa39ef.0p-53 */ % m_pi_2 = 1.5707963267948966e0, /* 0x1921fb54442d18.0p-52 */ % RECIP_EPSILON = 1/DBL_EPSILON, % -SQRT_MIN = 0x1p-511; /* greater than sqrt(DBL_MIN) */ % +SQRT_MIN = 0x1p-511; /* >= sqrt(DBL_MIN) */ For all uses of this, having a precise threshold is good, so I changed the comment. There was only 1 use, but I added another. % % static const volatile double % @@ -82,6 +84,6 @@ % * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) % * where % - * A = 0.5 * (|z+I| + |z-I|) % - * B = 0.5 * (|z+I| - |z-I|) = y/A % + * A = (|z+I| + |z-I|) / 2 % + * B = (|z+I| - |z-I|) / 2 = y/A % * % * These formulas become numerically unstable: % @@ -93,5 +95,5 @@ % * % * These numerical problems are overcome by defining % - * f(a, b) = 0.5 * (hypot(a, b) - b) = 0.5 * a*a / (hypot(a, b) + b) % + * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 % * Then if A < A_crossover, we use % * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) % @@ -116,5 +118,5 @@ % % /* % - * Function f(a, b, hypot_a_b) = 0.5 * (hypot(a, b) - b). % + * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. % * Pass hypot(a, b) as the third argument. % */ % @@ -146,5 +148,5 @@ % S = hypot(x, y-1); /* |z-I| */ % % - /* A = 0.5 * (|z+I| + |z-I|) */ % + /* A = (|z+I| + |z-I|) / 2 */ % A = (R + S) / 2; % /* % @@ -170,5 +172,5 @@ % } else if (y == 1) { % /* % - * fp is of order x^2, and fm = 0.5*x. % + * fp is of order x^2, and fm = x/2. % * A = 1 (inexactly). % */ % @@ -177,5 +179,5 @@ % } else if (y < 1) { % /* % - * fp = 0.25*x*x/(1+y), fm = 0.25*x*x/(1-y), and % + * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and % * A = 1 (inexactly). % */ % @@ -208,5 +210,5 @@ % } % % - /* B = 0.5 * (|z+I| - |z-I|) = y/A */ % + /* B = (|z+I| - |z-I|) / 2 = y/A */ % *B = y/A; % *B_is_usable = 1; % @@ -228,12 +230,12 @@ % } else if (y == 1) { % /* % - * fp is of order x^2, and fm = 0.5*x. % + * fp is of order x^2, and fm = x/2. % * A = 1 (inexactly). % */ % if ((int)x==0) /* raise inexact */ % - *sqrt_A2my2 = sqrt(x)*sqrt(0.5*(A+y)); % + *sqrt_A2my2 = sqrt(x)*sqrt((A+y)/2); Stray 0.5 in the code too. % } else if (y > 1) { % /* % - * fp = 0.25*x*x/(y+1), fm = 0.25*x*x/(y-1), and % + * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and % * A = y (inexactly). % * % @@ -282,7 +284,7 @@ % if (ISINF(bx)) % return (cpack(x, y+y)); % - /* casinh(NaN + I*+-Inf) = +-Inf + I*NaN (sign optional) */ % + /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ % if (ISINF(by)) % - return (cpack(INFINITY, x+x)); % + return (cpack(y, x+x)); % /* casinh(NaN + I*0) = NaN + I*0 */ % if (y == 0) return (cpack(x+x, y)); You put the optional choice in the wrong function (cacos() instead of casin()). My choice makes casin() odd for this infinity. Start using the notation opt(whatever) for the choice. % @@ -359,7 +361,7 @@ % % if (ISNAN(x) || ISNAN(y)) { % - /* cacos(+-Inf + I*NaN) = NaN + I*-+Inf */ % + /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ % if (ISINF(bx)) % - return (cpack(y+y, -x)); % + return (cpack(y+y, -INFINITY)); % /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ % if (ISINF(by)) Now my choice is a fixed sign (minus, selected from the signs visible in the description of the input parameters). My choice makes cacos() even for this infinitity, and hopefully as continuous as possible when x -> +-Inf. % @@ -416,8 +418,10 @@ % { % double complex w; % - double x, y, rx, ry; % + double rx, ry; % + % w = cacos(z); % rx = creal(w); % ry = cimag(w); % + % /* cacosh(NaN + I*NaN) = NaN + I*NaN */ % if (ISNAN(rx) && ISNAN(ry)) % @@ -434,5 +438,5 @@ % % /* % - * Optimized version of clog() for |z| finite and larger than ~1/DBL_EPSILON. % + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. % */ % static double complex % @@ -453,4 +457,5 @@ % % /* % + * XXX this comment is now misindented. % * Avoid overflow in hypot() when x and y are both very large. % * Divide x and y by E, and then add 1 to the logarithm. This depends on E % @@ -462,9 +467,9 @@ % return (cpack(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); % % -/* % - * Avoid overflow when x or y is large. Avoid underflow when x or % - * y is small. % - */ % - if (ay < FOUR_SQRT_MIN || ax > QUARTER_SQRT_MAX) % + /* % + * Avoid overflow when x or y is large. Avoid underflow when x or % + * y is small. % + */ % + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) % return (cpack(log(hypot(x, y)), atan2(y, x))); % SQRT_MIN is the correct threshold, so use it now that it is available. This is just an optimization. ax > EPSILON, so we could discard y*y below a much larger threshold, but by using the smallest possible threshold we get the fast code in all possible cases. Old change to sort the comparisons in the code to match the comment. % @@ -527,8 +532,8 @@ % } % % -/* % - * catanh(z) = 0.5 * log((1+z)/(1-z)) % - * = 0.25 * log1p(4*x / |z-1|^2) % - * + 0.5 * I * atan2(2y, (1-x)*(1+x)-y*y) % +/* % + * catanh(z) = log((1+z)/(1-z)) / 2 % + * = log1p(4*x / |z-1|^2) / 4 % + * + I * atan2(2y, (1-x)*(1+x)-y*y) / 2 % * % * catanh(z) = z + O(1/|z|^3) as z -> 0 % @@ -556,8 +561,8 @@ % if (ISINF(bx)) % return (cpack(copysign(0, x), y+y)); % - /* catanh(NaN + I*+-Inf) = 0 + I*+-PI/2 */ % + /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ % if (ISINF(by)) % return (cpack(copysign(0, x), copysign(m_pi_2, y))); % - /* catanh(0 + I*NaN) = 0 + I*NaN */ % + /* catanh(+-0 + I*NaN) = +-0 + I*NaN */ % if (x == 0) % return (cpack(x, y+y)); You made some subtle changes near here, and I noticed these. The first change documents copying the sign of the NaN to a signed 0. I think copying the sign of a NaN shouldn't be done, especually when it takes extra code as here, but that's what the code does. The sccond change documents that the result is a signed 0, with the sign coming from the arg 0. Perhaps plain 0 should have these semantics, with the sign only mentioned when it is not there on either side, but that seems backwards. % diff -u2 catrigf.c~ catrigf.c % --- catrigf.c~ 2012-09-14 01:49:19.000000000 +0000 % +++ catrigf.c 2012-09-14 10:49:46.482399000 +0000 % @@ -37,17 +37,25 @@ % #include % #include % -#include % + % +#include "math.h" % #include "math_private.h" % % +#undef isinf % +#define isinf(x) (fabsf(x) == INFINITY) % +#undef isnan % +#define isnan(x) ((x) != (x)) % +#undef signbit % +#define signbit(x) (__builtin_signbit(x)) % + Local hacks to try to speed this up. It still seems slower than double precision. % static const float % A_crossover = 10, % B_crossover = 0.6417, % FOUR_SQRT_MIN = 0x1p-61, This may be too small. % -QUARTER_SQRT_MAX = 0x1p60, % +QUARTER_SQRT_MAX = 0x1p61, 0x1p61 was already an underestimate, so it doesn't need to be reduced more. % m_e = 2.7182818285e0, /* 0xadf854.0p-22 */ % m_ln2 = 6.9314718056e-1, /* 0xb17218.0p-24 */ % -m_pi_2 = 1.5707963268e0, /* 0xc90fdb.0p-23 */ % +m_pi_2 = 1.5707963268e0, /* 0xc90fdb.0p-23 */ % RECIP_EPSILON = 1/FLT_EPSILON, % -SQRT_MIN = 0x1p-511; % +SQRT_MIN = 0x1p-63; This was broken (underflowed to 0). % % static const volatile float % @@ -113,5 +121,5 @@ % } else if (y == 1) { % if ((int)x==0) % - *sqrt_A2my2 = sqrtf(x)*sqrtf(0.5*(A+y)); % + *sqrt_A2my2 = sqrtf(x)*sqrtf((A+y)/2); % } else if (y > 1) { % if ((int)x==0) % @@ -141,5 +149,5 @@ % return (cpackf(x, y+y)); % if (isinf(y)) % - return (cpackf(INFINITY, x+x)); % + return (cpackf(y, x+x)); % if (y == 0) return (cpackf(x+x, y)); % return (cpackf(x+0.0L+(y+0), x+0.0L+(y+0))); % @@ -191,5 +199,5 @@ % if (isnan(x) || isnan(y)) { % if (isinf(x)) % - return (cpackf(y+y, -x)); % + return (cpackf(y+y, -INFINITY)); % if (isinf(y)) % return (cpackf(x+x, -y)); % @@ -235,5 +243,6 @@ % { % float complex w; % - float x, y, rx, ry; % + float rx, ry; % + % w = cacosf(z); % rx = crealf(w); % @@ -267,5 +276,5 @@ % return (cpackf(logf(hypotf(x / m_e, y / m_e)) + 1, atan2f(y, x))); % % - if (ay < FOUR_SQRT_MIN || ax > QUARTER_SQRT_MAX) % + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) % return (cpackf(logf(hypotf(x, y)), atan2f(y, x))); % % diff -u2 catrigl.c~ catrigl.c % --- catrigl.c~ 2012-09-14 01:49:21.000000000 +0000 % +++ catrigl.c 2012-09-14 10:15:19.216734000 +0000 % @@ -37,15 +37,16 @@ % #include % #include % -#include % + % +#include "fpmath.h" % +#include "math.h" % #include "math_private.h" % -#include <_fpmath.h> The normal _fpmath.h is libc/i386/_fpmath.h; it is supposed to be included by fpmath.h, which is libc/include/fpmath.h. The non-modularity of these can cause problems, but I have include paths to them set up in dozens of scripts and makefiles, where the script or makefile has complications to figure out a path to an out-of-tree libc, so I only notice the problems when the source tree layout changes. Another -I path turns into the source tree math.h, so that instead of "math.h" is just a style bug. % % static const long double % -FOUR_SQRT_MIN = 0x1p-8000L, % -QUARTER_SQRT_MAX = 0x1p8000L, % A_crossover = 10, % B_crossover = 0.6417, % +FOUR_SQRT_MIN = 0x1p-8189L, % +QUARTER_SQRT_MAX = 0x1p8189L, Use my possibly-too-exact constents and sort them. % RECIP_EPSILON = 1/LDBL_EPSILON, % -SQRT_MIN = 0x1p-8191; % +SQRT_MIN = 0x1p-8191L; This was broken (underflowed to 0 in a different way). % % #if LDBL_MANT_DIG == 64 % @@ -64,8 +65,8 @@ % #else % #error "Unsupported long double format" % -#endif % +#endif % % static const volatile long double % -tiny = 0x1p-16000L; % +tiny = 0x1p-10000L; Use the normal magic number. % % static long double complex clog_for_large_values(long double complex z); % @@ -128,5 +129,5 @@ % } else if (y == 1) { % if ((int)x==0) % - *sqrt_A2my2 = sqrtl(x)*sqrtl(0.5*(A+y)); % + *sqrt_A2my2 = sqrtl(x)*sqrtl((A+y)/2); % } else if (y > 1) { % if ((int)x==0) % @@ -156,5 +157,5 @@ % return (cpackl(x, y+y)); % if (isinf(y)) % - return (cpackl(INFINITY, x+x)); % + return (cpackl(y, x+x)); % if (y == 0) return (cpackl(x+x, y)); % return (cpackl(x+0.0L+(y+0), x+0.0L+(y+0))); % @@ -206,5 +207,5 @@ % if (isnan(x) || isnan(y)) { % if (isinf(x)) % - return (cpackl(y+y, -x)); % + return (cpackl(y+y, -INFINITY)); % if (isinf(y)) % return (cpackl(x+x, -y)); % @@ -250,5 +251,6 @@ % { % long double complex w; % - long double x, y, rx, ry; % + long double rx, ry; % + % w = cacosl(z); % rx = creall(w); % @@ -282,5 +284,5 @@ % return (cpackl(logl(hypotl(x / m_e, y / m_e)) + 1, atan2l(y, x))); % % - if (ay < FOUR_SQRT_MIN || ax > QUARTER_SQRT_MAX) % + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) % return (cpackl(logl(hypotl(x, y)), atan2l(y, x))); % Bruce