From owner-freebsd-numerics@FreeBSD.ORG Wed Aug 15 13:35:55 2012 Return-Path: Delivered-To: freebsd-numerics@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id EAD021065674 for ; Wed, 15 Aug 2012 13:35:54 +0000 (UTC) (envelope-from brde@optusnet.com.au) Received: from mail14.syd.optusnet.com.au (mail14.syd.optusnet.com.au [211.29.132.195]) by mx1.freebsd.org (Postfix) with ESMTP id 2E1AF8FC17 for ; Wed, 15 Aug 2012 13:35:53 +0000 (UTC) Received: from c122-106-171-246.carlnfd1.nsw.optusnet.com.au (c122-106-171-246.carlnfd1.nsw.optusnet.com.au [122.106.171.246]) by mail14.syd.optusnet.com.au (8.13.1/8.13.1) with ESMTP id q7FDZidA007438 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Wed, 15 Aug 2012 23:35:46 +1000 Date: Wed, 15 Aug 2012 23:35:44 +1000 (EST) From: Bruce Evans X-X-Sender: bde@besplex.bde.org To: Stephen Montgomery-Smith In-Reply-To: <502A780B.2010106@missouri.edu> Message-ID: <20120815223631.N1751@besplex.bde.org> References: <5017111E.6060003@missouri.edu> <501C361D.4010807@missouri.edu> <20120804165555.X1231@besplex.bde.org> <501D51D7.1020101@missouri.edu> <20120805030609.R3101@besplex.bde.org> <501D9C36.2040207@missouri.edu> <20120805175106.X3574@besplex.bde.org> <501EC015.3000808@missouri.edu> <20120805191954.GA50379@troutmask.apl.washington.edu> <20120807205725.GA10572@server.rulingia.com> <20120809025220.N4114@besplex.bde.org> <5027F07E.9060409@missouri.edu> <20120814003614.H3692@besplex.bde.org> <50295F5C.6010800@missouri.edu> <20120814072946.S5260@besplex.bde.org> <50297CA5.5010900@missouri.edu> <50297E43.7090309@missouri.edu> <20120814201105.T934@besplex.bde.org> <502A780B.2010106@missouri.edu> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed Cc: freebsd-numerics@freebsd.org, Bruce Evans Subject: Re: Complex arg-trig functions X-BeenThere: freebsd-numerics@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "Discussions of high quality implementation of libm functions." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Aug 2012 13:35:55 -0000 On Tue, 14 Aug 2012, Stephen Montgomery-Smith wrote: > On 08/14/2012 05:46 AM, Bruce Evans wrote: >> On Mon, 13 Aug 2012, Stephen Montgomery-Smith wrote: >> >>> if (sqrt_huge+x>one && sqrt_huge+y>one) >> >> x and y can be DBL_MAX, giving overflow. > > Why? When x is DBL_MAX, sqrt_huge is so very much smaller than DBL_MAX that > DBL_MAX+sqrt_huge should be DBL_MAX within floating point precision. So no > overflow. Oh, I see now. The only problem is if someone sets the rounding mode to FE_UPWARD, but we depend on it not being changed from the default of FE_TONEAREST in many places. > It seemed to me that there is a logic behind why the the infs and nans > produce the results they do. I noticed that do_the_hard_work() already got > the answers correct for the real part *rx. Getting the imaginary part to > work as well seemed to me to be the cleanest way to make it work. (I added > all the nan and inf checking after writing the rest of the code.) An up-front check may still be simpler, and gives more control. In csqrt*(), I needed an explicit check and special expressions to get uniform behaviour. I added this to the NaN mixing in catan[h]*(), and now all my tests pass: % diff -c2 catrig.c~ catrig.c % *** catrig.c~ Sun Aug 12 17:29:18 2012 % --- catrig.c Wed Aug 15 11:57:02 2012 % *************** % *** 605,609 **** % */ % if (ISNAN(x) || ISNAN(y)) % ! return (cpack(x+y, x+y)); % % /* (x,inf) and (inf,y) and (inf,inf) -> (0,PI/2) */ % --- 609,613 ---- % */ % if (ISNAN(x) || ISNAN(y)) % ! return (cpack((x+0.0L)+(y+0), (x+0.0L)+(y+0))); % % /* (x,inf) and (inf,y) and (inf,inf) -> (0,PI/2) */ Use this expression in all precisions. I forgot to comment it. Adding 0 quietens signaling NaNs before mixing NaNs. I should have tried y+y. Adding 0.0L promotes part of the expression to long double together with quietening signaling NaNs. The rest of the expression is promoted to match. I should try the old way again: of (long double)x+x. % diff -c2 catrigf.c~ catrigf.c % *** catrigf.c~ Sun Aug 12 17:00:52 2012 % --- catrigf.c Wed Aug 15 11:57:08 2012 % *************** % *** 349,353 **** % % if (isnan(x) || isnan(y)) % ! return (cpackf(x+y, x+y)); % % if (isinf(x) || isinf(y)) % --- 351,355 ---- % % if (isnan(x) || isnan(y)) % ! return (cpack((x+0.0L)+(y+0), (x+0.0L)+(y+0))); % % if (isinf(x) || isinf(y)) % diff -c2 catrigl.c~ catrigl.c % *** catrigl.c~ Sun Aug 12 06:54:46 2012 % --- catrigl.c Wed Aug 15 11:58:46 2012 % *************** % *** 323,327 **** % % if (isnan(x) || isnan(y)) % ! return (cpackl(x+y, x+y)); % % if (isinf(x) || isinf(y)) % --- 325,329 ---- % % if (isnan(x) || isnan(y)) % ! return (cpack((x+0.0L)+(y+0), (x+0.0L)+(y+0))); % % if (isinf(x) || isinf(y)) % Index: ../s_csqrt.c % =================================================================== % RCS file: /home/ncvs/src/lib/msun/src/s_csqrt.c,v % retrieving revision 1.4 % diff -u -2 -r1.4 s_csqrt.c % --- ../s_csqrt.c 8 Aug 2008 00:15:16 -0000 1.4 % +++ ../s_csqrt.c 14 Aug 2012 20:34:07 -0000 % @@ -34,14 +34,5 @@ % #include "math_private.h" % % -/* % - * gcc doesn't implement complex multiplication or division correctly, % - * so we need to handle infinities specially. We turn on this pragma to % - * notify conforming c99 compilers that the fast-but-incorrect code that % - * gcc generates is acceptable, since the special cases have already been % - * handled. % - */ % -#pragma STDC CX_LIMITED_RANGE ON Remove this. There was only 1 complex expression, and it depended on the negation of this pragma to work. Since gcc doesn't support this pragma, the expression only worked accidentally when it was optimized. % - % -/* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ % +/* For avoiding overflow for components >= DBL_MAX / (1 + sqrt(2)). */ % #define THRESH 0x1.a827999fcef32p+1022 We only risked this threshold being wrong. % % @@ -50,7 +41,5 @@ % { % double complex result; % - double a, b; % - double t; % - int scale; % + double a, b, rx, ry, scale, t; % % a = creal(z); `scale' is now a scale factor intead of a flag. New variables to fix the complex expression. Fix style bugs. % @@ -64,5 +53,5 @@ % if (isnan(a)) { % t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ % - return (cpack(a, t)); /* return NaN + NaN i */ % + return (cpack(a + a, a + 0.0L + t)); /* return NaN + NaN i */ % } % if (isinf(a)) { Old fix for not quietening a. Also, mix the NaNs (if there are 2) in the imaginary part. This depends on the hardware doing the right thing when t is the default NaN, and it does in all cases tested: when b is NaN, we want t to be b quietened and the imaginary part to be the mix of a quietened and b quietened; when b is not NaN, we want both parts to be a quietened. We don't want t to have an effect if it is just the default NaN. The real part should mix the NaNs too, but I didn't do it right and got some inconsistencies. % @@ -78,16 +67,25 @@ % return (cpack(a, copysign(b - b, b))); % } % - /* % - * The remaining special case (b is NaN) is handled just fine by % - * the normal code path below. % - */ % + if (isnan(b)) { % + t = (a - a) / (a - a); /* raise invalid */ % + return (cpack(b + b, b + 0.0L + t)); /* return NaN + NaN i */ % + } It was easier add a special case than to fall through. Again t is only needed for the side effects of its expression. It would be better to assign it to a volatile variable and return b + B for both parts. % % /* Scale to avoid overflow. */ % if (fabs(a) >= THRESH || fabs(b) >= THRESH) { % - a *= 0.25; % - b *= 0.25; % - scale = 1; % + if (fabs(a) >= 0x1p-1021) % + a *= 0.25; % + if (fabs(b) >= 0x1p-1021) % + b *= 0.25; While testing the NaNs, I noticed several other bugs. This fixes spurious underflow when one of a or b is tiny. There are too many scattered fabs()'s. It would be better to take fabs()'s up front, or determine exponents and use exponents in the threshold tests. % + scale = 2; % } else { % - scale = 0; % + scale = 1; % + } % + % + /* Scale to reduce inaccuracies when both components are denormal. */ % + if (fabs(a) <= 0x1p-1023 && fabs(b) <= 0x1p-1023) { % + a *= 0x1p54; % + b *= 0x1p54; % + scale = 0x1p-27; % } % This is like a fix in clog(). hypot() handles denormals OK, but necessarily loses accuracy when it returns a denormal result, so the expression (a + hypot(a, b)) is more inaccurate than necessary. With scaling, it is accurate to about 1 ulp at first, and then inverse scaling makes it accurate to 1 denormal ulp in even more cases. For example, let a = 0 and b = smallest denormal. Then csqrt(z) should be sqrt(2)/2*(1+I)* rounded = (1+I)*, but t = sqrt(a + hypot(a, b)) * 0.5 is * 0.5 rounded = 0, since the sqrt() has to round down to and then the muliplication has to round down again. Here (a + hypot(a, b)) is exact but the sqrt() and the multiplication aren't. The error without this fix was about 34 ulps on values near 2**36 times the smallest denormal. It was many gulps on smaller values. % @@ -95,15 +93,13 @@ % if (a >= 0) { % t = sqrt((a + hypot(a, b)) * 0.5); % - result = cpack(t, b / (2 * t)); % + rx = t; % + ry = b / (2 * t); % } else { % t = sqrt((-a + hypot(a, b)) * 0.5); % - result = cpack(fabs(b) / (2 * t), copysign(t, b)); % + rx = fabs(b) / (2 * t); % + ry = copysign(t, b); % } % % - /* Rescale. */ % - if (scale) % - return (result * 2); % - else % - return (result); % + return (cpack(rx * scale, ry * scale)); % } % Multiplication of the complex value result by either the fixed scale 2 or the variable scale 'scale' should probably clobber the sign of 0 in many cases, should it should probably be equivalent to multiplication by (iscale + I * 0). This is like the sign clobbering for adding ln2. This is not wanted, so we should't do a complex multiplication. When the scale factor was 2, gcc optimized the multiplication to an addition, even at -O0 IIRC, so the sign was accidentally 0. Otherwise, -O always optimized to avoid clobbering. But with the variable scale and -O0, a full complex multiplication was done. % Index: ../s_csqrtf.c % =================================================================== % RCS file: /home/ncvs/src/lib/msun/src/s_csqrtf.c,v % retrieving revision 1.3 % diff -u -2 -r1.3 s_csqrtf.c % --- ../s_csqrtf.c 8 Aug 2008 00:15:16 -0000 1.3 % +++ ../s_csqrtf.c 14 Aug 2012 20:34:21 -0000 % @@ -33,18 +33,12 @@ % #include "math_private.h" % % -/* % - * gcc doesn't implement complex multiplication or division correctly, % - * so we need to handle infinities specially. We turn on this pragma to % - * notify conforming c99 compilers that the fast-but-incorrect code that % - * gcc generates is acceptable, since the special cases have already been % - * handled. % - */ % -#pragma STDC CX_LIMITED_RANGE ON % - This didn't use complex arithmetic before. % float complex % csqrtf(float complex z) % { % - float a = crealf(z), b = cimagf(z); % double t; % + float a, b; % + % + a = creal(z); % + b = cimag(z); % % /* Handle special cases. */ More style fixes. % @@ -55,5 +49,5 @@ % if (isnan(a)) { % t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ % - return (cpackf(a, t)); /* return NaN + NaN i */ % + return (cpackf(a + a, a + 0.0L + t)); /* return NaN + NaN i */ % } % if (isinf(a)) { % @@ -69,8 +63,8 @@ % return (cpackf(a, copysignf(b - b, b))); % } % - /* % - * The remaining special case (b is NaN) is handled just fine by % - * the normal code path below. % - */ % + if (isnan(b)) { % + t = (a - a) / (a - a); /* raise invalid */ % + return (cpack(b + b, b + 0.0L + t)); /* return NaN + NaN i */ % + } % % /* NaN fixes. % @@ -81,8 +75,8 @@ % if (a >= 0) { % t = sqrt((a + hypot(a, b)) * 0.5); % - return (cpackf(t, b / (2.0 * t))); % + return (cpackf(t, b / (2 * t))); % } else { % t = sqrt((-a + hypot(a, b)) * 0.5); % - return (cpackf(fabsf(b) / (2.0 * t), copysignf(t, b))); % + return (cpackf(fabsf(b) / (2 * t), copysignf(t, b))); % } % } Style fixes. Overflow and accuracy fixes are not needed, since we use extra precision. % Index: ../s_csqrtl.c % =================================================================== % RCS file: /home/ncvs/src/lib/msun/src/s_csqrtl.c,v % retrieving revision 1.2 % diff -u -2 -r1.2 s_csqrtl.c % --- ../s_csqrtl.c 8 Aug 2008 00:15:16 -0000 1.2 % +++ ../s_csqrtl.c 15 Aug 2012 09:04:11 -0000 % @@ -34,14 +34,5 @@ % #include "math_private.h" % % -/* % - * gcc doesn't implement complex multiplication or division correctly, % - * so we need to handle infinities specially. We turn on this pragma to % - * notify conforming c99 compilers that the fast-but-incorrect code that % - * gcc generates is acceptable, since the special cases have already been % - * handled. % - */ % -#pragma STDC CX_LIMITED_RANGE ON % - % -/* We risk spurious overflow for components >= LDBL_MAX / (1 + sqrt(2)). */ % +/* For avoiding spurious overflow for components >= LDBL_MAX / (1 + sqrt(2)). */ % #define THRESH (LDBL_MAX / 2.414213562373095048801688724209698L) % % @@ -50,7 +41,5 @@ % { % long double complex result; % - long double a, b; % - long double t; % - int scale; % + long double a, b, rx, ry, scale, t; % % a = creall(z); % @@ -64,5 +53,5 @@ % if (isnan(a)) { % t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ % - return (cpackl(a, t)); /* return NaN + NaN i */ % + return (cpackl(a + a, a + 0.0L + t)); /* return NaN + NaN i */ % } % if (isinf(a)) { % @@ -78,16 +67,25 @@ % return (cpackl(a, copysignl(b - b, b))); % } % - /* % - * The remaining special case (b is NaN) is handled just fine by % - * the normal code path below. % - */ % + if (isnan(b)) { % + t = (a - a) / (a - a); /* raise invalid */ % + return (cpack(b + b, b + 0.0L + t)); /* return NaN + NaN i */ % + } % % /* Scale to avoid overflow. */ % if (fabsl(a) >= THRESH || fabsl(b) >= THRESH) { % - a *= 0.25; % - b *= 0.25; % - scale = 1; % + if (fabsl(a) >= 0x1p-16381L) % + a *= 0.25; % + if (fabsl(b) >= 0x1p-16381L) % + b *= 0.25; % + scale = 2; % } else { % - scale = 0; % + scale = 1; % + } % + % + /* Scale to reduce inaccuracies when both components are denormal. */ % + if (fabsl(a) <= 0x1p-16383L && fabsl(b) <= 0x1p-16383L) { % + a *= 0x1p64; % + b *= 0x1p64; % + scale = 0x1p-32; % } % % @@ -95,14 +93,12 @@ % if (a >= 0) { % t = sqrtl((a + hypotl(a, b)) * 0.5); % - result = cpackl(t, b / (2 * t)); % + rx = t; % + ry = b / (2 * t); % } else { % t = sqrtl((-a + hypotl(a, b)) * 0.5); % - result = cpackl(fabsl(b) / (2 * t), copysignl(t, b)); % + rx = fabs(b) / (2 * t); % + ry = copysign(t, b); % } % % - /* Rescale. */ % - if (scale) % - return (result * 2); % - else % - return (result); % + return (cpack(rx * scale, ry * scale)); % } Same changes for long doubles as for doubles, except for magic number. Bruce