Date: Sun, 12 Aug 2012 23:10:05 -0000 From: Bruce Evans <brde@optusnet.com.au> To: Bruce Evans <brde@optusnet.com.au> Cc: Diane Bruce <db@db.net>, John Baldwin <jhb@freebsd.org>, David Chisnall <theraven@freebsd.org>, Stephen Montgomery-Smith <stephen@missouri.edu>, Bruce Evans <bde@freebsd.org>, Steve Kargl <sgk@troutmask.apl.washington.edu>, David Schultz <das@freebsd.org>, Peter Jeremy <peter@rulingia.com>, Warner Losh <imp@bsdimp.com> Subject: Re: Use of C99 extra long double math functions after r236148 Message-ID: <20120720184114.B2790@besplex.bde.org> Resent-Message-ID: <20120812230958.GR20453@server.rulingia.com> In-Reply-To: <20120720162953.N2162@besplex.bde.org> References: <20120714120432.GA70706@server.rulingia.com> <20120717084457.U3890@besplex.bde.org> <5004A5C7.1040405@missouri.edu> <5004DEA9.1050001@missouri.edu> <20120717200931.U6624@besplex.bde.org> <5006D13D.2080702@missouri.edu> <20120718205625.GA409@troutmask.apl.washington.edu> <500725F2.7060603@missouri.edu> <20120719025345.GA1376@troutmask.apl.washington.edu> <50077987.1080307@missouri.edu> <20120719032706.GA1558@troutmask.apl.washington.edu> <5007826D.7060806@missouri.edu> <5007AD41.9070000@missouri.edu> <20120719205347.T2601@besplex.bde.org> <50084322.7020401@missouri.edu> <20120720035001.W4053@besplex.bde.org> <50085441.4090305@missouri.edu> <20120720162953.N2162@besplex.bde.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, 20 Jul 2012, Bruce Evans wrote: > I was going to leave all the fixes to you, but now want to try the > approximation for tiny y :-). This version now beats Apple clog() for accuracy. The maximum difference observed is down from the exa-ulp range to 16 ulps with all of the 4-16 ulp differences checked against pari being innaccuracies in Apple clog(). 2**28 cases were tested, but most of the errors found look like this: % x = 0x3fe0000000000000 0.5 % y = 0x3fec000000000000 0.875 % loghypota(x, y) = 0x3ff7fe054587e01ea000 0x3f7fc0a8b0fc03d4 0.00775209326798261336156 % loghypot(x, y) = 0x3ff7fe054587e01f2000 0x3f7fc0a8b0fc03e4 0.00775209326798262723934 % err = +0x8000 16.00000 % pari log(0.5+0.875*I): % 0.007752093267982627075427023021 + 1.051650212548373667459867312*I so my tests barely cover fractions like 1/8 and the coverage may be too limited. New errors kept turning up as I expanded the coverage. % #include <complex.h> % #include <float.h> % #include <math.h> % #include <fenv.h> % % #include "math_private.h" % % /* % * gcc doesn't implement complex multiplication or division correctly, so we % * need to handle infinities specially. We turn on this pragma to notify % * conforming c99 compilers that the fast-but-incorrect code that gcc % * generates is acceptable, since the special cases have already been % * handled. % */ % #pragma STDC CX_LIMITED_RANGE ON % % double complex % clog(double complex z) % { % double x, y, h, t1, t2, t3; % double x0, y0, x1, y1; % % x = creal(z); % y = cimag(z); % % #define NANMIX_APPLE_CLOG_COMPAT 1 % /* Handle special cases when x or y is NAN. */ % if (isnan(x)) { % if (isinf(y)) % return (cpack(INFINITY, NAN)); % else { % #if NANMIX_HYPOTF_COMPAT % y = fabs(y + 0); % t1 = (y - y) / (y - y); /* Raise invalid flag if y is % * not NaN */ % t1 = fabs(x + 0) + t1; /* Mix NaN(s). */ % #elif NANMIX_APPLE_CLOG_COMPAT % /* No actual mixing. */ % return (cpack(x, copysign(x, y))); % #else % t1 = (y - y) / (y - y); /* Raise invalid flag if y is % * not NaN */ % #endif % return (cpack(t1, t1)); % } % } else if (isnan(y)) { % if (isinf(x)) % return (cpack(INFINITY, NAN)); % else { % #ifdef NANMIX_HYPOTF_COMPAT % x = fabs(x + 0); % t1 = (x - x) / (x - x); /* Raise invalid flag if x is % * not NaN */ % t1 = t1 + fabs(y + 0); /* Mix NaN(s). */ % #elif NANMIX_APPLE_CLOG_COMPAT % /* No actual mixing. */ % return (cpack(y, y)); % #else % t1 = (x - x) / (x - x); /* Raise invalid flag if x is % * not NaN */ % #endif % return (cpack(t1, t1)); % } % } else if (isfinite(x) && isfinite(y) && % (fabs(x) > 1e308 || fabs(y) > 1e308)) % /* % * To avoid unnecessary overflow, if x or y are very large, % * divide x and y by M_E, and then add 1 to the logarithm. % * This depends on M_E being larger than sqrt(2). % */ % return (cpack(log(hypot(x / M_E, y / M_E)) + 1, atan2(y, x))); Important fix here. 1 was added to the log() arg instead of to log(). % else if (fabs(x) < 1e-50 && fabs(y) != 1 || % fabs(y) < 1e-50 && fabs(x) != 1 || % fabs(x) > 1e50 || fabs(y) > 1e50) The special case for |x| == 1 and |y| == 1 was defeated by returning for it here. % /* % * Because atan2 and hypot conform to C99, this also covers % * all the edge cases when x or y are 0 or infinite. % */ % return (cpack(log(hypot(x, y)), atan2(y, x))); % else { % /* We don't need to worry about overflow in x*x+y*y. */ % h = x * x + y * y; % if (h < 0.1 || h > 10) % return (cpack(log(h) / 2, atan2(y, x))); % /* Take extra care if h is moderately close to 1 */ % else { % #if 1 % if (fabs(x) == 1) % return (cpack(log1p(y * y) / 2, % atan2(y, x))); % if (fabs(y) == 1) % return (cpack(log1p(x * x) / 2, % atan2(y, x))); % #endif Special case. It seems too special, but Apple clog() doesn't do any more, and this with the other special cases is enough to beat Apple clog(). % /* % * x0 and y0 are good approximations to x and y, but % * have their bits trimmed so that double precision % * floating point is capable of calculating x0*x0 + % * y0*y0 - 1 exactly. % */ The only way for x*x + y*y to be _very_ near 1 in infinite precision is for |x| or y| to be 1 (I think). Other cases are bounded away from 1, and if you are lucky the bound is fairly far from 1 so that sloppier approximations work OK. Mathematicians should determine the bound exactly using continued fractions or something like they do for approximations to N*Pi/2. This becomes especially interesting in high precisions where you can't hope to get near the worst case by random testing. % x0 = ldexp(floor(ldexp(x, 24)), -24); % x1 = x - x0; % y0 = ldexp(floor(ldexp(y, 24)), -24); % y1 = y - y0; This has a chance of working iff the bound away from 1 is something like 2**-24. Otherwise, multiplying by 2**24 and flooring a positive value will just produce 0. 2**-24 seems much too small a bound. My test coverage is not wide enough to hit many bad cases. % /* Notice that mathematically, h = t1*(1+t3). */ % t1 = x0 * x0 + y0 * y0; % t2 = 2 * x0 * x1 + x1 * x1 + 2 * y0 * y1 + y1 * y1; % t3 = t2 / t1; % return (cpack((log1p(t1 - 1) + log1p(t3)) / 2, % atan2(y, x))); % } % } % } Bruce
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120720184114.B2790>