From owner-freebsd-standards@FreeBSD.ORG Mon Nov 5 20:14:54 2007 Return-Path: Delivered-To: freebsd-standards@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 5EE3816A418 for ; Mon, 5 Nov 2007 20:14:54 +0000 (UTC) (envelope-from brde@optusnet.com.au) Received: from fallbackmx03.syd.optusnet.com.au (fallbackmx03.syd.optusnet.com.au [211.29.133.136]) by mx1.freebsd.org (Postfix) with ESMTP id A5A2B13C4AA for ; Mon, 5 Nov 2007 20:14:53 +0000 (UTC) (envelope-from brde@optusnet.com.au) Received: from mail35.syd.optusnet.com.au (mail35.syd.optusnet.com.au [211.29.133.51]) by fallbackmx03.syd.optusnet.com.au (8.12.11.20060308/8.12.11) with ESMTP id lA5D59qK006493 for ; Tue, 6 Nov 2007 00:05:09 +1100 Received: from c211-30-219-213.carlnfd3.nsw.optusnet.com.au (c211-30-219-213.carlnfd3.nsw.optusnet.com.au [211.30.219.213]) by mail35.syd.optusnet.com.au (8.13.1/8.13.1) with ESMTP id lA5D44n3013663 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Tue, 6 Nov 2007 00:04:07 +1100 Date: Tue, 6 Nov 2007 00:04:04 +1100 (EST) From: Bruce Evans X-X-Sender: bde@delplex.bde.org To: Bruce Evans In-Reply-To: <20071105220251.C19770@delplex.bde.org> Message-ID: <20071105234202.T19976@delplex.bde.org> References: <20071012180959.GA36345@troutmask.apl.washington.edu> <20071103155721.K671@besplex.bde.org> <20071103195952.GA91682@troutmask.apl.washington.edu> <20071105220251.C19770@delplex.bde.org> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed Cc: freebsd-standards@freebsd.org, Steve Kargl Subject: Re: [PATCH] hypotl, cabsl, and code removal in cabs X-BeenThere: freebsd-standards@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Standards compliance List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 05 Nov 2007 20:14:54 -0000 On Mon, 5 Nov 2007, Bruce Evans wrote: > I found only 1 serious bug n your hypotl(): adding n or m back to the > exponent gives garbage if the resulting exponent doesn't fit. Fix for > the float precision case: > > % if (n >= m) { > ... Full patch to convert to float precision and fix some bugs: % --- z~ Mon Nov 5 23:40:51 2007 % +++ z Mon Nov 5 23:41:32 2007 % @@ -27,6 +27,6 @@ % /* % * Compute hypotl(x, y) = sqrt(x**2 + y**2) such that underflow and % - * overflow are avoided. To accomplishe the task, write x = a * 2**n % - * and y = b * 2**m, one then has % + * overflow are avoided. To accomplish the task, write x = a * 2**n % + * and y = b * 2**m; one then has % * % * hypotl(x, y) = 2**n * sqrt(a**2 + b**(2*(m - n))) for n >= m I didn't converrt the comments to float precision. % @@ -42,9 +42,10 @@ % * hypotl(NaN, NaN) = hypotl(NaN, NaN) = Inf % */ % + % #include "math.h" % #include "math_private.h" % #include "fpmath.h" % % -#define ZERO 0.L % +static const float ZERO = 0.0; % % /* ZERO must be a non-literal constant to prevent evaluation of 1.0/0.0 at runtime. But due to compiler bugs/lack of support for C99/fenv, the above is probably no different. % @@ -53,12 +54,21 @@ % * function sqrtl() function exists. % */ % -#define PREC 32 % +#define PREC 12 /* XXX 6 works, but why isn't > 12 needed? */ % + % +union IEEEfbits { % + float e; % + struct { % + unsigned int man :23; % + unsigned int exp :8; % + unsigned int sign :1; % + } bits; % +}; I forgot that there was a suitable struct IEEEf2bits already defined in fpmath.h. % % -long double % -hypotl(long double x, long double y) % +float % +myhypotf(float x, float y) % { % - union IEEEl2bits a, b; % + union IEEEfbits a, b; % + float t; % int m, n; % - long double val; % % a.e = x; val is not used. % @@ -68,6 +78,8 @@ % % /* If x = 0 or y = 0, then hypotl(x, y) = |x| + |y|. */ % - if (!(a.bits.manh | a.bits.manl) || !(b.bits.manh | b.bits.manl)) % + if ((a.bits.exp == 0 && a.bits.man == 0) || % + (b.bits.exp == 0 && b.bits.man == 0)) % return (a.e + b.e); % + % /* % * If x = +-Inf or y = +- Inf, then hypotl(x, y) = Inf (even if the % @@ -75,11 +87,31 @@ % * argument is not +- Inf, then hypotl(x, y) = NaN. % */ % - if (a.bits.exp == 32767 || b.bits.exp == 32767) { % - mask_nbit_l(a); % - mask_nbit_l(b); % - if (!(a.bits.manh | a.bits.manl) || % - !(b.bits.manh | b.bits.manl)) % - return (1.L / ZERO); % - return ((x - x) / (x - x)); % + if (a.bits.exp == 0xff || b.bits.exp == 0xff) { % + if ((a.bits.exp == 0xff && a.bits.man == 0) || % + (b.bits.exp == 0xff && b.bits.man == 0)) % + return (1.0F / ZERO); % + /* % + * Here we do extra work to return the same NaN as fdlibm % + * on i386's, so that simple binary comparisons work. If % + * both a.e and b.e are NaNs, we want to return the one % + * that is highest as an integer. The height depends on % + * whether the hardware has set the qNaN bit, and whether % + * the hardware gets a chance to do that depends on the % + * optimization level and on whether our comparison function % + * has higher precision, since passing floats in integer % + * registers prevents quieting and converting floats to % + * higher precision uses the hardware. % + */ % + if (a.bits.exp == 0xff && b.bits.exp == 0xff) { % +#if 1 /* if call to comparison function will quieten */ % + if (a.bits.man != 0) % + a.bits.man |= 0x00400000; /* quieten */ % + if (b.bits.man != 0) % + b.bits.man |= 0x00400000; % +#endif % + if (a.bits.man < b.bits.man) % + return (b.e + a.e); % + } % + return (a.e + b.e); % } % Return the same value as fdlibm for testing. We should at least return some combination of both x and y in the NaN case, not just ((x - x) / (x - x)) when only y is a NaN, since when x is not a NaN the latter is unrelated to the only NaN arg. fdlibm sets the signs to 0 early similarly, so it tends to convert negative NaNs to positive ones in a way that the hardware would not do. Hacking on values using bit-fields or SET_*() tends to allow changes to NaNs that the hardware would not do. The above was written mainly on an A64 in i386 mode. In amd64 mode, I think loading sNaNs doesn't quieten them, so there would be even more binary differences. % @@ -90,16 +122,15 @@ % */ % if (a.bits.exp == 0) { % - a.e *= 0x1.0p514; % - n = a.bits.exp - 0x4200; % + a.e *= 0x1.0p25; % + n = a.bits.exp - 0x97; % } else % - n = a.bits.exp - 0x3ffe; % - a.bits.exp = 0x3ffe; % - % + n = a.bits.exp - 0x7e; % + a.bits.exp = 0x7e; % if (b.bits.exp == 0) { % - b.e *= 0x1.0p514; % - m = b.bits.exp - 0x4200; % + b.e *= 0x1.0p25; % + m = b.bits.exp - 0x97; % } else % - m = b.bits.exp - 0x3ffe; % - b.bits.exp = 0x3ffe; % + m = b.bits.exp - 0x7e; % + b.bits.exp = 0x7e; % % if (n >= m) { I'm not sure why this uses 514. I first tried converting 514 to 34, to get a nice round number instead of 0x97. This didn't work, but the problems may have been just the rescaling bugs later. fdlibm normalizes to a non-constant exponent. This would be more complicated here but simpler later. % @@ -110,6 +141,11 @@ % a.e += b.e * b.e; % } % - a.e = sqrtl(a.e); % - a.bits.exp += n; % + a.e = sqrtf(a.e); % + if (a.bits.exp + n <= 0 || a.bits.exp + n >= 0xff) { % + a.bits.exp += n - n / 2; % + SET_FLOAT_WORD(t, 0x3f800000 + ((n / 2) << 23)); % + a.e *= t; % + } else % + a.bits.exp += n; % return (a.e); % } else { % @@ -120,6 +156,11 @@ % b.e += a.e * a.e; % } % - b.e = sqrtl(b.e); % - b.bits.exp += m; % + b.e = sqrtf(b.e); % + if (b.bits.exp + m <= 0 || b.bits.exp + m >= 0xff) { % + b.bits.exp += m - m / 2; % + SET_FLOAT_WORD(t, 0x3f800000 + ((m / 2) << 23)); % + b.e *= t; % + } else % + b.bits.exp += m; % return (b.e); % } Fix rescaling. Bruce