From owner-svn-src-all@FreeBSD.ORG Sun Dec 5 22:11:03 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id E79D9106566C; Sun, 5 Dec 2010 22:11:03 +0000 (UTC) (envelope-from das@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id D643C8FC0A; Sun, 5 Dec 2010 22:11:03 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id oB5MB3K8009956; Sun, 5 Dec 2010 22:11:03 GMT (envelope-from das@svn.freebsd.org) Received: (from das@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id oB5MB3Ca009953; Sun, 5 Dec 2010 22:11:03 GMT (envelope-from das@svn.freebsd.org) Message-Id: <201012052211.oB5MB3Ca009953@svn.freebsd.org> From: David Schultz Date: Sun, 5 Dec 2010 22:11:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r216210 - head/lib/msun/src X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 05 Dec 2010 22:11:04 -0000 Author: das Date: Sun Dec 5 22:11:03 2010 New Revision: 216210 URL: http://svn.freebsd.org/changeset/base/216210 Log: Add a "kernel" log function, based on e_log.c, which is useful for implementing accurate logarithms in different bases. This is based on an approach bde coded up years ago. This function should always be inlined; it will be used in only a few places, and rudimentary tests show a 40% performance improvement in implementations of log2() and log10() on amd64. The kernel takes a reduced argument x and returns the same polynomial approximation as e_log.c, but omitting the low-order term. The low-order term is much larger than the rest of the approximation, so the caller of the kernel function can scale it to the appropriate base in extra precision and obtain a much more accurate answer than by using log(x)/log(b). Added: head/lib/msun/src/k_log.h - copied, changed from r216174, head/lib/msun/src/e_log.c head/lib/msun/src/k_logf.h - copied, changed from r216174, head/lib/msun/src/e_logf.c Copied and modified: head/lib/msun/src/k_log.h (from r216174, head/lib/msun/src/e_log.c) ============================================================================== --- head/lib/msun/src/e_log.c Sat Dec 4 02:42:52 2010 (r216174, copy source) +++ head/lib/msun/src/k_log.h Sun Dec 5 22:11:03 2010 (r216210) @@ -14,8 +14,13 @@ #include __FBSDID("$FreeBSD$"); -/* __ieee754_log(x) - * Return the logrithm of x +/* __kernel_log(x) + * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)]. + * + * The following describes the overall strategy for computing + * logarithms in base e. The argument reduction and adding the final + * term of the polynomial are done by the caller for increased accuracy + * when different bases are used. * * Method : * 1. Argument Reduction: find k and f such that @@ -65,13 +70,7 @@ __FBSDID("$FreeBSD$"); * to produce the hexadecimal values shown. */ -#include "math.h" -#include "math_private.h" - static const double -ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ -ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ -two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ @@ -80,48 +79,27 @@ Lg5 = 1.818357216161805012e-01, /* 3FC7 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ -static const double zero = 0.0; - -double -__ieee754_log(double x) +/* + * We always inline __kernel_log(), since doing so produces a + * substantial performance improvement (~40% on amd64). + */ +static inline double +__kernel_log(double x) { - double hfsq,f,s,z,R,w,t1,t2,dk; - int32_t k,hx,i,j; + double hfsq,f,s,z,R,w,t1,t2; + int32_t hx,i,j; u_int32_t lx; EXTRACT_WORDS(hx,lx,x); - k=0; - if (hx < 0x00100000) { /* x < 2**-1022 */ - if (((hx&0x7fffffff)|lx)==0) - return -two54/zero; /* log(+-0)=-inf */ - if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ - k -= 54; x *= two54; /* subnormal number, scale up x */ - GET_HIGH_WORD(hx,x); - } - if (hx >= 0x7ff00000) return x+x; - k += (hx>>20)-1023; - hx &= 0x000fffff; - i = (hx+0x95f64)&0x100000; - SET_HIGH_WORD(x,hx|(i^0x3ff00000)); /* normalize x or x/2 */ - k += (i>>20); f = x-1.0; if((0x000fffff&(2+hx))<3) { /* -2**-20 <= f < 2**-20 */ - if(f==zero) { - if(k==0) { - return zero; - } else { - dk=(double)k; - return dk*ln2_hi+dk*ln2_lo; - } - } - R = f*f*(0.5-0.33333333333333333*f); - if(k==0) return f-R; else {dk=(double)k; - return dk*ln2_hi-((R-dk*ln2_lo)-f);} + if(f==0.0) return 0.0; + return f*f*(0.33333333333333333*f-0.5); } s = f/(2.0+f); - dk = (double)k; z = s*s; + hx &= 0x000fffff; i = hx-0x6147a; w = z*z; j = 0x6b851-hx; @@ -129,12 +107,10 @@ __ieee754_log(double x) t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); i |= j; R = t2+t1; - if(i>0) { + if (i>0) { hfsq=0.5*f*f; - if(k==0) return f-(hfsq-s*(hfsq+R)); else - return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); + return s*(hfsq+R) - hfsq; } else { - if(k==0) return f-s*(f-R); else - return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f); + return s*(R-f); } } Copied and modified: head/lib/msun/src/k_logf.h (from r216174, head/lib/msun/src/e_logf.c) ============================================================================== --- head/lib/msun/src/e_logf.c Sat Dec 4 02:42:52 2010 (r216174, copy source) +++ head/lib/msun/src/k_logf.h Sun Dec 5 22:11:03 2010 (r216210) @@ -1,7 +1,3 @@ -/* e_logf.c -- float version of e_log.c. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ - /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. @@ -16,60 +12,33 @@ #include __FBSDID("$FreeBSD$"); -#include "math.h" -#include "math_private.h" +/* __kernel_logf(x) + * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)]. + */ static const float -ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ -ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25 = 3.355443200e+07, /* 0x4c000000 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ -static const float zero = 0.0; - -float -__ieee754_logf(float x) +static inline float +__kernel_logf(float x) { - float hfsq,f,s,z,R,w,t1,t2,dk; - int32_t k,ix,i,j; + float hfsq,f,s,z,R,w,t1,t2; + int32_t ix,i,j; GET_FLOAT_WORD(ix,x); - k=0; - if (ix < 0x00800000) { /* x < 2**-126 */ - if ((ix&0x7fffffff)==0) - return -two25/zero; /* log(+-0)=-inf */ - if (ix<0) return (x-x)/zero; /* log(-#) = NaN */ - k -= 25; x *= two25; /* subnormal number, scale up x */ - GET_FLOAT_WORD(ix,x); - } - if (ix >= 0x7f800000) return x+x; - k += (ix>>23)-127; - ix &= 0x007fffff; - i = (ix+(0x95f64<<3))&0x800000; - SET_FLOAT_WORD(x,ix|(i^0x3f800000)); /* normalize x or x/2 */ - k += (i>>23); f = x-(float)1.0; if((0x007fffff&(0x8000+ix))<0xc000) { /* -2**-9 <= f < 2**-9 */ - if(f==zero) { - if(k==0) { - return zero; - } else { - dk=(float)k; - return dk*ln2_hi+dk*ln2_lo; - } - } - R = f*f*((float)0.5-(float)0.33333333333333333*f); - if(k==0) return f-R; else {dk=(float)k; - return dk*ln2_hi-((R-dk*ln2_lo)-f);} + if(f==0.0) return 0.0; + return f*f*((float)0.33333333333333333*f-(float)0.5); } s = f/((float)2.0+f); - dk = (float)k; z = s*s; + ix &= 0x007fffff; i = ix-(0x6147a<<3); w = z*z; j = (0x6b851<<3)-ix; @@ -79,10 +48,8 @@ __ieee754_logf(float x) R = t2+t1; if(i>0) { hfsq=(float)0.5*f*f; - if(k==0) return f-(hfsq-s*(hfsq+R)); else - return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); + return s*(hfsq+R) - hfsq; } else { - if(k==0) return f-s*(f-R); else - return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f); + return s*(R-f); } }