Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Jun 2004 08:32:22 +1000 (EST)
From:      Bruce Evans <bde@zeta.org.au>
To:        Stefan Farfeleder <stefanf@freebsd.org>
Cc:        freebsd-standards@freebsd.org
Subject:   Re: Fixing ilogb()
Message-ID:  <20040610074734.L6340@gamplex.bde.org>
In-Reply-To: <20040510153731.GP29712@wombat.fafoe.narf.at>
References:  <20040508194748.GN29712@wombat.fafoe.narf.at> <20040509201148.P8241@gamplex.bde.org> <20040510153731.GP29712@wombat.fafoe.narf.at>

next in thread | previous in thread | raw e-mail | index | archive | help
Long ago, On Mon, 10 May 2004, Stefan Farfeleder wrote:

> On Sun, May 09, 2004 at 09:44:03PM +1000, Bruce Evans wrote:
> [about delayed detection of exceptional cases]
>
> Thanks for your comments, Bruce.  A revised patch is attached, it only adds two
> instructions in the common case.
>
> <snip>
>
> > % +	movl	$0x7fffffff,%eax	/* FP_ILOGBNAN, INT_MAX */
> >
> > Style bugs: some comments could be improved, but won't be needed when
> > <machine/_limits.h> is used.
>
> Unfortunately <math.h> cannot be included for FP_ILOGB{0,NAN}, so I've
> duplicated those #defines for now.  Any suggestions?  Move them to another/new
> header instead?

I'm happy with this version, but I tried using ilogb() to implement
fls() and while micro-optimizing things it became clear that the MI C
version of ilogb() is good enough.  The difference is only a few cycles.
On an old Celeron, the MI C version can be tweaked to save a few
instructions and then it is actually faster than the MD version except
probably for denormal inputs.  On an Athlon XP, the FPU is relatively
faster than integer instructions, so it is hard to beat in C, but it
can be beaten for non-denormal inputs by using the same integer algorithm
as the C version.  The integer algorithm is especially efficent on
i386's because the parameter is passed on the stack so no conversions
are required.  On an Athlon64, the parameter has to be moved from %xmm0
for both the integer version and for fxtract, so the integer algorithm
should lose nothing.

Tweaked versions:

These use correct algorithms but may have some details wrong.

The patch for s_ilogb.S includes all your changes:

%%%
Index: s_ilogb.S
===================================================================
RCS file: /home/ncvs/src/lib/msun/i387/s_ilogb.S,v
retrieving revision 1.8
diff -u -2 -r1.8 s_ilogb.S
--- s_ilogb.S	6 Jun 2000 12:12:36 -0000	1.8
+++ s_ilogb.S	9 Jun 2004 20:47:56 -0000
@@ -34,9 +34,34 @@
  */

+#include <machine/_limits.h>
 #include <machine/asm.h>

 RCSID("$FreeBSD: src/lib/msun/i387/s_ilogb.S,v 1.8 2000/06/06 12:12:36 bde Exp $")

+#define	FP_ILOGB0	(-__INT_MAX)
+#define	FP_ILOGBNAN	__INT_MAX
+#define	FP_ILOGBINF	__INT_MAX
+
 ENTRY(ilogb)
+	movl	8(%esp),%eax
+	andl	$0x7FF00000,%eax
+#if 0
+	je	1f			# zero or denormal
+	cmpl	$0x7FF00000,%eax
+	je	1f			# Inf or NaN
+#else
+	decl	%eax
+	cmpl	$0x7FEFFFFF,%eax
+	jae	1f
+#endif
+	shrl	$20,%eax
+	subl	$1022,%eax
+	ret
+
+1:
+	/*
+	 * Lots of dead code left by above -- should jump further (but
+	 * fxtract is probably still the best way to handle denormals).
+	 */
 	pushl	%ebp
 	movl	%esp,%ebp
@@ -45,9 +70,34 @@
 	fldl	8(%ebp)
 	fxtract
-	fstp	%st
+	fstp	%st(0)

 	fistpl	-4(%ebp)
 	movl	-4(%ebp),%eax

+	/* fistpl yields __INT_MIN for NaN, Inf and 0. */
+	cmpl	$__INT_MIN,%eax
+	je	.L2
+
+.L1:
 	leave
 	ret
+
+.L2:
+	fldl	8(%ebp)
+	fldz
+	fucompp
+	fnstsw	%ax
+	sahf
+	jp	.L3
+	jz	.L4
+
+	movl	$FP_ILOGBINF,%eax
+	jmp	.L1
+
+.L3:
+	movl	$FP_ILOGBNAN,%eax
+	jmp	.L1
+
+.L4:
+	movl	$FP_ILOGB0,%eax
+	jmp	.L1
%%%

The "if 0" clause gives better classification (it has rotted - the it needs
an adjustment of 1023 instead of 1022).  The else clase is micro-optimized
to avoid a branch.

Tweaked ilogb() (same branch avoidance):

%%%
	int xilogb(double x)
{
	int32_t hx,lx,ix,xx;

	EXTRACT_WORDS(hx,lx,x);
	xx = hx & 0x7ff00000;
	if ((uint32_t)(xx-1)<0x7fefffff) return (xx>>20)-1022;
	if(xx==0) {
	    hx &= 0x7fffffff;
	    if((hx|lx)==0)
		return 0x80000001;	/* ilogb(0) = 0x80000001 */
	    else			/* subnormal x */
		if(hx==0) {
		    for (ix = -1043; lx>0; lx<<=1) ix -=1;
		} else {
		    for (ix = -1022,hx<<=11; hx>0; hx<<=1) ix -=1;
		}
	    return ix;
	}
	else return 0x7fffffff;
}
%%%

Bruce



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20040610074734.L6340>