Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 2 Mar 2012 22:52:41 +1100 (EST)
From:      Bruce Evans <brde@optusnet.com.au>
To:        Bruce Evans <brde@optusnet.com.au>
Cc:        svn-src-head@freebsd.org, Tijl Coosemans <tijl@freebsd.org>, src-committers@freebsd.org, Dimitry Andric <dim@freebsd.org>, svn-src-all@freebsd.org
Subject:   Re: svn commit: r232266 - in head/sys: amd64/include i386/include pc98/include x86/include
Message-ID:  <20120302223355.Q2543@besplex.bde.org>
In-Reply-To: <20120302130435.J929@besplex.bde.org>
References:  <201202281939.q1SJdtYr084858@svn.freebsd.org> <20120229160522.W2514@besplex.bde.org> <20120229181608.S2887@besplex.bde.org> <201203012346.20648.tijl@freebsd.org> <20120302130435.J929@besplex.bde.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, 2 Mar 2012, Bruce Evans wrote:

> On Thu, 1 Mar 2012, Tijl Coosemans wrote:
>
>> On Wednesday 29 February 2012 08:44:54 Bruce Evans wrote:
>>> ...
>>> So any macro version must use gcc features to be safe.  The following
>>> seems to work:
>>> 
>>> #define	__bswap16(x)	__extension__ ({ __uint16_t __x = x;
>>>  	(__uint16_t)(__x << 8 | __x >> 8); })
>>> 
>>> clang now produces "rolw $8,x" when x is volatile.  This seems to
>>> violate volatile semantics.  gcc produces load-rolw-store then.  Both
>>> produce "rolw $8,x" when x is not volatile.
>> 
>> I'll have a closer look at the patch tomorrow, but the Intel
>> documentation for the bswap instruction recommends to use xchg for
>> bswap16.
>
> That's what i386 used to do, using an asm (see for example the RELENG_4
> version), but the asm was replaced by C code and the compiler apparently
> knows better.  This might depend on -mtune.  I tested with the default,
> ...

Here is another version.  It has now been tested a bit at runtime.
I had to restore almost all of the complications, so the following
reduces to mostly style changes including comments about surprising
details.  It has 1 fix for clang (the comitted version doesn't
work for clang).

% Index: endian.h
% ===================================================================
% RCS file: /home/ncvs/src/sys/x86/include/endian.h,v
% retrieving revision 1.1
% diff -u -2 -r1.1 endian.h
% --- endian.h	28 Feb 2012 19:39:54 -0000	1.1
% +++ endian.h	2 Mar 2012 10:43:53 -0000
% @@ -37,8 +37,4 @@
%  #include <sys/_types.h>
% 
% -#ifdef __cplusplus
% -extern "C" {
% -#endif
% -
%  /*
%   * Define the order of 32-bit words in 64-bit words.
% @@ -68,25 +64,17 @@
%  #endif
% 
% -#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
% -
% -#define	__bswap16_const(_x)	(__uint16_t)((_x) << 8 | (_x) >> 8)
% +#define	__bswap16_gen(x)	(__uint16_t)((x) << 8 | (x) >> 8)
% +#define	__bswap32_gen(x)		\
% +	(((__uint32_t)__bswap16(x) << 16) | __bswap16((x) >> 16))
% +#define	__bswap64_gen(x)		\
% +	(((__uint64_t)__bswap32(x) << 32) | __bswap32((x) >> 32))
% 
% -#define	__bswap16(_x)			\
% -	(__builtin_constant_p(_x) ?	\
% -	    __bswap16_const((__uint16_t)(_x)) : __bswap16_var(_x))
% -
% -#define	__bswap32_const(_x)		\
% -	(((__uint32_t)__bswap16(_x) << 16) | __bswap16((_x) >> 16))
% -
% -#define	__bswap32(_x)			\
% -	(__builtin_constant_p(_x) ?	\
% -	    __bswap32_const((__uint32_t)(_x)) : __bswap32_var(_x))
% +#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
% 
% -#define	__bswap64_const(_x)		\
% -	(((__uint64_t)__bswap32(_x) << 32) | __bswap32((_x) >> 32))
% +/* The following mess is to avoid multiple evaluation of x. */
% 
% -#define	__bswap64(_x)			\
% -	(__builtin_constant_p(_x) ?	\
% -	    __bswap64_const((__uint64_t)(_x)) : __bswap64_var(_x))
% +#define	__bswap16(x)			\
% +	(__builtin_constant_p(x) ?	\
% +	    __bswap16_gen((__uint16_t)(x)) : __bswap16_var((__uint16_t)(x)))

For clang, x must be cast in the `var' clause too, since when x is
something like 0x12345678 (having been passed here without conversion
by __bswap32()), clang parses the `var' clause and warns about the
implicit truncation of 0x12345678 to 0x5678 in it.  This is a bug
in clang -- when warning about fine details, it is important not to
do it for non-problems.

% 
%  static __inline __uint16_t
% @@ -94,7 +82,19 @@
%  {
% 
% -	return (__bswap16_const(_x));
% +	return (__bswap16_gen(_x));
%  }
% 
% +/*
% + * The following messes are old optimizations for gcc.  The messes have
% + * been reduced significantly, but I don't know how to implement the
% + * preferred way of defining defaults above and overriding them cleanly
% + * here.  Even clang needs help for the 64-bit case, so to keep the
% + * ifdefs sane we use this for the 32-bit case with clang too.
% + */
% +
% +#define	__bswap32(x)			\
% +	(__builtin_constant_p(x) ?	\
% +	    __bswap32_gen((__uint32_t)(x)) : __bswap32_var(x))
% +
%  static __inline __uint32_t
%  __bswap32_var(__uint32_t _x)
% @@ -105,34 +105,37 @@
%  }
% 
% +#define	__bswap64(x)			\
% +	(__builtin_constant_p(x) ?	\
% +	    __bswap64_gen((__uint64_t)(x)) : __bswap64_var(x))
% +
%  static __inline __uint64_t
%  __bswap64_var(__uint64_t _x)
%  {
% -#ifdef	_LP64
% +
% +#ifdef _LP64
%  	__asm ("bswap %0" : "+r" (_x));
%  	return (_x);
%  #else
% -	return (__bswap64_const(_x));
% +	/*
% +	 * It is important for the optimizations that the following is not
% +	 * really generic, but expands to 2 __bswap32_var()'s.
% +	 */
% +	return (__bswap64_gen(_x));
%  #endif
%  }
% 
% -#define	__htonl(x)	__bswap32(x)
% -#define	__htons(x)	__bswap16(x)
% -#define	__ntohl(x)	__bswap32(x)
% -#define	__ntohs(x)	__bswap16(x)
% +#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
% 
% -#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P) */
% -
% -/*
% - * No optimizations are available for this compiler.  Fall back to
% - * non-optimized functions by defining the constant usually used to prevent
% - * redefinition.
% - */
% -#define	_BYTEORDER_FUNC_DEFINED
% +/* XXX these are broken, since they evaluate x more than once. */
% +#define	__bswap16(x)	(__bswap16_gen((__uint16_t)(x)))
% +#define	__bswap32(x)	(__bswap32_gen((__uint32_t)(x)))
% +#define	__bswap64(x)	(__bswap64_gen((__uint64_t)(x)))
% 
%  #endif /* __GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
% 
% -#ifdef __cplusplus
% -}
% -#endif
% +#define	__htonl(x)	__bswap32(x)
% +#define	__htons(x)	__bswap16(x)
% +#define	__ntohl(x)	__bswap32(x)
% +#define	__ntohs(x)	__bswap16(x)
% 
%  #endif /* !_MACHINE_ENDIAN_H_ */

Here is a cut down version showing the clang bug:

% static inline int
% unused(short x)
% {
% 	return (x);
% }
% 
% int
% foo(void)
% {
% 	return (1 ? 0 : unused(0x12345678));
% }

% w.c:10:25: warning: implicit conversion from 'int' to 'short' changes value from 305419896 to 22136 [-Wconstant-conversion]
%         return (1 ? 0 : unused(0x12345678));
%                         ~~~~~~ ^~~~~~~~~~
% 1 warning generated.

This shows another bug: the hex constants are obfuscated in the error message
by printing them in decimal.

Bruce



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120302223355.Q2543>