Date: Fri, 2 Mar 2012 21:29:00 +0100
From: Tijl Coosemans <tijl@freebsd.org>
To: Bruce Evans <brde@optusnet.com.au>
Cc: svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org, Dimitry Andric <dim@freebsd.org>
Subject: Re: svn commit: r232266 - in head/sys: amd64/include i386/include pc98/include x86/include
Message-ID: <201203022129.10849.tijl@freebsd.org>
In-Reply-To: <20120302223355.Q2543@besplex.bde.org>
References: <201202281939.q1SJdtYr084858@svn.freebsd.org> <20120302130435.J929@besplex.bde.org> <20120302223355.Q2543@besplex.bde.org>
On Friday 02 March 2012 12:52:41 Bruce Evans wrote:
> On Fri, 2 Mar 2012, Bruce Evans wrote:
> 
> > On Thu, 1 Mar 2012, Tijl Coosemans wrote:
> > 
> >> On Wednesday 29 February 2012 08:44:54 Bruce Evans wrote:
> >>> ...
> >>> So any macro version must use gcc features to be safe. The following
> >>> seems to work:
> >>> 
> >>> #define __bswap16(x) __extension__ ({ __uint16_t __x = x; \
> >>>     (__uint16_t)(__x << 8 | __x >> 8); })
> >>> 
> >>> clang now produces "rolw $8,x" when x is volatile. This seems to
> >>> violate volatile semantics. gcc produces load-rolw-store then. Both
> >>> produce "rolw $8,x" when x is not volatile.
> >> 
> >> I'll have a closer look at the patch tomorrow, but the Intel
> >> documentation for the bswap instruction recommends using xchg for
> >> bswap16.
> > 
> > That's what i386 used to do, using an asm (see for example the RELENG_4
> > version), but the asm was replaced by C code and the compiler apparently
> > knows better. This might depend on -mtune. I tested with the default,
> > ...
> 
> Here is another version. It has now been tested a bit at runtime.
> I had to restore almost all of the complications, so the following
> reduces to mostly style changes including comments about surprising
> details. It has 1 fix for clang (the committed version doesn't
> work for clang).
> 
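[Aside for illustration, not part of Bruce's patch: the reason plain
macro versions are unsafe is that they evaluate their argument more
than once. A minimal standalone sketch, assuming a GCC-compatible
compiler; the BSWAP16_NAIVE/BSWAP16_SAFE names and the next() helper
are made up for this example:

#include <stdint.h>
#include <stdio.h>

/* Naive form: the argument appears twice, so it is evaluated twice. */
#define BSWAP16_NAIVE(x)	((uint16_t)((x) << 8 | (x) >> 8))

/* Statement-expression form: the argument is evaluated exactly once. */
#define BSWAP16_SAFE(x)	__extension__ ({		\
	uint16_t __x = (x);				\
	(uint16_t)(__x << 8 | __x >> 8);		\
})

static int ncalls;

static uint16_t
next(void)
{
	ncalls++;
	return (0x1234);
}

int
main(void)
{
	(void)BSWAP16_NAIVE(next());
	printf("naive: %d calls\n", ncalls);	/* prints 2 */
	ncalls = 0;
	(void)BSWAP16_SAFE(next());
	printf("safe: %d call\n", ncalls);	/* prints 1 */
	return (0);
}

The __builtin_constant_p() dispatch in the patch below gets the same
single-evaluation behaviour for variable arguments by forwarding them
to the __bswap*_var() inline functions.]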
> % Index: endian.h
> % ===================================================================
> % RCS file: /home/ncvs/src/sys/x86/include/endian.h,v
> % retrieving revision 1.1
> % diff -u -2 -r1.1 endian.h
> % --- endian.h	28 Feb 2012 19:39:54 -0000	1.1
> % +++ endian.h	2 Mar 2012 10:43:53 -0000
> % @@ -37,8 +37,4 @@
> % #include <sys/_types.h>
> % 
> % -#ifdef __cplusplus
> % -extern "C" {
> % -#endif
> % -
> % /*
> %  * Define the order of 32-bit words in 64-bit words.
> % @@ -68,25 +64,17 @@
> % #endif
> % 
> % -#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
> % -
> % -#define __bswap16_const(_x) (__uint16_t)((_x) << 8 | (_x) >> 8)
> % +#define __bswap16_gen(x) (__uint16_t)((x) << 8 | (x) >> 8)
> % +#define __bswap32_gen(x) \
> % + (((__uint32_t)__bswap16(x) << 16) | __bswap16((x) >> 16))
> % +#define __bswap64_gen(x) \
> % + (((__uint64_t)__bswap32(x) << 32) | __bswap32((x) >> 32))
> % 
> % -#define __bswap16(_x) \
> % - (__builtin_constant_p(_x) ? \
> % - __bswap16_const((__uint16_t)(_x)) : __bswap16_var(_x))
> % -
> % -#define __bswap32_const(_x) \
> % - (((__uint32_t)__bswap16(_x) << 16) | __bswap16((_x) >> 16))
> % -
> % -#define __bswap32(_x) \
> % - (__builtin_constant_p(_x) ? \
> % - __bswap32_const((__uint32_t)(_x)) : __bswap32_var(_x))
> % +#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
> % 
> % -#define __bswap64_const(_x) \
> % - (((__uint64_t)__bswap32(_x) << 32) | __bswap32((_x) >> 32))
> % +/* The following mess is to avoid multiple evaluation of x. */
> % 
> % -#define __bswap64(_x) \
> % - (__builtin_constant_p(_x) ? \
> % - __bswap64_const((__uint64_t)(_x)) : __bswap64_var(_x))
> % +#define __bswap16(x) \
> % + (__builtin_constant_p(x) ? \
> % + __bswap16_gen((__uint16_t)(x)) : __bswap16_var((__uint16_t)(x)))
> 
> For clang, x must be cast in the `var' clause too, since when x is
> something like 0x12345678 (having been passed here without conversion
> by __bswap32()), clang parses the `var' clause and warns about the
> implicit truncation of 0x12345678 to 0x5678 in it. This is a bug
> in clang -- when warning about fine details, it is important not to
> do it for non-problems.
> 
> % 
> % static __inline __uint16_t
> % @@ -94,7 +82,19 @@
> % {
> % 
> % - return (__bswap16_const(_x));
> % + return (__bswap16_gen(_x));
> % }
> % 
> % +/*
> % + * The following messes are old optimizations for gcc. The messes have
> % + * been reduced significantly, but I don't know how to implement the
> % + * preferred way of defining defaults above and overriding them cleanly
> % + * here. Even clang needs help for the 64-bit case, so to keep the
> % + * ifdefs sane we use this for the 32-bit case with clang too.
> % + */
> % +
> % +#define __bswap32(x) \
> % + (__builtin_constant_p(x) ? \
> % + __bswap32_gen((__uint32_t)(x)) : __bswap32_var(x))
> % +
> % static __inline __uint32_t
> % __bswap32_var(__uint32_t _x)
> % @@ -105,34 +105,37 @@
> % }
> % 
> % +#define __bswap64(x) \
> % + (__builtin_constant_p(x) ? \
> % + __bswap64_gen((__uint64_t)(x)) : __bswap64_var(x))
> % +
> % static __inline __uint64_t
> % __bswap64_var(__uint64_t _x)
> % {
> % -#ifdef _LP64
> % +
> % +#ifdef _LP64
> % __asm ("bswap %0" : "+r" (_x));
> % return (_x);
> % #else
> % - return (__bswap64_const(_x));
> % + /*
> % + * It is important for the optimizations that the following is not
> % + * really generic, but expands to 2 __bswap32_var()'s.
> % + */
> % + return (__bswap64_gen(_x));
> % #endif
> % }
> % 
> % -#define __htonl(x) __bswap32(x)
> % -#define __htons(x) __bswap16(x)
> % -#define __ntohl(x) __bswap32(x)
> % -#define __ntohs(x) __bswap16(x)
> % +#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
> % 
> % -#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P) */
> % -
> % -/*
> % - * No optimizations are available for this compiler. Fall back to
> % - * non-optimized functions by defining the constant usually used to prevent
> % - * redefinition.
> % - */
> % -#define _BYTEORDER_FUNC_DEFINED
> % +/* XXX these are broken, since they evaluate x more than once. */

Maybe make them static inline functions. That won't be very efficient
at -O0, but it will be correct, and this implementation probably isn't
very efficient anyway (a rough sketch follows at the end of this
message). Otherwise the patch looks good to me.

> % +#define __bswap16(x) (__bswap16_gen((__uint16_t)(x)))
> % +#define __bswap32(x) (__bswap32_gen((__uint32_t)(x)))
> % +#define __bswap64(x) (__bswap64_gen((__uint64_t)(x)))
> % 
> % #endif /* __GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
> % 
> % -#ifdef __cplusplus
> % -}
> % -#endif
> % +#define __htonl(x) __bswap32(x)
> % +#define __htons(x) __bswap16(x)
> % +#define __ntohl(x) __bswap32(x)
> % +#define __ntohs(x) __bswap16(x)
> % 
> % #endif /* !_MACHINE_ENDIAN_H_ */
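Roughly what I have in mind for the fallback branch; an untested
sketch only, assuming the __uintNN_t types from <sys/_types.h> like
the rest of endian.h:

/*
 * Sketch only: single-evaluation fallbacks for compilers without
 * __GNUCLIKE_ASM/__GNUCLIKE_BUILTIN_CONSTANT_P, replacing the macros
 * marked XXX above.  Function arguments are evaluated once, so side
 * effects in the argument behave as expected.
 */
static __inline __uint16_t
__bswap16(__uint16_t _x)
{

	return ((__uint16_t)(_x << 8 | _x >> 8));
}

static __inline __uint32_t
__bswap32(__uint32_t _x)
{

	return (((__uint32_t)__bswap16(_x) << 16) | __bswap16(_x >> 16));
}

static __inline __uint64_t
__bswap64(__uint64_t _x)
{

	return (((__uint64_t)__bswap32(_x) << 32) | __bswap32(_x >> 32));
}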