Date: Wed, 08 Apr 2009 21:58:02 +0200 From: Christoph Mallon <christoph.mallon@gmx.de> To: freebsd-amd64@freebsd.org, freebsd-i386@freebsd.org Cc: Ed Schouten <ed@FreeBSD.org> Subject: [PATCH] Simplify in*() and out*() functions of AMD64 and i386 Message-ID: <49DD01CA.3040900@gmx.de>
next in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------010606040208020605070204 Content-Type: text/plain; charset=ISO-8859-15; format=flowed Content-Transfer-Encoding: 7bit Hi amd64@ and i386@, attached is a patch which simplifies the in*() and out*() functions for I/O port access of AMD64 and i386. It removes an unnecessary distinction of cases for inb() and outb(), which was used to generate better code for ports < 256. This is unnecessary, because GCC supports an asm input constraint to handle this ("N"). The stale comment, which states there is no constraint for this, is removed, too. Also the {in,out}{w,l}() get treated with this constraint. They had no special case before, so now better code is generated for them. Further, the unnecessary "cld" is removed from {in,out}s{b,w,l}(), because it is guaranteed by the ABI that the direction flag is cleared. All in all the code for in/out gets a bit simpler. Comments are welcome Christoph --------------010606040208020605070204 Content-Type: text/plain; name="inout.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="inout.diff" Index: sys/i386/include/cpufunc.h =================================================================== --- sys/i386/include/cpufunc.h (Revision 190841) +++ sys/i386/include/cpufunc.h (Arbeitskopie) @@ -170,177 +170,97 @@ __asm __volatile("hlt"); } -#if !defined(__GNUCLIKE_BUILTIN_CONSTANT_P) || __GNUCLIKE_ASM < 3 - -#define inb(port) inbv(port) -#define outb(port, data) outbv(port, data) - -#else /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3 */ - -/* - * The following complications are to get around gcc not having a - * constraint letter for the range 0..255. We still put "d" in the - * constraint because "i" isn't a valid constraint when the port - * isn't constant. This only matters for -O0 because otherwise - * the non-working version gets optimized away. - * - * Use an expression-statement instead of a conditional expression - * because gcc-2.6.0 would promote the operands of the conditional - * and produce poor code for "if ((inb(var) & const1) == const2)". - * - * The unnecessary test `(port) < 0x10000' is to generate a warning if - * the `port' has type u_short or smaller. Such types are pessimal. - * This actually only works for signed types. The range check is - * careful to avoid generating warnings. - */ -#define inb(port) __extension__ ({ \ - u_char _data; \ - if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ - && (port) < 0x10000) \ - _data = inbc(port); \ - else \ - _data = inbv(port); \ - _data; }) - -#define outb(port, data) ( \ - __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ - && (port) < 0x10000 \ - ? outbc(port, data) : outbv(port, data)) - -static __inline u_char -inbc(u_int port) +static inline u_char +inb(u_short port) { - u_char data; - - __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port))); - return (data); + u_char data; + __asm volatile("inb %1, %0" : "=a" (data) : "Nd" (port)); + return data; } -static __inline void -outbc(u_int port, u_char data) +static inline u_int +inl(u_short port) { - __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port))); -} - -#endif /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3*/ - -static __inline u_char -inbv(u_int port) -{ - u_char data; - /* - * We use %%dx and not %1 here because i/o is done at %dx and not at - * %edx, while gcc generates inferior code (movw instead of movl) - * if we tell it to load (u_short) port. - */ - __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); + u_int data; + __asm volatile("inl %1, %0" : "=a" (data) : "Nd" (port)); return (data); } -static __inline u_int -inl(u_int port) +static inline void +insb(u_short port, void *addr, size_t cnt) { - u_int data; - - __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port)); - return (data); + __asm volatile("rep; insb" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); } -static __inline void -insb(u_int port, void *addr, size_t cnt) +static inline void +insw(u_short port, void *addr, size_t cnt) { - __asm __volatile("cld; rep; insb" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); + __asm volatile("rep; insw" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); } -static __inline void -insw(u_int port, void *addr, size_t cnt) +static inline void +insl(u_short port, void *addr, size_t cnt) { - __asm __volatile("cld; rep; insw" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); + __asm volatile("rep; insl" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); } static __inline void -insl(u_int port, void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; insl" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); -} - -static __inline void invd(void) { __asm __volatile("invd"); } -static __inline u_short -inw(u_int port) +static inline u_short +inw(u_short port) { - u_short data; - - __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port)); + u_short data; + __asm volatile("inw %1, %0" : "=a" (data) : "Nd" (port)); return (data); } -static __inline void -outbv(u_int port, u_char data) +static inline void +outb(u_short port, u_char data) { - u_char al; - /* - * Use an unnecessary assignment to help gcc's register allocator. - * This make a large difference for gcc-1.40 and a tiny difference - * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for - * best results. gcc-2.6.0 can't handle this. - */ - al = data; - __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); + __asm volatile("outb %0, %1" : : "a" (data), "Nd" (port)); } -static __inline void -outl(u_int port, u_int data) +static inline void +outl(u_short port, u_int data) { - /* - * outl() and outw() aren't used much so we haven't looked at - * possible micro-optimizations such as the unnecessary - * assignment for them. - */ - __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port)); + __asm volatile("outl %0, %1" : : "a" (data), "Nd" (port)); } -static __inline void -outsb(u_int port, const void *addr, size_t cnt) +static inline void +outsb(u_short port, const void *addr, size_t cnt) { - __asm __volatile("cld; rep; outsb" - : "+S" (addr), "+c" (cnt) - : "d" (port)); + __asm volatile("rep; outsb" : "+S" (addr), "+c" (cnt) : "d" (port)); } -static __inline void -outsw(u_int port, const void *addr, size_t cnt) +static inline void +outsw(u_short port, const void *addr, size_t cnt) { - __asm __volatile("cld; rep; outsw" - : "+S" (addr), "+c" (cnt) - : "d" (port)); + __asm volatile("rep; outsw" : "+S" (addr), "+c" (cnt) : "d" (port)); } -static __inline void -outsl(u_int port, const void *addr, size_t cnt) +static inline void +outsl(u_short port, const void *addr, size_t cnt) { - __asm __volatile("cld; rep; outsl" - : "+S" (addr), "+c" (cnt) - : "d" (port)); + __asm volatile("rep; outsl" : "+S" (addr), "+c" (cnt) : "d" (port)); } -static __inline void -outw(u_int port, u_short data) +static inline void +outw(u_short port, u_short data) { - __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); + __asm volatile("outw %0, %1" : : "a" (data), "Nd" (port)); } static __inline void Index: sys/i386/i386/machdep.c =================================================================== --- sys/i386/i386/machdep.c (Revision 190841) +++ sys/i386/i386/machdep.c (Arbeitskopie) @@ -3555,45 +3555,24 @@ #ifdef KDB /* - * Provide inb() and outb() as functions. They are normally only - * available as macros calling inlined functions, thus cannot be - * called from the debugger. - * - * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. + * Provide inb() and outb() as functions. They are normally only available as + * inline functions, thus cannot be called from the debugger. */ -#undef inb -#undef outb - /* silence compiler warnings */ -u_char inb(u_int); -void outb(u_int, u_char); +u_char inb_(u_short); +void outb_(u_short, u_char); u_char -inb(u_int port) +inb_(u_short port) { - u_char data; - /* - * We use %%dx and not %1 here because i/o is done at %dx and not at - * %edx, while gcc generates inferior code (movw instead of movl) - * if we tell it to load (u_short) port. - */ - __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); - return (data); + return inb(port); } void -outb(u_int port, u_char data) +outb_(u_short port, u_char data) { - u_char al; - /* - * Use an unnecessary assignment to help gcc's register allocator. - * This make a large difference for gcc-1.40 and a tiny difference - * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for - * best results. gcc-2.6.0 can't handle this. - */ - al = data; - __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); + outb(port, data); } #endif /* KDB */ Index: sys/amd64/include/cpufunc.h =================================================================== --- sys/amd64/include/cpufunc.h (Revision 190841) +++ sys/amd64/include/cpufunc.h (Arbeitskopie) @@ -164,177 +164,97 @@ __asm __volatile("hlt"); } -#if !defined(__GNUCLIKE_BUILTIN_CONSTANT_P) || __GNUCLIKE_ASM < 3 - -#define inb(port) inbv(port) -#define outb(port, data) outbv(port, data) - -#else /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3 */ - -/* - * The following complications are to get around gcc not having a - * constraint letter for the range 0..255. We still put "d" in the - * constraint because "i" isn't a valid constraint when the port - * isn't constant. This only matters for -O0 because otherwise - * the non-working version gets optimized away. - * - * Use an expression-statement instead of a conditional expression - * because gcc-2.6.0 would promote the operands of the conditional - * and produce poor code for "if ((inb(var) & const1) == const2)". - * - * The unnecessary test `(port) < 0x10000' is to generate a warning if - * the `port' has type u_short or smaller. Such types are pessimal. - * This actually only works for signed types. The range check is - * careful to avoid generating warnings. - */ -#define inb(port) __extension__ ({ \ - u_char _data; \ - if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ - && (port) < 0x10000) \ - _data = inbc(port); \ - else \ - _data = inbv(port); \ - _data; }) - -#define outb(port, data) ( \ - __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ - && (port) < 0x10000 \ - ? outbc(port, data) : outbv(port, data)) - -static __inline u_char -inbc(u_int port) +static inline u_char +inb(u_short port) { - u_char data; - - __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port))); - return (data); + u_char data; + __asm volatile("inb %1, %0" : "=a" (data) : "Nd" (port)); + return data; } -static __inline void -outbc(u_int port, u_char data) +static inline u_int +inl(u_short port) { - __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port))); -} - -#endif /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3*/ - -static __inline u_char -inbv(u_int port) -{ - u_char data; - /* - * We use %%dx and not %1 here because i/o is done at %dx and not at - * %edx, while gcc generates inferior code (movw instead of movl) - * if we tell it to load (u_short) port. - */ - __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); + u_int data; + __asm volatile("inl %1, %0" : "=a" (data) : "Nd" (port)); return (data); } -static __inline u_int -inl(u_int port) +static inline void +insb(u_short port, void *addr, size_t cnt) { - u_int data; - - __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port)); - return (data); + __asm volatile("rep; insb" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); } -static __inline void -insb(u_int port, void *addr, size_t cnt) +static inline void +insw(u_short port, void *addr, size_t cnt) { - __asm __volatile("cld; rep; insb" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); + __asm volatile("rep; insw" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); } -static __inline void -insw(u_int port, void *addr, size_t cnt) +static inline void +insl(u_short port, void *addr, size_t cnt) { - __asm __volatile("cld; rep; insw" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); + __asm volatile("rep; insl" + : "+D" (addr), "+c" (cnt) + : "d" (port) + : "memory"); } static __inline void -insl(u_int port, void *addr, size_t cnt) -{ - __asm __volatile("cld; rep; insl" - : "+D" (addr), "+c" (cnt) - : "d" (port) - : "memory"); -} - -static __inline void invd(void) { __asm __volatile("invd"); } -static __inline u_short -inw(u_int port) +static inline u_short +inw(u_short port) { - u_short data; - - __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port)); + u_short data; + __asm volatile("inw %1, %0" : "=a" (data) : "Nd" (port)); return (data); } -static __inline void -outbv(u_int port, u_char data) +static inline void +outb(u_short port, u_char data) { - u_char al; - /* - * Use an unnecessary assignment to help gcc's register allocator. - * This make a large difference for gcc-1.40 and a tiny difference - * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for - * best results. gcc-2.6.0 can't handle this. - */ - al = data; - __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); + __asm volatile("outb %0, %1" : : "a" (data), "Nd" (port)); } -static __inline void -outl(u_int port, u_int data) +static inline void +outl(u_short port, u_int data) { - /* - * outl() and outw() aren't used much so we haven't looked at - * possible micro-optimizations such as the unnecessary - * assignment for them. - */ - __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port)); + __asm volatile("outl %0, %1" : : "a" (data), "Nd" (port)); } -static __inline void -outsb(u_int port, const void *addr, size_t cnt) +static inline void +outsb(u_short port, const void *addr, size_t cnt) { - __asm __volatile("cld; rep; outsb" - : "+S" (addr), "+c" (cnt) - : "d" (port)); + __asm volatile("rep; outsb" : "+S" (addr), "+c" (cnt) : "d" (port)); } -static __inline void -outsw(u_int port, const void *addr, size_t cnt) +static inline void +outsw(u_short port, const void *addr, size_t cnt) { - __asm __volatile("cld; rep; outsw" - : "+S" (addr), "+c" (cnt) - : "d" (port)); + __asm volatile("rep; outsw" : "+S" (addr), "+c" (cnt) : "d" (port)); } -static __inline void -outsl(u_int port, const void *addr, size_t cnt) +static inline void +outsl(u_short port, const void *addr, size_t cnt) { - __asm __volatile("cld; rep; outsl" - : "+S" (addr), "+c" (cnt) - : "d" (port)); + __asm volatile("rep; outsl" : "+S" (addr), "+c" (cnt) : "d" (port)); } -static __inline void -outw(u_int port, u_short data) +static inline void +outw(u_short port, u_short data) { - __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); + __asm volatile("outw %0, %1" : : "a" (data), "Nd" (port)); } static __inline void Index: sys/amd64/amd64/machdep.c =================================================================== --- sys/amd64/amd64/machdep.c (Revision 190841) +++ sys/amd64/amd64/machdep.c (Arbeitskopie) @@ -2178,45 +2178,24 @@ #ifdef KDB /* - * Provide inb() and outb() as functions. They are normally only - * available as macros calling inlined functions, thus cannot be - * called from the debugger. - * - * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. + * Provide inb() and outb() as functions. They are normally only available as + * inline functions, thus cannot be called from the debugger. */ -#undef inb -#undef outb - /* silence compiler warnings */ -u_char inb(u_int); -void outb(u_int, u_char); +u_char inb_(u_short); +void outb_(u_short, u_char); u_char -inb(u_int port) +inb_(u_short port) { - u_char data; - /* - * We use %%dx and not %1 here because i/o is done at %dx and not at - * %edx, while gcc generates inferior code (movw instead of movl) - * if we tell it to load (u_short) port. - */ - __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); - return (data); + return inb(port); } void -outb(u_int port, u_char data) +outb_(u_short port, u_char data) { - u_char al; - /* - * Use an unnecessary assignment to help gcc's register allocator. - * This make a large difference for gcc-1.40 and a tiny difference - * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for - * best results. gcc-2.6.0 can't handle this. - */ - al = data; - __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); + outb(port, data); } #endif /* KDB */ --------------010606040208020605070204--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?49DD01CA.3040900>