Date: Sun, 24 Jun 2001 10:02:50 -0700 (PDT) From: Matt Dillon <dillon@earth.backplane.com> To: Bruce Evans <bde@zeta.org.au> Cc: Mikhail Teterin <mi@aldan.algebra.com>, jlemon@FreeBSD.ORG, cvs-committers@FreeBSD.ORG, cvs-all@FreeBSD.ORG Subject: kernel size w/ optimized bzero() & patch set (was Re: Inline optimized bzero (was Re: cvs commit: src/sys/netinettcp_subr.c)) Message-ID: <200106241702.f5OH2oN78720@earth.backplane.com> References: <Pine.BSF.4.21.0106241725360.54646-100000@besplex.bde.org>
next in thread | previous in thread | raw e-mail | index | archive | help
Ok, how about this. I replaced bzero() with the inline and placed it
in the machine-dependent section of code. I managed to knock the inline
code generation down to the point where it does not bloat the resulting
kernel binary. As an example of this, the 'register int z = 0' caused
all the assignments to 0 to use 'movl %eax,...' (3 byte instruction)
instead of 'movl $0,...' (7 byte instruction). The kernel size is
around 6000 bytes larger without that optimization. Sometimes GCC's
optimizer gets in the way :-(
I am amazed by the results... and I found a couple of interesting things
out too. For example, tcp_input bzero's a number of 8 and 12 byte
structures, not just the 20 byte structures we were looking at previously.
I don't test for address alignment (it can't be done in the inline and
still have good code), but i586_bzero() doesn't check for address
alignment either so it is no worse than before.
-Matt
Normal bzero:
apollo:/usr/src/sys/compile/MOBILE# size kernel
text data bss dec hex filename
1850705 159392 144536 2154633 20e089 kernel
Inline bzero:
apollo:/usr/src/sys/compile/MOBILE# size kernel
text data bss dec hex filename
1850833 159396 144536 2154765 20e10d kernel
Patch set (relative to -stable) (UNTESTED):
Index: i386/i386/identcpu.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/identcpu.c,v
retrieving revision 1.80.2.4
diff -u -r1.80.2.4 identcpu.c
--- i386/i386/identcpu.c 2000/09/30 03:32:21 1.80.2.4
+++ i386/i386/identcpu.c 2001/06/24 16:16:42
@@ -504,7 +504,7 @@
#if defined(I486_CPU)
case CPUCLASS_486:
printf("486");
- bzero = i486_bzero;
+ md_bzero = i486_bzero;
break;
#endif
#if defined(I586_CPU)
Index: i386/i386/support.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/support.s,v
retrieving revision 1.67.2.3
diff -u -r1.67.2.3 support.s
--- i386/i386/support.s 2000/09/30 02:49:33 1.67.2.3
+++ i386/i386/support.s 2001/06/24 15:57:51
@@ -49,6 +49,8 @@
_bcopy_vector:
.long _generic_bcopy
.globl _bzero
+ .globl _md_bzero
+_md_bzero:
_bzero:
.long _generic_bzero
.globl _copyin_vector
Index: i386/include/asnames.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/Attic/asnames.h,v
retrieving revision 1.44.2.1
diff -u -r1.44.2.1 asnames.h
--- i386/include/asnames.h 2000/05/16 06:58:10 1.44.2.1
+++ i386/include/asnames.h 2001/06/24 15:59:07
@@ -244,6 +244,7 @@
#define _lapic lapic
#define _linux_sigcode linux_sigcode
#define _linux_szsigcode linux_szsigcode
+#define _md_bzero md_bzero
#define _mi_startup mi_startup
#define _microuptime microuptime
#define _mp_gdtbase mp_gdtbase
Index: i386/include/cpufunc.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/cpufunc.h,v
retrieving revision 1.96.2.1
diff -u -r1.96.2.1 cpufunc.h
--- i386/include/cpufunc.h 2001/05/16 20:51:38 1.96.2.1
+++ i386/include/cpufunc.h 2001/06/24 16:55:05
@@ -53,6 +53,83 @@
#ifdef __GNUC__
+/*
+ * bzero() inline. IA32 specific.
+ *
+ * This function assumes that unaligned accesses are allowed. If the
+ * length is a constant we attempt to optimize small bzeros, generating
+ * only a few instructions. We also optimize medium sized bzeros with
+ * a simple loop (where call overhead would otherwise be inefficient).
+ * Anything else goes through the assembly-optimized bzero function.
+ *
+ * Do not mess around with this function without also checking the
+ * resulting assembly.
+ */
+
+void (*md_bzero) __P((void *buf, size_t len));
+
+#if 0
+
+static __inline void
+bzero(void *buf, size_t len)
+{ /* variant under #if 0: forwards every call through the md_bzero pointer */
+ md_bzero(buf, len);
+}
+
+#else
+
+static __inline void
+bzero(void *buf, size_t len)
+{ /* Zero len bytes at buf; some constant lengths use direct int stores. */
+ if (__builtin_constant_p(len)) { /* taken when the compiler sees len as a constant */
+ register int z = 0; /* this+switch results in %eax instead of $0 */
+
+ switch(len) {
+ case 16 * sizeof(int): /* 8..16 ints all share the store loop below */
+ case 15 * sizeof(int):
+ case 14 * sizeof(int):
+ case 13 * sizeof(int):
+ case 12 * sizeof(int):
+ case 11 * sizeof(int):
+ case 10 * sizeof(int):
+ case 9 * sizeof(int):
+ case 8 * sizeof(int):
+ do { /* store z one int at a time, from the last int down to offset 0 */
+ len -= sizeof(int);
+ *(int *)((char *)buf + len) = z;
+ } while(len);
+ break;
+ case 7 * sizeof(int): /* 7..1 ints: no break, each case falls into the next */
+ *((int *)buf + 6) = z;
+ case 6 * sizeof(int):
+ *((int *)buf + 5) = z;
+ case 5 * sizeof(int):
+ *((int *)buf + 4) = z;
+ case 4 * sizeof(int):
+ *((int *)buf + 3) = z;
+ case 3 * sizeof(int):
+ *((int *)buf + 2) = z;
+ case 2 * sizeof(int):
+ *((int *)buf + 1) = z;
+ case 1 * sizeof(int):
+ *((int *)buf + 0) = z;
+ case 0 * sizeof(int): /* zero length: nothing left to store */
+ break;
+ default: /* any other length: not a multiple of sizeof(int), or above 16 ints */
+ /*
+ * Warning! gcc inline no longer considers 'len' a constant in
+ * the default case of this switch.
+ */
+ md_bzero(buf, len);
+ break;
+ }
+ } else {
+ md_bzero(buf, len); /* non-constant length: call through the md_bzero pointer */
+ }
+}
+
+#endif
+
#ifdef SMP
#include <machine/lock.h> /* XXX */
#endif
@@ -508,6 +585,7 @@
int breakpoint __P((void));
u_int bsfl __P((u_int mask));
u_int bsrl __P((u_int mask));
+void (*bzero) __P((void *buf, size_t len));
void disable_intr __P((void));
void enable_intr __P((void));
u_char inb __P((u_int port));
Index: i386/isa/npx.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/isa/npx.c,v
retrieving revision 1.80
diff -u -r1.80 npx.c
--- i386/isa/npx.c 2000/01/29 16:17:36 1.80
+++ i386/isa/npx.c 2001/06/24 16:14:11
@@ -456,7 +456,7 @@
ovbcopy_vector = i586_bcopy;
}
if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO))
- bzero = i586_bzero;
+ md_bzero = i586_bzero;
if (!(flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) {
copyin_vector = i586_copyin;
copyout_vector = i586_copyout;
Index: sys/systm.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/systm.h,v
retrieving revision 1.111.2.5
diff -u -r1.111.2.5 systm.h
--- sys/systm.h 2001/01/16 12:26:21 1.111.2.5
+++ sys/systm.h 2001/06/24 15:52:22
@@ -135,7 +135,7 @@
void ovbcopy __P((const void *from, void *to, size_t len));
#ifdef __i386__
-extern void (*bzero) __P((void *buf, size_t len));
+/* in machine/cpufunc.h */
#else
void bzero __P((void *buf, size_t len));
#endif
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe cvs-all" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200106241702.f5OH2oN78720>
