Date: Fri, 21 Apr 2000 19:28:28 -0700 (PDT) From: Kris Kennaway <kris@FreeBSD.org> To: current@freebsd.org Subject: OpenSSL asm optimizations Message-ID: <Pine.BSF.4.21.0004211923001.68716-200000@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
OpenSSL includes asm code for several platforms to speed up various
operations. Currently we don't build any of this - the attached patch
turns on asm code for Pentiums and above (it relies on an uncommitted
patch to sys.mk which defines MACHINE_CPU ?= i386). Set MACHINE_CPU to
"i586" or "i686" (both are actually identical at present) and rebuild.
Typical speed improvements are between 2x and 5x for BigNum, Blowfish,
CAST, DES, MD5, RC4, RC5, RIPEMD-160, and SHA-1 operations, as measured by
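'openssl speed' on my Pentium Pro 233 (speeds are in K/sec for packets of
the given size; in each pair of tables below, the first is the existing C
code and the second is with the asm code enabled):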
=== BLOWFISH ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
blowfish cbc 3325.35k 4417.62k 4553.81k 4572.46k 4595.71k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
blowfish cbc 7073.58k 8099.29k 8286.63k 8328.53k 8315.07k
=== CAST ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
cast cbc 3249.59k 4231.19k 4394.88k 4401.69k 4437.97k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
cast cbc 6956.17k 7999.28k 8208.47k 8199.05k 8192.00k
=== DES ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
des cbc 2291.38k 2404.86k 2426.97k 2436.64k 2443.95k
des ede3 492.61k 516.87k 519.83k 521.18k 520.20k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
des cbc 4830.90k 5301.97k 5412.18k 5419.02k 5355.72k
des ede3 1831.93k 1919.80k 1929.86k 1932.36k 1913.72k
=== MD5 ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
md5 1636.10k 7736.92k 13991.76k 17495.04k 18879.83k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
md5 2609.84k 13585.19k 26511.38k 34934.97k 38629.09k
=== RC4 ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
rc4 12947.45k 16970.70k 18044.59k 18370.22k 18275.58k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
rc4 18209.51k 25287.65k 26100.65k 26139.99k 26512.82k
=== RC5 ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
rc5-32/12 cbc 5115.15k 8124.08k 8766.29k 8814.59k 8928.03k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
rc5-32/12 cbc 12462.59k 15226.54k 15804.76k 16218.86k 16447.82k
=== RIPEMD-160 ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
rmd160 849.94k 3680.04k 6326.22k 7626.07k 8123.73k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
rmd160 1279.72k 5915.67k 10461.46k 12978.21k 13944.41k
=== SHA1 ===
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
sha1 1081.31k 2844.71k 5784.80k 7765.50k 8650.75k
type 8 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
sha1 1617.59k 7664.76k 13538.05k 17012.18k 18419.89k
Kris
----
In God we Trust -- all others must submit an X.509 certificate.
-- Charles Forsythe <forsythe@alum.mit.edu>
[-- Attachment #2 --]
Index: Makefile
===================================================================
RCS file: /home/ncvs/src/secure/lib/libcrypto/Makefile,v
retrieving revision 1.17
diff -u -r1.17 Makefile
--- Makefile 2000/04/13 07:36:09 1.17
+++ Makefile 2000/04/22 02:16:57
@@ -9,11 +9,11 @@
${LCRYPTO_SRC}/err ${LCRYPTO_SRC}/evp ${LCRYPTO_SRC}/hmac \
${LCRYPTO_SRC}/lhash ${LCRYPTO_SRC}/md2 ${LCRYPTO_SRC}/md5 \
${LCRYPTO_SRC}/mdc2 ${LCRYPTO_SRC}/objects ${LCRYPTO_SRC}/pem \
- ${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/pkcs12 ${LCRYPTO_SRC}/rand \
- ${LCRYPTO_SRC}/rc2 ${LCRYPTO_SRC}/rc4 ${LCRYPTO_SRC}/rc5 \
- ${LCRYPTO_SRC}/ripemd ${LCRYPTO_SRC}/rsa ${LCRYPTO_SRC}/../rsaref \
- ${LCRYPTO_SRC}/sha ${LCRYPTO_SRC}/stack ${LCRYPTO_SRC}/txt_db \
- ${LCRYPTO_SRC}/x509 ${LCRYPTO_SRC}/x509v3
+ ${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/pkcs7 ${LCRYPTO_SRC}/pkcs12 \
+ ${LCRYPTO_SRC}/rand ${LCRYPTO_SRC}/rc2 ${LCRYPTO_SRC}/rc4 \
+ ${LCRYPTO_SRC}/rc5 ${LCRYPTO_SRC}/ripemd ${LCRYPTO_SRC}/rsa \
+ ${LCRYPTO_SRC}/../rsaref ${LCRYPTO_SRC}/sha ${LCRYPTO_SRC}/stack \
+ ${LCRYPTO_SRC}/txt_db ${LCRYPTO_SRC}/x509 ${LCRYPTO_SRC}/x509v3
LIB= crypto
SHLIB_MAJOR= 1
@@ -41,24 +41,69 @@
x_x509a.c \
# blowfish
-SRCS+= bf_cfb64.c bf_ecb.c bf_enc.c bf_ofb64.c bf_skey.c
+SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= bx86-elf.o
+SOBJS+= bx86-elf.o
+.else
+SRCS+= bf_enc.c
+.endif
+
+bx86-elf.o: bx86unix.cpp
+ cpp -DELF -x c bx86unix.cpp | as -o bx86-elf.o
+
+bx86unix.cpp: asm/bf-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bf/asm/bf-586.pl cpp > bx86unix.cpp
+
# bio
SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_nbio.c bf_null.c \
bio_cb.c bio_err.c bio_lib.c bss_acpt.c bss_bio.c bss_conn.c \
bss_fd.c bss_file.c bss_log.c bss_mem.c bss_null.c bss_sock.c
# bn
-SRCS+= bn_add.c bn_asm.c bn_blind.c bn_ctx.c bn_div.c bn_err.c bn_exp.c \
+SRCS+= bn_add.c bn_blind.c bn_ctx.c bn_div.c bn_err.c bn_exp.c \
bn_exp2.c bn_gcd.c bn_lib.c bn_mont.c bn_mpi.c bn_mul.c bn_prime.c \
bn_print.c bn_rand.c bn_recp.c bn_shift.c bn_sqr.c bn_word.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= bn86-elf.o co86-elf.o
+SOBJS+= bn86-elf.o co86-elf.o
+.else
+SRCS+= bn_asm.c
+.endif
+
+bn86-elf.o: bn86unix.cpp
+ cpp -DELF -x c bn86unix.cpp | as -o bn86-elf.o
+
+bn86unix.cpp: asm/bn-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bn/asm/bn-586.pl cpp > bn86unix.cpp
+
+co86-elf.o: co86unix.cpp
+ cpp -DELF -x c co86unix.cpp | as -o co86-elf.o
+
+co86unix.cpp: asm/co-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/bn/asm/co-586.pl cpp > co86unix.cpp
+
# buffer
SRCS+= buf_err.c buffer.c
# cast
-SRCS+= c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c
+SRCS+= c_cfb64.c c_ecb.c c_ofb64.c c_skey.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= cx86-elf.o
+SOBJS+= cx86-elf.o
+.else
+SRCS+= c_enc.c
+.endif
+
+cx86-elf.o: cx86unix.cpp
+ cpp -DELF -x c cx86unix.cpp | as -o cx86-elf.o
+
+cx86unix.cpp: asm/cast-586.pl x86asm.pl cbc.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/cast/asm/cast-586.pl cpp > cx86unix.cpp
+
# comp
SRCS+= c_rle.c c_zlib.c comp_lib.c
@@ -66,12 +111,31 @@
SRCS+= conf.c conf_err.c
# des
-SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c des_enc.c \
- ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c fcrypt.c \
- fcrypt_b.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
+SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \
+ ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c \
+ fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
rand_key.c read2pwd.c read_pwd.c rnd_keys.c rpc_enc.c set_key.c \
str2key.c supp.c xcbc_enc.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= dx86-elf.o yx86-elf.o
+SOBJS+= dx86-elf.o yx86-elf.o
+.else
+SRCS+= des_enc.c fcrypt_b.c
+.endif
+
+dx86-elf.o: dx86unix.cpp
+ cpp -DELF -x c dx86unix.cpp | as -o dx86-elf.o
+
+yx86-elf.o: yx86unix.cpp
+ cpp -DELF -x c yx86unix.cpp | as -o yx86-elf.o
+
+dx86unix.cpp: asm/des-586.pl x86asm.pl cbc.pl
+ perl -I${LCRYPTO_SRC}/perlasm -I${LCRYPTO_SRC}/des/asm ${LCRYPTO_SRC}/des/asm/des-586.pl cpp > dx86unix.cpp
+
+yx86unix.cpp: asm/crypt586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm -I${LCRYPTO_SRC}/des/asm ${LCRYPTO_SRC}/des/asm/crypt586.pl cpp > yx86unix.cpp
+
# dh
SRCS+= dh_check.c dh_err.c dh_gen.c dh_key.c dh_lib.c
@@ -106,6 +170,17 @@
# md5
SRCS+= md5_dgst.c md5_one.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= mx86-elf.o
+SOBJS+= mx86-elf.o
+CFLAGS+= -DMD5_ASM
+.endif
+
+mx86-elf.o: mx86unix.cpp
+ cpp -DELF -x c mx86unix.cpp | as -o mx86-elf.o
+
+mx86unix.cpp: asm/md5-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/md5/asm/md5-586.pl cpp > mx86unix.cpp
# mdc2
SRCS+= mdc2dgst.c mdc2_one.c
@@ -131,14 +206,52 @@
SRCS+= rc2_cbc.c rc2cfb64.c rc2_ecb.c rc2ofb64.c rc2_skey.c
# rc4
-SRCS+= rc4_enc.c rc4_skey.c
+SRCS+= rc4_skey.c
+
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= rx86-elf.o
+SOBJS+= rx86-elf.o
+.else
+SRCS+= rc4_enc.c
+.endif
+rx86-elf.o: rx86unix.cpp
+ cpp -DELF -x c rx86unix.cpp | as -o rx86-elf.o
+
+rx86unix.cpp: asm/rc4-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/rc4/asm/rc4-586.pl cpp > rx86unix.cpp
+
# rc5
-SRCS+= rc5cfb64.c rc5_ecb.c rc5_enc.c rc5ofb64.c rc5_skey.c
+SRCS+= rc5cfb64.c rc5_ecb.c rc5ofb64.c rc5_skey.c
+
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= r586-elf.o
+SOBJS+= r586-elf.o
+.else
+SRCS+= rc5_enc.c
+.endif
+
+r586-elf.o: r586unix.cpp
+ cpp -DELF -x c r586unix.cpp | as -o r586-elf.o
+r586unix.cpp: asm/rc5-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/rc5/asm/rc5-586.pl cpp > r586unix.cpp
+
# ripemd
SRCS+= rmd_dgst.c rmd_one.c
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= rm86-elf.o
+SOBJS+= rm86-elf.o
+CFLAGS+= -DRMD160_ASM
+.endif
+
+rm86-elf.o: rm86unix.cpp
+ cpp -DELF -x c rm86unix.cpp | as -o rm86-elf.o
+
+rm86unix.cpp: asm/rmd-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/ripemd/asm/rmd-586.pl cpp > rm86unix.cpp
+
# rsa
.if defined(WITH_RSA) && ${WITH_RSA} == YES
SRCS+= rsa_chk.c rsa_err.c rsa_gen.c rsa_lib.c rsa_none.c rsa_null.c \
@@ -147,6 +260,18 @@
# sha
SRCS+= sha_dgst.c sha_one.c sha1_one.c sha1dgst.c
+
+.if (${MACHINE_ARCH} == "i386" && (${MACHINE_CPU} == "i586" || ${MACHINE_CPU} == "i686"))
+STATICOBJS+= sx86-elf.o
+SOBJS+= sx86-elf.o
+CFLAGS+= -DSHA1_ASM
+.endif
+
+sx86-elf.o: sx86unix.cpp
+ cpp -DELF -x c sx86unix.cpp | as -o sx86-elf.o
+
+sx86unix.cpp: asm/sha1-586.pl x86asm.pl
+ perl -I${LCRYPTO_SRC}/perlasm ${LCRYPTO_SRC}/sha/asm/sha1-586.pl cpp > sx86unix.cpp
# stack
SRCS+= stack.c
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.4.21.0004211923001.68716-200000>
