Date: Thu, 20 Sep 2018 21:34:06 +0000 (UTC)
From: Jung-uk Kim <jkim@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r338842 - in projects/openssl111/secure/lib/libcrypto: . i386
Message-ID: <201809202134.w8KLY6fp063849@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jkim
Date: Thu Sep 20 21:34:05 2018
New Revision: 338842
URL: https://svnweb.freebsd.org/changeset/base/338842

Log:
  Regen assembly files for i386.

Added:
  projects/openssl111/secure/lib/libcrypto/i386/cast-586.S (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/i386/chacha-x86.S (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/i386/e_padlock-x86.S (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/i386/ecp_nistz256-x86.S (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/i386/poly1305-x86.S (contents, props changed)
Deleted:
  projects/openssl111/secure/lib/libcrypto/i386/bf-686.S
Modified:
  projects/openssl111/secure/lib/libcrypto/Makefile.asm
  projects/openssl111/secure/lib/libcrypto/i386/aes-586.S
  projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S
  projects/openssl111/secure/lib/libcrypto/i386/bf-586.S
  projects/openssl111/secure/lib/libcrypto/i386/bn-586.S
  projects/openssl111/secure/lib/libcrypto/i386/cmll-x86.S
  projects/openssl111/secure/lib/libcrypto/i386/co-586.S
  projects/openssl111/secure/lib/libcrypto/i386/crypt586.S
  projects/openssl111/secure/lib/libcrypto/i386/des-586.S
  projects/openssl111/secure/lib/libcrypto/i386/ghash-x86.S
  projects/openssl111/secure/lib/libcrypto/i386/md5-586.S
  projects/openssl111/secure/lib/libcrypto/i386/rc4-586.S
  projects/openssl111/secure/lib/libcrypto/i386/rc5-586.S
  projects/openssl111/secure/lib/libcrypto/i386/rmd-586.S
  projects/openssl111/secure/lib/libcrypto/i386/sha1-586.S
  projects/openssl111/secure/lib/libcrypto/i386/sha256-586.S
  projects/openssl111/secure/lib/libcrypto/i386/sha512-586.S
  projects/openssl111/secure/lib/libcrypto/i386/vpaes-x86.S
  projects/openssl111/secure/lib/libcrypto/i386/wp-mmx.S
  projects/openssl111/secure/lib/libcrypto/i386/x86-gf2m.S
  projects/openssl111/secure/lib/libcrypto/i386/x86-mont.S
  projects/openssl111/secure/lib/libcrypto/i386/x86cpuid.S

Modified: projects/openssl111/secure/lib/libcrypto/Makefile.asm
============================================================================== --- projects/openssl111/secure/lib/libcrypto/Makefile.asm Thu Sep 20 20:32:08 2018 (r338841) +++ projects/openssl111/secure/lib/libcrypto/Makefile.asm Thu Sep 20 21:34:05 2018 (r338842) @@ -59,6 +59,9 @@ sha256-armv8.S: sha512-armv8.pl ${LCRYPTO_SRC}/crypto/whrlpool/asm \ ${LCRYPTO_SRC}/engines/asm +# cpuid +SRCS+= x86_64cpuid.pl + # aes SRCS= aes-x86_64.pl aesni-mb-x86_64.pl aesni-sha1-x86_64.pl \ aesni-sha256-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \ @@ -77,9 +80,6 @@ SRCS+= chacha-x86_64.pl # ec SRCS+= ecp_nistz256-x86_64.pl x25519-x86_64.pl -# engines -SRCS+= e_padlock-x86_64.pl - # md5 SRCS+= md5-x86_64.pl @@ -99,8 +99,8 @@ SRCS+= keccak1600-x86_64.pl sha1-mb-x86_64.pl sha1-x86 # whrlpool SRCS+= wp-x86_64.pl -# cpuid -SRCS+= x86_64cpuid.pl +# engines +SRCS+= e_padlock-x86_64.pl SHA_ASM= sha256-x86_64 sha512-x86_64 SHA_SRC= sha512-x86_64.pl @@ -175,22 +175,30 @@ aes-armv4.S: aes-armv4.pl ${LCRYPTO_SRC}/crypto/bf/asm \ ${LCRYPTO_SRC}/crypto/bn/asm \ ${LCRYPTO_SRC}/crypto/camellia/asm \ + ${LCRYPTO_SRC}/crypto/cast/asm \ + ${LCRYPTO_SRC}/crypto/chacha/asm \ ${LCRYPTO_SRC}/crypto/des/asm \ + ${LCRYPTO_SRC}/crypto/ec/asm \ ${LCRYPTO_SRC}/crypto/md5/asm \ ${LCRYPTO_SRC}/crypto/modes/asm \ + ${LCRYPTO_SRC}/crypto/poly1305/asm \ ${LCRYPTO_SRC}/crypto/rc4/asm \ ${LCRYPTO_SRC}/crypto/rc5/asm \ ${LCRYPTO_SRC}/crypto/ripemd/asm \ ${LCRYPTO_SRC}/crypto/sha/asm \ - ${LCRYPTO_SRC}/crypto/whrlpool/asm + ${LCRYPTO_SRC}/crypto/whrlpool/asm \ + ${LCRYPTO_SRC}/engines/asm -PERLPATH= -I${LCRYPTO_SRC}/crypto/des/asm -I${LCRYPTO_SRC}/crypto/perlasm +#PERLPATH= -I${LCRYPTO_SRC}/crypto/des/asm -I${LCRYPTO_SRC}/crypto/perlasm +# cpuid +SRCS= x86cpuid.pl + # aes -SRCS= aes-586.pl aesni-x86.pl vpaes-x86.pl +SRCS+= aes-586.pl aesni-x86.pl vpaes-x86.pl # blowfish -SRCS+= bf-586.pl bf-686.pl +SRCS+= bf-586.pl # bn SRCS+= bn-586.pl co-586.pl x86-gf2m.pl x86-mont.pl @@ -198,15 +206,27 @@ SRCS+= bn-586.pl 
co-586.pl x86-gf2m.pl x86-mont.pl # camellia SRCS+= cmll-x86.pl +# cast +SRCS+= cast-586.pl + +# chacha +SRCS+= chacha-x86.pl + # des SRCS+= crypt586.pl des-586.pl +# ec +SRCS+= ecp_nistz256-x86.pl + # md5 SRCS+= md5-586.pl # modes SRCS+= ghash-x86.pl +# poly1305 +SRCS+= poly1305-x86.pl + # rc4 SRCS+= rc4-586.pl @@ -222,25 +242,26 @@ SRCS+= sha1-586.pl sha256-586.pl sha512-586.pl # whrlpool SRCS+= wp-mmx.pl -# cpuid -SRCS+= x86cpuid.pl +# engines +SRCS+= e_padlock-x86.pl ASM= ${SRCS:R:S/$/.S/} all: ${ASM} -CLEANFILES= ${ASM} +CLEANFILES= ${ASM} ${SRCS:R:S/$/.s/} .SUFFIXES: .pl .pl.S: ( echo '/* $$'FreeBSD'$$ */' ;\ echo '/* Do not modify. This file is auto-generated from ${.IMPSRC:T}. */' ;\ echo '#ifdef PIC' ;\ - env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} -fpic -DPIC ;\ + env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} -fpic -DPIC ${.IMPSRC:R:S/$/.s/} ;\ + cat ${.IMPSRC:R:S/$/.s/} ;\ echo '#else' ;\ - env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ;\ - echo '#endif') |\ - sed -E 's|(\.file[[:blank:]]+)".*"|\1"${.TARGET}"|' > ${.TARGET} + env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ${.IMPSRC:R:S/$/.s/} ;\ + cat ${.IMPSRC:R:S/$/.s/} ;\ + echo '#endif' ) > ${.TARGET} .endif .include <bsd.prog.mk> Modified: projects/openssl111/secure/lib/libcrypto/i386/aes-586.S ============================================================================== --- projects/openssl111/secure/lib/libcrypto/i386/aes-586.S Thu Sep 20 20:32:08 2018 (r338841) +++ projects/openssl111/secure/lib/libcrypto/i386/aes-586.S Thu Sep 20 21:34:05 2018 (r338842) @@ -1,7 +1,6 @@ /* $FreeBSD$ */ /* Do not modify. This file is auto-generated from aes-586.pl. 
*/ #ifdef PIC -.file "aes-586.S" .text .type _x86_AES_encrypt_compact,@function .align 16 @@ -2999,19 +2998,19 @@ _x86_AES_set_encrypt_key: popl %ebp ret .size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,@function +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function .align 16 -private_AES_set_encrypt_key: -.L_private_AES_set_encrypt_key_begin: +AES_set_encrypt_key: +.L_AES_set_encrypt_key_begin: call _x86_AES_set_encrypt_key ret -.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin -.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,@function +.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function .align 16 -private_AES_set_decrypt_key: -.L_private_AES_set_decrypt_key_begin: +AES_set_decrypt_key: +.L_AES_set_decrypt_key_begin: call _x86_AES_set_encrypt_key cmpl $0,%eax je .L054proceed @@ -3240,13 +3239,12 @@ private_AES_set_decrypt_key: popl %ebx popl %ebp ret -.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin +.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .comm OPENSSL_ia32cap_P,16,4 #else -.file "aes-586.S" .text .type _x86_AES_encrypt_compact,@function .align 16 @@ -6244,19 +6242,19 @@ _x86_AES_set_encrypt_key: popl %ebp ret .size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,@function +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function .align 16 -private_AES_set_encrypt_key: -.L_private_AES_set_encrypt_key_begin: +AES_set_encrypt_key: +.L_AES_set_encrypt_key_begin: call _x86_AES_set_encrypt_key ret -.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin 
-.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,@function +.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function .align 16 -private_AES_set_decrypt_key: -.L_private_AES_set_decrypt_key_begin: +AES_set_decrypt_key: +.L_AES_set_decrypt_key_begin: call _x86_AES_set_encrypt_key cmpl $0,%eax je .L054proceed @@ -6485,7 +6483,7 @@ private_AES_set_decrypt_key: popl %ebx popl %ebp ret -.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin +.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 Modified: projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S ============================================================================== --- projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S Thu Sep 20 20:32:08 2018 (r338841) +++ projects/openssl111/secure/lib/libcrypto/i386/aesni-x86.S Thu Sep 20 21:34:05 2018 (r338842) @@ -1,7 +1,6 @@ /* $FreeBSD$ */ /* Do not modify. This file is auto-generated from aesni-x86.pl. 
*/ #ifdef PIC -.file "aesni-x86.S" .text .globl aesni_encrypt .type aesni_encrypt,@function @@ -1793,6 +1792,796 @@ aesni_xts_decrypt: popl %ebp ret .size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin +.globl aesni_ocb_encrypt +.type aesni_ocb_encrypt,@function +.align 16 +aesni_ocb_encrypt: +.L_aesni_ocb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movdqu (%ecx),%xmm0 + movl 36(%esp),%ebp + movdqu (%ebx),%xmm1 + movl 44(%esp),%ebx + movl %esp,%ecx + subl $132,%esp + andl $-16,%esp + subl %esi,%edi + shll $4,%eax + leal -96(%esi,%eax,1),%eax + movl %edi,120(%esp) + movl %eax,124(%esp) + movl %ecx,128(%esp) + movl 240(%edx),%ecx + testl $1,%ebp + jnz .L074odd + bsfl %ebp,%eax + addl $1,%ebp + shll $4,%eax + movdqu (%ebx,%eax,1),%xmm7 + movl %edx,%eax + movdqu (%esi),%xmm2 + leal 16(%esi),%esi + pxor %xmm0,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L075enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L075enc1_loop_15 +.byte 102,15,56,221,209 + xorps %xmm7,%xmm2 + movdqa %xmm7,%xmm0 + movdqa %xmm6,%xmm1 + movups %xmm2,-16(%edi,%esi,1) + movl 240(%eax),%ecx + movl %eax,%edx + movl 124(%esp),%eax +.L074odd: + shll $4,%ecx + movl $16,%edi + subl %ecx,%edi + movl %edx,112(%esp) + leal 32(%edx,%ecx,1),%edx + movl %edi,116(%esp) + cmpl %eax,%esi + ja .L076short + jmp .L077grandloop +.align 32 +.L077grandloop: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + leal 5(%ebp),%edi + addl $6,%ebp + bsfl %ecx,%ecx + bsfl %eax,%eax + bsfl %edi,%edi + shll $4,%ecx + shll $4,%eax + shll $4,%edi + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + movdqu (%ebx,%edi,1),%xmm7 + pxor %xmm0,%xmm2 
+ pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm6,%xmm1 + pxor %xmm0,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm1,96(%esp) + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor 80(%esp),%xmm7 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movl 120(%esp),%edi + movl 124(%esp),%eax + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm0 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor %xmm0,%xmm7 + movdqa 96(%esp),%xmm1 + movdqu %xmm2,-96(%edi,%esi,1) + movdqu %xmm3,-80(%edi,%esi,1) + movdqu %xmm4,-64(%edi,%esi,1) + movdqu %xmm5,-48(%edi,%esi,1) + movdqu %xmm6,-32(%edi,%esi,1) + movdqu %xmm7,-16(%edi,%esi,1) + cmpl %eax,%esi + jb .L077grandloop +.L076short: + addl $96,%eax + subl %esi,%eax + jz .L078done + cmpl $32,%eax + jb .L079one + je .L080two + cmpl $64,%eax + jb .L081three + je .L082four + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa 
%xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm6,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm1,96(%esp) + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movl 120(%esp),%edi + call .L_aesni_encrypt6_enter + movdqa 64(%esp),%xmm0 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor %xmm0,%xmm6 + movdqa 96(%esp),%xmm1 + movdqu %xmm2,(%edi,%esi,1) + movdqu %xmm3,16(%edi,%esi,1) + movdqu %xmm4,32(%edi,%esi,1) + movdqu %xmm5,48(%edi,%esi,1) + movdqu %xmm6,64(%edi,%esi,1) + jmp .L078done +.align 16 +.L079one: + movdqu (%ebx),%xmm7 + movl 112(%esp),%edx + movdqu (%esi),%xmm2 + movl 240(%edx),%ecx + pxor %xmm0,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movl 120(%esp),%edi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L083enc1_loop_16: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L083enc1_loop_16 +.byte 102,15,56,221,209 + xorps %xmm7,%xmm2 + movdqa %xmm7,%xmm0 + movdqa %xmm6,%xmm1 + movups %xmm2,(%edi,%esi,1) + jmp .L078done +.align 16 +.L080two: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm6 + movdqu (%ebx,%ecx,1),%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movl 240(%edx),%ecx + pxor 
%xmm0,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm3 + movdqa %xmm1,%xmm5 + movl 120(%esp),%edi + call _aesni_encrypt2 + xorps %xmm6,%xmm2 + xorps %xmm7,%xmm3 + movdqa %xmm7,%xmm0 + movdqa %xmm5,%xmm1 + movups %xmm2,(%edi,%esi,1) + movups %xmm3,16(%edi,%esi,1) + jmp .L078done +.align 16 +.L081three: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm5 + movdqu (%ebx,%ecx,1),%xmm6 + movdqa %xmm5,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movl 240(%edx),%ecx + pxor %xmm0,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm5,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm6,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm7,%xmm4 + movdqa %xmm1,96(%esp) + movl 120(%esp),%edi + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movdqa %xmm7,%xmm0 + movdqa 96(%esp),%xmm1 + movups %xmm2,(%edi,%esi,1) + movups %xmm3,16(%edi,%esi,1) + movups %xmm4,32(%edi,%esi,1) + jmp .L078done +.align 16 +.L082four: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + movl 112(%esp),%edx + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm4 + movdqu (%ebx,%ecx,1),%xmm5 + movdqa %xmm4,%xmm6 + movdqu (%ebx,%eax,1),%xmm7 + pxor %xmm0,%xmm4 + movdqu (%esi),%xmm2 + pxor %xmm4,%xmm5 + movdqu 16(%esi),%xmm3 + pxor %xmm5,%xmm6 + movdqa %xmm4,(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm5,16(%esp) + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movl 240(%edx),%ecx + pxor %xmm2,%xmm1 + pxor (%esp),%xmm2 + pxor %xmm3,%xmm1 + pxor 16(%esp),%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm1 + pxor %xmm7,%xmm5 + movdqa %xmm1,96(%esp) + movl 120(%esp),%edi + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm6,%xmm4 + movups %xmm2,(%edi,%esi,1) + xorps %xmm7,%xmm5 + movups %xmm3,16(%edi,%esi,1) + movdqa %xmm7,%xmm0 + movups %xmm4,32(%edi,%esi,1) + movdqa 96(%esp),%xmm1 + movups 
%xmm5,48(%edi,%esi,1) +.L078done: + movl 128(%esp),%edx + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm2,16(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm2,32(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm2,48(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm2,64(%esp) + movdqa %xmm2,80(%esp) + movdqa %xmm2,96(%esp) + leal (%edx),%esp + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movdqu %xmm0,(%ecx) + pxor %xmm0,%xmm0 + movdqu %xmm1,(%ebx) + pxor %xmm1,%xmm1 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin +.globl aesni_ocb_decrypt +.type aesni_ocb_decrypt,@function +.align 16 +aesni_ocb_decrypt: +.L_aesni_ocb_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movdqu (%ecx),%xmm0 + movl 36(%esp),%ebp + movdqu (%ebx),%xmm1 + movl 44(%esp),%ebx + movl %esp,%ecx + subl $132,%esp + andl $-16,%esp + subl %esi,%edi + shll $4,%eax + leal -96(%esi,%eax,1),%eax + movl %edi,120(%esp) + movl %eax,124(%esp) + movl %ecx,128(%esp) + movl 240(%edx),%ecx + testl $1,%ebp + jnz .L084odd + bsfl %ebp,%eax + addl $1,%ebp + shll $4,%eax + movdqu (%ebx,%eax,1),%xmm7 + movl %edx,%eax + movdqu (%esi),%xmm2 + leal 16(%esi),%esi + pxor %xmm0,%xmm7 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L085dec1_loop_17: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L085dec1_loop_17 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm1 + movdqa %xmm7,%xmm0 + xorps %xmm2,%xmm1 + movups %xmm2,-16(%edi,%esi,1) + movl 240(%eax),%ecx + movl %eax,%edx + movl 124(%esp),%eax +.L084odd: + shll $4,%ecx + movl $16,%edi + subl %ecx,%edi + movl %edx,112(%esp) + leal 32(%edx,%ecx,1),%edx + movl %edi,116(%esp) + cmpl %eax,%esi + ja .L086short + jmp .L087grandloop 
+.align 32 +.L087grandloop: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + leal 5(%ebp),%edi + addl $6,%ebp + bsfl %ecx,%ecx + bsfl %eax,%eax + bsfl %edi,%edi + shll $4,%ecx + shll $4,%eax + shll $4,%edi + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + movdqu (%ebx,%edi,1),%xmm7 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + movdqa %xmm1,96(%esp) + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor 80(%esp),%xmm7 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movl 120(%esp),%edi + movl 124(%esp),%eax + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm0 + pxor (%esp),%xmm2 + movdqa 96(%esp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm2,%xmm1 + movdqu %xmm2,-96(%edi,%esi,1) + pxor %xmm3,%xmm1 + movdqu %xmm3,-80(%edi,%esi,1) + pxor %xmm4,%xmm1 + movdqu %xmm4,-64(%edi,%esi,1) + pxor %xmm5,%xmm1 + movdqu %xmm5,-48(%edi,%esi,1) + pxor %xmm6,%xmm1 + movdqu %xmm6,-32(%edi,%esi,1) + pxor %xmm7,%xmm1 + movdqu %xmm7,-16(%edi,%esi,1) + cmpl %eax,%esi + jb .L087grandloop +.L086short: + addl $96,%eax + subl %esi,%eax + jz .L088done + cmpl $32,%eax + jb .L089one + je 
.L090two + cmpl $64,%eax + jb .L091three + je .L092four + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + pxor %xmm7,%xmm7 + movdqa %xmm1,96(%esp) + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movl 120(%esp),%edi + call .L_aesni_decrypt6_enter + movdqa 64(%esp),%xmm0 + pxor (%esp),%xmm2 + movdqa 96(%esp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm2,%xmm1 + movdqu %xmm2,(%edi,%esi,1) + pxor %xmm3,%xmm1 + movdqu %xmm3,16(%edi,%esi,1) + pxor %xmm4,%xmm1 + movdqu %xmm4,32(%edi,%esi,1) + pxor %xmm5,%xmm1 + movdqu %xmm5,48(%edi,%esi,1) + pxor %xmm6,%xmm1 + movdqu %xmm6,64(%edi,%esi,1) + jmp .L088done +.align 16 +.L089one: + movdqu (%ebx),%xmm7 + movl 112(%esp),%edx + movdqu (%esi),%xmm2 + movl 240(%edx),%ecx + pxor %xmm0,%xmm7 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movl 120(%esp),%edi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L093dec1_loop_18: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz 
.L093dec1_loop_18 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm1 + movdqa %xmm7,%xmm0 + xorps %xmm2,%xmm1 + movups %xmm2,(%edi,%esi,1) + jmp .L088done +.align 16 +.L090two: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm6 + movdqu (%ebx,%ecx,1),%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movl 240(%edx),%ecx + movdqa %xmm1,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm6,%xmm2 + pxor %xmm7,%xmm3 + movl 120(%esp),%edi + call _aesni_decrypt2 + xorps %xmm6,%xmm2 + xorps %xmm7,%xmm3 + movdqa %xmm7,%xmm0 + xorps %xmm2,%xmm5 + movups %xmm2,(%edi,%esi,1) + xorps %xmm3,%xmm5 + movups %xmm3,16(%edi,%esi,1) + movaps %xmm5,%xmm1 + jmp .L088done +.align 16 +.L091three: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm5 + movdqu (%ebx,%ecx,1),%xmm6 + movdqa %xmm5,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movl 240(%edx),%ecx + movdqa %xmm1,96(%esp) + pxor %xmm0,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm5,%xmm2 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm4 + movl 120(%esp),%edi + call _aesni_decrypt3 + movdqa 96(%esp),%xmm1 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi,%esi,1) + pxor %xmm2,%xmm1 + movdqa %xmm7,%xmm0 + movups %xmm3,16(%edi,%esi,1) + pxor %xmm3,%xmm1 + movups %xmm4,32(%edi,%esi,1) + pxor %xmm4,%xmm1 + jmp .L088done +.align 16 +.L092four: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + movl 112(%esp),%edx + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm4 + movdqu (%ebx,%ecx,1),%xmm5 + movdqa %xmm4,%xmm6 + movdqu (%ebx,%eax,1),%xmm7 + pxor %xmm0,%xmm4 + movdqu (%esi),%xmm2 + pxor %xmm4,%xmm5 + movdqu 16(%esi),%xmm3 + pxor %xmm5,%xmm6 + movdqa %xmm4,(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm5,16(%esp) + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movl 240(%edx),%ecx + movdqa %xmm1,96(%esp) + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 
%xmm6,%xmm4 + pxor %xmm7,%xmm5 + movl 120(%esp),%edi + call _aesni_decrypt4 + movdqa 96(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm6,%xmm4 + movups %xmm2,(%edi,%esi,1) + pxor %xmm2,%xmm1 + xorps %xmm7,%xmm5 + movups %xmm3,16(%edi,%esi,1) + pxor %xmm3,%xmm1 + movdqa %xmm7,%xmm0 + movups %xmm4,32(%edi,%esi,1) + pxor %xmm4,%xmm1 + movups %xmm5,48(%edi,%esi,1) + pxor %xmm5,%xmm1 +.L088done: + movl 128(%esp),%edx *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809202134.w8KLY6fp063849>