From owner-svn-src-projects@freebsd.org Tue Oct 27 21:17:39 2015 Return-Path: Delivered-To: svn-src-projects@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id E0CC3A1C9CA for ; Tue, 27 Oct 2015 21:17:38 +0000 (UTC) (envelope-from jkim@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id AA3E019A8; Tue, 27 Oct 2015 21:17:38 +0000 (UTC) (envelope-from jkim@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id t9RLHbxm067129; Tue, 27 Oct 2015 21:17:37 GMT (envelope-from jkim@FreeBSD.org) Received: (from jkim@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id t9RLHb3X067122; Tue, 27 Oct 2015 21:17:37 GMT (envelope-from jkim@FreeBSD.org) Message-Id: <201510272117.t9RLHb3X067122@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: jkim set sender to jkim@FreeBSD.org using -f From: Jung-uk Kim Date: Tue, 27 Oct 2015 21:17:37 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r290066 - in projects/openssl-1.0.2/secure/lib/libcrypto: . amd64 X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 27 Oct 2015 21:17:39 -0000 Author: jkim Date: Tue Oct 27 21:17:37 2015 New Revision: 290066 URL: https://svnweb.freebsd.org/changeset/base/290066 Log: Regen assembly files for amd64. Added: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/rsaz-avx2.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/rsaz-x86_64.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S (contents, props changed) projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha256-mb-x86_64.S (contents, props changed) Deleted: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/modexp512-x86_64.S Modified: projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/bsaes-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/cmll-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/ghash-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/md5-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/rc4-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha1-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha256-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/sha512-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/vpaes-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/wp-x86_64.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64-gf2m.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64-mont.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64-mont5.S projects/openssl-1.0.2/secure/lib/libcrypto/amd64/x86_64cpuid.S Modified: projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm ============================================================================== --- projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm Tue Oct 27 21:16:29 2015 (r290065) +++ projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.asm Tue Oct 27 21:17:37 2015 (r290066) @@ -12,34 +12,39 @@ ${LCRYPTO_SRC}/crypto/aes/asm \ ${LCRYPTO_SRC}/crypto/bn/asm \ ${LCRYPTO_SRC}/crypto/camellia/asm \ + ${LCRYPTO_SRC}/crypto/ec/asm \ ${LCRYPTO_SRC}/crypto/md5/asm \ ${LCRYPTO_SRC}/crypto/modes/asm \ ${LCRYPTO_SRC}/crypto/rc4/asm \ - ${LCRYPTO_SRC}/crypto/rc5/asm \ ${LCRYPTO_SRC}/crypto/sha/asm \ ${LCRYPTO_SRC}/crypto/whrlpool/asm # aes -SRCS= aes-x86_64.pl aesni-sha1-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \ +SRCS= aes-x86_64.pl aesni-mb-x86_64.pl aesni-sha1-x86_64.pl \ + aesni-sha256-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \ vpaes-x86_64.pl # bn -SRCS+= modexp512-x86_64.pl x86_64-gf2m.pl x86_64-mont.pl x86_64-mont5.pl +SRCS+= rsaz-avx2.pl rsaz-x86_64.pl x86_64-gf2m.pl x86_64-mont.pl \ + x86_64-mont5.pl # camellia SRCS+= cmll-x86_64.pl +# ec +SRCS+= ecp_nistz256-x86_64.pl + # md5 SRCS+= md5-x86_64.pl # modes -SRCS+= ghash-x86_64.pl +SRCS+= aesni-gcm-x86_64.pl ghash-x86_64.pl # rc4 SRCS+= rc4-md5-x86_64.pl rc4-x86_64.pl # sha -SRCS+= sha1-x86_64.pl sha512-x86_64.pl +SRCS+= sha1-mb-x86_64.pl sha1-x86_64.pl sha256-mb-x86_64.pl sha512-x86_64.pl # whrlpool SRCS+= wp-x86_64.pl Modified: projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc ============================================================================== --- projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc Tue Oct 27 21:16:29 2015 (r290065) +++ projects/openssl-1.0.2/secure/lib/libcrypto/Makefile.inc Tue Oct 27 21:17:37 2015 (r290066) @@ -16,6 +16,7 @@ CFLAGS+= -DOPENSSL_THREADS -DDSO_DLFCN - .if ${MACHINE_CPUARCH} == "amd64" CFLAGS+=-DL_ENDIAN -DOPENSSL_IA32_SSE2 CFLAGS+=-DAES_ASM -DBSAES_ASM -DVPAES_ASM +CFLAGS+=-DECP_NISTZ256_ASM CFLAGS+=-DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_MONT5 -DOPENSSL_BN_ASM_GF2m CFLAGS+=-DMD5_ASM CFLAGS+=-DGHASH_ASM Modified: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S ============================================================================== --- projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S Tue Oct 27 21:16:29 2015 (r290065) +++ projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aes-x86_64.S Tue Oct 27 21:17:37 2015 (r290066) @@ -151,7 +151,7 @@ _x86_64_AES_encrypt: xorl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx -.byte 0xf3,0xc3 +.byte 0xf3,0xc3 .size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt .type _x86_64_AES_encrypt_compact,@function .align 16 @@ -176,80 +176,78 @@ _x86_64_AES_encrypt_compact: movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d - movzbl (%r14,%r10,1),%r10d - movzbl (%r14,%r11,1),%r11d - movzbl (%r14,%r12,1),%r12d - movzbl %dl,%r8d movzbl %bh,%esi movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d - movzbl (%r14,%rsi,1),%r9d - movzbl (%r14,%rdi,1),%r13d - movzbl %dh,%ebp + movzbl (%r14,%rsi,1),%r9d movzbl %ah,%esi - shrl $16,%ecx + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi - shrl $16,%edx - movzbl %cl,%edi shll $8,%r9d + shrl $16,%edx shll $8,%r13d - movzbl (%r14,%rdi,1),%edi xorl %r9d,%r10d - xorl %r13d,%r11d - - movzbl %dl,%r9d shrl $16,%eax + movzbl %dl,%r9d shrl $16,%ebx - movzbl %al,%r13d + xorl %r13d,%r11d shll $8,%ebp - shll $8,%esi - movzbl (%r14,%r9,1),%r9d - movzbl (%r14,%r13,1),%r13d + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d - xorl %esi,%r8d + shll $8,%esi movzbl %bl,%ebp - movzbl %dh,%esi shll $16,%edi - movzbl (%r14,%rbp,1),%ebp - movzbl (%r14,%rsi,1),%esi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d xorl %edi,%r10d - movzbl %ah,%edi shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rcx,1),%edx movzbl (%r14,%rbx,1),%ecx - shll $16,%r9d - shll $16,%r13d + shll $16,%ebp - xorl %r9d,%r11d xorl %r13d,%r12d - xorl %ebp,%r8d - shll $24,%esi + xorl %ebp,%r8d shll $24,%edi - shll $24,%edx xorl %esi,%r10d - shll $24,%ecx + shll $24,%edx xorl %edi,%r11d + shll $24,%ecx movl %r10d,%eax movl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 je .Lenc_compact_done - movl %eax,%esi - movl %ebx,%edi - andl $2155905152,%esi - andl $2155905152,%edi - movl %esi,%r10d - movl %edi,%r11d + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi shrl $7,%r10d leal (%rax,%rax,1),%r8d shrl $7,%r11d @@ -267,25 +265,25 @@ _x86_64_AES_encrypt_compact: xorl %r8d,%eax xorl %r9d,%ebx - movl %ecx,%esi - movl %edx,%edi + movl $2155905152,%r12d roll $24,%eax + movl $2155905152,%ebp roll $24,%ebx - andl $2155905152,%esi - andl $2155905152,%edi + andl %ecx,%r12d + andl %edx,%ebp xorl %r8d,%eax xorl %r9d,%ebx - movl %esi,%r12d - movl %edi,%ebp + movl %r12d,%esi rorl $16,%r10d + movl %ebp,%edi rorl $16,%r11d - shrl $7,%r12d leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d xorl %r10d,%eax - xorl %r11d,%ebx shrl $7,%ebp - leal (%rdx,%rdx,1),%r9d + xorl %r11d,%ebx rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d rorl $8,%r11d subl %r12d,%esi subl %ebp,%edi @@ -301,23 +299,23 @@ _x86_64_AES_encrypt_compact: xorl %esi,%r8d xorl %edi,%r9d + rorl $16,%r12d xorl %r8d,%ecx + rorl $16,%ebp xorl %r9d,%edx roll $24,%ecx + movl 0(%r14),%esi roll $24,%edx xorl %r8d,%ecx - xorl %r9d,%edx - movl 0(%r14),%esi - rorl $16,%r12d - rorl $16,%ebp movl 64(%r14),%edi - xorl %r12d,%ecx - xorl %ebp,%edx + xorl %r9d,%edx movl 128(%r14),%r8d + xorl %r12d,%ecx rorl $8,%r12d + xorl %ebp,%edx rorl $8,%ebp - movl 192(%r14),%r9d xorl %r12d,%ecx + movl 192(%r14),%r9d xorl %ebp,%edx jmp .Lenc_loop_compact .align 16 @@ -326,7 +324,7 @@ _x86_64_AES_encrypt_compact: xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx -.byte 0xf3,0xc3 +.byte 0xf3,0xc3 .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact .globl AES_encrypt .type AES_encrypt,@function @@ -548,7 +546,7 @@ _x86_64_AES_decrypt: xorl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx -.byte 0xf3,0xc3 +.byte 0xf3,0xc3 .size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt .type _x86_64_AES_decrypt_compact,@function .align 16 @@ -574,70 +572,69 @@ _x86_64_AES_decrypt_compact: movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d - movzbl (%r14,%r10,1),%r10d - movzbl (%r14,%r11,1),%r11d - movzbl (%r14,%r12,1),%r12d - movzbl %dl,%r8d movzbl %dh,%esi movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d - movzbl (%r14,%rsi,1),%r9d - movzbl (%r14,%rdi,1),%r13d - movzbl %bh,%ebp + movzbl (%r14,%rsi,1),%r9d movzbl %ch,%esi - shrl $16,%ecx + movzbl (%r14,%rdi,1),%r13d movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi - shrl $16,%edx - movzbl %cl,%edi - shll $8,%r9d + shrl $16,%ecx shll $8,%r13d - movzbl (%r14,%rdi,1),%edi - xorl %r9d,%r10d - xorl %r13d,%r11d - - movzbl %dl,%r9d + shll $8,%r9d + movzbl %cl,%edi shrl $16,%eax + xorl %r9d,%r10d shrl $16,%ebx - movzbl %al,%r13d + movzbl %dl,%r9d + shll $8,%ebp + xorl %r13d,%r11d shll $8,%esi - movzbl (%r14,%r9,1),%r9d - movzbl (%r14,%r13,1),%r13d + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d - xorl %esi,%r8d - movzbl %bl,%ebp - movzbl %bh,%esi + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi movzbl (%r14,%rbp,1),%ebp - movzbl (%r14,%rsi,1),%esi xorl %edi,%r10d - + movzbl (%r14,%r13,1),%r13d movzbl %ch,%edi + + shll $16,%ebp shll $16,%r9d shll $16,%r13d - movzbl (%r14,%rdi,1),%ebx + xorl %ebp,%r8d + movzbl %dh,%ebp xorl %r9d,%r11d + shrl $8,%eax xorl %r13d,%r12d - movzbl %dh,%edi - shrl $8,%eax - shll $16,%ebp - movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx movzbl (%r14,%rax,1),%edx - xorl %ebp,%r8d + movl %r10d,%eax shll $24,%esi shll $24,%ebx shll $24,%ecx - xorl %esi,%r10d + xorl %esi,%eax shll $24,%edx xorl %r11d,%ebx - movl %r10d,%eax xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 @@ -650,12 +647,12 @@ _x86_64_AES_decrypt_compact: orq %rbx,%rax orq %rdx,%rcx movq 256+16(%r14),%rbp - movq %rax,%rbx - movq %rcx,%rdx - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r9 - movq %rdx,%r12 + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 @@ -666,15 +663,15 @@ _x86_64_AES_decrypt_compact: andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx - xorq %r8,%rbx - xorq %r11,%rdx - movq %rbx,%r8 - movq %rdx,%r11 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 @@ -685,15 +682,15 @@ _x86_64_AES_decrypt_compact: andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx - xorq %r9,%rbx - xorq %r12,%rdx - movq %rbx,%r9 - movq %rdx,%r12 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 @@ -718,51 +715,51 @@ _x86_64_AES_decrypt_compact: movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 - xorq %r13,%r12 shrq $32,%rbx + xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 - xorq %r11,%r13 roll $8,%eax + xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 + roll $8,%ebx xorq %r12,%r13 - roll $8,%ebx roll $8,%edx xorl %r10d,%eax - xorl %r13d,%ecx shrq $32,%r10 + xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 - movq %r11,%r13 - shrq $32,%r10 - shrq $32,%r13 roll $24,%r8d + movq %r11,%r13 roll $24,%r11d - roll $24,%r10d - roll $24,%r13d + shrq $32,%r10 xorl %r8d,%eax + shrq $32,%r13 xorl %r11d,%ecx + roll $24,%r10d movq %r9,%r8 + roll $24,%r13d movq %r12,%r11 + shrq $32,%r8 xorl %r10d,%ebx + shrq $32,%r11 xorl %r13d,%edx movq 0(%r14),%rsi - shrq $32,%r8 - shrq $32,%r11 - movq 64(%r14),%rdi roll $16,%r9d + movq 64(%r14),%rdi roll $16,%r12d movq 128(%r14),%rbp roll $16,%r8d - roll $16,%r11d movq 192(%r14),%r10 xorl %r9d,%eax + roll $16,%r11d xorl %r12d,%ecx movq 256(%r14),%r13 xorl %r8d,%ebx @@ -774,7 +771,7 @@ _x86_64_AES_decrypt_compact: xorl 4(%r15),%ebx xorl 8(%r15),%ecx xorl 12(%r15),%edx -.byte 0xf3,0xc3 +.byte 0xf3,0xc3 .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact .globl AES_decrypt .type AES_decrypt,@function @@ -860,10 +857,6 @@ private_AES_set_encrypt_key: call _x86_64_AES_set_encrypt_key - movq 8(%rsp),%r15 - movq 16(%rsp),%r14 - movq 24(%rsp),%r13 - movq 32(%rsp),%r12 movq 40(%rsp),%rbp movq 48(%rsp),%rbx addq $56,%rsp @@ -1108,7 +1101,7 @@ _x86_64_AES_set_encrypt_key: .Lbadpointer: movq $-1,%rax .Lexit: -.byte 0xf3,0xc3 +.byte 0xf3,0xc3 .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key .globl private_AES_set_decrypt_key .type private_AES_set_decrypt_key,@function @@ -1161,12 +1154,12 @@ private_AES_set_decrypt_key: leaq 16(%r15),%r15 movq 0(%r15),%rax movq 8(%r15),%rcx - movq %rax,%rbx - movq %rcx,%rdx - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r9 - movq %rdx,%r12 + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 @@ -1177,15 +1170,15 @@ private_AES_set_decrypt_key: andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx - xorq %r8,%rbx - xorq %r11,%rdx - movq %rbx,%r8 - movq %rdx,%r11 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 @@ -1196,15 +1189,15 @@ private_AES_set_decrypt_key: andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx - xorq %r9,%rbx - xorq %r12,%rdx - movq %rbx,%r9 - movq %rdx,%r12 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 @@ -1229,51 +1222,51 @@ private_AES_set_decrypt_key: movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 - xorq %r13,%r12 shrq $32,%rbx + xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 - xorq %r11,%r13 roll $8,%eax + xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 + roll $8,%ebx xorq %r12,%r13 - roll $8,%ebx roll $8,%edx xorl %r10d,%eax - xorl %r13d,%ecx shrq $32,%r10 + xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 - movq %r11,%r13 - shrq $32,%r10 - shrq $32,%r13 roll $24,%r8d + movq %r11,%r13 roll $24,%r11d - roll $24,%r10d - roll $24,%r13d + shrq $32,%r10 xorl %r8d,%eax + shrq $32,%r13 xorl %r11d,%ecx + roll $24,%r10d movq %r9,%r8 + roll $24,%r13d movq %r12,%r11 + shrq $32,%r8 xorl %r10d,%ebx + shrq $32,%r11 xorl %r13d,%edx - shrq $32,%r8 - shrq $32,%r11 - roll $16,%r9d + roll $16,%r12d roll $16,%r8d - roll $16,%r11d xorl %r9d,%eax + roll $16,%r11d xorl %r12d,%ecx xorl %r8d,%ebx @@ -1389,7 +1382,7 @@ AES_cbc_encrypt: leaq 80(%rsp),%rdi leaq 80(%rsp),%r15 movl $30,%ecx -.long 0x90A548F3 +.long 0x90A548F3 movl %eax,(%rdi) .Lcbc_skip_ecopy: movq %r15,0(%rsp) @@ -1551,7 +1544,7 @@ AES_cbc_encrypt: je .Lcbc_exit movl $30,%ecx xorq %rax,%rax -.long 0x90AB48F3 +.long 0x90AB48F3 jmp .Lcbc_exit @@ -1606,7 +1599,7 @@ AES_cbc_encrypt: movl 4(%rbp),%ebx movl 8(%rbp),%ecx movl 12(%rbp),%edx - jz .Lcbc_slow_enc_tail + jz .Lcbc_slow_enc_tail .align 4 .Lcbc_slow_enc_loop: @@ -1651,16 +1644,16 @@ AES_cbc_encrypt: movq %r10,%rcx movq %r8,%rsi movq %r9,%rdi -.long 0x9066A4F3 +.long 0x9066A4F3 movq $16,%rcx subq %r10,%rcx xorq %rax,%rax -.long 0x9066AAF3 +.long 0x9066AAF3 movq %r9,%r8 movq $16,%r10 movq %r11,%rax movq %r12,%rcx - jmp .Lcbc_slow_enc_loop + jmp .Lcbc_slow_enc_loop .align 16 .LSLOW_DECRYPT: @@ -1736,7 +1729,7 @@ AES_cbc_encrypt: movq %r9,%rdi leaq 64(%rsp),%rsi leaq 16(%r10),%rcx -.long 0x9066A4F3 +.long 0x9066A4F3 jmp .Lcbc_exit .align 16 Added: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S Tue Oct 27 21:17:37 2015 (r290066) @@ -0,0 +1,16 @@ + # $FreeBSD$ +.text + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +aesni_gcm_encrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt + +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +aesni_gcm_decrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt Added: projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ projects/openssl-1.0.2/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S Tue Oct 27 21:17:37 2015 (r290066) @@ -0,0 +1,507 @@ + # $FreeBSD$ +.text + + + +.globl aesni_multi_cbc_encrypt +.type aesni_multi_cbc_encrypt,@function +.align 32 +aesni_multi_cbc_encrypt: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +.Lenc4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Lenc4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Lenc4x_done + + movups 16-120(%rsi),%xmm1 + pxor %xmm12,%xmm2 + movups 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm3 + movl 240-120(%rsi),%eax + pxor %xmm12,%xmm4 + movdqu (%r8),%xmm6 + pxor %xmm12,%xmm5 + movdqu (%r9),%xmm7 + pxor %xmm6,%xmm2 + movdqu (%r10),%xmm8 + pxor %xmm7,%xmm3 + movdqu (%r11),%xmm9 + pxor %xmm8,%xmm4 + pxor %xmm9,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_enc4x + +.align 32 +.Loop_enc4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,220,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,220,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r10,%rbx,1) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,220,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,220,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,220,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,220,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,220,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,220,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,220,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,220,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 160-120(%rsi),%xmm0 + + jb .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 192-120(%rsi),%xmm0 + + je .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 224-120(%rsi),%xmm0 + jmp .Lenc4x_tail + +.align 32 +.Lenc4x_tail: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqu (%r8,%rbx,1),%xmm6 + movdqu 16-120(%rsi),%xmm1 + +.byte 102,15,56,221,208 + movdqu (%r9,%rbx,1),%xmm7 + pxor %xmm12,%xmm6 +.byte 102,15,56,221,216 + movdqu (%r10,%rbx,1),%xmm8 + pxor %xmm12,%xmm7 +.byte 102,15,56,221,224 + movdqu (%r11,%rbx,1),%xmm9 + pxor %xmm12,%xmm8 +.byte 102,15,56,221,232 + movdqu 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + pxor %xmm6,%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + pxor %xmm7,%xmm3 + movups %xmm4,-16(%r14,%rbx,1) + pxor %xmm8,%xmm4 + movups %xmm5,-16(%r15,%rbx,1) + pxor %xmm9,%xmm5 + + decl %edx + jnz .Loop_enc4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + + + + + + + + + + leaq 160(%rdi),%rdi + decl %edx + jnz .Lenc4x_loop_grande + +.Lenc4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***