From owner-svn-src-projects@freebsd.org  Sat Sep 22 02:42:53 2018
From: Jung-uk Kim <jkim@FreeBSD.org>
Date: Sat, 22 Sep 2018 02:42:52 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r338877 - in projects/openssl111/secure/lib/libcrypto: . arm
Message-Id: <201809220242.w8M2gqQR072768@repo.freebsd.org>

Author: jkim
Date: Sat Sep 22 02:42:51 2018
New Revision: 338877
URL: https://svnweb.freebsd.org/changeset/base/338877

Log:
  Regen assembly files for arm.
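[Each .S file touched below is regenerated from the corresponding
perlasm script via the Makefile.asm rules shown in the first hunk.  As
a minimal sketch of that step, assuming OpenSSL's usual
"perl <script>.pl <flavour> <output>" calling convention -- the
"linux32" flavour and the manual invocation are illustrative only; the
Makefile supplies the real arguments through PERLPATH and SRCS:

  # hypothetical manual regeneration of one ARM module
  perl -I${LCRYPTO_SRC}/crypto/perlasm \
      ${LCRYPTO_SRC}/crypto/chacha/asm/chacha-armv4.pl linux32 chacha-armv4.S
]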
Added:
  projects/openssl111/secure/lib/libcrypto/arm/chacha-armv4.S   (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/arm/ecp_nistz256-armv4.S   (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/arm/keccak1600-armv4.S   (contents, props changed)
  projects/openssl111/secure/lib/libcrypto/arm/poly1305-armv4.S   (contents, props changed)
Modified:
  projects/openssl111/secure/lib/libcrypto/Makefile.asm
  projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S
  projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S
  projects/openssl111/secure/lib/libcrypto/arm/armv4-gf2m.S
  projects/openssl111/secure/lib/libcrypto/arm/armv4-mont.S
  projects/openssl111/secure/lib/libcrypto/arm/bsaes-armv7.S
  projects/openssl111/secure/lib/libcrypto/arm/ghash-armv4.S
  projects/openssl111/secure/lib/libcrypto/arm/ghashv8-armx.S
  projects/openssl111/secure/lib/libcrypto/arm/sha1-armv4-large.S
  projects/openssl111/secure/lib/libcrypto/arm/sha256-armv4.S
  projects/openssl111/secure/lib/libcrypto/arm/sha512-armv4.S

Modified: projects/openssl111/secure/lib/libcrypto/Makefile.asm
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/Makefile.asm	Sat Sep 22 02:23:42 2018	(r338876)
+++ projects/openssl111/secure/lib/libcrypto/Makefile.asm	Sat Sep 22 02:42:51 2018	(r338877)
@@ -149,22 +149,34 @@ ${s}.S: ${s}.s
 .PATH:	${LCRYPTO_SRC}/crypto \
 	${LCRYPTO_SRC}/crypto/aes/asm \
 	${LCRYPTO_SRC}/crypto/bn/asm \
+	${LCRYPTO_SRC}/crypto/chacha/asm \
+	${LCRYPTO_SRC}/crypto/ec/asm \
 	${LCRYPTO_SRC}/crypto/modes/asm \
+	${LCRYPTO_SRC}/crypto/poly1305/asm \
 	${LCRYPTO_SRC}/crypto/sha/asm

 PERLPATH=	-I${LCRYPTO_SRC}/crypto/perlasm

 # aes
-SRCS=	aesv8-armx.pl bsaes-armv7.pl
+SRCS=	aes-armv4.pl aesv8-armx.pl bsaes-armv7.pl

 # bn
 SRCS+=	armv4-mont.pl armv4-gf2m.pl

+# chacha
+SRCS+=	chacha-armv4.pl
+
+# ec
+SRCS+=	ecp_nistz256-armv4.pl
+
 # modes
 SRCS+=	ghash-armv4.pl ghashv8-armx.pl

+# poly1305
+SRCS+=	poly1305-armv4.pl
+
 # sha
-SRCS+=	sha1-armv4-large.pl sha256-armv4.pl sha512-armv4.pl
+SRCS+=	keccak1600-armv4.pl sha1-armv4-large.pl sha256-armv4.pl sha512-armv4.pl

 ASM=	aes-armv4.S ${SRCS:R:S/$/.S/}

Modified: projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S	Sat Sep 22 02:23:42 2018	(r338876)
+++ projects/openssl111/secure/lib/libcrypto/arm/aes-armv4.S	Sat Sep 22 02:42:51 2018	(r338877)
@@ -1,6 +1,13 @@
 /* $FreeBSD$ */
 /* Do not modify. This file is auto-generated from aes-armv4.pl. */
+@ Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
+@
+@ Licensed under the OpenSSL license (the "License").  You may not use
+@ this file except in compliance with the License.  You can obtain a copy
+@ in the file LICENSE in the source distribution or at
+@ https://www.openssl.org/source/license.html
+

 @ ====================================================================
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
@@ -40,15 +47,12 @@
 #endif

 .text
-#if __ARM_ARCH__<7
-.code	32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
 .syntax	unified
-# ifdef __thumb2__
 .thumb
-# else
+#else
 .code	32
-# endif
+#undef	__thumb2__
 #endif

 .type	AES_Te,%object
@@ -159,19 +163,23 @@ AES_Te:
 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		const AES_KEY *key) {
-.global AES_encrypt
-.type   AES_encrypt,%function
+.globl	AES_encrypt
+.type	AES_encrypt,%function
 .align	5
 AES_encrypt:
-#if __ARM_ARCH__<7
+#ifndef	__thumb2__
 	sub	r3,pc,#8		@ AES_encrypt
 #else
 	adr	r3,.
 #endif
-	stmdb   sp!,{r1,r4-r12,lr}
+	stmdb	sp!,{r1,r4-r12,lr}
+#if defined(__thumb2__) || defined(__APPLE__)
+	adr	r10,AES_Te
+#else
+	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
+#endif
 	mov	r12,r0		@ inp
 	mov	r11,r2
-	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
 #if __ARM_ARCH__<7
 	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
 	ldrb	r4,[r12,#2]	@ manner...
@@ -258,20 +266,20 @@ AES_encrypt:
 	strb	r3,[r12,#15]
 #endif
 #if __ARM_ARCH__>=5
-	ldmia	sp!,{r4-r12,pc}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
 #else
-	ldmia   sp!,{r4-r12,lr}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 	tst	lr,#1
 	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
 #endif
 .size	AES_encrypt,.-AES_encrypt

-.type   _armv4_AES_encrypt,%function
+.type	_armv4_AES_encrypt,%function
 .align	2
 _armv4_AES_encrypt:
 	str	lr,[sp,#-4]!		@ push lr
-	ldmia	r11!,{r4-r7}
+	ldmia	r11!,{r4,r5,r6,r7}
 	eor	r0,r0,r4
 	ldr	r12,[r11,#240-16]
 	eor	r1,r1,r5
@@ -404,24 +412,24 @@ _armv4_AES_encrypt:
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt

-.global private_AES_set_encrypt_key
-.type   private_AES_set_encrypt_key,%function
+.globl	AES_set_encrypt_key
+.type	AES_set_encrypt_key,%function
 .align	5
-private_AES_set_encrypt_key:
+AES_set_encrypt_key:
 _armv4_AES_set_encrypt_key:
-#if __ARM_ARCH__<7
+#ifndef	__thumb2__
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 #else
 	adr	r3,.
 #endif
 	teq	r0,#0
-#if __ARM_ARCH__>=7
+#ifdef	__thumb2__
 	itt	eq			@ Thumb2 thing, sanity check in ARM
 #endif
 	moveq	r0,#-1
 	beq	.Labrt
 	teq	r2,#0
-#if __ARM_ARCH__>=7
+#ifdef	__thumb2__
 	itt	eq			@ Thumb2 thing, sanity check in ARM
 #endif
 	moveq	r0,#-1
@@ -432,19 +440,23 @@ _armv4_AES_set_encrypt_key:
 	teq	r1,#192
 	beq	.Lok
 	teq	r1,#256
-#if __ARM_ARCH__>=7
+#ifdef	__thumb2__
 	itt	ne			@ Thumb2 thing, sanity check in ARM
 #endif
 	movne	r0,#-1
 	bne	.Labrt

-.Lok:	stmdb	sp!,{r4-r12,lr}
-	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
-
+.Lok:	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 	mov	r12,r0		@ inp
 	mov	lr,r1			@ bits
 	mov	r11,r2			@ key

+#if defined(__thumb2__) || defined(__APPLE__)
+	adr	r10,AES_Te+1024			@ Te4
+#else
+	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
+#endif
+
 #if __ARM_ARCH__<7
 	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
 	ldrb	r4,[r12,#2]	@ manner...
@@ -589,7 +601,7 @@ _armv4_AES_set_encrypt_key:
 	str	r2,[r11,#-16]
 	subs	r12,r12,#1
 	str	r3,[r11,#-12]
-#if __ARM_ARCH__>=7
+#ifdef	__thumb2__
 	itt	eq				@ Thumb2 thing, sanity check in ARM
 #endif
 	subeq	r2,r11,#216
@@ -661,7 +673,7 @@ _armv4_AES_set_encrypt_key:
 	str	r2,[r11,#-24]
 	subs	r12,r12,#1
 	str	r3,[r11,#-20]
-#if __ARM_ARCH__>=7
+#ifdef	__thumb2__
 	itt	eq				@ Thumb2 thing, sanity check in ARM
 #endif
 	subeq	r2,r11,#256
@@ -695,21 +707,21 @@ _armv4_AES_set_encrypt_key:
 .align	2
 .Ldone:	mov	r0,#0
-	ldmia   sp!,{r4-r12,lr}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 .Labrt:
 #if __ARM_ARCH__>=5
 	bx	lr				@ .word	0xe12fff1e
 #else
 	tst	lr,#1
 	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
 #endif
-.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.size	AES_set_encrypt_key,.-AES_set_encrypt_key

-.global private_AES_set_decrypt_key
-.type   private_AES_set_decrypt_key,%function
+.globl	AES_set_decrypt_key
+.type	AES_set_decrypt_key,%function
 .align	5
-private_AES_set_decrypt_key:
+AES_set_decrypt_key:
 	str	lr,[sp,#-4]!		@ push lr
 	bl	_armv4_AES_set_encrypt_key
 	teq	r0,#0
@@ -719,20 +731,20 @@ private_AES_set_decrypt_key:
 	mov	r0,r2			@ AES_set_encrypt_key preserves r2,
 	mov	r1,r2			@ which is AES_KEY *key
 	b	_armv4_AES_set_enc2dec_key
-.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+.size	AES_set_decrypt_key,.-AES_set_decrypt_key

 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
-.global	AES_set_enc2dec_key
+.globl	AES_set_enc2dec_key
 .type	AES_set_enc2dec_key,%function
 .align	5
 AES_set_enc2dec_key:
 _armv4_AES_set_enc2dec_key:
-	stmdb   sp!,{r4-r12,lr}
+	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

 	ldr	r12,[r0,#240]
 	mov	r7,r0			@ input
 	add	r8,r0,r12,lsl#4
-	mov	r11,r1			@ ouput
+	mov	r11,r1			@ output
 	add	r10,r1,r12,lsl#4
 	str	r12,[r1,#240]

@@ -809,12 +821,12 @@ _armv4_AES_set_enc2dec_key:
 	mov	r0,#0

 #if __ARM_ARCH__>=5
-	ldmia	sp!,{r4-r12,pc}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
 #else
-	ldmia   sp!,{r4-r12,lr}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 	tst	lr,#1
 	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
 #endif
 .size	AES_set_enc2dec_key,.-AES_set_enc2dec_key

@@ -922,19 +934,23 @@ AES_Td:
 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		const AES_KEY *key) {
-.global AES_decrypt
-.type   AES_decrypt,%function
+.globl	AES_decrypt
+.type	AES_decrypt,%function
 .align	5
 AES_decrypt:
-#if __ARM_ARCH__<7
+#ifndef	__thumb2__
 	sub	r3,pc,#8		@ AES_decrypt
 #else
 	adr	r3,.
 #endif
-	stmdb   sp!,{r1,r4-r12,lr}
+	stmdb	sp!,{r1,r4-r12,lr}
+#if defined(__thumb2__) || defined(__APPLE__)
+	adr	r10,AES_Td
+#else
+	sub	r10,r3,#AES_decrypt-AES_Td	@ Td
+#endif
 	mov	r12,r0		@ inp
 	mov	r11,r2
-	sub	r10,r3,#AES_decrypt-AES_Td	@ Td
 #if __ARM_ARCH__<7
 	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
 	ldrb	r4,[r12,#2]	@ manner...
@@ -1021,20 +1037,20 @@ AES_decrypt:
 	strb	r3,[r12,#15]
 #endif
 #if __ARM_ARCH__>=5
-	ldmia	sp!,{r4-r12,pc}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
 #else
-	ldmia   sp!,{r4-r12,lr}
+	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 	tst	lr,#1
 	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
 #endif
 .size	AES_decrypt,.-AES_decrypt

-.type   _armv4_AES_decrypt,%function
+.type	_armv4_AES_decrypt,%function
 .align	2
 _armv4_AES_decrypt:
 	str	lr,[sp,#-4]!		@ push lr
-	ldmia	r11!,{r4-r7}
+	ldmia	r11!,{r4,r5,r6,r7}
 	eor	r0,r0,r4
 	ldr	r12,[r11,#240-16]
 	eor	r1,r1,r5
@@ -1175,5 +1191,6 @@ _armv4_AES_decrypt:
 	sub	r10,r10,#1024
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
-.asciz	"AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.byte	65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
 .align	2

Modified: projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S
==============================================================================
--- projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S	Sat Sep 22 02:23:42 2018	(r338876)
+++ projects/openssl111/secure/lib/libcrypto/arm/aesv8-armx.S	Sat Sep 22 02:42:51 2018	(r338877)
@@ -4,11 +4,12 @@

 #if __ARM_MAX_ARCH__>=7
 .text
-.arch	armv7-a
+.arch	armv7-a	@ don't confuse not-so-latest binutils with argv8 :-)
 .fpu	neon
 .code	32
+#undef	__thumb2__
 .align	5
-rcon:
+.Lrcon:
 .long	0x01,0x01,0x01,0x01
 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
 .long	0x1b,0x1b,0x1b,0x1b
@@ -31,7 +32,7 @@ aes_v8_set_encrypt_key:
 	tst	r1,#0x3f
 	bne	.Lenc_key_abort

-	adr	r3,rcon
+	adr	r3,.Lrcon
 	cmp	r1,#192

 	veor	q0,q0,q0
@@ -49,14 +50,14 @@ aes_v8_set_encrypt_key:
 	vtbl.8	d21,{q3},d5
 	vext.8	q9,q0,q3,#12
 	vst1.32	{q3},[r2]!
-	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
+.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
 	subs	r1,r1,#1

 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
-	 veor	q10,q10,q1
+	veor	q10,q10,q1
 	veor	q3,q3,q9
 	vshl.u8	q1,q1,#1
 	veor	q3,q3,q10
@@ -68,13 +69,13 @@ aes_v8_set_encrypt_key:
 	vtbl.8	d21,{q3},d5
 	vext.8	q9,q0,q3,#12
 	vst1.32	{q3},[r2]!
-	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
+.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
-	 veor	q10,q10,q1
+	veor	q10,q10,q1
 	veor	q3,q3,q9
 	vshl.u8	q1,q1,#1
 	veor	q3,q3,q10
@@ -83,13 +84,13 @@ aes_v8_set_encrypt_key:
 	vtbl.8	d21,{q3},d5
 	vext.8	q9,q0,q3,#12
 	vst1.32	{q3},[r2]!
-	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
+.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
-	 veor	q10,q10,q1
+	veor	q10,q10,q1
 	veor	q3,q3,q9
 	veor	q3,q3,q10
 	vst1.32	{q3},[r2]
@@ -110,7 +111,7 @@ aes_v8_set_encrypt_key:
 	vtbl.8	d21,{q8},d5
 	vext.8	q9,q0,q3,#12
 	vst1.32	{d16},[r2]!
-	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
+.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
 	subs	r1,r1,#1

 	veor	q3,q3,q9
@@ -121,7 +122,7 @@ aes_v8_set_encrypt_key:

 	vdup.32	q9,d7[1]
 	veor	q9,q9,q8
-	 veor	q10,q10,q1
+	veor	q10,q10,q1
 	vext.8	q8,q0,q8,#12
 	vshl.u8	q1,q1,#1
 	veor	q8,q8,q9
@@ -146,14 +147,14 @@ aes_v8_set_encrypt_key:
 	vtbl.8	d21,{q8},d5
 	vext.8	q9,q0,q3,#12
 	vst1.32	{q8},[r2]!
-	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
+.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
 	subs	r1,r1,#1

 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
 	veor	q3,q3,q9
 	vext.8	q9,q0,q9,#12
-	 veor	q10,q10,q1
+	veor	q10,q10,q1
 	veor	q3,q3,q9
 	vshl.u8	q1,q1,#1
 	veor	q3,q3,q10
@@ -162,7 +163,7 @@ aes_v8_set_encrypt_key:

 	vdup.32	q10,d7[1]
 	vext.8	q9,q0,q8,#12
-	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
+.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

 	veor	q8,q8,q9
 	vext.8	q9,q0,q9,#12
@@ -179,7 +180,7 @@ aes_v8_set_encrypt_key:

 .Lenc_key_abort:
 	mov	r0,r3			@ return value
-	
+
 	bx	lr
 .size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@@ -205,15 +206,15 @@ aes_v8_set_decrypt_key:
 .Loop_imc:
 	vld1.32	{q0},[r2]
 	vld1.32	{q1},[r0]
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
 	vst1.32	{q0},[r0],r4
 	vst1.32	{q1},[r2]!
 	cmp	r0,r2
 	bhi	.Loop_imc
 	vld1.32	{q0},[r2]
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
 	vst1.32	{q0},[r0]

 	eor	r0,r0,r0		@ return value
@@ -231,19 +232,19 @@ aes_v8_encrypt:
 	vld1.32	{q1},[r2]!

 .Loop_enc:
-	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
+.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
+.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
 	vld1.32	{q0},[r2]!
 	subs	r3,r3,#2
-	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
+.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
+.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
 	vld1.32	{q1},[r2]!
 	bgt	.Loop_enc

-	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
+.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
+.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
 	vld1.32	{q0},[r2]
-	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
+.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
 	veor	q2,q2,q0

 	vst1.8	{q2},[r1]
@@ -260,19 +261,19 @@ aes_v8_decrypt:
 	vld1.32	{q1},[r2]!

 .Loop_dec:
-	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
+.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
+.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
 	vld1.32	{q0},[r2]!
 	subs	r3,r3,#2
-	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
+.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
+.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
 	vld1.32	{q1},[r2]!
 	bgt	.Loop_dec

-	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
+.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
+.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
 	vld1.32	{q0},[r2]
-	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
+.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
 	veor	q2,q2,q0

 	vst1.8	{q2},[r1]
@@ -283,9 +284,9 @@ aes_v8_decrypt:
 .align	5
 aes_v8_cbc_encrypt:
 	mov	ip,sp
-	stmdb	sp!,{r4-r8,lr}
-	vstmdb	sp!,{d8-d15}		@ ABI specification says so
-	ldmia	ip,{r4-r5}		@ load remaining args
+	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
+	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
+	ldmia	ip,{r4,r5}		@ load remaining args
 	subs	r2,r2,#16
 	mov	r8,#16
 	blo	.Lcbc_abort
@@ -297,13 +298,13 @@ aes_v8_cbc_encrypt:
 	vld1.8	{q6},[r4]
 	vld1.8	{q0},[r0],r8

-	vld1.32	{q8-q9},[r3]		@ load key schedule...
+	vld1.32	{q8,q9},[r3]		@ load key schedule...
 	sub	r5,r5,#6
 	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
 	sub	r5,r5,#2
-	vld1.32	{q10-q11},[r7]!
-	vld1.32	{q12-q13},[r7]!
-	vld1.32	{q14-q15},[r7]!
+	vld1.32	{q10,q11},[r7]!
+	vld1.32	{q12,q13},[r7]!
+	vld1.32	{q14,q15},[r7]!
 	vld1.32	{q7},[r7]
 	add	r7,r3,#32
@@ -315,62 +316,62 @@ aes_v8_cbc_encrypt:
 	veor	q5,q8,q7
 	beq	.Lcbc_enc128

-	vld1.32	{q2-q3},[r7]
+	vld1.32	{q2,q3},[r7]
 	add	r7,r3,#16
 	add	r6,r3,#16*4
 	add	r12,r3,#16*5
-	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 	add	r14,r3,#16*6
 	add	r3,r3,#16*7
 	b	.Lenter_cbc_enc

 .align	4
 .Loop_cbc_enc:
-	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 vst1.8	{q6},[r1]!
+.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	vst1.8	{q6},[r1]!
 .Lenter_cbc_enc:
-	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 	vld1.32	{q8},[r6]
 	cmp	r5,#4
-	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 	vld1.32	{q9},[r12]
 	beq	.Lcbc_enc192

-	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 	vld1.32	{q8},[r14]
-	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 	vld1.32	{q9},[r3]
 	nop

 .Lcbc_enc192:
-	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 subs	r2,r2,#16
-	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 moveq	r8,#0
-	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 vld1.8	{q8},[r0],r8
-	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 veor	q8,q8,q5
-	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 vld1.32 {q9},[r7]		@ re-pre-load rndkey[1]
-	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
+.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	subs	r2,r2,#16
+.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	moveq	r8,#0
+.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	vld1.8	{q8},[r0],r8
+.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	veor	q8,q8,q5
+.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
+.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
 	veor	q6,q0,q7
 	bhs	.Loop_cbc_enc

@@ -379,36 +380,36 @@ aes_v8_cbc_encrypt:

 .align	5
 .Lcbc_enc128:
-	vld1.32	{q2-q3},[r7]
-	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	vld1.32	{q2,q3},[r7]
+.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 	b	.Lenter_cbc_enc128
 .Loop_cbc_enc128:
-	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 vst1.8	{q6},[r1]!
+.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	vst1.8	{q6},[r1]!
 .Lenter_cbc_enc128:
-	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 subs	r2,r2,#16
-	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 moveq	r8,#0
-	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 vld1.8	{q8},[r0],r8
-	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	 veor	q8,q8,q5
-	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
+.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	subs	r2,r2,#16
+.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	moveq	r8,#0
+.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	vld1.8	{q8},[r0],r8
+.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
+.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
+	veor	q8,q8,q5
+.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
 	veor	q6,q0,q7
 	bhs	.Loop_cbc_enc128

@@ -431,81 +432,81 @@ aes_v8_cbc_encrypt:
 	vorr	q11,q10,q10

 .Loop3x_cbc_dec:
-	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
 	vld1.32	{q8},[r7]!
 	subs	r6,r6,#2
-	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
 	vld1.32	{q9},[r7]!
 	bgt	.Loop3x_cbc_dec

-	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 veor	q4,q6,q7
-	 subs	r2,r2,#0x30
-	 veor	q5,q2,q7
-	 movlo	r6,r2			@ r6, r6, is zero at this point
-	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 veor	q9,q3,q7
-	 add	r0,r0,r6		@ r0 is adjusted in such way that
+.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	veor	q4,q6,q7
+	subs	r2,r2,#0x30
+	veor	q5,q2,q7
+	movlo	r6,r2			@ r6, r6, is zero at this point
+.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	veor	q9,q3,q7
+	add	r0,r0,r6		@ r0 is adjusted in such way that
 					@ at exit from the loop q1-q10
 					@ are loaded with last "words"
-	 vorr	q6,q11,q11
-	 mov	r7,r3
-	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 vld1.8	{q2},[r0]!
-	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 vld1.8	{q3},[r0]!
-	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
-	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 vld1.8	{q11},[r0]!
-	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
-	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	 vld1.32 {q8},[r7]!		@ re-pre-load rndkey[0]
-	 add	r6,r5,#2
+	vorr	q6,q11,q11
+	mov	r7,r3
+.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	vld1.8	{q2},[r0]!
+.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	vld1.8	{q3},[r0]!
+.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
+.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
+.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	vld1.8	{q11},[r0]!
+.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
+.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
+.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
+	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
+	add	r6,r5,#2
 	veor	q4,q4,q0
 	veor	q5,q5,q1
 	veor	q10,q10,q9
-	vld1.32 {q9},[r7]!		@ re-pre-load rndkey[1]
+	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
 	vst1.8	{q4},[r1]!
-	vorr	q0,q2,q2
+	vorr	q0,q2,q2
 	vst1.8	{q5},[r1]!
-	vorr	q1,q3,q3
+	vorr	q1,q3,q3
 	vst1.8	{q10},[r1]!
-	vorr	q10,q11,q11
+	vorr	q10,q11,q11
 	bhs	.Loop3x_cbc_dec

 	cmn	r2,#0x30
@@ -513,244 +514,244 @@ aes_v8_cbc_encrypt:
 	nop

 .Lcbc_dec_tail:
-	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
 	vld1.32	{q8},[r7]!
 	subs	r6,r6,#2
-	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
 	vld1.32	{q9},[r7]!
 	bgt	.Lcbc_dec_tail

-	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 cmn	r2,#0x20
-	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 veor	q5,q6,q7
-	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	 veor	q9,q3,q7
-	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
+.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	cmn	r2,#0x20
+.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	veor	q5,q6,q7
+.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
+.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
+.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
+.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
+	veor	q9,q3,q7
+.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
+.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
 	beq	.Lcbc_dec_one
 	veor	q5,q5,q1
 	veor	q9,q9,q10
-	 vorr	q6,q11,q11
+	vorr	q6,q11,q11
 	vst1.8	{q5},[r1]!
 	vst1.8	{q9},[r1]!
 	b	.Lcbc_done

 .Lcbc_dec_one:
 	veor	q5,q5,q10
-	 vorr	q6,q11,q11
+	vorr	q6,q11,q11
 	vst1.8	{q5},[r1]!
 .Lcbc_done:
 	vst1.8	{q6},[r4]
 .Lcbc_abort:
-	vldmia	sp!,{d8-d15}
-	ldmia	sp!,{r4-r8,pc}
+	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
+	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
 .size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
 .globl	aes_v8_ctr32_encrypt_blocks
 .type	aes_v8_ctr32_encrypt_blocks,%function
 .align	5
 aes_v8_ctr32_encrypt_blocks:
-	mov		ip,sp
-	stmdb		sp!,{r4-r10,lr}
-	vstmdb		sp!,{d8-d15}		@ ABI specification says so
-	ldr		r4, [ip]		@ load remaining arg
-	ldr		r5,[r3,#240]
+	mov	ip,sp
+	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
+	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
+	ldr	r4, [ip]		@ load remaining arg
+	ldr	r5,[r3,#240]

-	ldr		r8, [r4, #12]
-	vld1.32		{q0},[r4]
+	ldr	r8, [r4, #12]
+	vld1.32	{q0},[r4]

-	vld1.32		{q8-q9},[r3]		@ load key schedule...
-	sub		r5,r5,#4
-	mov		r12,#16
-	cmp		r2,#2
-	add		r7,r3,r5,lsl#4	@ pointer to last 5 round keys

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***