Date:      Mon, 16 May 2016 19:30:27 +0000 (UTC)
From:      Jung-uk Kim <jkim@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r299966 - in stable/10: secure/lib/libcrypto secure/lib/libcrypto/amd64 secure/lib/libcrypto/i386 sys/sys
Message-ID:  <201605161930.u4GJURNY007107@repo.freebsd.org>

Author: jkim
Date: Mon May 16 19:30:27 2016
New Revision: 299966
URL: https://svnweb.freebsd.org/changeset/base/299966

Log:
  - Make libcrypto.so position independent on i386.
  - Enable a linker error when libcrypto.so contains a relocation against text.
  - Add a "Do not modify" comment to generated source files.
  - Set the CC environment variable for the Perl scripts to enable AVX instructions.
  - Update __FreeBSD_version to indicate libcrypto.so is position independent.
  
  Note: this is a direct commit because head already has the OpenSSL 1.0.2
  branch; it is based on r299389, r299462, r299464, r299479, and r299480.
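  
  A quick sanity check for the first two items (a sketch only; the
  installed path and the SHLIB_MAJOR=7 name are assumptions):
  
      # A position-independent libcrypto.so must not carry a DT_TEXTREL
      # entry in its dynamic section; an empty grep is the expected result.
      readelf -d /usr/lib/libcrypto.so.7 | grep TEXTREL
  
      # The linker error comes from refusing text relocations at link
      # time (GNU ld's -z text behaviour): if any object still needs one,
      # the shared library now fails to link instead of silently carrying
      # DT_TEXTREL.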

Added:
  stable/10/secure/lib/libcrypto/i386/aes-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/aes-586.s
  stable/10/secure/lib/libcrypto/i386/aesni-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/aesni-x86.s
  stable/10/secure/lib/libcrypto/i386/bf-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/bf-586.s
  stable/10/secure/lib/libcrypto/i386/bf-686.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/bf-686.s
  stable/10/secure/lib/libcrypto/i386/bn-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/bn-586.s
  stable/10/secure/lib/libcrypto/i386/cast-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/cast-586.s
  stable/10/secure/lib/libcrypto/i386/cmll-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/cmll-x86.s
  stable/10/secure/lib/libcrypto/i386/co-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/co-586.s
  stable/10/secure/lib/libcrypto/i386/crypt586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/crypt586.s
  stable/10/secure/lib/libcrypto/i386/des-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/des-586.s
  stable/10/secure/lib/libcrypto/i386/ghash-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/ghash-x86.s
  stable/10/secure/lib/libcrypto/i386/md5-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/md5-586.s
  stable/10/secure/lib/libcrypto/i386/rc4-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/rc4-586.s
  stable/10/secure/lib/libcrypto/i386/rc5-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/rc5-586.s
  stable/10/secure/lib/libcrypto/i386/rmd-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/rmd-586.s
  stable/10/secure/lib/libcrypto/i386/sha1-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/sha1-586.s
  stable/10/secure/lib/libcrypto/i386/sha256-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/sha256-586.s
  stable/10/secure/lib/libcrypto/i386/sha512-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/sha512-586.s
  stable/10/secure/lib/libcrypto/i386/vpaes-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/vpaes-x86.s
  stable/10/secure/lib/libcrypto/i386/wp-mmx.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/wp-mmx.s
  stable/10/secure/lib/libcrypto/i386/x86-gf2m.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/x86-gf2m.s
  stable/10/secure/lib/libcrypto/i386/x86-mont.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/x86-mont.s
  stable/10/secure/lib/libcrypto/i386/x86cpuid.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/x86cpuid.s
Deleted:
  stable/10/secure/lib/libcrypto/i386/aes-586.s
  stable/10/secure/lib/libcrypto/i386/aesni-x86.s
  stable/10/secure/lib/libcrypto/i386/bf-586.s
  stable/10/secure/lib/libcrypto/i386/bf-686.s
  stable/10/secure/lib/libcrypto/i386/bn-586.s
  stable/10/secure/lib/libcrypto/i386/cast-586.s
  stable/10/secure/lib/libcrypto/i386/cmll-x86.s
  stable/10/secure/lib/libcrypto/i386/co-586.s
  stable/10/secure/lib/libcrypto/i386/crypt586.s
  stable/10/secure/lib/libcrypto/i386/des-586.s
  stable/10/secure/lib/libcrypto/i386/ghash-x86.s
  stable/10/secure/lib/libcrypto/i386/md5-586.s
  stable/10/secure/lib/libcrypto/i386/rc4-586.s
  stable/10/secure/lib/libcrypto/i386/rc5-586.s
  stable/10/secure/lib/libcrypto/i386/rmd-586.s
  stable/10/secure/lib/libcrypto/i386/sha1-586.s
  stable/10/secure/lib/libcrypto/i386/sha256-586.s
  stable/10/secure/lib/libcrypto/i386/sha512-586.s
  stable/10/secure/lib/libcrypto/i386/vpaes-x86.s
  stable/10/secure/lib/libcrypto/i386/wp-mmx.s
  stable/10/secure/lib/libcrypto/i386/x86-gf2m.s
  stable/10/secure/lib/libcrypto/i386/x86-mont.s
  stable/10/secure/lib/libcrypto/i386/x86cpuid.s
Modified:
  stable/10/secure/lib/libcrypto/Makefile
  stable/10/secure/lib/libcrypto/Makefile.asm
  stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/aesni-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/bsaes-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/cmll-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/ghash-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/md5-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/modexp512-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/rc4-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/sha1-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/sha256-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/sha512-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/vpaes-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/wp-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/x86_64-gf2m.S
  stable/10/secure/lib/libcrypto/amd64/x86_64-mont.S
  stable/10/secure/lib/libcrypto/amd64/x86_64-mont5.S
  stable/10/secure/lib/libcrypto/amd64/x86_64cpuid.S
  stable/10/sys/sys/param.h

Modified: stable/10/secure/lib/libcrypto/Makefile
==============================================================================
--- stable/10/secure/lib/libcrypto/Makefile	Mon May 16 19:10:59 2016	(r299965)
+++ stable/10/secure/lib/libcrypto/Makefile	Mon May 16 19:30:27 2016	(r299966)
@@ -7,7 +7,6 @@ SUBDIR=		engines
 
 LIB=		crypto
 SHLIB_MAJOR=	7
-ALLOW_SHARED_TEXTREL=
 
 NO_LINT=
 
@@ -26,7 +25,7 @@ SRCS=	cpt_err.c cryptlib.c cversion.c ex
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+=	x86_64cpuid.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	x86cpuid.s
+SRCS+=	x86cpuid.S
 .else
 SRCS+=	mem_clr.c
 .endif
@@ -38,7 +37,7 @@ SRCS+=	aes_cfb.c aes_ctr.c aes_ecb.c aes
 SRCS+=	aes-x86_64.S aesni-sha1-x86_64.S aesni-x86_64.S bsaes-x86_64.S \
 	vpaes-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	aes-586.s aesni-x86.s vpaes-x86.s
+SRCS+=	aes-586.S aesni-x86.S vpaes-x86.S
 .else
 SRCS+=	aes_cbc.c aes_core.c
 .endif
@@ -63,9 +62,9 @@ INCS+=	asn1.h asn1_mac.h asn1t.h
 SRCS+=	bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
 .if ${MACHINE_CPUARCH} == "i386"
 .if ${MACHINE_CPU:Mi686}
-SRCS+=	bf-686.s
+SRCS+=	bf-686.S
 .else
-SRCS+=	bf-586.s
+SRCS+=	bf-586.S
 .endif
 .else
 SRCS+=	bf_enc.c
@@ -87,7 +86,7 @@ SRCS+=	bn_add.c bn_blind.c bn_const.c bn
 SRCS+=	modexp512-x86_64.S x86_64-gcc.c x86_64-gf2m.S x86_64-mont.S \
 	x86_64-mont5.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	bn-586.s co-586.s x86-gf2m.s x86-mont.s
+SRCS+=	bn-586.S co-586.S x86-gf2m.S x86-mont.S
 .else
 SRCS+=	bn_asm.c
 .endif
@@ -102,7 +101,7 @@ SRCS+=	cmll_cfb.c cmll_ctr.c cmll_ecb.c 
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+=	cmll_misc.c cmll-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	cmll-x86.s
+SRCS+=	cmll-x86.S
 .else
 SRCS+=	camellia.c cmll_cbc.c cmll_misc.c
 .endif
@@ -136,7 +135,7 @@ SRCS+=	cbc_cksm.c cbc_enc.c cfb64ede.c c
 	fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
 	rand_key.c read2pwd.c rpc_enc.c set_key.c str2key.c xcbc_enc.c
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+=	crypt586.s des-586.s
+SRCS+=	crypt586.S des-586.S
 .else
 SRCS+=	des_enc.c fcrypt_b.c
 .endif
@@ -219,7 +218,7 @@ SRCS+=	md5_dgst.c md5_one.c
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+=	md5-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	md5-586.s
+SRCS+=	md5-586.S
 .endif
 INCS+=	md5.h
 
@@ -232,7 +231,7 @@ SRCS+=	cbc128.c ccm128.c cfb128.c ctr128
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+=	ghash-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	ghash-x86.s
+SRCS+=	ghash-x86.S
 .endif
 INCS+=	modes.h
 
@@ -278,7 +277,7 @@ SRCS+=	rc4_utl.c
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+=	rc4-md5-x86_64.S rc4-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	rc4-586.s
+SRCS+=	rc4-586.S
 .else
 SRCS+=	rc4_enc.c rc4_skey.c
 .endif
@@ -287,7 +286,7 @@ INCS+=	rc4.h
 # rc5
 SRCS+=	rc5_ecb.c rc5_skey.c rc5cfb64.c rc5ofb64.c
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+=	rc5-586.s
+SRCS+=	rc5-586.S
 .else
 SRCS+=	rc5_enc.c
 .endif
@@ -296,7 +295,7 @@ INCS+=	rc5.h
 # ripemd
 SRCS+=	rmd_dgst.c rmd_one.c
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+=	rmd-586.s
+SRCS+=	rmd-586.S
 .endif
 INCS+=	ripemd.h
 
@@ -316,7 +315,7 @@ SRCS+=	sha1_one.c sha1dgst.c sha256.c sh
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+=	sha1-x86_64.S sha256-x86_64.S sha512-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	sha1-586.s sha256-586.s sha512-586.s
+SRCS+=	sha1-586.S sha256-586.S sha512-586.S
 .endif
 INCS+=	sha.h
 
@@ -347,7 +346,7 @@ SRCS+=	wp_dgst.c
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+=	wp-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+=	wp-mmx.s wp_block.c
+SRCS+=	wp-mmx.S wp_block.c
 .else
 SRCS+=	wp_block.c
 .endif
@@ -379,9 +378,6 @@ CFLAGS+=	-I${LCRYPTO_SRC}/crypto/asn1
 CFLAGS+=	-I${LCRYPTO_SRC}/crypto/evp
 CFLAGS+=	-I${LCRYPTO_SRC}/crypto/modes
 
-.if !empty(SRCS:M*.s)
-AFLAGS+=	--noexecstack
-.endif
 .if !empty(SRCS:M*.S)
 ACFLAGS+=	-Wa,--noexecstack
 .endif
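
With every assembly source now a preprocessed .S file, only the ACFLAGS
branch is needed, which is why the plain-AFLAGS block is dropped above.
Whether --noexecstack took effect is visible in the program headers (a
sketch; the installed path is an assumption):

    # Each assembler object is marked non-executable-stack, so the
    # merged GNU_STACK segment should show flags RW rather than RWE.
    readelf -lW /usr/lib/libcrypto.so.7 | grep GNU_STACK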

Modified: stable/10/secure/lib/libcrypto/Makefile.asm
==============================================================================
--- stable/10/secure/lib/libcrypto/Makefile.asm	Mon May 16 19:10:59 2016	(r299965)
+++ stable/10/secure/lib/libcrypto/Makefile.asm	Mon May 16 19:30:27 2016	(r299966)
@@ -1,8 +1,8 @@
 # $FreeBSD$
-# Use this to help generate the asm *.[Ss] files after an import.  It is not
+# Use this to help generate the asm *.S files after an import.  It is not
 # perfect by any means, but does what is needed.
-# Do a 'make -f Makefile.asm all' and it will generate *.s.  Move them
-# to the i386 subdir, and correct any exposed paths and $ FreeBSD $ tags.
+# Do a 'make -f Makefile.asm all' and it will generate *.S.  Move them
+# to the arch subdir, and correct any exposed paths and $ FreeBSD $ tags.
 
 .include "Makefile.inc"
 
@@ -39,31 +39,39 @@ SRCS+=	ghash-x86_64.pl
 SRCS+=	rc4-md5-x86_64.pl rc4-x86_64.pl
 
 # sha
-SRCS+=	sha1-x86_64.pl sha512-x86_64.pl
+SRCS+=	sha1-x86_64.pl
 
 # whrlpool
 SRCS+=	wp-x86_64.pl
 
-ASM=	${SRCS:S/.pl/.S/}
-ASM+=	sha256-x86_64.S x86_64cpuid.S
+# cpuid
+SRCS+=	x86_64cpuid.pl
 
-all:	${ASM}
+SHA_ASM=	sha256-x86_64 sha512-x86_64
+SHA_SRC=	sha512-x86_64.pl
+SHA_TMP=	${SHA_ASM:S/$/.s/}
 
-CLEANFILES+=	${SRCS:M*.pl:S/.pl$/.cmt/} ${SRCS:M*.pl:S/.pl$/.S/}
-CLEANFILES+=	sha256-x86_64.cmt sha256-x86_64.S x86_64cpuid.cmt x86_64cpuid.S
-.SUFFIXES:	.pl .cmt
+ASM=	${SRCS:R:S/$/.S/} ${SHA_ASM:S/$/.S/}
 
-.pl.cmt:
-	( cd `dirname ${.IMPSRC}`/.. ; perl ${.IMPSRC} ${.OBJDIR}/${.TARGET} )
+all:	${ASM}
 
-.cmt.S:
-	( echo '	# $$'FreeBSD'$$'; cat ${.IMPSRC} ) > ${.TARGET}
+CLEANFILES=	${ASM} ${SHA_ASM:S/$/.s/}
+.SUFFIXES:	.pl
 
-sha256-x86_64.cmt: sha512-x86_64.pl
-	( cd `dirname ${.ALLSRC}`/.. ; perl ${.ALLSRC} ${.OBJDIR}/${.TARGET} )
+.pl.S:
+	( echo '# $$'FreeBSD'$$' ;\
+	echo '# Do not modify. This file is auto-generated from ${.IMPSRC:T}.' ;\
+	env CC=cc perl ${.IMPSRC} elf ) > ${.TARGET}
 
-x86_64cpuid.cmt: x86_64cpuid.pl
-	( cd `dirname ${.ALLSRC}` ; perl ${.ALLSRC} ${.OBJDIR}/${.TARGET} )
+${SHA_TMP}: ${SHA_SRC}
+	env CC=cc perl ${.ALLSRC} elf ${.TARGET}
+
+.for s in ${SHA_ASM}
+${s}.S: ${s}.s
+	( echo '	# $$'FreeBSD'$$' ;\
+	echo '	# Do not modify. This file is auto-generated from ${SHA_SRC}.' ;\
+	cat ${s}.s ) > ${.TARGET}
+.endfor
 
 .elif ${MACHINE_CPUARCH} == "i386"
 
@@ -126,16 +134,22 @@ SRCS+=	wp-mmx.pl
 # cpuid
 SRCS+=	x86cpuid.pl
 
-ASM=	${SRCS:S/.pl/.s/}
+ASM=	${SRCS:R:S/$/.S/}
 
 all:	${ASM}
 
-CLEANFILES+=	${SRCS:M*.pl:S/.pl$/.s/}
+CLEANFILES=	${ASM}
 .SUFFIXES:	.pl
 
-.pl.s:
-	( echo '	# $$'FreeBSD'$$' ;\
-	perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ) > ${.TARGET}
+.pl.S:
+	( echo '# $$'FreeBSD'$$' ;\
+	echo '# Do not modify. This file is auto-generated from ${.IMPSRC:T}.' ;\
+	echo '#ifdef PIC' ;\
+	env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} -fpic -DPIC ;\
+	echo '#else' ;\
+	env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ;\
+	echo '#endif') |\
+	sed -E 's|(\.file[[:blank:]]+)".*"|\1"${.TARGET}"|' > ${.TARGET}
 .endif
 
 .include <bsd.prog.mk>
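
The i386 rule above runs each perlasm script twice and pastes the two
outputs into a single preprocessed file, so one .S serves both the PIC
and the non-PIC build. Schematically, each generated file has this shape
(a sketch; x86cpuid.S is only an example name and the assembly bodies
are elided):

    # $FreeBSD$
    # Do not modify. This file is auto-generated from x86cpuid.pl.
    #ifdef PIC
    	# output of "perl x86cpuid.pl elf ${CFLAGS} -fpic -DPIC":
    	# position-independent code that reaches globals through the GOT
    #else
    	# output of "perl x86cpuid.pl elf ${CFLAGS}":
    	# plain code using absolute addresses
    #endif

Because .S files are fed through cpp, the -DPIC that bsd.lib.mk adds
when building the shared-library objects selects the right half at
build time.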

Modified: stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S
==============================================================================
--- stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S	Mon May 16 19:10:59 2016	(r299965)
+++ stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S	Mon May 16 19:30:27 2016	(r299966)
@@ -1,4 +1,5 @@
-	# $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from aes-x86_64.pl.
 .text	
 .type	_x86_64_AES_encrypt,@function
 .align	16

Modified: stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
==============================================================================
--- stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S	Mon May 16 19:10:59 2016	(r299965)
+++ stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S	Mon May 16 19:30:27 2016	(r299966)
@@ -1,4 +1,5 @@
-	# $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from aesni-sha1-x86_64.pl.
 .text	
 
 
@@ -9,6 +10,11 @@ aesni_cbc_sha1_enc:
 
 	movl	OPENSSL_ia32cap_P+0(%rip),%r10d
 	movl	OPENSSL_ia32cap_P+4(%rip),%r11d
+	andl	$268435456,%r11d
+	andl	$1073741824,%r10d
+	orl	%r11d,%r10d
+	cmpl	$1342177280,%r10d
+	je	aesni_cbc_sha1_enc_avx
 	jmp	aesni_cbc_sha1_enc_ssse3
 	.byte	0xf3,0xc3
 .size	aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
@@ -1385,6 +1391,1343 @@ aesni_cbc_sha1_enc_ssse3:
 .Lepilogue_ssse3:
 	.byte	0xf3,0xc3
 .size	aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.type	aesni_cbc_sha1_enc_avx,@function
+.align	16
+aesni_cbc_sha1_enc_avx:
+	movq	8(%rsp),%r10
+
+
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	leaq	-104(%rsp),%rsp
+
+
+	vzeroall
+	movq	%rdi,%r12
+	movq	%rsi,%r13
+	movq	%rdx,%r14
+	movq	%rcx,%r15
+	vmovdqu	(%r8),%xmm11
+	movq	%r8,88(%rsp)
+	shlq	$6,%r14
+	subq	%r12,%r13
+	movl	240(%r15),%r8d
+	addq	$112,%r15
+	addq	%r10,%r14
+
+	leaq	K_XX_XX(%rip),%r11
+	movl	0(%r9),%eax
+	movl	4(%r9),%ebx
+	movl	8(%r9),%ecx
+	movl	12(%r9),%edx
+	movl	%ebx,%esi
+	movl	16(%r9),%ebp
+
+	vmovdqa	64(%r11),%xmm6
+	vmovdqa	0(%r11),%xmm9
+	vmovdqu	0(%r10),%xmm0
+	vmovdqu	16(%r10),%xmm1
+	vmovdqu	32(%r10),%xmm2
+	vmovdqu	48(%r10),%xmm3
+	vpshufb	%xmm6,%xmm0,%xmm0
+	addq	$64,%r10
+	vpshufb	%xmm6,%xmm1,%xmm1
+	vpshufb	%xmm6,%xmm2,%xmm2
+	vpshufb	%xmm6,%xmm3,%xmm3
+	vpaddd	%xmm9,%xmm0,%xmm4
+	vpaddd	%xmm9,%xmm1,%xmm5
+	vpaddd	%xmm9,%xmm2,%xmm6
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	vmovups	-112(%r15),%xmm13
+	vmovups	16-112(%r15),%xmm14
+	jmp	.Loop_avx
+.align	16
+.Loop_avx:
+	addl	0(%rsp),%ebp
+	vmovups	0(%r12),%xmm12
+	vxorps	%xmm13,%xmm12,%xmm12
+	vxorps	%xmm12,%xmm11,%xmm11
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-80(%r15),%xmm15
+	xorl	%edx,%ecx
+	vpalignr	$8,%xmm0,%xmm1,%xmm4
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	vpaddd	%xmm3,%xmm9,%xmm9
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	vpsrldq	$4,%xmm3,%xmm8
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	vpxor	%xmm0,%xmm4,%xmm4
+	shrdl	$2,%ebx,%ebx
+	addl	%esi,%ebp
+	vpxor	%xmm2,%xmm8,%xmm8
+	addl	4(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpxor	%xmm8,%xmm4,%xmm4
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	vmovdqa	%xmm9,48(%rsp)
+	xorl	%ecx,%edi
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	-64(%r15),%xmm14
+	addl	%ebp,%edx
+	vpsrld	$31,%xmm4,%xmm8
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	addl	8(%rsp),%ecx
+	xorl	%ebx,%eax
+	vpslldq	$12,%xmm4,%xmm10
+	vpaddd	%xmm4,%xmm4,%xmm4
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm4,%xmm4
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	shrdl	$7,%ebp,%ebp
+	addl	%esi,%ecx
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm4,%xmm4
+	addl	12(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-48(%r15),%xmm15
+	vpxor	%xmm10,%xmm4,%xmm4
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	vmovdqa	0(%r11),%xmm10
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	shrdl	$7,%edx,%edx
+	addl	%edi,%ebx
+	addl	16(%rsp),%eax
+	xorl	%ebp,%edx
+	vpalignr	$8,%xmm1,%xmm2,%xmm5
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	vpaddd	%xmm4,%xmm10,%xmm10
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	vpsrldq	$4,%xmm4,%xmm9
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	vpxor	%xmm1,%xmm5,%xmm5
+	shrdl	$7,%ecx,%ecx
+	addl	%esi,%eax
+	vpxor	%xmm3,%xmm9,%xmm9
+	addl	20(%rsp),%ebp
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	-32(%r15),%xmm14
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm9,%xmm5,%xmm5
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	vmovdqa	%xmm10,0(%rsp)
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	vpsrld	$31,%xmm5,%xmm9
+	shrdl	$7,%ebx,%ebx
+	addl	%edi,%ebp
+	addl	24(%rsp),%edx
+	xorl	%ecx,%ebx
+	vpslldq	$12,%xmm5,%xmm8
+	vpaddd	%xmm5,%xmm5,%xmm5
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	vpsrld	$30,%xmm8,%xmm10
+	vpor	%xmm9,%xmm5,%xmm5
+	xorl	%ecx,%esi
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-16(%r15),%xmm15
+	addl	%ebp,%edx
+	shrdl	$7,%eax,%eax
+	addl	%esi,%edx
+	vpslld	$2,%xmm8,%xmm8
+	vpxor	%xmm10,%xmm5,%xmm5
+	addl	28(%rsp),%ecx
+	xorl	%ebx,%eax
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	vpxor	%xmm8,%xmm5,%xmm5
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	vmovdqa	16(%r11),%xmm8
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	shrdl	$7,%ebp,%ebp
+	addl	%edi,%ecx
+	addl	32(%rsp),%ebx
+	xorl	%eax,%ebp
+	vpalignr	$8,%xmm2,%xmm3,%xmm6
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	0(%r15),%xmm14
+	vpaddd	%xmm5,%xmm8,%xmm8
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	vpsrldq	$4,%xmm5,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	vpxor	%xmm2,%xmm6,%xmm6
+	shrdl	$7,%edx,%edx
+	addl	%esi,%ebx
+	vpxor	%xmm4,%xmm10,%xmm10
+	addl	36(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	vpxor	%xmm10,%xmm6,%xmm6
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	vmovdqa	%xmm8,16(%rsp)
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	vpsrld	$31,%xmm6,%xmm10
+	shrdl	$7,%ecx,%ecx
+	addl	%edi,%eax
+	addl	40(%rsp),%ebp
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	16(%r15),%xmm15
+	xorl	%edx,%ecx
+	vpslldq	$12,%xmm6,%xmm9
+	vpaddd	%xmm6,%xmm6,%xmm6
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	vpsrld	$30,%xmm9,%xmm8
+	vpor	%xmm10,%xmm6,%xmm6
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%esi,%ebp
+	vpslld	$2,%xmm9,%xmm9
+	vpxor	%xmm8,%xmm6,%xmm6
+	addl	44(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpxor	%xmm9,%xmm6,%xmm6
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	vmovdqa	16(%r11),%xmm9
+	xorl	%ecx,%edi
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	32(%r15),%xmm14
+	addl	%ebp,%edx
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	addl	48(%rsp),%ecx
+	xorl	%ebx,%eax
+	vpalignr	$8,%xmm3,%xmm4,%xmm7
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	vpaddd	%xmm6,%xmm9,%xmm9
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	vpsrldq	$4,%xmm6,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	vpxor	%xmm3,%xmm7,%xmm7
+	shrdl	$7,%ebp,%ebp
+	addl	%esi,%ecx
+	vpxor	%xmm5,%xmm8,%xmm8
+	addl	52(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	48(%r15),%xmm15
+	vpxor	%xmm8,%xmm7,%xmm7
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	vmovdqa	%xmm9,32(%rsp)
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	vpsrld	$31,%xmm7,%xmm8
+	shrdl	$7,%edx,%edx
+	addl	%edi,%ebx
+	addl	56(%rsp),%eax
+	xorl	%ebp,%edx
+	vpslldq	$12,%xmm7,%xmm10
+	vpaddd	%xmm7,%xmm7,%xmm7
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	vpsrld	$30,%xmm10,%xmm9
+	vpor	%xmm8,%xmm7,%xmm7
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%esi,%eax
+	vpslld	$2,%xmm10,%xmm10
+	vpxor	%xmm9,%xmm7,%xmm7
+	addl	60(%rsp),%ebp
+	cmpl	$11,%r8d
+	jb	.Lvaesenclast1
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	64(%r15),%xmm14
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	80(%r15),%xmm15
+	je	.Lvaesenclast1
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	96(%r15),%xmm14
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	112(%r15),%xmm15
+.Lvaesenclast1:
+	vaesenclast	%xmm15,%xmm11,%xmm11
+	vmovups	16-112(%r15),%xmm14
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm10,%xmm7,%xmm7
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	vmovdqa	16(%r11),%xmm10
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%edi,%ebp
+	vpalignr	$8,%xmm6,%xmm7,%xmm9
+	vpxor	%xmm4,%xmm0,%xmm0
+	addl	0(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	vpxor	%xmm1,%xmm0,%xmm0
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	vmovdqa	%xmm10,%xmm8
+	vpaddd	%xmm7,%xmm10,%xmm10
+	xorl	%ecx,%esi
+	vmovups	16(%r12),%xmm12
+	vxorps	%xmm13,%xmm12,%xmm12
+	vmovups	%xmm11,0(%r13,%r12,1)
+	vxorps	%xmm12,%xmm11,%xmm11
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-80(%r15),%xmm15
+	addl	%ebp,%edx
+	vpxor	%xmm9,%xmm0,%xmm0
+	shrdl	$7,%eax,%eax
+	addl	%esi,%edx
+	addl	4(%rsp),%ecx
+	xorl	%ebx,%eax
+	vpsrld	$30,%xmm0,%xmm9
+	vmovdqa	%xmm10,48(%rsp)
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	vpslld	$2,%xmm0,%xmm0
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	shrdl	$7,%ebp,%ebp
+	addl	%edi,%ecx
+	addl	8(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	-64(%r15),%xmm14
+	vpor	%xmm9,%xmm0,%xmm0
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	vmovdqa	%xmm0,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	shrdl	$7,%edx,%edx
+	addl	%esi,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%edi,%eax
+	vpalignr	$8,%xmm7,%xmm0,%xmm10
+	vpxor	%xmm5,%xmm1,%xmm1
+	addl	16(%rsp),%ebp
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-48(%r15),%xmm15
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	vpxor	%xmm2,%xmm1,%xmm1
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	vmovdqa	%xmm8,%xmm9
+	vpaddd	%xmm0,%xmm8,%xmm8
+	shrdl	$7,%ebx,%ebx
+	addl	%esi,%ebp
+	vpxor	%xmm10,%xmm1,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vpsrld	$30,%xmm1,%xmm10
+	vmovdqa	%xmm8,0(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpslld	$2,%xmm1,%xmm1
+	addl	24(%rsp),%ecx
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	xorl	%eax,%esi
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	-32(%r15),%xmm14
+	addl	%edx,%ecx
+	shrdl	$7,%ebp,%ebp
+	addl	%esi,%ecx
+	vpor	%xmm10,%xmm1,%xmm1
+	addl	28(%rsp),%ebx
+	xorl	%eax,%edi
+	vmovdqa	%xmm1,%xmm8
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	shrdl	$7,%edx,%edx
+	addl	%edi,%ebx
+	vpalignr	$8,%xmm0,%xmm1,%xmm8
+	vpxor	%xmm6,%xmm2,%xmm2
+	addl	32(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	vpxor	%xmm3,%xmm2,%xmm2
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	vmovdqa	32(%r11),%xmm10
+	vpaddd	%xmm1,%xmm9,%xmm9
+	shrdl	$7,%ecx,%ecx
+	addl	%esi,%eax
+	vpxor	%xmm8,%xmm2,%xmm2
+	addl	36(%rsp),%ebp
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-16(%r15),%xmm15
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	vpsrld	$30,%xmm2,%xmm8
+	vmovdqa	%xmm9,16(%rsp)
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%edi,%ebp
+	vpslld	$2,%xmm2,%xmm2
+	addl	40(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	shrdl	$7,%eax,%eax
+	addl	%esi,%edx
+	vpor	%xmm8,%xmm2,%xmm2
+	addl	44(%rsp),%ecx
+	xorl	%ebx,%edi
+	vmovdqa	%xmm2,%xmm9
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	xorl	%eax,%edi
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	0(%r15),%xmm14
+	addl	%edx,%ecx
+	shrdl	$7,%ebp,%ebp
+	addl	%edi,%ecx
+	vpalignr	$8,%xmm1,%xmm2,%xmm9
+	vpxor	%xmm7,%xmm3,%xmm3
+	addl	48(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	vpxor	%xmm4,%xmm3,%xmm3
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	vmovdqa	%xmm10,%xmm8
+	vpaddd	%xmm2,%xmm10,%xmm10
+	shrdl	$7,%edx,%edx
+	addl	%esi,%ebx
+	vpxor	%xmm9,%xmm3,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	vpsrld	$30,%xmm3,%xmm9
+	vmovdqa	%xmm10,32(%rsp)
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%edi,%eax
+	vpslld	$2,%xmm3,%xmm3
+	addl	56(%rsp),%ebp
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	16(%r15),%xmm15
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	shldl	$5,%eax,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%esi,%ebp
+	vpor	%xmm9,%xmm3,%xmm3
+	addl	60(%rsp),%edx
+	xorl	%ecx,%edi
+	vmovdqa	%xmm3,%xmm10
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	shrdl	$7,%eax,%eax
+	addl	%edi,%edx
+	vpalignr	$8,%xmm2,%xmm3,%xmm10
+	vpxor	%xmm0,%xmm4,%xmm4
+	addl	0(%rsp),%ecx
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	vpxor	%xmm5,%xmm4,%xmm4
+	xorl	%eax,%esi
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	32(%r15),%xmm14
+	addl	%edx,%ecx
+	vmovdqa	%xmm8,%xmm9
+	vpaddd	%xmm3,%xmm8,%xmm8
+	shrdl	$7,%ebp,%ebp
+	addl	%esi,%ecx
+	vpxor	%xmm10,%xmm4,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	vpsrld	$30,%xmm4,%xmm10
+	vmovdqa	%xmm8,48(%rsp)
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	shrdl	$7,%edx,%edx
+	addl	%edi,%ebx
+	vpslld	$2,%xmm4,%xmm4
+	addl	8(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	shldl	$5,%ebx,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%esi,%eax
+	vpor	%xmm10,%xmm4,%xmm4
+	addl	12(%rsp),%ebp
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	48(%r15),%xmm15
+	xorl	%edx,%edi
+	vmovdqa	%xmm4,%xmm8
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	shrdl	$7,%ebx,%ebx
+	addl	%edi,%ebp
+	vpalignr	$8,%xmm3,%xmm4,%xmm8
+	vpxor	%xmm1,%xmm5,%xmm5
+	addl	16(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	shldl	$5,%ebp,%ebp
+	vpxor	%xmm6,%xmm5,%xmm5
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	vmovdqa	%xmm9,%xmm10
+	vpaddd	%xmm4,%xmm9,%xmm9
+	shrdl	$7,%eax,%eax
+	addl	%esi,%edx
+	vpxor	%xmm8,%xmm5,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	shldl	$5,%edx,%edx
+	vpsrld	$30,%xmm5,%xmm8
+	vmovdqa	%xmm9,0(%rsp)
+	xorl	%eax,%edi
+	cmpl	$11,%r8d
+	jb	.Lvaesenclast2
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	64(%r15),%xmm14
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	80(%r15),%xmm15
+	je	.Lvaesenclast2
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	96(%r15),%xmm14
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	112(%r15),%xmm15
+.Lvaesenclast2:
+	vaesenclast	%xmm15,%xmm11,%xmm11
+	vmovups	16-112(%r15),%xmm14
+	addl	%edx,%ecx
+	shrdl	$7,%ebp,%ebp
+	addl	%edi,%ecx
+	vpslld	$2,%xmm5,%xmm5
+	addl	24(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	shldl	$5,%ecx,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	shrdl	$7,%edx,%edx
+	addl	%esi,%ebx
+	vpor	%xmm8,%xmm5,%xmm5
+	addl	28(%rsp),%eax
+	xorl	%ebp,%edi
+	vmovdqa	%xmm5,%xmm9
+	movl	%ebx,%esi
+	shldl	$5,%ebx,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	shrdl	$7,%ecx,%ecx
+	addl	%edi,%eax
+	vpalignr	$8,%xmm4,%xmm5,%xmm9
+	vpxor	%xmm2,%xmm6,%xmm6
+	movl	%ecx,%edi
+	vmovups	32(%r12),%xmm12
+	vxorps	%xmm13,%xmm12,%xmm12
+	vmovups	%xmm11,16(%r13,%r12,1)
+	vxorps	%xmm12,%xmm11,%xmm11
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-80(%r15),%xmm15
+	xorl	%edx,%ecx
+	addl	32(%rsp),%ebp
+	andl	%edx,%edi
+	vpxor	%xmm7,%xmm6,%xmm6
+	andl	%ecx,%esi
+	shrdl	$7,%ebx,%ebx
+	vmovdqa	%xmm10,%xmm8
+	vpaddd	%xmm5,%xmm10,%xmm10
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	vpxor	%xmm9,%xmm6,%xmm6
+	shldl	$5,%eax,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	vpsrld	$30,%xmm6,%xmm9
+	vmovdqa	%xmm10,16(%rsp)
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	36(%rsp),%edx
+	andl	%ecx,%esi
+	vpslld	$2,%xmm6,%xmm6
+	andl	%ebx,%edi
+	shrdl	$7,%eax,%eax
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	shldl	$5,%ebp,%ebp
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	-64(%r15),%xmm14
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	vpor	%xmm9,%xmm6,%xmm6
+	movl	%eax,%edi
+	xorl	%ebx,%eax
+	vmovdqa	%xmm6,%xmm10
+	addl	40(%rsp),%ecx
+	andl	%ebx,%edi
+	andl	%eax,%esi
+	shrdl	$7,%ebp,%ebp
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	shldl	$5,%edx,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	44(%rsp),%ebx
+	andl	%eax,%esi
+	andl	%ebp,%edi
+	vaesenc	%xmm14,%xmm11,%xmm11
+	vmovups	-48(%r15),%xmm15
+	shrdl	$7,%edx,%edx
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	shldl	$5,%ecx,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	vpalignr	$8,%xmm5,%xmm6,%xmm10
+	vpxor	%xmm3,%xmm7,%xmm7
+	movl	%edx,%edi
+	xorl	%ebp,%edx
+	addl	48(%rsp),%eax
+	andl	%ebp,%edi
+	vpxor	%xmm0,%xmm7,%xmm7
+	andl	%edx,%esi
+	shrdl	$7,%ecx,%ecx
+	vmovdqa	48(%r11),%xmm9
+	vpaddd	%xmm6,%xmm8,%xmm8
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	vpxor	%xmm10,%xmm7,%xmm7
+	shldl	$5,%ebx,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	vpsrld	$30,%xmm7,%xmm10
+	vmovdqa	%xmm8,32(%rsp)
+	movl	%ecx,%esi
+	vaesenc	%xmm15,%xmm11,%xmm11
+	vmovups	-32(%r15),%xmm14
+	xorl	%edx,%ecx
+	addl	52(%rsp),%ebp
+	andl	%edx,%esi
+	vpslld	$2,%xmm7,%xmm7
+	andl	%ecx,%edi
+	shrdl	$7,%ebx,%ebx
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	shldl	$5,%eax,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	vpor	%xmm10,%xmm7,%xmm7
+	movl	%ebx,%edi
+	xorl	%ecx,%ebx
+	vmovdqa	%xmm7,%xmm8
+	addl	56(%rsp),%edx
+	andl	%ecx,%edi

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
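
For reference, the capability test added at the top of
aesni_cbc_sha1_enc (second hunk above) decodes as follows; the bit
meanings follow OpenSSL's OPENSSL_ia32cap_P layout as commonly
documented, not something stated in this commit:

    1 << 28 = 268435456      AVX feature flag (second capability word)
    1 << 30 = 1073741824     OpenSSL's "Intel CPU" marker (first word)
    (1 << 28) | (1 << 30) = 1342177280

Only when both bits are set does the cmpl/je pair branch to
aesni_cbc_sha1_enc_avx; otherwise the code jumps to the SSSE3 path.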


