Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 16 Nov 2013 09:01:24 +0000 (UTC)
From:      John-Mark Gurney <jmg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r258212 - stable/10/sys/crypto/aesni
Message-ID:  <201311160901.rAG91O4g025564@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jmg
Date: Sat Nov 16 09:01:24 2013
New Revision: 258212
URL: http://svnweb.freebsd.org/changeset/base/258212

Log:
  MFC r257757:
  fix issues w/ AES-NI on unaligned data blocks...
  
  Approved by:	re (kib)

Modified:
  stable/10/sys/crypto/aesni/aesencdec.h
  stable/10/sys/crypto/aesni/aesni.h
  stable/10/sys/crypto/aesni/aesni_wrap.c
Directory Properties:
  stable/10/sys/   (props changed)

Modified: stable/10/sys/crypto/aesni/aesencdec.h
==============================================================================
--- stable/10/sys/crypto/aesni/aesencdec.h	Sat Nov 16 08:28:14 2013	(r258211)
+++ stable/10/sys/crypto/aesni/aesencdec.h	Sat Nov 16 09:01:24 2013	(r258212)
@@ -30,11 +30,10 @@
 #include <wmmintrin.h>
 
 static inline void
-aesni_enc8(int rounds, const uint8_t *key_schedule, __m128i a,
+aesni_enc8(int rounds, const __m128i *keysched, __m128i a,
     __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
     __m128i h, __m128i out[8])
 {
-	const __m128i *keysched = (const __m128i *)key_schedule;
 	int i;
 
 	a ^= keysched[0];
@@ -68,11 +67,10 @@ aesni_enc8(int rounds, const uint8_t *ke
 }
 
 static inline void
-aesni_dec8(int rounds, const uint8_t *key_schedule, __m128i a,
+aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
     __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
     __m128i h, __m128i out[8])
 {
-	const __m128i *keysched = (const __m128i *)key_schedule;
 	int i;
 
 	a ^= keysched[0];
@@ -106,10 +104,9 @@ aesni_dec8(int rounds, const uint8_t *ke
 }
 
 static inline __m128i
-aesni_enc(int rounds, const uint8_t *key_schedule, const __m128i from)
+aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
 {
 	__m128i tmp;
-	const __m128i *keysched = (const __m128i *)key_schedule;
 	int i;
 
 	tmp = from ^ keysched[0];
@@ -121,10 +118,9 @@ aesni_enc(int rounds, const uint8_t *key
 }
 
 static inline __m128i
-aesni_dec(int rounds, const uint8_t *key_schedule, const __m128i from)
+aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
 {
 	__m128i tmp;
-	const __m128i *keysched = (const __m128i *)key_schedule;
 	int i;
 
 	tmp = from ^ keysched[0];

Modified: stable/10/sys/crypto/aesni/aesni.h
==============================================================================
--- stable/10/sys/crypto/aesni/aesni.h	Sat Nov 16 08:28:14 2013	(r258211)
+++ stable/10/sys/crypto/aesni/aesni.h	Sat Nov 16 09:01:24 2013	(r258212)
@@ -71,29 +71,30 @@ struct aesni_session {
 /*
  * Internal functions, implemented in assembler.
  */
-void aesni_set_enckey(const uint8_t *userkey, uint8_t *encrypt_schedule,
-    int number_of_rounds);
-void aesni_set_deckey(const uint8_t *encrypt_schedule,
-    uint8_t *decrypt_schedule, int number_of_rounds);
+void aesni_set_enckey(const uint8_t *userkey,
+    uint8_t *encrypt_schedule /*__aligned(16)*/, int number_of_rounds);
+void aesni_set_deckey(const uint8_t *encrypt_schedule /*__aligned(16)*/,
+    uint8_t *decrypt_schedule /*__aligned(16)*/, int number_of_rounds);
 
 /*
  * Slightly more public interfaces.
  */
-void aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
-    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
-void aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
-    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]);
-void aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
-    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]);
-void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
-    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]);
-
-void aesni_encrypt_xts(int rounds, const void *data_schedule,
-    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
-    const uint8_t iv[AES_BLOCK_LEN]);
-void aesni_decrypt_xts(int rounds, const void *data_schedule,
-    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
+void aesni_encrypt_cbc(int rounds, const void *key_schedule /*__aligned(16)*/,
+    size_t len, const uint8_t *from, uint8_t *to,
     const uint8_t iv[AES_BLOCK_LEN]);
+void aesni_decrypt_cbc(int rounds, const void *key_schedule /*__aligned(16)*/,
+    size_t len, uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]);
+void aesni_encrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/,
+    size_t len, const uint8_t *from, uint8_t *to);
+void aesni_decrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/,
+    size_t len, const uint8_t *from, uint8_t *to);
+
+void aesni_encrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
+    const void *tweak_schedule /*__aligned(16)*/, size_t len,
+    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
+void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/,
+    const void *tweak_schedule /*__aligned(16)*/, size_t len,
+    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]);
 
 int aesni_cipher_setup(struct aesni_session *ses,
     struct cryptoini *encini);
@@ -103,4 +104,4 @@ int aesni_cipher_process(struct aesni_se
 uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp,
     int *allocated);
 
-#endif
+#endif /* _AESNI_H_ */

Modified: stable/10/sys/crypto/aesni/aesni_wrap.c
==============================================================================
--- stable/10/sys/crypto/aesni/aesni_wrap.c	Sat Nov 16 08:28:14 2013	(r258211)
+++ stable/10/sys/crypto/aesni/aesni_wrap.c	Sat Nov 16 09:01:24 2013	(r258212)
@@ -41,6 +41,10 @@ __FBSDID("$FreeBSD$");
 
 MALLOC_DECLARE(M_AESNI);
 
+struct blocks8 {
+	__m128i	blk[8];
+} __packed;
+
 void
 aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
     const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
@@ -65,20 +69,20 @@ aesni_decrypt_cbc(int rounds, const void
     uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
 {
 	__m128i blocks[8];
-	__m128i *bufs;
+	struct blocks8 *blks;
 	__m128i ivreg, nextiv;
 	size_t i, j, cnt;
 
 	ivreg = _mm_loadu_si128((const __m128i *)iv);
 	cnt = len / AES_BLOCK_LEN / 8;
 	for (i = 0; i < cnt; i++) {
-		bufs = (__m128i *)buf;
-		aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1],
-		    bufs[2], bufs[3], bufs[4], bufs[5], bufs[6],
-		    bufs[7], &blocks[0]);
+		blks = (struct blocks8 *)buf;
+		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
+		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
+		    blks->blk[6], blks->blk[7], &blocks[0]);
 		for (j = 0; j < 8; j++) {
-			nextiv = bufs[j];
-			bufs[j] = blocks[j] ^ ivreg;
+			nextiv = blks->blk[j];
+			blks->blk[j] = blocks[j] ^ ivreg;
 			ivreg = nextiv;
 		}
 		buf += AES_BLOCK_LEN * 8;
@@ -86,9 +90,9 @@ aesni_decrypt_cbc(int rounds, const void
 	i *= 8;
 	cnt = len / AES_BLOCK_LEN;
 	for (; i < cnt; i++) {
-		bufs = (__m128i *)buf;
-		nextiv = bufs[0];
-		bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg;
+		nextiv = _mm_loadu_si128((void *)buf);
+		_mm_storeu_si128((void *)buf,
+		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
 		ivreg = nextiv;
 		buf += AES_BLOCK_LEN;
 	}
@@ -99,15 +103,26 @@ aesni_encrypt_ecb(int rounds, const void
     const uint8_t *from, uint8_t *to)
 {
 	__m128i tot;
-	const __m128i *blocks;
+	__m128i tout[8];
+	struct blocks8 *top;
+	const struct blocks8 *blks;
 	size_t i, cnt;
 
 	cnt = len / AES_BLOCK_LEN / 8;
 	for (i = 0; i < cnt; i++) {
-		blocks = (const __m128i *)from;
-		aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
-		    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
-		    blocks[7], (__m128i *)to);
+		blks = (const struct blocks8 *)from;
+		top = (struct blocks8 *)to;
+		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
+		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
+		    blks->blk[6], blks->blk[7], tout);
+		top->blk[0] = tout[0];
+		top->blk[1] = tout[1];
+		top->blk[2] = tout[2];
+		top->blk[3] = tout[3];
+		top->blk[4] = tout[4];
+		top->blk[5] = tout[5];
+		top->blk[6] = tout[6];
+		top->blk[7] = tout[7];
 		from += AES_BLOCK_LEN * 8;
 		to += AES_BLOCK_LEN * 8;
 	}
@@ -127,15 +142,26 @@ aesni_decrypt_ecb(int rounds, const void
     const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
 {
 	__m128i tot;
-	const __m128i *blocks;
+	__m128i tout[8];
+	const struct blocks8 *blks;
+	struct blocks8 *top;
 	size_t i, cnt;
 
 	cnt = len / AES_BLOCK_LEN / 8;
 	for (i = 0; i < cnt; i++) {
-		blocks = (const __m128i *)from;
-		aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
-		    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
-		    blocks[7], (__m128i *)to);
+		blks = (const struct blocks8 *)from;
+		top = (struct blocks8 *)to;
+		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
+		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
+		    blks->blk[6], blks->blk[7], tout);
+		top->blk[0] = tout[0];
+		top->blk[1] = tout[1];
+		top->blk[2] = tout[2];
+		top->blk[3] = tout[3];
+		top->blk[4] = tout[4];
+		top->blk[5] = tout[5];
+		top->blk[6] = tout[6];
+		top->blk[7] = tout[7];
 		from += AES_BLOCK_LEN * 8;
 		to += AES_BLOCK_LEN * 8;
 	}
@@ -173,31 +199,33 @@ xts_crank_lfsr(__m128i inp)
 }
 
 static void
-aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
-    const __m128i *from, __m128i *to, int do_encrypt)
+aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
+    const uint8_t *from, uint8_t *to, int do_encrypt)
 {
 	__m128i block;
 
-	block = *from ^ *tweak;
+	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
 
 	if (do_encrypt)
 		block = aesni_enc(rounds - 1, key_schedule, block);
 	else
 		block = aesni_dec(rounds - 1, key_schedule, block);
 
-	*to = block ^ *tweak;
+	_mm_storeu_si128((__m128i *)to, block ^ *tweak);
 
 	*tweak = xts_crank_lfsr(*tweak);
 }
 
 static void
-aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
-    const __m128i *from, __m128i *to, int do_encrypt)
+aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
+    const uint8_t *from, uint8_t *to, int do_encrypt)
 {
 	__m128i tmptweak;
 	__m128i a, b, c, d, e, f, g, h;
 	__m128i tweaks[8];
 	__m128i tmp[8];
+	__m128i *top;
+	const __m128i *fromp;
 
 	tmptweak = *tweak;
 
@@ -205,10 +233,12 @@ aesni_crypt_xts_block8(int rounds, const
 	 * unroll the loop.  This lets gcc put values directly in the
 	 * register and saves memory accesses.
 	 */
+	fromp = (const __m128i *)from;
 #define PREPINP(v, pos) 					\
 		do {						\
 			tweaks[(pos)] = tmptweak;		\
-			(v) = from[(pos)] ^ tmptweak;		\
+			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
+			    tmptweak;				\
 			tmptweak = xts_crank_lfsr(tmptweak);	\
 		} while (0)
 	PREPINP(a, 0);
@@ -228,20 +258,21 @@ aesni_crypt_xts_block8(int rounds, const
 		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
 		    tmp);
 
-	to[0] = tmp[0] ^ tweaks[0];
-	to[1] = tmp[1] ^ tweaks[1];
-	to[2] = tmp[2] ^ tweaks[2];
-	to[3] = tmp[3] ^ tweaks[3];
-	to[4] = tmp[4] ^ tweaks[4];
-	to[5] = tmp[5] ^ tweaks[5];
-	to[6] = tmp[6] ^ tweaks[6];
-	to[7] = tmp[7] ^ tweaks[7];
+	top = (__m128i *)to;
+	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
+	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
+	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
+	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
+	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
+	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
+	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
+	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
 }
 
 static void
-aesni_crypt_xts(int rounds, const void *data_schedule,
-    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
-    const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
+aesni_crypt_xts(int rounds, const __m128i *data_schedule,
+    const __m128i *tweak_schedule, size_t len, const uint8_t *from,
+    uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
 {
 	__m128i tweakreg;
 	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
@@ -264,7 +295,7 @@ aesni_crypt_xts(int rounds, const void *
 	cnt = len / AES_XTS_BLOCKSIZE / 8;
 	for (i = 0; i < cnt; i++) {
 		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
-		    (const __m128i *)from, (__m128i *)to, do_encrypt);
+		    from, to, do_encrypt);
 		from += AES_XTS_BLOCKSIZE * 8;
 		to += AES_XTS_BLOCKSIZE * 8;
 	}
@@ -272,7 +303,7 @@ aesni_crypt_xts(int rounds, const void *
 	cnt = len / AES_XTS_BLOCKSIZE;
 	for (; i < cnt; i++) {
 		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
-		    (const __m128i *)from, (__m128i *)to, do_encrypt);
+		    from, to, do_encrypt);
 		from += AES_XTS_BLOCKSIZE;
 		to += AES_XTS_BLOCKSIZE;
 	}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201311160901.rAG91O4g025564>