Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 17 Jun 2019 15:09:12 +0000 (UTC)
From:      Conrad Meyer <cem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r349138 - in head: sys/dev/random tests/sys/devrandom
Message-ID:  <201906171509.x5HF9CFG045599@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: cem
Date: Mon Jun 17 15:09:12 2019
New Revision: 349138
URL: https://svnweb.freebsd.org/changeset/base/349138

Log:
  random(4): Generalize algorithm-independent APIs
  
  At a basic level, remove assumptions about the underlying algorithm (such as
  output block size and reseeding requirements) from the algorithm-independent
  logic in randomdev.c.  Chacha20 does not have many of the restrictions that
  AES-ICM does as a PRF (Pseudo-Random Function), because it has a cipher
  block size of 512 bits.  The motivation is that by generalizing the API,
  Chacha is not penalized by the limitations of AES.
  
  In READ_RANDOM_UIO, first attempt to NOWAIT allocate a large enough buffer
  for the entire user request, or the maximal input we'll accept between
  signal checking, whichever is smaller.  The idea is that the implementation
  of any randomdev algorithm is then free to divide up large requests in
  whatever fashion it sees fit.
  
  As part of this, two responsibilities from the "algorithm-generic" randomdev
  code are pushed down into the Fortuna ra_read implementation (and any other
  future or out-of-tree ra_read implementations):
  
    1. If an algorithm needs to rekey every N bytes, it is responsible for
    handling that in ra_read(). (I.e., Fortuna's 1MB rekey interval for AES
    block generation.)
  
    2. If an algorithm uses a block cipher that doesn't tolerate partial-block
    requests (again, e.g., AES), it is also responsible for handling that in
    ra_read().
  
  Several APIs are changed from u_int buffer length to the more canonical
  size_t.  Several APIs are changed from taking a blockcount to a bytecount,
  to permit PRFs like Chacha20 to directly generate quantities of output that
  are not multiples of RANDOM_BLOCKSIZE (AES block size).
  
  The Fortuna algorithm is changed to NOT rekey every 1MiB when in Chacha20
  mode (kern.random.use_chacha20_cipher="1").  This is explicitly supported by
  the math in FS&K ยง9.4 (Ferguson, Schneier, and Kohno; "Cryptography
  Engineering"), as well as by their conclusion: "If we had a block cipher
  with a 256-bit [or greater] block size, then the collisions would not
  have been an issue at all."
  
  For now, continue to break up reads into PAGE_SIZE chunks, as they were
  before.  So, no functional change, mostly.
  
  Reviewed by:	markm
  Approved by:	secteam(delphij)
  Differential Revision:	https://reviews.freebsd.org/D20312

Modified:
  head/sys/dev/random/fortuna.c
  head/sys/dev/random/hash.c
  head/sys/dev/random/hash.h
  head/sys/dev/random/other_algorithm.c
  head/sys/dev/random/randomdev.c
  head/sys/dev/random/randomdev.h
  head/tests/sys/devrandom/uint128_test.c

Modified: head/sys/dev/random/fortuna.c
==============================================================================
--- head/sys/dev/random/fortuna.c	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/sys/dev/random/fortuna.c	Mon Jun 17 15:09:12 2019	(r349138)
@@ -128,7 +128,7 @@ static uint8_t zero_region[RANDOM_ZERO_BLOCKSIZE];
 #endif
 
 static void random_fortuna_pre_read(void);
-static void random_fortuna_read(uint8_t *, u_int);
+static void random_fortuna_read(uint8_t *, size_t);
 static bool random_fortuna_seeded(void);
 static bool random_fortuna_seeded_internal(void);
 static void random_fortuna_process_event(struct harvest_event *);
@@ -306,49 +306,45 @@ random_fortuna_reseed_internal(uint32_t *entropy_data,
 }
 
 /*-
- * FS&K - GenerateBlocks()
- * Generate a number of complete blocks of random output.
- */
-static __inline void
-random_fortuna_genblocks(uint8_t *buf, u_int blockcount)
-{
-
-	RANDOM_RESEED_ASSERT_LOCK_OWNED();
-	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
-
-	/*
-	 * Fills buf with RANDOM_BLOCKSIZE * blockcount bytes of keystream.
-	 * Increments fs_counter as it goes.
-	 */
-	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
-	    buf, blockcount);
-}
-
-/*-
  * FS&K - PseudoRandomData()
- * This generates no more than 2^20 bytes of data, and cleans up its
- * internal state when finished. It is assumed that a whole number of
- * blocks are available for writing; any excess generated will be
- * ignored.
+ *
+ * If Chacha20 is used, output size is unrestricted.  If AES-CTR is used,
+ * output size MUST be <= 1MB and a multiple of RANDOM_BLOCKSIZE.  The
+ * reasoning for this is discussed in FS&K 9.4; the significant distinction
+ * between the two ciphers is that AES has a *block* size of 128 bits while
+ * Chacha has a *block* size of 512 bits.
  */
 static __inline void
-random_fortuna_genrandom(uint8_t *buf, u_int bytecount)
+random_fortuna_genrandom(uint8_t *buf, size_t bytecount)
 {
-	uint8_t temp[RANDOM_BLOCKSIZE * RANDOM_KEYS_PER_BLOCK];
-	u_int blockcount;
+	uint8_t newkey[RANDOM_KEYSIZE];
 
 	RANDOM_RESEED_ASSERT_LOCK_OWNED();
+
 	/*-
-	 * FS&K - assert(n < 2^20 (== 1 MB)
+	 * FS&K - assert(n < 2^20 (== 1 MB)) when 128-bit block cipher is used
 	 *      - r = first-n-bytes(GenerateBlocks(ceil(n/16)))
 	 *      - K = GenerateBlocks(2)
 	 */
-	KASSERT((bytecount <= RANDOM_FORTUNA_MAX_READ), ("invalid single read request to Fortuna of %d bytes", bytecount));
-	blockcount = howmany(bytecount, RANDOM_BLOCKSIZE);
-	random_fortuna_genblocks(buf, blockcount);
-	random_fortuna_genblocks(temp, RANDOM_KEYS_PER_BLOCK);
-	randomdev_encrypt_init(&fortuna_state.fs_key, temp);
-	explicit_bzero(temp, sizeof(temp));
+	KASSERT(random_chachamode || bytecount <= RANDOM_FORTUNA_MAX_READ,
+	    ("%s: invalid large read request: %zu bytes", __func__,
+	     bytecount));
+
+	/*
+	 * This is where FS&K would invoke GenerateBlocks().  GenerateBlocks()
+	 * doesn't make a lot of sense or have much value if we use bytecount
+	 * for the API (which is useful for ciphers that do not require
+	 * block-sized output, like Chacha20).
+	 *
+	 * Just invoke our PRF abstraction directly, which is responsible for
+	 * updating fs_counter ('C').
+	 */
+	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
+	    buf, bytecount);
+	randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
+	    newkey, sizeof(newkey));
+	randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
+	explicit_bzero(newkey, sizeof(newkey));
 }
 
 /*-
@@ -441,18 +437,71 @@ random_fortuna_pre_read(void)
  * FS&K - RandomData() (Part 2)
  * Main read from Fortuna, continued. May be called multiple times after
  * the random_fortuna_pre_read() above.
- * The supplied buf MUST be a multiple of RANDOM_BLOCKSIZE in size.
- * Lots of code presumes this for efficiency, both here and in other
- * routines. You are NOT allowed to break this!
+ *
+ * The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
+ * the responsibility of the algorithm to accommodate partial block reads, if a
+ * block output mode is used.
  */
 void
-random_fortuna_read(uint8_t *buf, u_int bytecount)
+random_fortuna_read(uint8_t *buf, size_t bytecount)
 {
+	uint8_t remainder_buf[RANDOM_BLOCKSIZE];
+	size_t read_directly_len, read_chunk;
 
-	KASSERT((bytecount % RANDOM_BLOCKSIZE) == 0, ("%s(): bytecount (= %d) must be a multiple of %d", __func__, bytecount, RANDOM_BLOCKSIZE ));
+	/*
+	 * The underlying AES generator expects multiples of RANDOM_BLOCKSIZE.
+	 */
+	if (random_chachamode)
+		read_directly_len = bytecount;
+	else
+		read_directly_len = rounddown(bytecount, RANDOM_BLOCKSIZE);
+
 	RANDOM_RESEED_LOCK();
-	random_fortuna_genrandom(buf, bytecount);
+	KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
+
+	while (read_directly_len > 0) {
+		/*
+		 * 128-bit block ciphers like AES must be re-keyed at 1MB
+		 * intervals to avoid unacceptable statistical differentiation
+		 * from true random data.
+		 *
+		 * 512-bit block ciphers like Chacha20 do not have this
+		 * problem. (FS&K 9.4)
+		 */
+		if (random_chachamode)
+			read_chunk = read_directly_len;
+		else
+			read_chunk = MIN(read_directly_len,
+			    RANDOM_FORTUNA_MAX_READ);
+
+		/*
+		 * For now, we hold the global Fortuna mutex, so yield
+		 * periodically to provide vague availability to other lock
+		 * users.  PAGE_SIZE is chosen to match existing behavior.
+		 */
+		read_chunk = MIN(read_chunk, PAGE_SIZE);
+
+		random_fortuna_genrandom(buf, read_chunk);
+		buf += read_chunk;
+		read_directly_len -= read_chunk;
+		bytecount -= read_chunk;
+
+		/* Perform the actual yield. */
+		if (read_directly_len != 0) {
+			RANDOM_RESEED_UNLOCK();
+			RANDOM_RESEED_LOCK();
+		}
+	}
+
+	if (bytecount > 0)
+		random_fortuna_genrandom(remainder_buf, sizeof(remainder_buf));
+
 	RANDOM_RESEED_UNLOCK();
+
+	if (bytecount > 0) {
+		memcpy(buf, remainder_buf, bytecount);
+		explicit_bzero(remainder_buf, sizeof(remainder_buf));
+	}
 }
 
 #ifdef _KERNEL

Modified: head/sys/dev/random/hash.c
==============================================================================
--- head/sys/dev/random/hash.c	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/sys/dev/random/hash.c	Mon Jun 17 15:09:12 2019	(r349138)
@@ -125,16 +125,18 @@ randomdev_encrypt_init(union randomdev_key *context, c
 }
 
 /*
- * Create a psuedorandom output stream of 'blockcount' blocks using a CTR-mode
+ * Create a psuedorandom output stream of 'bytecount' bytes using a CTR-mode
  * cipher or similar.  The 128-bit counter is supplied in the in-out parmeter
- * 'ctr.'  The output stream goes to 'd_out.'  'blockcount' RANDOM_BLOCKSIZE
- * bytes are generated.
+ * 'ctr.'  The output stream goes to 'd_out.'
+ *
+ * If AES is used, 'bytecount' is guaranteed to be a multiple of
+ * RANDOM_BLOCKSIZE.
  */
 void
 randomdev_keystream(union randomdev_key *context, uint128_t *ctr,
-    void *d_out, u_int blockcount)
+    void *d_out, size_t bytecount)
 {
-	u_int i;
+	size_t i, blockcount, read_chunk;
 
 	if (random_chachamode) {
 		uint128_t lectr;
@@ -147,16 +149,35 @@ randomdev_keystream(union randomdev_key *context, uint
 		le128enc(&lectr, *ctr);
 
 		chacha_ivsetup(&context->chacha, NULL, (const void *)&lectr);
-		chacha_encrypt_bytes(&context->chacha, NULL, d_out,
-		    RANDOM_BLOCKSIZE * blockcount);
+		while (bytecount > 0) {
+			/*
+			 * We are limited by the chacha_encrypt_bytes API to
+			 * u32 bytes per chunk.
+			 */
+			read_chunk = MIN(bytecount,
+			    rounddown((size_t)UINT32_MAX, CHACHA_BLOCKLEN));
 
+			chacha_encrypt_bytes(&context->chacha, NULL, d_out,
+			    read_chunk);
+
+			d_out = (char *)d_out + read_chunk;
+			bytecount -= read_chunk;
+		}
+
 		/*
 		 * Decode Chacha-updated LE counter to native endian and store
 		 * it back in the caller's in-out parameter.
 		 */
 		chacha_ctrsave(&context->chacha, (void *)&lectr);
 		*ctr = le128dec(&lectr);
+
+		explicit_bzero(&lectr, sizeof(lectr));
 	} else {
+		KASSERT(bytecount % RANDOM_BLOCKSIZE == 0,
+		    ("%s: AES mode invalid bytecount, not a multiple of native "
+		     "block size", __func__));
+
+		blockcount = bytecount / RANDOM_BLOCKSIZE;
 		for (i = 0; i < blockcount; i++) {
 			/*-
 			 * FS&K - r = r|E(K,C)

Modified: head/sys/dev/random/hash.h
==============================================================================
--- head/sys/dev/random/hash.h	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/sys/dev/random/hash.h	Mon Jun 17 15:09:12 2019	(r349138)
@@ -61,7 +61,7 @@ void randomdev_hash_iterate(struct randomdev_hash *, c
 void randomdev_hash_finish(struct randomdev_hash *, void *);
 
 void randomdev_encrypt_init(union randomdev_key *, const void *);
-void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, u_int);
+void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, size_t);
 void randomdev_getkey(union randomdev_key *, const void **, size_t *);
 
 #endif /* SYS_DEV_RANDOM_HASH_H_INCLUDED */

Modified: head/sys/dev/random/other_algorithm.c
==============================================================================
--- head/sys/dev/random/other_algorithm.c	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/sys/dev/random/other_algorithm.c	Mon Jun 17 15:09:12 2019	(r349138)
@@ -84,7 +84,7 @@ __FBSDID("$FreeBSD$");
 #endif /* _KERNEL */
 
 static void random_other_pre_read(void);
-static void random_other_read(uint8_t *, u_int);
+static void random_other_read(uint8_t *, size_t);
 static bool random_other_seeded(void);
 static void random_other_process_event(struct harvest_event *);
 static void random_other_init_alg(void *);
@@ -165,10 +165,10 @@ random_other_pre_read(void)
 }
 
 /*
- * void random_other_read(uint8_t *buf, u_int count)
+ * void random_other_read(uint8_t *buf, size_t count)
  *
  * Generate <count> bytes of output into <*buf>.
- * You may use the fact that <count> will be a multiple of
+ * You may NOT use the fact that <count> will be a multiple of
  * RANDOM_BLOCKSIZE for optimization purposes.
  *
  * This function will always be called with your generator
@@ -176,7 +176,7 @@ random_other_pre_read(void)
  * output here, then feel free to KASSERT() or panic().
  */
 static void
-random_other_read(uint8_t *buf, u_int count)
+random_other_read(uint8_t *buf, size_t count)
 {
 
 	RANDOM_RESEED_LOCK();

Modified: head/sys/dev/random/randomdev.c
==============================================================================
--- head/sys/dev/random/randomdev.c	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/sys/dev/random/randomdev.c	Mon Jun 17 15:09:12 2019	(r349138)
@@ -172,17 +172,21 @@ randomdev_wait_until_seeded(bool interruptible)
 int
 READ_RANDOM_UIO(struct uio *uio, bool nonblock)
 {
-	uint8_t *random_buf;
-	int error;
-	ssize_t read_len, total_read, c;
 	/* 16 MiB takes about 0.08 s CPU time on my 2017 AMD Zen CPU */
 #define SIGCHK_PERIOD (16 * 1024 * 1024)
 	const size_t sigchk_period = SIGCHK_PERIOD;
-
 	CTASSERT(SIGCHK_PERIOD % PAGE_SIZE == 0);
 #undef SIGCHK_PERIOD
 
-	random_buf = malloc(PAGE_SIZE, M_ENTROPY, M_WAITOK);
+	uint8_t *random_buf;
+	size_t total_read, read_len;
+	ssize_t bufsize;
+	int error;
+
+
+	KASSERT(uio->uio_rw == UIO_READ, ("%s: bogus write", __func__));
+	KASSERT(uio->uio_resid >= 0, ("%s: bogus negative resid", __func__));
+
 	p_random_alg_context->ra_pre_read();
 	error = 0;
 	/* (Un)Blocking logic */
@@ -193,44 +197,64 @@ READ_RANDOM_UIO(struct uio *uio, bool nonblock)
 			error = randomdev_wait_until_seeded(
 			    SEEDWAIT_INTERRUPTIBLE);
 	}
-	if (error == 0) {
-		read_rate_increment((uio->uio_resid + sizeof(uint32_t))/sizeof(uint32_t));
-		total_read = 0;
-		while (uio->uio_resid && !error) {
-			read_len = uio->uio_resid;
-			/*
-			 * Belt-and-braces.
-			 * Round up the read length to a crypto block size multiple,
-			 * which is what the underlying generator is expecting.
-			 * See the random_buf size requirements in the Fortuna code.
-			 */
-			read_len = roundup(read_len, RANDOM_BLOCKSIZE);
-			/* Work in chunks page-sized or less */
-			read_len = MIN(read_len, PAGE_SIZE);
-			p_random_alg_context->ra_read(random_buf, read_len);
-			c = MIN(uio->uio_resid, read_len);
-			/*
-			 * uiomove() may yield the CPU before each 'c' bytes
-			 * (up to PAGE_SIZE) are copied out.
-			 */
-			error = uiomove(random_buf, c, uio);
-			total_read += c;
-			/*
-			 * Poll for signals every few MBs to avoid very long
-			 * uninterruptible syscalls.
-			 */
-			if (error == 0 && uio->uio_resid != 0 &&
-			    total_read % sigchk_period == 0) {
-				error = tsleep_sbt(&random_alg_context, PCATCH,
-				    "randrd", SBT_1NS, 0, C_HARDCLOCK);
-				/* Squash tsleep timeout condition */
-				if (error == EWOULDBLOCK)
-					error = 0;
-			}
+	if (error != 0)
+		return (error);
+
+	read_rate_increment(howmany(uio->uio_resid + 1, sizeof(uint32_t)));
+	total_read = 0;
+
+	/* Easy to deal with the trivial 0 byte case. */
+	if (__predict_false(uio->uio_resid == 0))
+		return (0);
+
+	/*
+	 * If memory is plentiful, use maximally sized requests to avoid
+	 * per-call algorithm overhead.  But fall back to a single page
+	 * allocation if the full request isn't immediately available.
+	 */
+	bufsize = MIN(sigchk_period, (size_t)uio->uio_resid);
+	random_buf = malloc(bufsize, M_ENTROPY, M_NOWAIT);
+	if (random_buf == NULL) {
+		bufsize = PAGE_SIZE;
+		random_buf = malloc(bufsize, M_ENTROPY, M_WAITOK);
+	}
+
+	error = 0;
+	while (uio->uio_resid > 0 && error == 0) {
+		read_len = MIN((size_t)uio->uio_resid, bufsize);
+
+		p_random_alg_context->ra_read(random_buf, read_len);
+
+		/*
+		 * uiomove() may yield the CPU before each 'read_len' bytes (up
+		 * to bufsize) are copied out.
+		 */
+		error = uiomove(random_buf, read_len, uio);
+		total_read += read_len;
+
+		/*
+		 * Poll for signals every few MBs to avoid very long
+		 * uninterruptible syscalls.
+		 */
+		if (error == 0 && uio->uio_resid != 0 &&
+		    total_read % sigchk_period == 0) {
+			error = tsleep_sbt(&random_alg_context, PCATCH,
+			    "randrd", SBT_1NS, 0, C_HARDCLOCK);
+			/* Squash tsleep timeout condition */
+			if (error == EWOULDBLOCK)
+				error = 0;
 		}
-		if (error == ERESTART || error == EINTR)
-			error = 0;
 	}
+
+	/*
+	 * Short reads due to signal interrupt should not indicate error.
+	 * Instead, the uio will reflect that the read was shorter than
+	 * requested.
+	 */
+	if (error == ERESTART || error == EINTR)
+		error = 0;
+
+	explicit_bzero(random_buf, bufsize);
 	free(random_buf, M_ENTROPY);
 	return (error);
 }
@@ -249,7 +273,6 @@ READ_RANDOM_UIO(struct uio *uio, bool nonblock)
 void
 READ_RANDOM(void *random_buf, u_int len)
 {
-	u_int read_directly_len;
 
 	KASSERT(random_buf != NULL, ("No suitable random buffer in %s", __func__));
 	p_random_alg_context->ra_pre_read();
@@ -278,23 +301,7 @@ READ_RANDOM(void *random_buf, u_int len)
 		(void)randomdev_wait_until_seeded(SEEDWAIT_UNINTERRUPTIBLE);
 	}
 	read_rate_increment(roundup2(len, sizeof(uint32_t)));
-	/*
-	 * The underlying generator expects multiples of
-	 * RANDOM_BLOCKSIZE.
-	 */
-	read_directly_len = rounddown(len, RANDOM_BLOCKSIZE);
-	if (read_directly_len > 0)
-		p_random_alg_context->ra_read(random_buf, read_directly_len);
-	if (read_directly_len < len) {
-		uint8_t remainder_buf[RANDOM_BLOCKSIZE];
-
-		p_random_alg_context->ra_read(remainder_buf,
-		    sizeof(remainder_buf));
-		memcpy((char *)random_buf + read_directly_len, remainder_buf,
-		    len - read_directly_len);
-
-		explicit_bzero(remainder_buf, sizeof(remainder_buf));
-	}
+	p_random_alg_context->ra_read(random_buf, len);
 }
 
 bool

Modified: head/sys/dev/random/randomdev.h
==============================================================================
--- head/sys/dev/random/randomdev.h	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/sys/dev/random/randomdev.h	Mon Jun 17 15:09:12 2019	(r349138)
@@ -68,7 +68,7 @@ struct harvest_event;
 typedef void random_alg_init_t(void *);
 typedef void random_alg_deinit_t(void *);
 typedef void random_alg_pre_read_t(void);
-typedef void random_alg_read_t(uint8_t *, u_int);
+typedef void random_alg_read_t(uint8_t *, size_t);
 typedef bool random_alg_seeded_t(void);
 typedef void random_alg_reseed_t(void);
 typedef void random_alg_eventprocessor_t(struct harvest_event *);

Modified: head/tests/sys/devrandom/uint128_test.c
==============================================================================
--- head/tests/sys/devrandom/uint128_test.c	Mon Jun 17 14:59:45 2019	(r349137)
+++ head/tests/sys/devrandom/uint128_test.c	Mon Jun 17 15:09:12 2019	(r349138)
@@ -209,8 +209,7 @@ ATF_TC_BODY(uint128_chacha_ctr, tc)
 		vec_u32_tole128(expectedle, tests[i].expected);
 
 		a = le128dec(inputle);
-		randomdev_keystream(&context, &a, trash, sizeof(trash) /
-		    RANDOM_BLOCKSIZE);
+		randomdev_keystream(&context, &a, trash, sizeof(trash));
 		u128_check_equality(le128dec(expectedle), a, tests[i].descr);
 	}
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201906171509.x5HF9CFG045599>