From owner-svn-src-all@freebsd.org Fri Nov 25 13:49:34 2016 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 5D2CAC54F98; Fri, 25 Nov 2016 13:49:34 +0000 (UTC) (envelope-from fabient@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 1F563D90; Fri, 25 Nov 2016 13:49:34 +0000 (UTC) (envelope-from fabient@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id uAPDnXuV042111; Fri, 25 Nov 2016 13:49:33 GMT (envelope-from fabient@FreeBSD.org) Received: (from fabient@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id uAPDnX09042110; Fri, 25 Nov 2016 13:49:33 GMT (envelope-from fabient@FreeBSD.org) Message-Id: <201611251349.uAPDnX09042110@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: fabient set sender to fabient@FreeBSD.org using -f From: Fabien Thomas Date: Fri, 25 Nov 2016 13:49:33 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r309143 - head/sys/libkern X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 25 Nov 2016 13:49:34 -0000 Author: fabient Date: Fri Nov 25 13:49:33 2016 New Revision: 309143 URL: https://svnweb.freebsd.org/changeset/base/309143 Log: In a dual processor system (2*6 cores) during IPSec throughput tests, we see a lot of contention on the arc4 lock, used to generate the IV of the ESP output packets. The idea of this patch is to split this mutex in order to reduce the contention on this lock. Reviewed by: delphij, markm, ache Approved by: so Obtained from: emeric.poupon@stormshield.eu MFC after: 1 month Sponsored by: Stormshield Differential Revision: https://reviews.freebsd.org/D8130 Modified: head/sys/libkern/arc4random.c Modified: head/sys/libkern/arc4random.c ============================================================================== --- head/sys/libkern/arc4random.c Fri Nov 25 13:15:28 2016 (r309142) +++ head/sys/libkern/arc4random.c Fri Nov 25 13:49:33 2016 (r309143) @@ -19,6 +19,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #define ARC4_RESEED_BYTES 65536 #define ARC4_RESEED_SECONDS 300 @@ -26,13 +28,23 @@ __FBSDID("$FreeBSD$"); int arc4rand_iniseed_state = ARC4_ENTR_NONE; -static u_int8_t arc4_i, arc4_j; -static int arc4_numruns = 0; -static u_int8_t arc4_sbox[256]; -static time_t arc4_t_reseed; -static struct mtx arc4_mtx; +MALLOC_DEFINE(M_ARC4RANDOM, "arc4random", "arc4random structures"); -static u_int8_t arc4_randbyte(void); +struct arc4_s { + u_int8_t i, j; + int numruns; + u_int8_t sbox[256]; + time_t t_reseed; + + struct mtx mtx; +}; + +static struct arc4_s *arc4inst = NULL; + +#define ARC4_FOREACH(_arc4) \ + for (_arc4 = &arc4inst[0]; _arc4 <= &arc4inst[mp_maxid]; _arc4++) + +static u_int8_t arc4_randbyte(struct arc4_s *arc4); static __inline void arc4_swap(u_int8_t *a, u_int8_t *b) @@ -48,7 +60,7 @@ arc4_swap(u_int8_t *a, u_int8_t *b) * Stir our S-box. */ static void -arc4_randomstir(void) +arc4_randomstir(struct arc4_s* arc4) { u_int8_t key[ARC4_KEYBYTES]; int n; @@ -60,15 +72,15 @@ arc4_randomstir(void) */ (void)read_random(key, ARC4_KEYBYTES); getmicrouptime(&tv_now); - mtx_lock(&arc4_mtx); + mtx_lock(&arc4->mtx); for (n = 0; n < 256; n++) { - arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256; - arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]); + arc4->j = (arc4->j + arc4->sbox[n] + key[n]) % 256; + arc4_swap(&arc4->sbox[n], &arc4->sbox[arc4->j]); } - arc4_i = arc4_j = 0; + arc4->i = arc4->j = 0; /* Reset for next reseed cycle. */ - arc4_t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS; - arc4_numruns = 0; + arc4->t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS; + arc4->numruns = 0; /* * Throw away the first N words of output, as suggested in the * paper "Weaknesses in the Key Scheduling Algorithm of RC4" @@ -77,8 +89,9 @@ arc4_randomstir(void) * http://dl.acm.org/citation.cfm?id=646557.694759 */ for (n = 0; n < 256*4; n++) - arc4_randbyte(); - mtx_unlock(&arc4_mtx); + arc4_randbyte(arc4); + + mtx_unlock(&arc4->mtx); } /* @@ -87,33 +100,57 @@ arc4_randomstir(void) static void arc4_init(void) { + struct arc4_s *arc4; int n; - mtx_init(&arc4_mtx, "arc4_mtx", NULL, MTX_DEF); - arc4_i = arc4_j = 0; - for (n = 0; n < 256; n++) - arc4_sbox[n] = (u_int8_t) n; + arc4inst = malloc((mp_maxid + 1) * sizeof(struct arc4_s), + M_ARC4RANDOM, M_NOWAIT | M_ZERO); + KASSERT(arc4inst != NULL, ("arc4_init: memory allocation error")); + + ARC4_FOREACH(arc4) { + mtx_init(&arc4->mtx, "arc4_mtx", NULL, MTX_DEF); + + arc4->i = arc4->j = 0; + for (n = 0; n < 256; n++) + arc4->sbox[n] = (u_int8_t) n; - arc4_t_reseed = 0; + arc4->t_reseed = -1; + arc4->numruns = 0; + } } +SYSINIT(arc4, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL); + + +static void +arc4_uninit(void) +{ + struct arc4_s *arc4; + + ARC4_FOREACH(arc4) { + mtx_destroy(&arc4->mtx); + } + + free(arc4inst, M_ARC4RANDOM); +} + +SYSUNINIT(arc4, SI_SUB_LOCK, SI_ORDER_ANY, arc4_uninit, NULL); -SYSINIT(arc4_init, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL); /* * Generate a random byte. */ static u_int8_t -arc4_randbyte(void) +arc4_randbyte(struct arc4_s *arc4) { u_int8_t arc4_t; - arc4_i = (arc4_i + 1) % 256; - arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256; + arc4->i = (arc4->i + 1) % 256; + arc4->j = (arc4->j + arc4->sbox[arc4->i]) % 256; - arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]); + arc4_swap(&arc4->sbox[arc4->i], &arc4->sbox[arc4->j]); - arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256; - return arc4_sbox[arc4_t]; + arc4_t = (arc4->sbox[arc4->i] + arc4->sbox[arc4->j]) % 256; + return arc4->sbox[arc4_t]; } /* @@ -124,20 +161,26 @@ arc4rand(void *ptr, u_int len, int resee { u_char *p; struct timeval tv; + struct arc4_s *arc4; + + if (reseed || atomic_cmpset_int(&arc4rand_iniseed_state, + ARC4_ENTR_HAVE, ARC4_ENTR_SEED)) { + ARC4_FOREACH(arc4) + arc4_randomstir(arc4); + } + arc4 = &arc4inst[curcpu]; getmicrouptime(&tv); - if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE, - ARC4_ENTR_SEED) || reseed || - (arc4_numruns > ARC4_RESEED_BYTES) || - (tv.tv_sec > arc4_t_reseed)) - arc4_randomstir(); + if ((arc4->numruns > ARC4_RESEED_BYTES) || + (tv.tv_sec > arc4->t_reseed)) + arc4_randomstir(arc4); - mtx_lock(&arc4_mtx); - arc4_numruns += len; + mtx_lock(&arc4->mtx); + arc4->numruns += len; p = ptr; while (len--) - *p++ = arc4_randbyte(); - mtx_unlock(&arc4_mtx); + *p++ = arc4_randbyte(arc4); + mtx_unlock(&arc4->mtx); } uint32_t