From 82639e6f5341a3129b7cb62a5a2dd9b65f3c91ef Mon Sep 17 00:00:00 2001 From: Adam Langley Date: Tue, 3 Jul 2018 09:19:54 -0700 Subject: [PATCH] Use a pool of |rand_state| objects. Previously we used thread-local state objects in rand.c. However, for applications with large numbers of threads, this can lead to excessive memory usage. This change causes us to maintain a mutex-protected pool of state objects where the size of the pool equals the maximum concurrency of |RAND_bytes|. This might lead to state objects bouncing between CPUs more often, but should help the memory usage problem. Change-Id: Ie83763d3bc139e64ac17bf7e015ad082b2f8a81a Reviewed-on: https://boringssl-review.googlesource.com/29565 Commit-Queue: Adam Langley CQ-Verified: CQ bot account: commit-bot@chromium.org Reviewed-by: David Benjamin --- crypto/fipsmodule/delocate.h | 2 +- crypto/fipsmodule/rand/rand.c | 237 ++++++++++++++++++---------------- crypto/internal.h | 1 - 3 files changed, 130 insertions(+), 110 deletions(-) diff --git a/crypto/fipsmodule/delocate.h b/crypto/fipsmodule/delocate.h index 065a21ca..59effde1 100644 --- a/crypto/fipsmodule/delocate.h +++ b/crypto/fipsmodule/delocate.h @@ -23,7 +23,7 @@ #if defined(BORINGSSL_FIPS) && !defined(OPENSSL_ASAN) && !defined(OPENSSL_MSAN) #define DEFINE_BSS_GET(type, name) \ static type name __attribute__((used)); \ - type *name##_bss_get(void); + type *name##_bss_get(void) __attribute__((const)); // For FIPS builds we require that CRYPTO_ONCE_INIT be zero. #define DEFINE_STATIC_ONCE(name) DEFINE_BSS_GET(CRYPTO_once_t, name) // For FIPS builds we require that CRYPTO_STATIC_MUTEX_INIT be zero. diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c index 3ec92e6a..02e63bc7 100644 --- a/crypto/fipsmodule/rand/rand.c +++ b/crypto/fipsmodule/rand/rand.c @@ -54,75 +54,6 @@ static const unsigned kReseedInterval = 4096; // continuous random number generator test in FIPS 140-2, section 4.9.2. #define CRNGT_BLOCK_SIZE 16 -// rand_thread_state contains the per-thread state for the RNG. -struct rand_thread_state { - CTR_DRBG_STATE drbg; - // calls is the number of generate calls made on |drbg| since it was last - // (re)seeded. This is bound by |kReseedInterval|. - unsigned calls; - // last_block_valid is non-zero iff |last_block| contains data from - // |CRYPTO_sysrand|. - int last_block_valid; - -#if defined(BORINGSSL_FIPS) - // last_block contains the previous block from |CRYPTO_sysrand|. - uint8_t last_block[CRNGT_BLOCK_SIZE]; - // next and prev form a NULL-terminated, double-linked list of all states in - // a process. - struct rand_thread_state *next, *prev; -#endif -}; - -#if defined(BORINGSSL_FIPS) -// thread_states_list is the head of a linked-list of all |rand_thread_state| -// objects in the process, one per thread. This is needed because FIPS requires -// that they be zeroed on process exit, but thread-local destructors aren't -// called when the whole process is exiting. -DEFINE_BSS_GET(struct rand_thread_state *, thread_states_list); -DEFINE_STATIC_MUTEX(thread_states_list_lock); - -static void rand_thread_state_clear_all(void) __attribute__((destructor)); -static void rand_thread_state_clear_all(void) { - CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get()); - for (struct rand_thread_state *cur = *thread_states_list_bss_get(); - cur != NULL; cur = cur->next) { - CTR_DRBG_clear(&cur->drbg); - } - // |thread_states_list_lock is deliberately left locked so that any threads - // that are still running will hang if they try to call |RAND_bytes|. -} -#endif - -// rand_thread_state_free frees a |rand_thread_state|. This is called when a -// thread exits. -static void rand_thread_state_free(void *state_in) { - struct rand_thread_state *state = state_in; - - if (state_in == NULL) { - return; - } - -#if defined(BORINGSSL_FIPS) - CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get()); - - if (state->prev != NULL) { - state->prev->next = state->next; - } else { - *thread_states_list_bss_get() = state->next; - } - - if (state->next != NULL) { - state->next->prev = state->prev; - } - - CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get()); - - CTR_DRBG_clear(&state->drbg); -#endif - - OPENSSL_free(state); -} - #if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \ !defined(BORINGSSL_UNSAFE_DETERMINISTIC_MODE) @@ -172,9 +103,31 @@ static int hwrand(uint8_t *buf, size_t len) { #endif +// rand_state contains an RNG state. +struct rand_state { + CTR_DRBG_STATE drbg; + // next forms a NULL-terminated linked-list of all free |rand_state| objects. + struct rand_state *next; + // calls is the number of generate calls made on |drbg| since it was last + // (re)seeded. This is bound by |kReseedInterval|. + unsigned calls; + +#if defined(BORINGSSL_FIPS) + // next_all forms another NULL-terminated linked-list, this time of all + // |rand_state| objects that have been allocated including those that might + // currently be in use. + struct rand_state *next_all; + // last_block contains the previous block from |CRYPTO_sysrand|. + uint8_t last_block[CRNGT_BLOCK_SIZE]; + // last_block_valid is non-zero iff |last_block| contains data from + // |CRYPTO_sysrand|. + int last_block_valid; +#endif +}; + #if defined(BORINGSSL_FIPS) -static void rand_get_seed(struct rand_thread_state *state, +static void rand_get_seed(struct rand_state *state, uint8_t seed[CTR_DRBG_ENTROPY_LEN]) { if (!state->last_block_valid) { if (!hwrand(state->last_block, sizeof(state->last_block))) { @@ -223,7 +176,7 @@ static void rand_get_seed(struct rand_thread_state *state, #else -static void rand_get_seed(struct rand_thread_state *state, +static void rand_get_seed(struct rand_state *state, uint8_t seed[CTR_DRBG_ENTROPY_LEN]) { // If not in FIPS mode, we don't overread from the system entropy source and // we don't depend only on the hardware RDRAND. @@ -232,6 +185,97 @@ static void rand_get_seed(struct rand_thread_state *state, #endif +// rand_state_free_list is a list of currently free, |rand_state| structures. +// When a thread needs a |rand_state| it picks the head element of this list and +// allocs a new one if the list is empty. Once it's finished, it pushes the +// state back onto the front of the list. +// +// Previously we used a thread-local state but for processes with large numbers +// of threads this can result in excessive memory usage. Since we don't free +// |rand_state| objects, the number of objects in memory will eventually equal +// the maximum concurrency of |RAND_bytes|. +DEFINE_BSS_GET(struct rand_state *, rand_state_free_list); + +// rand_state_lock protects |rand_state_free_list| (and |rand_state_all_list|, +// in FIPS mode). +DEFINE_STATIC_MUTEX(rand_state_lock); + +#if defined(BORINGSSL_FIPS) +// rand_state_all_list is the head of a linked-list of all |rand_state| objects +// in the process. This is needed because FIPS requires that they be zeroed on +// process exit. +DEFINE_BSS_GET(struct rand_state *, rand_state_all_list); + +// rand_drbg_lock is taken in write mode by |rand_state_clear_all|, and +// in read mode by any operation on the |drbg| member of |rand_state|. +// This ensures that, in the event that a thread races destructor functions, we +// never return bogus random data. At worst, the thread will deadlock. +DEFINE_STATIC_MUTEX(rand_drbg_lock); + +static void rand_state_clear_all(void) __attribute__((destructor)); +static void rand_state_clear_all(void) { + CRYPTO_STATIC_MUTEX_lock_write(rand_drbg_lock_bss_get()); + CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get()); + for (struct rand_state *cur = *rand_state_all_list_bss_get(); + cur != NULL; cur = cur->next_all) { + CTR_DRBG_clear(&cur->drbg); + } + // Both locks are deliberately left locked so that any threads that are still + // running will hang if they try to call |RAND_bytes|. +} +#endif + +// rand_state_init seeds a |rand_state|. +static void rand_state_init(struct rand_state *state) { + OPENSSL_memset(state, 0, sizeof(struct rand_state)); + uint8_t seed[CTR_DRBG_ENTROPY_LEN]; + rand_get_seed(state, seed); + if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) { + abort(); + } +} + +// rand_state_get pops a |rand_state| from the head of +// |rand_state_free_list| and returns it. If the list is empty, it +// creates a fresh |rand_state| and returns that instead. +static struct rand_state *rand_state_get(void) { + struct rand_state *state = NULL; + CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get()); + state = *rand_state_free_list_bss_get(); + if (state != NULL) { + *rand_state_free_list_bss_get() = state->next; + } + CRYPTO_STATIC_MUTEX_unlock_write(rand_state_lock_bss_get()); + + if (state != NULL) { + return state; + } + + state = OPENSSL_malloc(sizeof(struct rand_state)); + if (state == NULL) { + return NULL; + } + + rand_state_init(state); + +#if defined(BORINGSSL_FIPS) + CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get()); + state->next_all = *rand_state_all_list_bss_get(); + *rand_state_all_list_bss_get() = state; + CRYPTO_STATIC_MUTEX_unlock_write(rand_state_lock_bss_get()); +#endif + + return state; +} + +// rand_state_put pushes |state| onto |rand_state_free_list|. +static void rand_state_put(struct rand_state *state) { + CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get()); + state->next = *rand_state_free_list_bss_get(); + *rand_state_free_list_bss_get() = state; + CRYPTO_STATIC_MUTEX_unlock_write(rand_state_lock_bss_get()); +} + void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, const uint8_t user_additional_data[32]) { if (out_len == 0) { @@ -259,41 +303,14 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, additional_data[i] ^= user_additional_data[i]; } - struct rand_thread_state stack_state; - struct rand_thread_state *state = - CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_RAND); + struct rand_state stack_state; + struct rand_state *state = rand_state_get(); if (state == NULL) { - state = OPENSSL_malloc(sizeof(struct rand_thread_state)); - if (state == NULL || - !CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_RAND, state, - rand_thread_state_free)) { - // If the system is out of memory, use an ephemeral state on the - // stack. - state = &stack_state; - } - - state->last_block_valid = 0; - uint8_t seed[CTR_DRBG_ENTROPY_LEN]; - rand_get_seed(state, seed); - if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) { - abort(); - } - state->calls = 0; - -#if defined(BORINGSSL_FIPS) - if (state != &stack_state) { - CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get()); - struct rand_thread_state **states_list = thread_states_list_bss_get(); - state->next = *states_list; - if (state->next != NULL) { - state->next->prev = state; - } - state->prev = NULL; - *states_list = state; - CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get()); - } -#endif + // If the system is out of memory, use an ephemeral state on the + // stack. + state = &stack_state; + rand_state_init(state); } if (state->calls >= kReseedInterval) { @@ -302,13 +319,13 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, #if defined(BORINGSSL_FIPS) // Take a read lock around accesses to |state->drbg|. This is needed to // avoid returning bad entropy if we race with - // |rand_thread_state_clear_all|. + // |rand_state_clear_all|. // // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a // bug on ppc64le. glibc may implement pthread locks by wrapping user code // in a hardware transaction, but, on some older versions of glibc and the // kernel, syscalls made with |syscall| did not abort the transaction. - CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get()); + CRYPTO_STATIC_MUTEX_lock_read(rand_drbg_lock_bss_get()); #endif if (!CTR_DRBG_reseed(&state->drbg, seed, NULL, 0)) { abort(); @@ -316,7 +333,7 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, state->calls = 0; } else { #if defined(BORINGSSL_FIPS) - CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get()); + CRYPTO_STATIC_MUTEX_lock_read(rand_drbg_lock_bss_get()); #endif } @@ -343,8 +360,12 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, } #if defined(BORINGSSL_FIPS) - CRYPTO_STATIC_MUTEX_unlock_read(thread_states_list_lock_bss_get()); + CRYPTO_STATIC_MUTEX_unlock_read(rand_drbg_lock_bss_get()); #endif + + if (state != &stack_state) { + rand_state_put(state); + } } int RAND_bytes(uint8_t *out, size_t out_len) { diff --git a/crypto/internal.h b/crypto/internal.h index 3dde4762..c4e2e517 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -528,7 +528,6 @@ using MutexReadLock = // stored. typedef enum { OPENSSL_THREAD_LOCAL_ERR = 0, - OPENSSL_THREAD_LOCAL_RAND, OPENSSL_THREAD_LOCAL_TEST, NUM_OPENSSL_THREAD_LOCALS, } thread_local_data_t;