Replace |alloca| in |BN_mod_exp_mont_consttime|.
|alloca| is dangerous and poorly specified, according to any description of |alloca|. It's also hard for some analysis tools to reason about. The code here assumed |alloca| is a macro, which isn't a valid assumption. Depending on what which headers are included and what toolchain is being used, |alloca| may or may not be defined as a macro, and this might change over time if/when toolchains are updated. Or, we might be doing static analysis and/or dynamic analysis with a different configuration w.r.t. the availability of |alloca| than production builds use. Regardless, the |alloca| code path only kicked in when the inputs are 840 bits or smaller. Since the multi-prime RSA support was removed, for interesting RSA key sizes the input will be at least 1024 bits and this code path won't be triggered since powerbufLen will be larger than 3072 bytes in those cases. ECC inversion via Fermat's Little Theorem has its own constant-time exponentiation so there are no cases where smaller inputs need to be fast. The RSAZ code avoids the |OPENSSL_malloc| for 2048-bit RSA keys. Increasingly the RSAZ code won't be used though, since it will be skipped over on Broadwell+ CPUs. Generalize the RSAZ stack allocation to work for non-RSAZ code paths. In order to ensure this doesn't cause too much stack usage on platforms where RSAZ wasn't already being used, only do so on x86-64, which already has this large stack size requirement due to RSAZ. This change will make it easier to refactor |BN_mod_exp_mont_consttime| to do that more safely and in a way that's more compatible with various analysis tools. This is also a step towards eliminating the |uintptr_t|-based alignment hack. Since this change increases the number of times |OPENSSL_free| is skipped, I've added an explicit |OPENSSL_cleanse| to ensure the zeroization is done. This should be done regardless of the other changes here. Change-Id: I8a161ce2720a26127e85fff7513f394883e50b2e Reviewed-on: https://boringssl-review.googlesource.com/28584 Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> Reviewed-by: David Benjamin <davidben@google.com>
This commit is contained in:
parent
63e2a08123
commit
fee8709f69
@ -914,9 +914,6 @@ static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
|
|
||||||
// line width of the target processor is at least the following value.
|
|
||||||
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
|
|
||||||
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
|
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
|
||||||
(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
|
(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
|
||||||
|
|
||||||
@ -1004,6 +1001,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||||||
// implementation assumes it can use |top| to size R.
|
// implementation assumes it can use |top| to size R.
|
||||||
int top = mont->N.width;
|
int top = mont->N.width;
|
||||||
|
|
||||||
|
#if defined(OPENSSL_BN_ASM_MONT5) || defined(RSAZ_ENABLED)
|
||||||
|
// Share one large stack-allocated buffer between the RSAZ and non-RSAZ code
|
||||||
|
// paths. If we were to use separate static buffers for each then there is
|
||||||
|
// some chance that both large buffers would be allocated on the stack,
|
||||||
|
// causing the stack space requirement to be truly huge (~10KB).
|
||||||
|
alignas(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH) BN_ULONG
|
||||||
|
storage[MOD_EXP_CTIME_STORAGE_LEN];
|
||||||
|
#endif
|
||||||
#ifdef RSAZ_ENABLED
|
#ifdef RSAZ_ENABLED
|
||||||
// If the size of the operands allow it, perform the optimized
|
// If the size of the operands allow it, perform the optimized
|
||||||
// RSAZ exponentiation. For further information see
|
// RSAZ exponentiation. For further information see
|
||||||
@ -1013,7 +1018,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||||||
if (!bn_wexpand(rr, 16)) {
|
if (!bn_wexpand(rr, 16)) {
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
|
RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0],
|
||||||
|
storage);
|
||||||
rr->width = 16;
|
rr->width = 16;
|
||||||
rr->neg = 0;
|
rr->neg = 0;
|
||||||
ret = 1;
|
ret = 1;
|
||||||
@ -1037,27 +1043,24 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||||||
powerbufLen +=
|
powerbufLen +=
|
||||||
sizeof(m->d[0]) *
|
sizeof(m->d[0]) *
|
||||||
(top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
|
(top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
|
||||||
#ifdef alloca
|
|
||||||
if (powerbufLen < 3072) {
|
#if defined(OPENSSL_BN_ASM_MONT5)
|
||||||
powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
|
if ((size_t)powerbufLen <= sizeof(storage)) {
|
||||||
} else
|
powerbuf = (unsigned char *)storage;
|
||||||
|
}
|
||||||
|
// |storage| is more than large enough to handle 1024-bit inputs.
|
||||||
|
assert(powerbuf != NULL || top * BN_BITS2 > 1024);
|
||||||
#endif
|
#endif
|
||||||
{
|
if (powerbuf == NULL) {
|
||||||
if ((powerbufFree = OPENSSL_malloc(
|
powerbufFree =
|
||||||
powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
|
OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
|
||||||
|
if (powerbufFree == NULL) {
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
|
||||||
}
|
}
|
||||||
|
|
||||||
powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
|
|
||||||
OPENSSL_memset(powerbuf, 0, powerbufLen);
|
OPENSSL_memset(powerbuf, 0, powerbufLen);
|
||||||
|
|
||||||
#ifdef alloca
|
|
||||||
if (powerbufLen < 3072) {
|
|
||||||
powerbufFree = NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// lay down tmp and am right after powers table
|
// lay down tmp and am right after powers table
|
||||||
tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
|
tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
|
||||||
am.d = tmp.d + top;
|
am.d = tmp.d + top;
|
||||||
@ -1264,6 +1267,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
|||||||
|
|
||||||
err:
|
err:
|
||||||
BN_MONT_CTX_free(new_mont);
|
BN_MONT_CTX_free(new_mont);
|
||||||
|
if (powerbuf != NULL && powerbufFree == NULL) {
|
||||||
|
OPENSSL_cleanse(powerbuf, powerbufLen);
|
||||||
|
}
|
||||||
OPENSSL_free(powerbufFree);
|
OPENSSL_free(powerbufFree);
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
@ -185,6 +185,16 @@ extern "C" {
|
|||||||
#error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
|
#error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// |BN_mod_exp_mont_consttime| is based on the assumption that the L1 data
|
||||||
|
// cache line width of the target processor is at least the following value.
|
||||||
|
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH 64
|
||||||
|
|
||||||
|
// The number of |BN_ULONG|s needed for the |BN_mod_exp_mont_consttime| stack-
|
||||||
|
// allocated storage buffer. The buffer is just the right size for the RSAZ
|
||||||
|
// and is about ~1KB larger than what's necessary (4480 bytes) for 1024-bit
|
||||||
|
// inputs.
|
||||||
|
#define MOD_EXP_CTIME_STORAGE_LEN \
|
||||||
|
(((320u * 3u) + (32u * 9u * 16u)) / sizeof(BN_ULONG))
|
||||||
|
|
||||||
#define STATIC_BIGNUM(x) \
|
#define STATIC_BIGNUM(x) \
|
||||||
{ \
|
{ \
|
||||||
|
@ -45,8 +45,13 @@ alignas(64) static const BN_ULONG two80[40] = {
|
|||||||
|
|
||||||
void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
|
void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
|
||||||
const BN_ULONG base_norm[16], const BN_ULONG exponent[16],
|
const BN_ULONG base_norm[16], const BN_ULONG exponent[16],
|
||||||
const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0) {
|
const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0,
|
||||||
alignas(64) uint8_t storage[(320 * 3) + (32 * 9 * 16)]; // 5.5KB
|
BN_ULONG storage_words[MOD_EXP_CTIME_STORAGE_LEN]) {
|
||||||
|
OPENSSL_COMPILE_ASSERT(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH % 64 == 0,
|
||||||
|
MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH_is_large_enough);
|
||||||
|
unsigned char *storage = (unsigned char *)storage_words;
|
||||||
|
assert((uintptr_t)storage % 64 == 0);
|
||||||
|
|
||||||
unsigned char *a_inv, *m, *result, *table_s = storage + (320 * 3),
|
unsigned char *a_inv, *m, *result, *table_s = storage + (320 * 3),
|
||||||
*R2 = table_s; // borrow
|
*R2 = table_s; // borrow
|
||||||
if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) {
|
if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) {
|
||||||
|
@ -20,11 +20,14 @@
|
|||||||
// RSAZ_1024_mod_exp_avx2 sets |result| to |base_norm| raised to |exponent|
|
// RSAZ_1024_mod_exp_avx2 sets |result| to |base_norm| raised to |exponent|
|
||||||
// modulo |m_norm|. |base_norm| must be fully-reduced and |exponent| must have
|
// modulo |m_norm|. |base_norm| must be fully-reduced and |exponent| must have
|
||||||
// the high bit set (it is 1024 bits wide). |RR| and |k0| must be |RR| and |n0|,
|
// the high bit set (it is 1024 bits wide). |RR| and |k0| must be |RR| and |n0|,
|
||||||
// respectively, extracted from |m_norm|'s |BN_MONT_CTX|.
|
// respectively, extracted from |m_norm|'s |BN_MONT_CTX|. |storage_words| is a
|
||||||
|
// temporary buffer that must be aligned to |MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH|
|
||||||
|
// bytes.
|
||||||
void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG base_norm[16],
|
void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG base_norm[16],
|
||||||
const BN_ULONG exponent[16],
|
const BN_ULONG exponent[16],
|
||||||
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
|
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
|
||||||
BN_ULONG k0);
|
BN_ULONG k0,
|
||||||
|
BN_ULONG storage_words[MOD_EXP_CTIME_STORAGE_LEN]);
|
||||||
|
|
||||||
// rsaz_avx2_eligible returns one if |RSAZ_1024_mod_exp_avx2| should be used and
|
// rsaz_avx2_eligible returns one if |RSAZ_1024_mod_exp_avx2| should be used and
|
||||||
// zero otherwise.
|
// zero otherwise.
|
||||||
|
Loading…
Reference in New Issue
Block a user