From 8370fb6b416d8177f9775347440925b831fcce69 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Tue, 24 Apr 2018 23:40:01 -0400 Subject: [PATCH] Implement constant-time generic multiplication. This is slower, but constant-time. It intentionally omits the signed digit optimization because we cannot be sure the doubling case will be unreachable for all curves. This is a fallback generic implementation for curves which we must support for compatibility but which are not common or important enough to justify curve-specific work. Before: Did 814 ECDH P-384 operations in 1085384us (750.0 ops/sec) Did 1430 ECDSA P-384 signing operations in 1081988us (1321.6 ops/sec) Did 308 ECDH P-521 operations in 1057741us (291.2 ops/sec) Did 539 ECDSA P-521 signing operations in 1049797us (513.4 ops/sec) After: Did 715 ECDH P-384 operations in 1080161us (661.9 ops/sec) Did 1188 ECDSA P-384 verify operations in 1069567us (1110.7 ops/sec) Did 275 ECDH P-521 operations in 1060503us (259.3 ops/sec) Did 506 ECDSA P-521 signing operations in 1084739us (466.5 ops/sec) But we're still faster than the old BIGNUM implementation. EC_FELEM more than paid for both the loss of points_make_affine and this CL. Bug: 239 Change-Id: I65d71a731aad16b523928ee47618822d503ea704 Reviewed-on: https://boringssl-review.googlesource.com/27708 Commit-Queue: David Benjamin CQ-Verified: CQ bot account: commit-bot@chromium.org Reviewed-by: Adam Langley --- crypto/fipsmodule/bcm.c | 1 + crypto/fipsmodule/ec/ec_montgomery.c | 4 +- crypto/fipsmodule/ec/internal.h | 10 ++- crypto/fipsmodule/ec/simple.c | 4 +- crypto/fipsmodule/ec/simple_mul.c | 98 ++++++++++++++++++++++++++++ crypto/fipsmodule/ec/wnaf.c | 35 ++++------ 6 files changed, 121 insertions(+), 31 deletions(-) create mode 100644 crypto/fipsmodule/ec/simple_mul.c diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c index 5d9ebfc7..51ac7d50 100644 --- a/crypto/fipsmodule/bcm.c +++ b/crypto/fipsmodule/bcm.c @@ -66,6 +66,7 @@ #include "ec/p256-x86_64.c" #include "ec/scalar.c" #include "ec/simple.c" +#include "ec/simple_mul.c" #include "ec/util.c" #include "ec/wnaf.c" #include "hmac/hmac.c" diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c index eb37e501..9eace95f 100644 --- a/crypto/fipsmodule/ec/ec_montgomery.c +++ b/crypto/fipsmodule/ec/ec_montgomery.c @@ -225,8 +225,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) { out->group_finish = ec_GFp_mont_group_finish; out->group_set_curve = ec_GFp_mont_group_set_curve; out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates; - out->mul = ec_wNAF_mul /* XXX: Not constant time. */; - out->mul_public = ec_wNAF_mul; + out->mul = ec_GFp_simple_mul; + out->mul_public = ec_GFp_simple_mul_public; out->felem_mul = ec_GFp_mont_felem_mul; out->felem_sqr = ec_GFp_mont_felem_sqr; out->bignum_to_felem = ec_GFp_mont_bignum_to_felem; diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h index aba7d1dd..bb172b26 100644 --- a/crypto/fipsmodule/ec/internal.h +++ b/crypto/fipsmodule/ec/internal.h @@ -287,6 +287,10 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public( const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar, const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx); +void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, + const EC_SCALAR *p_scalar); + // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of // which will be either zero or odd with an absolute value less than 2^w @@ -298,9 +302,9 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public( void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, const EC_SCALAR *scalar, size_t bits, int w); -void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar); +void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, + const EC_SCALAR *p_scalar); // method functions in simple.c int ec_GFp_simple_group_init(EC_GROUP *); diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c index 8bbb5a96..5c637118 100644 --- a/crypto/fipsmodule/ec/simple.c +++ b/crypto/fipsmodule/ec/simple.c @@ -287,9 +287,7 @@ void ec_GFp_simple_add(const EC_GROUP *group, EC_RAW_POINT *out, BN_ULONG yneq = ec_felem_non_zero_mask(group, &r); - // TODO(davidben): Analyze how case relates to timing considerations for the - // supported curves which hit it (P-224, P-384, and P-521) and the - // to-be-written constant-time generic multiplication implementation. + // This case will never occur in the constant-time |ec_GFp_simple_mul|. if (!xneq && !yneq && z1nz && z2nz) { ec_GFp_simple_dbl(group, out, a); return; diff --git a/crypto/fipsmodule/ec/simple_mul.c b/crypto/fipsmodule/ec/simple_mul.c new file mode 100644 index 00000000..394f3199 --- /dev/null +++ b/crypto/fipsmodule/ec/simple_mul.c @@ -0,0 +1,98 @@ +/* Copyright (c) 2018, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + +#include + +#include + +#include "internal.h" +#include "../bn/internal.h" +#include "../../internal.h" + + +static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_RAW_POINT *p, + const EC_SCALAR *scalar) { + // This is a generic implementation for uncommon curves that not do not + // warrant a tuned one. It uses unsigned digits so that the doubling case in + // |ec_GFp_simple_add| is always unreachable, erring on safety and simplicity. + + // Compute a table of the first 32 multiples of |p| (including infinity). + EC_RAW_POINT precomp[32]; + ec_GFp_simple_point_set_to_infinity(group, &precomp[0]); + ec_GFp_simple_point_copy(&precomp[1], p); + for (size_t j = 2; j < OPENSSL_ARRAY_SIZE(precomp); j++) { + if (j & 1) { + ec_GFp_simple_add(group, &precomp[j], &precomp[1], &precomp[j - 1]); + } else { + ec_GFp_simple_dbl(group, &precomp[j], &precomp[j / 2]); + } + } + + // Divide bits in |scalar| into windows. + unsigned bits = BN_num_bits(&group->order); + int r_is_at_infinity = 1; + for (unsigned i = bits - 1; i < bits; i--) { + if (!r_is_at_infinity) { + ec_GFp_simple_dbl(group, r, r); + } + if (i % 5 == 0) { + // Compute the next window value. + const size_t width = group->order.width; + uint8_t window = bn_is_bit_set_words(scalar->words, width, i + 4) << 4; + window |= bn_is_bit_set_words(scalar->words, width, i + 3) << 3; + window |= bn_is_bit_set_words(scalar->words, width, i + 2) << 2; + window |= bn_is_bit_set_words(scalar->words, width, i + 1) << 1; + window |= bn_is_bit_set_words(scalar->words, width, i); + + // Select the entry in constant-time. + EC_RAW_POINT tmp; + for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(precomp); j++) { + BN_ULONG mask = constant_time_eq_w(j, window); + ec_felem_select(group, &tmp.X, mask, &precomp[j].X, &tmp.X); + ec_felem_select(group, &tmp.Y, mask, &precomp[j].Y, &tmp.Y); + ec_felem_select(group, &tmp.Z, mask, &precomp[j].Z, &tmp.Z); + } + + if (r_is_at_infinity) { + ec_GFp_simple_point_copy(r, &tmp); + r_is_at_infinity = 0; + } else { + ec_GFp_simple_add(group, r, r, &tmp); + } + } + } + if (r_is_at_infinity) { + ec_GFp_simple_point_set_to_infinity(group, r); + } +} + +void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, + const EC_SCALAR *p_scalar) { + assert(g_scalar != NULL || p_scalar != NULL); + if (p_scalar == NULL) { + ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar); + } else if (g_scalar == NULL) { + ec_GFp_simple_mul_single(group, r, p, p_scalar); + } else { + // Support constant-time two-point multiplication for compatibility. This + // does not actually come up in keygen, ECDH, or ECDSA, so we implement it + // the naive way. + ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar); + EC_RAW_POINT tmp; + ec_GFp_simple_mul_single(group, &tmp, p, p_scalar); + ec_GFp_simple_add(group, r, r, &tmp); + } +} diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c index e1360790..145caa0c 100644 --- a/crypto/fipsmodule/ec/wnaf.c +++ b/crypto/fipsmodule/ec/wnaf.c @@ -72,7 +72,6 @@ #include #include -#include #include #include "internal.h" @@ -169,37 +168,30 @@ static void lookup_precomp(const EC_GROUP *group, EC_RAW_POINT *out, } } -// EC_WNAF_WINDOW_BITS is the window size to use for |ec_wNAF_mul|. +// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_simple_mul_public|. #define EC_WNAF_WINDOW_BITS 4 -// EC_WNAF_TABLE_SIZE is the table size to use for |ec_wNAF_mul|. +// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_simple_mul_public|. #define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1)) -void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, - const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, - const EC_SCALAR *p_scalar) { +void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, + const EC_SCALAR *p_scalar) { size_t bits = BN_num_bits(&group->order); size_t wNAF_len = bits + 1; - // TODO(davidben): |mul_public| is for ECDSA verification which can assume - // non-NULL inputs, but this code is also used for |mul| which cannot. It's - // not constant-time, so replace the generic |mul| and remove the NULL checks. int8_t g_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1]; EC_RAW_POINT g_precomp[EC_WNAF_TABLE_SIZE]; assert(wNAF_len <= OPENSSL_ARRAY_SIZE(g_wNAF)); - if (g_scalar != NULL) { - const EC_RAW_POINT *g = &group->generator->raw; - ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS); - compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE); - } + const EC_RAW_POINT *g = &group->generator->raw; + ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS); + compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE); int8_t p_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1]; EC_RAW_POINT p_precomp[EC_WNAF_TABLE_SIZE]; assert(wNAF_len <= OPENSSL_ARRAY_SIZE(p_wNAF)); - if (p_scalar != NULL) { - ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS); - compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE); - } + ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS); + compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE); EC_RAW_POINT tmp; int r_is_at_infinity = 1; @@ -208,7 +200,7 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, ec_GFp_simple_dbl(group, r, r); } - if (g_scalar != NULL && g_wNAF[k] != 0) { + if (g_wNAF[k] != 0) { lookup_precomp(group, &tmp, g_precomp, g_wNAF[k]); if (r_is_at_infinity) { ec_GFp_simple_point_copy(r, &tmp); @@ -218,7 +210,7 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, } } - if (p_scalar != NULL && p_wNAF[k] != 0) { + if (p_wNAF[k] != 0) { lookup_precomp(group, &tmp, p_precomp, p_wNAF[k]); if (r_is_at_infinity) { ec_GFp_simple_point_copy(r, &tmp); @@ -232,7 +224,4 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, if (r_is_at_infinity) { ec_GFp_simple_point_set_to_infinity(group, r); } - - OPENSSL_cleanse(&g_wNAF, sizeof(g_wNAF)); - OPENSSL_cleanse(&p_wNAF, sizeof(p_wNAF)); }