This is slower, but constant-time. It intentionally omits the signed digit optimization because we cannot be sure the doubling case will be unreachable for all curves. This is a fallback generic implementation for curves which we must support for compatibility but which are not common or important enough to justify curve-specific work.

Before:
Did 814 ECDH P-384 operations in 1085384us (750.0 ops/sec)
Did 1430 ECDSA P-384 signing operations in 1081988us (1321.6 ops/sec)
Did 308 ECDH P-521 operations in 1057741us (291.2 ops/sec)
Did 539 ECDSA P-521 signing operations in 1049797us (513.4 ops/sec)

After:
Did 715 ECDH P-384 operations in 1080161us (661.9 ops/sec)
Did 1188 ECDSA P-384 signing operations in 1069567us (1110.7 ops/sec)
Did 275 ECDH P-521 operations in 1060503us (259.3 ops/sec)
Did 506 ECDSA P-521 signing operations in 1084739us (466.5 ops/sec)

But we're still faster than the old BIGNUM implementation. EC_FELEM more than paid for both the loss of points_make_affine and this CL.

Bug: 239
Change-Id: I65d71a731aad16b523928ee47618822d503ea704
Reviewed-on: https://boringssl-review.googlesource.com/27708
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
kris/onging/CECPQ3_patch15
@@ -66,6 +66,7 @@ | |||||
#include "ec/p256-x86_64.c" | #include "ec/p256-x86_64.c" | ||||
#include "ec/scalar.c" | #include "ec/scalar.c" | ||||
#include "ec/simple.c" | #include "ec/simple.c" | ||||
#include "ec/simple_mul.c" | |||||
#include "ec/util.c" | #include "ec/util.c" | ||||
#include "ec/wnaf.c" | #include "ec/wnaf.c" | ||||
#include "hmac/hmac.c" | #include "hmac/hmac.c" | ||||
@@ -225,8 +225,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) { | |||||
out->group_finish = ec_GFp_mont_group_finish; | out->group_finish = ec_GFp_mont_group_finish; | ||||
out->group_set_curve = ec_GFp_mont_group_set_curve; | out->group_set_curve = ec_GFp_mont_group_set_curve; | ||||
out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates; | out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates; | ||||
out->mul = ec_wNAF_mul /* XXX: Not constant time. */; | |||||
out->mul_public = ec_wNAF_mul; | |||||
out->mul = ec_GFp_simple_mul; | |||||
out->mul_public = ec_GFp_simple_mul_public; | |||||
out->felem_mul = ec_GFp_mont_felem_mul; | out->felem_mul = ec_GFp_mont_felem_mul; | ||||
out->felem_sqr = ec_GFp_mont_felem_sqr; | out->felem_sqr = ec_GFp_mont_felem_sqr; | ||||
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem; | out->bignum_to_felem = ec_GFp_mont_bignum_to_felem; | ||||
@@ -287,6 +287,10 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public( | |||||
const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar, | const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar, | ||||
const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx); | const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx); | ||||
// ec_GFp_simple_mul sets |r| to generator * |g_scalar| + |p| * |p_scalar|.
// Either scalar may be NULL, in which case its term is omitted, but at least
// one of them must be non-NULL. This is the generic constant-time
// implementation installed as |mul| in |EC_GFp_mont_method|.
void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, | |||||
const EC_SCALAR *p_scalar); | |||||
// ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of | // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of | ||||
// |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of | // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of | ||||
// which will be either zero or odd with an absolute value less than 2^w | // which will be either zero or odd with an absolute value less than 2^w | ||||
@@ -298,9 +302,9 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public( | |||||
void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, | void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, | ||||
const EC_SCALAR *scalar, size_t bits, int w); | const EC_SCALAR *scalar, size_t bits, int w); | ||||
void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, | |||||
const EC_SCALAR *p_scalar); | |||||
// ec_GFp_simple_mul_public sets |r| to generator * |g_scalar| + |p| *
// |p_scalar| using a wNAF-based method. It is not constant-time and is
// installed as |mul_public| in |EC_GFp_mont_method|, so it must only be given
// public inputs. Unlike |ec_GFp_simple_mul|, both scalars are read
// unconditionally and therefore must be non-NULL.
void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, | |||||
const EC_SCALAR *p_scalar); | |||||
// method functions in simple.c | // method functions in simple.c | ||||
int ec_GFp_simple_group_init(EC_GROUP *); | int ec_GFp_simple_group_init(EC_GROUP *); | ||||
@@ -287,9 +287,7 @@ void ec_GFp_simple_add(const EC_GROUP *group, EC_RAW_POINT *out, | |||||
BN_ULONG yneq = ec_felem_non_zero_mask(group, &r); | BN_ULONG yneq = ec_felem_non_zero_mask(group, &r); | ||||
// TODO(davidben): Analyze how case relates to timing considerations for the | |||||
// supported curves which hit it (P-224, P-384, and P-521) and the | |||||
// to-be-written constant-time generic multiplication implementation. | |||||
// This case will never occur in the constant-time |ec_GFp_simple_mul|. | |||||
if (!xneq && !yneq && z1nz && z2nz) { | if (!xneq && !yneq && z1nz && z2nz) { | ||||
ec_GFp_simple_dbl(group, out, a); | ec_GFp_simple_dbl(group, out, a); | ||||
return; | return; | ||||
@@ -0,0 +1,98 @@ | |||||
/* Copyright (c) 2018, Google Inc. | |||||
* | |||||
* Permission to use, copy, modify, and/or distribute this software for any | |||||
* purpose with or without fee is hereby granted, provided that the above | |||||
* copyright notice and this permission notice appear in all copies. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |||||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | |||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ | |||||
#include <openssl/ec.h> | |||||
#include <assert.h> | |||||
#include "internal.h" | |||||
#include "../bn/internal.h" | |||||
#include "../../internal.h" | |||||
static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_RAW_POINT *p, | |||||
const EC_SCALAR *scalar) { | |||||
// This is a generic implementation for uncommon curves that not do not | |||||
// warrant a tuned one. It uses unsigned digits so that the doubling case in | |||||
// |ec_GFp_simple_add| is always unreachable, erring on safety and simplicity. | |||||
// Compute a table of the first 32 multiples of |p| (including infinity). | |||||
EC_RAW_POINT precomp[32]; | |||||
ec_GFp_simple_point_set_to_infinity(group, &precomp[0]); | |||||
ec_GFp_simple_point_copy(&precomp[1], p); | |||||
for (size_t j = 2; j < OPENSSL_ARRAY_SIZE(precomp); j++) { | |||||
if (j & 1) { | |||||
ec_GFp_simple_add(group, &precomp[j], &precomp[1], &precomp[j - 1]); | |||||
} else { | |||||
ec_GFp_simple_dbl(group, &precomp[j], &precomp[j / 2]); | |||||
} | |||||
} | |||||
// Divide bits in |scalar| into windows. | |||||
unsigned bits = BN_num_bits(&group->order); | |||||
int r_is_at_infinity = 1; | |||||
for (unsigned i = bits - 1; i < bits; i--) { | |||||
if (!r_is_at_infinity) { | |||||
ec_GFp_simple_dbl(group, r, r); | |||||
} | |||||
if (i % 5 == 0) { | |||||
// Compute the next window value. | |||||
const size_t width = group->order.width; | |||||
uint8_t window = bn_is_bit_set_words(scalar->words, width, i + 4) << 4; | |||||
window |= bn_is_bit_set_words(scalar->words, width, i + 3) << 3; | |||||
window |= bn_is_bit_set_words(scalar->words, width, i + 2) << 2; | |||||
window |= bn_is_bit_set_words(scalar->words, width, i + 1) << 1; | |||||
window |= bn_is_bit_set_words(scalar->words, width, i); | |||||
// Select the entry in constant-time. | |||||
EC_RAW_POINT tmp; | |||||
for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(precomp); j++) { | |||||
BN_ULONG mask = constant_time_eq_w(j, window); | |||||
ec_felem_select(group, &tmp.X, mask, &precomp[j].X, &tmp.X); | |||||
ec_felem_select(group, &tmp.Y, mask, &precomp[j].Y, &tmp.Y); | |||||
ec_felem_select(group, &tmp.Z, mask, &precomp[j].Z, &tmp.Z); | |||||
} | |||||
if (r_is_at_infinity) { | |||||
ec_GFp_simple_point_copy(r, &tmp); | |||||
r_is_at_infinity = 0; | |||||
} else { | |||||
ec_GFp_simple_add(group, r, r, &tmp); | |||||
} | |||||
} | |||||
} | |||||
if (r_is_at_infinity) { | |||||
ec_GFp_simple_point_set_to_infinity(group, r); | |||||
} | |||||
} | |||||
void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, | |||||
const EC_SCALAR *p_scalar) { | |||||
assert(g_scalar != NULL || p_scalar != NULL); | |||||
if (p_scalar == NULL) { | |||||
ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar); | |||||
} else if (g_scalar == NULL) { | |||||
ec_GFp_simple_mul_single(group, r, p, p_scalar); | |||||
} else { | |||||
// Support constant-time two-point multiplication for compatibility. This | |||||
// does not actually come up in keygen, ECDH, or ECDSA, so we implement it | |||||
// the naive way. | |||||
ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar); | |||||
EC_RAW_POINT tmp; | |||||
ec_GFp_simple_mul_single(group, &tmp, p, p_scalar); | |||||
ec_GFp_simple_add(group, r, r, &tmp); | |||||
} | |||||
} |
@@ -72,7 +72,6 @@ | |||||
#include <openssl/bn.h> | #include <openssl/bn.h> | ||||
#include <openssl/err.h> | #include <openssl/err.h> | ||||
#include <openssl/mem.h> | |||||
#include <openssl/thread.h> | #include <openssl/thread.h> | ||||
#include "internal.h" | #include "internal.h" | ||||
@@ -169,37 +168,30 @@ static void lookup_precomp(const EC_GROUP *group, EC_RAW_POINT *out, | |||||
} | } | ||||
} | } | ||||
// EC_WNAF_WINDOW_BITS is the window size to use for |ec_wNAF_mul|. | |||||
// EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_simple_mul_public|. | |||||
#define EC_WNAF_WINDOW_BITS 4 | #define EC_WNAF_WINDOW_BITS 4 | ||||
// EC_WNAF_TABLE_SIZE is the table size to use for |ec_wNAF_mul|. | |||||
// EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_simple_mul_public|. | |||||
#define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1)) | #define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1)) | ||||
void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, | |||||
const EC_SCALAR *p_scalar) { | |||||
void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, | |||||
const EC_SCALAR *p_scalar) { | |||||
size_t bits = BN_num_bits(&group->order); | size_t bits = BN_num_bits(&group->order); | ||||
size_t wNAF_len = bits + 1; | size_t wNAF_len = bits + 1; | ||||
// TODO(davidben): |mul_public| is for ECDSA verification which can assume | |||||
// non-NULL inputs, but this code is also used for |mul| which cannot. It's | |||||
// not constant-time, so replace the generic |mul| and remove the NULL checks. | |||||
int8_t g_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1]; | int8_t g_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1]; | ||||
EC_RAW_POINT g_precomp[EC_WNAF_TABLE_SIZE]; | EC_RAW_POINT g_precomp[EC_WNAF_TABLE_SIZE]; | ||||
assert(wNAF_len <= OPENSSL_ARRAY_SIZE(g_wNAF)); | assert(wNAF_len <= OPENSSL_ARRAY_SIZE(g_wNAF)); | ||||
if (g_scalar != NULL) { | |||||
const EC_RAW_POINT *g = &group->generator->raw; | |||||
ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS); | |||||
compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE); | |||||
} | |||||
const EC_RAW_POINT *g = &group->generator->raw; | |||||
ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS); | |||||
compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE); | |||||
int8_t p_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1]; | int8_t p_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1]; | ||||
EC_RAW_POINT p_precomp[EC_WNAF_TABLE_SIZE]; | EC_RAW_POINT p_precomp[EC_WNAF_TABLE_SIZE]; | ||||
assert(wNAF_len <= OPENSSL_ARRAY_SIZE(p_wNAF)); | assert(wNAF_len <= OPENSSL_ARRAY_SIZE(p_wNAF)); | ||||
if (p_scalar != NULL) { | |||||
ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS); | |||||
compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE); | |||||
} | |||||
ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS); | |||||
compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE); | |||||
EC_RAW_POINT tmp; | EC_RAW_POINT tmp; | ||||
int r_is_at_infinity = 1; | int r_is_at_infinity = 1; | ||||
@@ -208,7 +200,7 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
ec_GFp_simple_dbl(group, r, r); | ec_GFp_simple_dbl(group, r, r); | ||||
} | } | ||||
if (g_scalar != NULL && g_wNAF[k] != 0) { | |||||
if (g_wNAF[k] != 0) { | |||||
lookup_precomp(group, &tmp, g_precomp, g_wNAF[k]); | lookup_precomp(group, &tmp, g_precomp, g_wNAF[k]); | ||||
if (r_is_at_infinity) { | if (r_is_at_infinity) { | ||||
ec_GFp_simple_point_copy(r, &tmp); | ec_GFp_simple_point_copy(r, &tmp); | ||||
@@ -218,7 +210,7 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
} | } | ||||
} | } | ||||
if (p_scalar != NULL && p_wNAF[k] != 0) { | |||||
if (p_wNAF[k] != 0) { | |||||
lookup_precomp(group, &tmp, p_precomp, p_wNAF[k]); | lookup_precomp(group, &tmp, p_precomp, p_wNAF[k]); | ||||
if (r_is_at_infinity) { | if (r_is_at_infinity) { | ||||
ec_GFp_simple_point_copy(r, &tmp); | ec_GFp_simple_point_copy(r, &tmp); | ||||
@@ -232,7 +224,4 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r, | |||||
if (r_is_at_infinity) { | if (r_is_at_infinity) { | ||||
ec_GFp_simple_point_set_to_infinity(group, r); | ec_GFp_simple_point_set_to_infinity(group, r); | ||||
} | } | ||||
OPENSSL_cleanse(&g_wNAF, sizeof(g_wNAF)); | |||||
OPENSSL_cleanse(&p_wNAF, sizeof(p_wNAF)); | |||||
} | } |