From eda47f5d98d46cca612152c648ad98c74619d946 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sun, 1 Apr 2018 14:31:44 -0400 Subject: [PATCH] Make generic point arithmetic slightly less variable-time. The generic code special-cases affine points, but this leaks information. (Of course, the generic code also doesn't have a constant-time multiply and other problems, but one thing at a time.) The optimization in point doubling is not useful. Point multiplication more-or-less never doubles an affine point. The optimization in point addition *is* useful because the wNAF code converts the tables to affine. Accordingly, align with the P-256 code which adds a 'mixed' parameter. (I haven't aligned the formally-verified point formulas themselves yet; initial testing suggests that the large number of temporaries take a perf hit with BIGNUM. I'll check the results in EC_FELEM, which will be stack-allocated, to see if we still need to help the compiler out.) Strangly, it actually got a bit faster with this change. I'm guessing because now it doesn't need to bother with unnecessary comparisons and maybe was kinder to the branch predictor? Before: Did 2201 ECDH P-384 operations in 3068341us (717.3 ops/sec) Did 4092 ECDSA P-384 signing operations in 3076981us (1329.9 ops/sec) Did 3503 ECDSA P-384 verify operations in 3024753us (1158.1 ops/sec) Did 992 ECDH P-521 operations in 3017884us (328.7 ops/sec) Did 1798 ECDSA P-521 signing operations in 3059000us (587.8 ops/sec) Did 1581 ECDSA P-521 verify operations in 3033142us (521.2 ops/sec) After: Did 2310 ECDH P-384 operations in 3092648us (746.9 ops/sec) Did 4080 ECDSA P-384 signing operations in 3044588us (1340.1 ops/sec) Did 3520 ECDSA P-384 verify operations in 3056070us (1151.8 ops/sec) Did 992 ECDH P-521 operations in 3012779us (329.3 ops/sec) Did 1792 ECDSA P-521 signing operations in 3019459us (593.5 ops/sec) Did 1600 ECDSA P-521 verify operations in 3047749us (525.0 ops/sec) Bug: 239 Change-Id: If5d13825fc98e4c58bdd1580cf0245bf7ce93a82 Reviewed-on: https://boringssl-review.googlesource.com/27004 Reviewed-by: Adam Langley Commit-Queue: David Benjamin CQ-Verified: CQ bot account: commit-bot@chromium.org --- crypto/fipsmodule/ec/ec.c | 12 ++++- crypto/fipsmodule/ec/internal.h | 7 ++- crypto/fipsmodule/ec/simple.c | 81 ++++++++++----------------------- crypto/fipsmodule/ec/wnaf.c | 44 +++++++++--------- 4 files changed, 63 insertions(+), 81 deletions(-) diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c index 904466af..ee7ec55e 100644 --- a/crypto/fipsmodule/ec/ec.c +++ b/crypto/fipsmodule/ec/ec.c @@ -792,9 +792,19 @@ int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS); return 0; } - return ec_GFp_simple_add(group, r, a, b, ctx); + return ec_GFp_simple_add(group, r, a, b, 0 /* both Jacobian */, ctx); } +int ec_point_add_mixed(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, + const EC_POINT *b, BN_CTX *ctx) { + if (EC_GROUP_cmp(group, r->group, NULL) != 0 || + EC_GROUP_cmp(group, a->group, NULL) != 0 || + EC_GROUP_cmp(group, b->group, NULL) != 0) { + OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + return ec_GFp_simple_add(group, r, a, b, 1 /* |b| is affine */, ctx); +} int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx) { diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h index 742e94e1..c5d72913 100644 --- a/crypto/fipsmodule/ec/internal.h +++ b/crypto/fipsmodule/ec/internal.h @@ -193,6 +193,11 @@ int ec_bignum_to_scalar_unchecked(const EC_GROUP *group, EC_SCALAR *out, int ec_random_nonzero_scalar(const EC_GROUP *group, EC_SCALAR *out, const uint8_t additional_data[32]); +// ec_point_add_mixed behaves like |EC_POINT_add|, but |&b->Z| must be zero or +// one. +int ec_point_add_mixed(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, + const EC_POINT *b, BN_CTX *ctx); + // ec_point_mul_scalar sets |r| to generator * |g_scalar| + |p| * // |p_scalar|. Unlike other functions which take |EC_SCALAR|, |g_scalar| and // |p_scalar| need not be fully reduced. They need only contain as many bits as @@ -238,7 +243,7 @@ int ec_GFp_simple_point_set_affine_coordinates(const EC_GROUP *, EC_POINT *, const BIGNUM *x, const BIGNUM *y, BN_CTX *); int ec_GFp_simple_add(const EC_GROUP *, EC_POINT *r, const EC_POINT *a, - const EC_POINT *b, BN_CTX *); + const EC_POINT *b, int mixed, BN_CTX *); int ec_GFp_simple_dbl(const EC_GROUP *, EC_POINT *r, const EC_POINT *a, BN_CTX *); int ec_GFp_simple_invert(const EC_GROUP *, EC_POINT *, BN_CTX *); diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c index e87409c6..4fb394c5 100644 --- a/crypto/fipsmodule/ec/simple.c +++ b/crypto/fipsmodule/ec/simple.c @@ -303,7 +303,11 @@ err: } int ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, - const EC_POINT *b, BN_CTX *ctx) { + const EC_POINT *b, int mixed, BN_CTX *ctx) { + if (mixed) { + assert(BN_is_zero(&b->Z) || BN_cmp(&b->Z, &group->one) == 0); + } + int (*field_mul)(const EC_GROUP *, BIGNUM *, const BIGNUM *, const BIGNUM *, BN_CTX *); int (*field_sqr)(const EC_GROUP *, BIGNUM *, const BIGNUM *, BN_CTX *); @@ -350,9 +354,7 @@ int ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, // ('r' might be one of 'a' or 'b'.) // n1, n2 - int b_Z_is_one = BN_cmp(&b->Z, &group->one) == 0; - - if (b_Z_is_one) { + if (mixed) { if (!BN_copy(n1, &a->X) || !BN_copy(n2, &a->Y)) { goto end; } @@ -373,26 +375,17 @@ int ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, } // n3, n4 - int a_Z_is_one = BN_cmp(&a->Z, &group->one) == 0; - if (a_Z_is_one) { - if (!BN_copy(n3, &b->X) || !BN_copy(n4, &b->Y)) { - goto end; - } - // n3 = X_b - // n4 = Y_b - } else { - if (!field_sqr(group, n0, &a->Z, ctx) || - !field_mul(group, n3, &b->X, n0, ctx)) { - goto end; - } - // n3 = X_b * Z_a^2 + if (!field_sqr(group, n0, &a->Z, ctx) || + !field_mul(group, n3, &b->X, n0, ctx)) { + goto end; + } + // n3 = X_b * Z_a^2 - if (!field_mul(group, n0, n0, &a->Z, ctx) || - !field_mul(group, n4, &b->Y, n0, ctx)) { - goto end; - } - // n4 = Y_b * Z_a^3 + if (!field_mul(group, n0, n0, &a->Z, ctx) || + !field_mul(group, n4, &b->Y, n0, ctx)) { + goto end; } + // n4 = Y_b * Z_a^3 // n5, n6 if (!bn_mod_sub_consttime(n5, n1, n3, p, ctx) || @@ -426,25 +419,15 @@ int ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, // 'n8' = n2 + n4 // Z_r - if (a_Z_is_one && b_Z_is_one) { - if (!BN_copy(&r->Z, n5)) { - goto end; - } - } else { - if (a_Z_is_one) { - if (!BN_copy(n0, &b->Z)) { - goto end; - } - } else if (b_Z_is_one) { - if (!BN_copy(n0, &a->Z)) { - goto end; - } - } else if (!field_mul(group, n0, &a->Z, &b->Z, ctx)) { - goto end; - } - if (!field_mul(group, &r->Z, n0, n5, ctx)) { + if (mixed) { + if (!BN_copy(n0, &a->Z)) { goto end; } + } else if (!field_mul(group, n0, &a->Z, &b->Z, ctx)) { + goto end; + } + if (!field_mul(group, &r->Z, n0, n5, ctx)) { + goto end; } // Z_r = Z_a * Z_b * n5 @@ -534,15 +517,7 @@ int ec_GFp_simple_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, // ('r' might the same as 'a'.) // n1 - if (BN_cmp(&a->Z, &group->one) == 0) { - if (!field_sqr(group, n0, &a->X, ctx) || - !bn_mod_lshift1_consttime(n1, n0, p, ctx) || - !bn_mod_add_consttime(n0, n0, n1, p, ctx) || - !bn_mod_add_consttime(n1, n0, &group->a, p, ctx)) { - goto err; - } - // n1 = 3 * X_a^2 + a_curve - } else if (group->a_is_minus3) { + if (group->a_is_minus3) { if (!field_sqr(group, n1, &a->Z, ctx) || !bn_mod_add_consttime(n0, &a->X, n1, p, ctx) || !bn_mod_sub_consttime(n2, &a->X, n1, p, ctx) || @@ -567,14 +542,8 @@ int ec_GFp_simple_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, } // Z_r - if (BN_cmp(&a->Z, &group->one) == 0) { - if (!BN_copy(n0, &a->Y)) { - goto err; - } - } else if (!field_mul(group, n0, &a->Y, &a->Z, ctx)) { - goto err; - } - if (!bn_mod_lshift1_consttime(&r->Z, n0, p, ctx)) { + if (!field_mul(group, n0, &a->Y, &a->Z, ctx) || + !bn_mod_lshift1_consttime(&r->Z, n0, p, ctx)) { goto err; } // Z_r = 2 * Y_a * Z_a diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c index 7bc0bc7b..49fc8bca 100644 --- a/crypto/fipsmodule/ec/wnaf.c +++ b/crypto/fipsmodule/ec/wnaf.c @@ -291,7 +291,9 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar, tmp = EC_POINT_new(group); if (tmp == NULL || - // |window_bits_for_scalar_size| assumes we do this step. + // Convert the points to affine coordinates. This allows us to use the + // slightly faster |ec_point_add_mixed|. The conversion itself is not + // cheap, but it is worthwhile when there are two points. !EC_POINTs_make_affine(group, total_precomp, precomp_storage, ctx)) { goto err; } @@ -302,35 +304,31 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar, goto err; } - if (g_scalar != NULL) { - if (g_wNAF[k] != 0) { - if (!lookup_precomp(group, tmp, g_precomp, g_wNAF[k], ctx)) { - goto err; - } - if (r_is_at_infinity) { - if (!EC_POINT_copy(r, tmp)) { - goto err; - } - r_is_at_infinity = 0; - } else if (!EC_POINT_add(group, r, r, tmp, ctx)) { + if (g_scalar != NULL && g_wNAF[k] != 0) { + if (!lookup_precomp(group, tmp, g_precomp, g_wNAF[k], ctx)) { + goto err; + } + if (r_is_at_infinity) { + if (!EC_POINT_copy(r, tmp)) { goto err; } + r_is_at_infinity = 0; + } else if (!ec_point_add_mixed(group, r, r, tmp, ctx)) { + goto err; } } - if (p_scalar != NULL) { - if (p_wNAF[k] != 0) { - if (!lookup_precomp(group, tmp, p_precomp, p_wNAF[k], ctx)) { - goto err; - } - if (r_is_at_infinity) { - if (!EC_POINT_copy(r, tmp)) { - goto err; - } - r_is_at_infinity = 0; - } else if (!EC_POINT_add(group, r, r, tmp, ctx)) { + if (p_scalar != NULL && p_wNAF[k] != 0) { + if (!lookup_precomp(group, tmp, p_precomp, p_wNAF[k], ctx)) { + goto err; + } + if (r_is_at_infinity) { + if (!EC_POINT_copy(r, tmp)) { goto err; } + r_is_at_infinity = 0; + } else if (!ec_point_add_mixed(group, r, r, tmp, ctx)) { + goto err; } } }