Implement constant-time generic multiplication.

This is slower, but constant-time. It intentionally omits the signed digit optimization because we cannot be sure the doubling case will be unreachable for all curves. This is a fallback generic implementation for curves which we must support for compatibility but which are not common or important enough to justify curve-specific work. Before: Did 814 ECDH P-384 operations in 1085384us (750.0 ops/sec) Did 1430 ECDSA P-384 signing operations in 1081988us (1321.6 ops/sec) Did 308 ECDH P-521 operations in 1057741us (291.2 ops/sec) Did 539 ECDSA P-521 signing operations in 1049797us (513.4 ops/sec) After: Did 715 ECDH P-384 operations in 1080161us (661.9 ops/sec) Did 1188 ECDSA P-384 verify operations in 1069567us (1110.7 ops/sec) Did 275 ECDH P-521 operations in 1060503us (259.3 ops/sec) Did 506 ECDSA P-521 signing operations in 1084739us (466.5 ops/sec) But we're still faster than the old BIGNUM implementation. EC_FELEM more than paid for both the loss of points_make_affine and this CL. Bug: 239 Change-Id: I65d71a731aad16b523928ee47618822d503ea704 Reviewed-on: https://boringssl-review.googlesource.com/27708 Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> Reviewed-by: Adam Langley <agl@google.com>
6 years ago · 8370fb6b41
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -66,6 +66,7 @@
 #include "ec/p256-x86_64.c"
 #include "ec/scalar.c"
 #include "ec/simple.c"
 #include "ec/simple_mul.c"
 #include "ec/util.c"
 #include "ec/wnaf.c"
 #include "hmac/hmac.c"
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -225,8 +225,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
  out->group_finish = ec_GFp_mont_group_finish;
  out->group_set_curve = ec_GFp_mont_group_set_curve;
  out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
  out->mul = ec_wNAF_mul /* XXX: Not constant time. */;
  out->mul_public = ec_wNAF_mul;
  out->mul = ec_GFp_simple_mul;
  out->mul_public = ec_GFp_simple_mul_public;
  out->felem_mul = ec_GFp_mont_felem_mul;
  out->felem_sqr = ec_GFp_mont_felem_sqr;
  out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -287,6 +287,10 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public(
    const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
    const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
 void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                       const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                       const EC_SCALAR *p_scalar);
 // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of
 // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
 // which will be either zero or odd with an absolute value less than  2^w
@@ -298,9 +302,9 @@ OPENSSL_EXPORT int ec_point_mul_scalar_public(
 void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
                     const EC_SCALAR *scalar, size_t bits, int w);
 void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                 const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                 const EC_SCALAR *p_scalar);
 void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
                              const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                              const EC_SCALAR *p_scalar);
 // method functions in simple.c
 int ec_GFp_simple_group_init(EC_GROUP *);
--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -287,9 +287,7 @@ void ec_GFp_simple_add(const EC_GROUP *group, EC_RAW_POINT *out,
  BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
  // TODO(davidben): Analyze how case relates to timing considerations for the
  // supported curves which hit it (P-224, P-384, and P-521) and the
  // to-be-written constant-time generic multiplication implementation.
  // This case will never occur in the constant-time |ec_GFp_simple_mul|.
  if (!xneq && !yneq && z1nz && z2nz) {
    ec_GFp_simple_dbl(group, out, a);
    return;
--- a/crypto/fipsmodule/ec/simple_mul.c
+++ b/crypto/fipsmodule/ec/simple_mul.c
@@ -0,0 +1,98 @@
 /* Copyright (c) 2018, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
 #include <openssl/ec.h>
 #include <assert.h>
 #include "internal.h"
 #include "../bn/internal.h"
 #include "../../internal.h"
 static void ec_GFp_simple_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
                                     const EC_RAW_POINT *p,
                                     const EC_SCALAR *scalar) {
  // This is a generic implementation for uncommon curves that not do not
  // warrant a tuned one. It uses unsigned digits so that the doubling case in
  // |ec_GFp_simple_add| is always unreachable, erring on safety and simplicity.
  // Compute a table of the first 32 multiples of |p| (including infinity).
  EC_RAW_POINT precomp[32];
  ec_GFp_simple_point_set_to_infinity(group, &precomp[0]);
  ec_GFp_simple_point_copy(&precomp[1], p);
  for (size_t j = 2; j < OPENSSL_ARRAY_SIZE(precomp); j++) {
    if (j & 1) {
      ec_GFp_simple_add(group, &precomp[j], &precomp[1], &precomp[j - 1]);
    } else {
      ec_GFp_simple_dbl(group, &precomp[j], &precomp[j / 2]);
    }
  }
  // Divide bits in |scalar| into windows.
  unsigned bits = BN_num_bits(&group->order);
  int r_is_at_infinity = 1;
  for (unsigned i = bits - 1; i < bits; i--) {
    if (!r_is_at_infinity) {
      ec_GFp_simple_dbl(group, r, r);
    }
    if (i % 5 == 0) {
      // Compute the next window value.
      const size_t width = group->order.width;
      uint8_t window = bn_is_bit_set_words(scalar->words, width, i + 4) << 4;
      window |= bn_is_bit_set_words(scalar->words, width, i + 3) << 3;
      window |= bn_is_bit_set_words(scalar->words, width, i + 2) << 2;
      window |= bn_is_bit_set_words(scalar->words, width, i + 1) << 1;
      window |= bn_is_bit_set_words(scalar->words, width, i);
      // Select the entry in constant-time.
      EC_RAW_POINT tmp;
      for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(precomp); j++) {
        BN_ULONG mask = constant_time_eq_w(j, window);
        ec_felem_select(group, &tmp.X, mask, &precomp[j].X, &tmp.X);
        ec_felem_select(group, &tmp.Y, mask, &precomp[j].Y, &tmp.Y);
        ec_felem_select(group, &tmp.Z, mask, &precomp[j].Z, &tmp.Z);
      }
      if (r_is_at_infinity) {
        ec_GFp_simple_point_copy(r, &tmp);
        r_is_at_infinity = 0;
      } else {
        ec_GFp_simple_add(group, r, r, &tmp);
      }
    }
  }
  if (r_is_at_infinity) {
    ec_GFp_simple_point_set_to_infinity(group, r);
  }
 }
 void ec_GFp_simple_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                       const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                       const EC_SCALAR *p_scalar) {
  assert(g_scalar != NULL || p_scalar != NULL);
  if (p_scalar == NULL) {
    ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
  } else if (g_scalar == NULL) {
    ec_GFp_simple_mul_single(group, r, p, p_scalar);
  } else {
    // Support constant-time two-point multiplication for compatibility.  This
    // does not actually come up in keygen, ECDH, or ECDSA, so we implement it
    // the naive way.
    ec_GFp_simple_mul_single(group, r, &group->generator->raw, g_scalar);
    EC_RAW_POINT tmp;
    ec_GFp_simple_mul_single(group, &tmp, p, p_scalar);
    ec_GFp_simple_add(group, r, r, &tmp);
  }
 }
--- a/crypto/fipsmodule/ec/wnaf.c
+++ b/crypto/fipsmodule/ec/wnaf.c
@@ -72,7 +72,6 @@
 #include <openssl/bn.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
 #include <openssl/thread.h>
 #include "internal.h"
@@ -169,37 +168,30 @@ static void lookup_precomp(const EC_GROUP *group, EC_RAW_POINT *out,
  }
 }
 // EC_WNAF_WINDOW_BITS is the window size to use for |ec_wNAF_mul|.
 // EC_WNAF_WINDOW_BITS is the window size to use for |ec_GFp_simple_mul_public|.
 #define EC_WNAF_WINDOW_BITS 4
 // EC_WNAF_TABLE_SIZE is the table size to use for |ec_wNAF_mul|.
 // EC_WNAF_TABLE_SIZE is the table size to use for |ec_GFp_simple_mul_public|.
 #define EC_WNAF_TABLE_SIZE (1 << (EC_WNAF_WINDOW_BITS - 1))
 void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                 const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                 const EC_SCALAR *p_scalar) {
 void ec_GFp_simple_mul_public(const EC_GROUP *group, EC_RAW_POINT *r,
                              const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                              const EC_SCALAR *p_scalar) {
  size_t bits = BN_num_bits(&group->order);
  size_t wNAF_len = bits + 1;
  // TODO(davidben): |mul_public| is for ECDSA verification which can assume
  // non-NULL inputs, but this code is also used for |mul| which cannot. It's
  // not constant-time, so replace the generic |mul| and remove the NULL checks.
  int8_t g_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1];
  EC_RAW_POINT g_precomp[EC_WNAF_TABLE_SIZE];
  assert(wNAF_len <= OPENSSL_ARRAY_SIZE(g_wNAF));
  if (g_scalar != NULL) {
    const EC_RAW_POINT *g = &group->generator->raw;
    ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS);
    compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE);
  }
  const EC_RAW_POINT *g = &group->generator->raw;
  ec_compute_wNAF(group, g_wNAF, g_scalar, bits, EC_WNAF_WINDOW_BITS);
  compute_precomp(group, g_precomp, g, EC_WNAF_TABLE_SIZE);
  int8_t p_wNAF[EC_MAX_SCALAR_BYTES * 8 + 1];
  EC_RAW_POINT p_precomp[EC_WNAF_TABLE_SIZE];
  assert(wNAF_len <= OPENSSL_ARRAY_SIZE(p_wNAF));
  if (p_scalar != NULL) {
    ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS);
    compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE);
  }
  ec_compute_wNAF(group, p_wNAF, p_scalar, bits, EC_WNAF_WINDOW_BITS);
  compute_precomp(group, p_precomp, p, EC_WNAF_TABLE_SIZE);
  EC_RAW_POINT tmp;
  int r_is_at_infinity = 1;
@@ -208,7 +200,7 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
      ec_GFp_simple_dbl(group, r, r);
    }
    if (g_scalar != NULL && g_wNAF[k] != 0) {
    if (g_wNAF[k] != 0) {
      lookup_precomp(group, &tmp, g_precomp, g_wNAF[k]);
      if (r_is_at_infinity) {
        ec_GFp_simple_point_copy(r, &tmp);
@@ -218,7 +210,7 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
      }
    }
    if (p_scalar != NULL && p_wNAF[k] != 0) {
    if (p_wNAF[k] != 0) {
      lookup_precomp(group, &tmp, p_precomp, p_wNAF[k]);
      if (r_is_at_infinity) {
        ec_GFp_simple_point_copy(r, &tmp);
@@ -232,7 +224,4 @@ void ec_wNAF_mul(const EC_GROUP *group, EC_RAW_POINT *r,
  if (r_is_at_infinity) {
    ec_GFp_simple_point_set_to_infinity(group, r);
  }
  OPENSSL_cleanse(&g_wNAF, sizeof(g_wNAF));
  OPENSSL_cleanse(&p_wNAF, sizeof(p_wNAF));
 }