Add EC_FELEM for EC_POINTs and related temporaries.

This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used for EC_POINT's representation in the generic EC_METHOD, as well as random operations on tuned EC_METHODs that still are implemented genericly. Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific to the EC_METHOD, analogous to how the old values were BIGNUMs but may or may not have been in Montgomery form. This is kind of a nuisance, but no more than before. (If p224-64.c were easily convertable to Montgomery form, we could say |EC_FELEM| is always in Montgomery form. If we exposed the internal add and double implementations in each of the curves, we could give |EC_POINT| an |EC_METHOD|-specific representation and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this for later.) The generic add and doubling formulas are aligned with the formulas proved in fiat-crypto. Those only applied to a = -3, so I've proved a generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case someone uses a custom curve. The new formulas are verified, constant-time, and swap a multiply for a square. As expressed in fiat-crypto they do use more temporaries, but this seems to be fine with stack-allocated EC_FELEMs. (We can try to help the compiler later, but benchamrks below suggest this isn't necessary.) Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the bounds in the type system and, in particular, that the width is correct, which will make it easier to select a point in constant-time in the future. (Indeed the old code did not always have the correct width. Its point formula involved halving and implemented this in variable time and variable width.) Before: Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec) Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec) Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec) Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec) Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec) Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec) Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec) After: Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%] Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%] Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%] Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%] Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%] Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%] Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%] This more than pays for removing points_make_affine previously and even speeds up ECDH P-256 slightly. (The point-on-curve check uses the generic code.) Next is to push the stack-allocating up to ec_wNAF_mul, followed by a constant-time single-point multiplication. Bug: 239 Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c Reviewed-on: https://boringssl-review.googlesource.com/27668 Reviewed-by: Adam Langley <agl@google.com>
6年前 · 32e0d10069
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -59,6 +59,7 @@
 #include "ec/ec.c"
 #include "ec/ec_key.c"
 #include "ec/ec_montgomery.c"
 #include "ec/felem.c"
 #include "ec/oct.c"
 #include "ec/p224-64.c"
 #include "../../third_party/fiat/p256.c"
--- a/crypto/fipsmodule/bn/div.c
+++ b/crypto/fipsmodule/bn/div.c
@@ -443,11 +443,8 @@ BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
  return carry;
 }

 // bn_mod_sub_words sets |r| to |a| - |b| (mod |m|), using |tmp| as scratch
 // space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
 // |r|, |a|, and |b| may alias.
 static void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                             const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
 void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
  // r = a - b
  BN_ULONG borrow = bn_sub_words(r, a, b, num);
  // tmp = a - b + m
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -464,6 +464,12 @@ void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                         const BIGNUM *m, BN_CTX *ctx);

 // bn_mod_sub_words sets |r| to |a| - |b| (mod |m|), using |tmp| as scratch
 // space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
 // |r|, |a|, and |b| may alias.
 void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      const BN_ULONG *m, BN_ULONG *tmp, size_t num);

 // bn_mod_sub_consttime acts like |BN_mod_sub_quick| but takes a |BN_CTX|.
 int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                         const BIGNUM *m, BN_CTX *ctx);
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -579,13 +579,14 @@ int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ignored) {
  // structure. If |a| or |b| is incomplete (due to legacy OpenSSL mistakes,
  // custom curve construction is sadly done in two parts) but otherwise not the
  // same object, we consider them always unequal.
  return a->generator == NULL ||
  return a->meth != b->meth ||
         a->generator == NULL ||
         b->generator == NULL ||
         BN_cmp(&a->order, &b->order) != 0 ||
         BN_cmp(&a->field, &b->field) != 0 ||
         BN_cmp(&a->a, &b->a) != 0 ||
         BN_cmp(&a->b, &b->b) != 0 ||
         ec_GFp_simple_cmp(a, a->generator, b->generator, NULL) != 0;
         !ec_felem_equal(a, &a->a, &b->a) ||
         !ec_felem_equal(a, &a->b, &b->b) ||
         ec_GFp_simple_cmp(a, a->generator, b->generator) != 0;
 }

 const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group) {
@@ -612,7 +613,7 @@ int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor,

 int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *out_p, BIGNUM *out_a,
                           BIGNUM *out_b, BN_CTX *ctx) {
  return ec_GFp_simple_group_get_curve(group, out_p, out_a, out_b, ctx);
  return ec_GFp_simple_group_get_curve(group, out_p, out_a, out_b);
 }

 int EC_GROUP_get_curve_name(const EC_GROUP *group) { return group->curve_name; }
@@ -636,12 +637,12 @@ EC_POINT *EC_POINT_new(const EC_GROUP *group) {
  }

  ret->group = EC_GROUP_dup(group);
  if (ret->group == NULL ||
      !ec_GFp_simple_point_init(ret)) {
  if (ret->group == NULL) {
    OPENSSL_free(ret);
    return NULL;
  }

  ec_GFp_simple_point_init(ret);
  return ret;
 }

@@ -649,7 +650,6 @@ static void ec_point_free(EC_POINT *point, int free_group) {
  if (!point) {
    return;
  }
  ec_GFp_simple_point_finish(point);
  if (free_group) {
    EC_GROUP_free(point->group);
  }
@@ -670,7 +670,8 @@ int EC_POINT_copy(EC_POINT *dest, const EC_POINT *src) {
  if (dest == src) {
    return 1;
  }
  return ec_GFp_simple_point_copy(dest, src);
  ec_GFp_simple_point_copy(dest, src);
  return 1;
 }

 EC_POINT *EC_POINT_dup(const EC_POINT *a, const EC_GROUP *group) {
@@ -693,7 +694,8 @@ int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point) {
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  return ec_GFp_simple_point_set_to_infinity(group, point);
  ec_GFp_simple_point_set_to_infinity(group, point);
  return 1;
 }

 int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *point) {
@@ -710,7 +712,7 @@ int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point,
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  return ec_GFp_simple_is_on_curve(group, point, ctx);
  return ec_GFp_simple_is_on_curve(group, point);
 }

 int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b,
@@ -720,7 +722,7 @@ int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b,
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return -1;
  }
  return ec_GFp_simple_cmp(group, a, b, ctx);
  return ec_GFp_simple_cmp(group, a, b);
 }

 int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
@@ -734,7 +736,7 @@ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  return group->meth->point_get_affine_coordinates(group, point, x, y, ctx);
  return group->meth->point_get_affine_coordinates(group, point, x, y);
 }

 int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *point,
@@ -744,7 +746,7 @@ int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *point,
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  if (!ec_GFp_simple_point_set_affine_coordinates(group, point, x, y, ctx)) {
  if (!ec_GFp_simple_point_set_affine_coordinates(group, point, x, y)) {
    return 0;
  }

@@ -773,7 +775,8 @@ int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  return ec_GFp_simple_add(group, r, a, b, ctx);
  ec_GFp_simple_add(group, r, a, b);
  return 1;
 }

 int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
@@ -783,7 +786,8 @@ int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  return ec_GFp_simple_dbl(group, r, a, ctx);
  ec_GFp_simple_dbl(group, r, a);
  return 1;
 }


@@ -792,7 +796,8 @@ int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx) {
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }
  return ec_GFp_simple_invert(group, a, ctx);
  ec_GFp_simple_invert(group, a);
  return 1;
 }

 static int arbitrary_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -123,127 +123,101 @@ err:
  return ret;
 }

 int ec_GFp_mont_field_mul(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
                          const BIGNUM *b, BN_CTX *ctx) {
  if (group->mont == NULL) {
    OPENSSL_PUT_ERROR(EC, EC_R_NOT_INITIALIZED);
    return 0;
  }
 static void ec_GFp_mont_felem_to_montgomery(const EC_GROUP *group,
                                            EC_FELEM *out, const EC_FELEM *in) {
  bn_to_montgomery_small(out->words, in->words, group->field.width,
                         group->mont);
 }

  return BN_mod_mul_montgomery(r, a, b, group->mont, ctx);
 static void ec_GFp_mont_felem_from_montgomery(const EC_GROUP *group,
                                              EC_FELEM *out,
                                              const EC_FELEM *in) {
  bn_from_montgomery_small(out->words, in->words, group->field.width,
                           group->mont);
 }

 int ec_GFp_mont_field_sqr(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
                          BN_CTX *ctx) {
  if (group->mont == NULL) {
    OPENSSL_PUT_ERROR(EC, EC_R_NOT_INITIALIZED);
    return 0;
  }
 static void ec_GFp_mont_felem_inv(const EC_GROUP *group, EC_FELEM *out,
                                  const EC_FELEM *a) {
  bn_mod_inverse_prime_mont_small(out->words, a->words, group->field.width,
                                  group->mont);
 }

  return BN_mod_mul_montgomery(r, a, a, group->mont, ctx);
 void ec_GFp_mont_felem_mul(const EC_GROUP *group, EC_FELEM *r,
                           const EC_FELEM *a, const EC_FELEM *b) {
  bn_mod_mul_montgomery_small(r->words, a->words, b->words, group->field.width,
                              group->mont);
 }

 int ec_GFp_mont_field_encode(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
                             BN_CTX *ctx) {
 void ec_GFp_mont_felem_sqr(const EC_GROUP *group, EC_FELEM *r,
                           const EC_FELEM *a) {
  bn_mod_mul_montgomery_small(r->words, a->words, a->words, group->field.width,
                              group->mont);
 }

 int ec_GFp_mont_bignum_to_felem(const EC_GROUP *group, EC_FELEM *out,
                                const BIGNUM *in) {
  if (group->mont == NULL) {
    OPENSSL_PUT_ERROR(EC, EC_R_NOT_INITIALIZED);
    return 0;
  }

  return BN_to_montgomery(r, a, group->mont, ctx);
  if (!bn_copy_words(out->words, group->field.width, in)) {
    return 0;
  }
  ec_GFp_mont_felem_to_montgomery(group, out, out);
  return 1;
 }

 int ec_GFp_mont_field_decode(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
                             BN_CTX *ctx) {
 int ec_GFp_mont_felem_to_bignum(const EC_GROUP *group, BIGNUM *out,
                                const EC_FELEM *in) {
  if (group->mont == NULL) {
    OPENSSL_PUT_ERROR(EC, EC_R_NOT_INITIALIZED);
    return 0;
  }

  return BN_from_montgomery(r, a, group->mont, ctx);
  EC_FELEM tmp;
  ec_GFp_mont_felem_from_montgomery(group, &tmp, in);
  return bn_set_words(out, tmp.words, group->field.width);
 }

 static int ec_GFp_mont_point_get_affine_coordinates(const EC_GROUP *group,
                                                    const EC_POINT *point,
                                                    BIGNUM *x, BIGNUM *y,
                                                    BN_CTX *ctx) {
                                                    BIGNUM *x, BIGNUM *y) {
  if (EC_POINT_is_at_infinity(group, point)) {
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
    return 0;
  }

  BN_CTX *new_ctx = NULL;
  if (ctx == NULL) {
    ctx = new_ctx = BN_CTX_new();
    if (ctx == NULL) {
      return 0;
    }
  }
  // Transform  (X, Y, Z)  into  (x, y) := (X/Z^2, Y/Z^3).

  int ret = 0;

  BN_CTX_start(ctx);

  // transform  (X, Y, Z)  into  (x, y) := (X/Z^2, Y/Z^3)

  BIGNUM *Z_1 = BN_CTX_get(ctx);
  BIGNUM *Z_2 = BN_CTX_get(ctx);
  BIGNUM *Z_3 = BN_CTX_get(ctx);
  if (Z_1 == NULL ||
      Z_2 == NULL ||
      Z_3 == NULL) {
    goto err;
  }

  // The straightforward way to calculate the inverse of a Montgomery-encoded
  // value where the result is Montgomery-encoded is:
  //
  //    |BN_from_montgomery| + invert + |BN_to_montgomery|.
  //
  // This is equivalent, but more efficient, because |BN_from_montgomery|
  // is more efficient (at least in theory) than |BN_to_montgomery|, since it
  // doesn't have to do the multiplication before the reduction.
  //
  // Use Fermat's Little Theorem instead of |BN_mod_inverse_odd| since this
  // inversion may be done as the final step of private key operations.
  // Unfortunately, this is suboptimal for ECDSA verification.
  if (!BN_from_montgomery(Z_1, &point->Z, group->mont, ctx) ||
      !BN_from_montgomery(Z_1, Z_1, group->mont, ctx) ||
      !bn_mod_inverse_prime(Z_1, Z_1, &group->field, ctx, group->mont)) {
    goto err;
  }

  if (!BN_mod_mul_montgomery(Z_2, Z_1, Z_1, group->mont, ctx)) {
    goto err;
  }
  EC_FELEM z1, z2;
  ec_GFp_mont_felem_inv(group, &z2, &point->Z);
  ec_GFp_mont_felem_sqr(group, &z1, &z2);

  // Instead of using |BN_from_montgomery| to convert the |x| coordinate
  // and then calling |BN_from_montgomery| again to convert the |y|
  // coordinate below, convert the common factor |Z_2| once now, saving one
  // reduction.
  if (!BN_from_montgomery(Z_2, Z_2, group->mont, ctx)) {
    goto err;
  }
  // Instead of using |ec_GFp_mont_felem_from_montgomery| to convert the |x|
  // coordinate and then calling |ec_GFp_mont_felem_from_montgomery| again to
  // convert the |y| coordinate below, convert the common factor |z1| once now,
  // saving one reduction.
  ec_GFp_mont_felem_from_montgomery(group, &z1, &z1);

  if (x != NULL) {
    if (!BN_mod_mul_montgomery(x, &point->X, Z_2, group->mont, ctx)) {
      goto err;
    EC_FELEM tmp;
    ec_GFp_mont_felem_mul(group, &tmp, &point->X, &z1);
    if (!bn_set_words(x, tmp.words, group->field.width)) {
      return 0;
    }
  }

  if (y != NULL) {
    if (!BN_mod_mul_montgomery(Z_3, Z_2, Z_1, group->mont, ctx) ||
        !BN_mod_mul_montgomery(y, &point->Y, Z_3, group->mont, ctx)) {
      goto err;
    EC_FELEM tmp;
    ec_GFp_mont_felem_mul(group, &z1, &z1, &z2);
    ec_GFp_mont_felem_mul(group, &tmp, &point->Y, &z1);
    if (!bn_set_words(y, tmp.words, group->field.width)) {
      return 0;
    }
  }

  ret = 1;

 err:
  BN_CTX_end(ctx);
  BN_CTX_free(new_ctx);
  return ret;
  return 1;
 }

 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
@@ -253,9 +227,9 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
  out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
  out->mul = ec_wNAF_mul /* XXX: Not constant time. */;
  out->mul_public = ec_wNAF_mul;
  out->field_mul = ec_GFp_mont_field_mul;
  out->field_sqr = ec_GFp_mont_field_sqr;
  out->field_encode = ec_GFp_mont_field_encode;
  out->field_decode = ec_GFp_mont_field_decode;
  out->felem_mul = ec_GFp_mont_felem_mul;
  out->felem_sqr = ec_GFp_mont_felem_sqr;
  out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
  out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
  out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
 }
--- a/crypto/fipsmodule/ec/felem.c
+++ b/crypto/fipsmodule/ec/felem.c
@@ -0,0 +1,81 @@
 /* Copyright (c) 2018, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

 #include <openssl/ec.h>

 #include <assert.h>

 #include "internal.h"
 #include "../bn/internal.h"
 #include "../../internal.h"


 int ec_bignum_to_felem(const EC_GROUP *group, EC_FELEM *out, const BIGNUM *in) {
  if (BN_is_negative(in) || BN_cmp(in, &group->field) >= 0) {
    OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
    return 0;
  }
  return group->meth->bignum_to_felem(group, out, in);
 }

 int ec_felem_to_bignum(const EC_GROUP *group, BIGNUM *out, const EC_FELEM *in) {
  return group->meth->felem_to_bignum(group, out, in);
 }

 void ec_felem_neg(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a) {
  // -a is zero if a is zero and p-a otherwise.
  BN_ULONG mask = ec_felem_non_zero_mask(group, a);
  BN_ULONG borrow =
      bn_sub_words(out->words, group->field.d, a->words, group->field.width);
  assert(borrow == 0);
  (void)borrow;
  for (int i = 0; i < group->field.width; i++) {
    out->words[i] &= mask;
  }
 }

 void ec_felem_add(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a,
                  const EC_FELEM *b) {
  EC_FELEM tmp;
  bn_mod_add_words(out->words, a->words, b->words, group->field.d, tmp.words,
                   group->field.width);
 }

 void ec_felem_sub(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a,
                  const EC_FELEM *b) {
  EC_FELEM tmp;
  bn_mod_sub_words(out->words, a->words, b->words, group->field.d, tmp.words,
                   group->field.width);
 }

 BN_ULONG ec_felem_non_zero_mask(const EC_GROUP *group, const EC_FELEM *a) {
  BN_ULONG mask = 0;
  for (int i = 0; i < group->field.width; i++) {
    mask |= a->words[i];
  }
  return ~constant_time_is_zero_w(mask);
 }

 void ec_felem_select(const EC_GROUP *group, EC_FELEM *out, BN_ULONG mask,
                     const EC_FELEM *a, const EC_FELEM *b) {
  bn_select_words(out->words, mask, a->words, b->words, group->field.width);
 }

 int ec_felem_equal(const EC_GROUP *group, const EC_FELEM *a,
                   const EC_FELEM *b) {
  // Note this function is variable-time. Constant-time operations should use
  // |ec_felem_non_zero_mask|.
  return OPENSSL_memcmp(a->words, b->words,
                        group->field.width * sizeof(BN_ULONG)) == 0;
 }
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -100,13 +100,23 @@ typedef union {
  BN_ULONG words[EC_MAX_SCALAR_WORDS];
 } EC_SCALAR;

 // An EC_FELEM represents a field element. Only the first |field->width| words
 // are used. An |EC_FELEM| is specific to an |EC_GROUP| and must not be mixed
 // between groups. Additionally, the representation (whether or not elements are
 // represented in Montgomery-form) may vary between |EC_METHOD|s.
 typedef union {
  // bytes is the representation of the field element in little-endian order.
  uint8_t bytes[EC_MAX_SCALAR_BYTES];
  BN_ULONG words[EC_MAX_SCALAR_WORDS];
 } EC_FELEM;

 struct ec_method_st {
  int (*group_init)(EC_GROUP *);
  void (*group_finish)(EC_GROUP *);
  int (*group_set_curve)(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
                         const BIGNUM *b, BN_CTX *);
  int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_POINT *,
                                      BIGNUM *x, BIGNUM *y, BN_CTX *);
                                      BIGNUM *x, BIGNUM *y);

  // Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
  // are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
@@ -122,17 +132,26 @@ struct ec_method_st {
                    const EC_SCALAR *g_scalar, const EC_POINT *p,
                    const EC_SCALAR *p_scalar, BN_CTX *ctx);

  // 'field_mul' and 'field_sqr' can be used by 'add' and 'dbl' so that the
  // same implementations of point operations can be used with different
  // optimized implementations of expensive field operations:
  int (*field_mul)(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                   const BIGNUM *b, BN_CTX *);
  int (*field_sqr)(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *);

  int (*field_encode)(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                      BN_CTX *);  // e.g. to Montgomery
  int (*field_decode)(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                      BN_CTX *);  // e.g. from Montgomery
  // felem_mul and felem_sqr implement multiplication and squaring,
  // respectively, so that the generic |EC_POINT_add| and |EC_POINT_dbl|
  // implementations can work both with |EC_GFp_mont_method| and the tuned
  // operations.
  //
  // TODO(davidben): This constrains |EC_FELEM|'s internal representation, adds
  // many indirect calls in the middle of the generic code, and a bunch of
  // conversions. If p224-64.c were easily convertable to Montgomery form, we
  // could say |EC_FELEM| is always in Montgomery form. If we exposed the
  // internal add and double implementations in each of the curves, we could
  // give |EC_POINT| an |EC_METHOD|-specific representation and |EC_FELEM| is
  // purely a |EC_GFp_mont_method| type.
  void (*felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                    const EC_FELEM *b);
  void (*felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a);

  int (*bignum_to_felem)(const EC_GROUP *group, EC_FELEM *out,
                         const BIGNUM *in);
  int (*felem_to_bignum)(const EC_GROUP *group, BIGNUM *out,
                         const EC_FELEM *in);

  // scalar_inv_mont sets |out| to |in|^-1, where both input and output are in
  // Montgomery form.
@@ -160,7 +179,7 @@ struct ec_group_st {

  BIGNUM field;  // For curves over GF(p), this is the modulus.

  BIGNUM a, b;  // Curve coefficients.
  EC_FELEM a, b;  // Curve coefficients.

  int a_is_minus3;  // enable optimized point arithmetics for special case

@@ -168,7 +187,7 @@ struct ec_group_st {

  BN_MONT_CTX *mont;  // Montgomery structure.

  BIGNUM one;  // The value one.
  EC_FELEM one;  // The value one.
 } /* EC_GROUP */;

 struct ec_point_st {
@@ -176,14 +195,45 @@ struct ec_point_st {
  // |group->generator|.
  EC_GROUP *group;

  BIGNUM X;
  BIGNUM Y;
  BIGNUM Z;  // Jacobian projective coordinates:
             // (X, Y, Z)  represents  (X/Z^2, Y/Z^3)  if  Z != 0
  // X, Y, and Z are Jacobian projective coordinates. They represent
  // (X/Z^2, Y/Z^3) if Z != 0 and the point and infinite otherwise.
  EC_FELEM X, Y, Z;
 } /* EC_POINT */;

 EC_GROUP *ec_group_new(const EC_METHOD *meth);

 // ec_bignum_to_felem converts |in| to an |EC_FELEM|. It returns one on success
 // and zero if |in| is out of range.
 int ec_bignum_to_felem(const EC_GROUP *group, EC_FELEM *out, const BIGNUM *in);

 // ec_felem_to_bignum converts |in| to a |BIGNUM|. It returns one on success and
 // zero on allocation failure.
 int ec_felem_to_bignum(const EC_GROUP *group, BIGNUM *out, const EC_FELEM *in);

 // ec_felem_neg sets |out| to -|a|.
 void ec_felem_neg(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a);

 // ec_felem_add sets |out| to |a| + |b|.
 void ec_felem_add(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a,
                  const EC_FELEM *b);

 // ec_felem_add sets |out| to |a| - |b|.
 void ec_felem_sub(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a,
                  const EC_FELEM *b);

 // ec_felem_non_zero_mask returns all ones if |a| is non-zero and all zeros
 // otherwise.
 BN_ULONG ec_felem_non_zero_mask(const EC_GROUP *group, const EC_FELEM *a);

 // ec_felem_select, in constant time, sets |out| to |a| if |mask| is all ones
 // and |b| if |mask| is all zeros.
 void ec_felem_select(const EC_GROUP *group, EC_FELEM *out, BN_ULONG mask,
                     const EC_FELEM *a, const EC_FELEM *b);

 // ec_felem_equal returns one if |a| and |b| are equal and zero otherwise. It
 // treats |a| and |b| as public and does *not* run in constant time.
 int ec_felem_equal(const EC_GROUP *group, const EC_FELEM *a, const EC_FELEM *b);

 // ec_bignum_to_scalar converts |in| to an |EC_SCALAR| and writes it to
 // |*out|. It returns one on success and zero if |in| is out of range.
 OPENSSL_EXPORT int ec_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
@@ -251,24 +301,20 @@ void ec_GFp_simple_group_finish(EC_GROUP *);
 int ec_GFp_simple_group_set_curve(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
                                  const BIGNUM *b, BN_CTX *);
 int ec_GFp_simple_group_get_curve(const EC_GROUP *, BIGNUM *p, BIGNUM *a,
                                  BIGNUM *b, BN_CTX *);
                                  BIGNUM *b);
 unsigned ec_GFp_simple_group_get_degree(const EC_GROUP *);
 int ec_GFp_simple_point_init(EC_POINT *);
 void ec_GFp_simple_point_finish(EC_POINT *);
 int ec_GFp_simple_point_copy(EC_POINT *, const EC_POINT *);
 int ec_GFp_simple_point_set_to_infinity(const EC_GROUP *, EC_POINT *);
 void ec_GFp_simple_point_init(EC_POINT *);
 void ec_GFp_simple_point_copy(EC_POINT *, const EC_POINT *);
 void ec_GFp_simple_point_set_to_infinity(const EC_GROUP *, EC_POINT *);
 int ec_GFp_simple_point_set_affine_coordinates(const EC_GROUP *, EC_POINT *,
                                               const BIGNUM *x, const BIGNUM *y,
                                               BN_CTX *);
 int ec_GFp_simple_add(const EC_GROUP *, EC_POINT *r, const EC_POINT *a,
                      const EC_POINT *b, BN_CTX *);
 int ec_GFp_simple_dbl(const EC_GROUP *, EC_POINT *r, const EC_POINT *a,
                      BN_CTX *);
 int ec_GFp_simple_invert(const EC_GROUP *, EC_POINT *, BN_CTX *);
                                               const BIGNUM *x, const BIGNUM *y);
 void ec_GFp_simple_add(const EC_GROUP *, EC_POINT *r, const EC_POINT *a,
                       const EC_POINT *b);
 void ec_GFp_simple_dbl(const EC_GROUP *, EC_POINT *r, const EC_POINT *a);
 void ec_GFp_simple_invert(const EC_GROUP *, EC_POINT *);
 int ec_GFp_simple_is_at_infinity(const EC_GROUP *, const EC_POINT *);
 int ec_GFp_simple_is_on_curve(const EC_GROUP *, const EC_POINT *, BN_CTX *);
 int ec_GFp_simple_cmp(const EC_GROUP *, const EC_POINT *a, const EC_POINT *b,
                      BN_CTX *);
 int ec_GFp_simple_is_on_curve(const EC_GROUP *, const EC_POINT *);
 int ec_GFp_simple_cmp(const EC_GROUP *, const EC_POINT *a, const EC_POINT *b);
 void ec_simple_scalar_inv_montgomery(const EC_GROUP *group, EC_SCALAR *r,
                                     const EC_SCALAR *a);

@@ -277,14 +323,14 @@ int ec_GFp_mont_group_init(EC_GROUP *);
 int ec_GFp_mont_group_set_curve(EC_GROUP *, const BIGNUM *p, const BIGNUM *a,
                                const BIGNUM *b, BN_CTX *);
 void ec_GFp_mont_group_finish(EC_GROUP *);
 int ec_GFp_mont_field_mul(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                          const BIGNUM *b, BN_CTX *);
 int ec_GFp_mont_field_sqr(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                          BN_CTX *);
 int ec_GFp_mont_field_encode(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                             BN_CTX *);
 int ec_GFp_mont_field_decode(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                             BN_CTX *);
 void ec_GFp_mont_felem_mul(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                           const EC_FELEM *b);
 void ec_GFp_mont_felem_sqr(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a);

 int ec_GFp_mont_bignum_to_felem(const EC_GROUP *group, EC_FELEM *out,
                                const BIGNUM *in);
 int ec_GFp_mont_felem_to_bignum(const EC_GROUP *group, BIGNUM *out,
                                const EC_FELEM *in);

 void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit, uint8_t in);

--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -203,20 +203,6 @@ static void p224_felem_to_bin28(uint8_t out[28], const p224_felem in) {
  }
 }

 // From OpenSSL BIGNUM to internal representation
 static int p224_BN_to_felem(p224_felem out, const BIGNUM *bn) {
  // BN_bn2bin eats leading zeroes
  p224_felem_bytearray b_out;
  if (BN_is_negative(bn) ||
      !BN_bn2le_padded(b_out, sizeof(b_out), bn)) {
    OPENSSL_PUT_ERROR(EC, EC_R_BIGNUM_OUT_OF_RANGE);
    return 0;
  }

  p224_bin28_to_felem(out, b_out);
  return 1;
 }

 // From internal representation to OpenSSL BIGNUM
 static BIGNUM *p224_felem_to_BN(BIGNUM *out, const p224_felem in) {
  p224_felem_bytearray b_out;
@@ -224,6 +210,17 @@ static BIGNUM *p224_felem_to_BN(BIGNUM *out, const p224_felem in) {
  return BN_le2bn(b_out, sizeof(b_out), out);
 }

 static void p224_generic_to_felem(p224_felem out, const EC_FELEM *in) {
  p224_bin28_to_felem(out, in->bytes);
 }

 static void p224_felem_to_generic(EC_FELEM *out, const p224_felem in) {
  p224_felem_to_bin28(out->bytes, in);
  // 224 is not a multiple of 64, so zero the remaining bytes.
  OPENSSL_memset(out->bytes + 28, 0, 32 - 28);
 }


 // Field operations, using the internal representation of field elements.
 // NB! These operations are specific to our point multiplication and cannot be
 // expected to be correct in general - e.g., multiplication with a large scalar
@@ -975,27 +972,22 @@ static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,
 // (X', Y') = (X/Z^2, Y/Z^3)
 static int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
                                                        const EC_POINT *point,
                                                        BIGNUM *x, BIGNUM *y,
                                                        BN_CTX *ctx) {
  p224_felem z1, z2, x_in, y_in, x_out, y_out;
  p224_widefelem tmp;

                                                        BIGNUM *x, BIGNUM *y) {
  if (EC_POINT_is_at_infinity(group, point)) {
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
    return 0;
  }

  if (!p224_BN_to_felem(x_in, &point->X) ||
      !p224_BN_to_felem(y_in, &point->Y) ||
      !p224_BN_to_felem(z1, &point->Z)) {
    return 0;
  }

  p224_felem z1, z2;
  p224_widefelem tmp;
  p224_generic_to_felem(z1, &point->Z);
  p224_felem_inv(z2, z1);
  p224_felem_square(tmp, z2);
  p224_felem_reduce(z1, tmp);

  if (x != NULL) {
    p224_felem x_in, x_out;
    p224_generic_to_felem(x_in, &point->X);
    p224_felem_mul(tmp, x_in, z1);
    p224_felem_reduce(x_in, tmp);
    p224_felem_contract(x_out, x_in);
@@ -1006,6 +998,8 @@ static int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
  }

  if (y != NULL) {
    p224_felem y_in, y_out;
    p224_generic_to_felem(y_in, &point->Y);
    p224_felem_mul(tmp, z1, z2);
    p224_felem_reduce(z1, tmp);
    p224_felem_mul(tmp, y_in, z1);
@@ -1032,11 +1026,9 @@ static int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
    // they contribute nothing to the linear combination.
    OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
    // precompute multiples
    if (!p224_BN_to_felem(x_out, &p->X) ||
        !p224_BN_to_felem(y_out, &p->Y) ||
        !p224_BN_to_felem(z_out, &p->Z)) {
      return 0;
    }
    p224_generic_to_felem(x_out, &p->X);
    p224_generic_to_felem(y_out, &p->Y);
    p224_generic_to_felem(z_out, &p->Z);

    p224_felem_assign(p_pre_comp[1][0], x_out);
    p224_felem_assign(p_pre_comp[1][1], y_out);
@@ -1063,43 +1055,45 @@ static int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,

  // reduce the output to its unique minimal representation
  p224_felem_contract(x_in, x_out);
  p224_felem_to_generic(&r->X, x_in);
  p224_felem_contract(y_in, y_out);
  p224_felem_to_generic(&r->Y, y_in);
  p224_felem_contract(z_in, z_out);
  if (!p224_felem_to_BN(&r->X, x_in) ||
      !p224_felem_to_BN(&r->Y, y_in) ||
      !p224_felem_to_BN(&r->Z, z_in)) {
    OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
    return 0;
  }
  p224_felem_to_generic(&r->Z, z_in);
  return 1;
 }

 static int ec_GFp_nistp224_field_mul(const EC_GROUP *group, BIGNUM *r,
                                     const BIGNUM *a, const BIGNUM *b,
                                     BN_CTX *ctx) {
 static void ec_GFp_nistp224_felem_mul(const EC_GROUP *group, EC_FELEM *r,
                                      const EC_FELEM *a, const EC_FELEM *b) {
  p224_felem felem1, felem2;
  p224_widefelem wide;
  if (!p224_BN_to_felem(felem1, a) ||
      !p224_BN_to_felem(felem2, b)) {
    return 0;
  }
  p224_generic_to_felem(felem1, a);
  p224_generic_to_felem(felem2, b);
  p224_felem_mul(wide, felem1, felem2);
  p224_felem_reduce(felem1, wide);
  p224_felem_contract(felem1, felem1);
  return p224_felem_to_BN(r, felem1) != NULL;
  p224_felem_to_generic(r, felem1);
 }

 static int ec_GFp_nistp224_field_sqr(const EC_GROUP *group, BIGNUM *r,
                                     const BIGNUM *a, BN_CTX *ctx) {
 static void ec_GFp_nistp224_felem_sqr(const EC_GROUP *group, EC_FELEM *r,
                                      const EC_FELEM *a) {
  p224_felem felem;
  if (!p224_BN_to_felem(felem, a)) {
    return 0;
  }
  p224_generic_to_felem(felem, a);
  p224_widefelem wide;
  p224_felem_square(wide, felem);
  p224_felem_reduce(felem, wide);
  p224_felem_contract(felem, felem);
  return p224_felem_to_BN(r, felem) != NULL;
  p224_felem_to_generic(r, felem);
 }

 static int ec_GFp_nistp224_bignum_to_felem(const EC_GROUP *group, EC_FELEM *out,
                                           const BIGNUM *in) {
  return bn_copy_words(out->words, group->field.width, in);
 }

 static int ec_GFp_nistp224_felem_to_bignum(const EC_GROUP *group, BIGNUM *out,
                                           const EC_FELEM *in) {
  return bn_set_words(out, in->words, group->field.width);
 }

 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) {
@@ -1110,10 +1104,10 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) {
      ec_GFp_nistp224_point_get_affine_coordinates;
  out->mul = ec_GFp_nistp224_points_mul;
  out->mul_public = ec_GFp_nistp224_points_mul;
  out->field_mul = ec_GFp_nistp224_field_mul;
  out->field_sqr = ec_GFp_nistp224_field_sqr;
  out->field_encode = NULL;
  out->field_decode = NULL;
  out->felem_mul = ec_GFp_nistp224_felem_mul;
  out->felem_sqr = ec_GFp_nistp224_felem_sqr;
  out->bignum_to_felem = ec_GFp_nistp224_bignum_to_felem;
  out->felem_to_bignum = ec_GFp_nistp224_felem_to_bignum;
  out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
 };

--- a/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/crypto/fipsmodule/ec/p256-x86_64.c
@@ -197,19 +197,13 @@ static void ecp_nistz256_mod_inverse_mont(BN_ULONG r[P256_LIMBS],
  ecp_nistz256_mul_mont(r, res, in);
 }

 // ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and
 // returns one if it fits. Otherwise it returns zero.
 static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],
                                             const BIGNUM *in) {
  return bn_copy_words(out, P256_LIMBS, in);
 }

 // r = p * p_scalar
 static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
                                     const EC_POINT *p,
                                     const EC_SCALAR *p_scalar) {
 static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
                                      const EC_POINT *p,
                                      const EC_SCALAR *p_scalar) {
  assert(p != NULL);
  assert(p_scalar != NULL);
  assert(group->field.width == P256_LIMBS);

  static const unsigned kWindowSize = 5;
  static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
@@ -226,13 +220,10 @@ static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
  // not stored. All other values are actually stored with an offset of -1 in
  // table.
  P256_POINT *row = table;

  if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &p->X) ||
      !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &p->Y) ||
      !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &p->Z)) {
    OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
    return 0;
  }
  assert(group->field.width == P256_LIMBS);
  OPENSSL_memcpy(row[1 - 1].X, p->X.words, P256_LIMBS * sizeof(BN_ULONG));
  OPENSSL_memcpy(row[1 - 1].Y, p->Y.words, P256_LIMBS * sizeof(BN_ULONG));
  OPENSSL_memcpy(row[1 - 1].Z, p->Z.words, P256_LIMBS * sizeof(BN_ULONG));

  ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
  ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
@@ -296,8 +287,6 @@ static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
  copy_conditional(h.Y, tmp, wvalue & 1);

  ecp_nistz256_point_add(r, r, &h);

  return 1;
 }

 static int ecp_nistz256_points_mul(const EC_GROUP *group, EC_POINT *r,
@@ -362,44 +351,30 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, EC_POINT *r,
      out = &p.p;
    }

    if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar)) {
      return 0;
    }

    ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
    if (!p_is_infinity) {
      ecp_nistz256_point_add(&p.p, &p.p, out);
    }
  }

  // Not constant-time, but we're only operating on the public output.
  if (!bn_set_words(&r->X, p.p.X, P256_LIMBS) ||
      !bn_set_words(&r->Y, p.p.Y, P256_LIMBS) ||
      !bn_set_words(&r->Z, p.p.Z, P256_LIMBS)) {
    return 0;
  }

  assert(group->field.width == P256_LIMBS);
  OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
  OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
  OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
  return 1;
 }

 static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point,
                                   BIGNUM *x, BIGNUM *y, BN_CTX *ctx) {
  BN_ULONG z_inv2[P256_LIMBS];
  BN_ULONG z_inv3[P256_LIMBS];
  BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS];

                                   BIGNUM *x, BIGNUM *y) {
  if (EC_POINT_is_at_infinity(group, point)) {
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
    return 0;
  }

  if (!ecp_nistz256_bignum_to_field_elem(point_x, &point->X) ||
      !ecp_nistz256_bignum_to_field_elem(point_y, &point->Y) ||
      !ecp_nistz256_bignum_to_field_elem(point_z, &point->Z)) {
    OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
    return 0;
  }

  ecp_nistz256_mod_inverse_mont(z_inv3, point_z);
  BN_ULONG z_inv2[P256_LIMBS];
  BN_ULONG z_inv3[P256_LIMBS];
  assert(group->field.width == P256_LIMBS);
  ecp_nistz256_mod_inverse_mont(z_inv3, point->Z.words);
  ecp_nistz256_sqr_mont(z_inv2, z_inv3);

  // Instead of using |ecp_nistz256_from_mont| to convert the |x| coordinate
@@ -410,7 +385,7 @@ static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point,

  if (x != NULL) {
    BN_ULONG x_aff[P256_LIMBS];
    ecp_nistz256_mul_mont(x_aff, z_inv2, point_x);
    ecp_nistz256_mul_mont(x_aff, z_inv2, point->X.words);
    if (!bn_set_words(x, x_aff, P256_LIMBS)) {
      OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
      return 0;
@@ -420,7 +395,7 @@ static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point,
  if (y != NULL) {
    BN_ULONG y_aff[P256_LIMBS];
    ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2);
    ecp_nistz256_mul_mont(y_aff, z_inv3, point_y);
    ecp_nistz256_mul_mont(y_aff, z_inv3, point->Y.words);
    if (!bn_set_words(y, y_aff, P256_LIMBS)) {
      OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
      return 0;
@@ -518,10 +493,10 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
  out->point_get_affine_coordinates = ecp_nistz256_get_affine;
  out->mul = ecp_nistz256_points_mul;
  out->mul_public = ecp_nistz256_points_mul;
  out->field_mul = ec_GFp_mont_field_mul;
  out->field_sqr = ec_GFp_mont_field_sqr;
  out->field_encode = ec_GFp_mont_field_encode;
  out->field_decode = ec_GFp_mont_field_decode;
  out->felem_mul = ec_GFp_mont_felem_mul;
  out->felem_sqr = ec_GFp_mont_felem_sqr;
  out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
  out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
  out->scalar_inv_montgomery = ecp_nistz256_inv_mod_ord;
 };

--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -90,18 +90,12 @@

 int ec_GFp_simple_group_init(EC_GROUP *group) {
  BN_init(&group->field);
  BN_init(&group->a);
  BN_init(&group->b);
  BN_init(&group->one);
  group->a_is_minus3 = 0;
  return 1;
 }

 void ec_GFp_simple_group_finish(EC_GROUP *group) {
  BN_free(&group->field);
  BN_free(&group->a);
  BN_free(&group->b);
  BN_free(&group->one);
 }

 int ec_GFp_simple_group_set_curve(EC_GROUP *group, const BIGNUM *p,
@@ -109,7 +103,6 @@ int ec_GFp_simple_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                  BN_CTX *ctx) {
  int ret = 0;
  BN_CTX *new_ctx = NULL;
  BIGNUM *tmp_a;

  // p must be a prime > 3
  if (BN_num_bits(p) <= 2 || !BN_is_odd(p)) {
@@ -125,8 +118,8 @@ int ec_GFp_simple_group_set_curve(EC_GROUP *group, const BIGNUM *p,
  }

  BN_CTX_start(ctx);
  tmp_a = BN_CTX_get(ctx);
  if (tmp_a == NULL) {
  BIGNUM *tmp = BN_CTX_get(ctx);
  if (tmp == NULL) {
    goto err;
  }

@@ -139,37 +132,24 @@ int ec_GFp_simple_group_set_curve(EC_GROUP *group, const BIGNUM *p,
  bn_set_minimal_width(&group->field);

  // group->a
  if (!BN_nnmod(tmp_a, a, &group->field, ctx)) {
    goto err;
  }
  if (group->meth->field_encode) {
    if (!group->meth->field_encode(group, &group->a, tmp_a, ctx)) {
      goto err;
    }
  } else if (!BN_copy(&group->a, tmp_a)) {
  if (!BN_nnmod(tmp, a, &group->field, ctx) ||
      !ec_bignum_to_felem(group, &group->a, tmp)) {
    goto err;
  }

  // group->b
  if (!BN_nnmod(&group->b, b, &group->field, ctx)) {
    goto err;
  }
  if (group->meth->field_encode &&
      !group->meth->field_encode(group, &group->b, &group->b, ctx)) {
  // group->a_is_minus3
  if (!BN_add_word(tmp, 3)) {
    goto err;
  }
  group->a_is_minus3 = (0 == BN_cmp(tmp, &group->field));

  // group->a_is_minus3
  if (!BN_add_word(tmp_a, 3)) {
  // group->b
  if (!BN_nnmod(tmp, b, &group->field, ctx) ||
      !ec_bignum_to_felem(group, &group->b, tmp)) {
    goto err;
  }
  group->a_is_minus3 = (0 == BN_cmp(tmp_a, &group->field));

  if (group->meth->field_encode != NULL) {
    if (!group->meth->field_encode(group, &group->one, BN_value_one(), ctx)) {
      goto err;
    }
  } else if (!BN_copy(&group->one, BN_value_one())) {
  if (!ec_bignum_to_felem(group, &group->one, BN_value_one())) {
    goto err;
  }

@@ -182,440 +162,283 @@ err:
 }

 int ec_GFp_simple_group_get_curve(const EC_GROUP *group, BIGNUM *p, BIGNUM *a,
                                  BIGNUM *b, BN_CTX *ctx) {
  int ret = 0;
  BN_CTX *new_ctx = NULL;

  if (p != NULL && !BN_copy(p, &group->field)) {
                                  BIGNUM *b) {
  if ((p != NULL && !BN_copy(p, &group->field)) ||
      (a != NULL && !ec_felem_to_bignum(group, a, &group->a)) ||
      (b != NULL && !ec_felem_to_bignum(group, b, &group->b))) {
    return 0;
  }

  if (a != NULL || b != NULL) {
    if (group->meth->field_decode) {
      if (ctx == NULL) {
        ctx = new_ctx = BN_CTX_new();
        if (ctx == NULL) {
          return 0;
        }
      }
      if (a != NULL && !group->meth->field_decode(group, a, &group->a, ctx)) {
        goto err;
      }
      if (b != NULL && !group->meth->field_decode(group, b, &group->b, ctx)) {
        goto err;
      }
    } else {
      if (a != NULL && !BN_copy(a, &group->a)) {
        goto err;
      }
      if (b != NULL && !BN_copy(b, &group->b)) {
        goto err;
      }
    }
  }

  ret = 1;

 err:
  BN_CTX_free(new_ctx);
  return ret;
  return 1;
 }

 unsigned ec_GFp_simple_group_get_degree(const EC_GROUP *group) {
  return BN_num_bits(&group->field);
 }

 int ec_GFp_simple_point_init(EC_POINT *point) {
  BN_init(&point->X);
  BN_init(&point->Y);
  BN_init(&point->Z);

  return 1;
 void ec_GFp_simple_point_init(EC_POINT *point) {
  OPENSSL_memset(&point->X, 0, sizeof(EC_FELEM));
  OPENSSL_memset(&point->Y, 0, sizeof(EC_FELEM));
  OPENSSL_memset(&point->Z, 0, sizeof(EC_FELEM));
 }

 void ec_GFp_simple_point_finish(EC_POINT *point) {
  BN_free(&point->X);
  BN_free(&point->Y);
  BN_free(&point->Z);
 void ec_GFp_simple_point_copy(EC_POINT *dest, const EC_POINT *src) {
  OPENSSL_memcpy(&dest->X, &src->X, sizeof(EC_FELEM));
  OPENSSL_memcpy(&dest->Y, &src->Y, sizeof(EC_FELEM));
  OPENSSL_memcpy(&dest->Z, &src->Z, sizeof(EC_FELEM));
 }

 int ec_GFp_simple_point_copy(EC_POINT *dest, const EC_POINT *src) {
  if (!BN_copy(&dest->X, &src->X) ||
      !BN_copy(&dest->Y, &src->Y) ||
      !BN_copy(&dest->Z, &src->Z)) {
    return 0;
  }

  return 1;
 }

 int ec_GFp_simple_point_set_to_infinity(const EC_GROUP *group,
                                        EC_POINT *point) {
  BN_zero(&point->Z);
  return 1;
 }

 static int set_Jprojective_coordinate_GFp(const EC_GROUP *group, BIGNUM *out,
                                          const BIGNUM *in, BN_CTX *ctx) {
  if (in == NULL) {
    return 1;
  }
  if (BN_is_negative(in) ||
      BN_cmp(in, &group->field) >= 0) {
    OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
    return 0;
  }
  if (group->meth->field_encode) {
    return group->meth->field_encode(group, out, in, ctx);
  }
  return BN_copy(out, in) != NULL;
 void ec_GFp_simple_point_set_to_infinity(const EC_GROUP *group,
                                         EC_POINT *point) {
  OPENSSL_memset(&point->Z, 0, sizeof(EC_FELEM));
 }

 int ec_GFp_simple_point_set_affine_coordinates(const EC_GROUP *group,
                                               EC_POINT *point, const BIGNUM *x,
                                               const BIGNUM *y, BN_CTX *ctx) {
                                               const BIGNUM *y) {
  if (x == NULL || y == NULL) {
    OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
    return 0;
  }

  BN_CTX *new_ctx = NULL;
  int ret = 0;

  if (ctx == NULL) {
    ctx = new_ctx = BN_CTX_new();
    if (ctx == NULL) {
      return 0;
    }
  }

  if (!set_Jprojective_coordinate_GFp(group, &point->X, x, ctx) ||
      !set_Jprojective_coordinate_GFp(group, &point->Y, y, ctx) ||
      !BN_copy(&point->Z, &group->one)) {
    goto err;
  if (!ec_bignum_to_felem(group, &point->X, x) ||
      !ec_bignum_to_felem(group, &point->Y, y)) {
    return 0;
  }
  OPENSSL_memcpy(&point->Z, &group->one, sizeof(EC_FELEM));

  ret = 1;

 err:
  BN_CTX_free(new_ctx);
  return ret;
  return 1;
 }

 int ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
                      const EC_POINT *b, BN_CTX *ctx) {
  int (*field_mul)(const EC_GROUP *, BIGNUM *, const BIGNUM *, const BIGNUM *,
                   BN_CTX *);
  int (*field_sqr)(const EC_GROUP *, BIGNUM *, const BIGNUM *, BN_CTX *);
  const BIGNUM *p;
  BN_CTX *new_ctx = NULL;
  BIGNUM *n0, *n1, *n2, *n3, *n4, *n5, *n6;
  int ret = 0;

 void ec_GFp_simple_add(const EC_GROUP *group, EC_POINT *out, const EC_POINT *a,
                       const EC_POINT *b) {
  if (a == b) {
    return EC_POINT_dbl(group, r, a, ctx);
  }
  if (EC_POINT_is_at_infinity(group, a)) {
    return EC_POINT_copy(r, b);
  }
  if (EC_POINT_is_at_infinity(group, b)) {
    return EC_POINT_copy(r, a);
  }

  field_mul = group->meth->field_mul;
  field_sqr = group->meth->field_sqr;
  p = &group->field;

  if (ctx == NULL) {
    ctx = new_ctx = BN_CTX_new();
    if (ctx == NULL) {
      return 0;
    }
  }

  BN_CTX_start(ctx);
  n0 = BN_CTX_get(ctx);
  n1 = BN_CTX_get(ctx);
  n2 = BN_CTX_get(ctx);
  n3 = BN_CTX_get(ctx);
  n4 = BN_CTX_get(ctx);
  n5 = BN_CTX_get(ctx);
  n6 = BN_CTX_get(ctx);
  if (n6 == NULL) {
    goto end;
  }

  // Note that in this function we must not read components of 'a' or 'b'
  // once we have written the corresponding components of 'r'.
  // ('r' might be one of 'a' or 'b'.)

  // n1, n2
  if (!field_sqr(group, n0, &b->Z, ctx) ||
      !field_mul(group, n1, &a->X, n0, ctx)) {
    goto end;
  }
  // n1 = X_a * Z_b^2

  if (!field_mul(group, n0, n0, &b->Z, ctx) ||
      !field_mul(group, n2, &a->Y, n0, ctx)) {
    goto end;
  }
  // n2 = Y_a * Z_b^3

  // n3, n4
  if (!field_sqr(group, n0, &a->Z, ctx) ||
      !field_mul(group, n3, &b->X, n0, ctx)) {
    goto end;
  }
  // n3 = X_b * Z_a^2

  if (!field_mul(group, n0, n0, &a->Z, ctx) ||
      !field_mul(group, n4, &b->Y, n0, ctx)) {
    goto end;
  }
  // n4 = Y_b * Z_a^3

  // n5, n6
  if (!bn_mod_sub_consttime(n5, n1, n3, p, ctx) ||
      !bn_mod_sub_consttime(n6, n2, n4, p, ctx)) {
    goto end;
  }
  // n5 = n1 - n3
  // n6 = n2 - n4

  if (BN_is_zero(n5)) {
    if (BN_is_zero(n6)) {
      // a is the same point as b
      BN_CTX_end(ctx);
      ret = EC_POINT_dbl(group, r, a, ctx);
      ctx = NULL;
      goto end;
    } else {
      // a is the inverse of b
      BN_zero(&r->Z);
      ret = 1;
      goto end;
    }
  }

  // 'n7', 'n8'
  if (!bn_mod_add_consttime(n1, n1, n3, p, ctx) ||
      !bn_mod_add_consttime(n2, n2, n4, p, ctx)) {
    goto end;
  }
  // 'n7' = n1 + n3
  // 'n8' = n2 + n4

  // Z_r
  if (!field_mul(group, n0, &a->Z, &b->Z, ctx) ||
      !field_mul(group, &r->Z, n0, n5, ctx)) {
    goto end;
  }

  // Z_r = Z_a * Z_b * n5

  // X_r
  if (!field_sqr(group, n0, n6, ctx) ||
      !field_sqr(group, n4, n5, ctx) ||
      !field_mul(group, n3, n1, n4, ctx) ||
      !bn_mod_sub_consttime(&r->X, n0, n3, p, ctx)) {
    goto end;
  }
  // X_r = n6^2 - n5^2 * 'n7'

  // 'n9'
  if (!bn_mod_lshift1_consttime(n0, &r->X, p, ctx) ||
      !bn_mod_sub_consttime(n0, n3, n0, p, ctx)) {
    goto end;
  }
  // n9 = n5^2 * 'n7' - 2 * X_r

  // Y_r
  if (!field_mul(group, n0, n0, n6, ctx) ||
      !field_mul(group, n5, n4, n5, ctx)) {
    goto end;  // now n5 is n5^3
  }
  if (!field_mul(group, n1, n2, n5, ctx) ||
      !bn_mod_sub_consttime(n0, n0, n1, p, ctx)) {
    goto end;
  }
  if (BN_is_odd(n0) && !BN_add(n0, n0, p)) {
    goto end;
  }
  // now  0 <= n0 < 2*p,  and n0 is even
  if (!BN_rshift1(&r->Y, n0)) {
    goto end;
  }
  // Y_r = (n6 * 'n9' - 'n8' * 'n5^3') / 2

  ret = 1;

 end:
  if (ctx) {
    // otherwise we already called BN_CTX_end
    BN_CTX_end(ctx);
  }
  BN_CTX_free(new_ctx);
  return ret;
    ec_GFp_simple_dbl(group, out, a);
    return;
  }


  // The method is taken from:
  //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
  //
  // Coq transcription and correctness proof:
  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
  // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                          const EC_FELEM *b) = group->meth->felem_mul;
  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
      group->meth->felem_sqr;

  EC_FELEM x_out, y_out, z_out;
  BN_ULONG z1nz = ec_felem_non_zero_mask(group, &a->Z);
  BN_ULONG z2nz = ec_felem_non_zero_mask(group, &b->Z);

  // z1z1 = z1z1 = z1**2
  EC_FELEM z1z1;
  felem_sqr(group, &z1z1, &a->Z);

  // z2z2 = z2**2
  EC_FELEM z2z2;
  felem_sqr(group, &z2z2, &b->Z);

  // u1 = x1*z2z2
  EC_FELEM u1;
  felem_mul(group, &u1, &a->X, &z2z2);

  // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
  EC_FELEM two_z1z2;
  ec_felem_add(group, &two_z1z2, &a->Z, &b->Z);
  felem_sqr(group, &two_z1z2, &two_z1z2);
  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z1z1);
  ec_felem_sub(group, &two_z1z2, &two_z1z2, &z2z2);

  // s1 = y1 * z2**3
  EC_FELEM s1;
  felem_mul(group, &s1, &b->Z, &z2z2);
  felem_mul(group, &s1, &s1, &a->Y);

  // u2 = x2*z1z1
  EC_FELEM u2;
  felem_mul(group, &u2, &b->X, &z1z1);

  // h = u2 - u1
  EC_FELEM h;
  ec_felem_sub(group, &h, &u2, &u1);

  BN_ULONG xneq = ec_felem_non_zero_mask(group, &h);

  // z_out = two_z1z2 * h
  felem_mul(group, &z_out, &h, &two_z1z2);

  // z1z1z1 = z1 * z1z1
  EC_FELEM z1z1z1;
  felem_mul(group, &z1z1z1, &a->Z, &z1z1);

  // s2 = y2 * z1**3
  EC_FELEM s2;
  felem_mul(group, &s2, &b->Y, &z1z1z1);

  // r = (s2 - s1)*2
  EC_FELEM r;
  ec_felem_sub(group, &r, &s2, &s1);
  ec_felem_add(group, &r, &r, &r);

  BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);

  // TODO(davidben): Analyze how case relates to timing considerations for the
  // supported curves which hit it (P-224, P-384, and P-521) and the
  // to-be-written constant-time generic multiplication implementation.
  if (!xneq && !yneq && z1nz && z2nz) {
    ec_GFp_simple_dbl(group, out, a);
    return;
  }

  // I = (2h)**2
  EC_FELEM i;
  ec_felem_add(group, &i, &h, &h);
  felem_sqr(group, &i, &i);

  // J = h * I
  EC_FELEM j;
  felem_mul(group, &j, &h, &i);

  // V = U1 * I
  EC_FELEM v;
  felem_mul(group, &v, &u1, &i);

  // x_out = r**2 - J - 2V
  felem_sqr(group, &x_out, &r);
  ec_felem_sub(group, &x_out, &x_out, &j);
  ec_felem_sub(group, &x_out, &x_out, &v);
  ec_felem_sub(group, &x_out, &x_out, &v);

  // y_out = r(V-x_out) - 2 * s1 * J
  ec_felem_sub(group, &y_out, &v, &x_out);
  felem_mul(group, &y_out, &y_out, &r);
  EC_FELEM s1j;
  felem_mul(group, &s1j, &s1, &j);
  ec_felem_sub(group, &y_out, &y_out, &s1j);
  ec_felem_sub(group, &y_out, &y_out, &s1j);

  ec_felem_select(group, &x_out, z1nz, &x_out, &b->X);
  ec_felem_select(group, &out->X, z2nz, &x_out, &a->X);
  ec_felem_select(group, &y_out, z1nz, &y_out, &b->Y);
  ec_felem_select(group, &out->Y, z2nz, &y_out, &a->Y);
  ec_felem_select(group, &z_out, z1nz, &z_out, &b->Z);
  ec_felem_select(group, &out->Z, z2nz, &z_out, &a->Z);
 }

 int ec_GFp_simple_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
                      BN_CTX *ctx) {
  int (*field_mul)(const EC_GROUP *, BIGNUM *, const BIGNUM *, const BIGNUM *,
                   BN_CTX *);
  int (*field_sqr)(const EC_GROUP *, BIGNUM *, const BIGNUM *, BN_CTX *);
  const BIGNUM *p;
  BN_CTX *new_ctx = NULL;
  BIGNUM *n0, *n1, *n2, *n3;
  int ret = 0;

  if (EC_POINT_is_at_infinity(group, a)) {
    BN_zero(&r->Z);
    return 1;
  }

  field_mul = group->meth->field_mul;
  field_sqr = group->meth->field_sqr;
  p = &group->field;
 void ec_GFp_simple_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a) {
  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                          const EC_FELEM *b) = group->meth->felem_mul;
  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
      group->meth->felem_sqr;

  if (ctx == NULL) {
    ctx = new_ctx = BN_CTX_new();
    if (ctx == NULL) {
      return 0;
    }
  }

  BN_CTX_start(ctx);
  n0 = BN_CTX_get(ctx);
  n1 = BN_CTX_get(ctx);
  n2 = BN_CTX_get(ctx);
  n3 = BN_CTX_get(ctx);
  if (n3 == NULL) {
    goto err;
  }

  // Note that in this function we must not read components of 'a'
  // once we have written the corresponding components of 'r'.
  // ('r' might the same as 'a'.)

  // n1
  if (group->a_is_minus3) {
    if (!field_sqr(group, n1, &a->Z, ctx) ||
        !bn_mod_add_consttime(n0, &a->X, n1, p, ctx) ||
        !bn_mod_sub_consttime(n2, &a->X, n1, p, ctx) ||
        !field_mul(group, n1, n0, n2, ctx) ||
        !bn_mod_lshift1_consttime(n0, n1, p, ctx) ||
        !bn_mod_add_consttime(n1, n0, n1, p, ctx)) {
      goto err;
    }
    // n1 = 3 * (X_a + Z_a^2) * (X_a - Z_a^2)
    //    = 3 * X_a^2 - 3 * Z_a^4
    // The method is taken from:
    //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
    //
    // Coq transcription and correctness proof:
    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
    // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
    EC_FELEM delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
    // delta = z^2
    felem_sqr(group, &delta, &a->Z);
    // gamma = y^2
    felem_sqr(group, &gamma, &a->Y);
    // beta = x*gamma
    felem_mul(group, &beta, &a->X, &gamma);

    // alpha = 3*(x-delta)*(x+delta)
    ec_felem_sub(group, &ftmp, &a->X, &delta);
    ec_felem_add(group, &ftmp2, &a->X, &delta);

    ec_felem_add(group, &tmptmp, &ftmp2, &ftmp2);
    ec_felem_add(group, &ftmp2, &ftmp2, &tmptmp);
    felem_mul(group, &alpha, &ftmp, &ftmp2);

    // x' = alpha^2 - 8*beta
    felem_sqr(group, &r->X, &alpha);
    ec_felem_add(group, &fourbeta, &beta, &beta);
    ec_felem_add(group, &fourbeta, &fourbeta, &fourbeta);
    ec_felem_add(group, &tmptmp, &fourbeta, &fourbeta);
    ec_felem_sub(group, &r->X, &r->X, &tmptmp);

    // z' = (y + z)^2 - gamma - delta
    ec_felem_add(group, &delta, &gamma, &delta);
    ec_felem_add(group, &ftmp, &a->Y, &a->Z);
    felem_sqr(group, &r->Z, &ftmp);
    ec_felem_sub(group, &r->Z, &r->Z, &delta);

    // y' = alpha*(4*beta - x') - 8*gamma^2
    ec_felem_sub(group, &r->Y, &fourbeta, &r->X);
    ec_felem_add(group, &gamma, &gamma, &gamma);
    felem_sqr(group, &gamma, &gamma);
    felem_mul(group, &r->Y, &alpha, &r->Y);
    ec_felem_add(group, &gamma, &gamma, &gamma);
    ec_felem_sub(group, &r->Y, &r->Y, &gamma);
  } else {
    if (!field_sqr(group, n0, &a->X, ctx) ||
        !bn_mod_lshift1_consttime(n1, n0, p, ctx) ||
        !bn_mod_add_consttime(n0, n0, n1, p, ctx) ||
        !field_sqr(group, n1, &a->Z, ctx) ||
        !field_sqr(group, n1, n1, ctx) ||
        !field_mul(group, n1, n1, &group->a, ctx) ||
        !bn_mod_add_consttime(n1, n1, n0, p, ctx)) {
      goto err;
    }
    // n1 = 3 * X_a^2 + a_curve * Z_a^4
  }

  // Z_r
  if (!field_mul(group, n0, &a->Y, &a->Z, ctx) ||
      !bn_mod_lshift1_consttime(&r->Z, n0, p, ctx)) {
    goto err;
  }
  // Z_r = 2 * Y_a * Z_a

  // n2
  if (!field_sqr(group, n3, &a->Y, ctx) ||
      !field_mul(group, n2, &a->X, n3, ctx) ||
      !bn_mod_lshift_consttime(n2, n2, 2, p, ctx)) {
    goto err;
  }
  // n2 = 4 * X_a * Y_a^2

  // X_r
  if (!bn_mod_lshift1_consttime(n0, n2, p, ctx) ||
      !field_sqr(group, &r->X, n1, ctx) ||
      !bn_mod_sub_consttime(&r->X, &r->X, n0, p, ctx)) {
    goto err;
  }
  // X_r = n1^2 - 2 * n2

  // n3
  if (!field_sqr(group, n0, n3, ctx) ||
      !bn_mod_lshift_consttime(n3, n0, 3, p, ctx)) {
    goto err;
    // The method is taken from:
    //   http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
    //
    // Coq transcription and correctness proof:
    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L102>
    // <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L534>
    EC_FELEM xx, yy, yyyy, zz;
    felem_sqr(group, &xx, &a->X);
    felem_sqr(group, &yy, &a->Y);
    felem_sqr(group, &yyyy, &yy);
    felem_sqr(group, &zz, &a->Z);

    // s = 2*((x_in + yy)^2 - xx - yyyy)
    EC_FELEM s;
    ec_felem_add(group, &s, &a->X, &yy);
    felem_sqr(group, &s, &s);
    ec_felem_sub(group, &s, &s, &xx);
    ec_felem_sub(group, &s, &s, &yyyy);
    ec_felem_add(group, &s, &s, &s);

    // m = 3*xx + a*zz^2
    EC_FELEM m;
    felem_sqr(group, &m, &zz);
    felem_mul(group, &m, &group->a, &m);
    ec_felem_add(group, &m, &m, &xx);
    ec_felem_add(group, &m, &m, &xx);
    ec_felem_add(group, &m, &m, &xx);

    // x_out = m^2 - 2*s
    felem_sqr(group, &r->X, &m);
    ec_felem_sub(group, &r->X, &r->X, &s);
    ec_felem_sub(group, &r->X, &r->X, &s);

    // z_out = (y_in + z_in)^2 - yy - zz
    ec_felem_add(group, &r->Z, &a->Y, &a->Z);
    felem_sqr(group, &r->Z, &r->Z);
    ec_felem_sub(group, &r->Z, &r->Z, &yy);
    ec_felem_sub(group, &r->Z, &r->Z, &zz);

    // y_out = m*(s-x_out) - 8*yyyy
    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
    ec_felem_add(group, &yyyy, &yyyy, &yyyy);
    ec_felem_sub(group, &r->Y, &s, &r->X);
    felem_mul(group, &r->Y, &r->Y, &m);
    ec_felem_sub(group, &r->Y, &r->Y, &yyyy);
  }
  // n3 = 8 * Y_a^4

  // Y_r
  if (!bn_mod_sub_consttime(n0, n2, &r->X, p, ctx) ||
      !field_mul(group, n0, n1, n0, ctx) ||
      !bn_mod_sub_consttime(&r->Y, n0, n3, p, ctx)) {
    goto err;
  }
  // Y_r = n1 * (n2 - X_r) - n3

  ret = 1;

 err:
  BN_CTX_end(ctx);
  BN_CTX_free(new_ctx);
  return ret;
 }

 int ec_GFp_simple_invert(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx) {
  if (EC_POINT_is_at_infinity(group, point) || BN_is_zero(&point->Y)) {
    // point is its own inverse
    return 1;
  }

  return BN_usub(&point->Y, &group->field, &point->Y);
 void ec_GFp_simple_invert(const EC_GROUP *group, EC_POINT *point) {
  ec_felem_neg(group, &point->Y, &point->Y);
 }

 int ec_GFp_simple_is_at_infinity(const EC_GROUP *group, const EC_POINT *point) {
  return BN_is_zero(&point->Z);
  return ec_felem_non_zero_mask(group, &point->Z) == 0;
 }

 int ec_GFp_simple_is_on_curve(const EC_GROUP *group, const EC_POINT *point,
                              BN_CTX *ctx) {
  int (*field_mul)(const EC_GROUP *, BIGNUM *, const BIGNUM *, const BIGNUM *,
                   BN_CTX *);
  int (*field_sqr)(const EC_GROUP *, BIGNUM *, const BIGNUM *, BN_CTX *);
  const BIGNUM *p;
  BN_CTX *new_ctx = NULL;
  BIGNUM *rh, *tmp, *Z4, *Z6;
  int ret = 0;

 int ec_GFp_simple_is_on_curve(const EC_GROUP *group, const EC_POINT *point) {
  if (EC_POINT_is_at_infinity(group, point)) {
    return 1;
  }

  field_mul = group->meth->field_mul;
  field_sqr = group->meth->field_sqr;
  p = &group->field;

  if (ctx == NULL) {
    ctx = new_ctx = BN_CTX_new();
    if (ctx == NULL) {
      return 0;
    }
  }

  BN_CTX_start(ctx);
  rh = BN_CTX_get(ctx);
  tmp = BN_CTX_get(ctx);
  Z4 = BN_CTX_get(ctx);
  Z6 = BN_CTX_get(ctx);
  if (Z6 == NULL) {
    goto err;
  }

  // We have a curve defined by a Weierstrass equation
  //      y^2 = x^3 + a*x + b.
  // The point to consider is given in Jacobian projective coordinates
@@ -625,79 +448,53 @@ int ec_GFp_simple_is_on_curve(const EC_GROUP *group, const EC_POINT *point,
  //      Y^2 = X^3 + a*X*Z^4 + b*Z^6.
  // To test this, we add up the right-hand side in 'rh'.

  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                          const EC_FELEM *b) = group->meth->felem_mul;
  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
      group->meth->felem_sqr;

  // rh := X^2
  if (!field_sqr(group, rh, &point->X, ctx)) {
    goto err;
  }
  EC_FELEM rh;
  felem_sqr(group, &rh, &point->X);

  if (BN_cmp(&point->Z, &group->one) != 0) {
    if (!field_sqr(group, tmp, &point->Z, ctx) ||
        !field_sqr(group, Z4, tmp, ctx) ||
        !field_mul(group, Z6, Z4, tmp, ctx)) {
      goto err;
    }
  EC_FELEM tmp, Z4, Z6;
  if (!ec_felem_equal(group, &point->Z, &group->one)) {
    felem_sqr(group, &tmp, &point->Z);
    felem_sqr(group, &Z4, &tmp);
    felem_mul(group, &Z6, &Z4, &tmp);

    // rh := (rh + a*Z^4)*X
    if (group->a_is_minus3) {
      if (!bn_mod_lshift1_consttime(tmp, Z4, p, ctx) ||
          !bn_mod_add_consttime(tmp, tmp, Z4, p, ctx) ||
          !bn_mod_sub_consttime(rh, rh, tmp, p, ctx) ||
          !field_mul(group, rh, rh, &point->X, ctx)) {
        goto err;
      }
      ec_felem_add(group, &tmp, &Z4, &Z4);
      ec_felem_add(group, &tmp, &tmp, &Z4);
      ec_felem_sub(group, &rh, &rh, &tmp);
      felem_mul(group, &rh, &rh, &point->X);
    } else {
      if (!field_mul(group, tmp, Z4, &group->a, ctx) ||
          !bn_mod_add_consttime(rh, rh, tmp, p, ctx) ||
          !field_mul(group, rh, rh, &point->X, ctx)) {
        goto err;
      }
      felem_mul(group, &tmp, &Z4, &group->a);
      ec_felem_add(group, &rh, &rh, &tmp);
      felem_mul(group, &rh, &rh, &point->X);
    }

    // rh := rh + b*Z^6
    if (!field_mul(group, tmp, &group->b, Z6, ctx) ||
        !bn_mod_add_consttime(rh, rh, tmp, p, ctx)) {
      goto err;
    }
    felem_mul(group, &tmp, &group->b, &Z6);
    ec_felem_add(group, &rh, &rh, &tmp);
  } else {
    // rh := (rh + a)*X
    if (!bn_mod_add_consttime(rh, rh, &group->a, p, ctx) ||
        !field_mul(group, rh, rh, &point->X, ctx)) {
      goto err;
    }
    ec_felem_add(group, &rh, &rh, &group->a);
    felem_mul(group, &rh, &rh, &point->X);
    // rh := rh + b
    if (!bn_mod_add_consttime(rh, rh, &group->b, p, ctx)) {
      goto err;
    }
    ec_felem_add(group, &rh, &rh, &group->b);
  }

  // 'lh' := Y^2
  if (!field_sqr(group, tmp, &point->Y, ctx)) {
    goto err;
  }

  ret = (0 == BN_ucmp(tmp, rh));

 err:
  BN_CTX_end(ctx);
  BN_CTX_free(new_ctx);
  return ret;
  felem_sqr(group, &tmp, &point->Y);
  return ec_felem_equal(group, &tmp, &rh);
 }

 int ec_GFp_simple_cmp(const EC_GROUP *group, const EC_POINT *a,
                      const EC_POINT *b, BN_CTX *ctx) {
  // return values:
  //  -1   error
  //   0   equal (in affine coordinates)
  //   1   not equal

  int (*field_mul)(const EC_GROUP *, BIGNUM *, const BIGNUM *, const BIGNUM *,
                   BN_CTX *);
  int (*field_sqr)(const EC_GROUP *, BIGNUM *, const BIGNUM *, BN_CTX *);
  BN_CTX *new_ctx = NULL;
  BIGNUM *tmp1, *tmp2, *Za23, *Zb23;
  const BIGNUM *tmp1_, *tmp2_;
  int ret = -1;

                      const EC_POINT *b) {
  // Note this function returns zero if |a| and |b| are equal and 1 if they are
  // not equal.
  if (ec_GFp_simple_is_at_infinity(group, a)) {
    return ec_GFp_simple_is_at_infinity(group, b) ? 0 : 1;
  }
@@ -706,93 +503,66 @@ int ec_GFp_simple_cmp(const EC_GROUP *group, const EC_POINT *a,
    return 1;
  }

  int a_Z_is_one = BN_cmp(&a->Z, &group->one) == 0;
  int b_Z_is_one = BN_cmp(&b->Z, &group->one) == 0;
  int a_Z_is_one = ec_felem_equal(group, &a->Z, &group->one);
  int b_Z_is_one = ec_felem_equal(group, &b->Z, &group->one);

  if (a_Z_is_one && b_Z_is_one) {
    return ((BN_cmp(&a->X, &b->X) == 0) && BN_cmp(&a->Y, &b->Y) == 0) ? 0 : 1;
    return !ec_felem_equal(group, &a->X, &b->X) ||
           !ec_felem_equal(group, &a->Y, &b->Y);
  }

  field_mul = group->meth->field_mul;
  field_sqr = group->meth->field_sqr;

  if (ctx == NULL) {
    ctx = new_ctx = BN_CTX_new();
    if (ctx == NULL) {
      return -1;
    }
  }

  BN_CTX_start(ctx);
  tmp1 = BN_CTX_get(ctx);
  tmp2 = BN_CTX_get(ctx);
  Za23 = BN_CTX_get(ctx);
  Zb23 = BN_CTX_get(ctx);
  if (Zb23 == NULL) {
    goto end;
  }
  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
                          const EC_FELEM *b) = group->meth->felem_mul;
  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
      group->meth->felem_sqr;

  // We have to decide whether
  //     (X_a/Z_a^2, Y_a/Z_a^3) = (X_b/Z_b^2, Y_b/Z_b^3),
  // or equivalently, whether
  //     (X_a*Z_b^2, Y_a*Z_b^3) = (X_b*Z_a^2, Y_b*Z_a^3).

  EC_FELEM tmp1, tmp2, Za23, Zb23;
  const EC_FELEM *tmp1_, *tmp2_;
  if (!b_Z_is_one) {
    if (!field_sqr(group, Zb23, &b->Z, ctx) ||
        !field_mul(group, tmp1, &a->X, Zb23, ctx)) {
      goto end;
    }
    tmp1_ = tmp1;
    felem_sqr(group, &Zb23, &b->Z);
    felem_mul(group, &tmp1, &a->X, &Zb23);
    tmp1_ = &tmp1;
  } else {
    tmp1_ = &a->X;
  }
  if (!a_Z_is_one) {
    if (!field_sqr(group, Za23, &a->Z, ctx) ||
        !field_mul(group, tmp2, &b->X, Za23, ctx)) {
      goto end;
    }
    tmp2_ = tmp2;
    felem_sqr(group, &Za23, &a->Z);
    felem_mul(group, &tmp2, &b->X, &Za23);
    tmp2_ = &tmp2;
  } else {
    tmp2_ = &b->X;
  }

  // compare  X_a*Z_b^2  with  X_b*Z_a^2
  if (BN_cmp(tmp1_, tmp2_) != 0) {
    ret = 1;  // points differ
    goto end;
  // Compare  X_a*Z_b^2  with  X_b*Z_a^2.
  if (!ec_felem_equal(group, tmp1_, tmp2_)) {
    return 1;  // The points differ.
  }


  if (!b_Z_is_one) {
    if (!field_mul(group, Zb23, Zb23, &b->Z, ctx) ||
        !field_mul(group, tmp1, &a->Y, Zb23, ctx)) {
      goto end;
    }
    // tmp1_ = tmp1
    felem_mul(group, &Zb23, &Zb23, &b->Z);
    felem_mul(group, &tmp1, &a->Y, &Zb23);
    // tmp1_ = &tmp1
  } else {
    tmp1_ = &a->Y;
  }
  if (!a_Z_is_one) {
    if (!field_mul(group, Za23, Za23, &a->Z, ctx) ||
        !field_mul(group, tmp2, &b->Y, Za23, ctx)) {
      goto end;
    }
    // tmp2_ = tmp2
    felem_mul(group, &Za23, &Za23, &a->Z);
    felem_mul(group, &tmp2, &b->Y, &Za23);
    // tmp2_ = &tmp2
  } else {
    tmp2_ = &b->Y;
  }

  // compare  Y_a*Z_b^3  with  Y_b*Z_a^3
  if (BN_cmp(tmp1_, tmp2_) != 0) {
    ret = 1;  // points differ
    goto end;
  // Compare  Y_a*Z_b^3  with  Y_b*Z_a^3.
  if (!ec_felem_equal(group, tmp1_, tmp2_)) {
    return 1;  // The points differ.
  }

  // points are equal
  ret = 0;

 end:
  BN_CTX_end(ctx);
  BN_CTX_free(new_ctx);
  return ret;
  // The points are equal.
  return 0;
 }
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -33,6 +33,7 @@
 #include <openssl/ec.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
 #include <openssl/type_check.h>

 #include <string.h>

@@ -896,21 +897,25 @@ static void fe_from_montgomery(fe x) {

 // BN_* compatability wrappers

 static int BN_to_fe(fe out, const BIGNUM *bn) {
  uint8_t tmp[NBYTES];
  if (!BN_bn2le_padded(tmp, NBYTES, bn)) {
    return 0;
  }
  fe_frombytes(out, tmp);
  return 1;
 }

 static BIGNUM *fe_to_BN(BIGNUM *out, const fe in) {
  uint8_t tmp[NBYTES];
  fe_tobytes(tmp, in);
  return BN_le2bn(tmp, NBYTES, out);
 }

 static void fe_from_generic(fe out, const EC_FELEM *in) {
  fe_frombytes(out, in->bytes);
 }

 static void fe_to_generic(EC_FELEM *out, const fe in) {
  // This works because 256 is a multiple of 64, so there are no excess bytes to
  // zero when rounding up to |BN_ULONG|s.
  OPENSSL_COMPILE_ASSERT(
      256 / 8 == sizeof(BN_ULONG) * ((256 + BN_BITS2 - 1) / BN_BITS2),
      bytes_left_over);
  fe_tobytes(out->bytes, in);
 }

 // fe_inv calculates |out| = |in|^{-1}
 //
 // Based on Fermat's Little Theorem:
@@ -1628,20 +1633,14 @@ static void batch_mul(fe x_out, fe y_out, fe z_out,
 static int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
                                                        const EC_POINT *point,
                                                        BIGNUM *x_out,
                                                        BIGNUM *y_out,
                                                        BN_CTX *ctx) {
  fe x, y, z1, z2;

                                                        BIGNUM *y_out) {
  if (EC_POINT_is_at_infinity(group, point)) {
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
    return 0;
  }
  if (!BN_to_fe(x, &point->X) ||
      !BN_to_fe(y, &point->Y) ||
      !BN_to_fe(z1, &point->Z)) {
    return 0;
  }

  fe z1, z2;
  fe_from_generic(z1, &point->Z);
  fe_inv(z2, z1);
  fe_sqr(z1, z2);

@@ -1651,6 +1650,8 @@ static int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
  fe_from_montgomery(z1);

  if (x_out != NULL) {
    fe x;
    fe_from_generic(x, &point->X);
    fe_mul(x, x, z1);
    if (!fe_to_BN(x_out, x)) {
      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
@@ -1659,6 +1660,8 @@ static int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
  }

  if (y_out != NULL) {
    fe y;
    fe_from_generic(y, &point->Y);
    fe_mul(z1, z1, z2);
    fe_mul(y, y, z1);
    if (!fe_to_BN(y_out, y)) {
@@ -1683,11 +1686,9 @@ static int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
    // they contribute nothing to the linear combination.
    OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
    // Precompute multiples.
    if (!BN_to_fe(p_pre_comp[1][0], &p->X) ||
        !BN_to_fe(p_pre_comp[1][1], &p->Y) ||
        !BN_to_fe(p_pre_comp[1][2], &p->Z)) {
      return 0;
    }
    fe_from_generic(p_pre_comp[1][0], &p->X);
    fe_from_generic(p_pre_comp[1][1], &p->Y);
    fe_from_generic(p_pre_comp[1][2], &p->Z);
    for (size_t j = 2; j <= 16; ++j) {
      if (j & 1) {
        point_add(p_pre_comp[j][0], p_pre_comp[j][1],
@@ -1709,12 +1710,9 @@ static int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
            g_scalar != NULL ? g_scalar->bytes : NULL,
            (const fe (*) [3])p_pre_comp);

  if (!fe_to_BN(&r->X, x_out) ||
      !fe_to_BN(&r->Y, y_out) ||
      !fe_to_BN(&r->Z, z_out)) {
    OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
    return 0;
  }
  fe_to_generic(&r->X, x_out);
  fe_to_generic(&r->Y, y_out);
  fe_to_generic(&r->Z, z_out);
  return 1;
 }

@@ -1726,11 +1724,9 @@ static int ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, EC_POINT *r,
 #define P256_WSIZE_PUBLIC 4
  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
  fe p_pre_comp[1 << (P256_WSIZE_PUBLIC-1)][3];
  if (!BN_to_fe(p_pre_comp[0][0], &p->X) ||
      !BN_to_fe(p_pre_comp[0][1], &p->Y) ||
      !BN_to_fe(p_pre_comp[0][2], &p->Z)) {
    return 0;
  }
  fe_from_generic(p_pre_comp[0][0], &p->X);
  fe_from_generic(p_pre_comp[0][1], &p->Y);
  fe_from_generic(p_pre_comp[0][2], &p->Z);
  fe p2[3];
  point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], p_pre_comp[0][1],
               p_pre_comp[0][2]);
@@ -1798,12 +1794,9 @@ static int ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, EC_POINT *r,
    }
  }

  if (!fe_to_BN(&r->X, ret[0]) ||
      !fe_to_BN(&r->Y, ret[1]) ||
      !fe_to_BN(&r->Z, ret[2])) {
    OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
    return 0;
  }
  fe_to_generic(&r->X, ret[0]);
  fe_to_generic(&r->Y, ret[1]);
  fe_to_generic(&r->Z, ret[2]);
  return 1;
 }

@@ -1815,10 +1808,10 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
    ec_GFp_nistp256_point_get_affine_coordinates;
  out->mul = ec_GFp_nistp256_points_mul;
  out->mul_public = ec_GFp_nistp256_point_mul_public;
  out->field_mul = ec_GFp_mont_field_mul;
  out->field_sqr = ec_GFp_mont_field_sqr;
  out->field_encode = ec_GFp_mont_field_encode;
  out->field_decode = ec_GFp_mont_field_decode;
  out->felem_mul = ec_GFp_mont_felem_mul;
  out->felem_sqr = ec_GFp_mont_felem_sqr;
  out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
  out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
  out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
 };