For simplicity, punt order > field or width mismatches. Analogous optimizations are possible, but the generic path works fine and no commonly-used curve looks hits those cases. Before: Did 5888 ECDSA P-384 verify operations in 3094535us (1902.7 ops/sec) After [+6.7%]: Did 6107 ECDSA P-384 verify operations in 3007515us (2030.6 ops/sec) Also we can fill in p - order generically and avoid extra copies of some constants. Change-Id: I38e1b6d51b28ed4f8cb74697b00a4f0fbc5efc3c Reviewed-on: https://boringssl-review.googlesource.com/c/33068 Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> Reviewed-by: Adam Langley <agl@google.com>kris/onging/CECPQ3_patch15
@@ -344,9 +344,8 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, | |||
generator->group != group) { | |||
// |EC_GROUP_set_generator| may only be used with |EC_GROUP|s returned by | |||
// |EC_GROUP_new_curve_GFp| and may only used once on each group. | |||
// Additionally, |generator| must been created from | |||
// |EC_GROUP_new_curve_GFp|, not a copy, so that | |||
// |generator->group->generator| is set correctly. | |||
// |generator| must have been created from |EC_GROUP_new_curve_GFp|, not a | |||
// copy, so that |generator->group->generator| is set correctly. | |||
OPENSSL_PUT_ERROR(EC, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); | |||
return 0; | |||
} | |||
@@ -367,25 +366,23 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, | |||
// Note any curve which did not satisfy this must have been invalid or use a | |||
// tiny prime (less than 17). See the proof in |field_element_to_scalar| in | |||
// the ECDSA implementation. | |||
int ret = 0; | |||
EC_POINT *copy = NULL; | |||
BIGNUM *tmp = BN_new(); | |||
if (tmp == NULL || | |||
!BN_lshift1(tmp, order)) { | |||
BN_free(tmp); | |||
return 0; | |||
goto err; | |||
} | |||
int ok = BN_cmp(tmp, &group->field) > 0; | |||
BN_free(tmp); | |||
if (!ok) { | |||
if (BN_cmp(tmp, &group->field) <= 0) { | |||
OPENSSL_PUT_ERROR(EC, EC_R_INVALID_GROUP_ORDER); | |||
return 0; | |||
goto err; | |||
} | |||
EC_POINT *copy = EC_POINT_new(group); | |||
copy = EC_POINT_new(group); | |||
if (copy == NULL || | |||
!EC_POINT_copy(copy, generator) || | |||
!BN_copy(&group->order, order)) { | |||
EC_POINT_free(copy); | |||
return 0; | |||
goto err; | |||
} | |||
// Store the order in minimal form, so it can be used with |BN_ULONG| arrays. | |||
bn_set_minimal_width(&group->order); | |||
@@ -393,11 +390,26 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, | |||
BN_MONT_CTX_free(group->order_mont); | |||
group->order_mont = BN_MONT_CTX_new_for_modulus(&group->order, NULL); | |||
if (group->order_mont == NULL) { | |||
return 0; | |||
goto err; | |||
} | |||
group->field_greater_than_order = BN_cmp(&group->field, &group->order) > 0; | |||
if (group->field_greater_than_order) { | |||
if (!BN_sub(tmp, &group->field, &group->order) || | |||
!bn_copy_words(group->field_minus_order.words, group->field.width, | |||
tmp)) { | |||
goto err; | |||
} | |||
} | |||
ec_group_set0_generator(group, copy); | |||
return 1; | |||
copy = NULL; | |||
ret = 1; | |||
err: | |||
EC_POINT_free(copy); | |||
BN_free(tmp); | |||
return ret; | |||
} | |||
static EC_GROUP *ec_group_new_from_data(const struct built_in_curve *curve) { | |||
@@ -449,6 +461,14 @@ static EC_GROUP *ec_group_new_from_data(const struct built_in_curve *curve) { | |||
goto err; | |||
} | |||
group->field_greater_than_order = BN_cmp(&group->field, &group->order) > 0; | |||
if (group->field_greater_than_order) { | |||
if (!BN_sub(p, &group->field, &group->order) || | |||
!bn_copy_words(group->field_minus_order.words, group->field.width, p)) { | |||
goto err; | |||
} | |||
} | |||
group->order_mont = BN_MONT_CTX_new_for_modulus(&group->order, ctx); | |||
if (group->order_mont == NULL) { | |||
OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB); | |||
@@ -417,6 +417,51 @@ void ec_GFp_mont_dbl(const EC_GROUP *group, EC_RAW_POINT *r, | |||
} | |||
} | |||
static int ec_GFp_mont_cmp_x_coordinate(const EC_GROUP *group, | |||
const EC_RAW_POINT *p, | |||
const EC_SCALAR *r) { | |||
if (!group->field_greater_than_order || | |||
group->field.width != group->order.width) { | |||
// Do not bother optimizing this case. p > order in all commonly-used | |||
// curves. | |||
return ec_GFp_simple_cmp_x_coordinate(group, p, r); | |||
} | |||
if (ec_GFp_simple_is_at_infinity(group, p)) { | |||
return 0; | |||
} | |||
// We wish to compare X/Z^2 with r. This is equivalent to comparing X with | |||
// r*Z^2. Note that X and Z are represented in Montgomery form, while r is | |||
// not. | |||
EC_FELEM r_Z2, Z2_mont, X; | |||
ec_GFp_mont_felem_mul(group, &Z2_mont, &p->Z, &p->Z); | |||
// r < order < p, so this is valid. | |||
OPENSSL_memcpy(r_Z2.words, r->words, group->field.width * sizeof(BN_ULONG)); | |||
ec_GFp_mont_felem_mul(group, &r_Z2, &r_Z2, &Z2_mont); | |||
ec_GFp_mont_felem_from_montgomery(group, &X, &p->X); | |||
if (ec_felem_equal(group, &r_Z2, &X)) { | |||
return 1; | |||
} | |||
// During signing the x coefficient is reduced modulo the group order. | |||
// Therefore there is a small possibility, less than 1/2^128, that group_order | |||
// < p.x < P. in that case we need not only to compare against |r| but also to | |||
// compare against r+group_order. | |||
if (bn_less_than_words(r->words, group->field_minus_order.words, | |||
group->field.width)) { | |||
// We can ignore the carry because: r + group_order < p < 2^256. | |||
bn_add_words(r_Z2.words, r->words, group->order.d, group->field.width); | |||
ec_GFp_mont_felem_mul(group, &r_Z2, &r_Z2, &Z2_mont); | |||
if (ec_felem_equal(group, &r_Z2, &X)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) { | |||
out->group_init = ec_GFp_mont_group_init; | |||
out->group_finish = ec_GFp_mont_group_finish; | |||
@@ -432,5 +477,5 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) { | |||
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum; | |||
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery; | |||
out->scalar_inv_montgomery_vartime = ec_GFp_simple_mont_inv_mod_ord_vartime; | |||
out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate; | |||
out->cmp_x_coordinate = ec_GFp_mont_cmp_x_coordinate; | |||
} |
@@ -213,7 +213,20 @@ struct ec_group_st { | |||
EC_FELEM a, b; // Curve coefficients. | |||
int a_is_minus3; // enable optimized point arithmetics for special case | |||
// a_is_minus3 is one if |a| is -3 mod |field| and zero otherwise. Point | |||
// arithmetic is optimized for -3. | |||
int a_is_minus3; | |||
// field_greater_than_order is one if |field| is greate than |order| and zero | |||
// otherwise. | |||
int field_greater_than_order; | |||
// field_minus_order, if |field_greater_than_order| is true, is |field| minus | |||
// |order| represented as an |EC_FELEM|. Otherwise, it is zero. | |||
// | |||
// Note: unlike |EC_FELEM|s used as intermediate values internal to the | |||
// |EC_METHOD|, this value is not encoded in Montgomery form. | |||
EC_FELEM field_minus_order; | |||
CRYPTO_refcount_t references; | |||
@@ -44,12 +44,6 @@ static const BN_ULONG ONE[P256_LIMBS] = { | |||
TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe), | |||
}; | |||
// P256_ORDER is the order of the P-256 group, not in Montgomery form. | |||
static const BN_ULONG P256_ORDER[P256_LIMBS] = { | |||
TOBN(0xf3b9cac2, 0xfc632551), TOBN(0xbce6faad, 0xa7179e84), | |||
TOBN(0xffffffff, 0xffffffff), TOBN(0xffffffff, 0x00000000), | |||
}; | |||
// Precomputed tables for the default generator | |||
#include "p256-x86_64-table.h" | |||
@@ -591,7 +585,8 @@ static int ecp_nistz256_mont_inv_mod_ord_vartime(const EC_GROUP *group, | |||
return ec_GFp_simple_mont_inv_mod_ord_vartime(group, out, in); | |||
} | |||
if (!beeu_mod_inverse_vartime(out->words, in->words, P256_ORDER)) { | |||
assert(group->order.width == P256_LIMBS); | |||
if (!beeu_mod_inverse_vartime(out->words, in->words, group->order.d)) { | |||
return 0; | |||
} | |||
@@ -607,6 +602,9 @@ static int ecp_nistz256_cmp_x_coordinate(const EC_GROUP *group, | |||
return 0; | |||
} | |||
assert(group->order.width == P256_LIMBS); | |||
assert(group->field.width == P256_LIMBS); | |||
// We wish to compare X/Z^2 with r. This is equivalent to comparing X with | |||
// r*Z^2. Note that X and Z are represented in Montgomery form, while r is | |||
// not. | |||
@@ -623,16 +621,10 @@ static int ecp_nistz256_cmp_x_coordinate(const EC_GROUP *group, | |||
// Therefore there is a small possibility, less than 1/2^128, that group_order | |||
// < p.x < P. in that case we need not only to compare against |r| but also to | |||
// compare against r+group_order. | |||
// P_MINUS_ORDER is the difference between the field order (p) and the group | |||
// order (N). This value is not in the Montgomery domain. | |||
static const BN_ULONG P_MINUS_ORDER[P256_LIMBS] = { | |||
TOBN(0x0c46353d, 0x039cdaae), TOBN(0x43190553, 0x58e8617b), | |||
TOBN(0x00000000, 0x00000000), TOBN(0x00000000, 0x00000000)}; | |||
if (bn_less_than_words(r->words, P_MINUS_ORDER, P256_LIMBS)) { | |||
if (bn_less_than_words(r->words, group->field_minus_order.words, | |||
P256_LIMBS)) { | |||
// We can ignore the carry because: r + group_order < p < 2^256. | |||
bn_add_words(r_Z2, r->words, P256_ORDER, P256_LIMBS); | |||
bn_add_words(r_Z2, r->words, group->order.d, P256_LIMBS); | |||
ecp_nistz256_mul_mont(r_Z2, r_Z2, Z2_mont); | |||
if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) { | |||
return 1; | |||
@@ -1838,15 +1838,9 @@ static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group, | |||
// Therefore there is a small possibility, less than 1/2^128, that group_order | |||
// < p.x < P. in that case we need not only to compare against |r| but also to | |||
// compare against r+group_order. | |||
// P_MINUS_ORDER is the difference between the field order (p) and the group | |||
// order (N). This value is not in the Montgomery domain. | |||
static const BN_ULONG P_MINUS_ORDER[] = { | |||
TOBN(0x0c46353d, 0x039cdaae), TOBN(0x43190553, 0x58e8617b), | |||
TOBN(0x00000000, 0x00000000), TOBN(0x00000000, 0x00000000)}; | |||
assert(OPENSSL_ARRAY_SIZE(P_MINUS_ORDER) == group->order.width); | |||
if (bn_less_than_words(r->words, P_MINUS_ORDER, | |||
OPENSSL_ARRAY_SIZE(P_MINUS_ORDER))) { | |||
assert(group->field.width == group->order.width); | |||
if (bn_less_than_words(r->words, group->field_minus_order.words, | |||
group->field.width)) { | |||
// We can ignore the carry because: r + group_order < p < 2^256. | |||
EC_FELEM tmp; | |||
bn_add_words(tmp.words, r->words, group->order.d, group->order.width); | |||