瀏覽代碼

Route the tuned add/dbl implementations out of EC_METHOD.

Some consumer stumbled upon EC_POINT_{add,dbl} being faster with a
"custom" P-224 curve than the built-in one and made "custom" clones to
work around this. Before the EC_FELEM refactor, EC_GFp_nistp224_method
used BN_mod_mul for all reductions in fallback point arithmetic (we
primarily support the multiplication functions and keep the low-level
point arithmetic for legacy reasons) which took quite a performance hit.

EC_FELEM fixed this, but standalone felem_{mul,sqr} calls out of
nistp224 perform a lot of reductions, rather than batching them up as
that implementation is intended. So it is still slightly faster to use a
"custom" curve.

Custom curves are the last thing we want to encourage, so just route the
tuned implementations out of EC_METHOD to close this gap. Now the
built-in implementation is always solidly faster than (or identical to)
the custom clone.  This also reduces the number of places where we mix
up tuned vs. generic implementation, which gets us closer to making
EC_POINT's representation EC_METHOD-specific.

Change-Id: I843e1101a6208eaabb56d29d342e886e523c78b4
Reviewed-on: https://boringssl-review.googlesource.com/c/32848
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
kris/onging/CECPQ3_patch15
David Benjamin 6 年之前
committed by CQ bot account: commit-bot@chromium.org
父節點
當前提交
cfd50c63a1
共有 6 個檔案被更改,包括 108 行新增14 行删除
  1. +2
    -2
      crypto/fipsmodule/ec/ec.c
  2. +2
    -0
      crypto/fipsmodule/ec/ec_montgomery.c
  3. +9
    -4
      crypto/fipsmodule/ec/internal.h
  4. +37
    -8
      crypto/fipsmodule/ec/p224-64.c
  5. +29
    -0
      crypto/fipsmodule/ec/p256-x86_64.c
  6. +29
    -0
      third_party/fiat/p256.c

+ 2
- 2
crypto/fipsmodule/ec/ec.c 查看文件

@@ -782,7 +782,7 @@ int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
return 0;
}
ec_GFp_simple_add(group, &r->raw, &a->raw, &b->raw);
group->meth->add(group, &r->raw, &a->raw, &b->raw);
return 1;
}

@@ -793,7 +793,7 @@ int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
return 0;
}
ec_GFp_simple_dbl(group, &r->raw, &a->raw);
group->meth->dbl(group, &r->raw, &a->raw);
return 1;
}



+ 2
- 0
crypto/fipsmodule/ec/ec_montgomery.c 查看文件

@@ -225,6 +225,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
out->group_finish = ec_GFp_mont_group_finish;
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
out->add = ec_GFp_simple_add;
out->dbl = ec_GFp_simple_dbl;
out->mul = ec_GFp_simple_mul;
out->mul_public = ec_GFp_simple_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;


+ 9
- 4
crypto/fipsmodule/ec/internal.h 查看文件

@@ -127,6 +127,12 @@ struct ec_method_st {
int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_RAW_POINT *,
BIGNUM *x, BIGNUM *y);

// add sets |r| to |a| + |b|.
void (*add)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a,
const EC_RAW_POINT *b);
// dbl sets |r| to |a| + |a|.
void (*dbl)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a);

// Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
// are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
// Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar|
@@ -149,10 +155,9 @@ struct ec_method_st {
// TODO(davidben): This constrains |EC_FELEM|'s internal representation, adds
// many indirect calls in the middle of the generic code, and a bunch of
// conversions. If p224-64.c were easily convertable to Montgomery form, we
// could say |EC_FELEM| is always in Montgomery form. If we exposed the
// internal add and double implementations in each of the curves, we could
// give |EC_POINT| an |EC_METHOD|-specific representation and |EC_FELEM| is
// purely a |EC_GFp_mont_method| type.
// could say |EC_FELEM| is always in Montgomery form. If we routed the rest of
// simple.c to |EC_METHOD|, we could give |EC_POINT| an |EC_METHOD|-specific
// representation and say |EC_FELEM| is purely a |EC_GFp_mont_method| type.
void (*felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
const EC_FELEM *b);
void (*felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a);


+ 37
- 8
crypto/fipsmodule/ec/p224-64.c 查看文件

@@ -917,7 +917,7 @@ static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,

if (!skip) {
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
tmp[0], tmp[1], tmp[2]);
tmp[0], tmp[1], tmp[2]);
} else {
OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
skip = 0;
@@ -951,7 +951,7 @@ static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,

if (!skip) {
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
tmp[0], tmp[1], tmp[2]);
tmp[0], tmp[1], tmp[2]);
} else {
OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
skip = 0;
@@ -1013,6 +1013,33 @@ static int ec_GFp_nistp224_point_get_affine_coordinates(
return 1;
}

static void ec_GFp_nistp224_add(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
p224_felem x1, y1, z1, x2, y2, z2;
p224_generic_to_felem(x1, &a->X);
p224_generic_to_felem(y1, &a->Y);
p224_generic_to_felem(z1, &a->Z);
p224_generic_to_felem(x2, &b->X);
p224_generic_to_felem(y2, &b->Y);
p224_generic_to_felem(z2, &b->Z);
p224_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2);
p224_felem_to_generic(&r->X, x1);
p224_felem_to_generic(&r->Y, y1);
p224_felem_to_generic(&r->Z, z1);
}

static void ec_GFp_nistp224_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_RAW_POINT *a) {
p224_felem x, y, z;
p224_generic_to_felem(x, &a->X);
p224_generic_to_felem(y, &a->Y);
p224_generic_to_felem(z, &a->Z);
p224_point_double(x, y, z, x, y, z);
p224_felem_to_generic(&r->X, x);
p224_felem_to_generic(&r->Y, y);
p224_felem_to_generic(&r->Z, z);
}

static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p,
@@ -1036,13 +1063,13 @@ static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
for (size_t j = 2; j <= 16; ++j) {
if (j & 1) {
p224_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2],
0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
p_pre_comp[j - 1][2]);
p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0,
p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
p_pre_comp[j - 1][2]);
} else {
p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1],
p_pre_comp[j][2], p_pre_comp[j / 2][0],
p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
p_pre_comp[j / 2][0], p_pre_comp[j / 2][1],
p_pre_comp[j / 2][2]);
}
}
}
@@ -1100,6 +1127,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) {
out->group_set_curve = ec_GFp_simple_group_set_curve;
out->point_get_affine_coordinates =
ec_GFp_nistp224_point_get_affine_coordinates;
out->add = ec_GFp_nistp224_add;
out->dbl = ec_GFp_nistp224_dbl;
out->mul = ec_GFp_nistp224_points_mul;
out->mul_public = ec_GFp_nistp224_points_mul;
out->felem_mul = ec_GFp_nistp224_felem_mul;


+ 29
- 0
crypto/fipsmodule/ec/p256-x86_64.c 查看文件

@@ -485,6 +485,33 @@ static int ecp_nistz256_get_affine(const EC_GROUP *group,
return 1;
}

static void ecp_nistz256_add(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_RAW_POINT *a_, const EC_RAW_POINT *b_) {
P256_POINT a, b;
OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(b.X, b_->X.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(b.Y, b_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(b.Z, b_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
ecp_nistz256_point_add(&a, &a, &b);
OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
}

static void ecp_nistz256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_RAW_POINT *a_) {
P256_POINT a;
OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
ecp_nistz256_point_double(&a, &a);
OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
}

static void ecp_nistz256_inv_mod_ord(const EC_GROUP *group, EC_SCALAR *out,
const EC_SCALAR *in) {
// table[i] stores a power of |in| corresponding to the matching enum value.
@@ -633,6 +660,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
out->group_finish = ec_GFp_mont_group_finish;
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ecp_nistz256_get_affine;
out->add = ecp_nistz256_add;
out->dbl = ecp_nistz256_dbl;
out->mul = ecp_nistz256_points_mul;
out->mul_public = ecp_nistz256_points_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;


+ 29
- 0
third_party/fiat/p256.c 查看文件

@@ -1672,6 +1672,33 @@ static int ec_GFp_nistp256_point_get_affine_coordinates(
return 1;
}

static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
fe x1, y1, z1, x2, y2, z2;
fe_from_generic(x1, &a->X);
fe_from_generic(y1, &a->Y);
fe_from_generic(z1, &a->Z);
fe_from_generic(x2, &b->X);
fe_from_generic(y2, &b->Y);
fe_from_generic(z2, &b->Z);
point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2);
fe_to_generic(&r->X, x1);
fe_to_generic(&r->Y, y1);
fe_to_generic(&r->Z, z1);
}

static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_RAW_POINT *a) {
fe x, y, z;
fe_from_generic(x, &a->X);
fe_from_generic(y, &a->Y);
fe_from_generic(z, &a->Z);
point_double(x, y, z, x, y, z);
fe_to_generic(&r->X, x);
fe_to_generic(&r->Y, y);
fe_to_generic(&r->Z, z);
}

static void ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
const EC_SCALAR *g_scalar,
const EC_RAW_POINT *p,
@@ -1800,6 +1827,8 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates =
ec_GFp_nistp256_point_get_affine_coordinates;
out->add = ec_GFp_nistp256_add;
out->dbl = ec_GFp_nistp256_dbl;
out->mul = ec_GFp_nistp256_points_mul;
out->mul_public = ec_GFp_nistp256_point_mul_public;
out->felem_mul = ec_GFp_mont_felem_mul;


Loading…
取消
儲存