Browse Source

ec/p256.c: fiat-crypto field arithmetic (64, 32)

The fiat-crypto-generated code uses the Montgomery form implementation
strategy, for both 32-bit and 64-bit code.

64-bit throughput seems slower, but the difference is smaller than noise between repetitions (-2%?)

32-bit throughput has decreased significantly for ECDH (-40%). I am
attributing this to the change from variable-time scalar multiplication
to constant-time scalar multiplication. Due to the same bottleneck,
ECDSA verification still uses the old code (otherwise there would have
been a 60% throughput decrease). On the other hand, ECDSA signing
throughput has increased slightly (+10%), perhaps due to the use of a
precomputed table of multiples of the base point.

64-bit benchmarks (Google Cloud Haswell):

with this change:
Did 9126 ECDH P-256 operations in 1009572us (9039.5 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039832us (22119.0 ops/sec)
Did 8820 ECDSA P-256 verify operations in 1024242us (8611.2 ops/sec)

master (40e8c921ca):
Did 9340 ECDH P-256 operations in 1017975us (9175.1 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039820us (22119.2 ops/sec)
Did 8688 ECDSA P-256 verify operations in 1021108us (8508.4 ops/sec)

benchmarks on ARMv7 (LG Nexus 4):

with this change:
Did 150 ECDH P-256 operations in 1029726us (145.7 ops/sec)
Did 506 ECDSA P-256 signing operations in 1065192us (475.0 ops/sec)
Did 363 ECDSA P-256 verify operations in 1033298us (351.3 ops/sec)

master (2fce1beda0):
Did 245 ECDH P-256 operations in 1017518us (240.8 ops/sec)
Did 473 ECDSA P-256 signing operations in 1086281us (435.4 ops/sec)
Did 360 ECDSA P-256 verify operations in 1003846us (358.6 ops/sec)

64-bit tables converted as follows:

import re, sys, math

p = 2**256 - 2**224 + 2**192 + 2**96 - 1
R = 2**256

def convert(t):
    """Rewrite one matched 4-limb field element as its value times R mod p.

    `t` is a regex match whose groups alternate between limb literals and
    the separators found between them: limb, sep, limb, sep, limb, sep, limb.
    Limbs are 64 bits wide and listed least-significant first. The returned
    string is a brace-enclosed initializer holding the converted limbs in
    hex, with the original separators preserved.
    """
    x0, s1, x1, s2, x2, s3, x3 = t.groups()

    # Assemble the integer value from the little-endian 64-bit limbs.
    v = sum(int(limb, 0) << (64 * i) for i, limb in enumerate((x0, x1, x2, x3)))

    # Multiply by R modulo p (the Montgomery-form representation of v).
    w = v * R % p

    # Split the result back into four 64-bit limbs, rendered as hex literals.
    limb_mask = 2**64 - 1
    y0, y1, y2, y3 = [hex((w >> (64 * i)) & limb_mask) for i in range(4)]

    # Sanity check: re-parsing the emitted limbs must reproduce w exactly.
    ww = sum(int(limb, 0) << (64 * i) for i, limb in enumerate((y0, y1, y2, y3)))
    if ww != w:
        print(x0,x1,x2,x3)
        print(hex(v))
        print(y0,y1,y2,y3)
        print(hex(w))
        print(hex(ww))
        assert 0

    return ''.join(['{', y0, s1, y1, s2, y2, s3, y3, '}'])

fe_re = re.compile('{'+r'(\s*,\s*)'.join(r'(\d+|0x[abcdefABCDEF0123456789]+)' for i in range(4)) + '}')
print (re.sub(fe_re, convert, sys.stdin.read()).rstrip('\n'))

32-bit tables converted from 64-bit tables

Change-Id: I52d6e5504fcb6ca2e8b0ee13727f4500c80c1799
Reviewed-on: https://boringssl-review.googlesource.com/23244
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
kris/onging/CECPQ3_patch15
Andres Erbsen 7 years ago
committed by CQ bot account: commit-bot@chromium.org
parent
commit
46304abf7d
10 changed files with 1770 additions and 1693 deletions
  1. +2
    -2
      crypto/fipsmodule/bcm.c
  2. +26
    -11
      crypto/fipsmodule/ec/ec.c
  3. +1
    -0
      crypto/fipsmodule/ec/ec_montgomery.c
  4. +13
    -0
      crypto/fipsmodule/ec/internal.h
  5. +1
    -0
      crypto/fipsmodule/ec/p224-64.c
  6. +0
    -1674
      crypto/fipsmodule/ec/p256-64.c
  7. +1
    -0
      crypto/fipsmodule/ec/p256-x86_64.c
  8. +0
    -5
      crypto/fipsmodule/ec/util.c
  9. +1
    -1
      crypto/fipsmodule/ecdsa/ecdsa.c
  10. +1725
    -0
      third_party/fiat/p256.c

+ 2
- 2
crypto/fipsmodule/bcm.c View File

@@ -67,10 +67,10 @@
#include "ec/ec_montgomery.c"
#include "ec/oct.c"
#include "ec/p224-64.c"
#include "ec/p256-64.c"
#include "../../third_party/fiat/p256.c"
#include "ec/p256-x86_64.c"
#include "ec/simple.c"
#include "ec/util-64.c"
#include "ec/util.c"
#include "ec/wnaf.c"
#include "hmac/hmac.c"
#include "md4/md4.c"


+ 26
- 11
crypto/fipsmodule/ec/ec.c View File

@@ -215,13 +215,6 @@ static const uint8_t kP521Params[6 * 66] = {
0xB7, 0x1E, 0x91, 0x38, 0x64, 0x09,
};

// MSan appears to have a bug that causes code to be miscompiled in opt mode.
// While that is being looked at, don't run the uint128_t code under MSan.
#if defined(OPENSSL_64_BIT) && !defined(OPENSSL_WINDOWS) && \
!defined(MEMORY_SANITIZER)
#define BORINGSSL_USE_INT128_CODE
#endif

DEFINE_METHOD_FUNCTION(struct built_in_curves, OPENSSL_built_in_curves) {
// 1.3.132.0.35
static const uint8_t kOIDP521[] = {0x2b, 0x81, 0x04, 0x00, 0x23};
@@ -253,15 +246,18 @@ DEFINE_METHOD_FUNCTION(struct built_in_curves, OPENSSL_built_in_curves) {
out->curves[2].param_len = 32;
out->curves[2].params = kP256Params;
out->curves[2].method =
#if defined(BORINGSSL_USE_INT128_CODE)
// MSan appears to have a bug that causes code to be miscompiled in opt mode.
// While that is being looked at, don't run the uint128_t code under MSan.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
!defined(OPENSSL_SMALL)
!defined(OPENSSL_SMALL) && !defined(MEMORY_SANITIZER)
EC_GFp_nistz256_method();
#else
#if defined(OPENSSL_32_BIT) || \
(defined(OPENSSL_64_BIT) && !defined(MEMORY_SANITIZER))
EC_GFp_nistp256_method();
#endif
#else
EC_GFp_mont_method();
#endif
#endif

// 1.3.132.0.33
@@ -273,7 +269,8 @@ DEFINE_METHOD_FUNCTION(struct built_in_curves, OPENSSL_built_in_curves) {
out->curves[3].param_len = 28;
out->curves[3].params = kP224Params;
out->curves[3].method =
#if defined(BORINGSSL_USE_INT128_CODE) && !defined(OPENSSL_SMALL)
#if defined(OPENSSL_64_BIT) && !defined(OPENSSL_WINDOWS) && \
!defined(MEMORY_SANITIZER) && !defined(OPENSSL_SMALL)
EC_GFp_nistp224_method();
#else
EC_GFp_mont_method();
@@ -883,6 +880,24 @@ err:
return ret;
}

// ec_point_mul_scalar_public validates its arguments and then dispatches to
// the group method's |mul_public| hook. It returns zero and pushes an error
// when (a) both |g_scalar| and |p_scalar| are NULL, (b) exactly one of |p|
// and |p_scalar| is NULL, or (c) |r| or a non-NULL |p| belongs to a group
// that does not compare equal to |group|. Otherwise it returns whatever
// |mul_public| returns.
int ec_point_mul_scalar_public(const EC_GROUP *group, EC_POINT *r,
                               const EC_SCALAR *g_scalar, const EC_POINT *p,
                               const EC_SCALAR *p_scalar, BN_CTX *ctx) {
  const int have_g_scalar = g_scalar != NULL;
  const int have_p_scalar = p_scalar != NULL;
  const int have_p = p != NULL;

  // At least one scalar is required.
  if (!have_g_scalar && !have_p_scalar) {
    OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
    return 0;
  }

  // |p| and |p_scalar| must be supplied together or not at all.
  if (have_p != have_p_scalar) {
    OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
    return 0;
  }

  // The output point must live on |group|.
  if (EC_GROUP_cmp(group, r->group, NULL) != 0) {
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }

  // So must the input point, when one is given.
  if (have_p && EC_GROUP_cmp(group, p->group, NULL) != 0) {
    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
    return 0;
  }

  return group->meth->mul_public(group, r, g_scalar, p, p_scalar, ctx);
}

int ec_point_mul_scalar(const EC_GROUP *group, EC_POINT *r,
const EC_SCALAR *g_scalar, const EC_POINT *p,
const EC_SCALAR *p_scalar, BN_CTX *ctx) {


+ 1
- 0
crypto/fipsmodule/ec/ec_montgomery.c View File

@@ -270,6 +270,7 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
out->mul = ec_wNAF_mul /* XXX: Not constant time. */;
out->mul_public = ec_wNAF_mul;
out->field_mul = ec_GFp_mont_field_mul;
out->field_sqr = ec_GFp_mont_field_sqr;
out->field_encode = ec_GFp_mont_field_encode;


+ 13
- 0
crypto/fipsmodule/ec/internal.h View File

@@ -115,6 +115,12 @@ struct ec_method_st {
// non-null.
int (*mul)(const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);
// mul_public performs the same computation as mul. It further assumes that
// the inputs are public so there is no concern about leaking their values
// through timing.
int (*mul_public)(const EC_GROUP *group, EC_POINT *r,
const EC_SCALAR *g_scalar, const EC_POINT *p,
const EC_SCALAR *p_scalar, BN_CTX *ctx);

// 'field_mul' and 'field_sqr' can be used by 'add' and 'dbl' so that the
// same implementations of point operations can be used with different
@@ -195,6 +201,13 @@ int ec_point_mul_scalar(const EC_GROUP *group, EC_POINT *r,
const EC_SCALAR *g_scalar, const EC_POINT *p,
const EC_SCALAR *p_scalar, BN_CTX *ctx);

// ec_point_mul_scalar_public performs the same computation as
// ec_point_mul_scalar. It further assumes that the inputs are public so
// there is no concern about leaking their values through timing.
int ec_point_mul_scalar_public(const EC_GROUP *group, EC_POINT *r,
const EC_SCALAR *g_scalar, const EC_POINT *p,
const EC_SCALAR *p_scalar, BN_CTX *ctx);

int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const EC_SCALAR *g_scalar,
const EC_POINT *p, const EC_SCALAR *p_scalar, BN_CTX *ctx);



+ 1
- 0
crypto/fipsmodule/ec/p224-64.c View File

@@ -1122,6 +1122,7 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp224_method) {
out->point_get_affine_coordinates =
ec_GFp_nistp224_point_get_affine_coordinates;
out->mul = ec_GFp_nistp224_points_mul;
out->mul_public = ec_GFp_nistp224_points_mul;
out->field_mul = ec_GFp_simple_field_mul;
out->field_sqr = ec_GFp_simple_field_sqr;
out->field_encode = NULL;


+ 0
- 1674
crypto/fipsmodule/ec/p256-64.c
File diff suppressed because it is too large
View File


+ 1
- 0
crypto/fipsmodule/ec/p256-x86_64.c View File

@@ -446,6 +446,7 @@ DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
out->group_set_curve = ec_GFp_mont_group_set_curve;
out->point_get_affine_coordinates = ecp_nistz256_get_affine;
out->mul = ecp_nistz256_points_mul;
out->mul_public = ecp_nistz256_points_mul;
out->field_mul = ec_GFp_mont_field_mul;
out->field_sqr = ec_GFp_mont_field_sqr;
out->field_encode = ec_GFp_mont_field_encode;


crypto/fipsmodule/ec/util-64.c → crypto/fipsmodule/ec/util.c View File

@@ -14,9 +14,6 @@

#include <openssl/base.h>


#if defined(OPENSSL_64_BIT) && !defined(OPENSSL_WINDOWS)

#include <openssl/ec.h>

#include "internal.h"
@@ -105,5 +102,3 @@ void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit,
*sign = s & 1;
*digit = d;
}

#endif // 64_BIT && !WINDOWS

+ 1
- 1
crypto/fipsmodule/ecdsa/ecdsa.c View File

@@ -275,7 +275,7 @@ int ECDSA_do_verify(const uint8_t *digest, size_t digest_len,
OPENSSL_PUT_ERROR(ECDSA, ERR_R_MALLOC_FAILURE);
goto err;
}
if (!ec_point_mul_scalar(group, point, &u1, pub_key, &u2, ctx)) {
if (!ec_point_mul_scalar_public(group, point, &u1, pub_key, &u2, ctx)) {
OPENSSL_PUT_ERROR(ECDSA, ERR_R_EC_LIB);
goto err;
}


+ 1725
- 0
third_party/fiat/p256.c
File diff suppressed because it is too large
View File


Loading…
Cancel
Save