2017-04-04 22:21:43 +01:00
|
|
|
/* Copyright (c) 2017, Google Inc.
|
|
|
|
*
|
|
|
|
* Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
|
|
* copyright notice and this permission notice appear in all copies.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
|
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
|
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
|
|
|
|
2017-04-14 19:16:20 +01:00
|
|
|
#if !defined(_GNU_SOURCE)
|
2017-08-18 19:06:02 +01:00
|
|
|
#define _GNU_SOURCE // needed for syscall() on Linux.
|
2017-04-14 19:16:20 +01:00
|
|
|
#endif
|
|
|
|
|
2017-04-04 22:21:43 +01:00
|
|
|
#include <openssl/crypto.h>
|
2018-01-22 22:30:49 +00:00
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#include <openssl/digest.h>
|
2017-04-04 22:21:43 +01:00
|
|
|
#include <openssl/hmac.h>
|
2017-05-12 23:34:45 +01:00
|
|
|
#include <openssl/sha.h>
|
2017-04-04 22:21:43 +01:00
|
|
|
|
|
|
|
#include "../internal.h"
|
|
|
|
|
2017-04-13 19:00:24 +01:00
|
|
|
#include "aes/aes.c"
|
|
|
|
#include "aes/key_wrap.c"
|
|
|
|
#include "aes/mode_wrappers.c"
|
2017-04-28 22:47:06 +01:00
|
|
|
#include "bn/add.c"
|
|
|
|
#include "bn/asm/x86_64-gcc.c"
|
|
|
|
#include "bn/bn.c"
|
|
|
|
#include "bn/bytes.c"
|
|
|
|
#include "bn/cmp.c"
|
|
|
|
#include "bn/ctx.c"
|
|
|
|
#include "bn/div.c"
|
2018-05-08 22:11:04 +01:00
|
|
|
#include "bn/div_extra.c"
|
2017-04-28 22:47:06 +01:00
|
|
|
#include "bn/exponentiation.c"
|
|
|
|
#include "bn/gcd.c"
|
2018-05-08 22:11:04 +01:00
|
|
|
#include "bn/gcd_extra.c"
|
2017-04-28 22:47:06 +01:00
|
|
|
#include "bn/generic.c"
|
|
|
|
#include "bn/jacobi.c"
|
|
|
|
#include "bn/montgomery.c"
|
|
|
|
#include "bn/montgomery_inv.c"
|
|
|
|
#include "bn/mul.c"
|
|
|
|
#include "bn/prime.c"
|
|
|
|
#include "bn/random.c"
|
|
|
|
#include "bn/rsaz_exp.c"
|
|
|
|
#include "bn/shift.c"
|
|
|
|
#include "bn/sqrt.c"
|
2017-05-03 21:23:37 +01:00
|
|
|
#include "cipher/aead.c"
|
|
|
|
#include "cipher/cipher.c"
|
|
|
|
#include "cipher/e_aes.c"
|
|
|
|
#include "cipher/e_des.c"
|
2017-05-02 19:32:45 +01:00
|
|
|
#include "des/des.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
#include "digest/digest.c"
|
|
|
|
#include "digest/digests.c"
|
2018-07-27 22:27:09 +01:00
|
|
|
#include "ecdh/ecdh.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ecdsa/ecdsa.c"
|
|
|
|
#include "ec/ec.c"
|
|
|
|
#include "ec/ec_key.c"
|
|
|
|
#include "ec/ec_montgomery.c"
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
generically.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertible to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchmarks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
#include "ec/felem.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ec/oct.c"
|
|
|
|
#include "ec/p224-64.c"
|
ec/p256.c: fiat-crypto field arithmetic (64, 32)
The fiat-crypto-generated code uses the Montgomery form implementation
strategy, for both 32-bit and 64-bit code.
64-bit throughput seems slower, but the difference is smaller than noise between repetitions (-2%?)
32-bit throughput has decreased significantly for ECDH (-40%). I am
attributing this to the change from variable-time scalar multiplication
to constant-time scalar multiplication. Due to the same bottleneck,
ECDSA verification still uses the old code (otherwise there would have
been a 60% throughput decrease). On the other hand, ECDSA signing
throughput has increased slightly (+10%), perhaps due to the use of a
precomputed table of multiples of the base point.
64-bit benchmarks (Google Cloud Haswell):
with this change:
Did 9126 ECDH P-256 operations in 1009572us (9039.5 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039832us (22119.0 ops/sec)
Did 8820 ECDSA P-256 verify operations in 1024242us (8611.2 ops/sec)
master (40e8c921cab5cce2bc10722ecf4ebe0e380cf6c8):
Did 9340 ECDH P-256 operations in 1017975us (9175.1 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039820us (22119.2 ops/sec)
Did 8688 ECDSA P-256 verify operations in 1021108us (8508.4 ops/sec)
benchmarks on ARMv7 (LG Nexus 4):
with this change:
Did 150 ECDH P-256 operations in 1029726us (145.7 ops/sec)
Did 506 ECDSA P-256 signing operations in 1065192us (475.0 ops/sec)
Did 363 ECDSA P-256 verify operations in 1033298us (351.3 ops/sec)
master (2fce1beda0f7e74e2d687860f807cf0b8d8056a4):
Did 245 ECDH P-256 operations in 1017518us (240.8 ops/sec)
Did 473 ECDSA P-256 signing operations in 1086281us (435.4 ops/sec)
Did 360 ECDSA P-256 verify operations in 1003846us (358.6 ops/sec)
64-bit tables converted as follows:
import re, sys, math
p = 2**256 - 2**224 + 2**192 + 2**96 - 1
R = 2**256
def convert(t):
x0, s1, x1, s2, x2, s3, x3 = t.groups()
v = int(x0, 0) + 2**64 * (int(x1, 0) + 2**64*(int(x2,0) + 2**64*(int(x3, 0)) ))
w = v*R%p
y0 = hex(w%(2**64))
y1 = hex((w>>64)%(2**64))
y2 = hex((w>>(2*64))%(2**64))
y3 = hex((w>>(3*64))%(2**64))
ww = int(y0, 0) + 2**64 * (int(y1, 0) + 2**64*(int(y2,0) + 2**64*(int(y3, 0)) ))
if ww != v*R%p:
print(x0,x1,x2,x3)
print(hex(v))
print(y0,y1,y2,y3)
print(hex(w))
print(hex(ww))
assert 0
return '{'+y0+s1+y1+s2+y2+s3+y3+'}'
fe_re = re.compile('{'+r'(\s*,\s*)'.join(r'(\d+|0x[abcdefABCDEF0123456789]+)' for i in range(4)) + '}')
print (re.sub(fe_re, convert, sys.stdin.read()).rstrip('\n'))
32-bit tables converted from 64-bit tables
Change-Id: I52d6e5504fcb6ca2e8b0ee13727f4500c80c1799
Reviewed-on: https://boringssl-review.googlesource.com/23244
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-08 20:32:38 +00:00
|
|
|
#include "../../third_party/fiat/p256.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ec/p256-x86_64.c"
|
2018-04-05 04:36:15 +01:00
|
|
|
#include "ec/scalar.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ec/simple.c"
|
2018-04-25 04:40:01 +01:00
|
|
|
#include "ec/simple_mul.c"
|
ec/p256.c: fiat-crypto field arithmetic (64, 32)
The fiat-crypto-generated code uses the Montgomery form implementation
strategy, for both 32-bit and 64-bit code.
64-bit throughput seems slower, but the difference is smaller than noise between repetitions (-2%?)
32-bit throughput has decreased significantly for ECDH (-40%). I am
attributing this to the change from variable-time scalar multiplication
to constant-time scalar multiplication. Due to the same bottleneck,
ECDSA verification still uses the old code (otherwise there would have
been a 60% throughput decrease). On the other hand, ECDSA signing
throughput has increased slightly (+10%), perhaps due to the use of a
precomputed table of multiples of the base point.
64-bit benchmarks (Google Cloud Haswell):
with this change:
Did 9126 ECDH P-256 operations in 1009572us (9039.5 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039832us (22119.0 ops/sec)
Did 8820 ECDSA P-256 verify operations in 1024242us (8611.2 ops/sec)
master (40e8c921cab5cce2bc10722ecf4ebe0e380cf6c8):
Did 9340 ECDH P-256 operations in 1017975us (9175.1 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039820us (22119.2 ops/sec)
Did 8688 ECDSA P-256 verify operations in 1021108us (8508.4 ops/sec)
benchmarks on ARMv7 (LG Nexus 4):
with this change:
Did 150 ECDH P-256 operations in 1029726us (145.7 ops/sec)
Did 506 ECDSA P-256 signing operations in 1065192us (475.0 ops/sec)
Did 363 ECDSA P-256 verify operations in 1033298us (351.3 ops/sec)
master (2fce1beda0f7e74e2d687860f807cf0b8d8056a4):
Did 245 ECDH P-256 operations in 1017518us (240.8 ops/sec)
Did 473 ECDSA P-256 signing operations in 1086281us (435.4 ops/sec)
Did 360 ECDSA P-256 verify operations in 1003846us (358.6 ops/sec)
64-bit tables converted as follows:
import re, sys, math
p = 2**256 - 2**224 + 2**192 + 2**96 - 1
R = 2**256
def convert(t):
x0, s1, x1, s2, x2, s3, x3 = t.groups()
v = int(x0, 0) + 2**64 * (int(x1, 0) + 2**64*(int(x2,0) + 2**64*(int(x3, 0)) ))
w = v*R%p
y0 = hex(w%(2**64))
y1 = hex((w>>64)%(2**64))
y2 = hex((w>>(2*64))%(2**64))
y3 = hex((w>>(3*64))%(2**64))
ww = int(y0, 0) + 2**64 * (int(y1, 0) + 2**64*(int(y2,0) + 2**64*(int(y3, 0)) ))
if ww != v*R%p:
print(x0,x1,x2,x3)
print(hex(v))
print(y0,y1,y2,y3)
print(hex(w))
print(hex(ww))
assert 0
return '{'+y0+s1+y1+s2+y2+s3+y3+'}'
fe_re = re.compile('{'+r'(\s*,\s*)'.join(r'(\d+|0x[abcdefABCDEF0123456789]+)' for i in range(4)) + '}')
print (re.sub(fe_re, convert, sys.stdin.read()).rstrip('\n'))
32-bit tables converted from 64-bit tables
Change-Id: I52d6e5504fcb6ca2e8b0ee13727f4500c80c1799
Reviewed-on: https://boringssl-review.googlesource.com/23244
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-08 20:32:38 +00:00
|
|
|
#include "ec/util.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ec/wnaf.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
#include "hmac/hmac.c"
|
|
|
|
#include "md4/md4.c"
|
|
|
|
#include "md5/md5.c"
|
2017-04-13 19:38:40 +01:00
|
|
|
#include "modes/cbc.c"
|
2018-02-13 18:39:42 +00:00
|
|
|
#include "modes/ccm.c"
|
2017-04-13 19:38:40 +01:00
|
|
|
#include "modes/cfb.c"
|
|
|
|
#include "modes/ctr.c"
|
|
|
|
#include "modes/gcm.c"
|
|
|
|
#include "modes/ofb.c"
|
|
|
|
#include "modes/polyval.c"
|
2017-04-14 19:16:20 +01:00
|
|
|
#include "rand/ctrdrbg.c"
|
|
|
|
#include "rand/rand.c"
|
|
|
|
#include "rand/urandom.c"
|
2017-05-03 19:50:51 +01:00
|
|
|
#include "rsa/blinding.c"
|
|
|
|
#include "rsa/padding.c"
|
|
|
|
#include "rsa/rsa.c"
|
|
|
|
#include "rsa/rsa_impl.c"
|
2018-01-22 22:30:49 +00:00
|
|
|
#include "self_check/self_check.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
#include "sha/sha1-altivec.c"
|
|
|
|
#include "sha/sha1.c"
|
|
|
|
#include "sha/sha256.c"
|
|
|
|
#include "sha/sha512.c"
|
2018-01-22 18:06:51 +00:00
|
|
|
#include "tls/kdf.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
|
|
|
|
|
2018-01-22 19:07:42 +00:00
|
|
|
#if defined(BORINGSSL_FIPS)
|
|
|
|
|
|
|
|
#if !defined(OPENSSL_ASAN)
// The three symbols below are not defined in C; delocate.go fills them in.

// Start of the module.
extern const uint8_t BORINGSSL_bcm_text_start[];
// End of the module.
extern const uint8_t BORINGSSL_bcm_text_end[];
// Location of the module's integrity hash.
extern const uint8_t BORINGSSL_bcm_text_hash[];
#endif
|
|
|
|
|
|
|
|
static void __attribute__((constructor))
|
|
|
|
BORINGSSL_bcm_power_on_self_test(void) {
|
|
|
|
CRYPTO_library_init();
|
|
|
|
|
|
|
|
#if !defined(OPENSSL_ASAN)
|
|
|
|
// Integrity tests cannot run under ASAN because it involves reading the full
|
|
|
|
// .text section, which triggers the global-buffer overflow detection.
|
|
|
|
const uint8_t *const start = BORINGSSL_bcm_text_start;
|
|
|
|
const uint8_t *const end = BORINGSSL_bcm_text_end;
|
|
|
|
|
|
|
|
static const uint8_t kHMACKey[64] = {0};
|
|
|
|
uint8_t result[SHA512_DIGEST_LENGTH];
|
|
|
|
|
|
|
|
unsigned result_len;
|
|
|
|
if (!HMAC(EVP_sha512(), kHMACKey, sizeof(kHMACKey), start, end - start,
|
|
|
|
result, &result_len) ||
|
|
|
|
result_len != sizeof(result)) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint8_t *expected = BORINGSSL_bcm_text_hash;
|
|
|
|
|
|
|
|
if (!check_test(expected, result, sizeof(result), "FIPS integrity test")) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (!BORINGSSL_self_test()) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2017-04-04 22:21:43 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
err:
|
2017-05-18 19:37:44 +01:00
|
|
|
BORINGSSL_FIPS_abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
// BORINGSSL_FIPS_abort terminates the process. It calls |abort| and then
// |exit(1)| inside an endless loop, so control never leaves this function.
void BORINGSSL_FIPS_abort(void) {
  while (1) {
    abort();
    exit(1);
  }
}
|
2018-01-22 19:07:42 +00:00
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
#endif // BORINGSSL_FIPS
|