2017-04-04 22:21:43 +01:00
|
|
|
/* Copyright (c) 2017, Google Inc.
|
|
|
|
*
|
|
|
|
* Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
|
|
* copyright notice and this permission notice appear in all copies.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
|
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
|
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
|
|
|
|
2017-04-14 19:16:20 +01:00
|
|
|
#if !defined(_GNU_SOURCE)
|
2017-08-18 19:06:02 +01:00
|
|
|
#define _GNU_SOURCE // needed for syscall() on Linux.
|
2017-04-14 19:16:20 +01:00
|
|
|
#endif
|
|
|
|
|
2017-04-04 22:21:43 +01:00
|
|
|
#include <openssl/crypto.h>
|
2018-01-22 22:30:49 +00:00
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#include <openssl/digest.h>
|
2017-04-04 22:21:43 +01:00
|
|
|
#include <openssl/hmac.h>
|
2017-05-12 23:34:45 +01:00
|
|
|
#include <openssl/sha.h>
|
2017-04-04 22:21:43 +01:00
|
|
|
|
|
|
|
#include "../internal.h"
|
|
|
|
|
2017-04-13 19:00:24 +01:00
|
|
|
#include "aes/aes.c"
|
|
|
|
#include "aes/key_wrap.c"
|
|
|
|
#include "aes/mode_wrappers.c"
|
2017-04-28 22:47:06 +01:00
|
|
|
#include "bn/add.c"
|
|
|
|
#include "bn/asm/x86_64-gcc.c"
|
|
|
|
#include "bn/bn.c"
|
|
|
|
#include "bn/bytes.c"
|
|
|
|
#include "bn/cmp.c"
|
|
|
|
#include "bn/ctx.c"
|
|
|
|
#include "bn/div.c"
|
|
|
|
#include "bn/exponentiation.c"
|
|
|
|
#include "bn/gcd.c"
|
|
|
|
#include "bn/generic.c"
|
|
|
|
#include "bn/jacobi.c"
|
|
|
|
#include "bn/montgomery.c"
|
|
|
|
#include "bn/montgomery_inv.c"
|
|
|
|
#include "bn/mul.c"
|
|
|
|
#include "bn/prime.c"
|
|
|
|
#include "bn/random.c"
|
|
|
|
#include "bn/rsaz_exp.c"
|
|
|
|
#include "bn/shift.c"
|
|
|
|
#include "bn/sqrt.c"
|
2017-05-03 21:23:37 +01:00
|
|
|
#include "cipher/aead.c"
|
|
|
|
#include "cipher/cipher.c"
|
|
|
|
#include "cipher/e_aes.c"
|
|
|
|
#include "cipher/e_des.c"
|
2017-05-02 19:32:45 +01:00
|
|
|
#include "des/des.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
#include "digest/digest.c"
|
|
|
|
#include "digest/digests.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ecdsa/ecdsa.c"
|
|
|
|
#include "ec/ec.c"
|
|
|
|
#include "ec/ec_key.c"
|
|
|
|
#include "ec/ec_montgomery.c"
|
|
|
|
#include "ec/oct.c"
|
|
|
|
#include "ec/p224-64.c"
|
ec/p256.c: fiat-crypto field arithmetic (64, 32)
The fiat-crypto-generated code uses the Montgomery form implementation
strategy, for both 32-bit and 64-bit code.
64-bit throughput seems slower, but the difference is smaller than noise between repetitions (-2%?)
32-bit throughput has decreased significantly for ECDH (-40%). I am
attributing this to the change from variable-time scalar multiplication
to constant-time scalar multiplication. Due to the same bottleneck,
ECDSA verification still uses the old code (otherwise there would have
been a 60% throughput decrease). On the other hand, ECDSA signing
throughput has increased slightly (+10%), perhaps due to the use of a
precomputed table of multiples of the base point.
64-bit benchmarks (Google Cloud Haswell):
with this change:
Did 9126 ECDH P-256 operations in 1009572us (9039.5 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039832us (22119.0 ops/sec)
Did 8820 ECDSA P-256 verify operations in 1024242us (8611.2 ops/sec)
master (40e8c921cab5cce2bc10722ecf4ebe0e380cf6c8):
Did 9340 ECDH P-256 operations in 1017975us (9175.1 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039820us (22119.2 ops/sec)
Did 8688 ECDSA P-256 verify operations in 1021108us (8508.4 ops/sec)
benchmarks on ARMv7 (LG Nexus 4):
with this change:
Did 150 ECDH P-256 operations in 1029726us (145.7 ops/sec)
Did 506 ECDSA P-256 signing operations in 1065192us (475.0 ops/sec)
Did 363 ECDSA P-256 verify operations in 1033298us (351.3 ops/sec)
master (2fce1beda0f7e74e2d687860f807cf0b8d8056a4):
Did 245 ECDH P-256 operations in 1017518us (240.8 ops/sec)
Did 473 ECDSA P-256 signing operations in 1086281us (435.4 ops/sec)
Did 360 ECDSA P-256 verify operations in 1003846us (358.6 ops/sec)
64-bit tables converted as follows:
import re, sys, math
p = 2**256 - 2**224 + 2**192 + 2**96 - 1
R = 2**256
def convert(t):
x0, s1, x1, s2, x2, s3, x3 = t.groups()
v = int(x0, 0) + 2**64 * (int(x1, 0) + 2**64*(int(x2,0) + 2**64*(int(x3, 0)) ))
w = v*R%p
y0 = hex(w%(2**64))
y1 = hex((w>>64)%(2**64))
y2 = hex((w>>(2*64))%(2**64))
y3 = hex((w>>(3*64))%(2**64))
ww = int(y0, 0) + 2**64 * (int(y1, 0) + 2**64*(int(y2,0) + 2**64*(int(y3, 0)) ))
if ww != v*R%p:
print(x0,x1,x2,x3)
print(hex(v))
print(y0,y1,y2,y3)
print(hex(w))
print(hex(ww))
assert 0
return '{'+y0+s1+y1+s2+y2+s3+y3+'}'
fe_re = re.compile('{'+r'(\s*,\s*)'.join(r'(\d+|0x[abcdefABCDEF0123456789]+)' for i in range(4)) + '}')
print (re.sub(fe_re, convert, sys.stdin.read()).rstrip('\n'))
32-bit tables converted from 64-bit tables
Change-Id: I52d6e5504fcb6ca2e8b0ee13727f4500c80c1799
Reviewed-on: https://boringssl-review.googlesource.com/23244
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-08 20:32:38 +00:00
|
|
|
#include "../../third_party/fiat/p256.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ec/p256-x86_64.c"
|
|
|
|
#include "ec/simple.c"
|
ec/p256.c: fiat-crypto field arithmetic (64, 32)
The fiat-crypto-generated code uses the Montgomery form implementation
strategy, for both 32-bit and 64-bit code.
64-bit throughput seems slower, but the difference is smaller than noise between repetitions (-2%?)
32-bit throughput has decreased significantly for ECDH (-40%). I am
attributing this to the change from variable-time scalar multiplication
to constant-time scalar multiplication. Due to the same bottleneck,
ECDSA verification still uses the old code (otherwise there would have
been a 60% throughput decrease). On the other hand, ECDSA signing
throughput has increased slightly (+10%), perhaps due to the use of a
precomputed table of multiples of the base point.
64-bit benchmarks (Google Cloud Haswell):
with this change:
Did 9126 ECDH P-256 operations in 1009572us (9039.5 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039832us (22119.0 ops/sec)
Did 8820 ECDSA P-256 verify operations in 1024242us (8611.2 ops/sec)
master (40e8c921cab5cce2bc10722ecf4ebe0e380cf6c8):
Did 9340 ECDH P-256 operations in 1017975us (9175.1 ops/sec)
Did 23000 ECDSA P-256 signing operations in 1039820us (22119.2 ops/sec)
Did 8688 ECDSA P-256 verify operations in 1021108us (8508.4 ops/sec)
benchmarks on ARMv7 (LG Nexus 4):
with this change:
Did 150 ECDH P-256 operations in 1029726us (145.7 ops/sec)
Did 506 ECDSA P-256 signing operations in 1065192us (475.0 ops/sec)
Did 363 ECDSA P-256 verify operations in 1033298us (351.3 ops/sec)
master (2fce1beda0f7e74e2d687860f807cf0b8d8056a4):
Did 245 ECDH P-256 operations in 1017518us (240.8 ops/sec)
Did 473 ECDSA P-256 signing operations in 1086281us (435.4 ops/sec)
Did 360 ECDSA P-256 verify operations in 1003846us (358.6 ops/sec)
64-bit tables converted as follows:
import re, sys, math
p = 2**256 - 2**224 + 2**192 + 2**96 - 1
R = 2**256
def convert(t):
x0, s1, x1, s2, x2, s3, x3 = t.groups()
v = int(x0, 0) + 2**64 * (int(x1, 0) + 2**64*(int(x2,0) + 2**64*(int(x3, 0)) ))
w = v*R%p
y0 = hex(w%(2**64))
y1 = hex((w>>64)%(2**64))
y2 = hex((w>>(2*64))%(2**64))
y3 = hex((w>>(3*64))%(2**64))
ww = int(y0, 0) + 2**64 * (int(y1, 0) + 2**64*(int(y2,0) + 2**64*(int(y3, 0)) ))
if ww != v*R%p:
print(x0,x1,x2,x3)
print(hex(v))
print(y0,y1,y2,y3)
print(hex(w))
print(hex(ww))
assert 0
return '{'+y0+s1+y1+s2+y2+s3+y3+'}'
fe_re = re.compile('{'+r'(\s*,\s*)'.join(r'(\d+|0x[abcdefABCDEF0123456789]+)' for i in range(4)) + '}')
print (re.sub(fe_re, convert, sys.stdin.read()).rstrip('\n'))
32-bit tables converted from 64-bit tables
Change-Id: I52d6e5504fcb6ca2e8b0ee13727f4500c80c1799
Reviewed-on: https://boringssl-review.googlesource.com/23244
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-08 20:32:38 +00:00
|
|
|
#include "ec/util.c"
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "ec/wnaf.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
#include "hmac/hmac.c"
|
|
|
|
#include "md4/md4.c"
|
|
|
|
#include "md5/md5.c"
|
2017-04-13 19:38:40 +01:00
|
|
|
#include "modes/cbc.c"
|
|
|
|
#include "modes/cfb.c"
|
|
|
|
#include "modes/ctr.c"
|
|
|
|
#include "modes/gcm.c"
|
|
|
|
#include "modes/ofb.c"
|
|
|
|
#include "modes/polyval.c"
|
2017-04-14 19:16:20 +01:00
|
|
|
#include "rand/ctrdrbg.c"
|
|
|
|
#include "rand/rand.c"
|
|
|
|
#include "rand/urandom.c"
|
2017-05-03 19:50:51 +01:00
|
|
|
#include "rsa/blinding.c"
|
|
|
|
#include "rsa/padding.c"
|
|
|
|
#include "rsa/rsa.c"
|
|
|
|
#include "rsa/rsa_impl.c"
|
2018-01-22 22:30:49 +00:00
|
|
|
#include "self_check/self_check.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
#include "sha/sha1-altivec.c"
|
|
|
|
#include "sha/sha1.c"
|
|
|
|
#include "sha/sha256.c"
|
|
|
|
#include "sha/sha512.c"
|
2018-01-22 18:06:51 +00:00
|
|
|
#include "tls/kdf.c"
|
2017-04-04 22:21:43 +01:00
|
|
|
|
|
|
|
|
2018-01-22 19:07:42 +00:00
|
|
|
#if defined(BORINGSSL_FIPS)
|
|
|
|
|
|
|
|
#if !defined(OPENSSL_ASAN)
// The three symbols below are not defined in C; delocate.go fills them in.

// First byte of the module.
extern const uint8_t BORINGSSL_bcm_text_start[];
// End of the module.
extern const uint8_t BORINGSSL_bcm_text_end[];
// Location of the module's integrity hash.
extern const uint8_t BORINGSSL_bcm_text_hash[];
#endif
|
|
|
|
|
|
|
|
static void __attribute__((constructor))
|
|
|
|
BORINGSSL_bcm_power_on_self_test(void) {
|
|
|
|
CRYPTO_library_init();
|
|
|
|
|
|
|
|
#if !defined(OPENSSL_ASAN)
|
|
|
|
// Integrity tests cannot run under ASAN because it involves reading the full
|
|
|
|
// .text section, which triggers the global-buffer overflow detection.
|
|
|
|
const uint8_t *const start = BORINGSSL_bcm_text_start;
|
|
|
|
const uint8_t *const end = BORINGSSL_bcm_text_end;
|
|
|
|
|
|
|
|
static const uint8_t kHMACKey[64] = {0};
|
|
|
|
uint8_t result[SHA512_DIGEST_LENGTH];
|
|
|
|
|
|
|
|
unsigned result_len;
|
|
|
|
if (!HMAC(EVP_sha512(), kHMACKey, sizeof(kHMACKey), start, end - start,
|
|
|
|
result, &result_len) ||
|
|
|
|
result_len != sizeof(result)) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint8_t *expected = BORINGSSL_bcm_text_hash;
|
|
|
|
|
|
|
|
if (!check_test(expected, result, sizeof(result), "FIPS integrity test")) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (!BORINGSSL_self_test()) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2017-04-04 22:21:43 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
err:
|
2017-05-18 19:37:44 +01:00
|
|
|
BORINGSSL_FIPS_abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
// BORINGSSL_FIPS_abort terminates the process. It is the failure path of the
// power-on self test in this file.
void BORINGSSL_FIPS_abort(void) {
  // The loop has no exit, so control cannot leave this function even if one
  // of the calls below were to return.
  while (1) {
    abort();
    // Fallback in case abort() returned.
    exit(1);
  }
}
|
2018-01-22 19:07:42 +00:00
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
#endif // BORINGSSL_FIPS
|