boringssl/third_party/fiat/internal.h

155 lines
4.7 KiB
C
Raw Normal View History

curve25519: fiat-crypto field arithmetic. Each operation was translated from fiat-crypto output using fiat-crypto prettyprint.py. For example fe_mul is synthesized in https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femul.v, and shown in the last Coq-compatible form at https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femulDisplay.log. Benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. && ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11382 Ed25519 key generation operations in 1053046us (10808.6 ops/sec) Did 11169 Ed25519 signing operations in 1038080us (10759.3 ops/sec) Did 2925 Ed25519 verify operations in 1001346us (2921.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1084851us (11061.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1085565us (3546.5 ops/sec) Did 11466 Ed25519 key generation operations in 1049821us (10921.9 ops/sec) Did 11000 Ed25519 signing operations in 1013317us (10855.4 ops/sec) Did 3047 Ed25519 verify operations in 1043846us (2919.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1068924us (11226.2 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1090598us (3530.2 ops/sec) Did 10309 Ed25519 key generation operations in 1003320us (10274.9 ops/sec) Did 11000 Ed25519 signing operations in 1017862us (10807.0 ops/sec) Did 3135 Ed25519 verify operations in 1098624us (2853.6 ops/sec) Did 9000 Curve25519 base-point multiplication operations in 1046608us (8599.2 ops/sec) Did 3132 Curve25519 arbitrary point multiplication operations in 1038963us (3014.5 ops/sec) master: Did 11564 Ed25519 key generation operations in 1068762us (10820.0 ops/sec) Did 11104 Ed25519 signing operations in 1024278us (10840.8 ops/sec) 
Did 3206 Ed25519 verify operations in 1049179us (3055.7 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1073619us (11177.1 ops/sec) Did 3550 Curve25519 arbitrary point multiplication operations in 1000279us (3549.0 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11760 Ed25519 key generation operations in 1072495us (10965.1 ops/sec) Did 10800 Ed25519 signing operations in 1003486us (10762.5 ops/sec) Did 3245 Ed25519 verify operations in 1080399us (3003.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1076021us (11152.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1005087us (3551.9 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11438 Ed25519 key generation operations in 1041115us (10986.3 ops/sec) Did 11000 Ed25519 signing operations in 1012589us (10863.2 ops/sec) Did 3312 Ed25519 verify operations in 1082834us (3058.6 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1061318us (11306.7 ops/sec) Did 3580 Curve25519 arbitrary point multiplication operations in 1004923us (3562.5 ops/sec) squashed: curve25519: convert field constants to unsigned. 
import re, sys, math def weight(i): return 2**int(math.ceil(25.5*i)) def convert(t): limbs = [x for x in t.groups() if x.replace('-','').isdigit()] v = sum(weight(i)*x for (i,x) in enumerate(map(int, limbs))) % (2**255-19) limbs = [(v % weight(i+1)) // weight(i) for i in range(10)] assert v == sum(weight(i)*x for (i,x) in enumerate(limbs)) i = 0 ret = '' for s in t.groups(): if s.replace('-','').isdigit(): ret += str(limbs[i]) i += 1 else: ret += s return ret fe_re = re.compile(r'(\s*,\s*)'.join(r'(-?\d+)' for i in range(10))) print (re.sub(fe_re, convert, sys.stdin.read())) Change-Id: Ibd4f7f5c38e5c4d61c9826afb406baebe2be5168 Reviewed-on: https://boringssl-review.googlesource.com/22385 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-03 19:03:13 +00:00
// The MIT License (MIT)
//
// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#ifndef OPENSSL_HEADER_CURVE25519_INTERNAL_H
#define OPENSSL_HEADER_CURVE25519_INTERNAL_H
#if defined(__cplusplus)
extern "C" {
#endif
Use 51-bit limbs from fiat-crypto in 64-bit. Our 64-bit performance was much lower than it could have been, since we weren't using the 64-bit multipliers. Fortunately, fiat-crypto is awesome, so this is just a matter of synthesizing new code and integration work. Functions without the signature fiat-crypto curly braces were written by hand and warrant more review. (It's just redistributing some bits.) These use the donna variants which takes (and proves) some of the instruction scheduling from donna as that's significantly faster. Glancing over things, I suspect but have not confirmed the gap is due to this: https://github.com/mit-plv/fiat-crypto/pull/295#issuecomment-356892413 Clang without OPENSSL_SMALL (ECDH omitted since that uses assembly and is unaffected by this CL). Before: Did 105149 Ed25519 key generation operations in 5025208us (20924.3 ops/sec) Did 125000 Ed25519 signing operations in 5024003us (24880.6 ops/sec) Did 37642 Ed25519 verify operations in 5072539us (7420.7 ops/sec) After: Did 206000 Ed25519 key generation operations in 5020547us (41031.4 ops/sec) Did 227000 Ed25519 signing operations in 5005232us (45352.5 ops/sec) Did 69840 Ed25519 verify operations in 5004769us (13954.7 ops/sec) Clang + OPENSSL_SMALL: Before: Did 68598 Ed25519 key generation operations in 5024629us (13652.4 ops/sec) Did 73000 Ed25519 signing operations in 5067837us (14404.6 ops/sec) Did 36765 Ed25519 verify operations in 5078684us (7239.1 ops/sec) Did 74000 Curve25519 base-point multiplication operations in 5016465us (14751.4 ops/sec) Did 45600 Curve25519 arbitrary point multiplication operations in 5034680us (9057.2 ops/sec) After: Did 117315 Ed25519 key generation operations in 5021860us (23360.9 ops/sec) Did 126000 Ed25519 signing operations in 5003521us (25182.3 ops/sec) Did 64974 Ed25519 verify operations in 5047790us (12871.8 ops/sec) Did 134000 Curve25519 base-point multiplication operations in 5058946us (26487.7 ops/sec) Did 86000 Curve25519 arbitrary point 
multiplication operations in 5050478us (17028.1 ops/sec) GCC without OPENSSL_SMALL (ECDH omitted since that uses assembly and is unaffected by this CL). Before: Did 35552 Ed25519 key generation operations in 5030756us (7066.9 ops/sec) Did 38286 Ed25519 signing operations in 5001648us (7654.7 ops/sec) Did 10584 Ed25519 verify operations in 5068158us (2088.3 ops/sec) After: Did 92158 Ed25519 key generation operations in 5024021us (18343.5 ops/sec) Did 99000 Ed25519 signing operations in 5011908us (19753.0 ops/sec) Did 31122 Ed25519 verify operations in 5069878us (6138.6 ops/sec) Change-Id: Ic0c24d50b4ee2bbc408b94965e9d63319936107d Reviewed-on: https://boringssl-review.googlesource.com/24805 Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> Reviewed-by: Adam Langley <agl@google.com>
2018-01-09 22:27:21 +00:00
#include <openssl/base.h>
#include "../../crypto/internal.h"
// The NEON assembly entry point is only declared when OPENSSL_ARM is defined,
// assembly has not been disabled via OPENSSL_NO_ASM, and OPENSSL_APPLE is not
// defined. BORINGSSL_X25519_NEON is defined alongside it so that other code
// can test for the routine's availability.
#if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_APPLE)
#define BORINGSSL_X25519_NEON
// x25519_NEON is defined in asm/x25519-arm.S.
//
// All three arguments are 32-byte buffers; |scalar| and |point| are read-only.
// NOTE(review): presumably this writes the X25519 product of |scalar| and
// |point| to |out| -- confirm against the assembly source.
void x25519_NEON(uint8_t out[32], const uint8_t scalar[32],
const uint8_t point[32]);
#endif
Use 51-bit limbs from fiat-crypto in 64-bit. Our 64-bit performance was much lower than it could have been, since we weren't using the 64-bit multipliers. Fortunately, fiat-crypto is awesome, so this is just a matter of synthesizing new code and integration work. Functions without the signature fiat-crypto curly braces were written by hand and warrant more review. (It's just redistributing some bits.) These use the donna variants which takes (and proves) some of the instruction scheduling from donna as that's significantly faster. Glancing over things, I suspect but have not confirmed the gap is due to this: https://github.com/mit-plv/fiat-crypto/pull/295#issuecomment-356892413 Clang without OPENSSL_SMALL (ECDH omitted since that uses assembly and is unaffected by this CL). Before: Did 105149 Ed25519 key generation operations in 5025208us (20924.3 ops/sec) Did 125000 Ed25519 signing operations in 5024003us (24880.6 ops/sec) Did 37642 Ed25519 verify operations in 5072539us (7420.7 ops/sec) After: Did 206000 Ed25519 key generation operations in 5020547us (41031.4 ops/sec) Did 227000 Ed25519 signing operations in 5005232us (45352.5 ops/sec) Did 69840 Ed25519 verify operations in 5004769us (13954.7 ops/sec) Clang + OPENSSL_SMALL: Before: Did 68598 Ed25519 key generation operations in 5024629us (13652.4 ops/sec) Did 73000 Ed25519 signing operations in 5067837us (14404.6 ops/sec) Did 36765 Ed25519 verify operations in 5078684us (7239.1 ops/sec) Did 74000 Curve25519 base-point multiplication operations in 5016465us (14751.4 ops/sec) Did 45600 Curve25519 arbitrary point multiplication operations in 5034680us (9057.2 ops/sec) After: Did 117315 Ed25519 key generation operations in 5021860us (23360.9 ops/sec) Did 126000 Ed25519 signing operations in 5003521us (25182.3 ops/sec) Did 64974 Ed25519 verify operations in 5047790us (12871.8 ops/sec) Did 134000 Curve25519 base-point multiplication operations in 5058946us (26487.7 ops/sec) Did 86000 Curve25519 arbitrary point 
multiplication operations in 5050478us (17028.1 ops/sec) GCC without OPENSSL_SMALL (ECDH omitted since that uses assembly and is unaffected by this CL). Before: Did 35552 Ed25519 key generation operations in 5030756us (7066.9 ops/sec) Did 38286 Ed25519 signing operations in 5001648us (7654.7 ops/sec) Did 10584 Ed25519 verify operations in 5068158us (2088.3 ops/sec) After: Did 92158 Ed25519 key generation operations in 5024021us (18343.5 ops/sec) Did 99000 Ed25519 signing operations in 5011908us (19753.0 ops/sec) Did 31122 Ed25519 verify operations in 5069878us (6138.6 ops/sec) Change-Id: Ic0c24d50b4ee2bbc408b94965e9d63319936107d Reviewed-on: https://boringssl-review.googlesource.com/24805 Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> Reviewed-by: Adam Langley <agl@google.com>
2018-01-09 22:27:21 +00:00
// Opt in to the 64-bit field-element representation (five uint64_t limbs)
// whenever BORINGSSL_HAS_UINT128 is defined; otherwise the ten-limb uint32_t
// representation below is used.
// NOTE(review): BORINGSSL_HAS_UINT128 is not defined in this header; it
// presumably comes from one of the headers included above -- confirm.
#if defined(BORINGSSL_HAS_UINT128)
#define BORINGSSL_CURVE25519_64BIT
#endif
#if defined(BORINGSSL_CURVE25519_64BIT)
// A field element (fe) is a member of the field \Z/(2^255-19). It is stored
// as five 64-bit limbs: an element t with entries t[0]...t[4] stands for the
// integer
//   t[0] + 2^51 t[1] + 2^102 t[2] + 2^153 t[3] + 2^204 t[4].
//
// Every fe limb is bounded by 1.125*2^51. An fe is what multiplication and
// carrying produce when given an fe_loose.
struct fe {
  uint64_t v[5];
};
typedef struct fe fe;
// fe_loose holds five 64-bit limbs whose bound is relaxed to 3.375*2^51 each.
// It is the result type of addition and subtraction applied to a pair of fe
// values.
struct fe_loose {
  uint64_t v[5];
};
typedef struct fe_loose fe_loose;
#else
// fe means field element. Here the field is \Z/(2^255-19). An element t,
// entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
// t[3]+2^102 t[4]+...+2^230 t[9].
curve25519: fiat-crypto field arithmetic. Each operation was translated from fiat-crypto output using fiat-crypto prettyprint.py. For example fe_mul is synthesized in https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femul.v, and shown in the last Coq-compatible form at https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femulDisplay.log. Benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. && ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11382 Ed25519 key generation operations in 1053046us (10808.6 ops/sec) Did 11169 Ed25519 signing operations in 1038080us (10759.3 ops/sec) Did 2925 Ed25519 verify operations in 1001346us (2921.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1084851us (11061.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1085565us (3546.5 ops/sec) Did 11466 Ed25519 key generation operations in 1049821us (10921.9 ops/sec) Did 11000 Ed25519 signing operations in 1013317us (10855.4 ops/sec) Did 3047 Ed25519 verify operations in 1043846us (2919.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1068924us (11226.2 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1090598us (3530.2 ops/sec) Did 10309 Ed25519 key generation operations in 1003320us (10274.9 ops/sec) Did 11000 Ed25519 signing operations in 1017862us (10807.0 ops/sec) Did 3135 Ed25519 verify operations in 1098624us (2853.6 ops/sec) Did 9000 Curve25519 base-point multiplication operations in 1046608us (8599.2 ops/sec) Did 3132 Curve25519 arbitrary point multiplication operations in 1038963us (3014.5 ops/sec) master: Did 11564 Ed25519 key generation operations in 1068762us (10820.0 ops/sec) Did 11104 Ed25519 signing operations in 1024278us (10840.8 ops/sec) 
Did 3206 Ed25519 verify operations in 1049179us (3055.7 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1073619us (11177.1 ops/sec) Did 3550 Curve25519 arbitrary point multiplication operations in 1000279us (3549.0 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11760 Ed25519 key generation operations in 1072495us (10965.1 ops/sec) Did 10800 Ed25519 signing operations in 1003486us (10762.5 ops/sec) Did 3245 Ed25519 verify operations in 1080399us (3003.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1076021us (11152.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1005087us (3551.9 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11438 Ed25519 key generation operations in 1041115us (10986.3 ops/sec) Did 11000 Ed25519 signing operations in 1012589us (10863.2 ops/sec) Did 3312 Ed25519 verify operations in 1082834us (3058.6 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1061318us (11306.7 ops/sec) Did 3580 Curve25519 arbitrary point multiplication operations in 1004923us (3562.5 ops/sec) squashed: curve25519: convert field constants to unsigned. 
import re, sys, math def weight(i): return 2**int(math.ceil(25.5*i)) def convert(t): limbs = [x for x in t.groups() if x.replace('-','').isdigit()] v = sum(weight(i)*x for (i,x) in enumerate(map(int, limbs))) % (2**255-19) limbs = [(v % weight(i+1)) // weight(i) for i in range(10)] assert v == sum(weight(i)*x for (i,x) in enumerate(limbs)) i = 0 ret = '' for s in t.groups(): if s.replace('-','').isdigit(): ret += str(limbs[i]) i += 1 else: ret += s return ret fe_re = re.compile(r'(\s*,\s*)'.join(r'(-?\d+)' for i in range(10))) print (re.sub(fe_re, convert, sys.stdin.read())) Change-Id: Ibd4f7f5c38e5c4d61c9826afb406baebe2be5168 Reviewed-on: https://boringssl-review.googlesource.com/22385 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-03 19:03:13 +00:00
// fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
// Multiplication and carrying produce fe from fe_loose.
curve25519: fiat-crypto field arithmetic. Each operation was translated from fiat-crypto output using fiat-crypto prettyprint.py. For example fe_mul is synthesized in https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femul.v, and shown in the last Coq-compatible form at https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femulDisplay.log. Benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. && ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11382 Ed25519 key generation operations in 1053046us (10808.6 ops/sec) Did 11169 Ed25519 signing operations in 1038080us (10759.3 ops/sec) Did 2925 Ed25519 verify operations in 1001346us (2921.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1084851us (11061.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1085565us (3546.5 ops/sec) Did 11466 Ed25519 key generation operations in 1049821us (10921.9 ops/sec) Did 11000 Ed25519 signing operations in 1013317us (10855.4 ops/sec) Did 3047 Ed25519 verify operations in 1043846us (2919.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1068924us (11226.2 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1090598us (3530.2 ops/sec) Did 10309 Ed25519 key generation operations in 1003320us (10274.9 ops/sec) Did 11000 Ed25519 signing operations in 1017862us (10807.0 ops/sec) Did 3135 Ed25519 verify operations in 1098624us (2853.6 ops/sec) Did 9000 Curve25519 base-point multiplication operations in 1046608us (8599.2 ops/sec) Did 3132 Curve25519 arbitrary point multiplication operations in 1038963us (3014.5 ops/sec) master: Did 11564 Ed25519 key generation operations in 1068762us (10820.0 ops/sec) Did 11104 Ed25519 signing operations in 1024278us (10840.8 ops/sec) 
Did 3206 Ed25519 verify operations in 1049179us (3055.7 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1073619us (11177.1 ops/sec) Did 3550 Curve25519 arbitrary point multiplication operations in 1000279us (3549.0 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11760 Ed25519 key generation operations in 1072495us (10965.1 ops/sec) Did 10800 Ed25519 signing operations in 1003486us (10762.5 ops/sec) Did 3245 Ed25519 verify operations in 1080399us (3003.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1076021us (11152.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1005087us (3551.9 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11438 Ed25519 key generation operations in 1041115us (10986.3 ops/sec) Did 11000 Ed25519 signing operations in 1012589us (10863.2 ops/sec) Did 3312 Ed25519 verify operations in 1082834us (3058.6 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1061318us (11306.7 ops/sec) Did 3580 Curve25519 arbitrary point multiplication operations in 1004923us (3562.5 ops/sec) squashed: curve25519: convert field constants to unsigned. 
import re, sys, math def weight(i): return 2**int(math.ceil(25.5*i)) def convert(t): limbs = [x for x in t.groups() if x.replace('-','').isdigit()] v = sum(weight(i)*x for (i,x) in enumerate(map(int, limbs))) % (2**255-19) limbs = [(v % weight(i+1)) // weight(i) for i in range(10)] assert v == sum(weight(i)*x for (i,x) in enumerate(limbs)) i = 0 ret = '' for s in t.groups(): if s.replace('-','').isdigit(): ret += str(limbs[i]) i += 1 else: ret += s return ret fe_re = re.compile(r'(\s*,\s*)'.join(r'(-?\d+)' for i in range(10))) print (re.sub(fe_re, convert, sys.stdin.read())) Change-Id: Ibd4f7f5c38e5c4d61c9826afb406baebe2be5168 Reviewed-on: https://boringssl-review.googlesource.com/22385 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-03 19:03:13 +00:00
struct fe {
  // Ten 32-bit limbs; v[0] is the limb with weight 2^0.
  uint32_t v[10];
};
typedef struct fe fe;
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
curve25519: fiat-crypto field arithmetic. Each operation was translated from fiat-crypto output using fiat-crypto prettyprint.py. For example fe_mul is synthesized in https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femul.v, and shown in the last Coq-compatible form at https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femulDisplay.log. Benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. && ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11382 Ed25519 key generation operations in 1053046us (10808.6 ops/sec) Did 11169 Ed25519 signing operations in 1038080us (10759.3 ops/sec) Did 2925 Ed25519 verify operations in 1001346us (2921.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1084851us (11061.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1085565us (3546.5 ops/sec) Did 11466 Ed25519 key generation operations in 1049821us (10921.9 ops/sec) Did 11000 Ed25519 signing operations in 1013317us (10855.4 ops/sec) Did 3047 Ed25519 verify operations in 1043846us (2919.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1068924us (11226.2 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1090598us (3530.2 ops/sec) Did 10309 Ed25519 key generation operations in 1003320us (10274.9 ops/sec) Did 11000 Ed25519 signing operations in 1017862us (10807.0 ops/sec) Did 3135 Ed25519 verify operations in 1098624us (2853.6 ops/sec) Did 9000 Curve25519 base-point multiplication operations in 1046608us (8599.2 ops/sec) Did 3132 Curve25519 arbitrary point multiplication operations in 1038963us (3014.5 ops/sec) master: Did 11564 Ed25519 key generation operations in 1068762us (10820.0 ops/sec) Did 11104 Ed25519 signing operations in 1024278us (10840.8 ops/sec) 
Did 3206 Ed25519 verify operations in 1049179us (3055.7 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1073619us (11177.1 ops/sec) Did 3550 Curve25519 arbitrary point multiplication operations in 1000279us (3549.0 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11760 Ed25519 key generation operations in 1072495us (10965.1 ops/sec) Did 10800 Ed25519 signing operations in 1003486us (10762.5 ops/sec) Did 3245 Ed25519 verify operations in 1080399us (3003.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1076021us (11152.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1005087us (3551.9 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11438 Ed25519 key generation operations in 1041115us (10986.3 ops/sec) Did 11000 Ed25519 signing operations in 1012589us (10863.2 ops/sec) Did 3312 Ed25519 verify operations in 1082834us (3058.6 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1061318us (11306.7 ops/sec) Did 3580 Curve25519 arbitrary point multiplication operations in 1004923us (3562.5 ops/sec) squashed: curve25519: convert field constants to unsigned. 
import re, sys, math def weight(i): return 2**int(math.ceil(25.5*i)) def convert(t): limbs = [x for x in t.groups() if x.replace('-','').isdigit()] v = sum(weight(i)*x for (i,x) in enumerate(map(int, limbs))) % (2**255-19) limbs = [(v % weight(i+1)) // weight(i) for i in range(10)] assert v == sum(weight(i)*x for (i,x) in enumerate(limbs)) i = 0 ret = '' for s in t.groups(): if s.replace('-','').isdigit(): ret += str(limbs[i]) i += 1 else: ret += s return ret fe_re = re.compile(r'(\s*,\s*)'.join(r'(-?\d+)' for i in range(10))) print (re.sub(fe_re, convert, sys.stdin.read())) Change-Id: Ibd4f7f5c38e5c4d61c9826afb406baebe2be5168 Reviewed-on: https://boringssl-review.googlesource.com/22385 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-03 19:03:13 +00:00
// fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc.
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
// Addition and subtraction produce fe_loose from (fe, fe).
curve25519: fiat-crypto field arithmetic. Each operation was translated from fiat-crypto output using fiat-crypto prettyprint.py. For example fe_mul is synthesized in https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femul.v, and shown in the last Coq-compatible form at https://github.com/mit-plv/fiat-crypto/blob/master/src/Specific/X25519/C32/femulDisplay.log. Benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. && ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11382 Ed25519 key generation operations in 1053046us (10808.6 ops/sec) Did 11169 Ed25519 signing operations in 1038080us (10759.3 ops/sec) Did 2925 Ed25519 verify operations in 1001346us (2921.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1084851us (11061.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1085565us (3546.5 ops/sec) Did 11466 Ed25519 key generation operations in 1049821us (10921.9 ops/sec) Did 11000 Ed25519 signing operations in 1013317us (10855.4 ops/sec) Did 3047 Ed25519 verify operations in 1043846us (2919.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1068924us (11226.2 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1090598us (3530.2 ops/sec) Did 10309 Ed25519 key generation operations in 1003320us (10274.9 ops/sec) Did 11000 Ed25519 signing operations in 1017862us (10807.0 ops/sec) Did 3135 Ed25519 verify operations in 1098624us (2853.6 ops/sec) Did 9000 Curve25519 base-point multiplication operations in 1046608us (8599.2 ops/sec) Did 3132 Curve25519 arbitrary point multiplication operations in 1038963us (3014.5 ops/sec) master: Did 11564 Ed25519 key generation operations in 1068762us (10820.0 ops/sec) Did 11104 Ed25519 signing operations in 1024278us (10840.8 ops/sec) 
Did 3206 Ed25519 verify operations in 1049179us (3055.7 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1073619us (11177.1 ops/sec) Did 3550 Curve25519 arbitrary point multiplication operations in 1000279us (3549.0 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11760 Ed25519 key generation operations in 1072495us (10965.1 ops/sec) Did 10800 Ed25519 signing operations in 1003486us (10762.5 ops/sec) Did 3245 Ed25519 verify operations in 1080399us (3003.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1076021us (11152.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1005087us (3551.9 ops/sec) andreser@linux-andreser:~/boringssl$ build/tool/bssl speed -filter 25519 Did 11438 Ed25519 key generation operations in 1041115us (10986.3 ops/sec) Did 11000 Ed25519 signing operations in 1012589us (10863.2 ops/sec) Did 3312 Ed25519 verify operations in 1082834us (3058.6 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1061318us (11306.7 ops/sec) Did 3580 Curve25519 arbitrary point multiplication operations in 1004923us (3562.5 ops/sec) squashed: curve25519: convert field constants to unsigned. 
import re, sys, math def weight(i): return 2**int(math.ceil(25.5*i)) def convert(t): limbs = [x for x in t.groups() if x.replace('-','').isdigit()] v = sum(weight(i)*x for (i,x) in enumerate(map(int, limbs))) % (2**255-19) limbs = [(v % weight(i+1)) // weight(i) for i in range(10)] assert v == sum(weight(i)*x for (i,x) in enumerate(limbs)) i = 0 ret = '' for s in t.groups(): if s.replace('-','').isdigit(): ret += str(limbs[i]) i += 1 else: ret += s return ret fe_re = re.compile(r'(\s*,\s*)'.join(r'(-?\d+)' for i in range(10))) print (re.sub(fe_re, convert, sys.stdin.read())) Change-Id: Ibd4f7f5c38e5c4d61c9826afb406baebe2be5168 Reviewed-on: https://boringssl-review.googlesource.com/22385 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-11-03 19:03:13 +00:00
// fe_loose holds a field element as ten 32-bit limbs in |v|. It is the
// loosely-bounded counterpart of fe: limb bounds alternate between
// 3.375*2^26 and 3.375*2^25.
typedef struct fe_loose {
  uint32_t v[10];
} fe_loose;
Use 51-bit limbs from fiat-crypto in 64-bit. Our 64-bit performance was much lower than it could have been, since we weren't using the 64-bit multipliers. Fortunately, fiat-crypto is awesome, so this is just a matter of synthesizing new code and integration work. Functions without the signature fiat-crypto curly braces were written by hand and warrant more review. (It's just redistributing some bits.) These use the donna variants which takes (and proves) some of the instruction scheduling from donna as that's significantly faster. Glancing over things, I suspect but have not confirmed the gap is due to this: https://github.com/mit-plv/fiat-crypto/pull/295#issuecomment-356892413 Clang without OPENSSL_SMALL (ECDH omitted since that uses assembly and is unaffected by this CL). Before: Did 105149 Ed25519 key generation operations in 5025208us (20924.3 ops/sec) Did 125000 Ed25519 signing operations in 5024003us (24880.6 ops/sec) Did 37642 Ed25519 verify operations in 5072539us (7420.7 ops/sec) After: Did 206000 Ed25519 key generation operations in 5020547us (41031.4 ops/sec) Did 227000 Ed25519 signing operations in 5005232us (45352.5 ops/sec) Did 69840 Ed25519 verify operations in 5004769us (13954.7 ops/sec) Clang + OPENSSL_SMALL: Before: Did 68598 Ed25519 key generation operations in 5024629us (13652.4 ops/sec) Did 73000 Ed25519 signing operations in 5067837us (14404.6 ops/sec) Did 36765 Ed25519 verify operations in 5078684us (7239.1 ops/sec) Did 74000 Curve25519 base-point multiplication operations in 5016465us (14751.4 ops/sec) Did 45600 Curve25519 arbitrary point multiplication operations in 5034680us (9057.2 ops/sec) After: Did 117315 Ed25519 key generation operations in 5021860us (23360.9 ops/sec) Did 126000 Ed25519 signing operations in 5003521us (25182.3 ops/sec) Did 64974 Ed25519 verify operations in 5047790us (12871.8 ops/sec) Did 134000 Curve25519 base-point multiplication operations in 5058946us (26487.7 ops/sec) Did 86000 Curve25519 arbitrary point 
multiplication operations in 5050478us (17028.1 ops/sec) GCC without OPENSSL_SMALL (ECDH omitted since that uses assembly and is unaffected by this CL). Before: Did 35552 Ed25519 key generation operations in 5030756us (7066.9 ops/sec) Did 38286 Ed25519 signing operations in 5001648us (7654.7 ops/sec) Did 10584 Ed25519 verify operations in 5068158us (2088.3 ops/sec) After: Did 92158 Ed25519 key generation operations in 5024021us (18343.5 ops/sec) Did 99000 Ed25519 signing operations in 5011908us (19753.0 ops/sec) Did 31122 Ed25519 verify operations in 5069878us (6138.6 ops/sec) Change-Id: Ic0c24d50b4ee2bbc408b94965e9d63319936107d Reviewed-on: https://boringssl-review.googlesource.com/24805 Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> Reviewed-by: Adam Langley <agl@google.com>
2018-01-09 22:27:21 +00:00
#endif
// ge means group element.
//
// Here the group is the set of pairs (x,y) of field elements (see fe.h)
// satisfying -x^2 + y^2 = 1 + d x^2y^2
// where d = -121665/121666.
//
// Representations:
// ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
// ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
// ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
// ge_precomp (Duif): (y+x,y-x,2dxy)
// ge_p2 is a group element in projective form: (X:Y:Z) satisfying
// x = X/Z, y = Y/Z.
typedef struct {
  fe X;
  fe Y;
  fe Z;
} ge_p2;
// ge_p3 is a group element in extended form: (X:Y:Z:T) satisfying
// x = X/Z, y = Y/Z, XY = ZT.
typedef struct {
  fe X;
  fe Y;
  fe Z;
  fe T;
} ge_p3;
typedef struct {
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
fe_loose X;
fe_loose Y;
fe_loose Z;
fe_loose T;
} ge_p1p1;
typedef struct {
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
fe_loose yplusx;
fe_loose yminusx;
fe_loose xy2d;
} ge_precomp;
typedef struct {
curve25519: adhere to preconditions of fe_*. Previously, the ed25519 and SPAKE implementations called field element operations in ways that did not satisfy the preconditions about ranges of limbs. Furthermore, replacing signed field arithmetic with unsigned field arithmetic with similar specifications caused tests to fail. This commit addresses this in three steps: (1) Split fe into fe and fe_loose, tracking the bounds (2) Insert carry operations before uses of fe_add/fe_sub/fe_neg whose input is already within only the loose bounds (3) Assert that each field element is within the appropriate bounds at the beginning and end of every field operation. Throughput diff: Ed25519 key generation: -2% Ed25519 signing: -2% Ed25519 verify: -2% X25519: roughly unchanged Detailed benchmarks on Google Cloud's unidentified Intel Xeon with AVX2: git checkout $VARIANT && ( cd build && rm -rf * && CC=clang CXX=clang++ cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=../util/32-bit-toolchain.cmake -DCMAKE_BUILD_TYPE=Release .. 
&& ninja && ./tool/bssl speed -filter 25519 ) this branch: Did 11206 Ed25519 key generation operations in 1029462us (10885.3 ops/sec) Did 11104 Ed25519 signing operations in 1035735us (10720.9 ops/sec) Did 3278 Ed25519 verify operations in 1087969us (3013.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1078962us (11121.8 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1002767us (3600.0 ops/sec) Did 11662 Ed25519 key generation operations in 1077690us (10821.3 ops/sec) Did 10780 Ed25519 signing operations in 1011474us (10657.7 ops/sec) Did 3289 Ed25519 verify operations in 1083638us (3035.1 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1087477us (11034.7 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1017023us (3549.6 ops/sec) Did 11018 Ed25519 key generation operations in 1011606us (10891.6 ops/sec) Did 11000 Ed25519 signing operations in 1029961us (10680.0 ops/sec) Did 3124 Ed25519 verify operations in 1045163us (2989.0 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1081770us (11092.9 ops/sec) Did 3610 Curve25519 arbitrary point multiplication operations in 1014503us (3558.4 ops/sec) master: Did 11662 Ed25519 key generation operations in 1059449us (11007.6 ops/sec) Did 10908 Ed25519 signing operations in 1000081us (10907.1 ops/sec) Did 3333 Ed25519 verify operations in 1078798us (3089.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072831us (11185.4 ops/sec) Did 3850 Curve25519 arbitrary point multiplication operations in 1075821us (3578.7 ops/sec) Did 11102 Ed25519 key generation operations in 1017540us (10910.6 ops/sec) Did 11000 Ed25519 signing operations in 1013279us (10855.8 ops/sec) Did 3311 Ed25519 verify operations in 1066866us (3103.5 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1069668us (11218.4 ops/sec) Did 3905 Curve25519 arbitrary point multiplication operations in 1095501us 
(3564.6 ops/sec) Did 11206 Ed25519 key generation operations in 1014127us (11049.9 ops/sec) Did 10908 Ed25519 signing operations in 1015821us (10738.1 ops/sec) Did 3344 Ed25519 verify operations in 1100592us (3038.4 ops/sec) Did 12000 Curve25519 base-point multiplication operations in 1072847us (11185.2 ops/sec) Did 3570 Curve25519 arbitrary point multiplication operations in 1009373us (3536.8 ops/sec) Change-Id: Ia014386daf36c913f3ea44c5f9a420b98670e465 Reviewed-on: https://boringssl-review.googlesource.com/22104 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-10-23 15:23:44 +01:00
fe_loose YplusX;
fe_loose YminusX;
fe_loose Z;
fe_loose T2d;
} ge_cached;
// Group-element and scalar helpers. Only the declarations are visible here;
// notes on semantics below are assumptions to be confirmed against the
// definitions.

// x25519_ge_tobytes takes a ge_p2 |h| and a 32-byte buffer |s|.
// NOTE(review): presumably writes the encoding of |h| to |s| - confirm.
void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h);
// x25519_ge_frombytes_vartime takes a byte string |s| and an output ge_p3 |h|
// and returns an int.
// NOTE(review): presumably decodes |s| into |h|, is not constant-time (per the
// _vartime suffix), and reports success via the return value - confirm the
// return convention and the required length of |s|.
int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s);
// Conversions between representations: each takes a const input |p| and an
// output |r| of the target type.
void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
// x25519_ge_add and x25519_ge_sub combine a ge_p3 |p| with a ge_cached |q| and
// produce a ge_p1p1 |r|.
// NOTE(review): presumably r = p + q and r = p - q respectively - confirm.
void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
// x25519_ge_scalarmult_small_precomp takes a 32-byte scalar |a| and a
// 15 * 2 * 32 byte |precomp_table| and produces a ge_p3 |h|.
// NOTE(review): the table layout is not visible here - confirm.
void x25519_ge_scalarmult_small_precomp(
ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]);
// x25519_ge_scalarmult_base takes a 32-byte scalar |a| and produces a ge_p3
// |h|.
// NOTE(review): presumably multiplies the curve base point by |a| - confirm.
void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]);
// x25519_ge_scalarmult takes a byte-string |scalar| and a ge_p3 |A| and
// produces a ge_p2 |r|.
// NOTE(review): presumably r = scalar * A with a 32-byte scalar - confirm.
void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A);
// x25519_sc_reduce takes a 64-byte buffer |s|, which is not const.
// NOTE(review): presumably reduces the scalar in |s| in place - confirm.
void x25519_sc_reduce(uint8_t s[64]);
// spake2_state_t lists the values stored in the |state| field of
// spake2_ctx_st. The numbering starts at zero and increases by one.
enum spake2_state_t {
  spake2_state_init = 0,
  spake2_state_msg_generated = 1,
  spake2_state_key_generated = 2,
};
// spake2_ctx_st carries the state of a SPAKE2 exchange.
//
// NOTE(review): field meanings below are inferred from names and sizes only;
// confirm against the code that reads and writes them.
struct spake2_ctx_st {
  // Fixed-size byte buffers.
  uint8_t private_key[32];
  uint8_t my_msg[32];
  uint8_t password_scalar[32];
  uint8_t password_hash[64];

  // Pointer/length pairs for the two party names. Ownership of the pointed-to
  // memory is not visible from this declaration.
  uint8_t *my_name;
  size_t my_name_len;
  uint8_t *their_name;
  size_t their_name_len;

  enum spake2_role_t my_role;
  enum spake2_state_t state;

  // Flag stored in a char; presumably non-zero disables the
  // "password scalar hack" - confirm.
  char disable_password_scalar_hack;
};
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CURVE25519_INTERNAL_H