boringssl/third_party/fiat/curve25519_64.h

554 lines
24 KiB
C
Raw Normal View History

Switch to new fiat pipeline. This new version makes it much easier to tell which code is handwritten and which is verified. For some reason, it also is *dramatically* faster for 32-bit x86 GCC. Clang x86_64, however, does take a small hit. Benchmarks below. x86, GCC 7.3.0, OPENSSL_SMALL (For some reason, GCC used to be really bad at compiling the 32-bit curve25519 code. The new one fixes this. I'm not sure what changed.) Before: Did 17135 Ed25519 key generation operations in 10026402us (1709.0 ops/sec) Did 17170 Ed25519 signing operations in 10074192us (1704.4 ops/sec) Did 9180 Ed25519 verify operations in 10034025us (914.9 ops/sec) Did 17271 Curve25519 base-point multiplication operations in 10050837us (1718.4 ops/sec) Did 10605 Curve25519 arbitrary point multiplication operations in 10047714us (1055.5 ops/sec) Did 7800 ECDH P-256 operations in 10018331us (778.6 ops/sec) Did 24308 ECDSA P-256 signing operations in 10019241us (2426.1 ops/sec) Did 9191 ECDSA P-256 verify operations in 10081639us (911.7 ops/sec) After: Did 99873 Ed25519 key generation operations in 10021810us (9965.6 ops/sec) [+483.1%] Did 99960 Ed25519 signing operations in 10052236us (9944.1 ops/sec) [+483.4%] Did 53676 Ed25519 verify operations in 10009078us (5362.7 ops/sec) [+486.2%] Did 102000 Curve25519 base-point multiplication operations in 10039764us (10159.6 ops/sec) [+491.2%] Did 60802 Curve25519 arbitrary point multiplication operations in 10056897us (6045.8 ops/sec) [+472.8%] Did 7900 ECDH P-256 operations in 10054509us (785.7 ops/sec) [+0.9%] Did 24926 ECDSA P-256 signing operations in 10050919us (2480.0 ops/sec) [+2.2%] Did 9494 ECDSA P-256 verify operations in 10064659us (943.3 ops/sec) [+3.5%] x86, Clang 8.0.0 trunk 349417, OPENSSL_SMALL Before: Did 82750 Ed25519 key generation operations in 10051177us (8232.9 ops/sec) Did 82400 Ed25519 signing operations in 10035806us (8210.6 ops/sec) Did 41511 Ed25519 verify operations in 10048919us (4130.9 ops/sec) Did 83300 Curve25519 base-point 
multiplication operations in 10044283us (8293.3 ops/sec) Did 49700 Curve25519 arbitrary point multiplication operations in 10007005us (4966.5 ops/sec) Did 14039 ECDH P-256 operations in 10093929us (1390.8 ops/sec) Did 40950 ECDSA P-256 signing operations in 10006757us (4092.2 ops/sec) Did 16068 ECDSA P-256 verify operations in 10095996us (1591.5 ops/sec) After: Did 80476 Ed25519 key generation operations in 10048648us (8008.6 ops/sec) [-2.7%] Did 79050 Ed25519 signing operations in 10049180us (7866.3 ops/sec) [-4.2%] Did 40501 Ed25519 verify operations in 10048347us (4030.6 ops/sec) [-2.4%] Did 81300 Curve25519 base-point multiplication operations in 10017480us (8115.8 ops/sec) [-2.1%] Did 48278 Curve25519 arbitrary point multiplication operations in 10092500us (4783.6 ops/sec) [-3.7%] Did 15402 ECDH P-256 operations in 10096705us (1525.4 ops/sec) [+9.7%] Did 44200 ECDSA P-256 signing operations in 10037715us (4403.4 ops/sec) [+7.6%] Did 17000 ECDSA P-256 verify operations in 10008813us (1698.5 ops/sec) [+6.7%] x86_64, GCC 7.3.0 (Note these P-256 numbers are not affected by this change. Included to get a sense of noise.) 
Before: Did 557000 Ed25519 key generation operations in 10011721us (55634.8 ops/sec) Did 550000 Ed25519 signing operations in 10016449us (54909.7 ops/sec) Did 190000 Ed25519 verify operations in 10014565us (18972.4 ops/sec) Did 587000 Curve25519 base-point multiplication operations in 10015402us (58609.7 ops/sec) Did 230000 Curve25519 arbitrary point multiplication operations in 10023827us (22945.3 ops/sec) Did 179000 ECDH P-256 operations in 10016294us (17870.9 ops/sec) Did 557000 ECDSA P-256 signing operations in 10014158us (55621.3 ops/sec) Did 198000 ECDSA P-256 verify operations in 10036694us (19727.6 ops/sec) After: Did 569000 Ed25519 key generation operations in 10004965us (56871.8 ops/sec) [+2.2%] Did 563000 Ed25519 signing operations in 10000064us (56299.6 ops/sec) [+2.5%] Did 196000 Ed25519 verify operations in 10025650us (19549.9 ops/sec) [+3.0%] Did 596000 Curve25519 base-point multiplication operations in 10008666us (59548.4 ops/sec) [+1.6%] Did 229000 Curve25519 arbitrary point multiplication operations in 10028921us (22834.0 ops/sec) [-0.5%] Did 182910 ECDH P-256 operations in 10014905us (18263.8 ops/sec) [+2.2%] Did 562000 ECDSA P-256 signing operations in 10011944us (56133.0 ops/sec) [+0.9%] Did 202000 ECDSA P-256 verify operations in 10046901us (20105.7 ops/sec) [+1.9%] x86_64, GCC 7.3.0, OPENSSL_SMALL Before: Did 350000 Ed25519 key generation operations in 10002540us (34991.1 ops/sec) Did 344000 Ed25519 signing operations in 10010420us (34364.2 ops/sec) Did 197000 Ed25519 verify operations in 10030593us (19639.9 ops/sec) Did 362000 Curve25519 base-point multiplication operations in 10004615us (36183.3 ops/sec) Did 235000 Curve25519 arbitrary point multiplication operations in 10025951us (23439.2 ops/sec) Did 32032 ECDH P-256 operations in 10056486us (3185.2 ops/sec) Did 96354 ECDSA P-256 signing operations in 10007297us (9628.4 ops/sec) Did 37774 ECDSA P-256 verify operations in 10044892us (3760.5 ops/sec) After: Did 343000 Ed25519 key generation 
operations in 10025108us (34214.1 ops/sec) [-2.2%] Did 340000 Ed25519 signing operations in 10014870us (33949.5 ops/sec) [-1.2%] Did 192000 Ed25519 verify operations in 10025082us (19152.0 ops/sec) [-2.5%] Did 355000 Curve25519 base-point multiplication operations in 10013220us (35453.1 ops/sec) [-2.0%] Did 231000 Curve25519 arbitrary point multiplication operations in 10010775us (23075.1 ops/sec) [-1.6%] Did 31540 ECDH P-256 operations in 10009664us (3151.0 ops/sec) [-1.1%] Did 99012 ECDSA P-256 signing operations in 10090296us (9812.6 ops/sec) [+1.9%] Did 37695 ECDSA P-256 verify operations in 10092859us (3734.8 ops/sec) [-0.7%] x86_64, Clang 8.0.0 trunk 349417 (Note these P-256 numbers are not affected by this change. Included to get a sense of noise.) Before: Did 600000 Ed25519 key generation operations in 10000278us (59998.3 ops/sec) Did 595000 Ed25519 signing operations in 10010375us (59438.3 ops/sec) Did 184000 Ed25519 verify operations in 10013984us (18374.3 ops/sec) Did 636000 Curve25519 base-point multiplication operations in 10005250us (63566.6 ops/sec) Did 229000 Curve25519 arbitrary point multiplication operations in 10006059us (22886.1 ops/sec) Did 179250 ECDH P-256 operations in 10026354us (17877.9 ops/sec) Did 547000 ECDSA P-256 signing operations in 10017585us (54604.0 ops/sec) Did 197000 ECDSA P-256 verify operations in 10013020us (19674.4 ops/sec) After: Did 560000 Ed25519 key generation operations in 10009295us (55948.0 ops/sec) [-6.8%] Did 548000 Ed25519 signing operations in 10007912us (54756.7 ops/sec) [-7.9%] Did 170000 Ed25519 verify operations in 10056948us (16903.7 ops/sec) [-8.0%] Did 592000 Curve25519 base-point multiplication operations in 10016818us (59100.6 ops/sec) [-7.0%] Did 214000 Curve25519 arbitrary point multiplication operations in 10043918us (21306.4 ops/sec) [-6.9%] Did 180000 ECDH P-256 operations in 10026019us (17953.3 ops/sec) [+0.4%] Did 550000 ECDSA P-256 signing operations in 10004943us (54972.8 ops/sec) [+0.7%] Did 
198000 ECDSA P-256 verify operations in 10021714us (19757.1 ops/sec) [+0.4%] x86_64, Clang 8.0.0 trunk 349417, OPENSSL_SMALL Before: Did 326000 Ed25519 key generation operations in 10003266us (32589.4 ops/sec) Did 322000 Ed25519 signing operations in 10026783us (32114.0 ops/sec) Did 181000 Ed25519 verify operations in 10015635us (18071.7 ops/sec) Did 335000 Curve25519 base-point multiplication operations in 10000359us (33498.8 ops/sec) Did 224000 Curve25519 arbitrary point multiplication operations in 10027245us (22339.1 ops/sec) Did 68552 ECDH P-256 operations in 10018900us (6842.3 ops/sec) Did 184000 ECDSA P-256 signing operations in 10014516us (18373.3 ops/sec) Did 76020 ECDSA P-256 verify operations in 10016891us (7589.2 ops/sec) After: Did 310000 Ed25519 key generation operations in 10022086us (30931.7 ops/sec) [-5.1%] Did 308000 Ed25519 signing operations in 10007543us (30776.8 ops/sec) [-4.2%] Did 173000 Ed25519 verify operations in 10005829us (17289.9 ops/sec) [-4.3%] Did 321000 Curve25519 base-point multiplication operations in 10027058us (32013.4 ops/sec) [-4.4%] Did 212000 Curve25519 arbitrary point multiplication operations in 10015203us (21167.8 ops/sec) [-5.2%] Did 64059 ECDH P-256 operations in 10042781us (6378.6 ops/sec) [-6.8%] Did 170000 ECDSA P-256 signing operations in 10030896us (16947.6 ops/sec) [-7.8%] Did 72176 ECDSA P-256 verify operations in 10075369us (7163.6 ops/sec) [-5.6%] Bug: 254 Change-Id: Ib04c773f01b542bcb8611cceb582466bfa6f6d52 Reviewed-on: https://boringssl-review.googlesource.com/c/34306 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: Adam Langley <agl@google.com>
2019-01-08 23:08:42 +00:00
/* Autogenerated */
/* curve description: 25519 */
/* requested operations: carry_mul, carry_square, carry_scmul121666, carry, add, sub, opp, selectznz, to_bytes, from_bytes */
/* n = 5 (from "5") */
/* s = 0x8000000000000000000000000000000000000000000000000000000000000000 (from "2^255") */
/* c = [(1, 19)] (from "1,19") */
/* machine_wordsize = 64 (from "64") */
#include <stdint.h>
/* One-bit carry/borrow flag passed between limb operations (0 or 1). */
typedef unsigned char fiat_25519_uint1;
/* Signed counterpart of fiat_25519_uint1; lets 0 / -1 be used as a mask seed. */
typedef signed char fiat_25519_int1;
/*
 * 128-bit integers that hold double-width limb products. __int128 is a
 * GCC/Clang extension rather than ISO C, so the typedefs are marked
 * __extension__ to keep -pedantic builds free of
 * "ISO C does not support '__int128' types" diagnostics. The resulting
 * types are unchanged.
 */
__extension__ typedef signed __int128 fiat_25519_int128;
__extension__ typedef unsigned __int128 fiat_25519_uint128;
/*
 * fiat_25519_addcarryx_u51 adds two limbs and an incoming carry.
 * The low 51 bits of (arg1 + arg2 + arg3) are stored in out1; the bits
 * above them, narrowed to fiat_25519_uint1, are stored in out2.
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0x7ffffffffffff]
 *   arg3: [0x0 ~> 0x7ffffffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0x7ffffffffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void fiat_25519_addcarryx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
  /* 64-bit wrapping sum; the operand order does not affect the result. */
  const uint64_t total = arg2 + arg3 + arg1;

  *out1 = total & UINT64_C(0x7ffffffffffff);
  *out2 = (fiat_25519_uint1)(total >> 51);
}
/*
 * fiat_25519_subborrowx_u51 computes arg2 - arg1 - arg3 on 51-bit limbs.
 * The low 51 bits of the difference are stored in out1 and the borrow in
 * out2.
 *
 * The borrow is derived by right-shifting the signed difference by 51 and
 * negating the result, so a negative difference must shift in sign bits
 * (arithmetic shift) for out2 to come out as 1.
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0x7ffffffffffff]
 *   arg3: [0x0 ~> 0x7ffffffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0x7ffffffffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
  const int64_t diff = (int64_t)(arg2 - arg1) - (int64_t)arg3;
  /* 0 when diff is non-negative, -1 when it is negative (within bounds). */
  const fiat_25519_int1 sign = (fiat_25519_int1)(diff >> 51);

  *out1 = (uint64_t)diff & UINT64_C(0x7ffffffffffff);
  *out2 = (fiat_25519_uint1)(0x0 - sign);
}
/*
 * fiat_25519_cmovznz_u64 selects between two 64-bit words.
 * out1 receives arg2 when arg1 is zero and arg3 when arg1 is non-zero.
 * The choice is made by AND/OR-ing with an all-zeros or all-ones mask;
 * the source contains no branch on arg1.
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffffffffffff]
 *   arg3: [0x0 ~> 0xffffffffffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffffffffffff]
 */
static void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
  /* Normalise the flag to exactly 0 or 1. */
  const fiat_25519_uint1 nonzero = (fiat_25519_uint1)(arg1 != 0);
  /* 0 - 1 wraps to all ones; 0 - 0 stays zero. */
  const uint64_t take_arg3 = (uint64_t)0 - (uint64_t)nonzero;

  *out1 = (arg3 & take_arg3) | (arg2 & ~take_arg3);
}
/*
* Input Bounds:
* arg1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
* arg2: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
* Output Bounds:
* out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
*/
static void fiat_25519_carry_mul(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) {
fiat_25519_uint128 x1 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[4]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x2 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[3]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x3 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[2]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x4 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[1]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x5 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[4]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x6 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[3]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x7 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[2]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x8 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[4]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x9 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[3]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x10 = ((fiat_25519_uint128)(arg1[1]) * ((arg2[4]) * (uint64_t)UINT8_C(0x13)));
fiat_25519_uint128 x11 = ((fiat_25519_uint128)(arg1[4]) * (arg2[0]));
fiat_25519_uint128 x12 = ((fiat_25519_uint128)(arg1[3]) * (arg2[1]));
fiat_25519_uint128 x13 = ((fiat_25519_uint128)(arg1[3]) * (arg2[0]));
fiat_25519_uint128 x14 = ((fiat_25519_uint128)(arg1[2]) * (arg2[2]));
fiat_25519_uint128 x15 = ((fiat_25519_uint128)(arg1[2]) * (arg2[1]));
fiat_25519_uint128 x16 = ((fiat_25519_uint128)(arg1[2]) * (arg2[0]));
fiat_25519_uint128 x17 = ((fiat_25519_uint128)(arg1[1]) * (arg2[3]));
fiat_25519_uint128 x18 = ((fiat_25519_uint128)(arg1[1]) * (arg2[2]));
fiat_25519_uint128 x19 = ((fiat_25519_uint128)(arg1[1]) * (arg2[1]));
fiat_25519_uint128 x20 = ((fiat_25519_uint128)(arg1[1]) * (arg2[0]));
fiat_25519_uint128 x21 = ((fiat_25519_uint128)(arg1[0]) * (arg2[4]));
fiat_25519_uint128 x22 = ((fiat_25519_uint128)(arg1[0]) * (arg2[3]));
fiat_25519_uint128 x23 = ((fiat_25519_uint128)(arg1[0]) * (arg2[2]));
fiat_25519_uint128 x24 = ((fiat_25519_uint128)(arg1[0]) * (arg2[1]));
fiat_25519_uint128 x25 = ((fiat_25519_uint128)(arg1[0]) * (arg2[0]));
fiat_25519_uint128 x26 = (x25 + (x10 + (x9 + (x7 + x4))));
uint64_t x27 = (uint64_t)(x26 >> 51);
uint64_t x28 = (uint64_t)(x26 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x29 = (x21 + (x17 + (x14 + (x12 + x11))));
fiat_25519_uint128 x30 = (x22 + (x18 + (x15 + (x13 + x1))));
fiat_25519_uint128 x31 = (x23 + (x19 + (x16 + (x5 + x2))));
fiat_25519_uint128 x32 = (x24 + (x20 + (x8 + (x6 + x3))));
fiat_25519_uint128 x33 = (x27 + x32);
uint64_t x34 = (uint64_t)(x33 >> 51);
uint64_t x35 = (uint64_t)(x33 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x36 = (x34 + x31);
uint64_t x37 = (uint64_t)(x36 >> 51);
uint64_t x38 = (uint64_t)(x36 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x39 = (x37 + x30);
uint64_t x40 = (uint64_t)(x39 >> 51);
uint64_t x41 = (uint64_t)(x39 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x42 = (x40 + x29);
uint64_t x43 = (uint64_t)(x42 >> 51);
uint64_t x44 = (uint64_t)(x42 & UINT64_C(0x7ffffffffffff));
uint64_t x45 = (x43 * (uint64_t)UINT8_C(0x13));
uint64_t x46 = (x28 + x45);
uint64_t x47 = (x46 >> 51);
uint64_t x48 = (x46 & UINT64_C(0x7ffffffffffff));
uint64_t x49 = (x47 + x35);
uint64_t x50 = (x49 >> 51);
uint64_t x51 = (x49 & UINT64_C(0x7ffffffffffff));
uint64_t x52 = (x50 + x38);
out1[0] = x48;
out1[1] = x51;
out1[2] = x52;
out1[3] = x41;
out1[4] = x44;
}
/*
* Input Bounds:
* arg1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
* Output Bounds:
* out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
*/
static void fiat_25519_carry_square(uint64_t out1[5], const uint64_t arg1[5]) {
uint64_t x1 = ((arg1[4]) * (uint64_t)UINT8_C(0x13));
uint64_t x2 = (x1 * (uint64_t)0x2);
uint64_t x3 = ((arg1[4]) * (uint64_t)0x2);
uint64_t x4 = ((arg1[3]) * (uint64_t)UINT8_C(0x13));
uint64_t x5 = (x4 * (uint64_t)0x2);
uint64_t x6 = ((arg1[3]) * (uint64_t)0x2);
uint64_t x7 = ((arg1[2]) * (uint64_t)0x2);
uint64_t x8 = ((arg1[1]) * (uint64_t)0x2);
fiat_25519_uint128 x9 = ((fiat_25519_uint128)(arg1[4]) * x1);
fiat_25519_uint128 x10 = ((fiat_25519_uint128)(arg1[3]) * x2);
fiat_25519_uint128 x11 = ((fiat_25519_uint128)(arg1[3]) * x4);
fiat_25519_uint128 x12 = ((fiat_25519_uint128)(arg1[2]) * x2);
fiat_25519_uint128 x13 = ((fiat_25519_uint128)(arg1[2]) * x5);
fiat_25519_uint128 x14 = ((fiat_25519_uint128)(arg1[2]) * (arg1[2]));
fiat_25519_uint128 x15 = ((fiat_25519_uint128)(arg1[1]) * x2);
fiat_25519_uint128 x16 = ((fiat_25519_uint128)(arg1[1]) * x6);
fiat_25519_uint128 x17 = ((fiat_25519_uint128)(arg1[1]) * x7);
fiat_25519_uint128 x18 = ((fiat_25519_uint128)(arg1[1]) * (arg1[1]));
fiat_25519_uint128 x19 = ((fiat_25519_uint128)(arg1[0]) * x3);
fiat_25519_uint128 x20 = ((fiat_25519_uint128)(arg1[0]) * x6);
fiat_25519_uint128 x21 = ((fiat_25519_uint128)(arg1[0]) * x7);
fiat_25519_uint128 x22 = ((fiat_25519_uint128)(arg1[0]) * x8);
fiat_25519_uint128 x23 = ((fiat_25519_uint128)(arg1[0]) * (arg1[0]));
fiat_25519_uint128 x24 = (x23 + (x15 + x13));
uint64_t x25 = (uint64_t)(x24 >> 51);
uint64_t x26 = (uint64_t)(x24 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x27 = (x19 + (x16 + x14));
fiat_25519_uint128 x28 = (x20 + (x17 + x9));
fiat_25519_uint128 x29 = (x21 + (x18 + x10));
fiat_25519_uint128 x30 = (x22 + (x12 + x11));
fiat_25519_uint128 x31 = (x25 + x30);
uint64_t x32 = (uint64_t)(x31 >> 51);
uint64_t x33 = (uint64_t)(x31 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x34 = (x32 + x29);
uint64_t x35 = (uint64_t)(x34 >> 51);
uint64_t x36 = (uint64_t)(x34 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x37 = (x35 + x28);
uint64_t x38 = (uint64_t)(x37 >> 51);
uint64_t x39 = (uint64_t)(x37 & UINT64_C(0x7ffffffffffff));
fiat_25519_uint128 x40 = (x38 + x27);
uint64_t x41 = (uint64_t)(x40 >> 51);
uint64_t x42 = (uint64_t)(x40 & UINT64_C(0x7ffffffffffff));
uint64_t x43 = (x41 * (uint64_t)UINT8_C(0x13));
uint64_t x44 = (x26 + x43);
uint64_t x45 = (x44 >> 51);
uint64_t x46 = (x44 & UINT64_C(0x7ffffffffffff));
uint64_t x47 = (x45 + x33);
uint64_t x48 = (x47 >> 51);
uint64_t x49 = (x47 & UINT64_C(0x7ffffffffffff));
uint64_t x50 = (x48 + x36);
out1[0] = x46;
out1[1] = x49;
out1[2] = x50;
out1[3] = x39;
out1[4] = x42;
}
/*
 * fiat_25519_carry_scmul_121666 multiplies a five-limb (radix 2^51) value
 * by the constant 0x1db42 (121666 decimal) and carries the result.
 *
 * Each limb is scaled in 128-bit arithmetic. The carry sequence is:
 * 0 -> 1 -> 2 -> 3 -> 4, top carry times 19 into limb 0, then 0 -> 1 -> 2.
 *
 * Input Bounds:
 *   arg1: each limb in [0x0 ~> 0x1a666666666664]
 * Output Bounds:
 *   out1: each limb in [0x0 ~> 0x8cccccccccccc]
 */
static void fiat_25519_carry_scmul_121666(uint64_t out1[5], const uint64_t arg1[5]) {
  const uint64_t limb_mask = UINT64_C(0x7ffffffffffff);
  const fiat_25519_uint128 scalar = UINT32_C(0x1db42);

  /* Scale every limb before any output is written. */
  fiat_25519_uint128 t0 = scalar * arg1[0];
  fiat_25519_uint128 t1 = scalar * arg1[1];
  fiat_25519_uint128 t2 = scalar * arg1[2];
  fiat_25519_uint128 t3 = scalar * arg1[3];
  fiat_25519_uint128 t4 = scalar * arg1[4];

  /* First carry pass; each carry is narrowed to 64 bits before use. */
  t1 += (uint64_t)(t0 >> 51);
  t2 += (uint64_t)(t1 >> 51);
  t3 += (uint64_t)(t2 >> 51);
  t4 += (uint64_t)(t3 >> 51);

  /* Carry out of the top limb re-enters limb 0 scaled by 19. */
  const uint64_t wrap = (uint64_t)(t4 >> 51) * UINT64_C(19);

  /* Second, partial carry pass over limbs 0..2. */
  const uint64_t r0 = (uint64_t)(t0 & limb_mask) + wrap;
  const uint64_t r1 = (r0 >> 51) + (uint64_t)(t1 & limb_mask);
  const uint64_t r2 = (r1 >> 51) + (uint64_t)(t2 & limb_mask);

  out1[0] = r0 & limb_mask;
  out1[1] = r1 & limb_mask;
  out1[2] = r2;
  out1[3] = (uint64_t)(t3 & limb_mask);
  out1[4] = (uint64_t)(t4 & limb_mask);
}
/*
 * fiat_25519_carry propagates carries through a five-limb (radix 2^51)
 * value.
 *
 * Bits above bit 50 of each limb are added to the next limb
 * (0 -> 1 -> 2 -> 3 -> 4). The overflow of limb 4 is multiplied by 19 and
 * added to limb 0, after which carries run 0 -> 1 -> 2 once more. Limb 2
 * of the output is left unmasked.
 *
 * Input Bounds:
 *   arg1: each limb in [0x0 ~> 0x1a666666666664]
 * Output Bounds:
 *   out1: each limb in [0x0 ~> 0x8cccccccccccc]
 */
static void fiat_25519_carry(uint64_t out1[5], const uint64_t arg1[5]) {
  const uint64_t limb_mask = UINT64_C(0x7ffffffffffff);

  /* First pass: ripple the excess of each limb upwards. */
  const uint64_t c0 = arg1[0];
  const uint64_t c1 = arg1[1] + (c0 >> 51);
  const uint64_t c2 = arg1[2] + (c1 >> 51);
  const uint64_t c3 = arg1[3] + (c2 >> 51);
  const uint64_t c4 = arg1[4] + (c3 >> 51);

  /* Wrap the top overflow into limb 0, then ripple through limbs 1 and 2. */
  const uint64_t f0 = (c0 & limb_mask) + (c4 >> 51) * UINT64_C(19);
  const uint64_t f1 = (c1 & limb_mask) + (f0 >> 51);
  const uint64_t f2 = (c2 & limb_mask) + (f1 >> 51);

  out1[0] = f0 & limb_mask;
  out1[1] = f1 & limb_mask;
  out1[2] = f2;
  out1[3] = c3 & limb_mask;
  out1[4] = c4 & limb_mask;
}
/*
 * fiat_25519_add adds two five-limb values limb by limb.
 * No carry is propagated between limbs; each sum is a plain 64-bit
 * addition.
 *
 * Input Bounds:
 *   arg1: each limb in [0x0 ~> 0x8cccccccccccc]
 *   arg2: each limb in [0x0 ~> 0x8cccccccccccc]
 * Output Bounds:
 *   out1: each limb in [0x0 ~> 0x1a666666666664]
 */
static void fiat_25519_add(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) {
  uint64_t sum[5];

  /* Finish all reads before the first write so out1 may alias an input. */
  for (int i = 0; i < 5; i++) {
    sum[i] = arg1[i] + arg2[i];
  }
  for (int i = 0; i < 5; i++) {
    out1[i] = sum[i];
  }
}
/*
 * fiat_25519_sub subtracts arg2 from arg1 limb by limb.
 *
 * A fixed bias is added to each limb of arg1 before subtracting:
 * 0xfffffffffffda for limb 0 and 0xffffffffffffe for limbs 1..4. These are
 * twice 0x7ffffffffffed and 0x7ffffffffffff respectively, and they exceed
 * the documented input bound, so no limb wraps below zero for in-bound
 * inputs. No carry is propagated between limbs.
 *
 * Input Bounds:
 *   arg1: each limb in [0x0 ~> 0x8cccccccccccc]
 *   arg2: each limb in [0x0 ~> 0x8cccccccccccc]
 * Output Bounds:
 *   out1: each limb in [0x0 ~> 0x1a666666666664]
 */
static void fiat_25519_sub(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) {
  static const uint64_t bias[5] = {
    UINT64_C(0xfffffffffffda),
    UINT64_C(0xffffffffffffe),
    UINT64_C(0xffffffffffffe),
    UINT64_C(0xffffffffffffe),
    UINT64_C(0xffffffffffffe),
  };
  uint64_t diff[5];

  /* Finish all reads before the first write so out1 may alias an input. */
  for (int i = 0; i < 5; i++) {
    diff[i] = (bias[i] + arg1[i]) - arg2[i];
  }
  for (int i = 0; i < 5; i++) {
    out1[i] = diff[i];
  }
}
/*
 * fiat_25519_opp negates a five-limb value limb by limb.
 *
 * Each limb of arg1 is subtracted from a fixed constant: 0xfffffffffffda
 * for limb 0 and 0xffffffffffffe for limbs 1..4 (twice 0x7ffffffffffed and
 * 0x7ffffffffffff respectively). No carry is propagated between limbs.
 *
 * Input Bounds:
 *   arg1: each limb in [0x0 ~> 0x8cccccccccccc]
 * Output Bounds:
 *   out1: each limb in [0x0 ~> 0x1a666666666664]
 */
static void fiat_25519_opp(uint64_t out1[5], const uint64_t arg1[5]) {
  static const uint64_t bias[5] = {
    UINT64_C(0xfffffffffffda),
    UINT64_C(0xffffffffffffe),
    UINT64_C(0xffffffffffffe),
    UINT64_C(0xffffffffffffe),
    UINT64_C(0xffffffffffffe),
  };
  uint64_t neg[5];

  /* Finish all reads before the first write so out1 may alias arg1. */
  for (int i = 0; i < 5; i++) {
    neg[i] = bias[i] - arg1[i];
  }
  for (int i = 0; i < 5; i++) {
    out1[i] = neg[i];
  }
}
/*
 * fiat_25519_selectznz selects between two five-word arrays.
 * Every word of out1 is chosen by fiat_25519_cmovznz_u64 with the same
 * flag: the word from arg2 when arg1 is zero, the word from arg3 when
 * arg1 is non-zero.
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: each word in [0x0 ~> 0xffffffffffffffff]
 *   arg3: each word in [0x0 ~> 0xffffffffffffffff]
 * Output Bounds:
 *   out1: each word in [0x0 ~> 0xffffffffffffffff]
 */
static void fiat_25519_selectznz(uint64_t out1[5], fiat_25519_uint1 arg1, const uint64_t arg2[5], const uint64_t arg3[5]) {
  uint64_t chosen[5];

  /* Finish all reads before the first write so out1 may alias an input. */
  for (int i = 0; i < 5; i++) {
    fiat_25519_cmovznz_u64(&chosen[i], arg1, arg2[i], arg3[i]);
  }
  for (int i = 0; i < 5; i++) {
    out1[i] = chosen[i];
  }
}
/*
 * fiat_25519_to_bytes serialises a five-limb (radix 2^51) value into 32
 * bytes, least-significant byte first.
 *
 * Steps:
 *   1. Subtract the limbs 0x7ffffffffffed, 0x7ffffffffffff (x4) with a
 *      borrow chain, i.e. subtract 2^255 - 19.
 *   2. If the final borrow is set, add the same limbs back with a carry
 *      chain; otherwise add zero. The addend is chosen through a mask
 *      from fiat_25519_cmovznz_u64, not a branch in this function.
 *   3. Pack the five 51-bit limbs into a contiguous 255-bit string. Limb
 *      i starts at bit 51*i, which is bit 0, 3, 6, 1 and 4 of bytes 0, 6,
 *      12, 19 and 25 respectively.
 *
 * Input Bounds:
 *   arg1: each limb in [0x0 ~> 0x8cccccccccccc]
 * Output Bounds:
 *   out1: bytes 0..30 in [0x0 ~> 0xff], byte 31 in [0x0 ~> 0x7f]
 */
static void fiat_25519_to_bytes(uint8_t out1[32], const uint64_t arg1[5]) {
  static const uint64_t prime_limbs[5] = {
    UINT64_C(0x7ffffffffffed),
    UINT64_C(0x7ffffffffffff),
    UINT64_C(0x7ffffffffffff),
    UINT64_C(0x7ffffffffffff),
    UINT64_C(0x7ffffffffffff),
  };
  /* Bit position inside the current byte at which each limb begins. */
  static const unsigned char start_bit[5] = {0, 3, 6, 1, 4};
  /* Whole bytes that can be emitted once each limb has been merged in. */
  static const unsigned char whole_bytes[5] = {6, 6, 7, 6, 7};

  uint64_t limbs[5];
  fiat_25519_uint1 chain = 0x0;

  /* Step 1: limbs = arg1 - (2^255 - 19), leaving the borrow in chain. */
  for (int i = 0; i < 5; i++) {
    fiat_25519_subborrowx_u51(&limbs[i], &chain, chain, arg1[i], prime_limbs[i]);
  }

  /* Step 2: all-ones when the subtraction borrowed, zero otherwise. */
  uint64_t add_back;
  fiat_25519_cmovznz_u64(&add_back, chain, 0x0, UINT64_C(0xffffffffffffffff));

  chain = 0x0;
  for (int i = 0; i < 5; i++) {
    fiat_25519_addcarryx_u51(&limbs[i], &chain, chain, (add_back & prime_limbs[i]), limbs[i]);
  }

  /*
   * Step 3: merge each limb into a bit accumulator at its starting offset
   * and emit the bytes that are complete; leftover high bits stay in the
   * accumulator for the next limb.
   */
  uint64_t pending = 0;
  int pos = 0;
  for (int i = 0; i < 5; i++) {
    pending += limbs[i] << start_bit[i];
    for (int k = 0; k < whole_bytes[i]; k++) {
      out1[pos] = (uint8_t)(pending & UINT8_C(0xff));
      pending >>= 8;
      pos++;
    }
  }
}
/*
 * fiat_25519_from_bytes loads 32 bytes, least-significant byte first,
 * into five radix-2^51 limbs.
 *
 * Each byte is shifted to its position relative to the limb that contains
 * its lowest bit. A byte that straddles a limb boundary therefore pushes
 * bits above bit 50; those bits are carried into the next limb. Limbs
 * 0..3 are masked to 51 bits, limb 4 is stored as accumulated.
 *
 * Input Bounds:
 *   arg1: bytes 0..30 in [0x0 ~> 0xff], byte 31 in [0x0 ~> 0x7f]
 * Output Bounds:
 *   out1: each limb in [0x0 ~> 0x8cccccccccccc]
 */
static void fiat_25519_from_bytes(uint64_t out1[5], const uint8_t arg1[32]) {
  const uint64_t limb_mask = UINT64_C(0x7ffffffffffff);

  /* Limb 0 covers bits 0..50: bytes 0..6, the first at bit 0. */
  uint64_t limb0 = (uint64_t)arg1[0]
                 + ((uint64_t)arg1[1] << 8)
                 + ((uint64_t)arg1[2] << 16)
                 + ((uint64_t)arg1[3] << 24)
                 + ((uint64_t)arg1[4] << 32)
                 + ((uint64_t)arg1[5] << 40)
                 + ((uint64_t)arg1[6] << 48);
  /* Limb 1 covers bits 51..101: bytes 7..12, the first at bit 5. */
  uint64_t limb1 = ((uint64_t)arg1[7] << 5)
                 + ((uint64_t)arg1[8] << 13)
                 + ((uint64_t)arg1[9] << 21)
                 + ((uint64_t)arg1[10] << 29)
                 + ((uint64_t)arg1[11] << 37)
                 + ((uint64_t)arg1[12] << 45);
  /* Limb 2 covers bits 102..152: bytes 13..19, the first at bit 2. */
  uint64_t limb2 = ((uint64_t)arg1[13] << 2)
                 + ((uint64_t)arg1[14] << 10)
                 + ((uint64_t)arg1[15] << 18)
                 + ((uint64_t)arg1[16] << 26)
                 + ((uint64_t)arg1[17] << 34)
                 + ((uint64_t)arg1[18] << 42)
                 + ((uint64_t)arg1[19] << 50);
  /* Limb 3 covers bits 153..203: bytes 20..25, the first at bit 7. */
  uint64_t limb3 = ((uint64_t)arg1[20] << 7)
                 + ((uint64_t)arg1[21] << 15)
                 + ((uint64_t)arg1[22] << 23)
                 + ((uint64_t)arg1[23] << 31)
                 + ((uint64_t)arg1[24] << 39)
                 + ((uint64_t)arg1[25] << 47);
  /* Limb 4 covers bits 204 and up: bytes 26..31, the first at bit 4. */
  uint64_t limb4 = ((uint64_t)arg1[26] << 4)
                 + ((uint64_t)arg1[27] << 12)
                 + ((uint64_t)arg1[28] << 20)
                 + ((uint64_t)arg1[29] << 28)
                 + ((uint64_t)arg1[30] << 36)
                 + ((uint64_t)arg1[31] << 44);

  /* Move the bits that spilled past bit 50 into the next limb. */
  limb1 += (uint8_t)(limb0 >> 51);
  limb2 += (uint8_t)(limb1 >> 51);
  limb3 += (uint8_t)(limb2 >> 51);
  limb4 += (uint8_t)(limb3 >> 51);

  out1[0] = limb0 & limb_mask;
  out1[1] = limb1 & limb_mask;
  out1[2] = limb2 & limb_mask;
  out1[3] = limb3 & limb_mask;
  out1[4] = limb4;
}