boringssl/third_party/fiat/curve25519.c
David Benjamin 302bb3964a Small curve25519 cleanups.
Per Brian, x25519_ge_frombytes_vartime does not match the usual
BoringSSL return value convention, and we're slightly inconsistent about
whether to mask the last byte with 63 or 127. (It then gets ANDed with
64, so it doesn't matter which.) Use 127 to align with the curve25519
RFC. Finally, when we invert the transformation, use the same constants
inverted so that they're parallel.

Bug: 243, 244
Change-Id: I0e3aca0433ead210446c58d86b2f57526bde1eac
Reviewed-on: https://boringssl-review.googlesource.com/27984
Reviewed-by: Adam Langley <agl@google.com>
2018-05-02 19:24:00 +00:00

3231 lines
96 KiB
C

// The MIT License (MIT)
//
// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
// public domain but parts have been replaced with code generated by Fiat
// (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
//
// The field functions are shared by Ed25519 and X25519 where possible.
#include <openssl/curve25519.h>
#include <assert.h>
#include <string.h>
#include <openssl/cpu.h>
#include <openssl/mem.h>
#include <openssl/rand.h>
#include <openssl/sha.h>
#include <openssl/type_check.h>
#include "internal.h"
#include "../../crypto/internal.h"
// Various pre-computed constants.
#include "./curve25519_tables.h"
// Low-level intrinsic operations (hand-written).
// load_3 reads the three bytes at |in| as a little-endian integer and returns
// the value zero-extended to 64 bits.
static uint64_t load_3(const uint8_t *in) {
  uint64_t value = 0;
  for (unsigned i = 0; i < 3; i++) {
    value |= ((uint64_t)in[i]) << (8 * i);
  }
  return value;
}
// load_4 reads the four bytes at |in| as a little-endian integer and returns
// the value zero-extended to 64 bits.
static uint64_t load_4(const uint8_t *in) {
  uint64_t value = 0;
  for (unsigned i = 0; i < 4; i++) {
    value |= ((uint64_t)in[i]) << (8 * i);
  }
  return value;
}
#if defined(BORINGSSL_CURVE25519_64BIT)
// load_8 reads the eight bytes at |in| as a little-endian 64-bit integer.
static uint64_t load_8(const uint8_t *in) {
  uint64_t value = 0;
  for (unsigned i = 0; i < 8; i++) {
    value |= ((uint64_t)in[i]) << (8 * i);
  }
  return value;
}
// addcarryx_u51 adds |a|, |b| and the incoming carry |c|, stores the low 51
// bits of the sum in |*low| and returns bit 51 of the sum as the outgoing
// carry. The sum is formed in a single 64-bit word.
static uint8_t /*bool*/ addcarryx_u51(uint8_t /*bool*/ c, uint64_t a,
                                      uint64_t b, uint64_t *low) {
  const uint64_t kMask51 = (UINT64_C(1) << 51) - 1;
  const uint64_t sum = a + b + c;
  *low = sum & kMask51;
  return (uint8_t)((sum >> 51) & 1);
}
// subborrow_u51 computes |a| - |b| - |c| in a 64-bit word, stores the low 51
// bits of the difference in |*low| and returns the top bit (bit 63) of the
// 64-bit difference as the outgoing borrow.
static uint8_t /*bool*/ subborrow_u51(uint8_t /*bool*/ c, uint64_t a,
                                      uint64_t b, uint64_t *low) {
  const uint64_t kMask51 = (UINT64_C(1) << 51) - 1;
  const uint64_t diff = a - b - c;
  *low = diff & kMask51;
  return (uint8_t)(diff >> 63);
}
// cmovznz64 returns |nz| if |t| is non-zero and |z| if |t| is zero. The
// selection is done with a bit mask rather than a branch on |t|.
static uint64_t cmovznz64(uint64_t t, uint64_t z, uint64_t nz) {
  // All ones when |t| is non-zero, all zeros otherwise.
  const uint64_t mask = (uint64_t)0 - (uint64_t)(t != 0);
  return z ^ (mask & (z ^ nz));
}
#else
// addcarryx_u25 adds |a|, |b| and the incoming carry |c|, stores the low 25
// bits of the sum in |*low| and returns bit 25 of the sum as the outgoing
// carry. The sum is formed in a single 32-bit word.
static uint8_t /*bool*/ addcarryx_u25(uint8_t /*bool*/ c, uint32_t a,
                                      uint32_t b, uint32_t *low) {
  const uint32_t kMask25 = ((uint32_t)1 << 25) - 1;
  const uint32_t sum = a + b + c;
  *low = sum & kMask25;
  return (uint8_t)((sum >> 25) & 1);
}
// addcarryx_u26 adds |a|, |b| and the incoming carry |c|, stores the low 26
// bits of the sum in |*low| and returns bit 26 of the sum as the outgoing
// carry. The sum is formed in a single 32-bit word.
static uint8_t /*bool*/ addcarryx_u26(uint8_t /*bool*/ c, uint32_t a,
                                      uint32_t b, uint32_t *low) {
  const uint32_t kMask26 = ((uint32_t)1 << 26) - 1;
  const uint32_t sum = a + b + c;
  *low = sum & kMask26;
  return (uint8_t)((sum >> 26) & 1);
}
// subborrow_u25 computes |a| - |b| - |c| in a 32-bit word, stores the low 25
// bits of the difference in |*low| and returns the top bit (bit 31) of the
// 32-bit difference as the outgoing borrow.
static uint8_t /*bool*/ subborrow_u25(uint8_t /*bool*/ c, uint32_t a,
                                      uint32_t b, uint32_t *low) {
  const uint32_t kMask25 = ((uint32_t)1 << 25) - 1;
  const uint32_t diff = a - b - c;
  *low = diff & kMask25;
  return (uint8_t)(diff >> 31);
}
// subborrow_u26 computes |a| - |b| - |c| in a 32-bit word, stores the low 26
// bits of the difference in |*low| and returns the top bit (bit 31) of the
// 32-bit difference as the outgoing borrow.
static uint8_t /*bool*/ subborrow_u26(uint8_t /*bool*/ c, uint32_t a,
                                      uint32_t b, uint32_t *low) {
  const uint32_t kMask26 = ((uint32_t)1 << 26) - 1;
  const uint32_t diff = a - b - c;
  *low = diff & kMask26;
  return (uint8_t)(diff >> 31);
}
// cmovznz32 returns |nz| if |t| is non-zero and |z| if |t| is zero. The
// selection is done with a bit mask rather than a branch on |t|.
static uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz) {
  // All ones when |t| is non-zero, all zeros otherwise.
  const uint32_t mask = (uint32_t)0 - (uint32_t)(t != 0);
  return z ^ (mask & (z ^ nz));
}
#endif
// Field operations.
#if defined(BORINGSSL_CURVE25519_64BIT)
// assert_fe asserts that each of the five 64-bit limbs of |f| is strictly
// below 1.125 * 2^51. In this file it is applied to the limbs of |fe| values.
// |f| may be any expression that yields a pointer to (or array of) limbs; it
// is parenthesized so that arguments such as |p + 0| index correctly.
#define assert_fe(f) do { \
  for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
    assert((f)[_assert_fe_i] < 1.125*(UINT64_C(1)<<51)); \
  } \
} while (0)
// assert_fe_loose asserts that each of the five 64-bit limbs of |f| is
// strictly below 3.375 * 2^51. In this file it is applied to the limbs of
// |fe_loose| values and to the inputs of the multiply/square routines. |f| may
// be any expression that yields a pointer to (or array of) limbs; it is
// parenthesized so that arguments such as |p + 0| index correctly.
#define assert_fe_loose(f) do { \
  for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
    assert((f)[_assert_fe_i] < 3.375*(UINT64_C(1)<<51)); \
  } \
} while (0)
// assert_fe_frozen asserts that each of the five 64-bit limbs of |f| is
// strictly below 2^51, i.e. fits in 51 bits. |f| may be any expression that
// yields a pointer to (or array of) limbs; it is parenthesized so that
// arguments such as |p + 0| index correctly.
#define assert_fe_frozen(f) do { \
  for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
    assert((f)[_assert_fe_i] < (UINT64_C(1)<<51)); \
  } \
} while (0)
// fe_frombytes_impl unpacks the 32 little-endian bytes at |s| into five
// 51-bit limbs in |h|. Only the low 255 bits are used: the top bit of |s[31]|
// is ignored.
static void fe_frombytes_impl(uint64_t h[5], const uint8_t *s) {
  const uint64_t kMask51 = (UINT64_C(1) << 51) - 1;

  // Read the input as four 64-bit little-endian words before writing |h|.
  uint64_t w[4];
  for (unsigned i = 0; i < 4; i++) {
    w[i] = load_8(s + 8 * i);
  }

  // Each limb takes the bits left over from the previous word followed by
  // enough bits from the next word to make 51 in total.
  h[0] = w[0] & kMask51;                              // 51 from w[0]
  h[1] = ((w[0] >> 51) | (w[1] << 13)) & kMask51;     // 13 + 38
  h[2] = ((w[1] >> 38) | (w[2] << 26)) & kMask51;     // 26 + 25
  h[3] = ((w[2] >> 25) | (w[3] << 39)) & kMask51;     // 39 + 12
  h[4] = (w[3] >> 12) & kMask51;                      // 51 of 52, drop top bit
  assert_fe(h);
}
// fe_frombytes loads a field element into |h| from the 32 bytes at |s| by
// forwarding to |fe_frombytes_impl| on the limb array |h->v|. The top bit of
// the final byte is ignored by that routine.
static void fe_frombytes(fe *h, const uint8_t *s) {
fe_frombytes_impl(h->v, s);
}
// fe_freeze conditionally subtracts p = 2^255 - 19 from |in1| and writes the
// result to |out| as five limbs of at most 51 bits each. It first computes
// |in1| - p with a borrow chain; if that underflows, p is added back, so the
// net effect is to subtract p only when |in1| >= p. The add-back is selected
// with a mask rather than a branch.
static void fe_freeze(uint64_t out[5], const uint64_t in1[5]) {
  // Limbs of p in radix 2^51, least significant first.
  static const uint64_t kPrime[5] = {
      0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff,
      0x7ffffffffffff, 0x7ffffffffffff,
  };

  // diff = in1 - p, tracking the borrow across limbs.
  uint64_t diff[5];
  uint8_t /*bool*/ borrow = 0x0;
  for (unsigned i = 0; i < 5; i++) {
    borrow = subborrow_u51(borrow, in1[i], kPrime[i], &diff[i]);
  }

  // All ones if the subtraction borrowed (in1 < p), zero otherwise.
  const uint64_t add_back = cmovznz64(borrow, 0x0, 0xffffffffffffffffL);

  // out = diff + (p or 0). The carry out of the top limb is discarded.
  uint8_t /*bool*/ carry = 0x0;
  for (unsigned i = 0; i < 5; i++) {
    carry = addcarryx_u51(carry, diff[i], add_back & kPrime[i], &out[i]);
  }
}
// fe_tobytes writes the 32-byte little-endian encoding of |f| to |s|. The
// element is first passed through |fe_freeze|, then its five 51-bit limbs are
// packed contiguously (limb 0 in the lowest bits).
static void fe_tobytes(uint8_t s[32], const fe *f) {
  assert_fe(f->v);
  uint64_t h[5];
  fe_freeze(h, f->v);
  assert_fe_frozen(h);

  // Repack 5 x 51 bits into 4 x 64 bits. Every limb from |fe_freeze| fits in
  // 51 bits, so the shifted pieces never overlap.
  const uint64_t words[4] = {
      h[0] | (h[1] << 51),
      (h[1] >> 13) | (h[2] << 38),
      (h[2] >> 26) | (h[3] << 25),
      (h[3] >> 39) | (h[4] << 12),
  };

  // Store each word least-significant byte first.
  for (unsigned i = 0; i < 4; i++) {
    for (unsigned j = 0; j < 8; j++) {
      s[8 * i + j] = (uint8_t)(words[i] >> (8 * j));
    }
  }
}
// h = 0
//
// Zero-fills the whole |fe| object, so every limb of |h| becomes 0.
static void fe_0(fe *h) {
OPENSSL_memset(h, 0, sizeof(fe));
}
// h = 0, for the |fe_loose| type.
//
// Zero-fills the whole |fe_loose| object, so every limb of |h| becomes 0.
static void fe_loose_0(fe_loose *h) {
OPENSSL_memset(h, 0, sizeof(fe_loose));
}
// h = 1
//
// Zero-fills the whole |fe| object and then sets the lowest limb to 1.
static void fe_1(fe *h) {
OPENSSL_memset(h, 0, sizeof(fe));
h->v[0] = 1;
}
// h = 1, for the |fe_loose| type.
//
// Zero-fills the whole |fe_loose| object and then sets the lowest limb to 1.
static void fe_loose_1(fe_loose *h) {
OPENSSL_memset(h, 0, sizeof(fe_loose));
h->v[0] = 1;
}
// fe_add_impl sets out[i] = in1[i] + in2[i] for each of the five limbs. No
// carries are propagated between limbs. |out| may be the same array as |in1|
// or |in2|: each output limb depends only on the input limbs at that index.
static void fe_add_impl(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
  for (unsigned i = 0; i < 5; i++) {
    const uint64_t lhs = in1[i];
    const uint64_t rhs = in2[i];
    out[i] = lhs + rhs;
  }
}
// h = f + g
// Can overlap h with f or g.
//
// The inputs are checked against the |assert_fe| bound and the limb-wise sum
// written to |h| against the wider |assert_fe_loose| bound. |fe_add_impl| adds
// limb by limb without propagating carries.
static void fe_add(fe_loose *h, const fe *f, const fe *g) {
assert_fe(f->v);
assert_fe(g->v);
fe_add_impl(h->v, f->v, g->v);
assert_fe_loose(h->v);
}
// fe_sub_impl sets out[i] = (bias[i] + in1[i]) - in2[i] for each of the five
// limbs, where the bias limbs are twice the radix-2^51 limbs of 2^255 - 19.
// Adding the bias leaves the value unchanged modulo 2^255 - 19 while keeping
// each limb from wrapping below zero for suitably bounded inputs. No carries
// are propagated. |out| may be the same array as |in1| or |in2|.
static void fe_sub_impl(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
  // 2 * (2^51 - 19) followed by four copies of 2 * (2^51 - 1).
  static const uint64_t kBias[5] = {
      0xfffffffffffda, 0xffffffffffffe, 0xffffffffffffe,
      0xffffffffffffe, 0xffffffffffffe,
  };
  for (unsigned i = 0; i < 5; i++) {
    const uint64_t minuend = in1[i];
    const uint64_t subtrahend = in2[i];
    out[i] = (kBias[i] + minuend) - subtrahend;
  }
}
// h = f - g
// Can overlap h with f or g.
//
// The inputs are checked against the |assert_fe| bound and the result written
// to |h| against the wider |assert_fe_loose| bound. |fe_sub_impl| adds a fixed
// per-limb bias before subtracting and does not propagate carries.
static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
assert_fe(f->v);
assert_fe(g->v);
fe_sub_impl(h->v, f->v, g->v);
assert_fe_loose(h->v);
}
// fe_carry_impl propagates carries through the five radix-2^51 limbs of
// |in1| and writes the result to |out|. The carry out of the top limb is
// multiplied by 19 and folded back into limb 0 (2^255 = 19 mod 2^255 - 19),
// after which two further carry steps are applied. The final carry is added
// to limb 2 without masking. |out| may be the same array as |in1|.
static void fe_carry_impl(uint64_t out[5], const uint64_t in1[5]) {
  const uint64_t kMask51 = 0x7ffffffffffff;

  // Copy the input first so that |out| may alias |in1|.
  uint64_t limb[5];
  for (unsigned i = 0; i < 5; i++) {
    limb[i] = in1[i];
  }

  // First pass: carry from limb 0 up through limb 4.
  uint64_t carry = 0;
  for (unsigned i = 0; i < 5; i++) {
    const uint64_t t = carry + limb[i];
    carry = t >> 0x33;
    limb[i] = t & kMask51;
  }

  // Fold the top carry into limb 0 and carry twice more.
  const uint64_t t0 = limb[0] + (0x13 * carry);
  const uint64_t t1 = (t0 >> 0x33) + limb[1];
  out[0] = t0 & kMask51;
  out[1] = t1 & kMask51;
  out[2] = (t1 >> 0x33) + limb[2];
  out[3] = limb[3];
  out[4] = limb[4];
}
// fe_carry runs the carry chain of |fe_carry_impl| over the limbs of |f| and
// stores the result in |h|. The input is checked against the
// |assert_fe_loose| bound and the output against the narrower |assert_fe|
// bound.
static void fe_carry(fe *h, const fe_loose* f) {
assert_fe_loose(f->v);
fe_carry_impl(h->v, f->v);
assert_fe(h->v);
}
// fe_mul_impl multiplies the five-limb, radix-2^51 values |in1| and |in2| and
// writes the partially reduced product to |out|. Both inputs must satisfy the
// |assert_fe_loose| bound; the output is checked against the |assert_fe|
// bound. All inputs are read into locals before |out| is written.
//
// The body is machine-generated straight-line code (nested single-declaration
// scopes); statement order matters for the carry chain.
static void fe_mul_impl(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
assert_fe_loose(in1);
assert_fe_loose(in2);
// x5, x7, x9, x11, x10 are limbs 0..4 of |in1|.
{ const uint64_t x10 = in1[4];
{ const uint64_t x11 = in1[3];
{ const uint64_t x9 = in1[2];
{ const uint64_t x7 = in1[1];
{ const uint64_t x5 = in1[0];
// x13, x15, x17, x19, x18 are limbs 0..4 of |in2|.
{ const uint64_t x18 = in2[4];
{ const uint64_t x19 = in2[3];
{ const uint64_t x17 = in2[2];
{ const uint64_t x15 = in2[1];
{ const uint64_t x13 = in2[0];
// x20..x24: 128-bit sums of the limb products whose limb indices add up to
// 0, 1, 2, 3 and 4 respectively.
{ uint128_t x20 = ((uint128_t)x5 * x13);
{ uint128_t x21 = (((uint128_t)x5 * x15) + ((uint128_t)x7 * x13));
{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((uint128_t)x9 * x13)) + ((uint128_t)x7 * x15));
{ uint128_t x23 = (((((uint128_t)x5 * x19) + ((uint128_t)x11 * x13)) + ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15));
{ uint128_t x24 = ((((((uint128_t)x5 * x18) + ((uint128_t)x10 * x13)) + ((uint128_t)x11 * x15)) + ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17));
// Limbs 4, 1, 2, 3 of |in1| pre-multiplied by 19 (0x13), in 64 bits, for the
// products that wrap past limb 4.
{ uint64_t x25 = (x10 * 0x13);
{ uint64_t x26 = (x7 * 0x13);
{ uint64_t x27 = (x9 * 0x13);
{ uint64_t x28 = (x11 * 0x13);
// x29..x32: columns 0..3 with the wrapped products (limb indices adding up to
// 5..8) folded in via the 19-multiples above. Column 4 (x24) has none.
{ uint128_t x29 = ((((x20 + ((uint128_t)x25 * x15)) + ((uint128_t)x26 * x18)) + ((uint128_t)x27 * x19)) + ((uint128_t)x28 * x17));
{ uint128_t x30 = (((x21 + ((uint128_t)x25 * x17)) + ((uint128_t)x27 * x18)) + ((uint128_t)x28 * x19));
{ uint128_t x31 = ((x22 + ((uint128_t)x25 * x19)) + ((uint128_t)x28 * x18));
{ uint128_t x32 = (x23 + ((uint128_t)x25 * x18));
// Carry chain: keep the low 51 bits (0x33 = 51) of each column and add the
// rest to the next column.
{ uint64_t x33 = (uint64_t) (x29 >> 0x33);
{ uint64_t x34 = ((uint64_t)x29 & 0x7ffffffffffff);
{ uint128_t x35 = (x33 + x30);
{ uint64_t x36 = (uint64_t) (x35 >> 0x33);
{ uint64_t x37 = ((uint64_t)x35 & 0x7ffffffffffff);
{ uint128_t x38 = (x36 + x31);
{ uint64_t x39 = (uint64_t) (x38 >> 0x33);
{ uint64_t x40 = ((uint64_t)x38 & 0x7ffffffffffff);
{ uint128_t x41 = (x39 + x32);
{ uint64_t x42 = (uint64_t) (x41 >> 0x33);
{ uint64_t x43 = ((uint64_t)x41 & 0x7ffffffffffff);
{ uint128_t x44 = (x42 + x24);
{ uint64_t x45 = (uint64_t) (x44 >> 0x33);
{ uint64_t x46 = ((uint64_t)x44 & 0x7ffffffffffff);
// The carry out of column 4 is multiplied by 19 and folded into limb 0,
// followed by two more carry steps; the last carry lands in limb 2 unmasked.
{ uint64_t x47 = (x34 + (0x13 * x45));
{ uint64_t x48 = (x47 >> 0x33);
{ uint64_t x49 = (x47 & 0x7ffffffffffff);
{ uint64_t x50 = (x48 + x37);
{ uint64_t x51 = (x50 >> 0x33);
{ uint64_t x52 = (x50 & 0x7ffffffffffff);
out[0] = x49;
out[1] = x52;
out[2] = (x51 + x40);
out[3] = x43;
out[4] = x46;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
assert_fe(out);
}
// fe_mul_ltt computes h = f * g via |fe_mul_impl|, with |h| typed as
// |fe_loose| and both inputs typed as |fe|.
static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// fe_mul_llt computes h = f * g via |fe_mul_impl|, with |h| and |f| typed as
// |fe_loose| and |g| typed as |fe|.
static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// fe_mul_ttt computes h = f * g via |fe_mul_impl|, with the output and both
// inputs typed as |fe|.
static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// fe_mul_tlt computes h = f * g via |fe_mul_impl|, with |h| and |g| typed as
// |fe| and |f| typed as |fe_loose|.
static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// fe_mul_ttl computes h = f * g via |fe_mul_impl|, with |h| and |f| typed as
// |fe| and |g| typed as |fe_loose|.
static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// fe_mul_tll computes h = f * g via |fe_mul_impl|, with |h| typed as |fe| and
// both inputs typed as |fe_loose|.
static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// fe_sqr_impl squares the five-limb, radix-2^51 value |in1| and writes the
// partially reduced result to |out|. The input must satisfy the
// |assert_fe_loose| bound; the output is checked against the |assert_fe|
// bound. All inputs are read into locals before |out| is written.
//
// The body is machine-generated straight-line code (nested single-declaration
// scopes); statement order matters for the carry chain.
static void fe_sqr_impl(uint64_t out[5], const uint64_t in1[5]) {
assert_fe_loose(in1);
// x2, x4, x6, x8, x7 are limbs 0..4 of |in1|.
{ const uint64_t x7 = in1[4];
{ const uint64_t x8 = in1[3];
{ const uint64_t x6 = in1[2];
{ const uint64_t x4 = in1[1];
{ const uint64_t x2 = in1[0];
// Doubled limbs for the cross terms, and 19-multiples (0x13) for the terms
// that wrap past limb 4: x9 = 2*limb0, x10 = 2*limb1, x11 = 2*19*limb2,
// x12 = 19*limb4, x13 = 2*19*limb4.
{ uint64_t x9 = (x2 * 0x2);
{ uint64_t x10 = (x4 * 0x2);
{ uint64_t x11 = ((x6 * 0x2) * 0x13);
{ uint64_t x12 = (x7 * 0x13);
{ uint64_t x13 = (x12 * 0x2);
// x14..x18: 128-bit column sums for output limbs 0..4.
{ uint128_t x14 = ((((uint128_t)x2 * x2) + ((uint128_t)x13 * x4)) + ((uint128_t)x11 * x8));
{ uint128_t x15 = ((((uint128_t)x9 * x4) + ((uint128_t)x13 * x6)) + ((uint128_t)x8 * (x8 * 0x13)));
{ uint128_t x16 = ((((uint128_t)x9 * x6) + ((uint128_t)x4 * x4)) + ((uint128_t)x13 * x8));
{ uint128_t x17 = ((((uint128_t)x9 * x8) + ((uint128_t)x10 * x6)) + ((uint128_t)x7 * x12));
{ uint128_t x18 = ((((uint128_t)x9 * x7) + ((uint128_t)x10 * x8)) + ((uint128_t)x6 * x6));
// Carry chain: keep the low 51 bits (0x33 = 51) of each column and add the
// rest to the next column.
{ uint64_t x19 = (uint64_t) (x14 >> 0x33);
{ uint64_t x20 = ((uint64_t)x14 & 0x7ffffffffffff);
{ uint128_t x21 = (x19 + x15);
{ uint64_t x22 = (uint64_t) (x21 >> 0x33);
{ uint64_t x23 = ((uint64_t)x21 & 0x7ffffffffffff);
{ uint128_t x24 = (x22 + x16);
{ uint64_t x25 = (uint64_t) (x24 >> 0x33);
{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
{ uint128_t x27 = (x25 + x17);
{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
{ uint128_t x30 = (x28 + x18);
{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
// The carry out of column 4 is multiplied by 19 and folded into limb 0,
// followed by two more carry steps; the last carry lands in limb 2 unmasked.
{ uint64_t x33 = (x20 + (0x13 * x31));
{ uint64_t x34 = (x33 >> 0x33);
{ uint64_t x35 = (x33 & 0x7ffffffffffff);
{ uint64_t x36 = (x34 + x23);
{ uint64_t x37 = (x36 >> 0x33);
{ uint64_t x38 = (x36 & 0x7ffffffffffff);
out[0] = x35;
out[1] = x38;
out[2] = (x37 + x26);
out[3] = x29;
out[4] = x32;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
assert_fe(out);
}
// fe_sq_tl computes h = f * f via |fe_sqr_impl|, with |h| typed as |fe| and
// |f| typed as |fe_loose|.
static void fe_sq_tl(fe *h, const fe_loose *f) {
fe_sqr_impl(h->v, f->v);
}
// fe_sq_tt computes h = f * f via |fe_sqr_impl|, with both |h| and |f| typed
// as |fe|.
static void fe_sq_tt(fe *h, const fe *f) {
fe_sqr_impl(h->v, f->v);
}
// fe_cswap exchanges the limbs of |f| and |g| when b == 1 and leaves both
// untouched when b == 0. The same masked-XOR sequence runs for either value
// of |b|; there is no branch on it.
//
// Preconditions: b in {0,1}.
static void fe_cswap(fe *f, fe *g, uint64_t b) {
  // 0 -> all zeros, 1 -> all ones.
  const uint64_t mask = 0 - b;
  for (unsigned i = 0; i < 5; i++) {
    // |delta| is f ^ g when swapping and 0 otherwise.
    const uint64_t delta = (f->v[i] ^ g->v[i]) & mask;
    f->v[i] ^= delta;
    g->v[i] ^= delta;
  }
}
// NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0..
static void fe_mul_121666_impl(uint64_t out[5], const uint64_t in1[5]) {
{ const uint64_t x10 = in1[4];
{ const uint64_t x11 = in1[3];
{ const uint64_t x9 = in1[2];
{ const uint64_t x7 = in1[1];
{ const uint64_t x5 = in1[0];
{ const uint64_t x18 = 0;
{ const uint64_t x19 = 0;
{ const uint64_t x17 = 0;
{ const uint64_t x15 = 0;
{ const uint64_t x13 = 121666;
{ uint128_t x20 = ((uint128_t)x5 * x13);
{ uint128_t x21 = (((uint128_t)x5 * x15) + ((uint128_t)x7 * x13));
{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((uint128_t)x9 * x13)) + ((uint128_t)x7 * x15));
{ uint128_t x23 = (((((uint128_t)x5 * x19) + ((uint128_t)x11 * x13)) + ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15));
{ uint128_t x24 = ((((((uint128_t)x5 * x18) + ((uint128_t)x10 * x13)) + ((uint128_t)x11 * x15)) + ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17));
{ uint64_t x25 = (x10 * 0x13);
{ uint64_t x26 = (x7 * 0x13);
{ uint64_t x27 = (x9 * 0x13);
{ uint64_t x28 = (x11 * 0x13);
{ uint128_t x29 = ((((x20 + ((uint128_t)x25 * x15)) + ((uint128_t)x26 * x18)) + ((uint128_t)x27 * x19)) + ((uint128_t)x28 * x17));
{ uint128_t x30 = (((x21 + ((uint128_t)x25 * x17)) + ((uint128_t)x27 * x18)) + ((uint128_t)x28 * x19));
{ uint128_t x31 = ((x22 + ((uint128_t)x25 * x19)) + ((uint128_t)x28 * x18));
{ uint128_t x32 = (x23 + ((uint128_t)x25 * x18));
{ uint64_t x33 = (uint64_t) (x29 >> 0x33);
{ uint64_t x34 = ((uint64_t)x29 & 0x7ffffffffffff);
{ uint128_t x35 = (x33 + x30);
{ uint64_t x36 = (uint64_t) (x35 >> 0x33);
{ uint64_t x37 = ((uint64_t)x35 & 0x7ffffffffffff);
{ uint128_t x38 = (x36 + x31);
{ uint64_t x39 = (uint64_t) (x38 >> 0x33);
{ uint64_t x40 = ((uint64_t)x38 & 0x7ffffffffffff);
{ uint128_t x41 = (x39 + x32);
{ uint64_t x42 = (uint64_t) (x41 >> 0x33);
{ uint64_t x43 = ((uint64_t)x41 & 0x7ffffffffffff);
{ uint128_t x44 = (x42 + x24);
{ uint64_t x45 = (uint64_t) (x44 >> 0x33);
{ uint64_t x46 = ((uint64_t)x44 & 0x7ffffffffffff);
{ uint64_t x47 = (x34 + (0x13 * x45));
{ uint64_t x48 = (x47 >> 0x33);
{ uint64_t x49 = (x47 & 0x7ffffffffffff);
{ uint64_t x50 = (x48 + x37);
{ uint64_t x51 = (x50 >> 0x33);
{ uint64_t x52 = (x50 & 0x7ffffffffffff);
out[0] = x49;
out[1] = x52;
out[2] = (x51 + x40);
out[3] = x43;
out[4] = x46;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
// fe_mul121666 computes h = 121666 * f via |fe_mul_121666_impl|. The input is
// checked against the |assert_fe_loose| bound and the output against the
// narrower |assert_fe| bound.
static void fe_mul121666(fe *h, const fe_loose *f) {
assert_fe_loose(f->v);
fe_mul_121666_impl(h->v, f->v);
assert_fe(h->v);
}
// fe_neg_impl sets out[i] = bias[i] - in2[i] for each of the five limbs, where
// the bias limbs are twice the radix-2^51 limbs of 2^255 - 19. It is the
// Fiat-synthesized |fe_sub_impl| specialised for a minuend (|in1|) of zero.
// No carries are propagated. |out| may be the same array as |in2|.
static void fe_neg_impl(uint64_t out[5], const uint64_t in2[5]) {
  // 2 * (2^51 - 19) followed by four copies of 2 * (2^51 - 1).
  static const uint64_t kBias[5] = {
      0xfffffffffffda, 0xffffffffffffe, 0xffffffffffffe,
      0xffffffffffffe, 0xffffffffffffe,
  };
  for (unsigned i = 0; i < 5; i++) {
    const uint64_t subtrahend = in2[i];
    out[i] = kBias[i] - subtrahend;
  }
}
// h = -f
//
// The input is checked against the |assert_fe| bound and the result written to
// |h| against the wider |assert_fe_loose| bound. |fe_neg_impl| subtracts each
// limb from a fixed per-limb bias and does not propagate carries.
static void fe_neg(fe_loose *h, const fe *f) {
assert_fe(f->v);
fe_neg_impl(h->v, f->v);
assert_fe_loose(h->v);
}
// fe_cmov copies the limbs of |g| into |f| when b == 1 and leaves |f|
// untouched when b == 0; |g| is never modified. The same masked-XOR sequence
// runs for either value of |b|; there is no branch on it.
//
// Preconditions: b in {0,1}.
static void fe_cmov(fe_loose *f, const fe_loose *g, uint64_t b) {
  // 0 -> all zeros, 1 -> all ones.
  const uint64_t mask = 0 - b;
  for (unsigned i = 0; i < 5; i++) {
    // |delta| is f ^ g when moving and 0 otherwise.
    const uint64_t delta = (f->v[i] ^ g->v[i]) & mask;
    f->v[i] ^= delta;
  }
}
#else
// assert_fe asserts that each of the ten 32-bit limbs of |f| is strictly
// below 1.125 * 2^26 (even-indexed limbs) or 1.125 * 2^25 (odd-indexed
// limbs). In this file it is applied to the limbs of |fe| values. |f| may be
// any expression that yields a pointer to (or array of) limbs; it is
// parenthesized so that arguments such as |p + 0| index correctly.
#define assert_fe(f) do { \
  for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
    assert((f)[_assert_fe_i] < 1.125*(1<<(26-(_assert_fe_i&1)))); \
  } \
} while (0)
// assert_fe_loose asserts that each of the ten 32-bit limbs of |f| is
// strictly below 3.375 * 2^26 (even-indexed limbs) or 3.375 * 2^25
// (odd-indexed limbs). In this file it is applied to the limbs of |fe_loose|
// values. |f| may be any expression that yields a pointer to (or array of)
// limbs; it is parenthesized so that arguments such as |p + 0| index
// correctly.
#define assert_fe_loose(f) do { \
  for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
    assert((f)[_assert_fe_i] < 3.375*(1<<(26-(_assert_fe_i&1)))); \
  } \
} while (0)
// assert_fe_frozen asserts that each of the ten 32-bit limbs of |f| is
// strictly below 2^26 (even-indexed limbs) or 2^25 (odd-indexed limbs). |f|
// may be any expression that yields a pointer to (or array of) limbs; it is
// parenthesized so that arguments such as |p + 0| index correctly.
#define assert_fe_frozen(f) do { \
  for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
    assert((f)[_assert_fe_i] < (1u<<(26-(_assert_fe_i&1)))); \
  } \
} while (0)
// fe_frombytes_impl unpacks the 32 little-endian bytes at |s| into ten limbs
// in |h|. Limb widths are 26, 25, 26, 25, 26 bits taken from the low 128 bits
// of the input, then 25, 26, 25, 26, 25 bits taken from the high 128 bits.
// The top bit of |s[31]| is ignored.
static void fe_frombytes_impl(uint32_t h[10], const uint8_t *s) {
  const uint32_t kMask26 = ((uint32_t)1 << 26) - 1;
  const uint32_t kMask25 = ((uint32_t)1 << 25) - 1;

  // Read the input as eight 32-bit little-endian words before writing |h|.
  uint32_t w[8];
  for (unsigned i = 0; i < 8; i++) {
    w[i] = (uint32_t)load_4(s + 4 * i);
  }

  // Each limb takes the bits left over from the previous word followed by
  // enough bits from the next word to reach its width.
  h[0] = w[0] & kMask26;                              // 26 from w[0]
  h[1] = ((w[0] >> 26) | (w[1] << 6)) & kMask25;      //  6 + 19 = 25
  h[2] = ((w[1] >> 19) | (w[2] << 13)) & kMask26;     // 13 + 13 = 26
  h[3] = ((w[2] >> 13) | (w[3] << 19)) & kMask25;     // 19 +  6 = 25
  h[4] = w[3] >> 6;                                   // remaining 26
  h[5] = w[4] & kMask25;                              // 25 from w[4]
  h[6] = ((w[4] >> 25) | (w[5] << 7)) & kMask26;      //  7 + 19 = 26
  h[7] = ((w[5] >> 19) | (w[6] << 13)) & kMask25;     // 13 + 12 = 25
  h[8] = ((w[6] >> 12) | (w[7] << 20)) & kMask26;     // 20 +  6 = 26
  h[9] = (w[7] >> 6) & kMask25;                       // 25 of 26, drop top bit
  assert_fe(h);
}
// fe_frombytes loads a field element into |h| from the 32 bytes at |s| by
// forwarding to |fe_frombytes_impl| on the limb array |h->v|. The top bit of
// the final byte is ignored by that routine.
static void fe_frombytes(fe *h, const uint8_t *s) {
fe_frombytes_impl(h->v, s);
}
// fe_freeze conditionally subtracts p = 2^255 - 19 from |in1| and writes the
// result to |out| as ten limbs of alternating 26 and 25 bits. It first
// computes |in1| - p with a borrow chain; if that underflows, p is added back,
// so the net effect is to subtract p only when |in1| >= p. The add-back is
// selected with a mask rather than a branch.
static void fe_freeze(uint32_t out[10], const uint32_t in1[10]) {
  // Limbs of p, least significant first; even limbs are 26 bits wide and odd
  // limbs 25 bits wide.
  static const uint32_t kPrime[10] = {
      0x3ffffed, 0x1ffffff, 0x3ffffff, 0x1ffffff, 0x3ffffff,
      0x1ffffff, 0x3ffffff, 0x1ffffff, 0x3ffffff, 0x1ffffff,
  };

  // diff = in1 - p, tracking the borrow across limbs. Limbs are handled in
  // (26-bit, 25-bit) pairs.
  uint32_t diff[10];
  uint8_t /*bool*/ borrow = 0x0;
  for (unsigned i = 0; i < 10; i += 2) {
    borrow = subborrow_u26(borrow, in1[i], kPrime[i], &diff[i]);
    borrow = subborrow_u25(borrow, in1[i + 1], kPrime[i + 1], &diff[i + 1]);
  }

  // All ones if the subtraction borrowed (in1 < p), zero otherwise.
  const uint32_t add_back = cmovznz32(borrow, 0x0, 0xffffffff);

  // out = diff + (p or 0). The carry out of the top limb is discarded.
  uint8_t /*bool*/ carry = 0x0;
  for (unsigned i = 0; i < 10; i += 2) {
    carry = addcarryx_u26(carry, diff[i], add_back & kPrime[i], &out[i]);
    carry = addcarryx_u25(carry, diff[i + 1], add_back & kPrime[i + 1],
                          &out[i + 1]);
  }
}
// fe_tobytes writes the 32-byte little-endian encoding of |f| to |s|. The
// element is first passed through |fe_freeze|, then its ten limbs (26, 25,
// 26, 25, 26, 25, 26, 25, 26, 25 bits) are packed contiguously, limb 0 in the
// lowest bits.
static void fe_tobytes(uint8_t s[32], const fe *f) {
  assert_fe(f->v);
  uint32_t h[10];
  fe_freeze(h, f->v);
  assert_fe_frozen(h);

  // Repack the limbs into eight 32-bit words. Limbs 0..4 fill exactly the
  // first four words and limbs 5..9 the last four. Every limb from
  // |fe_freeze| fits its nominal width, so the shifted pieces never overlap.
  const uint32_t words[8] = {
      h[0] | (h[1] << 26),
      (h[1] >> 6) | (h[2] << 19),
      (h[2] >> 13) | (h[3] << 13),
      (h[3] >> 19) | (h[4] << 6),
      h[5] | (h[6] << 25),
      (h[6] >> 7) | (h[7] << 19),
      (h[7] >> 13) | (h[8] << 12),
      (h[8] >> 20) | (h[9] << 6),
  };

  // Store each word least-significant byte first.
  for (unsigned i = 0; i < 8; i++) {
    for (unsigned j = 0; j < 4; j++) {
      s[4 * i + j] = (uint8_t)(words[i] >> (8 * j));
    }
  }
}
// h = 0
//
// Zero-fills the whole |fe| object, so every limb of |h| becomes 0.
static void fe_0(fe *h) {
OPENSSL_memset(h, 0, sizeof(fe));
}
// h = 0, for the |fe_loose| type.
//
// Zero-fills the whole |fe_loose| object, so every limb of |h| becomes 0.
static void fe_loose_0(fe_loose *h) {
OPENSSL_memset(h, 0, sizeof(fe_loose));
}
// h = 1
//
// Zero-fills the whole |fe| object and then sets the lowest limb to 1.
static void fe_1(fe *h) {
OPENSSL_memset(h, 0, sizeof(fe));
h->v[0] = 1;
}
// h = 1, for the |fe_loose| type.
//
// Zero-fills the whole |fe_loose| object and then sets the lowest limb to 1.
static void fe_loose_1(fe_loose *h) {
OPENSSL_memset(h, 0, sizeof(fe_loose));
h->v[0] = 1;
}
// fe_add_impl sets out[i] = in1[i] + in2[i] for each of the ten limbs. No
// carries are propagated between limbs. |out| may be the same array as |in1|
// or |in2|: each output limb depends only on the input limbs at that index.
static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
  for (unsigned i = 0; i < 10; i++) {
    const uint32_t lhs = in1[i];
    const uint32_t rhs = in2[i];
    out[i] = lhs + rhs;
  }
}
// h = f + g
// Can overlap h with f or g.
//
// The inputs are checked against the |assert_fe| bound and the limb-wise sum
// written to |h| against the wider |assert_fe_loose| bound. |fe_add_impl| adds
// limb by limb without propagating carries.
static void fe_add(fe_loose *h, const fe *f, const fe *g) {
assert_fe(f->v);
assert_fe(g->v);
fe_add_impl(h->v, f->v, g->v);
assert_fe_loose(h->v);
}
// fe_sub_impl sets out[i] = (bias[i] + in1[i]) - in2[i] for each of the ten
// limbs, where the bias limbs are twice the limbs of 2^255 - 19 in the
// alternating 26/25-bit representation. Adding the bias leaves the value
// unchanged modulo 2^255 - 19 while keeping each limb from wrapping below
// zero for suitably bounded inputs. No carries are propagated. |out| may be
// the same array as |in1| or |in2|.
static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
  // 2 * (2^26 - 19), then alternating 2 * (2^25 - 1) and 2 * (2^26 - 1).
  static const uint32_t kBias[10] = {
      0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe,
      0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe,
  };
  for (unsigned i = 0; i < 10; i++) {
    const uint32_t minuend = in1[i];
    const uint32_t subtrahend = in2[i];
    out[i] = (kBias[i] + minuend) - subtrahend;
  }
}
// h = f - g
// Can overlap h with f or g.
//
// The inputs are checked against the |assert_fe| bound and the result written
// to |h| against the wider |assert_fe_loose| bound. |fe_sub_impl| adds a fixed
// per-limb bias before subtracting and does not propagate carries.
static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
assert_fe(f->v);
assert_fe(g->v);
fe_sub_impl(h->v, f->v, g->v);
assert_fe_loose(h->v);
}
// fe_carry_impl propagates carries through the ten limbs of |in1| (26 bits
// for even-indexed limbs, 25 bits for odd-indexed limbs) and writes the
// result to |out|. The carry out of the top limb is multiplied by 19 and
// folded back into limb 0 (2^255 = 19 mod 2^255 - 19), after which two
// further carry steps are applied. The final carry is added to limb 2 without
// masking. |out| may be the same array as |in1|.
static void fe_carry_impl(uint32_t out[10], const uint32_t in1[10]) {
  // Copy the input first so that |out| may alias |in1|.
  uint32_t limb[10];
  for (unsigned i = 0; i < 10; i++) {
    limb[i] = in1[i];
  }

  // First pass: carry from limb 0 up through limb 9.
  uint32_t carry = 0;
  for (unsigned i = 0; i < 10; i++) {
    const unsigned width = 26 - (i & 1);
    const uint32_t mask = ((uint32_t)1 << width) - 1;
    const uint32_t t = carry + limb[i];
    carry = t >> width;
    limb[i] = t & mask;
  }

  // Fold the top carry into limb 0 and carry twice more.
  const uint32_t t0 = limb[0] + (0x13 * carry);
  const uint32_t t1 = (t0 >> 0x1a) + limb[1];
  out[0] = t0 & 0x3ffffff;
  out[1] = t1 & 0x1ffffff;
  out[2] = (t1 >> 0x19) + limb[2];
  for (unsigned i = 3; i < 10; i++) {
    out[i] = limb[i];
  }
}
// fe_carry runs the carry chain of |fe_carry_impl| over the limbs of |f| and
// stores the result in |h|. The input is checked against the
// |assert_fe_loose| bound and the output against the narrower |assert_fe|
// bound.
static void fe_carry(fe *h, const fe_loose* f) {
assert_fe_loose(f->v);
fe_carry_impl(h->v, f->v);
assert_fe(h->v);
}
// out = in1 * in2, on ten-limb operands.
//
// Both inputs are checked with assert_fe_loose on entry and the result is
// checked with assert_fe on exit. All input limbs are copied into locals before
// anything is stored, so |out| may be the same array as |in1| or |in2|.
static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
assert_fe_loose(in1);
assert_fe_loose(in2);
// Load the limbs of in1 (x5, x7, ..., x19, x21, x20 = limbs 0..9).
{ const uint32_t x20 = in1[9];
{ const uint32_t x21 = in1[8];
{ const uint32_t x19 = in1[7];
{ const uint32_t x17 = in1[6];
{ const uint32_t x15 = in1[5];
{ const uint32_t x13 = in1[4];
{ const uint32_t x11 = in1[3];
{ const uint32_t x9 = in1[2];
{ const uint32_t x7 = in1[1];
{ const uint32_t x5 = in1[0];
// Load the limbs of in2 (x23, x25, ..., x37, x39, x38 = limbs 0..9).
{ const uint32_t x38 = in2[9];
{ const uint32_t x39 = in2[8];
{ const uint32_t x37 = in2[7];
{ const uint32_t x35 = in2[6];
{ const uint32_t x33 = in2[5];
{ const uint32_t x31 = in2[4];
{ const uint32_t x29 = in2[3];
{ const uint32_t x27 = in2[2];
{ const uint32_t x25 = in2[1];
{ const uint32_t x23 = in2[0];
// Schoolbook product: x40..x58 are the 19 column sums, accumulated in 64 bits.
// A product of two odd-indexed limbs is doubled (the 0x2 factors), which
// accounts for the alternating 26/25-bit limb widths.
{ uint64_t x40 = ((uint64_t)x23 * x5);
{ uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5));
{ uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5));
{ uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5));
{ uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5));
{ uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5));
{ uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5));
{ uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5));
{ uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5));
{ uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5));
{ uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9));
{ uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9));
{ uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13));
{ uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13));
{ uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17));
{ uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17));
{ uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19))));
{ uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21));
{ uint64_t x58 = ((uint64_t)(0x2 * x38) * x20);
// Fold the nine high columns (x50..x58) onto the low columns (x40..x48). Each
// high column is multiplied by 19, computed as (x << 4) + (x << 1) + x.
{ uint64_t x59 = (x48 + (x58 << 0x4));
{ uint64_t x60 = (x59 + (x58 << 0x1));
{ uint64_t x61 = (x60 + x58);
{ uint64_t x62 = (x47 + (x57 << 0x4));
{ uint64_t x63 = (x62 + (x57 << 0x1));
{ uint64_t x64 = (x63 + x57);
{ uint64_t x65 = (x46 + (x56 << 0x4));
{ uint64_t x66 = (x65 + (x56 << 0x1));
{ uint64_t x67 = (x66 + x56);
{ uint64_t x68 = (x45 + (x55 << 0x4));
{ uint64_t x69 = (x68 + (x55 << 0x1));
{ uint64_t x70 = (x69 + x55);
{ uint64_t x71 = (x44 + (x54 << 0x4));
{ uint64_t x72 = (x71 + (x54 << 0x1));
{ uint64_t x73 = (x72 + x54);
{ uint64_t x74 = (x43 + (x53 << 0x4));
{ uint64_t x75 = (x74 + (x53 << 0x1));
{ uint64_t x76 = (x75 + x53);
{ uint64_t x77 = (x42 + (x52 << 0x4));
{ uint64_t x78 = (x77 + (x52 << 0x1));
{ uint64_t x79 = (x78 + x52);
{ uint64_t x80 = (x41 + (x51 << 0x4));
{ uint64_t x81 = (x80 + (x51 << 0x1));
{ uint64_t x82 = (x81 + x51);
{ uint64_t x83 = (x40 + (x50 << 0x4));
{ uint64_t x84 = (x83 + (x50 << 0x1));
{ uint64_t x85 = (x84 + x50);
// Carry chain over the ten reduced columns: even limbs keep 26 bits (0x1a),
// odd limbs keep 25 bits (0x19); the excess moves into the next column.
{ uint64_t x86 = (x85 >> 0x1a);
{ uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
{ uint64_t x88 = (x86 + x82);
{ uint64_t x89 = (x88 >> 0x19);
{ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
{ uint64_t x91 = (x89 + x79);
{ uint64_t x92 = (x91 >> 0x1a);
{ uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
{ uint64_t x94 = (x92 + x76);
{ uint64_t x95 = (x94 >> 0x19);
{ uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
{ uint64_t x97 = (x95 + x73);
{ uint64_t x98 = (x97 >> 0x1a);
{ uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
{ uint64_t x100 = (x98 + x70);
{ uint64_t x101 = (x100 >> 0x19);
{ uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
{ uint64_t x103 = (x101 + x67);
{ uint64_t x104 = (x103 >> 0x1a);
{ uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
{ uint64_t x106 = (x104 + x64);
{ uint64_t x107 = (x106 >> 0x19);
{ uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
{ uint64_t x109 = (x107 + x61);
{ uint64_t x110 = (x109 >> 0x1a);
{ uint32_t x111 = ((uint32_t)x109 & 0x3ffffff);
{ uint64_t x112 = (x110 + x49);
{ uint64_t x113 = (x112 >> 0x19);
{ uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
// The carry out of limb 9 wraps around to limb 0 multiplied by 19 (0x13),
// followed by a short carry through limbs 0, 1 and 2.
{ uint64_t x115 = (x87 + (0x13 * x113));
{ uint32_t x116 = (uint32_t) (x115 >> 0x1a);
{ uint32_t x117 = ((uint32_t)x115 & 0x3ffffff);
{ uint32_t x118 = (x116 + x90);
{ uint32_t x119 = (x118 >> 0x19);
{ uint32_t x120 = (x118 & 0x1ffffff);
out[0] = x117;
out[1] = x120;
out[2] = (x119 + x93);
out[3] = x96;
out[4] = x99;
out[5] = x102;
out[6] = x105;
out[7] = x108;
out[8] = x111;
out[9] = x114;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
assert_fe(out);
}
// h = f * g. The three-letter suffix names the types of (h, f, g): 'l' is
// |fe_loose| and 't' is |fe|. Here the result is stored in an |fe_loose| and
// both inputs are |fe|.
static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// h = f * g, with h and f of type |fe_loose| and g of type |fe|.
static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// h = f * g, with h, f and g all of type |fe|.
static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// h = f * g, with h and g of type |fe| and f of type |fe_loose|.
static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// h = f * g, with h and f of type |fe| and g of type |fe_loose|.
static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// h = f * g, with h of type |fe| and both f and g of type |fe_loose|.
static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
fe_mul_impl(h->v, f->v, g->v);
}
// out = in1 * in1, on a ten-limb operand.
//
// The input is checked with assert_fe_loose on entry and the result with
// assert_fe on exit. All input limbs are copied into locals before anything is
// stored, so |out| may be the same array as |in1|.
static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) {
assert_fe_loose(in1);
// Load the limbs of in1 (x2, x4, ..., x16, x18, x17 = limbs 0..9).
{ const uint32_t x17 = in1[9];
{ const uint32_t x18 = in1[8];
{ const uint32_t x16 = in1[7];
{ const uint32_t x14 = in1[6];
{ const uint32_t x12 = in1[5];
{ const uint32_t x10 = in1[4];
{ const uint32_t x8 = in1[3];
{ const uint32_t x6 = in1[2];
{ const uint32_t x4 = in1[1];
{ const uint32_t x2 = in1[0];
// x19..x37 are the 19 column sums of the square, accumulated in 64 bits.
// Symmetric cross terms are computed once and doubled.
{ uint64_t x19 = ((uint64_t)x2 * x2);
{ uint64_t x20 = ((uint64_t)(0x2 * x2) * x4);
{ uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6)));
{ uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8)));
{ uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10));
{ uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12)));
{ uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12)));
{ uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16)));
{ uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12))))));
{ uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17)));
{ uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17)))));
{ uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17)));
{ uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17))))));
{ uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17)));
{ uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17)));
{ uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17)));
{ uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17));
{ uint64_t x36 = ((uint64_t)(0x2 * x18) * x17);
{ uint64_t x37 = ((uint64_t)(0x2 * x17) * x17);
// Fold the nine high columns (x29..x37) onto the low columns (x19..x27). Each
// high column is multiplied by 19, computed as (x << 4) + (x << 1) + x.
{ uint64_t x38 = (x27 + (x37 << 0x4));
{ uint64_t x39 = (x38 + (x37 << 0x1));
{ uint64_t x40 = (x39 + x37);
{ uint64_t x41 = (x26 + (x36 << 0x4));
{ uint64_t x42 = (x41 + (x36 << 0x1));
{ uint64_t x43 = (x42 + x36);
{ uint64_t x44 = (x25 + (x35 << 0x4));
{ uint64_t x45 = (x44 + (x35 << 0x1));
{ uint64_t x46 = (x45 + x35);
{ uint64_t x47 = (x24 + (x34 << 0x4));
{ uint64_t x48 = (x47 + (x34 << 0x1));
{ uint64_t x49 = (x48 + x34);
{ uint64_t x50 = (x23 + (x33 << 0x4));
{ uint64_t x51 = (x50 + (x33 << 0x1));
{ uint64_t x52 = (x51 + x33);
{ uint64_t x53 = (x22 + (x32 << 0x4));
{ uint64_t x54 = (x53 + (x32 << 0x1));
{ uint64_t x55 = (x54 + x32);
{ uint64_t x56 = (x21 + (x31 << 0x4));
{ uint64_t x57 = (x56 + (x31 << 0x1));
{ uint64_t x58 = (x57 + x31);
{ uint64_t x59 = (x20 + (x30 << 0x4));
{ uint64_t x60 = (x59 + (x30 << 0x1));
{ uint64_t x61 = (x60 + x30);
{ uint64_t x62 = (x19 + (x29 << 0x4));
{ uint64_t x63 = (x62 + (x29 << 0x1));
{ uint64_t x64 = (x63 + x29);
// Carry chain over the ten reduced columns: even limbs keep 26 bits (0x1a),
// odd limbs keep 25 bits (0x19); the excess moves into the next column.
{ uint64_t x65 = (x64 >> 0x1a);
{ uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
{ uint64_t x67 = (x65 + x61);
{ uint64_t x68 = (x67 >> 0x19);
{ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
{ uint64_t x70 = (x68 + x58);
{ uint64_t x71 = (x70 >> 0x1a);
{ uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
{ uint64_t x73 = (x71 + x55);
{ uint64_t x74 = (x73 >> 0x19);
{ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
{ uint64_t x76 = (x74 + x52);
{ uint64_t x77 = (x76 >> 0x1a);
{ uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
{ uint64_t x79 = (x77 + x49);
{ uint64_t x80 = (x79 >> 0x19);
{ uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
{ uint64_t x82 = (x80 + x46);
{ uint64_t x83 = (x82 >> 0x1a);
{ uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
{ uint64_t x85 = (x83 + x43);
{ uint64_t x86 = (x85 >> 0x19);
{ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
{ uint64_t x88 = (x86 + x40);
{ uint64_t x89 = (x88 >> 0x1a);
{ uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
{ uint64_t x91 = (x89 + x28);
{ uint64_t x92 = (x91 >> 0x19);
{ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
// The carry out of limb 9 wraps around to limb 0 multiplied by 19 (0x13),
// followed by a short carry through limbs 0, 1 and 2.
{ uint64_t x94 = (x66 + (0x13 * x92));
{ uint32_t x95 = (uint32_t) (x94 >> 0x1a);
{ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
{ uint32_t x97 = (x95 + x69);
{ uint32_t x98 = (x97 >> 0x19);
{ uint32_t x99 = (x97 & 0x1ffffff);
out[0] = x96;
out[1] = x99;
out[2] = (x98 + x72);
out[3] = x75;
out[4] = x78;
out[5] = x81;
out[6] = x84;
out[7] = x87;
out[8] = x90;
out[9] = x93;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
assert_fe(out);
}
// h = f^2, with h of type |fe| and f of type |fe_loose|.
static void fe_sq_tl(fe *h, const fe_loose *f) {
fe_sqr_impl(h->v, f->v);
}
// h = f^2, with both h and f of type |fe|. Callers in this file pass the same
// object for h and f to square in place.
static void fe_sq_tt(fe *h, const fe *f) {
fe_sqr_impl(h->v, f->v);
}
// Conditional swap without branching on |b|:
//   b == 1: (f,g) becomes (g,f)
//   b == 0: (f,g) is left unchanged
//
// Preconditions: b in {0,1}.
static void fe_cswap(fe *f, fe *g, unsigned int b) {
  // 0 - b is all-zero bits for b == 0 and all-one bits for b == 1.
  const unsigned int mask = 0 - b;
  unsigned limb;
  for (limb = 0; limb < 10; limb++) {
    // |diff| is f ^ g when swapping and 0 otherwise; XORing it into both
    // limbs exchanges them or leaves them untouched.
    const uint32_t diff = (f->v[limb] ^ g->v[limb]) & mask;
    f->v[limb] ^= diff;
    g->v[limb] ^= diff;
  }
}
// out = in1 * 121666.
//
// NOTE: this is the fiat-crypto fe_mul specialised to a second operand whose
// limbs are (121666, 0, 0, ...). Every partial product involving a zero limb
// vanishes, so column i of the product is just 121666 * in1[i] and no high
// columns remain to be folded. What is left is the usual carry chain: even
// limbs keep 26 bits, odd limbs keep 25 bits, and the carry out of limb 9
// wraps around to limb 0 multiplied by 19.
//
// All input limbs are consumed before the first store, so |out| may be the
// same array as |in1|.
static void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10]) {
  uint32_t limb[10];
  uint64_t acc = 0;
  unsigned i;

  // Multiply and carry upwards in a single pass. After each step |acc| holds
  // the carry destined for the next limb.
  for (i = 0; i < 10; i++) {
    acc += (uint64_t)121666 * in1[i];
    if (i & 1) {
      limb[i] = (uint32_t)acc & 0x1ffffff;
      acc >>= 0x19;
    } else {
      limb[i] = (uint32_t)acc & 0x3ffffff;
      acc >>= 0x1a;
    }
  }

  // Wrap the top carry around to the bottom and tidy up limbs 0..2.
  const uint64_t wrapped0 = limb[0] + (0x13 * acc);
  const uint32_t wrapped1 = (uint32_t)(wrapped0 >> 0x1a) + limb[1];

  out[0] = (uint32_t)wrapped0 & 0x3ffffff;
  out[1] = wrapped1 & 0x1ffffff;
  out[2] = (wrapped1 >> 0x19) + limb[2];
  for (i = 3; i < 10; i++) {
    out[i] = limb[i];
  }
}
// h = f * 121666.
//
// The input is checked with assert_fe_loose and the output with assert_fe; the
// arithmetic itself is done by fe_mul_121666_impl.
static void fe_mul121666(fe *h, const fe_loose *f) {
assert_fe_loose(f->v);
fe_mul_121666_impl(h->v, f->v);
assert_fe(h->v);
}
// out = -in2.
//
// Adapted from the Fiat-synthesized subtraction with the first operand (the
// minuend) fixed at zero: each output limb is a per-limb constant minus the
// corresponding input limb. The constants are twice the limbs of 2^255 - 19 in
// the alternating 26/25-bit radix. Each limb is read before it is written, so
// |out| may be the same array as |in2|.
static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) {
  static const uint32_t kBias[10] = {
      0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe,
      0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe,
  };
  unsigned i;
  for (i = 0; i < 10; i++) {
    out[i] = kBias[i] - in2[i];
  }
}
// h = -f
//
// The input is checked with assert_fe and the output with assert_fe_loose, so
// the result must be carried (see fe_carry) before it is used where an |fe| is
// required.
static void fe_neg(fe_loose *h, const fe *f) {
assert_fe(f->v);
fe_neg_impl(h->v, f->v);
assert_fe_loose(h->v);
}
// Conditional move without branching on |b|:
//   b == 1: f is overwritten with g
//   b == 0: f is left unchanged
// g is never modified.
//
// Preconditions: b in {0,1}.
static void fe_cmov(fe_loose *f, const fe_loose *g, unsigned b) {
  // 0 - b is all-zero bits for b == 0 and all-one bits for b == 1.
  const unsigned mask = 0 - b;
  unsigned limb;
  for (limb = 0; limb < 10; limb++) {
    // |diff| is f ^ g when moving and 0 otherwise, so f ^ diff is g or f.
    const uint32_t diff = (f->v[limb] ^ g->v[limb]) & mask;
    f->v[limb] ^= diff;
  }
}
#endif // BORINGSSL_CURVE25519_64BIT
// h = f
//
// Uses OPENSSL_memmove, so h and f may be the same object.
static void fe_copy(fe *h, const fe *f) {
OPENSSL_memmove(h, f, sizeof(fe));
}
// h = f, copying an |fe| into an |fe_loose|.
//
// The copy is a raw byte copy, which is only valid because the two types have
// the same size; the compile-time assertion enforces that.
static void fe_copy_lt(fe_loose *h, const fe *f) {
OPENSSL_COMPILE_ASSERT(sizeof(fe_loose) == sizeof(fe),
fe_and_fe_loose_mismatch);
OPENSSL_memmove(h, f, sizeof(fe));
}
#if !defined(OPENSSL_SMALL)
// h = f, for two |fe_loose| values. Only compiled when OPENSSL_SMALL is not
// defined; its one caller in this part of the file is table_select.
static void fe_copy_ll(fe_loose *h, const fe_loose *f) {
OPENSSL_memmove(h, f, sizeof(fe_loose));
}
#endif // !defined(OPENSSL_SMALL)
// out = z^(2^255 - 21), computed with a fixed chain of squarings and
// multiplications. With p = 2^255 - 19 the exponent is p - 2, so for nonzero z
// this is the multiplicative inverse of z by Fermat's little theorem.
//
// The sequence of operations does not depend on the value of z. The comments
// below track the exponent of z held in each temporary.
static void fe_loose_invert(fe *out, const fe_loose *z) {
fe t0;
fe t1;
fe t2;
fe t3;
int i;
// t0 = z^2
fe_sq_tl(&t0, z);
// t1 = z^8 (one squaring here plus one in the loop)
fe_sq_tt(&t1, &t0);
for (i = 1; i < 2; ++i) {
fe_sq_tt(&t1, &t1);
}
// t1 = z^9
fe_mul_tlt(&t1, z, &t1);
// t0 = z^11
fe_mul_ttt(&t0, &t0, &t1);
// t2 = z^22
fe_sq_tt(&t2, &t0);
// t1 = z^31 = z^(2^5 - 1)
fe_mul_ttt(&t1, &t1, &t2);
// t2 = t1^(2^5)
fe_sq_tt(&t2, &t1);
for (i = 1; i < 5; ++i) {
fe_sq_tt(&t2, &t2);
}
// t1 = z^(2^10 - 1)
fe_mul_ttt(&t1, &t2, &t1);
// t2 = t1^(2^10)
fe_sq_tt(&t2, &t1);
for (i = 1; i < 10; ++i) {
fe_sq_tt(&t2, &t2);
}
// t2 = z^(2^20 - 1)
fe_mul_ttt(&t2, &t2, &t1);
// t3 = t2^(2^20)
fe_sq_tt(&t3, &t2);
for (i = 1; i < 20; ++i) {
fe_sq_tt(&t3, &t3);
}
// t2 = z^(2^40 - 1)
fe_mul_ttt(&t2, &t3, &t2);
// t2 = t2^(2^10)
fe_sq_tt(&t2, &t2);
for (i = 1; i < 10; ++i) {
fe_sq_tt(&t2, &t2);
}
// t1 = z^(2^50 - 1)
fe_mul_ttt(&t1, &t2, &t1);
// t2 = t1^(2^50)
fe_sq_tt(&t2, &t1);
for (i = 1; i < 50; ++i) {
fe_sq_tt(&t2, &t2);
}
// t2 = z^(2^100 - 1)
fe_mul_ttt(&t2, &t2, &t1);
// t3 = t2^(2^100)
fe_sq_tt(&t3, &t2);
for (i = 1; i < 100; ++i) {
fe_sq_tt(&t3, &t3);
}
// t2 = z^(2^200 - 1)
fe_mul_ttt(&t2, &t3, &t2);
// t2 = t2^(2^50)
fe_sq_tt(&t2, &t2);
for (i = 1; i < 50; ++i) {
fe_sq_tt(&t2, &t2);
}
// t1 = z^(2^250 - 1)
fe_mul_ttt(&t1, &t2, &t1);
// t1 = t1^(2^5) = z^(2^255 - 32)
fe_sq_tt(&t1, &t1);
for (i = 1; i < 5; ++i) {
fe_sq_tt(&t1, &t1);
}
// out = z^(2^255 - 32) * z^11 = z^(2^255 - 21)
fe_mul_ttt(out, &t1, &t0);
}
// out = 1/z for an |fe| input. The value is copied into an |fe_loose| so that
// fe_loose_invert can be reused.
static void fe_invert(fe *out, const fe *z) {
fe_loose l;
fe_copy_lt(&l, z);
fe_loose_invert(out, &l);
}
// return 0 if f == 0
// return 1 if f != 0
//
// The element is carried, serialized to 32 bytes with fe_tobytes, and compared
// against an all-zero buffer with CRYPTO_memcmp, so the test is made on the
// byte encoding rather than on the limbs.
static int fe_isnonzero(const fe_loose *f) {
fe tight;
fe_carry(&tight, f);
uint8_t s[32];
fe_tobytes(s, &tight);
static const uint8_t zero[32] = {0};
return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;
}
// return 1 if f is in {1,3,5,...,q-2}
// return 0 if f is in {0,2,4,...,q-1}
//
// That is, the "sign" of f is the least significant bit of the 32-byte
// encoding produced by fe_tobytes.
static int fe_isnegative(const fe *f) {
uint8_t s[32];
fe_tobytes(s, f);
return s[0] & 1;
}
// h = 2 * f^2
//
// The doubling is done with fe_add, whose result is an |fe_loose|, so the sum
// is carried back into h.
static void fe_sq2_tt(fe *h, const fe *f) {
// h = f^2
fe_sq_tt(h, f);
// h = h + h
fe_loose tmp;
fe_add(&tmp, h, h);
fe_carry(h, &tmp);
}
// out = z^(2^252 - 3), computed with a fixed chain of squarings and
// multiplications. With q = 2^255 - 19 this exponent is (q - 5) / 8, which is
// how the caller in x25519_ge_frombytes_vartime uses it.
//
// |out| may be the same object as |z|: z is last read by the final multiply,
// which is also the only write to out. The comments below track the exponent
// of z held in each temporary.
static void fe_pow22523(fe *out, const fe *z) {
fe t0;
fe t1;
fe t2;
int i;
// t0 = z^2
fe_sq_tt(&t0, z);
// t1 = z^8 (one squaring here plus one in the loop)
fe_sq_tt(&t1, &t0);
for (i = 1; i < 2; ++i) {
fe_sq_tt(&t1, &t1);
}
// t1 = z^9
fe_mul_ttt(&t1, z, &t1);
// t0 = z^11
fe_mul_ttt(&t0, &t0, &t1);
// t0 = z^22
fe_sq_tt(&t0, &t0);
// t0 = z^31 = z^(2^5 - 1)
fe_mul_ttt(&t0, &t1, &t0);
// t1 = t0^(2^5)
fe_sq_tt(&t1, &t0);
for (i = 1; i < 5; ++i) {
fe_sq_tt(&t1, &t1);
}
// t0 = z^(2^10 - 1)
fe_mul_ttt(&t0, &t1, &t0);
// t1 = t0^(2^10)
fe_sq_tt(&t1, &t0);
for (i = 1; i < 10; ++i) {
fe_sq_tt(&t1, &t1);
}
// t1 = z^(2^20 - 1)
fe_mul_ttt(&t1, &t1, &t0);
// t2 = t1^(2^20)
fe_sq_tt(&t2, &t1);
for (i = 1; i < 20; ++i) {
fe_sq_tt(&t2, &t2);
}
// t1 = z^(2^40 - 1)
fe_mul_ttt(&t1, &t2, &t1);
// t1 = t1^(2^10)
fe_sq_tt(&t1, &t1);
for (i = 1; i < 10; ++i) {
fe_sq_tt(&t1, &t1);
}
// t0 = z^(2^50 - 1)
fe_mul_ttt(&t0, &t1, &t0);
// t1 = t0^(2^50)
fe_sq_tt(&t1, &t0);
for (i = 1; i < 50; ++i) {
fe_sq_tt(&t1, &t1);
}
// t1 = z^(2^100 - 1)
fe_mul_ttt(&t1, &t1, &t0);
// t2 = t1^(2^100)
fe_sq_tt(&t2, &t1);
for (i = 1; i < 100; ++i) {
fe_sq_tt(&t2, &t2);
}
// t1 = z^(2^200 - 1)
fe_mul_ttt(&t1, &t2, &t1);
// t1 = t1^(2^50)
fe_sq_tt(&t1, &t1);
for (i = 1; i < 50; ++i) {
fe_sq_tt(&t1, &t1);
}
// t0 = z^(2^250 - 1)
fe_mul_ttt(&t0, &t1, &t0);
// t0 = t0^4 = z^(2^252 - 4)
fe_sq_tt(&t0, &t0);
for (i = 1; i < 2; ++i) {
fe_sq_tt(&t0, &t0);
}
// out = z^(2^252 - 4) * z = z^(2^252 - 3)
fe_mul_ttt(out, &t0, z);
}
// Group operations.
// Encodes the projective point |h| = (X:Y:Z) into 32 bytes at |s|.
//
// The affine coordinates x = X/Z and y = Y/Z are computed with one inversion;
// |s| receives the encoding of y from fe_tobytes, and the sign of x (see
// fe_isnegative) is XORed into the most significant bit of s[31].
void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
fe recip;
fe x;
fe y;
fe_invert(&recip, &h->Z);
fe_mul_ttt(&x, &h->X, &recip);
fe_mul_ttt(&y, &h->Y, &recip);
fe_tobytes(s, &y);
s[31] ^= fe_isnegative(&x) << 7;
}
// Encodes the point |h| into 32 bytes at |s|, using the same steps as
// x25519_ge_tobytes but for a |ge_p3| input: y = Y/Z is serialized and the
// sign of x = X/Z is XORed into the most significant bit of s[31]. The T
// coordinate is not used.
static void ge_p3_tobytes(uint8_t s[32], const ge_p3 *h) {
fe recip;
fe x;
fe y;
fe_invert(&recip, &h->Z);
fe_mul_ttt(&x, &h->X, &recip);
fe_mul_ttt(&y, &h->Y, &recip);
fe_tobytes(s, &y);
s[31] ^= fe_isnegative(&x) << 7;
}
// Decodes the 32-byte point encoding |s| into |h|.
//
// Returns 1 on success and 0 if no x coordinate exists for the encoded y. On
// failure |h| is left partially written. As the name says, the running time
// depends on the input, through the branches on fe_isnonzero and
// fe_isnegative below.
//
// With u = y^2 - 1 and v = d*y^2 + 1, a candidate x = u*v^3*(u*v^7)^((q-5)/8)
// is computed. If v*x^2 == u it is kept; if v*x^2 == -u it is multiplied by
// |sqrtm1|; otherwise decoding fails. Finally x is negated if its sign does not
// match the top bit of s[31].
int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) {
fe u;
fe_loose v;
fe v3;
fe vxx;
fe_loose check;
fe_frombytes(&h->Y, s);
fe_1(&h->Z);
// v3 temporarily holds y^2 and vxx holds d*y^2.
fe_sq_tt(&v3, &h->Y);
fe_mul_ttt(&vxx, &v3, &d);
fe_sub(&v, &v3, &h->Z); // u = y^2-1
fe_carry(&u, &v);
fe_add(&v, &vxx, &h->Z); // v = dy^2+1
fe_sq_tl(&v3, &v);
fe_mul_ttl(&v3, &v3, &v); // v3 = v^3
// h->X = (v^3)^2 * v = v^7
fe_sq_tt(&h->X, &v3);
fe_mul_ttl(&h->X, &h->X, &v);
fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7
fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8)
fe_mul_ttt(&h->X, &h->X, &v3);
fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8)
// vxx = v * x^2, to be compared against u and -u.
fe_sq_tt(&vxx, &h->X);
fe_mul_ttl(&vxx, &vxx, &v);
fe_sub(&check, &vxx, &u);
if (fe_isnonzero(&check)) {
// v*x^2 != u; accept only if v*x^2 == -u, in which case x is corrected by
// multiplying with sqrtm1.
fe_add(&check, &vxx, &u);
if (fe_isnonzero(&check)) {
return 0;
}
fe_mul_ttt(&h->X, &h->X, &sqrtm1);
}
// Choose the root whose sign matches the bit stored in the encoding.
if (fe_isnegative(&h->X) != (s[31] >> 7)) {
fe_loose t;
fe_neg(&t, &h->X);
fe_carry(&h->X, &t);
}
// T = X * Y (Z is 1 here).
fe_mul_ttt(&h->T, &h->X, &h->Y);
return 1;
}
// Sets |h| to (X, Y, Z) = (0, 1, 1), i.e. the affine point (0, 1). This is the
// starting value of the accumulator in x25519_ge_scalarmult.
static void ge_p2_0(ge_p2 *h) {
fe_0(&h->X);
fe_1(&h->Y);
fe_1(&h->Z);
}
// Sets |h| to (X, Y, Z, T) = (0, 1, 1, 0), i.e. the affine point (0, 1) with
// T = X*Y = 0. This is the starting value of the accumulator in the
// scalar-multiplication routines.
static void ge_p3_0(ge_p3 *h) {
fe_0(&h->X);
fe_1(&h->Y);
fe_1(&h->Z);
fe_0(&h->T);
}
// Sets |h| to the cached form of the point (0, 1):
// YplusX = 1, YminusX = 1, Z = 1, T2d = 0.
static void ge_cached_0(ge_cached *h) {
fe_loose_1(&h->YplusX);
fe_loose_1(&h->YminusX);
fe_loose_1(&h->Z);
fe_loose_0(&h->T2d);
}
// Sets |h| to the precomputed form of the point (0, 1):
// yplusx = 1, yminusx = 1, xy2d = 0.
static void ge_precomp_0(ge_precomp *h) {
fe_loose_1(&h->yplusx);
fe_loose_1(&h->yminusx);
fe_loose_0(&h->xy2d);
}
// r = p
//
// Converts a |ge_p3| to a |ge_p2| by copying X, Y and Z and dropping T.
static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
fe_copy(&r->X, &p->X);
fe_copy(&r->Y, &p->Y);
fe_copy(&r->Z, &p->Z);
}
// r = p
//
// Converts a |ge_p3| to the |ge_cached| form consumed by x25519_ge_add and
// x25519_ge_sub: YplusX = Y + X, YminusX = Y - X, Z = Z and T2d = T * d2.
void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
fe_add(&r->YplusX, &p->Y, &p->X);
fe_sub(&r->YminusX, &p->Y, &p->X);
fe_copy_lt(&r->Z, &p->Z);
fe_mul_ltt(&r->T2d, &p->T, &d2);
}
// r = p
//
// Converts a |ge_p1p1| to a |ge_p2|: X = p.X * p.T, Y = p.Y * p.Z and
// Z = p.Z * p.T.
void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
fe_mul_tll(&r->X, &p->X, &p->T);
fe_mul_tll(&r->Y, &p->Y, &p->Z);
fe_mul_tll(&r->Z, &p->Z, &p->T);
}
// r = p
//
// Converts a |ge_p1p1| to a |ge_p3|: X = p.X * p.T, Y = p.Y * p.Z,
// Z = p.Z * p.T and T = p.X * p.Y. This costs one multiplication more than
// x25519_ge_p1p1_to_p2.
void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
fe_mul_tll(&r->X, &p->X, &p->T);
fe_mul_tll(&r->Y, &p->Y, &p->Z);
fe_mul_tll(&r->Z, &p->Z, &p->T);
fe_mul_tll(&r->T, &p->X, &p->Y);
}
// r = p
//
// Converts a |ge_p1p1| to a |ge_cached| by going through an intermediate
// |ge_p3|.
static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) {
ge_p3 t;
x25519_ge_p1p1_to_p3(&t, p);
x25519_ge_p3_to_cached(r, &t);
}
// r = 2 * p
//
// With p = (X:Y:Z), the result is left in |ge_p1p1| form as
//   r.X = (X+Y)^2 - (Y^2 + X^2)
//   r.Y = Y^2 + X^2
//   r.Z = Y^2 - X^2
//   r.T = 2*Z^2 - (Y^2 - X^2)
// The fields of r are also used as scratch space along the way, so the order
// of the statements matters.
static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
fe trX, trZ, trT;
fe t0;
// trX = X^2, trZ = Y^2, trT = 2*Z^2
fe_sq_tt(&trX, &p->X);
fe_sq_tt(&trZ, &p->Y);
fe_sq2_tt(&trT, &p->Z);
// t0 = (X+Y)^2, using r->Y as scratch for X+Y
fe_add(&r->Y, &p->X, &p->Y);
fe_sq_tl(&t0, &r->Y);
fe_add(&r->Y, &trZ, &trX);
fe_sub(&r->Z, &trZ, &trX);
// trZ is reused to hold carried copies of r->Y and then r->Z, because fe_sub
// takes |fe| inputs.
fe_carry(&trZ, &r->Y);
fe_sub(&r->X, &t0, &trZ);
fe_carry(&trZ, &r->Z);
fe_sub(&r->T, &trT, &trZ);
}
// r = 2 * p
//
// Doubles a |ge_p3| by dropping T (ge_p3_to_p2) and reusing ge_p2_dbl.
static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
ge_p2 q;
ge_p3_to_p2(&q, p);
ge_p2_dbl(r, &q);
}
// r = p + q
//
// Mixed addition of a |ge_p3| and a |ge_precomp| (which has no Z field; only
// p's Z is used). With A = (Y+X)*q.yplusx, B = (Y-X)*q.yminusx,
// C = q.xy2d*T and D = 2*Z, the result is
//   r.X = A - B, r.Y = A + B, r.Z = D + C, r.T = D - C.
// The fields of r double as scratch space, so the statement order matters.
static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
fe trY, trZ, trT;
fe_add(&r->X, &p->Y, &p->X);
fe_sub(&r->Y, &p->Y, &p->X);
// trZ = A, trY = B, trT = C
fe_mul_tll(&trZ, &r->X, &q->yplusx);
fe_mul_tll(&trY, &r->Y, &q->yminusx);
fe_mul_tlt(&trT, &q->xy2d, &p->T);
// r->T temporarily holds D = 2*Z until it is carried into trZ below.
fe_add(&r->T, &p->Z, &p->Z);
fe_sub(&r->X, &trZ, &trY);
fe_add(&r->Y, &trZ, &trY);
fe_carry(&trZ, &r->T);
fe_add(&r->Z, &trZ, &trT);
fe_sub(&r->T, &trZ, &trT);
}
// r = p - q
//
// Same as ge_madd with the roles of q.yplusx and q.yminusx exchanged and the
// sign of the xy2d term flipped. With A = (Y+X)*q.yminusx,
// B = (Y-X)*q.yplusx, C = q.xy2d*T and D = 2*Z, the result is
//   r.X = A - B, r.Y = A + B, r.Z = D - C, r.T = D + C.
// The fields of r double as scratch space, so the statement order matters.
static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
fe trY, trZ, trT;
fe_add(&r->X, &p->Y, &p->X);
fe_sub(&r->Y, &p->Y, &p->X);
// trZ = A, trY = B, trT = C
fe_mul_tll(&trZ, &r->X, &q->yminusx);
fe_mul_tll(&trY, &r->Y, &q->yplusx);
fe_mul_tlt(&trT, &q->xy2d, &p->T);
// r->T temporarily holds D = 2*Z until it is carried into trZ below.
fe_add(&r->T, &p->Z, &p->Z);
fe_sub(&r->X, &trZ, &trY);
fe_add(&r->Y, &trZ, &trY);
fe_carry(&trZ, &r->T);
fe_sub(&r->Z, &trZ, &trT);
fe_add(&r->T, &trZ, &trT);
}
// r = p + q
//
// Addition of a |ge_p3| and a |ge_cached|. With A = (Y+X)*q.YplusX,
// B = (Y-X)*q.YminusX, C = q.T2d*T and D = 2*Z*q.Z, the result is
//   r.X = A - B, r.Y = A + B, r.Z = D + C, r.T = D - C.
// The fields of r double as scratch space, so the statement order matters.
void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
fe trX, trY, trZ, trT;
fe_add(&r->X, &p->Y, &p->X);
fe_sub(&r->Y, &p->Y, &p->X);
// trZ = A, trY = B, trT = C, trX = Z*q.Z
fe_mul_tll(&trZ, &r->X, &q->YplusX);
fe_mul_tll(&trY, &r->Y, &q->YminusX);
fe_mul_tlt(&trT, &q->T2d, &p->T);
fe_mul_ttl(&trX, &p->Z, &q->Z);
// r->T temporarily holds D until it is carried into trZ below.
fe_add(&r->T, &trX, &trX);
fe_sub(&r->X, &trZ, &trY);
fe_add(&r->Y, &trZ, &trY);
fe_carry(&trZ, &r->T);
fe_add(&r->Z, &trZ, &trT);
fe_sub(&r->T, &trZ, &trT);
}
// r = p - q
//
// Same as x25519_ge_add with the roles of q.YplusX and q.YminusX exchanged and
// the sign of the T2d term flipped. With A = (Y+X)*q.YminusX,
// B = (Y-X)*q.YplusX, C = q.T2d*T and D = 2*Z*q.Z, the result is
//   r.X = A - B, r.Y = A + B, r.Z = D - C, r.T = D + C.
// The fields of r double as scratch space, so the statement order matters.
void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
fe trX, trY, trZ, trT;
fe_add(&r->X, &p->Y, &p->X);
fe_sub(&r->Y, &p->Y, &p->X);
// trZ = A, trY = B, trT = C, trX = Z*q.Z
fe_mul_tll(&trZ, &r->X, &q->YminusX);
fe_mul_tll(&trY, &r->Y, &q->YplusX);
fe_mul_tlt(&trT, &q->T2d, &p->T);
fe_mul_ttl(&trX, &p->Z, &q->Z);
// r->T temporarily holds D until it is carried into trZ below.
fe_add(&r->T, &trX, &trX);
fe_sub(&r->X, &trZ, &trY);
fe_add(&r->Y, &trZ, &trY);
fe_carry(&trZ, &r->T);
fe_sub(&r->Z, &trZ, &trT);
fe_add(&r->T, &trZ, &trT);
}
// Returns 1 if |b| == |c| and 0 otherwise, without branching on the inputs.
static uint8_t equal(signed char b, signed char c) {
  // The XOR of the two bytes is zero exactly when they are equal.
  const uint8_t diff = (uint8_t)b ^ (uint8_t)c;
  // Widen to 32 bits and subtract one: only a zero difference wraps around to
  // 0xffffffff; 1..255 become 0..254. The top bit therefore signals equality.
  uint32_t wide = diff;
  wide -= 1;
  return (uint8_t)(wide >> 31);
}
// Conditionally overwrites |t| with |u|: if b == 1, t becomes u; if b == 0, t
// is unchanged. All three fields go through fe_cmov, which requires b to be 0
// or 1.
static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
fe_cmov(&t->yplusx, &u->yplusx, b);
fe_cmov(&t->yminusx, &u->yminusx, b);
fe_cmov(&t->xy2d, &u->xy2d, b);
}
// Computes h = a * P using a compact table of 15 precomputed points.
//
// |a| is a 32-byte little-endian scalar. |precomp_table| holds 15 entries of
// 64 bytes each: a 32-byte x coordinate followed by a 32-byte y coordinate,
// both in the encoding accepted by fe_frombytes. Entry j-1 is selected when
// the 4-bit index built in the main loop equals j.
//
// Table lookups scan every entry with cmov, so which entries are touched does
// not depend on the scalar.
void x25519_ge_scalarmult_small_precomp(
ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) {
// precomp_table is first expanded into matching |ge_precomp|
// elements.
ge_precomp multiples[15];
unsigned i;
for (i = 0; i < 15; i++) {
const uint8_t *bytes = &precomp_table[i*(2 * 32)];
fe x, y;
fe_frombytes(&x, bytes);
fe_frombytes(&y, bytes + 32);
ge_precomp *out = &multiples[i];
// (yplusx, yminusx, xy2d) = (y + x, y - x, x * y * d2)
fe_add(&out->yplusx, &y, &x);
fe_sub(&out->yminusx, &y, &x);
fe_mul_ltt(&out->xy2d, &x, &y);
fe_mul_llt(&out->xy2d, &out->xy2d, &d2);
}
// See the comment above |k25519SmallPrecomp| about the structure of the
// precomputed elements. This loop does 64 additions and 64 doublings to
// calculate the result.
ge_p3_0(h);
// |i| is unsigned: counting down from 63, the decrement past 0 wraps around
// and the condition i < 64 then ends the loop.
for (i = 63; i < 64; i--) {
unsigned j;
signed char index = 0;
// Bit j of |index| is scalar bit (64 * j + i): bit (i & 7) of byte
// a[8 * j + i / 8].
for (j = 0; j < 4; j++) {
const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7));
index |= (bit << j);
}
// e = multiples[index - 1], or the value set by ge_precomp_0 when
// index == 0.
ge_precomp e;
ge_precomp_0(&e);
for (j = 1; j < 16; j++) {
cmov(&e, &multiples[j-1], equal(index, j));
}
// h = 2 * h, computed as h + h.
ge_cached cached;
ge_p1p1 r;
x25519_ge_p3_to_cached(&cached, h);
x25519_ge_add(&r, h, &cached);
x25519_ge_p1p1_to_p3(h, &r);
// h = h + e
ge_madd(&r, h, &e);
x25519_ge_p1p1_to_p3(h, &r);
}
}
#if defined(OPENSSL_SMALL)
// h = a * B for the OPENSSL_SMALL build: delegates to the compact-table
// routine with the |k25519SmallPrecomp| table.
void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp);
}
#else
// Returns 1 if |b| is negative and 0 otherwise, without branching on |b|.
// Converting to uint32_t maps a negative value to one with bit 31 set; that
// bit is then shifted down to become the result.
static uint8_t negative(signed char b) {
  const uint32_t widened = b;
  return (uint8_t)(widened >> 31);
}
// Sets |t| to b times the point represented by row |pos| of |k25519Precomp|,
// for b in [-8, 8].
//
// Each row holds eight entries; entry k-1 is chosen when |b| == k. For b == 0
// the result is the value set by ge_precomp_0. All eight entries are scanned
// with cmov and the sign is applied with a final cmov, so neither the memory
// access pattern nor the control flow depends on b.
static void table_select(ge_precomp *t, int pos, signed char b) {
ge_precomp minust;
uint8_t bnegative = negative(b);
// babs = |b|: when b is negative, 2*b is subtracted from b (mod 256).
uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
ge_precomp_0(t);
cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
// minust = -t: exchange yplusx and yminusx and negate xy2d.
fe_copy_ll(&minust.yplusx, &t->yminusx);
fe_copy_ll(&minust.yminusx, &t->yplusx);
// NOTE: the input table is canonical, but types don't encode it
fe tmp;
fe_carry(&tmp, &t->xy2d);
fe_neg(&minust.xy2d, &tmp);
cmov(t, &minust, bnegative);
}
// h = a * B
// where a = a[0]+256*a[1]+...+256^31 a[31]
// B is the Ed25519 base point (x,4/5) with x positive.
//
// Preconditions:
// a[31] <= 127
//
// The scalar is split into 64 radix-16 digits, which are recoded into the
// signed range [-8, 8]. The odd-position digits are accumulated first, the
// accumulator is multiplied by 16 (four doublings), and then the even-position
// digits are accumulated. Each digit is looked up with table_select.
void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
signed char e[64];
signed char carry;
ge_p1p1 r;
ge_p2 s;
ge_precomp t;
int i;
// Split each byte into its low and high nibble.
for (i = 0; i < 32; ++i) {
e[2 * i + 0] = (a[i] >> 0) & 15;
e[2 * i + 1] = (a[i] >> 4) & 15;
}
// each e[i] is between 0 and 15
// e[63] is between 0 and 7
// Recode into signed digits: a digit of 8 or more has 16 subtracted from it
// and a carry of 1 is passed to the next digit.
carry = 0;
for (i = 0; i < 63; ++i) {
e[i] += carry;
carry = e[i] + 8;
carry >>= 4;
e[i] -= carry << 4;
}
e[63] += carry;
// each e[i] is between -8 and 8
ge_p3_0(h);
// Accumulate the odd-position digits e[1], e[3], ..., e[63].
for (i = 1; i < 64; i += 2) {
table_select(&t, i / 2, e[i]);
ge_madd(&r, h, &t);
x25519_ge_p1p1_to_p3(h, &r);
}
// h = 16 * h, as four successive doublings. Only the last conversion needs
// the full |ge_p3| form.
ge_p3_dbl(&r, h);
x25519_ge_p1p1_to_p2(&s, &r);
ge_p2_dbl(&r, &s);
x25519_ge_p1p1_to_p2(&s, &r);
ge_p2_dbl(&r, &s);
x25519_ge_p1p1_to_p2(&s, &r);
ge_p2_dbl(&r, &s);
x25519_ge_p1p1_to_p3(h, &r);
// Accumulate the even-position digits e[0], e[2], ..., e[62].
for (i = 0; i < 64; i += 2) {
table_select(&t, i / 2, e[i]);
ge_madd(&r, h, &t);
x25519_ge_p1p1_to_p3(h, &r);
}
}
#endif
// Conditionally overwrites |t| with |u|: if b == 1, t becomes u; if b == 0, t
// is unchanged. All four fields go through fe_cmov, which requires b to be 0
// or 1.
//
// |u| is only read, so it is const-qualified, matching |cmov| and |fe_cmov|.
static void cmov_cached(ge_cached *t, const ge_cached *u, uint8_t b) {
  fe_cmov(&t->YplusX, &u->YplusX, b);
  fe_cmov(&t->YminusX, &u->YminusX, b);
  fe_cmov(&t->Z, &u->Z, b);
  fe_cmov(&t->T2d, &u->T2d, b);
}
// r = scalar * A.
// where scalar = scalar[0]+256*scalar[1]+...+256^31 scalar[31].
//
// A table Ai[j] = j * A for j = 0..15 is built first. The scalar is then
// consumed four bits at a time, most significant nibble first: each step
// multiplies the accumulator by 16 and adds the table entry selected by the
// nibble. The entry is selected by scanning the whole table with cmov_cached,
// so which entries are touched does not depend on the scalar.
void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A) {
// Ai_p2[k] holds k * A in |ge_p2| form for k = 1..7 (index 0 is unused); it
// is the input for the doublings that produce the even table entries.
ge_p2 Ai_p2[8];
ge_cached Ai[16];
ge_p1p1 t;
ge_cached_0(&Ai[0]);
x25519_ge_p3_to_cached(&Ai[1], A);
ge_p3_to_p2(&Ai_p2[1], A);
unsigned i;
for (i = 2; i < 16; i += 2) {
// Ai[i] = 2 * ((i / 2) * A)
ge_p2_dbl(&t, &Ai_p2[i / 2]);
ge_p1p1_to_cached(&Ai[i], &t);
if (i < 8) {
x25519_ge_p1p1_to_p2(&Ai_p2[i], &t);
}
// Ai[i + 1] = A + Ai[i]
x25519_ge_add(&t, A, &Ai[i]);
ge_p1p1_to_cached(&Ai[i + 1], &t);
if (i < 7) {
x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t);
}
}
ge_p2_0(r);
ge_p3 u;
for (i = 0; i < 256; i += 4) {
// u = 16 * r. Only the last doubling is converted to |ge_p3|, which is the
// form x25519_ge_add needs.
ge_p2_dbl(&t, r);
x25519_ge_p1p1_to_p2(r, &t);
ge_p2_dbl(&t, r);
x25519_ge_p1p1_to_p2(r, &t);
ge_p2_dbl(&t, r);
x25519_ge_p1p1_to_p2(r, &t);
ge_p2_dbl(&t, r);
x25519_ge_p1p1_to_p3(&u, &t);
// Extract the next nibble, starting from the top of scalar[31]: when i is a
// multiple of 8 the high nibble is taken (shift by 4), otherwise the low
// nibble (shift by 0).
uint8_t index = scalar[31 - i/8];
index >>= 4 - (i & 4);
index &= 0xf;
unsigned j;
ge_cached selected;
ge_cached_0(&selected);
for (j = 0; j < 16; j++) {
cmov_cached(&selected, &Ai[j], equal(j, index));
}
// r = u + Ai[index]
x25519_ge_add(&t, &u, &selected);
x25519_ge_p1p1_to_p2(r, &t);
}
}
// slide recodes the 256-bit little-endian integer |a| into 256 signed digits
// |r| such that sum(r[i] * 2^i) equals the input, every non-zero digit is odd
// and lies in [-15, 15]. Digits are merged greedily: a set bit up to six
// positions above a non-zero digit is either added into that digit or, if that
// would exceed 15, subtracted from it with a carry propagated upwards.
static void slide(signed char *r, const uint8_t *a) {
  // Start from the plain binary expansion.
  for (int bit = 0; bit < 256; bit++) {
    r[bit] = (a[bit / 8] >> (bit % 8)) & 1;
  }

  for (int pos = 0; pos < 256; pos++) {
    if (r[pos] == 0) {
      continue;
    }

    for (int span = 1; span <= 6 && pos + span < 256; span++) {
      if (r[pos + span] == 0) {
        continue;
      }

      const int term = r[pos + span] << span;
      if (r[pos] + term <= 15) {
        // Absorb the higher bit into the current digit.
        r[pos] += term;
        r[pos + span] = 0;
        continue;
      }

      if (r[pos] - term < -15) {
        // Neither adding nor subtracting keeps the digit in range.
        break;
      }

      // Subtract instead, and compensate by adding 2^(pos + span) to the
      // remaining binary digits: clear the run of ones, then set the first
      // zero above it (if there is one within the 256 digits).
      r[pos] -= term;
      int k = pos + span;
      while (k < 256 && r[k]) {
        r[k] = 0;
        k++;
      }
      if (k < 256) {
        r[k] = 1;
      }
    }
  }
}
// ge_double_scalarmult_vartime sets |r| to a * A + b * B, where
//   a = a[0] + 256*a[1] + ... + 256^31*a[31],
//   b = b[0] + 256*b[1] + ... + 256^31*b[31],
// and B is the Ed25519 base point (x, 4/5) with x positive.
//
// Both scalars are recoded with slide() and zero digits are skipped, so the
// work done depends on the values of |a| and |b|.
static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
                                         const ge_p3 *A, const uint8_t *b) {
  signed char aslide[256];
  signed char bslide[256];
  ge_cached Ai[8];  // A, 3A, 5A, 7A, 9A, 11A, 13A, 15A
  ge_p1p1 t;
  ge_p3 u;
  ge_p3 A2;
  int i;

  slide(aslide, a);
  slide(bslide, b);

  // Odd multiples of A: each entry is the previous one plus 2A.
  x25519_ge_p3_to_cached(&Ai[0], A);
  ge_p3_dbl(&t, A);
  x25519_ge_p1p1_to_p3(&A2, &t);
  for (i = 1; i < 8; i++) {
    x25519_ge_add(&t, &A2, &Ai[i - 1]);
    x25519_ge_p1p1_to_p3(&u, &t);
    x25519_ge_p3_to_cached(&Ai[i], &u);
  }

  ge_p2_0(r);

  // Skip the leading positions where both recoded scalars have a zero digit.
  i = 255;
  while (i >= 0 && aslide[i] == 0 && bslide[i] == 0) {
    i--;
  }

  // Double-and-add from the top digit down. Digits are odd, so digit / 2
  // indexes the table of odd multiples; negative digits subtract.
  while (i >= 0) {
    const int da = aslide[i];
    const int db = bslide[i];

    ge_p2_dbl(&t, r);

    if (da > 0) {
      x25519_ge_p1p1_to_p3(&u, &t);
      x25519_ge_add(&t, &u, &Ai[da / 2]);
    } else if (da < 0) {
      x25519_ge_p1p1_to_p3(&u, &t);
      x25519_ge_sub(&t, &u, &Ai[(-da) / 2]);
    }

    if (db > 0) {
      x25519_ge_p1p1_to_p3(&u, &t);
      ge_madd(&t, &u, &Bi[db / 2]);
    } else if (db < 0) {
      x25519_ge_p1p1_to_p3(&u, &t);
      ge_msub(&t, &u, &Bi[(-db) / 2]);
    }

    x25519_ge_p1p1_to_p2(r, &t);
    i--;
  }
}
// The set of scalars is \Z/l
// where l = 2^252 + 27742317777372353535851937790883648493.

// x25519_sc_reduce reduces a 512-bit scalar modulo l.
//
// Input:
//   s[0]+256*s[1]+...+256^63*s[63] = s
//
// Output:
//   s[0]+256*s[1]+...+256^31*s[31] = s mod l
//   where l = 2^252 + 27742317777372353535851937790883648493.
//   Overwrites s in place.
//
// The value is held in 24 signed limbs s0..s23 of 21 bits each. A limb s_k
// with k >= 12 is "folded" into limbs k-12 .. k-7 using the six constants
// (666643, 470296, 654183, -997805, 136657, -683901), which are the signed
// radix-2^21 limbs of 2^252 - l; this replaces s_k * 2^(21*k) by a congruent
// value modulo l.
//
// Carries may be negative. They are removed from a limb by multiplying by
// 2^21 rather than with `carry << 21`, because left-shifting a negative value
// is undefined behaviour in C. The right shifts of possibly negative values
// are kept from the original code and rely on the compiler implementing them
// as arithmetic shifts.
void x25519_sc_reduce(uint8_t s[64]) {
  // Unpack the 64 input bytes into 21-bit limbs; s23 keeps all remaining bits.
  int64_t s0 = 2097151 & load_3(s);
  int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
  int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
  int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
  int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
  int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
  int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
  int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
  int64_t s8 = 2097151 & load_3(s + 21);
  int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
  int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
  int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
  int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
  int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
  int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
  int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
  int64_t s16 = 2097151 & load_3(s + 42);
  int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
  int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
  int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
  int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
  int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
  int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
  int64_t s23 = (load_4(s + 60) >> 3);
  int64_t carry0, carry1, carry2, carry3, carry4, carry5;
  int64_t carry6, carry7, carry8, carry9, carry10, carry11;
  int64_t carry12, carry13, carry14, carry15, carry16;

  // Fold limbs 23..18 into limbs 6..16.
  s11 += s23 * 666643;
  s12 += s23 * 470296;
  s13 += s23 * 654183;
  s14 -= s23 * 997805;
  s15 += s23 * 136657;
  s16 -= s23 * 683901;
  s23 = 0;

  s10 += s22 * 666643;
  s11 += s22 * 470296;
  s12 += s22 * 654183;
  s13 -= s22 * 997805;
  s14 += s22 * 136657;
  s15 -= s22 * 683901;
  s22 = 0;

  s9 += s21 * 666643;
  s10 += s21 * 470296;
  s11 += s21 * 654183;
  s12 -= s21 * 997805;
  s13 += s21 * 136657;
  s14 -= s21 * 683901;
  s21 = 0;

  s8 += s20 * 666643;
  s9 += s20 * 470296;
  s10 += s20 * 654183;
  s11 -= s20 * 997805;
  s12 += s20 * 136657;
  s13 -= s20 * 683901;
  s20 = 0;

  s7 += s19 * 666643;
  s8 += s19 * 470296;
  s9 += s19 * 654183;
  s10 -= s19 * 997805;
  s11 += s19 * 136657;
  s12 -= s19 * 683901;
  s19 = 0;

  s6 += s18 * 666643;
  s7 += s18 * 470296;
  s8 += s18 * 654183;
  s9 -= s18 * 997805;
  s10 += s18 * 136657;
  s11 -= s18 * 683901;
  s18 = 0;

  // Rounded carries out of the even limbs 6..16, then the odd limbs 7..15.
  carry6 = (s6 + (1 << 20)) >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry8 = (s8 + (1 << 20)) >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry10 = (s10 + (1 << 20)) >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);
  carry12 = (s12 + (1 << 20)) >> 21;
  s13 += carry12;
  s12 -= carry12 * (1 << 21);
  carry14 = (s14 + (1 << 20)) >> 21;
  s15 += carry14;
  s14 -= carry14 * (1 << 21);
  carry16 = (s16 + (1 << 20)) >> 21;
  s17 += carry16;
  s16 -= carry16 * (1 << 21);

  carry7 = (s7 + (1 << 20)) >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry9 = (s9 + (1 << 20)) >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry11 = (s11 + (1 << 20)) >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);
  carry13 = (s13 + (1 << 20)) >> 21;
  s14 += carry13;
  s13 -= carry13 * (1 << 21);
  carry15 = (s15 + (1 << 20)) >> 21;
  s16 += carry15;
  s15 -= carry15 * (1 << 21);

  // Fold limbs 17..12 into limbs 0..10.
  s5 += s17 * 666643;
  s6 += s17 * 470296;
  s7 += s17 * 654183;
  s8 -= s17 * 997805;
  s9 += s17 * 136657;
  s10 -= s17 * 683901;
  s17 = 0;

  s4 += s16 * 666643;
  s5 += s16 * 470296;
  s6 += s16 * 654183;
  s7 -= s16 * 997805;
  s8 += s16 * 136657;
  s9 -= s16 * 683901;
  s16 = 0;

  s3 += s15 * 666643;
  s4 += s15 * 470296;
  s5 += s15 * 654183;
  s6 -= s15 * 997805;
  s7 += s15 * 136657;
  s8 -= s15 * 683901;
  s15 = 0;

  s2 += s14 * 666643;
  s3 += s14 * 470296;
  s4 += s14 * 654183;
  s5 -= s14 * 997805;
  s6 += s14 * 136657;
  s7 -= s14 * 683901;
  s14 = 0;

  s1 += s13 * 666643;
  s2 += s13 * 470296;
  s3 += s13 * 654183;
  s4 -= s13 * 997805;
  s5 += s13 * 136657;
  s6 -= s13 * 683901;
  s13 = 0;

  s0 += s12 * 666643;
  s1 += s12 * 470296;
  s2 += s12 * 654183;
  s3 -= s12 * 997805;
  s4 += s12 * 136657;
  s5 -= s12 * 683901;
  s12 = 0;

  // Rounded carries out of the even limbs 0..10, then the odd limbs 1..11.
  carry0 = (s0 + (1 << 20)) >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry2 = (s2 + (1 << 20)) >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry4 = (s4 + (1 << 20)) >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry6 = (s6 + (1 << 20)) >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry8 = (s8 + (1 << 20)) >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry10 = (s10 + (1 << 20)) >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);

  carry1 = (s1 + (1 << 20)) >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry3 = (s3 + (1 << 20)) >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry5 = (s5 + (1 << 20)) >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry7 = (s7 + (1 << 20)) >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry9 = (s9 + (1 << 20)) >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry11 = (s11 + (1 << 20)) >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);

  // Fold the carry that reached limb 12.
  s0 += s12 * 666643;
  s1 += s12 * 470296;
  s2 += s12 * 654183;
  s3 -= s12 * 997805;
  s4 += s12 * 136657;
  s5 -= s12 * 683901;
  s12 = 0;

  // Sequential floor carries through limbs 0..11.
  carry0 = s0 >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry1 = s1 >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry2 = s2 >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry3 = s3 >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry4 = s4 >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry5 = s5 >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry6 = s6 >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry7 = s7 >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry8 = s8 >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry9 = s9 >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry10 = s10 >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);
  carry11 = s11 >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);

  // Fold limb 12 one last time.
  s0 += s12 * 666643;
  s1 += s12 * 470296;
  s2 += s12 * 654183;
  s3 -= s12 * 997805;
  s4 += s12 * 136657;
  s5 -= s12 * 683901;
  s12 = 0;

  // Final sequential floor carries through limbs 0..10.
  carry0 = s0 >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry1 = s1 >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry2 = s2 >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry3 = s3 >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry4 = s4 >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry5 = s5 >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry6 = s6 >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry7 = s7 >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry8 = s8 >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry9 = s9 >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry10 = s10 >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);

  // Pack limbs 0..11 into the first 32 bytes of |s|, little-endian.
  s[0] = s0 >> 0;
  s[1] = s0 >> 8;
  s[2] = (s0 >> 16) | (s1 << 5);
  s[3] = s1 >> 3;
  s[4] = s1 >> 11;
  s[5] = (s1 >> 19) | (s2 << 2);
  s[6] = s2 >> 6;
  s[7] = (s2 >> 14) | (s3 << 7);
  s[8] = s3 >> 1;
  s[9] = s3 >> 9;
  s[10] = (s3 >> 17) | (s4 << 4);
  s[11] = s4 >> 4;
  s[12] = s4 >> 12;
  s[13] = (s4 >> 20) | (s5 << 1);
  s[14] = s5 >> 7;
  s[15] = (s5 >> 15) | (s6 << 6);
  s[16] = s6 >> 2;
  s[17] = s6 >> 10;
  s[18] = (s6 >> 18) | (s7 << 3);
  s[19] = s7 >> 5;
  s[20] = s7 >> 13;
  s[21] = s8 >> 0;
  s[22] = s8 >> 8;
  s[23] = (s8 >> 16) | (s9 << 5);
  s[24] = s9 >> 3;
  s[25] = s9 >> 11;
  s[26] = (s9 >> 19) | (s10 << 2);
  s[27] = s10 >> 6;
  s[28] = (s10 >> 14) | (s11 << 7);
  s[29] = s11 >> 1;
  s[30] = s11 >> 9;
  s[31] = s11 >> 17;
}
// sc_muladd computes (a * b + c) mod l.
//
// Input:
//   a[0]+256*a[1]+...+256^31*a[31] = a
//   b[0]+256*b[1]+...+256^31*b[31] = b
//   c[0]+256*c[1]+...+256^31*c[31] = c
//
// Output:
//   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
//   where l = 2^252 + 27742317777372353535851937790883648493.
//
// Each input is split into twelve 21-bit limbs; the schoolbook product plus c
// gives limbs s0..s22 (s23 receives the top carry). Limbs 12 and above are
// then folded into the lower ones using the six constants (666643, 470296,
// 654183, -997805, 136657, -683901), the signed radix-2^21 limbs of
// 2^252 - l.
//
// Carries may be negative. They are removed from a limb by multiplying by
// 2^21 rather than with `carry << 21`, because left-shifting a negative value
// is undefined behaviour in C. The right shifts of possibly negative values
// are kept from the original code and rely on the compiler implementing them
// as arithmetic shifts.
static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
                      const uint8_t *c) {
  // Unpack a, b and c; limb 11 of each keeps all remaining high bits.
  int64_t a0 = 2097151 & load_3(a);
  int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
  int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
  int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
  int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
  int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
  int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
  int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
  int64_t a8 = 2097151 & load_3(a + 21);
  int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
  int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
  int64_t a11 = (load_4(a + 28) >> 7);
  int64_t b0 = 2097151 & load_3(b);
  int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
  int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
  int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
  int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
  int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
  int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
  int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
  int64_t b8 = 2097151 & load_3(b + 21);
  int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
  int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
  int64_t b11 = (load_4(b + 28) >> 7);
  int64_t c0 = 2097151 & load_3(c);
  int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
  int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
  int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
  int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
  int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
  int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
  int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
  int64_t c8 = 2097151 & load_3(c + 21);
  int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
  int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
  int64_t c11 = (load_4(c + 28) >> 7);
  int64_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11;
  int64_t s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23;
  int64_t carry0, carry1, carry2, carry3, carry4, carry5;
  int64_t carry6, carry7, carry8, carry9, carry10, carry11;
  int64_t carry12, carry13, carry14, carry15, carry16, carry17;
  int64_t carry18, carry19, carry20, carry21, carry22;

  // Schoolbook product a * b, with c added into the low twelve limbs.
  s0 = c0 + a0 * b0;
  s1 = c1 + a0 * b1 + a1 * b0;
  s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
  s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
  s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
  s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
  s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
  s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
       a6 * b1 + a7 * b0;
  s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
       a6 * b2 + a7 * b1 + a8 * b0;
  s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
       a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
  s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
        a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
  s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
        a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
  s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 +
        a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
  s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 +
        a9 * b4 + a10 * b3 + a11 * b2;
  s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 +
        a10 * b4 + a11 * b3;
  s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 +
        a11 * b4;
  s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
  s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
  s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
  s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
  s20 = a9 * b11 + a10 * b10 + a11 * b9;
  s21 = a10 * b11 + a11 * b10;
  s22 = a11 * b11;
  s23 = 0;

  // Rounded carries out of the even limbs 0..22, then the odd limbs 1..21.
  carry0 = (s0 + (1 << 20)) >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry2 = (s2 + (1 << 20)) >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry4 = (s4 + (1 << 20)) >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry6 = (s6 + (1 << 20)) >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry8 = (s8 + (1 << 20)) >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry10 = (s10 + (1 << 20)) >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);
  carry12 = (s12 + (1 << 20)) >> 21;
  s13 += carry12;
  s12 -= carry12 * (1 << 21);
  carry14 = (s14 + (1 << 20)) >> 21;
  s15 += carry14;
  s14 -= carry14 * (1 << 21);
  carry16 = (s16 + (1 << 20)) >> 21;
  s17 += carry16;
  s16 -= carry16 * (1 << 21);
  carry18 = (s18 + (1 << 20)) >> 21;
  s19 += carry18;
  s18 -= carry18 * (1 << 21);
  carry20 = (s20 + (1 << 20)) >> 21;
  s21 += carry20;
  s20 -= carry20 * (1 << 21);
  carry22 = (s22 + (1 << 20)) >> 21;
  s23 += carry22;
  s22 -= carry22 * (1 << 21);

  carry1 = (s1 + (1 << 20)) >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry3 = (s3 + (1 << 20)) >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry5 = (s5 + (1 << 20)) >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry7 = (s7 + (1 << 20)) >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry9 = (s9 + (1 << 20)) >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry11 = (s11 + (1 << 20)) >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);
  carry13 = (s13 + (1 << 20)) >> 21;
  s14 += carry13;
  s13 -= carry13 * (1 << 21);
  carry15 = (s15 + (1 << 20)) >> 21;
  s16 += carry15;
  s15 -= carry15 * (1 << 21);
  carry17 = (s17 + (1 << 20)) >> 21;
  s18 += carry17;
  s17 -= carry17 * (1 << 21);
  carry19 = (s19 + (1 << 20)) >> 21;
  s20 += carry19;
  s19 -= carry19 * (1 << 21);
  carry21 = (s21 + (1 << 20)) >> 21;
  s22 += carry21;
  s21 -= carry21 * (1 << 21);

  // Fold limbs 23..18 into limbs 6..16.
  s11 += s23 * 666643;
  s12 += s23 * 470296;
  s13 += s23 * 654183;
  s14 -= s23 * 997805;
  s15 += s23 * 136657;
  s16 -= s23 * 683901;
  s23 = 0;

  s10 += s22 * 666643;
  s11 += s22 * 470296;
  s12 += s22 * 654183;
  s13 -= s22 * 997805;
  s14 += s22 * 136657;
  s15 -= s22 * 683901;
  s22 = 0;

  s9 += s21 * 666643;
  s10 += s21 * 470296;
  s11 += s21 * 654183;
  s12 -= s21 * 997805;
  s13 += s21 * 136657;
  s14 -= s21 * 683901;
  s21 = 0;

  s8 += s20 * 666643;
  s9 += s20 * 470296;
  s10 += s20 * 654183;
  s11 -= s20 * 997805;
  s12 += s20 * 136657;
  s13 -= s20 * 683901;
  s20 = 0;

  s7 += s19 * 666643;
  s8 += s19 * 470296;
  s9 += s19 * 654183;
  s10 -= s19 * 997805;
  s11 += s19 * 136657;
  s12 -= s19 * 683901;
  s19 = 0;

  s6 += s18 * 666643;
  s7 += s18 * 470296;
  s8 += s18 * 654183;
  s9 -= s18 * 997805;
  s10 += s18 * 136657;
  s11 -= s18 * 683901;
  s18 = 0;

  // Rounded carries out of the even limbs 6..16, then the odd limbs 7..15.
  carry6 = (s6 + (1 << 20)) >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry8 = (s8 + (1 << 20)) >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry10 = (s10 + (1 << 20)) >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);
  carry12 = (s12 + (1 << 20)) >> 21;
  s13 += carry12;
  s12 -= carry12 * (1 << 21);
  carry14 = (s14 + (1 << 20)) >> 21;
  s15 += carry14;
  s14 -= carry14 * (1 << 21);
  carry16 = (s16 + (1 << 20)) >> 21;
  s17 += carry16;
  s16 -= carry16 * (1 << 21);

  carry7 = (s7 + (1 << 20)) >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry9 = (s9 + (1 << 20)) >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry11 = (s11 + (1 << 20)) >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);
  carry13 = (s13 + (1 << 20)) >> 21;
  s14 += carry13;
  s13 -= carry13 * (1 << 21);
  carry15 = (s15 + (1 << 20)) >> 21;
  s16 += carry15;
  s15 -= carry15 * (1 << 21);

  // Fold limbs 17..12 into limbs 0..10.
  s5 += s17 * 666643;
  s6 += s17 * 470296;
  s7 += s17 * 654183;
  s8 -= s17 * 997805;
  s9 += s17 * 136657;
  s10 -= s17 * 683901;
  s17 = 0;

  s4 += s16 * 666643;
  s5 += s16 * 470296;
  s6 += s16 * 654183;
  s7 -= s16 * 997805;
  s8 += s16 * 136657;
  s9 -= s16 * 683901;
  s16 = 0;

  s3 += s15 * 666643;
  s4 += s15 * 470296;
  s5 += s15 * 654183;
  s6 -= s15 * 997805;
  s7 += s15 * 136657;
  s8 -= s15 * 683901;
  s15 = 0;

  s2 += s14 * 666643;
  s3 += s14 * 470296;
  s4 += s14 * 654183;
  s5 -= s14 * 997805;
  s6 += s14 * 136657;
  s7 -= s14 * 683901;
  s14 = 0;

  s1 += s13 * 666643;
  s2 += s13 * 470296;
  s3 += s13 * 654183;
  s4 -= s13 * 997805;
  s5 += s13 * 136657;
  s6 -= s13 * 683901;
  s13 = 0;

  s0 += s12 * 666643;
  s1 += s12 * 470296;
  s2 += s12 * 654183;
  s3 -= s12 * 997805;
  s4 += s12 * 136657;
  s5 -= s12 * 683901;
  s12 = 0;

  // Rounded carries out of the even limbs 0..10, then the odd limbs 1..11.
  carry0 = (s0 + (1 << 20)) >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry2 = (s2 + (1 << 20)) >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry4 = (s4 + (1 << 20)) >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry6 = (s6 + (1 << 20)) >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry8 = (s8 + (1 << 20)) >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry10 = (s10 + (1 << 20)) >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);

  carry1 = (s1 + (1 << 20)) >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry3 = (s3 + (1 << 20)) >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry5 = (s5 + (1 << 20)) >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry7 = (s7 + (1 << 20)) >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry9 = (s9 + (1 << 20)) >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry11 = (s11 + (1 << 20)) >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);

  // Fold the carry that reached limb 12.
  s0 += s12 * 666643;
  s1 += s12 * 470296;
  s2 += s12 * 654183;
  s3 -= s12 * 997805;
  s4 += s12 * 136657;
  s5 -= s12 * 683901;
  s12 = 0;

  // Sequential floor carries through limbs 0..11.
  carry0 = s0 >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry1 = s1 >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry2 = s2 >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry3 = s3 >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry4 = s4 >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry5 = s5 >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry6 = s6 >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry7 = s7 >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry8 = s8 >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry9 = s9 >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry10 = s10 >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);
  carry11 = s11 >> 21;
  s12 += carry11;
  s11 -= carry11 * (1 << 21);

  // Fold limb 12 one last time.
  s0 += s12 * 666643;
  s1 += s12 * 470296;
  s2 += s12 * 654183;
  s3 -= s12 * 997805;
  s4 += s12 * 136657;
  s5 -= s12 * 683901;
  s12 = 0;

  // Final sequential floor carries through limbs 0..10.
  carry0 = s0 >> 21;
  s1 += carry0;
  s0 -= carry0 * (1 << 21);
  carry1 = s1 >> 21;
  s2 += carry1;
  s1 -= carry1 * (1 << 21);
  carry2 = s2 >> 21;
  s3 += carry2;
  s2 -= carry2 * (1 << 21);
  carry3 = s3 >> 21;
  s4 += carry3;
  s3 -= carry3 * (1 << 21);
  carry4 = s4 >> 21;
  s5 += carry4;
  s4 -= carry4 * (1 << 21);
  carry5 = s5 >> 21;
  s6 += carry5;
  s5 -= carry5 * (1 << 21);
  carry6 = s6 >> 21;
  s7 += carry6;
  s6 -= carry6 * (1 << 21);
  carry7 = s7 >> 21;
  s8 += carry7;
  s7 -= carry7 * (1 << 21);
  carry8 = s8 >> 21;
  s9 += carry8;
  s8 -= carry8 * (1 << 21);
  carry9 = s9 >> 21;
  s10 += carry9;
  s9 -= carry9 * (1 << 21);
  carry10 = s10 >> 21;
  s11 += carry10;
  s10 -= carry10 * (1 << 21);

  // Pack limbs 0..11 into the 32 output bytes, little-endian.
  s[0] = s0 >> 0;
  s[1] = s0 >> 8;
  s[2] = (s0 >> 16) | (s1 << 5);
  s[3] = s1 >> 3;
  s[4] = s1 >> 11;
  s[5] = (s1 >> 19) | (s2 << 2);
  s[6] = s2 >> 6;
  s[7] = (s2 >> 14) | (s3 << 7);
  s[8] = s3 >> 1;
  s[9] = s3 >> 9;
  s[10] = (s3 >> 17) | (s4 << 4);
  s[11] = s4 >> 4;
  s[12] = s4 >> 12;
  s[13] = (s4 >> 20) | (s5 << 1);
  s[14] = s5 >> 7;
  s[15] = (s5 >> 15) | (s6 << 6);
  s[16] = s6 >> 2;
  s[17] = s6 >> 10;
  s[18] = (s6 >> 18) | (s7 << 3);
  s[19] = s7 >> 5;
  s[20] = s7 >> 13;
  s[21] = s8 >> 0;
  s[22] = s8 >> 8;
  s[23] = (s8 >> 16) | (s9 << 5);
  s[24] = s9 >> 3;
  s[25] = s9 >> 11;
  s[26] = (s9 >> 19) | (s10 << 2);
  s[27] = s10 >> 6;
  s[28] = (s10 >> 14) | (s11 << 7);
  s[29] = s11 >> 1;
  s[30] = s11 >> 9;
  s[31] = s11 >> 17;
}
// ED25519_keypair fills a 32-byte seed from RAND_bytes and derives the key
// pair from it with ED25519_keypair_from_seed, which writes
// |out_public_key| and |out_private_key|.
void ED25519_keypair(uint8_t out_public_key[32], uint8_t out_private_key[64]) {
  uint8_t seed[32];
  RAND_bytes(seed, sizeof(seed));
  ED25519_keypair_from_seed(out_public_key, out_private_key, seed);
}
// ED25519_sign writes a 64-byte signature of |message| to |out_sig| and
// returns one.
//
// The first 32 bytes of |private_key| are hashed to derive the secret scalar
// and the nonce prefix; the last 32 bytes are fed into the second hash as the
// public key. The signature is the encoded point R followed by the scalar
// (hram * a + nonce) mod l.
int ED25519_sign(uint8_t out_sig[64], const uint8_t *message,
                 size_t message_len, const uint8_t private_key[64]) {
  // az[0..31] becomes the clamped secret scalar a; az[32..63] is the prefix
  // that is hashed with the message to produce the nonce.
  uint8_t az[SHA512_DIGEST_LENGTH];
  SHA512(private_key, 32, az);

  // Clamp exactly as ED25519_keypair_from_seed does: clear the low three
  // bits and the top bit, then set bit 254. (Masking with 127 rather than 63
  // gives the same result because bit 6 of az[31] is forced to one next.)
  az[0] &= 248;
  az[31] &= 127;
  az[31] |= 64;

  // nonce = SHA-512(prefix || message) mod l.
  SHA512_CTX hash_ctx;
  SHA512_Init(&hash_ctx);
  SHA512_Update(&hash_ctx, az + 32, 32);
  SHA512_Update(&hash_ctx, message, message_len);
  uint8_t nonce[SHA512_DIGEST_LENGTH];
  SHA512_Final(nonce, &hash_ctx);

  // R = nonce * B, encoded into the first half of the signature.
  x25519_sc_reduce(nonce);
  ge_p3 R;
  x25519_ge_scalarmult_base(&R, nonce);
  ge_p3_tobytes(out_sig, &R);

  // hram = SHA-512(R || public key || message) mod l.
  SHA512_Init(&hash_ctx);
  SHA512_Update(&hash_ctx, out_sig, 32);
  SHA512_Update(&hash_ctx, private_key + 32, 32);
  SHA512_Update(&hash_ctx, message, message_len);
  uint8_t hram[SHA512_DIGEST_LENGTH];
  SHA512_Final(hram, &hash_ctx);

  // Second half of the signature: (hram * a + nonce) mod l.
  x25519_sc_reduce(hram);
  sc_muladd(out_sig + 32, hram, az, nonce);

  return 1;
}
// ED25519_verify returns one if |signature| is a valid signature of |message|
// under |public_key| and zero otherwise.
//
// The signature is R (32 bytes) followed by the scalar s (32 bytes). It is
// accepted when the encoding of h * (-A) + s * B equals R, where A is the
// decoded public key and h = SHA-512(R || public_key || message) mod l.
int ED25519_verify(const uint8_t *message, size_t message_len,
                   const uint8_t signature[64], const uint8_t public_key[32]) {
  // Reject early if the top three bits of s are set (s would be >= 2^253) or
  // if the public key does not decode.
  ge_p3 A;
  if ((signature[63] & 224) != 0 ||
      !x25519_ge_frombytes_vartime(&A, public_key)) {
    return 0;
  }

  // Negate A (its X and T coordinates) so that the double-scalar
  // multiplication below computes s * B - h * A.
  fe_loose t;
  fe_neg(&t, &A.X);
  fe_carry(&A.X, &t);
  fe_neg(&t, &A.T);
  fe_carry(&A.T, &t);

  uint8_t rcopy[32];
  OPENSSL_memcpy(rcopy, signature, 32);
  uint8_t scopy[32];
  OPENSSL_memcpy(scopy, signature + 32, 32);

  // https://tools.ietf.org/html/rfc8032#section-5.1.7 requires that s be in
  // the range [0, order) in order to prevent signature malleability.
  //
  // kOrder is l, the order of the base point, as 32 little-endian bytes. The
  // comparison runs byte by byte from the most significant end so that the
  // result does not depend on the byte order of the host.
  static const uint8_t kOrder[32] = {
      0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
      0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
  };
  for (size_t i = 31;; i--) {
    if (scopy[i] > kOrder[i]) {
      return 0;
    } else if (scopy[i] < kOrder[i]) {
      break;
    } else if (i == 0) {
      // Every byte matched: s == order, which is out of range.
      return 0;
    }
  }

  SHA512_CTX hash_ctx;
  SHA512_Init(&hash_ctx);
  SHA512_Update(&hash_ctx, signature, 32);
  SHA512_Update(&hash_ctx, public_key, 32);
  SHA512_Update(&hash_ctx, message, message_len);
  uint8_t h[SHA512_DIGEST_LENGTH];
  SHA512_Final(h, &hash_ctx);

  x25519_sc_reduce(h);

  ge_p2 R;
  ge_double_scalarmult_vartime(&R, h, &A, scopy);

  uint8_t rcheck[32];
  x25519_ge_tobytes(rcheck, &R);

  return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0;
}
// ED25519_keypair_from_seed derives an Ed25519 key pair from |seed|.
//
// The public key is the encoding of a * B, where a is the clamped low half of
// SHA-512(seed). |out_private_key| receives the seed followed by the public
// key.
void ED25519_keypair_from_seed(uint8_t out_public_key[32],
                               uint8_t out_private_key[64],
                               const uint8_t seed[32]) {
  uint8_t az[SHA512_DIGEST_LENGTH];
  SHA512(seed, 32, az);

  // Clamp: clear the low three bits and the top bit, set bit 254.
  az[0] &= 248;
  az[31] = (uint8_t)((az[31] & 127) | 64);

  ge_p3 A;
  x25519_ge_scalarmult_base(&A, az);
  ge_p3_tobytes(out_public_key, &A);

  // Private key layout: seed || public key.
  OPENSSL_memcpy(out_private_key, seed, 32);
  OPENSSL_memcpy(out_private_key + 32, out_public_key, 32);
}
// x25519_scalar_mult_generic writes to |out| the 32-byte encoding of the
// x-coordinate of e*P, where e is |scalar| after clamping and |point| is the
// encoded x-coordinate of P. It walks the scalar bits from 254 down to 0 with
// an x-only ladder; the two running points are exchanged with fe_cswap
// according to each scalar bit.
static void x25519_scalar_mult_generic(uint8_t out[32],
const uint8_t scalar[32],
const uint8_t point[32]) {
fe x1, x2, z2, x3, z3, tmp0, tmp1;
fe_loose x2l, z2l, x3l, tmp0l, tmp1l;
// Clamp a private copy of the scalar: clear the low three bits and the top
// bit, and set bit 254.
uint8_t e[32];
OPENSSL_memcpy(e, scalar, 32);
e[0] &= 248;
e[31] &= 127;
e[31] |= 64;
// The following implementation was transcribed to Coq and proven to
// correspond to unary scalar multiplication in affine coordinates given that
// x1 != 0 is the x coordinate of some point on the curve. It was also checked
// in Coq that doing a ladderstep with x1 = x3 = 0 gives z2' = z3' = 0, and z2
// = z3 = 0 gives z2' = z3' = 0. The statement was quantified over the
// underlying field, so it applies to Curve25519 itself and the quadratic
// twist of Curve25519. It was not proven in Coq that prime-field arithmetic
// correctly simulates extension-field arithmetic on prime-field values.
// The decoding of the byte array representation of e was not considered.
// Specification of Montgomery curves in affine coordinates:
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
// Proof that these form a group that is isomorphic to a Weierstrass curve:
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
// Coq transcription and correctness proof of the loop (where scalarbits=255):
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
// preconditions: 0 <= e < 2^255 (not necessarily e < order), fe_invert(0) = 0
// Initial state: (x2, z2) = (1, 0) and (x3, z3) = (x1, 1).
fe_frombytes(&x1, point);
fe_1(&x2);
fe_0(&z2);
fe_copy(&x3, &x1);
fe_1(&z3);
unsigned swap = 0;
int pos;
for (pos = 254; pos >= 0; --pos) {
// loop invariant as of right before the test, for the case where x1 != 0:
// pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 is nonzero
// let r := e >> (pos+1) in the following equalities of projective points:
// to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
// to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
// x1 is the nonzero x coordinate of the nonzero point (r*P-(r+1)*P)
// b is bit |pos| of e. XORing it into |swap| means the pairs are exchanged
// only when this bit differs from the previous one.
unsigned b = 1 & (e[pos / 8] >> (pos & 7));
swap ^= b;
fe_cswap(&x2, &x3, swap);
fe_cswap(&z2, &z3, swap);
swap = b;
// Coq transcription of ladderstep formula (called from transcribed loop):
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
// <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
// x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
// x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
// NOTE(review): the statement order below mirrors the Coq transcription and
// reuses temporaries between steps; do not reorder.
fe_sub(&tmp0l, &x3, &z3);
fe_sub(&tmp1l, &x2, &z2);
fe_add(&x2l, &x2, &z2);
fe_add(&z2l, &x3, &z3);
fe_mul_tll(&z3, &tmp0l, &x2l);
fe_mul_tll(&z2, &z2l, &tmp1l);
fe_sq_tl(&tmp0, &tmp1l);
fe_sq_tl(&tmp1, &x2l);
fe_add(&x3l, &z3, &z2);
fe_sub(&z2l, &z3, &z2);
fe_mul_ttt(&x2, &tmp1, &tmp0);
fe_sub(&tmp1l, &tmp1, &tmp0);
fe_sq_tl(&z2, &z2l);
fe_mul121666(&z3, &tmp1l);
fe_sq_tl(&x3, &x3l);
fe_add(&tmp0l, &tmp0, &z3);
fe_mul_ttt(&z3, &x1, &z2);
fe_mul_tll(&z2, &tmp1l, &tmp0l);
}
// here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) else (x2, z2)
fe_cswap(&x2, &x3, swap);
fe_cswap(&z2, &z3, swap);
// Convert from projective (x2 : z2) to the affine value x2 / z2 and encode.
fe_invert(&z2, &z2);
fe_mul_ttt(&x2, &x2, &z2);
fe_tobytes(out, &x2);
}
// x25519_scalar_mult dispatches an X25519 scalar multiplication: when
// BORINGSSL_X25519_NEON is defined and CRYPTO_is_NEON_capable() reports
// support at run time, x25519_NEON is used; otherwise the generic
// implementation is.
static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
                               const uint8_t point[32]) {
#if defined(BORINGSSL_X25519_NEON)
  if (!CRYPTO_is_NEON_capable()) {
    x25519_scalar_mult_generic(out, scalar, point);
  } else {
    x25519_NEON(out, scalar, point);
  }
#else
  x25519_scalar_mult_generic(out, scalar, point);
#endif
}
// X25519_keypair fills |out_private_key| with 32 random bytes, deliberately
// "anti-clamps" them, and writes the matching public value to
// |out_public_value|.
void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) {
  RAND_bytes(out_private_key, 32);

  // Every X25519 implementation is supposed to clamp the scalar when decoding
  // it (see https://tools.ietf.org/html/rfc7748#section-5). One that forgets
  // to would still interoperate with random keys some fraction of the time,
  // simply because a random key occasionally happens to be well-formed
  // already.
  //
  // To make such implementations fail deterministically instead, apply the
  // exact opposite of the clamping masks here, so that our private keys are
  // never correctly masked.
  //
  // Security is unaffected: the bits being overwritten are precisely the ones
  // a correct scalar multiplication discards anyway.
  out_private_key[0] |= (uint8_t)~248;
  out_private_key[31] = (uint8_t)((out_private_key[31] & ~64) | ~127);

  X25519_public_from_private(out_public_value, out_private_key);
}
// X25519 writes the scalar multiplication of |peer_public_value| by
// |private_key| to |out_shared_key|. It returns one on success and zero if
// the result is all zeros.
int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32],
           const uint8_t peer_public_value[32]) {
  static const uint8_t kZeros[32] = {0};

  x25519_scalar_mult(out_shared_key, private_key, peer_public_value);

  // The all-zero output results when the input is a point of small order.
  const int is_all_zero = CRYPTO_memcmp(kZeros, out_shared_key, 32) == 0;
  return !is_all_zero;
}
// X25519_public_from_private writes to |out_public_value| the X25519 public
// value that corresponds to |private_key|.
//
// Without NEON, the clamped scalar is multiplied by the Ed25519 base point
// and the resulting point is mapped to its Montgomery u-coordinate.
void X25519_public_from_private(uint8_t out_public_value[32],
                                const uint8_t private_key[32]) {
#if defined(BORINGSSL_X25519_NEON)
  if (CRYPTO_is_NEON_capable()) {
    static const uint8_t kMontgomeryBasePoint[32] = {9};
    x25519_NEON(out_public_value, private_key, kMontgomeryBasePoint);
    return;
  }
#endif

  // Clamp a copy of the private key: clear the low three bits and the top
  // bit, set bit 254.
  uint8_t clamped[32];
  OPENSSL_memcpy(clamped, private_key, 32);
  clamped[0] &= 248;
  clamped[31] = (uint8_t)((clamped[31] & 127) | 64);

  ge_p3 point;
  x25519_ge_scalarmult_base(&point, clamped);

  // We only need the u-coordinate of the curve25519 point. The map is
  // u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y).
  fe_loose numerator, denominator;
  fe u;
  fe_add(&numerator, &point.Z, &point.Y);
  fe_sub(&denominator, &point.Z, &point.Y);
  fe_loose_invert(&u, &denominator);
  fe_mul_tlt(&u, &numerator, &u);
  fe_tobytes(out_public_value, &u);
}