diff --git a/reference/csidh-20180427-by-castryck-et-al/Makefile b/reference/csidh-20180427-by-castryck-et-al/Makefile deleted file mode 100644 index 926d7df..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -all: - @cc \ - -std=c99 -pedantic \ - -Wall -Wextra \ - -O2 -funroll-loops \ - rng.c \ - u512.s fp.s \ - mont.c \ - csidh.c \ - main.c \ - -o main - -debug: - cc \ - -std=c99 -pedantic \ - -Wall -Wextra \ - -g \ - rng.c \ - u512.s fp.s \ - mont.c \ - csidh.c \ - main.c \ - -o main - -clean: - rm -f main - diff --git a/reference/csidh-20180427-by-castryck-et-al/bench.c b/reference/csidh-20180427-by-castryck-et-al/bench.c deleted file mode 100644 index fd0f64a..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/bench.c +++ /dev/null @@ -1,54 +0,0 @@ - -#include -#include -#include -#include -#include - -#include "u512.h" -#include "fp.h" -#include "mont.h" -#include "csidh.h" - -#include - -static __inline__ uint64_t rdtsc(void) -{ - uint32_t hi, lo; - __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); - return lo | (uint64_t) hi << 32; -} - -unsigned long its = 10000; - -int main() -{ - clock_t t0, t1, time = 0; - uint64_t c0, c1, cycles = 0; - - private_key priv; - public_key pub = base; - - for (unsigned long i = 0; i < its; ++i) { - - csidh_private(&priv); - - t0 = clock(); - c0 = rdtsc(); - - /**************************************/ - assert(validate(&pub)); - action(&pub, &pub, &priv); - /**************************************/ - - c1 = rdtsc(); - t1 = clock(); - cycles += c1 - c0; - time += t1 - t0; - } - - printf("iterations: %lu\n", its); - printf("clock cycles: %" PRIu64 "\n", (uint64_t) cycles / its); - printf("wall-clock time: %.3lf ms\n", 1000. * time / CLOCKS_PER_SEC / its); -} - diff --git a/reference/csidh-20180427-by-castryck-et-al/csidh.c b/reference/csidh-20180427-by-castryck-et-al/csidh.c deleted file mode 100644 index 6539f8e..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/csidh.c +++ /dev/null @@ -1,220 +0,0 @@ - -#include -#include - -#include "csidh.h" -#include "rng.h" - -/* specific to p, should perhaps be somewhere else */ -const unsigned primes[num_primes] = { - 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, - 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, - 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, - 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, - 317, 331, 337, 347, 349, 353, 359, 367, 373, 587, -}; - -const u512 four_sqrt_p = {{ - 0x85e2579c786882cf, 0x4e3433657e18da95, 0x850ae5507965a0b3, 0xa15bc4e676475964, -}}; - - -const public_key base = {0}; /* A = 0 */ - -void csidh_private(private_key *priv) -{ - memset(&priv->e, 0, sizeof(priv->e)); - for (size_t i = 0; i < num_primes; ) { - int8_t buf[64]; - randombytes(buf, sizeof(buf)); - for (size_t j = 0; j < sizeof(buf); ++j) { - if (buf[j] <= max_exponent && buf[j] >= -max_exponent) { - priv->e[i / 2] |= (buf[j] & 0xf) << i % 2 * 4; - if (++i >= num_primes) - break; - } - } - } -} - -/* compute [(p+1)/l] P for all l in our list of primes. */ -/* divide and conquer is much faster than doing it naively, - * but uses more memory. */ -static void cofactor_multiples(proj *P, const proj *A, size_t lower, size_t upper) -{ - assert(lower < upper); - - if (upper - lower == 1) - return; - - size_t mid = lower + (upper - lower + 1) / 2; - - u512 cl = u512_1, cu = u512_1; - for (size_t i = lower; i < mid; ++i) - u512_mul3_64(&cu, &cu, primes[i]); - for (size_t i = mid; i < upper; ++i) - u512_mul3_64(&cl, &cl, primes[i]); - - xMUL(&P[mid], A, &P[lower], &cu); - xMUL(&P[lower], A, &P[lower], &cl); - - cofactor_multiples(P, A, lower, mid); - cofactor_multiples(P, A, mid, upper); -} - -/* never accepts invalid keys. */ -bool validate(public_key const *in) -{ - const proj A = {in->A, fp_1}; - - do { - - proj P[num_primes]; - fp_random(&P->x); - P->z = fp_1; - - /* maximal 2-power in p+1 */ - xDBL(P, &A, P); - xDBL(P, &A, P); - - cofactor_multiples(P, &A, 0, num_primes); - - u512 order = u512_1; - - for (size_t i = num_primes - 1; i < num_primes; --i) { - - /* we only gain information if [(p+1)/l] P is non-zero */ - if (memcmp(&P[i].z, &fp_0, sizeof(fp))) { - - u512 tmp; - u512_set(&tmp, primes[i]); - xMUL(&P[i], &A, &P[i], &tmp); - - if (memcmp(&P[i].z, &fp_0, sizeof(fp))) - /* P does not have order dividing p+1. */ - return false; - - u512_mul3_64(&order, &order, primes[i]); - - if (u512_sub3(&tmp, &four_sqrt_p, &order)) /* returns borrow */ - /* order > 4 sqrt(p), hence definitely supersingular */ - return true; - } - } - - /* P didn't have big enough order to prove supersingularity. */ - } while (1); -} - -/* compute x^3 + Ax^2 + x */ -static void montgomery_rhs(fp *rhs, fp const *A, fp const *x) -{ - fp tmp; - *rhs = *x; - fp_sq1(rhs); - fp_mul3(&tmp, A, x); - fp_add2(rhs, &tmp); - fp_add2(rhs, &fp_1); - fp_mul2(rhs, x); -} - -/* totally not constant-time. */ -void action(public_key *out, public_key const *in, private_key const *priv) -{ - u512 k[2]; - u512_set(&k[0], 4); /* maximal 2-power in p+1 */ - u512_set(&k[1], 4); /* maximal 2-power in p+1 */ - - uint8_t e[2][num_primes]; - - for (size_t i = 0; i < num_primes; ++i) { - - int8_t t = (int8_t) (priv->e[i / 2] << i % 2 * 4) >> 4; - - if (t > 0) { - e[0][i] = t; - e[1][i] = 0; - u512_mul3_64(&k[1], &k[1], primes[i]); - } - else if (t < 0) { - e[1][i] = -t; - e[0][i] = 0; - u512_mul3_64(&k[0], &k[0], primes[i]); - } - else { - e[0][i] = 0; - e[1][i] = 0; - u512_mul3_64(&k[0], &k[0], primes[i]); - u512_mul3_64(&k[1], &k[1], primes[i]); - } - } - - proj A = {in->A, fp_1}; - - bool done[2] = {false, false}; - - do { - - assert(!memcmp(&A.z, &fp_1, sizeof(fp))); - - proj P; - fp_random(&P.x); - P.z = fp_1; - - fp rhs; - montgomery_rhs(&rhs, &A.x, &P.x); - bool sign = !fp_issquare(&rhs); - - if (done[sign]) - continue; - - xMUL(&P, &A, &P, &k[sign]); - - done[sign] = true; - - for (size_t i = 0; i < num_primes; ++i) { - - if (e[sign][i]) { - - u512 cof = u512_1; - for (size_t j = i + 1; j < num_primes; ++j) - if (e[sign][j]) - u512_mul3_64(&cof, &cof, primes[j]); - - proj K; - xMUL(&K, &A, &P, &cof); - - if (memcmp(&K.z, &fp_0, sizeof(fp))) { - - xISOG(&A, &P, &K, primes[i]); - - if (!--e[sign][i]) - u512_mul3_64(&k[sign], &k[sign], primes[i]); - - } - - } - - done[sign] &= !e[sign][i]; - } - - fp_inv(&A.z); - fp_mul2(&A.x, &A.z); - A.z = fp_1; - - } while (!(done[0] && done[1])); - - out->A = A.x; -} - -/* includes public-key validation. */ -bool csidh(public_key *out, public_key const *in, private_key const *priv) -{ - if (!validate(in)) { - fp_random(&out->A); - return false; - } - action(out, in, priv); - return true; -} - diff --git a/reference/csidh-20180427-by-castryck-et-al/csidh.h b/reference/csidh-20180427-by-castryck-et-al/csidh.h deleted file mode 100644 index 52e9d54..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/csidh.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef CSIDH_H -#define CSIDH_H - -#include "u512.h" -#include "fp.h" -#include "mont.h" - -/* specific to p, should perhaps be somewhere else */ -#define num_primes 74 -#define max_exponent 5 /* (2*5+1)^74 is roughly 2^256 */ - - -typedef struct private_key { - int8_t e[(num_primes + 1) / 2]; /* packed int4_t */ -} private_key; - -typedef struct public_key { - fp A; /* Montgomery coefficient: represents y^2 = x^3 + Ax^2 + x */ -} public_key; - -extern const public_key base; - -void csidh_private(private_key *priv); -bool csidh(public_key *out, public_key const *in, private_key const *priv); - -#endif diff --git a/reference/csidh-20180427-by-castryck-et-al/fp.h b/reference/csidh-20180427-by-castryck-et-al/fp.h deleted file mode 100644 index 6cfa064..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/fp.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef FP_H -#define FP_H - -#include "u512.h" - -/* fp is in the Montgomery domain, so interpreting that - as an integer should never make sense. - enable compiler warnings when mixing up u512 and fp. */ -typedef struct fp { - u512 x; -} fp; - -extern const fp fp_0; -extern const fp fp_1; - -void fp_set(fp *x, uint64_t y); -void fp_cswap(fp *x, fp *y, bool c); - -void fp_enc(fp *x, u512 const *y); /* encode to Montgomery representation */ -void fp_dec(u512 *x, fp const *y); /* decode from Montgomery representation */ - -void fp_add2(fp *x, fp const *y); -void fp_sub2(fp *x, fp const *y); -void fp_mul2(fp *x, fp const *y); - -void fp_add3(fp *x, fp const *y, fp const *z); -void fp_sub3(fp *x, fp const *y, fp const *z); -void fp_mul3(fp *x, fp const *y, fp const *z); - -void fp_sq1(fp *x); -void fp_sq2(fp *x, fp const *y); -void fp_inv(fp *x); -bool fp_issquare(fp const *x); - -void fp_random(fp *x); - -#endif diff --git a/reference/csidh-20180427-by-castryck-et-al/fp.s b/reference/csidh-20180427-by-castryck-et-al/fp.s deleted file mode 100644 index ac65d94..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/fp.s +++ /dev/null @@ -1,452 +0,0 @@ - -.intel_syntax noprefix - -.section .rodata - -.set pbits, 511 -p: - .quad 0x1b81b90533c6c87b, 0xc2721bf457aca835, 0x516730cc1f0b4f25, 0xa7aac6c567f35507 - .quad 0x5afbfcc69322c9cd, 0xb42d083aedc88c42, 0xfc8ab0d15e3e4c4a, 0x65b48e8f740f89bf - - -.global fp_0 -fp_0: .quad 0, 0, 0, 0, 0, 0, 0, 0 - -.global fp_1 -fp_1: /* 2^512 mod p */ - .quad 0xc8fc8df598726f0a, 0x7b1bc81750a6af95, 0x5d319e67c1e961b4, 0xb0aa7275301955f1 - .quad 0x4a080672d9ba6c64, 0x97a5ef8a246ee77b, 0x06ea9e5d4383676a, 0x3496e2e117e0ec80 - - -/* (2^512)^2 mod p */ -.r_squared_mod_p: - .quad 0x36905b572ffc1724, 0x67086f4525f1f27d, 0x4faf3fbfd22370ca, 0x192ea214bcc584b1 - .quad 0x5dae03ee2f5de3d0, 0x1e9248731776b371, 0xad5f166e20e4f52d, 0x4ed759aea6f3917e - -/* -p^-1 mod 2^64 */ -.inv_min_p_mod_r: - .quad 0x66c1301f632e294d - - -.section .text - -.global fp_copy -fp_copy: - cld - mov rcx, 8 - rep movsq - ret - -.global fp_set -fp_set: - push rdi - call u512_set - pop rdi - mov rsi, rdi - jmp fp_enc - -.global fp_cswap -fp_cswap: - movzx rax, dl - neg rax - .set k, 0 - .rept 8 - mov rcx, [rdi + 8*k] - mov rdx, [rsi + 8*k] - - mov r8, rcx - xor r8, rdx - and r8, rax - - xor rcx, r8 - xor rdx, r8 - - mov [rdi + 8*k], rcx - mov [rsi + 8*k], rdx - - .set k, k+1 - .endr - ret - -.reduce_once: - push rbp - mov rbp, rdi - - mov rdi, [rbp + 0] - sub rdi, [rip + p + 0] - mov rsi, [rbp + 8] - sbb rsi, [rip + p + 8] - mov rdx, [rbp + 16] - sbb rdx, [rip + p + 16] - mov rcx, [rbp + 24] - sbb rcx, [rip + p + 24] - mov r8, [rbp + 32] - sbb r8, [rip + p + 32] - mov r9, [rbp + 40] - sbb r9, [rip + p + 40] - mov r10, [rbp + 48] - sbb r10, [rip + p + 48] - mov r11, [rbp + 56] - sbb r11, [rip + p + 56] - - setnc al - movzx rax, al - neg rax - -.macro cswap2, r, m - xor \r, \m - and \r, rax - xor \m, \r -.endm - - cswap2 rdi, [rbp + 0] - cswap2 rsi, [rbp + 8] - cswap2 rdx, [rbp + 16] - cswap2 rcx, [rbp + 24] - cswap2 r8, [rbp + 32] - cswap2 r9, [rbp + 40] - cswap2 r10, [rbp + 48] - cswap2 r11, [rbp + 56] - - pop rbp - ret - -.global fp_add3 -fp_add3: - push rdi - call u512_add3 - pop rdi - jmp .reduce_once - -.global fp_add2 -fp_add2: - mov rdx, rdi - jmp fp_add3 - -.global fp_sub3 -fp_sub3: - push rdi - call u512_sub3 - pop rdi - xor rsi, rsi - xor rdx, rdx - xor rcx, rcx - xor r8, r8 - xor r9, r9 - xor r10, r10 - xor r11, r11 - test rax, rax - cmovnz rax, [rip + p + 0] - cmovnz rsi, [rip + p + 8] - cmovnz rdx, [rip + p + 16] - cmovnz rcx, [rip + p + 24] - cmovnz r8, [rip + p + 32] - cmovnz r9, [rip + p + 40] - cmovnz r10, [rip + p + 48] - cmovnz r11, [rip + p + 56] - add [rdi + 0], rax - adc [rdi + 8], rsi - adc [rdi + 16], rdx - adc [rdi + 24], rcx - adc [rdi + 32], r8 - adc [rdi + 40], r9 - adc [rdi + 48], r10 - adc [rdi + 56], r11 - ret - -.global fp_sub2 -fp_sub2: - mov rdx, rdi - xchg rsi, rdx - jmp fp_sub3 - - -/* Montgomery arithmetic */ - -.global fp_enc -fp_enc: - lea rdx, [rip + .r_squared_mod_p] - jmp fp_mul3 - -.global fp_dec -fp_dec: - lea rdx, [rip + u512_1] - jmp fp_mul3 - -.global fp_mul3 -fp_mul3: - push rbp - push rbx - push r12 - push r13 - push r14 - push r15 - - push rdi - - mov rdi, rsi - mov rsi, rdx - - xor r8, r8 - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - xor rbp, rbp - - /* flags are already cleared */ - -.macro MULSTEP, k, r0, r1, r2, r3, r4, r5, r6, r7, r8 - - mov rdx, [rsi + 0] - mulx rcx, rdx, [rdi + 8*\k] - add rdx, \r0 - mulx rcx, rdx, [rip + .inv_min_p_mod_r] - - xor rax, rax /* clear flags */ - - mulx rbx, rax, [rip + p + 0] - adox \r0, rax - - mulx rcx, rax, [rip + p + 8] - adcx \r1, rbx - adox \r1, rax - - mulx rbx, rax, [rip + p + 16] - adcx \r2, rcx - adox \r2, rax - - mulx rcx, rax, [rip + p + 24] - adcx \r3, rbx - adox \r3, rax - - mulx rbx, rax, [rip + p + 32] - adcx \r4, rcx - adox \r4, rax - - mulx rcx, rax, [rip + p + 40] - adcx \r5, rbx - adox \r5, rax - - mulx rbx, rax, [rip + p + 48] - adcx \r6, rcx - adox \r6, rax - - mulx rcx, rax, [rip + p + 56] - adcx \r7, rbx - adox \r7, rax - - mov rax, 0 - adcx \r8, rcx - adox \r8, rax - - - mov rdx, [rdi + 8*\k] - - xor rax, rax /* clear flags */ - - mulx rbx, rax, [rsi + 0] - adox \r0, rax - - mulx rcx, rax, [rsi + 8] - adcx \r1, rbx - adox \r1, rax - - mulx rbx, rax, [rsi + 16] - adcx \r2, rcx - adox \r2, rax - - mulx rcx, rax, [rsi + 24] - adcx \r3, rbx - adox \r3, rax - - mulx rbx, rax, [rsi + 32] - adcx \r4, rcx - adox \r4, rax - - mulx rcx, rax, [rsi + 40] - adcx \r5, rbx - adox \r5, rax - - mulx rbx, rax, [rsi + 48] - adcx \r6, rcx - adox \r6, rax - - mulx rcx, rax, [rsi + 56] - adcx \r7, rbx - adox \r7, rax - - mov rax, 0 - adcx \r8, rcx - adox \r8, rax - -.endm - - MULSTEP 0, r8, r9, r10, r11, r12, r13, r14, r15, rbp - MULSTEP 1, r9, r10, r11, r12, r13, r14, r15, rbp, r8 - MULSTEP 2, r10, r11, r12, r13, r14, r15, rbp, r8, r9 - MULSTEP 3, r11, r12, r13, r14, r15, rbp, r8, r9, r10 - MULSTEP 4, r12, r13, r14, r15, rbp, r8, r9, r10, r11 - MULSTEP 5, r13, r14, r15, rbp, r8, r9, r10, r11, r12 - MULSTEP 6, r14, r15, rbp, r8, r9, r10, r11, r12, r13 - MULSTEP 7, r15, rbp, r8, r9, r10, r11, r12, r13, r14 - - pop rdi - - mov [rdi + 0], rbp - mov [rdi + 8], r8 - mov [rdi + 16], r9 - mov [rdi + 24], r10 - mov [rdi + 32], r11 - mov [rdi + 40], r12 - mov [rdi + 48], r13 - mov [rdi + 56], r14 - - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp - jmp .reduce_once - -.global fp_mul2 -fp_mul2: - mov rdx, rdi - jmp fp_mul3 - -.global fp_sq2 -fp_sq2: - /* TODO implement optimized Montgomery squaring */ - mov rdx, rsi - jmp fp_mul3 - -.global fp_sq1 -fp_sq1: - mov rsi, rdi - jmp fp_sq2 - -/* (obviously) not constant time in the exponent! */ -.fp_pow: - push rbx - mov rbx, rsi - push r12 - push r13 - push rdi - sub rsp, 64 - - mov rsi, rdi - mov rdi, rsp - call fp_copy - - mov rdi, [rsp + 64] - lea rsi, [rip + fp_1] - call fp_copy - -.macro POWSTEP, k - mov r13, [rbx + 8*\k] - xor r12, r12 - - 0: - test r13, 1 - jz 1f - - mov rdi, [rsp + 64] - mov rsi, rsp - call fp_mul2 - - 1: - mov rdi, rsp - call fp_sq1 - - shr r13 - - inc r12 - test r12, 64 - jz 0b -.endm - - POWSTEP 0 - POWSTEP 1 - POWSTEP 2 - POWSTEP 3 - POWSTEP 4 - POWSTEP 5 - POWSTEP 6 - POWSTEP 7 - - add rsp, 64+8 - pop r13 - pop r12 - pop rbx - ret - -.section .rodata -.p_minus_2: - .quad 0x1b81b90533c6c879, 0xc2721bf457aca835, 0x516730cc1f0b4f25, 0xa7aac6c567f35507 - .quad 0x5afbfcc69322c9cd, 0xb42d083aedc88c42, 0xfc8ab0d15e3e4c4a, 0x65b48e8f740f89bf - -.section .text - -/* TODO use a better addition chain? */ -.global fp_inv -fp_inv: - lea rsi, [rip + .p_minus_2] - jmp .fp_pow - -.section .rodata -.p_minus_1_halves: - .quad 0x8dc0dc8299e3643d, 0xe1390dfa2bd6541a, 0xa8b398660f85a792, 0xd3d56362b3f9aa83 - .quad 0x2d7dfe63499164e6, 0x5a16841d76e44621, 0xfe455868af1f2625, 0x32da4747ba07c4df - -.section .text - -/* TODO use a better addition chain? */ -.global fp_issquare -fp_issquare: - push rdi - lea rsi, [rip + .p_minus_1_halves] - call .fp_pow - pop rdi - - xor rax, rax - .set k, 0 - .rept 8 - mov rsi, [rdi + 8*k] - xor rsi, [rip + fp_1 + 8*k] - or rax, rsi - .set k, k+1 - .endr - test rax, rax - setz al - movzx rax, al - ret - - -/* not constant time (but this shouldn't leak anything of importance) */ -.global fp_random -fp_random: - - push rdi - mov rsi, 64 - call randombytes - pop rdi - mov rax, 1 - shl rax, (pbits % 64) - dec rax - and [rdi + 56], rax - - .set k, 7 - .rept 8 - mov rax, [rip + p + 8*k] - cmp [rdi + 8*k], rax - jge fp_random - jl 0f - .set k, k-1 - .endr - 0: - ret - diff --git a/reference/csidh-20180427-by-castryck-et-al/main b/reference/csidh-20180427-by-castryck-et-al/main deleted file mode 100755 index d8e66de..0000000 Binary files a/reference/csidh-20180427-by-castryck-et-al/main and /dev/null differ diff --git a/reference/csidh-20180427-by-castryck-et-al/main.c b/reference/csidh-20180427-by-castryck-et-al/main.c deleted file mode 100644 index 6649bfc..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/main.c +++ /dev/null @@ -1,99 +0,0 @@ - -#include -#include -#include -#include -#include -#include - -#include "u512.h" -#include "fp.h" -#include "mont.h" -#include "csidh.h" - -void u512_print(u512 const *x) -{ - for (size_t i = 63; i < 64; --i) - printf("%02hhx", i[(unsigned char *) x->c]); -} - -void fp_print(fp const *x) -{ - u512 y; - fp_dec(&y, x); - u512_print(&y); -} - -int main() -{ - clock_t t0, t1; - - private_key priv_alice, priv_bob; - public_key pub_alice, pub_bob; - public_key shared_alice, shared_bob; - - printf("\n"); - - - t0 = clock(); - csidh_private(&priv_alice); - t1 = clock(); - - printf("Alice's private key (%7.3lf ms):\n ", 1000. * (t1 - t0) / CLOCKS_PER_SEC); - for (size_t i = 0; i < sizeof(priv_alice); ++i) - printf("%02hhx", i[(uint8_t *) &priv_alice]); - printf("\n\n"); - - t0 = clock(); - csidh_private(&priv_bob); - t1 = clock(); - - printf("Bob's private key (%7.3lf ms):\n ", 1000. * (t1 - t0) / CLOCKS_PER_SEC); - for (size_t i = 0; i < sizeof(priv_bob); ++i) - printf("%02hhx", i[(uint8_t *) &priv_bob]); - printf("\n\n"); - - - t0 = clock(); - assert(csidh(&pub_alice, &base, &priv_alice)); - t1 = clock(); - - printf("Alice's public key (%7.3lf ms):\n ", 1000. * (t1 - t0) / CLOCKS_PER_SEC); - fp_print(&pub_alice.A); - printf("\n\n"); - - t0 = clock(); - assert(csidh(&pub_bob, &base, &priv_bob)); - t1 = clock(); - - printf("Bob's public key (%7.3lf ms):\n ", 1000. * (t1 - t0) / CLOCKS_PER_SEC); - fp_print(&pub_bob.A); - printf("\n\n"); - - - t0 = clock(); - assert(csidh(&shared_alice, &pub_bob, &priv_alice)); - t1 = clock(); - - printf("Alice's shared secret (%7.3lf ms):\n ", 1000. * (t1 - t0) / CLOCKS_PER_SEC); - fp_print(&shared_alice.A); - printf("\n\n"); - - t0 = clock(); - assert(csidh(&shared_bob, &pub_alice, &priv_bob)); - t1 = clock(); - - printf("Bob's shared secret (%7.3lf ms):\n ", 1000. * (t1 - t0) / CLOCKS_PER_SEC); - fp_print(&shared_bob.A); - printf("\n\n"); - - printf(" "); - if (memcmp(&shared_alice, &shared_bob, sizeof(public_key))) - printf("\x1b[31mNOT EQUAL!\x1b[0m\n"); - else - printf("\x1b[32mequal.\x1b[0m\n"); - printf("\n"); - - printf("\n"); -} - diff --git a/reference/csidh-20180427-by-castryck-et-al/mont.c b/reference/csidh-20180427-by-castryck-et-al/mont.c deleted file mode 100644 index d2a6533..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/mont.c +++ /dev/null @@ -1,188 +0,0 @@ - -#include - -#include "mont.h" - -void xDBLADD(proj *R, proj *S, proj const *P, proj const *Q, proj const *PQ, proj const *A24) -{ - fp tmp0, tmp1, tmp2; - - fp_add3(&tmp0, &P->x, &P->z); - fp_sub3(&tmp1, &P->x, &P->z); - fp_sq2(&R->x, &tmp0); - fp_sub3(&tmp2, &Q->x, &Q->z); - fp_add3(&S->x, &Q->x, &Q->z); - fp_mul2(&tmp0, &tmp2); - fp_sq2(&R->z, &tmp1); - fp_mul2(&tmp1, &S->x); - fp_sub3(&tmp2, &R->x, &R->z); - fp_mul2(&R->z, &A24->z); - fp_mul2(&R->x, &R->z); - fp_mul3(&S->x, &A24->x, &tmp2); - fp_sub3(&S->z, &tmp0, &tmp1); - fp_add2(&R->z, &S->x); - fp_add3(&S->x, &tmp0, &tmp1); - fp_mul2(&R->z, &tmp2); - fp_sq1(&S->z); - fp_sq1(&S->x); - fp_mul2(&S->z, &PQ->x); - fp_mul2(&S->x, &PQ->z); -} - -void xDBL(proj *Q, proj const *A, proj const *P) -{ - fp a, b, c; - fp_add3(&a, &P->x, &P->z); - fp_sq1(&a); - fp_sub3(&b, &P->x, &P->z); - fp_sq1(&b); - fp_sub3(&c, &a, &b); - fp_add2(&b, &b); fp_add2(&b, &b); /* multiplication by 4 */ - fp_mul2(&b, &A->z); - fp_mul3(&Q->x, &a, &b); - fp_add3(&a, &A->z, &A->z); /* multiplication by 2 */ - fp_add2(&a, &A->x); - fp_mul2(&a, &c); - fp_add2(&a, &b); - fp_mul3(&Q->z, &a, &c); -} - -void xADD(proj *S, proj const *P, proj const *Q, proj const *PQ) -{ - fp a, b, c, d; - fp_add3(&a, &P->x, &P->z); - fp_sub3(&b, &P->x, &P->z); - fp_add3(&c, &Q->x, &Q->z); - fp_sub3(&d, &Q->x, &Q->z); - fp_mul2(&a, &d); - fp_mul2(&b, &c); - fp_add3(&c, &a, &b); - fp_sub3(&d, &a, &b); - fp_sq1(&c); - fp_sq1(&d); - fp_mul3(&S->x, &PQ->z, &c); - fp_mul3(&S->z, &PQ->x, &d); -} - -/* Montgomery ladder. */ -/* P must not be the unique point of order 2. */ -/* not constant-time! */ -void xMUL(proj *Q, proj const *A, proj const *P, u512 const *k) -{ - proj R = *P; - proj A24; - const proj Pcopy = *P; /* in case Q = P */ - - Q->x = fp_1; - Q->z = fp_0; - - fp_add3(&A24.x, &A->z, &A->z); - fp_add3(&A24.z, &A24.x, &A24.x); - fp_add2(&A24.x, &A->x); - - unsigned long i = 512; - while (--i && !u512_bit(k, i)); - - do { - - bool bit = u512_bit(k, i); - - if (bit) { proj T = *Q; *Q = R; R = T; } /* not constant-time */ - //fp_cswap(&Q->x, &R.x, bit); - //fp_cswap(&Q->z, &R.z, bit); - - xDBLADD(Q, &R, Q, &R, &Pcopy, &A24); - - if (bit) { proj T = *Q; *Q = R; R = T; } /* not constant-time */ - //fp_cswap(&Q->x, &R.x, bit); - //fp_cswap(&Q->z, &R.z, bit); - - } while (i--); -} - - -/* computes the isogeny with kernel point K of order k */ -/* returns the new curve coefficient A and the image of P */ -/* (obviously) not constant time in k */ -void xISOG(proj *A, proj *P, proj const *K, uint64_t k) -{ - assert (k >= 3); - assert (k % 2 == 1); - - fp tmp0, tmp1; - fp T[4] = {K->z, K->x, K->x, K->z}; - proj Q; - - fp_mul3(&Q.x, &P->x, &K->x); - fp_mul3(&tmp0, &P->z, &K->z); - fp_sub2(&Q.x, &tmp0); - - fp_mul3(&Q.z, &P->x, &K->z); - fp_mul3(&tmp0, &P->z, &K->x); - fp_sub2(&Q.z, &tmp0); - - proj M[3] = {*K}; - xDBL(&M[1], A, K); - - for (uint64_t i = 1; i < k / 2; ++i) { - - if (i >= 2) - xADD(&M[i % 3], &M[(i - 1) % 3], K, &M[(i - 2) % 3]); - - fp_mul3(&tmp0, &M[i % 3].x, &T[0]); - fp_mul3(&tmp1, &M[i % 3].z, &T[1]); - fp_add3(&T[0], &tmp0, &tmp1); - - fp_mul2(&T[1], &M[i % 3].x); - - fp_mul3(&tmp0, &M[i % 3].z, &T[2]); - fp_mul3(&tmp1, &M[i % 3].x, &T[3]); - fp_add3(&T[2], &tmp0, &tmp1); - - fp_mul2(&T[3], &M[i % 3].z); - - - fp_mul3(&tmp0, &P->x, &M[i % 3].x); - fp_mul3(&tmp1, &P->z, &M[i % 3].z); - fp_sub2(&tmp0, &tmp1); - fp_mul2(&Q.x, &tmp0); - - fp_mul3(&tmp0, &P->x, &M[i % 3].z); - fp_mul3(&tmp1, &P->z, &M[i % 3].x); - fp_sub2(&tmp0, &tmp1); - fp_mul2(&Q.z, &tmp0); - } - - fp_mul2(&T[0], &T[1]); - fp_add2(&T[0], &T[0]); /* multiplication by 2 */ - - fp_sq1(&T[1]); - - fp_mul2(&T[2], &T[3]); - fp_add2(&T[2], &T[2]); /* multiplication by 2 */ - - fp_sq1(&T[3]); - - /* Ax := T[1] * T[3] * Ax - 3 * Az * (T[1] * T[2] - T[0] * T[3]) */ - fp_mul3(&tmp0, &T[1], &T[2]); - fp_mul3(&tmp1, &T[0], &T[3]); - fp_sub2(&tmp0, &tmp1); - fp_mul2(&tmp0, &A->z); - fp_add3(&tmp1, &tmp0, &tmp0); fp_add2(&tmp0, &tmp1); /* multiplication by 3 */ - - fp_mul3(&tmp1, &T[1], &T[3]); - fp_mul2(&tmp1, &A->x); - - fp_sub3(&A->x, &tmp1, &tmp0); - - /* Az := Az * T[3]^2 */ - fp_sq1(&T[3]); - fp_mul2(&A->z, &T[3]); - - /* X := X * Xim^2, Z := Z * Zim^2 */ - fp_sq1(&Q.x); - fp_sq1(&Q.z); - fp_mul2(&P->x, &Q.x); - fp_mul2(&P->z, &Q.z); -} - diff --git a/reference/csidh-20180427-by-castryck-et-al/mont.h b/reference/csidh-20180427-by-castryck-et-al/mont.h deleted file mode 100644 index 36b3ad7..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/mont.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef MONT_H -#define MONT_H - -#include "u512.h" -#include "fp.h" - -/* P^1 over fp. */ -typedef struct proj { - fp x; - fp z; -} proj; - -void xDBL(proj *Q, proj const *A, proj const *P); -void xADD(proj *S, proj const *P, proj const *Q, proj const *PQ); -void xDBLADD(proj *R, proj *S, proj const *P, proj const *Q, proj const *PQ, proj const *A); -void xMUL(proj *Q, proj const *A, proj const *P, u512 const *k); -void xISOG(proj *A, proj *P, proj const *K, uint64_t k); - -#endif diff --git a/reference/csidh-20180427-by-castryck-et-al/rng.c b/reference/csidh-20180427-by-castryck-et-al/rng.c deleted file mode 100644 index fc28c87..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/rng.c +++ /dev/null @@ -1,18 +0,0 @@ - -#include "rng.h" - -#include -#include -#include - -void randombytes(void *x, size_t l) -{ - static int fd = -1; - ssize_t n; - if (fd < 0 && 0 > (fd = open("/dev/urandom", O_RDONLY))) - exit(1); - for (size_t i = 0; i < l; i += n) - if (0 >= (n = read(fd, (char *) x + i, l - i))) - exit(2); -} - diff --git a/reference/csidh-20180427-by-castryck-et-al/rng.h b/reference/csidh-20180427-by-castryck-et-al/rng.h deleted file mode 100644 index 30e01d0..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/rng.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef RNG_H -#define RNG_H - -#include - -void randombytes(void *x, size_t l); - -#endif diff --git a/reference/csidh-20180427-by-castryck-et-al/supersingular.sage b/reference/csidh-20180427-by-castryck-et-al/supersingular.sage deleted file mode 100755 index 928f695..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/supersingular.sage +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env sage -#coding: utf8 - -proof.all(False) - - -# parameters. - -ls = list(primes(3, 374)) + [587] # Elkies primes -#ls = list(primes(3, 47)) + [97] # (a smaller example) -p = 4 * prod(ls) - 1 -assert is_prime(p) -print "\nElkies primes:", " ".join(map(str, ls)) - -max_exp = ceil((sqrt(p) ** (1/len(ls)) - 1) / 2) -assert (2 * max_exp + 1) ** len(ls) >= sqrt(p) -print "exponents are chosen in the range {}..{}.".format(-max_exp, max_exp) - -base = GF(p)(0) # Montgomery coefficient of starting curve - - -# helper functions. - -# NB: all the operations can be computed entirely over the prime field, -# but for simplicity of this implementation we will make use of curves -# defined over GF(p^2). note this slows everything down quite a bit. - -Fp2. = GF(p**2, modulus = x**2 + 1) - -def montgomery_curve(A): - return EllipticCurve(Fp2, [0, A, 0, 1, 0]) - -# sage's isogeny formulas return Weierstraß curves, hence we need this... -def montgomery_coefficient(E): - Ew = E.change_ring(GF(p)).short_weierstrass_model() - _, _, _, a, b = Ew.a_invariants() - R. = GF(p)[] - r = (z**3 + a*z + b).roots(multiplicities=False)[0] - s = sqrt(3 * r**2 + a) - if not is_square(s): s = -s - A = 3 * r / s - assert montgomery_curve(A).change_ring(GF(p)).is_isomorphic(Ew) - return GF(p)(A) - - -# actual implementation. - -def private(): - return [randrange(-max_exp, max_exp + 1) for _ in range(len(ls))] - -def validate(A): - while True: - k = 1 - P = montgomery_curve(A).lift_x(GF(p).random_element()) - for l in ls: - Q = (p + 1) // l * P - if not Q: continue - if l * Q: return False - k *= l - if k > 4 * sqrt(p): return True - -def action(pub, priv): - - E = montgomery_curve(pub) - es = priv[:] - - while any(es): - - E._order = (p + 1)**2 # else sage computes this - - P = E.lift_x(GF(p).random_element()) - s = +1 if P.xy()[1] in GF(p) else -1 - k = prod(l for l, e in zip(ls, es) if sign(e) == s) - P *= (p + 1) // k - - for i, (l, e) in enumerate(zip(ls, es)): - - if sign(e) != s: continue - - Q = k // l * P - if not Q: continue - Q._order = l # else sage computes this - phi = E.isogeny(Q) - - E, P = phi.codomain(), phi(P) - es[i] -= s - k //= l - - return montgomery_coefficient(E) - - -# example. - -print - -print "testing public-key validation on random ordinary curves (should be all 0s):\n ", -for _ in range(16): - while True: - A = GF(p).random_element() - if montgomery_curve(A).is_ordinary(): break - print int(validate(A)), -print - -privA = private() -print "\nAlice's private key:\n ", " ".join(map('{:2d}'.format, privA)) - -pubA = action(base, privA) -print "\nAlice's public key:\n ", pubA, -print " (valid: {})".format(int(validate(pubA))) - -privB = private() -print "\nBob's private key:\n ", " ".join(map('{:2d}'.format, privB)) - -pubB = action(base, privB) -print "\nBob's public key:\n ", pubB, -print " (valid: {})".format(int(validate(pubB))) - -sharedA = action(pubB, privA) -print "\nAlice's shared secret:\n ", sharedA - -sharedB = action(pubA, privB) -print "\nBob's shared secret:\n ", sharedB - -if sharedA == sharedB: - print "\n--> equal!\n" -else: - print "\n--> NOT EQUAL?!\n" - diff --git a/reference/csidh-20180427-by-castryck-et-al/u512.h b/reference/csidh-20180427-by-castryck-et-al/u512.h deleted file mode 100644 index 6eaae93..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/u512.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef UINT_H -#define UINT_H - -#include -#include - -typedef struct u512 { - uint64_t c[8]; -} u512; - -extern const u512 u512_1; - -void u512_set(u512 *x, uint64_t y); - -bool u512_bit(u512 const *x, uint64_t k); - -bool u512_add3(u512 *x, u512 const *y, u512 const *z); /* returns carry */ -bool u512_sub3(u512 *x, u512 const *y, u512 const *z); /* returns borrow */ - -void u512_mul3_64(u512 *x, u512 const *y, uint64_t z); - -#endif diff --git a/reference/csidh-20180427-by-castryck-et-al/u512.s b/reference/csidh-20180427-by-castryck-et-al/u512.s deleted file mode 100644 index 779be3e..0000000 --- a/reference/csidh-20180427-by-castryck-et-al/u512.s +++ /dev/null @@ -1,102 +0,0 @@ - -.intel_syntax noprefix - -.section .rodata - -.global u512_1 -u512_1: .quad 1, 0, 0, 0, 0, 0, 0, 0 - - -.section .text - -.global u512_set -u512_set: - cld - mov rax, rsi - stosq - xor rax, rax - mov rcx, 7 - rep stosq - ret - - -.global u512_bit -u512_bit: - mov rcx, rsi - and rcx, 0x3f - shr rsi, 6 - mov rax, [rdi + 8*rsi] - shr rax, cl - and rax, 1 - ret - - -.global u512_add3 -u512_add3: - mov rax, [rsi + 0] - add rax, [rdx + 0] - mov [rdi + 0], rax - .set k, 1 - .rept 7 - mov rax, [rsi + 8*k] - adc rax, [rdx + 8*k] - mov [rdi + 8*k], rax - .set k, k+1 - .endr - setc al - movzx rax, al - ret - -.global u512_sub3 -u512_sub3: - mov rax, [rsi + 0] - sub rax, [rdx + 0] - mov [rdi + 0], rax - .set k, 1 - .rept 7 - mov rax, [rsi + 8*k] - sbb rax, [rdx + 8*k] - mov [rdi + 8*k], rax - .set k, k+1 - .endr - setc al - movzx rax, al - ret - - -.global u512_mul3_64 -u512_mul3_64: - - mulx r10, rax, [rsi + 0] - mov [rdi + 0], rax - - mulx r11, rax, [rsi + 8] - add rax, r10 - mov [rdi + 8], rax - - mulx r10, rax, [rsi + 16] - adcx rax, r11 - mov [rdi + 16], rax - - mulx r11, rax, [rsi + 24] - adcx rax, r10 - mov [rdi + 24], rax - - mulx r10, rax, [rsi + 32] - adcx rax, r11 - mov [rdi + 32],rax - - mulx r11, rax, [rsi + 40] - adcx rax, r10 - mov [rdi + 40],rax - - mulx r10, rax, [rsi + 48] - adcx rax, r11 - mov [rdi + 48],rax - - mulx r11, rax, [rsi + 56] - adcx rax, r10 - mov [rdi + 56],rax - - ret -