@@ -0,0 +1,14 @@ | |||||
# Drone CI pipeline using the exec runner: fetch submodules, configure with
# CMake in an out-of-tree directory, build, then run the test binary.
kind: pipeline
type: exec
name: default

steps:
- name: build
  commands:
  # --init together with --recursive initialises nested submodules as well;
  # a separate top-level `git submodule init` only registers the first level.
  # NOTE(review): --remote moves each submodule to the tip of its tracking
  # branch instead of the commit recorded in this repository - confirm that
  # non-pinned submodules are intended for CI.
  - git submodule update --init --recursive --remote
  # -p keeps the step from failing when the build directory already exists.
  - mkdir -p build
  - cd build
  - cmake ..
  - make
  - ./test
@@ -2,12 +2,19 @@ set( | |||||
SRC_CLEAN_SIKE_P434 | SRC_CLEAN_SIKE_P434 | ||||
p434/fpx.c | p434/fpx.c | ||||
p434/fp_generic.c | p434/fp_generic.c | ||||
p434/fp_glue.c | |||||
p434/fp-x86_64.S | |||||
p434/isogeny.c | p434/isogeny.c | ||||
p434/params.c | p434/params.c | ||||
p434/sike.c) | p434/sike.c) | ||||
# Add the x86-64 assembly source and enable the PQC_ASM code paths only when
# the build targets that architecture.
# ARCH is quoted so that an unset or empty value compares as an empty string
# instead of leaving if() with a malformed condition.
if("${ARCH}" STREQUAL "ARCH_x86_64")
  add_definitions(-DPQC_ASM=1)
  list(APPEND SRC_CLEAN_SIKE_P434 p434/fp-x86_64.S)
endif()
define_kem_alg( | define_kem_alg( | ||||
sike_p434_clean | sike_p434_clean | ||||
PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}") | PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -103,154 +103,7 @@ sike_fpadd_asm: | |||||
.cfi_adjust_cfa_offset -8 | .cfi_adjust_cfa_offset -8 | ||||
.byte 0xf3,0xc3 | .byte 0xf3,0xc3 | ||||
.cfi_endproc | .cfi_endproc | ||||
// sike_cswap_asm: masked swap of two 224-byte buffers.
//   %rdi, %rsi : the two buffers (both are read and written)
//   %rdx       : 64-bit mask; each bit that is set selects the swap for the
//                corresponding bit position of every 64-bit word
// Every 16-byte lane is processed the same way regardless of the mask value:
//   t = (x ^ y) & mask;  x ^= t;  y ^= t
.globl sike_cswap_asm
.hidden sike_cswap_asm
.type sike_cswap_asm,@function
sike_cswap_asm:
	// Copy the mask into xmm3 and replicate its low 64 bits into both halves.
	movq %rdx,%xmm3
	pshufd $68,%xmm3,%xmm3
	// One stanza per 16-byte lane, offsets 0..208 (14 lanes = 224 bytes).
	.irp off,0,16,32,48,64,80,96,112,128,144,160,176,192,208
	movdqu \off(%rdi),%xmm0
	movdqu \off(%rsi),%xmm1
	movdqa %xmm1,%xmm2
	pxor %xmm0,%xmm2
	pand %xmm3,%xmm2
	pxor %xmm2,%xmm0
	pxor %xmm2,%xmm1
	movdqu %xmm0,\off(%rdi)
	movdqu %xmm1,\off(%rsi)
	.endr
	// repz ret
	.byte 0xf3,0xc3
.globl sike_fpsub_asm | .globl sike_fpsub_asm | ||||
.hidden sike_fpsub_asm | .hidden sike_fpsub_asm | ||||
.type sike_fpsub_asm,@function | .type sike_fpsub_asm,@function | ||||
@@ -8,6 +8,13 @@ | |||||
#include "utils.h" | #include "utils.h" | ||||
#include "fpx.h" | #include "fpx.h" | ||||
// Prototypes of the assembly field-arithmetic routines.
// They are guarded by PQC_ASM, the same macro that guards every call to them
// in this file, so a call can never be compiled without its declaration.
#ifdef PQC_ASM
void sike_fprdc_asm(const felm_t ma, felm_t mc);
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
#endif
// Global constants | // Global constants | ||||
extern const struct params_t params; | extern const struct params_t params; | ||||
@@ -51,6 +58,9 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w | |||||
// Output: c in [0, 2*p434-1] | // Output: c in [0, 2*p434-1] | ||||
void sike_fpadd(const felm_t a, const felm_t b, felm_t c) | void sike_fpadd(const felm_t a, const felm_t b, felm_t c) | ||||
{ | { | ||||
#ifdef PQC_ASM | |||||
sike_fpadd_asm(a,b,c); | |||||
#else | |||||
unsigned int i, carry = 0; | unsigned int i, carry = 0; | ||||
crypto_word_t mask; | crypto_word_t mask; | ||||
@@ -68,12 +78,16 @@ void sike_fpadd(const felm_t a, const felm_t b, felm_t c) | |||||
for (i = 0; i < NWORDS_FIELD; i++) { | for (i = 0; i < NWORDS_FIELD; i++) { | ||||
ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]); | ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]); | ||||
} | } | ||||
#endif | |||||
} | } | ||||
void sike_fpsub(const felm_t a, const felm_t b, felm_t c) | void sike_fpsub(const felm_t a, const felm_t b, felm_t c) | ||||
{ // Modular subtraction, c = a-b mod p434. | { // Modular subtraction, c = a-b mod p434. | ||||
// Inputs: a, b in [0, 2*p434-1] | // Inputs: a, b in [0, 2*p434-1] | ||||
// Output: c in [0, 2*p434-1] | // Output: c in [0, 2*p434-1] | ||||
#ifdef PQC_ASM | |||||
sike_fpsub_asm(a,b,c); | |||||
#else | |||||
unsigned int i, borrow = 0; | unsigned int i, borrow = 0; | ||||
crypto_word_t mask; | crypto_word_t mask; | ||||
@@ -86,21 +100,22 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c) | |||||
for (i = 0; i < NWORDS_FIELD; i++) { | for (i = 0; i < NWORDS_FIELD; i++) { | ||||
ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]); | ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]); | ||||
} | } | ||||
#endif | |||||
} | } | ||||
// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. | // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. | ||||
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c); | |||||
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) | void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) | ||||
{ | { | ||||
unsigned int i, j; | |||||
crypto_word_t t = 0, u = 0, v = 0, UV[2]; | |||||
unsigned int carry = 0; | |||||
// TODO: faster would be to use bitmap | |||||
#ifdef PQC_ASM | |||||
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | ||||
sike_mpmul_asm(a,b,c); | sike_mpmul_asm(a,b,c); | ||||
return; | return; | ||||
} | } | ||||
#endif | |||||
unsigned int i, j; | |||||
crypto_word_t t = 0, u = 0, v = 0, UV[2]; | |||||
unsigned int carry = 0; | |||||
for (i = 0; i < NWORDS_FIELD; i++) { | for (i = 0; i < NWORDS_FIELD; i++) { | ||||
for (j = 0; j <= i; j++) { | for (j = 0; j <= i; j++) { | ||||
@@ -134,14 +149,14 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) | |||||
// mc = ma*R^-1 mod p434x2, where R = 2^448. | // mc = ma*R^-1 mod p434x2, where R = 2^448. | ||||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. | // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. | ||||
// ma is assumed to be in Montgomery representation. | // ma is assumed to be in Montgomery representation. | ||||
void sike_fprdc_asm(const felm_t ma, felm_t mc); | |||||
void sike_fprdc(const felm_t ma, felm_t mc) | void sike_fprdc(const felm_t ma, felm_t mc) | ||||
{ | { | ||||
#ifdef PQC_ASM | |||||
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | ||||
sike_fprdc_asm(ma, mc); | sike_fprdc_asm(ma, mc); | ||||
return; | return; | ||||
} | } | ||||
#endif | |||||
unsigned int i, j, carry, count = ZERO_WORDS; | unsigned int i, j, carry, count = ZERO_WORDS; | ||||
crypto_word_t UV[2], t = 0, u = 0, v = 0; | crypto_word_t UV[2], t = 0, u = 0, v = 0; | ||||
@@ -1,4 +0,0 @@ | |||||
#include "fpx.h" | |||||
#include "utils.h" | |||||
void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c); |
@@ -110,7 +110,7 @@ static void fpinv_mont(felm_t a) | |||||
} | } | ||||
// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. | // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. | ||||
#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64)) | |||||
#ifndef PQC_ASM | |||||
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { | inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { | ||||
uint8_t carry = 0; | uint8_t carry = 0; | ||||
for (size_t i = 0; i < nwords; i++) { | for (size_t i = 0; i < nwords; i++) { | ||||
@@ -132,31 +132,31 @@ inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, cons | |||||
// Multiprecision addition, c = a+b.
// With PQC_ASM defined the assembly routine is used; otherwise the portable
// mp_add() runs over NWORDS_FIELD words and its carry result is discarded.
// A single PQC_ASM guard is used because mp_add() itself is only compiled
// under #ifndef PQC_ASM.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_mpadd_asm(a, b, c);
#else
    mp_add(a, b, c, NWORDS_FIELD);
#endif
}
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
// With PQC_ASM defined the assembly routine supplies the result; otherwise the
// portable mp_sub() is used, guarded by the single PQC_ASM macro.
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifdef PQC_ASM
    return sike_mpsubx2_asm(a, b, c);
#else
    // 0 - r turns an mp_sub() result of 1 into an all-ones word and leaves 0
    // as zero (presumably r is the final borrow bit - confirm against mp_sub).
    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
#endif
}
// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// Inputs should be s.t. c > a and c > b
// With PQC_ASM defined the assembly routine is used; otherwise the result is
// formed by two in-place mp_sub() calls, whose return values are ignored.
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifdef PQC_ASM
    sike_mpdblsubx2_asm(a, b, c);
#else
    mp_sub(c, a, c, 2*NWORDS_FIELD);
    mp_sub(c, b, c, 2*NWORDS_FIELD);
#endif
}
@@ -31,8 +31,6 @@ void sike_fpneg(felm_t a); | |||||
void sike_fpcopy(const felm_t a, felm_t c); | void sike_fpcopy(const felm_t a, felm_t c); | ||||
// Copy a field element, c = a. | // Copy a field element, c = a. | ||||
void sike_fpzero(felm_t a); | void sike_fpzero(felm_t a); | ||||
// If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time. | |||||
void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option); | |||||
// Conversion from Montgomery representation to standard representation, | // Conversion from Montgomery representation to standard representation, | ||||
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. | // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. | ||||
void sike_from_mont(const felm_t ma, felm_t c); | void sike_from_mont(const felm_t ma, felm_t c); | ||||
@@ -30,8 +30,7 @@ extern const struct params_t params; | |||||
// Swap points. | // Swap points. | ||||
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P | // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P | ||||
#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC) | |||||
static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) | |||||
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) | |||||
{ | { | ||||
crypto_word_t temp; | crypto_word_t temp; | ||||
for (size_t i = 0; i < NWORDS_FIELD; i++) { | for (size_t i = 0; i < NWORDS_FIELD; i++) { | ||||
@@ -49,18 +48,6 @@ static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t optio | |||||
Q->Z->c1[i] = temp ^ Q->Z->c1[i]; | Q->Z->c1[i] = temp ^ Q->Z->c1[i]; | ||||
} | } | ||||
} | } | ||||
#endif | |||||
// Swap points. | |||||
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P | |||||
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) | |||||
{ | |||||
#if defined(ARCH_X86_64) && !defined(ARCH_GENERIC) | |||||
sike_cswap_asm(P, Q, option); | |||||
#else | |||||
sike_cswap(P, Q, option); | |||||
#endif | |||||
} | |||||
static void ladder3Pt( | static void ladder3Pt( | ||||
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m, | const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m, | ||||