@@ -0,0 +1,14 @@ | |||
# Drone CI pipeline (exec runner): fetch submodules, configure with CMake,
# build, then run the test binary.
kind: pipeline
type: exec
name: default

steps:
- name: build
  commands:
  # --init together with --recursive also initialises nested submodules;
  # a separate "git submodule init" only registers the top-level ones.
  # NOTE(review): --remote checks out the tip of each submodule's tracking
  # branch rather than the commit recorded in this repository, so CI results
  # can change without any commit here. Confirm that this is intended.
  - git submodule update --init --recursive --remote
  # -p keeps the step from failing when the build directory already exists.
  - mkdir -p build
  - cd build
  - cmake ..
  - make
  - ./test
@@ -2,12 +2,19 @@ set( | |||
SRC_CLEAN_SIKE_P434 | |||
p434/fpx.c | |||
p434/fp_generic.c | |||
p434/fp_glue.c | |||
p434/fp-x86_64.S | |||
p434/isogeny.c | |||
p434/params.c | |||
p434/sike.c) | |||
# Build the x86-64 assembly only when targeting that architecture.
# ARCH is quoted so that an unset or empty ARCH makes the comparison false
# instead of producing a malformed if() expression.
if("${ARCH}" STREQUAL "ARCH_x86_64")
  # PQC_ASM is the macro the C sources test to call the assembly routines.
  add_definitions(-DPQC_ASM=1)
  set(
    SRC_CLEAN_SIKE_P434
    ${SRC_CLEAN_SIKE_P434}
    p434/fp-x86_64.S
  )
endif()
define_kem_alg( | |||
sike_p434_clean | |||
PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -103,154 +103,7 @@ sike_fpadd_asm: | |||
.cfi_adjust_cfa_offset -8 | |||
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
.globl sike_cswap_asm
.hidden sike_cswap_asm
.type sike_cswap_asm,@function
/*
 * sike_cswap_asm: branch-free conditional swap of two 224-byte buffers.
 *
 *   %rdi, %rsi  addresses of the two buffers (both read and written)
 *   %rdx        64-bit mask selecting the bits to exchange:
 *               all ones swaps the buffers, zero leaves them unchanged
 *
 * Clobbers %xmm0-%xmm3.
 */
sike_cswap_asm:
	/* Copy the mask into both 64-bit lanes of %xmm3. */
	movq %rdx,%xmm3
	pshufd $68,%xmm3,%xmm3

	/*
	 * For each 16-byte chunk at offsets 0..208:
	 *   t = (a ^ b) & mask;  a ^= t;  b ^= t
	 * .irp expands to the same fourteen unrolled stanzas, in offset order.
	 */
.irp off,0,16,32,48,64,80,96,112,128,144,160,176,192,208
	movdqu \off(%rdi),%xmm0
	movdqu \off(%rsi),%xmm1
	movdqa %xmm1,%xmm2
	pxor %xmm0,%xmm2
	pand %xmm3,%xmm2
	pxor %xmm2,%xmm0
	pxor %xmm2,%xmm1
	movdqu %xmm0,\off(%rdi)
	movdqu %xmm1,\off(%rsi)
.endr

	/* Bytes F3 C3 encode "rep ret". */
	.byte 0xf3,0xc3
.globl sike_fpsub_asm | |||
.hidden sike_fpsub_asm | |||
.type sike_fpsub_asm,@function | |||
@@ -8,6 +8,13 @@ | |||
#include "utils.h" | |||
#include "fpx.h" | |||
// Prototypes of the assembly field-arithmetic routines.
// They are guarded by PQC_ASM, the same macro that guards the call sites
// below, so the declarations and their uses are enabled or disabled together.
#ifdef PQC_ASM
void sike_fprdc_asm(const felm_t ma, felm_t mc);
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
#endif
// Global constants | |||
extern const struct params_t params; | |||
@@ -51,6 +58,9 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w | |||
// Output: c in [0, 2*p434-1] | |||
void sike_fpadd(const felm_t a, const felm_t b, felm_t c) | |||
{ | |||
#ifdef PQC_ASM | |||
sike_fpadd_asm(a,b,c); | |||
#else | |||
unsigned int i, carry = 0; | |||
crypto_word_t mask; | |||
@@ -68,12 +78,16 @@ void sike_fpadd(const felm_t a, const felm_t b, felm_t c) | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]); | |||
} | |||
#endif | |||
} | |||
void sike_fpsub(const felm_t a, const felm_t b, felm_t c) | |||
{ // Modular subtraction, c = a-b mod p434. | |||
// Inputs: a, b in [0, 2*p434-1] | |||
// Output: c in [0, 2*p434-1] | |||
#ifdef PQC_ASM | |||
sike_fpsub_asm(a,b,c); | |||
#else | |||
unsigned int i, borrow = 0; | |||
crypto_word_t mask; | |||
@@ -86,21 +100,22 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c) | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]); | |||
} | |||
#endif | |||
} | |||
// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. | |||
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c); | |||
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) | |||
{ | |||
unsigned int i, j; | |||
crypto_word_t t = 0, u = 0, v = 0, UV[2]; | |||
unsigned int carry = 0; | |||
// TODO: faster would be to use bitmap | |||
#ifdef PQC_ASM | |||
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | |||
sike_mpmul_asm(a,b,c); | |||
return; | |||
} | |||
#endif | |||
unsigned int i, j; | |||
crypto_word_t t = 0, u = 0, v = 0, UV[2]; | |||
unsigned int carry = 0; | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
for (j = 0; j <= i; j++) { | |||
@@ -134,14 +149,14 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) | |||
// mc = ma*R^-1 mod p434x2, where R = 2^448. | |||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. | |||
// ma is assumed to be in Montgomery representation. | |||
void sike_fprdc_asm(const felm_t ma, felm_t mc); | |||
void sike_fprdc(const felm_t ma, felm_t mc) | |||
{ | |||
#ifdef PQC_ASM | |||
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | |||
sike_fprdc_asm(ma, mc); | |||
return; | |||
} | |||
#endif | |||
unsigned int i, j, carry, count = ZERO_WORDS; | |||
crypto_word_t UV[2], t = 0, u = 0, v = 0; | |||
@@ -1,4 +0,0 @@ | |||
#include "fpx.h" | |||
#include "utils.h" | |||
void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c); |
@@ -110,7 +110,7 @@ static void fpinv_mont(felm_t a) | |||
} | |||
// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. | |||
#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64)) | |||
#ifndef PQC_ASM | |||
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { | |||
uint8_t carry = 0; | |||
for (size_t i = 0; i < nwords; i++) { | |||
@@ -132,31 +132,31 @@ inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, cons | |||
// Multiprecision addition, c = a+b, over NWORDS_FIELD words.
// Uses the assembly routine when PQC_ASM is defined, otherwise the portable
// mp_add(); any carry out is discarded.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_mpadd_asm(a, b, c);
#else
    mp_add(a, b, c, NWORDS_FIELD);
#endif
}
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0.
// Uses the assembly routine when PQC_ASM is defined, otherwise the portable
// mp_sub().
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifdef PQC_ASM
    return sike_mpsubx2_asm(a, b, c);
#else
    // Presumably mp_sub() returns the borrow as 0 or 1 (TODO confirm);
    // 0 - borrow then gives the all-zeros / all-ones mask.
    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
#endif
}
// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// Inputs should be s.t. c > a and c > b.
// Uses the assembly routine when PQC_ASM is defined, otherwise two portable
// mp_sub() calls.
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifdef PQC_ASM
    sike_mpdblsubx2_asm(a, b, c);
#else
    mp_sub(c, a, c, 2*NWORDS_FIELD);  // c = c - a
    mp_sub(c, b, c, 2*NWORDS_FIELD);  // c = c - b
#endif
}
@@ -31,8 +31,6 @@ void sike_fpneg(felm_t a); | |||
void sike_fpcopy(const felm_t a, felm_t c); | |||
// Copy a field element, c = a. | |||
void sike_fpzero(felm_t a); | |||
// If option = 0xFF...FF, x and y are swapped; if option = 0, both are left unchanged. Constant time.
void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option); | |||
// Conversion from Montgomery representation to standard representation, | |||
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. | |||
void sike_from_mont(const felm_t ma, felm_t c); | |||
@@ -30,8 +30,7 @@ extern const struct params_t params; | |||
// Swap points. | |||
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P | |||
#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC) | |||
static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) | |||
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) | |||
{ | |||
crypto_word_t temp; | |||
for (size_t i = 0; i < NWORDS_FIELD; i++) { | |||
@@ -49,18 +48,6 @@ static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t optio | |||
Q->Z->c1[i] = temp ^ Q->Z->c1[i]; | |||
} | |||
} | |||
#endif | |||
// Conditionally exchange two projective points.
// option = 0 leaves P and Q as they are; option = 0xFF...FF exchanges them.
// Generic or non-x86-64 builds use the C routine; all other builds use the
// assembly routine.
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
    sike_cswap(P, Q, option);
#else
    sike_cswap_asm(P, Q, option);
#endif
}
static void ladder3Pt( | |||
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m, | |||