From 744461b0ff24d2845e3e33027e5d90aba885f234 Mon Sep 17 00:00:00 2001 From: Kris Kwiatkowski Date: Sun, 18 Apr 2021 23:59:38 +0100 Subject: [PATCH] add drone.yml --- buid.dbg/.drone.yml | 14 ++++ src/kem/sike/CMakeLists.txt | 11 ++- src/kem/sike/p434/fp-x86_64.S | 147 --------------------------------- src/kem/sike/p434/fp_generic.c | 31 +++++-- src/kem/sike/p434/fp_glue.c | 4 - src/kem/sike/p434/fpx.c | 20 ++--- src/kem/sike/p434/fpx.h | 2 - src/kem/sike/p434/sike.c | 15 +--- 8 files changed, 57 insertions(+), 187 deletions(-) create mode 100644 buid.dbg/.drone.yml delete mode 100644 src/kem/sike/p434/fp_glue.c diff --git a/buid.dbg/.drone.yml b/buid.dbg/.drone.yml new file mode 100644 index 00000000..41c554ac --- /dev/null +++ b/buid.dbg/.drone.yml @@ -0,0 +1,14 @@ +kind: pipeline +type: exec +name: default + +steps: +- name: build + commands: + - git submodule init + - git submodule update --recursive --remote + - mkdir build + - cd build + - cmake .. + - make + - ./test diff --git a/src/kem/sike/CMakeLists.txt b/src/kem/sike/CMakeLists.txt index 8eb116cc..f5543d67 100644 --- a/src/kem/sike/CMakeLists.txt +++ b/src/kem/sike/CMakeLists.txt @@ -2,12 +2,19 @@ set( SRC_CLEAN_SIKE_P434 p434/fpx.c p434/fp_generic.c - p434/fp_glue.c - p434/fp-x86_64.S p434/isogeny.c p434/params.c p434/sike.c) +if(${ARCH} STREQUAL "ARCH_x86_64") +add_definitions(-DPQC_ASM=1) +set( + SRC_CLEAN_SIKE_P434 + ${SRC_CLEAN_SIKE_P434} + p434/fp-x86_64.S +) +endif() + define_kem_alg( sike_p434_clean PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/kem/sike/p434/fp-x86_64.S b/src/kem/sike/p434/fp-x86_64.S index e6f30b27..1e40a2d5 100644 --- a/src/kem/sike/p434/fp-x86_64.S +++ b/src/kem/sike/p434/fp-x86_64.S @@ -103,154 +103,7 @@ sike_fpadd_asm: .cfi_adjust_cfa_offset -8 .byte 0xf3,0xc3 .cfi_endproc -.globl sike_cswap_asm -.hidden sike_cswap_asm -.type sike_cswap_asm,@function -sike_cswap_asm: - movq %rdx,%xmm3 - pshufd $68,%xmm3,%xmm3 - - movdqu 0(%rdi),%xmm0 - movdqu 0(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,0(%rdi) - movdqu %xmm1,0(%rsi) - - movdqu 16(%rdi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,16(%rdi) - movdqu %xmm1,16(%rsi) - - movdqu 32(%rdi),%xmm0 - movdqu 32(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,32(%rdi) - movdqu %xmm1,32(%rsi) - - movdqu 48(%rdi),%xmm0 - movdqu 48(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,48(%rdi) - movdqu %xmm1,48(%rsi) - - movdqu 64(%rdi),%xmm0 - movdqu 64(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,64(%rdi) - movdqu %xmm1,64(%rsi) - - movdqu 80(%rdi),%xmm0 - movdqu 80(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,80(%rdi) - movdqu %xmm1,80(%rsi) - - movdqu 96(%rdi),%xmm0 - movdqu 96(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,96(%rdi) - movdqu %xmm1,96(%rsi) - - movdqu 112(%rdi),%xmm0 - movdqu 112(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,112(%rdi) - movdqu %xmm1,112(%rsi) - - movdqu 128(%rdi),%xmm0 - movdqu 128(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,128(%rdi) - movdqu %xmm1,128(%rsi) - - movdqu 144(%rdi),%xmm0 - movdqu 144(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,144(%rdi) - movdqu %xmm1,144(%rsi) - - movdqu 160(%rdi),%xmm0 - movdqu 160(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,160(%rdi) - movdqu %xmm1,160(%rsi) - - movdqu 176(%rdi),%xmm0 - movdqu 176(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,176(%rdi) - movdqu %xmm1,176(%rsi) - - movdqu 192(%rdi),%xmm0 - movdqu 192(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,192(%rdi) - movdqu %xmm1,192(%rsi) - - movdqu 208(%rdi),%xmm0 - movdqu 208(%rsi),%xmm1 - movdqa %xmm1,%xmm2 - pxor %xmm0,%xmm2 - pand %xmm3,%xmm2 - pxor %xmm2,%xmm0 - pxor %xmm2,%xmm1 - movdqu %xmm0,208(%rdi) - movdqu %xmm1,208(%rsi) - .byte 0xf3,0xc3 .globl sike_fpsub_asm .hidden sike_fpsub_asm .type sike_fpsub_asm,@function diff --git a/src/kem/sike/p434/fp_generic.c b/src/kem/sike/p434/fp_generic.c index 38e29926..020d452c 100644 --- a/src/kem/sike/p434/fp_generic.c +++ b/src/kem/sike/p434/fp_generic.c @@ -8,6 +8,13 @@ #include "utils.h" #include "fpx.h" +#ifndef PQC_NOASM +void sike_fprdc_asm(const felm_t ma, felm_t mc); +void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c); +void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c); +void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c); +#endif + // Global constants extern const struct params_t params; @@ -51,6 +58,9 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w // Output: c in [0, 2*p434-1] void sike_fpadd(const felm_t a, const felm_t b, felm_t c) { +#ifdef PQC_ASM + sike_fpadd_asm(a,b,c); +#else unsigned int i, carry = 0; crypto_word_t mask; @@ -68,12 +78,16 @@ void sike_fpadd(const felm_t a, const felm_t b, felm_t c) for (i = 0; i < NWORDS_FIELD; i++) { ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]); } +#endif } void sike_fpsub(const felm_t a, const felm_t b, felm_t c) { // Modular subtraction, c = a-b mod p434. // Inputs: a, b in [0, 2*p434-1] // Output: c in [0, 2*p434-1] +#ifdef PQC_ASM + sike_fpsub_asm(a,b,c); +#else unsigned int i, borrow = 0; crypto_word_t mask; @@ -86,21 +100,22 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c) for (i = 0; i < NWORDS_FIELD; i++) { ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]); } +#endif } // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. -void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c); void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) { - unsigned int i, j; - crypto_word_t t = 0, u = 0, v = 0, UV[2]; - unsigned int carry = 0; - - // TODO: faster would be to use bitmap +#ifdef PQC_ASM if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { sike_mpmul_asm(a,b,c); return; } +#endif + + unsigned int i, j; + crypto_word_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; for (i = 0; i < NWORDS_FIELD; i++) { for (j = 0; j <= i; j++) { @@ -134,14 +149,14 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) // mc = ma*R^-1 mod p434x2, where R = 2^448. // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. // ma is assumed to be in Montgomery representation. -void sike_fprdc_asm(const felm_t ma, felm_t mc); void sike_fprdc(const felm_t ma, felm_t mc) { +#ifdef PQC_ASM if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { sike_fprdc_asm(ma, mc); return; } - +#endif unsigned int i, j, carry, count = ZERO_WORDS; crypto_word_t UV[2], t = 0, u = 0, v = 0; diff --git a/src/kem/sike/p434/fp_glue.c b/src/kem/sike/p434/fp_glue.c deleted file mode 100644 index 0495a3a4..00000000 --- a/src/kem/sike/p434/fp_glue.c +++ /dev/null @@ -1,4 +0,0 @@ -#include "fpx.h" -#include "utils.h" - -void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c); \ No newline at end of file diff --git a/src/kem/sike/p434/fpx.c b/src/kem/sike/p434/fpx.c index 30233406..44c6481c 100644 --- a/src/kem/sike/p434/fpx.c +++ b/src/kem/sike/p434/fpx.c @@ -110,7 +110,7 @@ static void fpinv_mont(felm_t a) } // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. -#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64)) +#ifndef PQC_ASM inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { uint8_t carry = 0; for (size_t i = 0; i < nwords; i++) { @@ -132,31 +132,31 @@ inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, cons // Multiprecision addition, c = a+b. inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c) { -#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64)) - mp_add(a, b, c, NWORDS_FIELD); -#else +#ifdef PQC_ASM sike_mpadd_asm(a, b, c); +#else + mp_add(a, b, c, NWORDS_FIELD); #endif } // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. // If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0 inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) { -#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64)) - return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); -#else +#ifdef PQC_ASM return sike_mpsubx2_asm(a, b, c); +#else + return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); #endif } // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. // Inputs should be s.t. c > a and c > b inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) { -#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64)) +#ifdef PQC_ASM + sike_mpdblsubx2_asm(a, b, c); +#else mp_sub(c, a, c, 2*NWORDS_FIELD); mp_sub(c, b, c, 2*NWORDS_FIELD); -#else - sike_mpdblsubx2_asm(a, b, c); #endif } diff --git a/src/kem/sike/p434/fpx.h b/src/kem/sike/p434/fpx.h index b9255ac7..fc51d0e9 100644 --- a/src/kem/sike/p434/fpx.h +++ b/src/kem/sike/p434/fpx.h @@ -31,8 +31,6 @@ void sike_fpneg(felm_t a); void sike_fpcopy(const felm_t a, felm_t c); // Copy a field element, c = a. void sike_fpzero(felm_t a); -// If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time. -void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option); // Conversion from Montgomery representation to standard representation, // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. void sike_from_mont(const felm_t ma, felm_t c); diff --git a/src/kem/sike/p434/sike.c b/src/kem/sike/p434/sike.c index 83a9dc1d..27ea1e4b 100644 --- a/src/kem/sike/p434/sike.c +++ b/src/kem/sike/p434/sike.c @@ -30,8 +30,7 @@ extern const struct params_t params; // Swap points. // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P -#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC) -static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) +static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) { crypto_word_t temp; for (size_t i = 0; i < NWORDS_FIELD; i++) { @@ -49,18 +48,6 @@ static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t optio Q->Z->c1[i] = temp ^ Q->Z->c1[i]; } } -#endif - -// Swap points. -// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P -static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) -{ -#if defined(ARCH_X86_64) && !defined(ARCH_GENERIC) - sike_cswap_asm(P, Q, option); -#else - sike_cswap(P, Q, option); -#endif -} static void ladder3Pt( const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,