add drone.yml

3 年前 · 744461b0ff
--- a/buid.dbg/.drone.yml
+++ b/buid.dbg/.drone.yml
@@ -0,0 +1,14 @@
 # Drone CI pipeline: fetch submodules, configure with CMake, build with make,
 # then run the resulting ./test binary from the build directory.
 kind: pipeline
 type: exec
 name: default

 steps:
 - name: build
   # `commands` must line up with `name` so both keys belong to the same step.
   commands:
   # Submodules are tracked at their remote branch tip (--remote).
   - git submodule init
   - git submodule update --recursive --remote
   # Out-of-source build in ./build.
   - mkdir build
   - cd build
   - cmake ..
   - make
   # Any non-zero exit status from the test binary fails the step.
   - ./test
--- a/src/kem/sike/CMakeLists.txt
+++ b/src/kem/sike/CMakeLists.txt
@@ -2,12 +2,19 @@ set(
  SRC_CLEAN_SIKE_P434
  p434/fpx.c
  p434/fp_generic.c
  p434/fp_glue.c
  p434/fp-x86_64.S
  p434/isogeny.c
  p434/params.c
  p434/sike.c)

 # Enable the hand-written x86-64 assembly only when ARCH selects that target.
 # ARCH is quoted so that an empty or unset value compares as a plain string
 # instead of producing a malformed if() expression.
 if("${ARCH}" STREQUAL "ARCH_x86_64")
   # PQC_ASM is what the C sources test (#ifdef PQC_ASM) before calling the
   # assembly routines.
   # NOTE(review): add_definitions() is directory-scoped; consider a
   # target-scoped definition once the target name created by
   # define_kem_alg() is confirmed.
   add_definitions(-DPQC_ASM=1)
   list(APPEND SRC_CLEAN_SIKE_P434 p434/fp-x86_64.S)
 endif()

 # Register the SIKE p434 "clean" KEM with the project helper.
 # The source list is passed quoted so it arrives as a single list argument.
 # NOTE(review): argument meanings (name, identifier, sources, directory) are
 # inferred from the call site; confirm against the define_kem_alg definition.
 define_kem_alg(
   sike_p434_clean
   PQC_SIKEP434_CLEAN
   "${SRC_CLEAN_SIKE_P434}"
   "${CMAKE_CURRENT_SOURCE_DIR}")
--- a/src/kem/sike/p434/fp-x86_64.S
+++ b/src/kem/sike/p434/fp-x86_64.S
@@ -103,154 +103,7 @@ sike_fpadd_asm:
 .cfi_adjust_cfa_offset	-8
 	.byte	0xf3,0xc3
 .cfi_endproc
 /*
  * sike_cswap_asm: branch-free conditional swap of two 224-byte buffers
  * (14 blocks of 16 bytes, offsets 0..208).
  *
  * Register usage as read by the code below:
  *   %rdi - address of the first buffer
  *   %rsi - address of the second buffer
  *   %rdx - 64-bit mask; each block computes t = (a ^ b) & mask, then
  *          a ^= t and b ^= t, so an all-ones mask exchanges the buffers
  *          and a zero mask leaves them unchanged.
  * NOTE(review): presumably these map to the C arguments (x, y, option)
  * under the System V AMD64 calling convention - confirm against callers.
  *
  * Clobbers %xmm0-%xmm3. All loads and stores are unaligned (movdqu).
  */
 .globl	sike_cswap_asm
 .hidden sike_cswap_asm
 .type	sike_cswap_asm,@function
 sike_cswap_asm:
 /* Broadcast the 64-bit mask into both halves of %xmm3
    (pshufd imm 68 = 0x44 selects dwords 0,1,0,1). */
 	movq	%rdx,%xmm3
 	pshufd	$68,%xmm3,%xmm3

 /* Bytes 0..15: xmm2 = (a ^ b) & mask; a ^= xmm2; b ^= xmm2. */
 	movdqu	0(%rdi),%xmm0
 	movdqu	0(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,0(%rdi)
 	movdqu	%xmm1,0(%rsi)

 /* Bytes 16..31: same masked-XOR sequence. */
 	movdqu	16(%rdi),%xmm0
 	movdqu	16(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,16(%rdi)
 	movdqu	%xmm1,16(%rsi)

 /* Bytes 32..47. */
 	movdqu	32(%rdi),%xmm0
 	movdqu	32(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,32(%rdi)
 	movdqu	%xmm1,32(%rsi)

 /* Bytes 48..63. */
 	movdqu	48(%rdi),%xmm0
 	movdqu	48(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,48(%rdi)
 	movdqu	%xmm1,48(%rsi)

 /* Bytes 64..79. */
 	movdqu	64(%rdi),%xmm0
 	movdqu	64(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,64(%rdi)
 	movdqu	%xmm1,64(%rsi)

 /* Bytes 80..95. */
 	movdqu	80(%rdi),%xmm0
 	movdqu	80(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,80(%rdi)
 	movdqu	%xmm1,80(%rsi)

 /* Bytes 96..111. */
 	movdqu	96(%rdi),%xmm0
 	movdqu	96(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,96(%rdi)
 	movdqu	%xmm1,96(%rsi)

 /* Bytes 112..127. */
 	movdqu	112(%rdi),%xmm0
 	movdqu	112(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,112(%rdi)
 	movdqu	%xmm1,112(%rsi)

 /* Bytes 128..143. */
 	movdqu	128(%rdi),%xmm0
 	movdqu	128(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,128(%rdi)
 	movdqu	%xmm1,128(%rsi)

 /* Bytes 144..159. */
 	movdqu	144(%rdi),%xmm0
 	movdqu	144(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,144(%rdi)
 	movdqu	%xmm1,144(%rsi)

 /* Bytes 160..175. */
 	movdqu	160(%rdi),%xmm0
 	movdqu	160(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,160(%rdi)
 	movdqu	%xmm1,160(%rsi)

 /* Bytes 176..191. */
 	movdqu	176(%rdi),%xmm0
 	movdqu	176(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,176(%rdi)
 	movdqu	%xmm1,176(%rsi)

 /* Bytes 192..207. */
 	movdqu	192(%rdi),%xmm0
 	movdqu	192(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,192(%rdi)
 	movdqu	%xmm1,192(%rsi)

 /* Bytes 208..223 (last block). */
 	movdqu	208(%rdi),%xmm0
 	movdqu	208(%rsi),%xmm1
 	movdqa	%xmm1,%xmm2
 	pxor	%xmm0,%xmm2
 	pand	%xmm3,%xmm2
 	pxor	%xmm2,%xmm0
 	pxor	%xmm2,%xmm1
 	movdqu	%xmm0,208(%rdi)
 	movdqu	%xmm1,208(%rsi)

 /* 0xf3,0xc3 is the encoding of `rep ret`. */
 	.byte	0xf3,0xc3
 .globl	sike_fpsub_asm
 .hidden sike_fpsub_asm
 .type	sike_fpsub_asm,@function
--- a/src/kem/sike/p434/fp_generic.c
+++ b/src/kem/sike/p434/fp_generic.c
@@ -8,6 +8,13 @@
 #include "utils.h"
 #include "fpx.h"

 // Prototypes of the field-arithmetic routines implemented in assembly.
 // NOTE(review): these declarations are guarded by PQC_NOASM, whereas the call
 // sites in this file are guarded by PQC_ASM - confirm the two macros are meant
 // to differ.
 #ifndef PQC_NOASM
 // Reduction counterpart of sike_fprdc: reads ma, writes mc.
 void sike_fprdc_asm(const felm_t ma, felm_t mc);
 // Multiplication counterpart of sike_mpmul: reads a and b, writes the double-width c.
 void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
 // Addition counterpart of sike_fpadd: reads a and b, writes c.
 void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
 // Subtraction counterpart of sike_fpsub: reads a and b, writes c.
 void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
 #endif

 // Global constants
 extern const struct params_t params;

@@ -51,6 +58,9 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w
 // Output: c in [0, 2*p434-1]
 void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
 {
 #ifdef PQC_ASM
    sike_fpadd_asm(a,b,c);
 #else
    unsigned int i, carry = 0;
    crypto_word_t mask;

@@ -68,12 +78,16 @@ void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]);
    }
 #endif
 }

 void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
 { // Modular subtraction, c = a-b mod p434.
  // Inputs: a, b in [0, 2*p434-1]
  // Output: c in [0, 2*p434-1]
 #ifdef PQC_ASM
    sike_fpsub_asm(a,b,c);
 #else
    unsigned int i, borrow = 0;
    crypto_word_t mask;

@@ -86,21 +100,22 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
    for (i = 0; i < NWORDS_FIELD; i++) {
        ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]);
    }
 #endif
 }

 // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
 void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
 void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
 {
    unsigned int i, j;
    crypto_word_t t = 0, u = 0, v = 0, UV[2];
    unsigned int carry = 0;

    // TODO: faster would be to use bitmap
 #ifdef PQC_ASM
    if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
        sike_mpmul_asm(a,b,c);
        return;
    }
 #endif

    unsigned int i, j;
    crypto_word_t t = 0, u = 0, v = 0, UV[2];
    unsigned int carry = 0;

    for (i = 0; i < NWORDS_FIELD; i++) {
        for (j = 0; j <= i; j++) {
@@ -134,14 +149,14 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
 // mc = ma*R^-1 mod p434x2, where R = 2^448.
 // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
 // ma is assumed to be in Montgomery representation.
 void sike_fprdc_asm(const felm_t ma, felm_t mc);
 void sike_fprdc(const felm_t ma, felm_t mc)
 {
 #ifdef PQC_ASM
    if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
        sike_fprdc_asm(ma, mc);
        return;
    }

 #endif
    unsigned int i, j, carry, count = ZERO_WORDS;
    crypto_word_t UV[2], t = 0, u = 0, v = 0;

--- a/src/kem/sike/p434/fp_glue.c
+++ b/src/kem/sike/p434/fp_glue.c
@@ -1,4 +0,0 @@
 #include "fpx.h"
 #include "utils.h"

 void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c);
--- a/src/kem/sike/p434/fpx.c
+++ b/src/kem/sike/p434/fpx.c
@@ -110,7 +110,7 @@ static void fpinv_mont(felm_t a)
 }

 // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
 #if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
 #ifndef PQC_ASM
 inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint8_t carry = 0;
    for (size_t i = 0; i < nwords; i++) {
@@ -132,31 +132,31 @@ inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, cons
 // Multiprecision addition, c = a+b.
 inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
 {
 #if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
    mp_add(a, b, c, NWORDS_FIELD);
 #else
 #ifdef PQC_ASM
    sike_mpadd_asm(a, b, c);
 #else
    mp_add(a, b, c, NWORDS_FIELD);
 #endif
 }

 // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
 // If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
 inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
 #if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
 #else
 #ifdef PQC_ASM
    return sike_mpsubx2_asm(a, b, c);
 #else
    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
 #endif
 }

 // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
 // Inputs should be s.t. c > a and c > b
 inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
 #if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
 #ifdef PQC_ASM
    sike_mpdblsubx2_asm(a, b, c);
 #else
    mp_sub(c, a, c, 2*NWORDS_FIELD);
    mp_sub(c, b, c, 2*NWORDS_FIELD);
 #else
    sike_mpdblsubx2_asm(a, b, c);
 #endif
 }

--- a/src/kem/sike/p434/fpx.h
+++ b/src/kem/sike/p434/fpx.h
@@ -31,8 +31,6 @@ void sike_fpneg(felm_t a);
 void sike_fpcopy(const felm_t a, felm_t c);
 // Copy a field element, c = a.
 void sike_fpzero(felm_t a);
 // If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time.
 void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
 // Conversion from Montgomery representation to standard representation,
 // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
 void sike_from_mont(const felm_t ma, felm_t c);
--- a/src/kem/sike/p434/sike.c
+++ b/src/kem/sike/p434/sike.c
@@ -30,8 +30,7 @@ extern const struct params_t params;

 // Swap points.
 // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
 #if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
 static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
 static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
 {
    crypto_word_t temp;
    for (size_t i = 0; i < NWORDS_FIELD; i++) {
@@ -49,18 +48,6 @@ static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t optio
        Q->Z->c1[i] = temp ^ Q->Z->c1[i];
    }
 }
 #endif

 // Conditionally exchange two projective points.
 // option == 0 leaves P and Q as they are; option == 0xFF...FF exchanges them.
 // The portable sike_cswap is used for generic or non-x86-64 builds; every
 // other build dispatches to the assembly routine sike_cswap_asm.
 static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
 {
 #if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
     sike_cswap(P, Q, option);
 #else
     sike_cswap_asm(P, Q, option);
 #endif
 }

 static void ladder3Pt(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,