Henry Case пре 3 година
committed by Henry Case
родитељ
комит
744461b0ff
8 измењених фајлова са 57 додато и 187 уклоњено
  1. +14
    -0
      buid.dbg/.drone.yml
  2. +9
    -2
      src/kem/sike/CMakeLists.txt
  3. +0
    -147
      src/kem/sike/p434/fp-x86_64.S
  4. +23
    -8
      src/kem/sike/p434/fp_generic.c
  5. +0
    -4
      src/kem/sike/p434/fp_glue.c
  6. +10
    -10
      src/kem/sike/p434/fpx.c
  7. +0
    -2
      src/kem/sike/p434/fpx.h
  8. +1
    -14
      src/kem/sike/p434/sike.c

+ 14
- 0
buid.dbg/.drone.yml Прегледај датотеку

@@ -0,0 +1,14 @@
# Drone CI pipeline with a single "build" step: fetch git submodules,
# configure with CMake in a fresh build directory, compile with make,
# and run the resulting ./test binary.
kind: pipeline
# NOTE(review): "exec" pipelines presumably run the commands directly on the
# runner host rather than in a container - confirm against the Drone docs.
type: exec
name: default

steps:
- name: build
commands:
# Register the submodules listed in .gitmodules.
- git submodule init
# --remote moves each submodule to the tip of its tracked remote branch
# instead of the commit recorded in this repository.
- git submodule update --recursive --remote
# Out-of-source build: everything below happens inside ./build.
- mkdir build
- cd build
- cmake ..
- make
# Run the test executable from the build directory.
- ./test

+ 9
- 2
src/kem/sike/CMakeLists.txt Прегледај датотеку

@@ -2,12 +2,19 @@ set(
SRC_CLEAN_SIKE_P434
p434/fpx.c
p434/fp_generic.c
p434/fp_glue.c
p434/fp-x86_64.S
p434/isogeny.c
p434/params.c
p434/sike.c)

# Add the hand-written x86-64 assembly and define PQC_ASM=1 only when the
# target architecture is x86-64.
#
# ARCH is expanded inside quotes so that:
#   * the condition stays well-formed when ARCH is empty or unset (an
#     unquoted ${ARCH} would expand to nothing and make if() fail), and
#   * the value is compared as a plain string instead of being looked up
#     again as a variable name (requires policy CMP0054 set to NEW, which is
#     the default with cmake_minimum_required(VERSION 3.1) or later).
if("${ARCH}" STREQUAL "ARCH_x86_64")
  add_definitions(-DPQC_ASM=1)
  list(APPEND SRC_CLEAN_SIKE_P434 p434/fp-x86_64.S)
endif()

define_kem_alg(
sike_p434_clean
PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 147
src/kem/sike/p434/fp-x86_64.S Прегледај датотеку

@@ -103,154 +103,7 @@ sike_fpadd_asm:
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
.cfi_endproc
/*
 * sike_cswap_asm: branch-free conditional swap of two 224-byte buffers.
 *
 * Registers read on entry:
 *   %rdi - address of the first buffer
 *   %rsi - address of the second buffer
 *   %rdx - 64-bit mask applied to every 64-bit lane
 *
 * For each 16-byte chunk at offsets 0, 16, ..., 208 the code computes
 *   t = (a ^ b) & mask;   a ^= t;   b ^= t;
 * so bits selected by the mask are exchanged between the buffers and all
 * other bits are written back unchanged. A mask of 0 leaves both buffers
 * as they were; a mask of all ones swaps them completely.
 *
 * The same sequence of loads, ALU operations and stores is executed for
 * every mask value; there are no branches and no mask-dependent addresses.
 *
 * Scratch registers: %xmm0, %xmm1, %xmm2, %xmm3.
 * Memory accesses use movdqu, so no 16-byte alignment is required.
 * NOTE(review): 224 bytes is presumably sizeof(point_proj_t) for p434 - confirm.
 */
.globl sike_cswap_asm
.hidden sike_cswap_asm
.type sike_cswap_asm,@function
sike_cswap_asm:
/* Move the mask into the low 64 bits of %xmm3, then replicate it into the
   high 64 bits: immediate 68 = 0b01000100 selects dwords 0,1,0,1. */
movq %rdx,%xmm3
pshufd $68,%xmm3,%xmm3

/* Bytes 0..15. Every following chunk repeats this exact pattern. */
/* Load a (from %rdi) and b (from %rsi). */
movdqu 0(%rdi),%xmm0
movdqu 0(%rsi),%xmm1
/* t = (a ^ b) & mask */
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
/* a ^= t; b ^= t */
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
/* Store both results back in place. */
movdqu %xmm0,0(%rdi)
movdqu %xmm1,0(%rsi)

/* Bytes 16..31 */
movdqu 16(%rdi),%xmm0
movdqu 16(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,16(%rdi)
movdqu %xmm1,16(%rsi)

/* Bytes 32..47 */
movdqu 32(%rdi),%xmm0
movdqu 32(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,32(%rdi)
movdqu %xmm1,32(%rsi)

/* Bytes 48..63 */
movdqu 48(%rdi),%xmm0
movdqu 48(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,48(%rdi)
movdqu %xmm1,48(%rsi)

/* Bytes 64..79 */
movdqu 64(%rdi),%xmm0
movdqu 64(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,64(%rdi)
movdqu %xmm1,64(%rsi)

/* Bytes 80..95 */
movdqu 80(%rdi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,80(%rdi)
movdqu %xmm1,80(%rsi)

/* Bytes 96..111 */
movdqu 96(%rdi),%xmm0
movdqu 96(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,96(%rdi)
movdqu %xmm1,96(%rsi)

/* Bytes 112..127 */
movdqu 112(%rdi),%xmm0
movdqu 112(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,112(%rdi)
movdqu %xmm1,112(%rsi)

/* Bytes 128..143 */
movdqu 128(%rdi),%xmm0
movdqu 128(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,128(%rdi)
movdqu %xmm1,128(%rsi)

/* Bytes 144..159 */
movdqu 144(%rdi),%xmm0
movdqu 144(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,144(%rdi)
movdqu %xmm1,144(%rsi)

/* Bytes 160..175 */
movdqu 160(%rdi),%xmm0
movdqu 160(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,160(%rdi)
movdqu %xmm1,160(%rsi)

/* Bytes 176..191 */
movdqu 176(%rdi),%xmm0
movdqu 176(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,176(%rdi)
movdqu %xmm1,176(%rsi)

/* Bytes 192..207 */
movdqu 192(%rdi),%xmm0
movdqu 192(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,192(%rdi)
movdqu %xmm1,192(%rsi)

/* Bytes 208..223 (last chunk) */
movdqu 208(%rdi),%xmm0
movdqu 208(%rsi),%xmm1
movdqa %xmm1,%xmm2
pxor %xmm0,%xmm2
pand %xmm3,%xmm2
pxor %xmm2,%xmm0
pxor %xmm2,%xmm1
movdqu %xmm0,208(%rdi)
movdqu %xmm1,208(%rsi)

/* Return: bytes F3 C3 are the encoding of "rep ret". */
.byte 0xf3,0xc3
.globl sike_fpsub_asm
.hidden sike_fpsub_asm
.type sike_fpsub_asm,@function


+ 23
- 8
src/kem/sike/p434/fp_generic.c Прегледај датотеку

@@ -8,6 +8,13 @@
#include "utils.h"
#include "fpx.h"

#ifndef PQC_NOASM
void sike_fprdc_asm(const felm_t ma, felm_t mc);
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
#endif

// Global constants
extern const struct params_t params;

@@ -51,6 +58,9 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w
// Output: c in [0, 2*p434-1]
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
sike_fpadd_asm(a,b,c);
#else
unsigned int i, carry = 0;
crypto_word_t mask;

@@ -68,12 +78,16 @@ void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]);
}
#endif
}

void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
{ // Modular subtraction, c = a-b mod p434.
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#ifdef PQC_ASM
sike_fpsub_asm(a,b,c);
#else
unsigned int i, borrow = 0;
crypto_word_t mask;

@@ -86,21 +100,22 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]);
}
#endif
}

// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
{
unsigned int i, j;
crypto_word_t t = 0, u = 0, v = 0, UV[2];
unsigned int carry = 0;

// TODO: faster would be to use bitmap
#ifdef PQC_ASM
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
sike_mpmul_asm(a,b,c);
return;
}
#endif

unsigned int i, j;
crypto_word_t t = 0, u = 0, v = 0, UV[2];
unsigned int carry = 0;

for (i = 0; i < NWORDS_FIELD; i++) {
for (j = 0; j <= i; j++) {
@@ -134,14 +149,14 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.
void sike_fprdc_asm(const felm_t ma, felm_t mc);
void sike_fprdc(const felm_t ma, felm_t mc)
{
#ifdef PQC_ASM
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
sike_fprdc_asm(ma, mc);
return;
}
#endif
unsigned int i, j, carry, count = ZERO_WORDS;
crypto_word_t UV[2], t = 0, u = 0, v = 0;



+ 0
- 4
src/kem/sike/p434/fp_glue.c Прегледај датотеку

@@ -1,4 +0,0 @@
#include "fpx.h"
#include "utils.h"

void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c);

+ 10
- 10
src/kem/sike/p434/fpx.c Прегледај датотеку

@@ -110,7 +110,7 @@ static void fpinv_mont(felm_t a)
}

// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
#ifndef PQC_ASM
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
uint8_t carry = 0;
for (size_t i = 0; i < nwords; i++) {
@@ -132,31 +132,31 @@ inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, cons
// Multiprecision addition, c = a+b.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
mp_add(a, b, c, NWORDS_FIELD);
#else
#ifdef PQC_ASM
sike_mpadd_asm(a, b, c);
#else
mp_add(a, b, c, NWORDS_FIELD);
#endif
}

// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
#else
#ifdef PQC_ASM
return sike_mpsubx2_asm(a, b, c);
#else
return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
#endif
}

// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// Inputs should be s.t. c > a and c > b
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
#ifdef PQC_ASM
sike_mpdblsubx2_asm(a, b, c);
#else
mp_sub(c, a, c, 2*NWORDS_FIELD);
mp_sub(c, b, c, 2*NWORDS_FIELD);
#else
sike_mpdblsubx2_asm(a, b, c);
#endif
}



+ 0
- 2
src/kem/sike/p434/fpx.h Прегледај датотеку

@@ -31,8 +31,6 @@ void sike_fpneg(felm_t a);
void sike_fpcopy(const felm_t a, felm_t c);
// Copy a field element, c = a.
void sike_fpzero(felm_t a);
// If option = 0xFF...FF, x and y are swapped; if option = 0, both are left unchanged. Constant time.
void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
// Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
void sike_from_mont(const felm_t ma, felm_t c);


+ 1
- 14
src/kem/sike/p434/sike.c Прегледај датотеку

@@ -30,8 +30,7 @@ extern const struct params_t params;

// Swap points.
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
crypto_word_t temp;
for (size_t i = 0; i < NWORDS_FIELD; i++) {
@@ -49,18 +48,6 @@ static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t optio
Q->Z->c1[i] = temp ^ Q->Z->c1[i];
}
}
#endif

// Swap points.
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
#if defined(ARCH_X86_64) && !defined(ARCH_GENERIC)
sike_cswap_asm(P, Q, option);
#else
sike_cswap(P, Q, option);
#endif
}

static void ladder3Pt(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,


Loading…
Откажи
Сачувај