SIKE: enable optimized version
This commit is contained in:
parent
8711dcce1a
commit
c18ca419a8
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -7,3 +7,6 @@
|
|||||||
[submodule "3rd/gbench"]
|
[submodule "3rd/gbench"]
|
||||||
path = 3rd/gbench
|
path = 3rd/gbench
|
||||||
url = https://github.com/henrydcase/benchmark.git
|
url = https://github.com/henrydcase/benchmark.git
|
||||||
|
[submodule "3rd/cpu_features"]
|
||||||
|
path = 3rd/cpu_features
|
||||||
|
url = https://github.com/google/cpu_features.git
|
||||||
|
1
3rd/cpu_features
Submodule
1
3rd/cpu_features
Submodule
@@ -0,0 +1 @@
|
|||||||
|
Subproject commit 3e8243b7d9951c078259c3186c039a6e8f036055
|
@@ -34,6 +34,7 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(3rd/gtest)
|
add_subdirectory(3rd/gtest)
|
||||||
|
add_subdirectory(3rd/cpu_features)
|
||||||
|
|
||||||
# Arch settings
|
# Arch settings
|
||||||
|
|
||||||
@@ -82,6 +83,7 @@ include_directories(
|
|||||||
public
|
public
|
||||||
src/common/
|
src/common/
|
||||||
src
|
src
|
||||||
|
3rd/cpu_features/include
|
||||||
)
|
)
|
||||||
|
|
||||||
set_property(GLOBAL PROPERTY obj_libs "")
|
set_property(GLOBAL PROPERTY obj_libs "")
|
||||||
@@ -243,12 +245,14 @@ get_property(OBJ_LIBS GLOBAL PROPERTY obj_libs)
|
|||||||
target_link_libraries(
|
target_link_libraries(
|
||||||
pqc
|
pqc
|
||||||
common
|
common
|
||||||
|
cpu_features
|
||||||
${OBJ_LIBS}
|
${OBJ_LIBS}
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(
|
target_link_libraries(
|
||||||
pqc_s
|
pqc_s
|
||||||
common
|
common
|
||||||
|
cpu_features
|
||||||
${OBJ_LIBS}
|
${OBJ_LIBS}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <pqc/pqc.h>
|
#include <pqc/pqc.h>
|
||||||
|
#include <cpuinfo_x86.h>
|
||||||
|
|
||||||
#include "schemes.h"
|
#include "schemes.h"
|
||||||
|
|
||||||
@@ -126,3 +127,9 @@ bool pqc_sig_verify(const params_t *p,
|
|||||||
const uint8_t *pk) {
|
const uint8_t *pk) {
|
||||||
return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk);
|
return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
X86Features CPU_CAPS;
|
||||||
|
void static_initialization(void) __attribute__((constructor));
|
||||||
|
void static_initialization(void) {
|
||||||
|
CPU_CAPS = GetX86Info().features;
|
||||||
|
}
|
||||||
|
@@ -1,12 +1,13 @@
|
|||||||
set(
|
set(
|
||||||
SRC_CLEAN_SIKE_P434
|
SRC_CLEAN_SIKE_P434
|
||||||
p434/fpx.c
|
p434/fpx.c
|
||||||
p434/isogeny.c
|
|
||||||
p434/fp_generic.c
|
p434/fp_generic.c
|
||||||
|
p434/fp_glue.c
|
||||||
|
p434/fp-x86_64.S
|
||||||
|
p434/isogeny.c
|
||||||
p434/params.c
|
p434/params.c
|
||||||
p434/sike.c)
|
p434/sike.c)
|
||||||
|
|
||||||
define_kem_alg(
|
define_kem_alg(
|
||||||
sike_p434_clean
|
sike_p434_clean
|
||||||
PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")
|
PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
|
||||||
|
@@ -15,10 +15,10 @@
|
|||||||
.quad 0x6CFC5FD681C52056
|
.quad 0x6CFC5FD681C52056
|
||||||
.quad 0x0002341F27177344
|
.quad 0x0002341F27177344
|
||||||
|
|
||||||
.globl sike_fpadd
|
.globl sike_fpadd_asm
|
||||||
.hidden sike_fpadd
|
.hidden sike_fpadd_asm
|
||||||
.type sike_fpadd,@function
|
.type sike_fpadd_asm,@function
|
||||||
sike_fpadd:
|
sike_fpadd_asm:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %r12
|
pushq %r12
|
||||||
.cfi_adjust_cfa_offset 8
|
.cfi_adjust_cfa_offset 8
|
||||||
@@ -107,14 +107,7 @@ sike_fpadd:
|
|||||||
.hidden sike_cswap_asm
|
.hidden sike_cswap_asm
|
||||||
.type sike_cswap_asm,@function
|
.type sike_cswap_asm,@function
|
||||||
sike_cswap_asm:
|
sike_cswap_asm:
|
||||||
|
|
||||||
|
|
||||||
movq %rdx,%xmm3
|
movq %rdx,%xmm3
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
pshufd $68,%xmm3,%xmm3
|
pshufd $68,%xmm3,%xmm3
|
||||||
|
|
||||||
movdqu 0(%rdi),%xmm0
|
movdqu 0(%rdi),%xmm0
|
||||||
@@ -258,10 +251,10 @@ sike_cswap_asm:
|
|||||||
movdqu %xmm1,208(%rsi)
|
movdqu %xmm1,208(%rsi)
|
||||||
|
|
||||||
.byte 0xf3,0xc3
|
.byte 0xf3,0xc3
|
||||||
.globl sike_fpsub
|
.globl sike_fpsub_asm
|
||||||
.hidden sike_fpsub
|
.hidden sike_fpsub_asm
|
||||||
.type sike_fpsub,@function
|
.type sike_fpsub_asm,@function
|
||||||
sike_fpsub:
|
sike_fpsub_asm:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %r12
|
pushq %r12
|
||||||
.cfi_adjust_cfa_offset 8
|
.cfi_adjust_cfa_offset 8
|
||||||
@@ -508,10 +501,10 @@ sike_mpdblsubx2_asm:
|
|||||||
.byte 0xf3,0xc3
|
.byte 0xf3,0xc3
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
|
|
||||||
.globl sike_fprdc
|
.globl sike_fprdc_asm
|
||||||
.hidden sike_fprdc
|
.hidden sike_fprdc_asm
|
||||||
.type sike_fprdc,@function
|
.type sike_fprdc_asm,@function
|
||||||
sike_fprdc:
|
sike_fprdc_asm:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %r12
|
pushq %r12
|
||||||
.cfi_adjust_cfa_offset 8
|
.cfi_adjust_cfa_offset 8
|
||||||
@@ -723,10 +716,10 @@ sike_fprdc:
|
|||||||
.cfi_adjust_cfa_offset -8
|
.cfi_adjust_cfa_offset -8
|
||||||
.byte 0xf3,0xc3
|
.byte 0xf3,0xc3
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
.globl sike_mpmul
|
.globl sike_mpmul_asm
|
||||||
.hidden sike_mpmul
|
.hidden sike_mpmul_asm
|
||||||
.type sike_mpmul,@function
|
.type sike_mpmul_asm,@function
|
||||||
sike_mpmul:
|
sike_mpmul_asm:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %r12
|
pushq %r12
|
||||||
.cfi_adjust_cfa_offset 8
|
.cfi_adjust_cfa_offset 8
|
||||||
|
@@ -5,12 +5,16 @@
|
|||||||
*********************************************************************************************/
|
*********************************************************************************************/
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "fpx.h"
|
#include "fpx.h"
|
||||||
|
#include <cpuinfo_x86.h>
|
||||||
|
|
||||||
|
extern X86Features CPU_CAPS;
|
||||||
|
|
||||||
// Global constants
|
// Global constants
|
||||||
extern const struct params_t params;
|
extern const struct params_t params;
|
||||||
|
|
||||||
|
// Digit multiplication, digit * digit -> 2-digit result
|
||||||
static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
|
static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
|
||||||
{ // Digit multiplication, digit * digit -> 2-digit result
|
{
|
||||||
crypto_word_t al, ah, bl, bh, temp;
|
crypto_word_t al, ah, bl, bh, temp;
|
||||||
crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
|
crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
|
||||||
crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);
|
crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);
|
||||||
@@ -43,10 +47,11 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w
|
|||||||
c[1] ^= (ahbh & mask_high) + carry; // C11
|
c[1] ^= (ahbh & mask_high) + carry; // C11
|
||||||
}
|
}
|
||||||
|
|
||||||
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
|
// Modular addition, c = a+b mod p434.
|
||||||
{ // Modular addition, c = a+b mod p434.
|
|
||||||
// Inputs: a, b in [0, 2*p434-1]
|
// Inputs: a, b in [0, 2*p434-1]
|
||||||
// Output: c in [0, 2*p434-1]
|
// Output: c in [0, 2*p434-1]
|
||||||
|
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
|
||||||
|
{
|
||||||
unsigned int i, carry = 0;
|
unsigned int i, carry = 0;
|
||||||
crypto_word_t mask;
|
crypto_word_t mask;
|
||||||
|
|
||||||
@@ -84,12 +89,20 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
|
||||||
|
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
|
||||||
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
|
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
|
||||||
{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
|
{
|
||||||
unsigned int i, j;
|
unsigned int i, j;
|
||||||
crypto_word_t t = 0, u = 0, v = 0, UV[2];
|
crypto_word_t t = 0, u = 0, v = 0, UV[2];
|
||||||
unsigned int carry = 0;
|
unsigned int carry = 0;
|
||||||
|
|
||||||
|
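// TODO: the assembly implementation actually requires both BMI2 and ADOX support, but only BMI2 is checked below; cpu_features needs to be updated so that ADOX can be detected as well.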
|
||||||
|
if (CPU_CAPS.bmi2) {
|
||||||
|
sike_mpmul_asm(a,b,c);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < NWORDS_FIELD; i++) {
|
for (i = 0; i < NWORDS_FIELD; i++) {
|
||||||
for (j = 0; j <= i; j++) {
|
for (j = 0; j <= i; j++) {
|
||||||
MUL(a[j], b[i-j], UV+1, UV[0]);
|
MUL(a[j], b[i-j], UV+1, UV[0]);
|
||||||
@@ -118,11 +131,18 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
|
|||||||
c[2*NWORDS_FIELD-1] = v;
|
c[2*NWORDS_FIELD-1] = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sike_fprdc(const felm_t ma, felm_t mc)
|
// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
|
||||||
{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
|
|
||||||
// mc = ma*R^-1 mod p434x2, where R = 2^448.
|
// mc = ma*R^-1 mod p434x2, where R = 2^448.
|
||||||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
|
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
|
||||||
// ma is assumed to be in Montgomery representation.
|
// ma is assumed to be in Montgomery representation.
|
||||||
|
void sike_fprdc_asm(const felm_t ma, felm_t mc);
|
||||||
|
void sike_fprdc(const felm_t ma, felm_t mc)
|
||||||
|
{
|
||||||
|
if (CPU_CAPS.bmi2) {
|
||||||
|
sike_fprdc_asm(ma, mc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned int i, j, carry, count = ZERO_WORDS;
|
unsigned int i, j, carry, count = ZERO_WORDS;
|
||||||
crypto_word_t UV[2], t = 0, u = 0, v = 0;
|
crypto_word_t UV[2], t = 0, u = 0, v = 0;
|
||||||
|
|
||||||
|
4
src/kem/sike/p434/fp_glue.c
Normal file
4
src/kem/sike/p434/fp_glue.c
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#include "fpx.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c);
|
Loading…
Reference in New Issue
Block a user