Kris Kwiatkowski
eb43eca5a8
Based on Microsoft's implementation available on GitHub:
Source: https://github.com/Microsoft/PQCrypto-SIDH
Commit: 77044b76181eb61c744ac8eb7ddc7a8fe72f6919

The following changes have been applied:

* In Intel assembly, use MOV instead of MOVQ: according to the Intel instruction reference in the Intel Software Developer's Manual volume 2A, MOVQ has 4 forms. None of them mentions moving a literal to a GPR, hence "movq $rax, 0x0" is wrong. Instead, on a 64-bit system, MOV can be used.
* Some variables were wrongly zero-initialized (as per the C99 spec).
* Move constant values to the .RODATA segment, as keeping them in the .TEXT segment is not compatible with XOM.
* Fix an issue in the arm64 code related to the fact that the compiler doesn't reserve enough space for the linker to relocate the address of a global variable when it is used by 'ldr' instructions. The solution is to use 'adrp' followed by an 'add' instruction. Relocations for the 'adrp' and 'add' instructions are generated by prefixing the label with :pg_hi21: and :lo12: respectively.
* Enable MULX and ADX. The code from MS doesn't support PIC. MULX can't reference a global variable directly. Instead, RIP-relative addressing can be used. This improves performance by around 10%-13% on SkyLake.
* Check if the CPU supports the BMI2 and ADOX instructions at runtime. On AMD64, the optimized Montgomery multiplication and reduction have 2 implementations - the faster one takes advantage of the BMI2 instruction set introduced in Haswell and ADOX introduced in Broadwell. Thanks to OPENSSL_ia32cap_P it can be decided at runtime which implementation to choose. As CPU configuration is static by nature, the branch predictor will be correct most of the time and hence this check very often has no cost.
* Reuse some utilities from boringssl instead of reimplementing them. This includes things like:
  * definition of a limb size (use crypto_word_t instead of digit_t)
  * use of functions for checking in constant time if a value is 0 and/or less than
  * #define's used for conditional compilation
* Use SSE2 for conditional swap on vector registers. Improves performance a little bit.
* Fix the f2elm_t definition. Code imported from MSR defines the f2elm_t type as an array of arrays. This decays to a pointer to an array (when passed as an argument). In C, one can't assign a const pointer to an array with a non-const pointer to an array. It seems this violates 6.7.3/8 from C99 (same for C11). This problem occurs in GCC 6 only when the -pedantic flag is specified, and it always occurs in GCC 4.9 (Debian Jessie).
* Fix the definition of eval_3_isog. The second argument of eval_3_isog mustn't be const. Similar reason as above.
* Use HMAC-SHA256 instead of cSHAKE-256 to avoid upstreaming cSHAKE and SHA3 code.
* Add speed and unit tests for SIKE.

Change-Id: I22f0bb1f9edff314a35cd74b48e8c4962568e330
113 lines
4.2 KiB
C
113 lines
4.2 KiB
C
#ifndef FPX_H_
|
|
#define FPX_H_
|
|
|
|
#include "utils.h"
|
|
|
|
#if defined(__cplusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Modular addition, c = a+b mod p503.
|
|
void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
|
|
// Modular subtraction, c = a-b mod p503.
|
|
void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
|
|
// Modular division by two, c = a/2 mod p503.
|
|
void sike_fpdiv2(const felm_t a, felm_t c);
|
|
// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1].
|
|
void sike_fpcorrection(felm_t a);
|
|
// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
|
|
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
|
|
// 503-bit Montgomery reduction, c = a mod p
|
|
void sike_fprdc(const dfelm_t a, felm_t c);
|
|
// Double 2x503-bit multiprecision subtraction, c = c-a-b
|
|
void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c);
|
|
// Multiprecision subtraction, c = a-b
|
|
crypto_word_t sike_mpsubx2_asm(const felm_t a, const felm_t b, felm_t c);
|
|
// 503-bit multiprecision addition, c = a+b
|
|
void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
|
|
// Modular negation, a = -a mod p503.
|
|
void sike_fpneg(felm_t a);
|
|
// Copy of a field element, c = a
|
|
void sike_fpcopy(const felm_t a, felm_t c);
|
|
// Zero a field element, a = 0.
|
|
void sike_fpzero(felm_t a);
|
|
// If option = 0xFF...FF, x and y are swapped; otherwise no swap happens. Constant time.
|
|
void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
|
|
// Conversion from Montgomery representation to standard representation,
|
|
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
|
|
void sike_from_mont(const felm_t ma, felm_t c);
|
|
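// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512.
// NOTE(review): this comment previously said R=2^768, which looks like the p751 value; confirm against the p503 parameters.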
|
|
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
|
|
// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2)
|
|
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
|
|
// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
|
|
void sike_fp2inv_mont(f2elm_t a);
|
|
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
|
|
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
|
|
// Modular correction, a = a in GF(p^2).
|
|
void sike_fp2correction(f2elm_t a);
|
|
|
|
#if defined(__cplusplus)
|
|
} // extern C
|
|
#endif
|
|
|
|
// GF(p^2) addition, c = a+b in GF(p^2).
// Applies sike_fpadd to the c0 components and then to the c1 components.
// Every argument is parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. Each argument is expanded twice,
// so it must be free of side effects.
#define sike_fp2add(a, b, c)                 \
  do {                                       \
    sike_fpadd((a)->c0, (b)->c0, (c)->c0);   \
    sike_fpadd((a)->c1, (b)->c1, (c)->c1);   \
  } while (0)
|
|
|
|
// GF(p^2) subtraction, c = a-b in GF(p^2).
// Applies sike_fpsub to the c0 components and then to the c1 components.
// Every argument is parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. Each argument is expanded twice,
// so it must be free of side effects.
#define sike_fp2sub(a, b, c)                 \
  do {                                       \
    sike_fpsub((a)->c0, (b)->c0, (c)->c0);   \
    sike_fpsub((a)->c1, (b)->c1, (c)->c1);   \
  } while (0)
|
|
|
|
// Copy a GF(p^2) element, c = a.
// Applies sike_fpcopy to the c0 component and then to the c1 component.
// Both arguments are parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. Each argument is expanded twice,
// so it must be free of side effects.
#define sike_fp2copy(a, c)            \
  do {                                \
    sike_fpcopy((a)->c0, (c)->c0);    \
    sike_fpcopy((a)->c1, (c)->c1);    \
  } while (0)
|
|
|
|
// GF(p^2) negation, a = -a in GF(p^2).
// Applies sike_fpneg in place to the c0 component and then to the c1
// component. The argument is parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. It is expanded twice, so it must
// be free of side effects.
#define sike_fp2neg(a)       \
  do {                       \
    sike_fpneg((a)->c0);     \
    sike_fpneg((a)->c1);     \
  } while (0)
|
|
|
|
// GF(p^2) division by two, c = a/2 in GF(p^2).
// Applies sike_fpdiv2 to the c0 component and then to the c1 component.
// Both arguments are parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. Each argument is expanded twice,
// so it must be free of side effects.
#define sike_fp2div2(a, c)            \
  do {                                \
    sike_fpdiv2((a)->c0, (c)->c0);    \
    sike_fpdiv2((a)->c1, (c)->c1);    \
  } while (0)
|
|
|
|
// Modular correction, a = a in GF(p^2).
// Applies sike_fpcorrection in place to the c0 component and then to the c1
// component. The argument is parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. It is expanded twice, so it must
// be free of side effects.
// NOTE(review): a function prototype with this same name is declared earlier
// in this header; from this point on every call-like use of the name expands
// to this macro instead. Confirm whether that prototype is still needed.
#define sike_fp2correction(a)       \
  do {                              \
    sike_fpcorrection((a)->c0);     \
    sike_fpcorrection((a)->c1);     \
  } while (0)
|
|
|
|
// Conversion of a GF(p^2) element to Montgomery representation,
// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
// Multiplies each component (c0, then c1) of a by p503.mont_R2 using
// sike_fpmul_mont and stores the result in the matching component of mc.
// Both arguments are parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. Each argument is expanded twice,
// so it must be free of side effects.
// NOTE(review): the cast below yields a non-const crypto_word_t pointer even
// though the second parameter of sike_fpmul_mont is const-qualified; a
// const-preserving cast may be sufficient - confirm against the declaration
// of p503.
#define sike_to_fp2mont(a, mc)                                            \
  do {                                                                    \
    sike_fpmul_mont((a)->c0, (crypto_word_t*)&p503.mont_R2, (mc)->c0);    \
    sike_fpmul_mont((a)->c1, (crypto_word_t*)&p503.mont_R2, (mc)->c1);    \
  } while (0)
|
|
|
|
// Conversion of a GF(p^2) element from Montgomery representation to standard
// representation, c_i = ma_i*R^(-1) = a_i in GF(p^2).
// Applies sike_from_mont to the c0 component and then to the c1 component.
// Both arguments are parenthesized in the expansion so that pointer
// expressions (e.g. `p + 1`) bind correctly. Each argument is expanded twice,
// so it must be free of side effects.
#define sike_from_fp2mont(ma, c)          \
  do {                                    \
    sike_from_mont((ma)->c0, (c)->c0);    \
    sike_from_mont((ma)->c1, (c)->c1);    \
  } while (0)
|
|
|
|
#endif // FPX_H_
|