Kris Kwiatkowski
eb43eca5a8
Based on Microsoft's implementation available on GitHub: Source: https://github.com/Microsoft/PQCrypto-SIDH Commit: 77044b76181eb61c744ac8eb7ddc7a8fe72f6919 The following changes have been applied: * In Intel assembly, use MOV instead of MOVQ: according to the instruction reference in the Intel Software Developer's Manual, volume 2A, MOVQ has 4 forms. None of them mentions moving a literal to a GPR, hence "movq $rax, 0x0" is wrong. Instead, on a 64-bit system, MOV can be used. * Some variables were wrongly zero-initialized (as per the C99 spec). * Move constant values to the .RODATA segment, as keeping them in the .TEXT segment is not compatible with XOM. * Fix an issue in the arm64 code related to the fact that the compiler doesn't reserve enough space for the linker to relocate the address of a global variable when it is used by 'ldr' instructions. The solution is to use 'adrp' followed by an 'add' instruction. Relocations for the 'adrp' and 'add' instructions are generated by prefixing the label with :pg_hi21: and :lo12: respectively. * Enable MULX and ADX. The code from MS doesn't support PIC. MULX can't reference a global variable directly. Instead, RIP-relative addressing can be used. This improves performance by around 10%-13% on Skylake. * Check at runtime if the CPU supports the BMI2 and ADOX instructions. On AMD64, the optimized Montgomery multiplication and reduction have 2 implementations - the faster one takes advantage of the BMI2 instruction set introduced in Haswell and of ADOX introduced in Broadwell. Thanks to OPENSSL_ia32cap_P it can be decided at runtime which implementation to choose. As the CPU configuration is static by nature, the branch predictor will be correct most of the time and hence this check very often has no cost. * Reuse some utilities from BoringSSL instead of reimplementing them. 
This includes things like: * the definition of a limb size (use crypto_word_t instead of digit_t) * use of functions for checking in constant time if a value is 0 and/or less than another * #define's used for conditional compilation * Use SSE2 for conditional swap on vector registers. Improves performance a little bit. * Fix the f2elm_t definition. Code imported from MSR defines the f2elm_t type as an array of arrays. This decays to a pointer to an array (when passed as an argument). In C, one can't assign a const pointer to an array to a non-const pointer to an array. It seems this violates 6.7.3/8 from C99 (same for C11). This problem occurs in GCC 6 only when the -pedantic flag is specified, and it always occurs in GCC 4.9 (Debian Jessie). * Fix the definition of eval_3_isog. The second argument of eval_3_isog mustn't be const, for a similar reason as above. * Use HMAC-SHA256 instead of cSHAKE-256 to avoid upstreaming cSHAKE and SHA3 code. * Add speed and unit tests for SIKE. Change-Id: I22f0bb1f9edff314a35cd74b48e8c4962568e330
584 lines
20 KiB
C
584 lines
20 KiB
C
/********************************************************************************************
|
|
* SIDH: an efficient supersingular isogeny cryptography library
|
|
*
|
|
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
|
|
*********************************************************************************************/
|
|
|
|
#include <assert.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <openssl/bn.h>
|
|
#include <openssl/base.h>
|
|
#include <openssl/rand.h>
|
|
#include <openssl/mem.h>
|
|
#include <openssl/hmac.h>
|
|
#include <openssl/sha.h>
|
|
|
|
#include "utils.h"
|
|
#include "isogeny.h"
|
|
#include "fpx.h"
|
|
|
|
extern const struct params_t p503;
|
|
|
|
// Domain separation parameters for HMAC
|
|
static const uint8_t G[2] = {0,0};
|
|
static const uint8_t H[2] = {1,0};
|
|
static const uint8_t F[2] = {2,0};
|
|
|
|
// SIDHp503_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
|
|
#define SIDHp503_JINV_BYTESZ 126U
|
|
// SIDHp503_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
|
|
#define SIDHp503_PRV_A_BITSZ 250U
|
|
// SIDHp503_PRV_B_BITSZ is a number of bits of SIDH private key (3-isogeny)
|
|
#define SIDHp503_PRV_B_BITSZ 253U
|
|
// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
|
|
#define MAX_INT_POINTS_ALICE 7U
|
|
// MAX_INT_POINTS_BOB is a number of points used in 3-isogeny tree computation
|
|
#define MAX_INT_POINTS_BOB 8U
|
|
|
|
// Produces HMAC-SHA256 of data |S| mac'ed with the key |key|. Result is stored in |out|
|
|
// which must have size of at least |outsz| bytes and must be not bigger than
|
|
// SHA256_DIGEST_LENGTH. The output of a HMAC may be truncated.
|
|
// The |key| buffer is reused by the hmac_sum and hence, it's size must be equal
|
|
// to SHA256_CBLOCK. The HMAC key provided in |key| buffer must be smaller or equal
|
|
// to SHA256_DIGHEST_LENTH. |key| can overlap |out|.
|
|
static void hmac_sum(
|
|
uint8_t *out, size_t outsz, const uint8_t S[2], uint8_t key[SHA256_CBLOCK]) {
|
|
for(size_t i=0; i<SHA256_DIGEST_LENGTH; i++) {
|
|
key[i] = key[i] ^ 0x36;
|
|
}
|
|
// set rest of the buffer to ipad = 0x36
|
|
memset(&key[SHA256_DIGEST_LENGTH], 0x36, SHA256_CBLOCK - SHA256_DIGEST_LENGTH);
|
|
|
|
SHA256_CTX ctx;
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, key, SHA256_CBLOCK);
|
|
SHA256_Update(&ctx, S, 2);
|
|
uint8_t digest[SHA256_DIGEST_LENGTH];
|
|
SHA256_Final(digest, &ctx);
|
|
|
|
// XOR key with an opad = 0x5C
|
|
for(size_t i=0; i<SHA256_CBLOCK; i++) {
|
|
key[i] = key[i] ^ 0x36 ^ 0x5C;
|
|
}
|
|
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, key, SHA256_CBLOCK);
|
|
SHA256_Update(&ctx, digest, SHA256_DIGEST_LENGTH);
|
|
SHA256_Final(digest, &ctx);
|
|
assert(outsz <= sizeof(digest));
|
|
memcpy(out, digest, outsz);
|
|
}
|
|
|
|
// Swap points.
|
|
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
|
|
#if !defined(OPENSSL_X86_64) || defined(OPENSSL_NO_ASM)
|
|
static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
|
|
{
|
|
crypto_word_t temp;
|
|
for (size_t i = 0; i < NWORDS_FIELD; i++) {
|
|
temp = option & (P->X->c0[i] ^ Q->X->c0[i]);
|
|
P->X->c0[i] = temp ^ P->X->c0[i];
|
|
Q->X->c0[i] = temp ^ Q->X->c0[i];
|
|
temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]);
|
|
P->Z->c0[i] = temp ^ P->Z->c0[i];
|
|
Q->Z->c0[i] = temp ^ Q->Z->c0[i];
|
|
temp = option & (P->X->c1[i] ^ Q->X->c1[i]);
|
|
P->X->c1[i] = temp ^ P->X->c1[i];
|
|
Q->X->c1[i] = temp ^ Q->X->c1[i];
|
|
temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]);
|
|
P->Z->c1[i] = temp ^ P->Z->c1[i];
|
|
Q->Z->c1[i] = temp ^ Q->Z->c1[i];
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// Swap points.
|
|
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
|
|
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
|
|
{
|
|
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
|
|
sike_cswap_asm(P, Q, option);
|
|
#else
|
|
sike_cswap(P, Q, option);
|
|
#endif
|
|
}
|
|
|
|
// Three-point ladder driven by the bits of the scalar |m|.
//
// Inputs:  xP, xQ, xPQ - x-coordinates used to seed R, R0 and R2 respectively
//                        (each with Z = 1).
//          m           - scalar, read as an array of crypto_word_t; bit i is
//                        taken from word i >> LOG2RADIX.
//          is_A        - non-zero selects SIDHp503_PRV_A_BITSZ scalar bits,
//                        zero selects SIDHp503_PRV_B_BITSZ.
//          A           - curve coefficient, used to derive A24 = (A+2)/4.
// Output:  R           - resulting projective point.
// NOTE(review): presumably R = x(P + [m]Q) as in the upstream Microsoft SIDH
// library - confirm against xDBLADD.
static void LADDER3PT(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const crypto_word_t* m,
    int is_A, point_proj_t R, const f2elm_t A) {
    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
    f2elm_t A24 = F2ELM_INIT;
    int prevbit = 0;

    const size_t nbits = is_A ? SIDHp503_PRV_A_BITSZ : SIDHp503_PRV_B_BITSZ;

    // A24 = (A+2)/4, built as ((1 + 1) + A) / 2 / 2
    sike_fpcopy((crypto_word_t*)&p503.mont_one, A24[0].c0);
    sike_fp2add(A24, A24, A24);
    sike_fp2add(A, A24, A24);
    sike_fp2div2(A24, A24);
    sike_fp2div2(A24, A24);

    // R0 = (xQ : 1)
    sike_fp2copy(xQ, R0->X);
    sike_fpcopy((crypto_word_t*)&p503.mont_one, R0->Z[0].c0);

    // R2 = (xPQ : 1)
    sike_fp2copy(xPQ, R2->X);
    sike_fpcopy((crypto_word_t*)&p503.mont_one, R2->Z[0].c0);

    // R = (xP : 1). R comes from the caller and may be uninitialized, so the
    // imaginary part of Z is cleared explicitly.
    sike_fp2copy(xP, R->X);
    sike_fpcopy((crypto_word_t*)&p503.mont_one, R->Z[0].c0);
    memset(R->Z->c1, 0, sizeof(R->Z->c1));

    // Main loop: scan the scalar from the least significant bit upwards.
    for (size_t i = 0; i < nbits; i++) {
        const int bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1;
        // All-ones mask when the current bit differs from the previous one,
        // zero otherwise; R and R2 are swapped only on a bit transition.
        const crypto_word_t mask = 0 - (crypto_word_t)(bit ^ prevbit);
        prevbit = bit;

        sike_fp2cswap(R, R2, mask);
        xDBLADD(R0, R2, R->X, A24);
        sike_fp2mul_mont(R2->X, R->Z, R2->X);
    }
}
|
|
|
|
// Initialization of basis points
|
|
static inline void sike_init_basis(crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
|
|
sike_fpcopy(gen, XP->c0);
|
|
sike_fpcopy(gen + NWORDS_FIELD, XP->c1);
|
|
sike_fpcopy(gen + 2*NWORDS_FIELD, XQ->c0);
|
|
memset(XQ->c1, 0, sizeof(XQ->c1));
|
|
sike_fpcopy(gen + 3*NWORDS_FIELD, XR->c0);
|
|
sike_fpcopy(gen + 4*NWORDS_FIELD, XR->c1);
|
|
}
|
|
|
|
// Conversion of GF(p^2) element from Montgomery to standard representation.
|
|
static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
|
|
f2elm_t t;
|
|
sike_from_fp2mont(x, t);
|
|
|
|
// convert to bytes in little endian form
|
|
for (size_t i=0; i<FIELD_BYTESZ; i++) {
|
|
enc[i+ 0] = (t[0].c0[i/LSZ] >> (LSZ*(i%LSZ))) & 0xFF;
|
|
enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (LSZ*(i%LSZ))) & 0xFF;
|
|
}
|
|
}
|
|
|
|
// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation.
|
|
// Elements over GF(p503) are encoded in 63 octets in little endian format
|
|
// (i.e., the least significant octet is located in the lowest memory address).
|
|
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
|
|
memset(t[0].c0, 0, sizeof(t[0].c0));
|
|
memset(t[0].c1, 0, sizeof(t[0].c1));
|
|
// convert bytes in little endian form to f2elm_t
|
|
for (size_t i = 0; i < FIELD_BYTESZ; i++) {
|
|
t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+ 0]) << (LSZ*(i%LSZ));
|
|
t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (LSZ*(i%LSZ));
|
|
}
|
|
sike_to_fp2mont(t, t);
|
|
}
|
|
|
|
// Alice's ephemeral public key generation
|
|
// Input: a private key prA in the range [0, 2^250 - 1], stored in 32 bytes.
|
|
// Output: the public key pkA consisting of 3 GF(p503^2) elements encoded in 378 bytes.
|
|
static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
|
|
{
|
|
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
|
|
point_proj_t phiP = POINT_PROJ_INIT;
|
|
point_proj_t phiQ = POINT_PROJ_INIT;
|
|
point_proj_t phiR = POINT_PROJ_INIT;
|
|
f2elm_t XPA, XQA, XRA, coeff[3];
|
|
f2elm_t A24plus = F2ELM_INIT;
|
|
f2elm_t C24 = F2ELM_INIT;
|
|
f2elm_t A = F2ELM_INIT;
|
|
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
|
|
|
|
// Initialize basis points
|
|
sike_init_basis((crypto_word_t*)p503.A_gen, XPA, XQA, XRA);
|
|
sike_init_basis((crypto_word_t*)p503.B_gen, phiP->X, phiQ->X, phiR->X);
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, (phiP->Z)->c0);
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, (phiQ->Z)->c0);
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, (phiR->Z)->c0);
|
|
|
|
// Initialize constants
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, A24plus->c0);
|
|
sike_fp2add(A24plus, A24plus, C24);
|
|
|
|
// Retrieve kernel point
|
|
LADDER3PT(XPA, XQA, XRA, (crypto_word_t*)skA, 1, R, A);
|
|
|
|
// Traverse tree
|
|
index = 0;
|
|
for (size_t row = 1; row < A_max; row++) {
|
|
while (index < A_max-row) {
|
|
sike_fp2copy(R->X, pts[npts]->X);
|
|
sike_fp2copy(R->Z, pts[npts]->Z);
|
|
pts_index[npts++] = index;
|
|
m = p503.A_strat[ii++];
|
|
xDBLe(R, R, A24plus, C24, (2*m));
|
|
index += m;
|
|
}
|
|
get_4_isog(R, A24plus, C24, coeff);
|
|
|
|
for (size_t i = 0; i < npts; i++) {
|
|
eval_4_isog(pts[i], coeff);
|
|
}
|
|
eval_4_isog(phiP, coeff);
|
|
eval_4_isog(phiQ, coeff);
|
|
eval_4_isog(phiR, coeff);
|
|
|
|
sike_fp2copy(pts[npts-1]->X, R->X);
|
|
sike_fp2copy(pts[npts-1]->Z, R->Z);
|
|
index = pts_index[npts-1];
|
|
npts -= 1;
|
|
}
|
|
|
|
get_4_isog(R, A24plus, C24, coeff);
|
|
eval_4_isog(phiP, coeff);
|
|
eval_4_isog(phiQ, coeff);
|
|
eval_4_isog(phiR, coeff);
|
|
|
|
inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
|
|
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
|
|
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
|
|
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
|
|
|
|
// Format public key
|
|
sike_fp2_encode(phiP->X, pkA);
|
|
sike_fp2_encode(phiQ->X, pkA + SIDHp503_JINV_BYTESZ);
|
|
sike_fp2_encode(phiR->X, pkA + 2*SIDHp503_JINV_BYTESZ);
|
|
}
|
|
|
|
// Bob's ephemeral key-pair generation
|
|
// It produces a private key skB and computes the public key pkB.
|
|
// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
|
|
// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes.
|
|
static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
|
|
{
|
|
point_proj_t R, pts[MAX_INT_POINTS_BOB];
|
|
point_proj_t phiP = POINT_PROJ_INIT;
|
|
point_proj_t phiQ = POINT_PROJ_INIT;
|
|
point_proj_t phiR = POINT_PROJ_INIT;
|
|
f2elm_t XPB, XQB, XRB, coeff[3];
|
|
f2elm_t A24plus = F2ELM_INIT;
|
|
f2elm_t A24minus = F2ELM_INIT;
|
|
f2elm_t A = F2ELM_INIT;
|
|
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
|
|
|
|
// Initialize basis points
|
|
sike_init_basis((crypto_word_t*)p503.B_gen, XPB, XQB, XRB);
|
|
sike_init_basis((crypto_word_t*)p503.A_gen, phiP->X, phiQ->X, phiR->X);
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, (phiP->Z)->c0);
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, (phiQ->Z)->c0);
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, (phiR->Z)->c0);
|
|
|
|
// Initialize constants
|
|
sike_fpcopy((crypto_word_t*)&p503.mont_one, A24plus->c0);
|
|
sike_fp2add(A24plus, A24plus, A24plus);
|
|
sike_fp2copy(A24plus, A24minus);
|
|
sike_fp2neg(A24minus);
|
|
|
|
// Retrieve kernel point
|
|
LADDER3PT(XPB, XQB, XRB, (crypto_word_t*)skB, 0, R, A);
|
|
|
|
// Traverse tree
|
|
index = 0;
|
|
for (size_t row = 1; row < B_max; row++) {
|
|
while (index < B_max-row) {
|
|
sike_fp2copy(R->X, pts[npts]->X);
|
|
sike_fp2copy(R->Z, pts[npts]->Z);
|
|
pts_index[npts++] = index;
|
|
m = p503.B_strat[ii++];
|
|
xTPLe(R, R, A24minus, A24plus, m);
|
|
index += m;
|
|
}
|
|
get_3_isog(R, A24minus, A24plus, coeff);
|
|
|
|
for (size_t i = 0; i < npts; i++) {
|
|
eval_3_isog(pts[i], coeff);
|
|
}
|
|
eval_3_isog(phiP, coeff);
|
|
eval_3_isog(phiQ, coeff);
|
|
eval_3_isog(phiR, coeff);
|
|
|
|
sike_fp2copy(pts[npts-1]->X, R->X);
|
|
sike_fp2copy(pts[npts-1]->Z, R->Z);
|
|
index = pts_index[npts-1];
|
|
npts -= 1;
|
|
}
|
|
|
|
get_3_isog(R, A24minus, A24plus, coeff);
|
|
eval_3_isog(phiP, coeff);
|
|
eval_3_isog(phiQ, coeff);
|
|
eval_3_isog(phiR, coeff);
|
|
|
|
inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
|
|
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
|
|
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
|
|
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
|
|
|
|
// Format public key
|
|
sike_fp2_encode(phiP->X, pkB);
|
|
sike_fp2_encode(phiQ->X, pkB + SIDHp503_JINV_BYTESZ);
|
|
sike_fp2_encode(phiR->X, pkB + 2*SIDHp503_JINV_BYTESZ);
|
|
}
|
|
|
|
// Alice's ephemeral shared secret computation
|
|
// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
|
|
// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes.
|
|
// Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes.
|
|
// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes.
|
|
static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
|
|
{
|
|
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
|
|
f2elm_t coeff[3], PKB[3], jinv;
|
|
f2elm_t A24plus = F2ELM_INIT;
|
|
f2elm_t C24 = F2ELM_INIT;
|
|
f2elm_t A = F2ELM_INIT;
|
|
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
|
|
|
|
// Initialize images of Bob's basis
|
|
fp2_decode(pkB, PKB[0]);
|
|
fp2_decode(pkB + SIDHp503_JINV_BYTESZ, PKB[1]);
|
|
fp2_decode(pkB + 2*SIDHp503_JINV_BYTESZ, PKB[2]);
|
|
|
|
// Initialize constants
|
|
get_A(PKB[0], PKB[1], PKB[2], A); // TODO: Can return projective A?
|
|
sike_fpadd((crypto_word_t*)&p503.mont_one, (crypto_word_t*)&p503.mont_one, C24->c0);
|
|
sike_fp2add(A, C24, A24plus);
|
|
sike_fpadd(C24->c0, C24->c0, C24->c0);
|
|
|
|
// Retrieve kernel point
|
|
LADDER3PT(PKB[0], PKB[1], PKB[2], (crypto_word_t*)skA, 1, R, A);
|
|
|
|
// Traverse tree
|
|
index = 0;
|
|
for (size_t row = 1; row < A_max; row++) {
|
|
while (index < A_max-row) {
|
|
sike_fp2copy(R->X, pts[npts]->X);
|
|
sike_fp2copy(R->Z, pts[npts]->Z);
|
|
pts_index[npts++] = index;
|
|
m = p503.A_strat[ii++];
|
|
xDBLe(R, R, A24plus, C24, (2*m));
|
|
index += m;
|
|
}
|
|
get_4_isog(R, A24plus, C24, coeff);
|
|
|
|
for (size_t i = 0; i < npts; i++) {
|
|
eval_4_isog(pts[i], coeff);
|
|
}
|
|
|
|
sike_fp2copy(pts[npts-1]->X, R->X);
|
|
sike_fp2copy(pts[npts-1]->Z, R->Z);
|
|
index = pts_index[npts-1];
|
|
npts -= 1;
|
|
}
|
|
|
|
get_4_isog(R, A24plus, C24, coeff);
|
|
sike_fp2div2(C24, C24);
|
|
sike_fp2sub(A24plus, C24, A24plus);
|
|
sike_fp2div2(C24, C24);
|
|
j_inv(A24plus, C24, jinv);
|
|
sike_fp2_encode(jinv, ssA);
|
|
}
|
|
|
|
// Bob's ephemeral shared secret computation
|
|
// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
|
|
// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
|
|
// Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes.
|
|
// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes.
|
|
static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
|
|
{
|
|
point_proj_t R, pts[MAX_INT_POINTS_BOB];
|
|
f2elm_t coeff[3], PKB[3], jinv;
|
|
f2elm_t A24plus = F2ELM_INIT;
|
|
f2elm_t A24minus = F2ELM_INIT;
|
|
f2elm_t A = F2ELM_INIT;
|
|
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
|
|
|
|
// Initialize images of Alice's basis
|
|
fp2_decode(pkA, PKB[0]);
|
|
fp2_decode(pkA + SIDHp503_JINV_BYTESZ, PKB[1]);
|
|
fp2_decode(pkA + 2*SIDHp503_JINV_BYTESZ, PKB[2]);
|
|
|
|
// Initialize constants
|
|
get_A(PKB[0], PKB[1], PKB[2], A);
|
|
sike_fpadd((crypto_word_t*)&p503.mont_one, (crypto_word_t*)&p503.mont_one, A24minus->c0);
|
|
sike_fp2add(A, A24minus, A24plus);
|
|
sike_fp2sub(A, A24minus, A24minus);
|
|
|
|
// Retrieve kernel point
|
|
LADDER3PT(PKB[0], PKB[1], PKB[2], (crypto_word_t*)skB, 0, R, A);
|
|
|
|
// Traverse tree
|
|
index = 0;
|
|
for (size_t row = 1; row < B_max; row++) {
|
|
while (index < B_max-row) {
|
|
sike_fp2copy(R->X, pts[npts]->X);
|
|
sike_fp2copy(R->Z, pts[npts]->Z);
|
|
pts_index[npts++] = index;
|
|
m = p503.B_strat[ii++];
|
|
xTPLe(R, R, A24minus, A24plus, m);
|
|
index += m;
|
|
}
|
|
get_3_isog(R, A24minus, A24plus, coeff);
|
|
|
|
for (size_t i = 0; i < npts; i++) {
|
|
eval_3_isog(pts[i], coeff);
|
|
}
|
|
|
|
sike_fp2copy(pts[npts-1]->X, R->X);
|
|
sike_fp2copy(pts[npts-1]->Z, R->Z);
|
|
index = pts_index[npts-1];
|
|
npts -= 1;
|
|
}
|
|
|
|
get_3_isog(R, A24minus, A24plus, coeff);
|
|
sike_fp2add(A24plus, A24minus, A);
|
|
sike_fp2add(A, A, A);
|
|
sike_fp2sub(A24plus, A24minus, A24plus);
|
|
j_inv(A, A24plus, jinv);
|
|
sike_fp2_encode(jinv, ssB);
|
|
}
|
|
|
|
int SIKE_keypair(uint8_t out_priv[SIKEp503_PRV_BYTESZ], uint8_t out_pub[SIKEp503_PUB_BYTESZ]) {
|
|
int ret = 0;
|
|
|
|
BN_CTX *ctx = BN_CTX_new();
|
|
if (!ctx) {
|
|
goto end;
|
|
}
|
|
|
|
// Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and < 253 bits
|
|
BIGNUM *bn_sidh_prv = BN_CTX_get(ctx);
|
|
if (!bn_sidh_prv) {
|
|
goto end;
|
|
}
|
|
|
|
if (!BN_rand(bn_sidh_prv, SIDHp503_PRV_B_BITSZ, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) {
|
|
goto end;
|
|
}
|
|
|
|
// Convert to little endian
|
|
if (!BN_bn2le_padded(out_priv, BITS_TO_BYTES(SIDHp503_PRV_B_BITSZ), bn_sidh_prv)) {
|
|
goto end;
|
|
}
|
|
|
|
// Never fails
|
|
gen_iso_B(out_priv, out_pub);
|
|
|
|
// All good
|
|
ret = 1;
|
|
|
|
end:
|
|
BN_CTX_free(ctx);
|
|
return ret;
|
|
}
|
|
|
|
void SIKE_encaps(
|
|
uint8_t out_shared_key[SIKEp503_SS_BYTESZ],
|
|
uint8_t out_ciphertext[SIKEp503_CT_BYTESZ],
|
|
const uint8_t pub_key[SIKEp503_PUB_BYTESZ])
|
|
{
|
|
// Secret buffer is reused by the function to store some ephemeral
|
|
// secret data. It's size must be maximum of SHA256_CBLOCK,
|
|
// SIKEp503_MSG_BYTESZ and SIDHp503_PRV_A_BITSZ in bytes.
|
|
uint8_t secret[SHA256_CBLOCK];
|
|
uint8_t j[SIDHp503_JINV_BYTESZ];
|
|
uint8_t temp[SIKEp503_MSG_BYTESZ + SIKEp503_CT_BYTESZ];
|
|
SHA256_CTX ctx;
|
|
|
|
// Generate secret key for A
|
|
// secret key A = HMAC({0,1}^n || pub_key), G) mod SIDHp503_PRV_A_BITSZ
|
|
(void)RAND_bytes(temp, SIKEp503_MSG_BYTESZ);
|
|
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
|
|
SHA256_Update(&ctx, pub_key, SIKEp503_PUB_BYTESZ);
|
|
SHA256_Final(secret, &ctx);
|
|
hmac_sum(secret, BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ), G, secret);
|
|
secret[BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ) - 1] &= (1 << (SIDHp503_PRV_A_BITSZ%8)) - 1;
|
|
|
|
// Generate public key for A - first part of the ciphertext
|
|
gen_iso_A(secret, out_ciphertext);
|
|
|
|
// Generate c1:
|
|
// h = HMAC(j-invariant(secret key A, public key B), F)
|
|
// c1 = h ^ m
|
|
ex_iso_A(secret, pub_key, j);
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, j, sizeof(j));
|
|
SHA256_Final(secret, &ctx);
|
|
hmac_sum(secret, SIKEp503_MSG_BYTESZ, F, secret);
|
|
|
|
// c1 = h ^ m
|
|
uint8_t *c1 = &out_ciphertext[SIKEp503_PUB_BYTESZ];
|
|
for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) {
|
|
c1[i] = temp[i] ^ secret[i];
|
|
}
|
|
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
|
|
SHA256_Update(&ctx, out_ciphertext, SIKEp503_CT_BYTESZ);
|
|
SHA256_Final(secret, &ctx);
|
|
// Generate shared secret out_shared_key = HMAC(m||out_ciphertext, F)
|
|
hmac_sum(out_shared_key, SIKEp503_SS_BYTESZ, H, secret);
|
|
}
|
|
|
|
void SIKE_decaps(
|
|
uint8_t out_shared_key[SIKEp503_SS_BYTESZ],
|
|
const uint8_t ciphertext[SIKEp503_CT_BYTESZ],
|
|
const uint8_t pub_key[SIKEp503_PUB_BYTESZ],
|
|
const uint8_t priv_key[SIKEp503_PRV_BYTESZ])
|
|
{
|
|
// Secret buffer is reused by the function to store some ephemeral
|
|
// secret data. It's size must be maximum of SHA256_CBLOCK,
|
|
// SIKEp503_MSG_BYTESZ and SIDHp503_PRV_A_BITSZ in bytes.
|
|
uint8_t secret[SHA256_CBLOCK];
|
|
uint8_t j[SIDHp503_JINV_BYTESZ];
|
|
uint8_t c0[SIKEp503_PUB_BYTESZ];
|
|
uint8_t temp[SIKEp503_MSG_BYTESZ];
|
|
uint8_t shared_nok[SIKEp503_MSG_BYTESZ];
|
|
SHA256_CTX ctx;
|
|
|
|
(void)RAND_bytes(shared_nok, SIKEp503_MSG_BYTESZ);
|
|
|
|
// Recover m
|
|
// Let ciphertext = c0 || c1 - both have fixed sizes
|
|
// m = F(j-invariant(c0, priv_key)) ^ c1
|
|
ex_iso_B(priv_key, ciphertext, j);
|
|
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, j, sizeof(j));
|
|
SHA256_Final(secret, &ctx);
|
|
hmac_sum(secret, SIKEp503_MSG_BYTESZ, F, secret);
|
|
|
|
const uint8_t *c1 = &ciphertext[sizeof(c0)];
|
|
for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) {
|
|
temp[i] = c1[i] ^ secret[i];
|
|
}
|
|
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
|
|
SHA256_Update(&ctx, pub_key, SIKEp503_PUB_BYTESZ);
|
|
SHA256_Final(secret, &ctx);
|
|
hmac_sum(secret, BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ), G, secret);
|
|
|
|
// Recover secret key A = G(m||pub_key) mod
|
|
secret[BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ) - 1] &= (1 << (SIDHp503_PRV_A_BITSZ%8)) - 1;
|
|
|
|
// Recover c0 = public key A
|
|
gen_iso_A(secret, c0);
|
|
crypto_word_t ok = constant_time_is_zero_w(CRYPTO_memcmp(c0, ciphertext, SIKEp503_PUB_BYTESZ));
|
|
for (size_t i=0; i<SIKEp503_MSG_BYTESZ; i++) {
|
|
temp[i] = constant_time_select_8(ok, temp[i], shared_nok[i]);
|
|
}
|
|
|
|
SHA256_Init(&ctx);
|
|
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
|
|
SHA256_Update(&ctx, ciphertext, SIKEp503_CT_BYTESZ);
|
|
SHA256_Final(secret, &ctx);
|
|
hmac_sum(out_shared_key, SIKEp503_SS_BYTESZ, H, secret);
|
|
}
|