Selaa lähdekoodia

Add support for SIKE/p503 post-quantum KEM

Based on Microsoft's implementation available on github:
Source: https://github.com/Microsoft/PQCrypto-SIDH
Commit: 77044b7618

Following changes has been applied

* In intel assembly, use MOV instead of MOVQ:
  Intel instruction reference in the Intel Software Developer's Manual
  volume 2A, the MOVQ has 4 forms. None of them mentions moving
  literal to GPR, hence "movq $rax, 0x0" is wrong. Instead, on 64bit
  system, MOV can be used.

* Some variables were wrongly zero-initialized (as per C99 spec)

* Move constant values to .RODATA segment, as keeping them in .TEXT
  segment is not compatible with XOM.

* Fixes issue in arm64 code related to the fact that compiler doesn't
  reserve enough space for the linker to relocate address of a global
  variable when used by 'ldr' instructions. Solution is to use 'adrp'
  followed by 'add' instruction. Relocations for 'adrp' and 'add'
  instructions is generated by prefixing the label with :pg_hi21:
  and :lo12: respectively.

* Enable MULX and ADX. Code from MS doesn't support PIC. MULX can't
  reference global variable directly. Instead RIP-relative addressing
  can be used. This improves performance around 10%-13% on SkyLake

* Check if CPU supports BMI2 and ADOX instruction at runtime. On AMD64
  optimized implementation of montgomery multiplication and reduction
  have 2 implementations - faster one takes advantage of BMI2
  instruction set introduced in Haswell and ADOX introduced in
  Broadwell. Thanks to OPENSSL_ia32cap_P it can be decided at runtime
  which implementation to choose. As CPU configuration is static by
  nature, branch predictor will be correct most of the time and hence
  this check very often has no cost.

* Reuse some utilities from boringssl instead of reimplementing them.
  This includes things like:
  * definition of a limb size (use crypto_word_t instead of digit_t)
  * use functions for checking in constant time if value is 0 and/or
    less then
  * #define's used for conditional compilation

* Use SSE2 for conditional swap on vector registers. Improves
  performance a little bit.

* Fix f2elm_t definition. Code imported from MSR defines f2elm_t type as
  a array of arrays. This decays to a pointer to an array (when passing
  as an argument). In C, one can't assign const pointer to an array with
  non-const pointer to an array. Seems it violates 6.7.3/8 from C99
  (same for C11). This problem occures in GCC 6, only when -pedantic
  flag is specified and it occures always in GCC 4.9 (debian jessie).

* Fix definition of eval_3_isog. Second argument in eval_3_isog mustn't be
  const. Similar reason as above.

* Use HMAC-SHA256 instead of cSHAKE-256 to avoid upstreaming cSHAKE
  and SHA3 code.

* Add speed and unit tests for SIKE.

Change-Id: I22f0bb1f9edff314a35cd74b48e8c4962568e330
kris/for/master/CECPQ3
Henry Case 5 vuotta sitten
vanhempi
commit
628c450c81
17 muutettua tiedostoa jossa 4557 lisäystä ja 0 poistoa
  1. +1
    -0
      CMakeLists.txt
  2. +1
    -0
      crypto/CMakeLists.txt
  3. +59
    -0
      third_party/sike/CMakeLists.txt
  4. +21
    -0
      third_party/sike/LICENSE
  5. +68
    -0
      third_party/sike/include/sike/sike.h
  6. +70
    -0
      third_party/sike/src/P503.c
  7. +813
    -0
      third_party/sike/src/asm/fp_arm64_asm.S
  8. +176
    -0
      third_party/sike/src/asm/fp_generic.c
  9. +1642
    -0
      third_party/sike/src/asm/fp_x64_asm.S
  10. +311
    -0
      third_party/sike/src/fpx.c
  11. +98
    -0
      third_party/sike/src/fpx.h
  12. +260
    -0
      third_party/sike/src/isogeny.c
  13. +49
    -0
      third_party/sike/src/isogeny.h
  14. +592
    -0
      third_party/sike/src/sike.c
  15. +190
    -0
      third_party/sike/src/sike_test.cc
  16. +144
    -0
      third_party/sike/src/utils.h
  17. +62
    -0
      tool/speed.cc

+ 1
- 0
CMakeLists.txt Näytä tiedosto

@@ -568,6 +568,7 @@ add_subdirectory(ssl/test)
add_subdirectory(fipstools)
add_subdirectory(tool)
add_subdirectory(decrepit)
add_subdirectory(third_party/sike)

if(FUZZ)
if(LIBFUZZER_FROM_DEPS)


+ 1
- 0
crypto/CMakeLists.txt Näytä tiedosto

@@ -406,6 +406,7 @@ add_library(
../third_party/fiat/curve25519.c

$<TARGET_OBJECTS:fipsmodule>
$<TARGET_OBJECTS:sike>

${CRYPTO_ARCH_SOURCES}
${CRYPTO_FIPS_OBJECTS}


+ 59
- 0
third_party/sike/CMakeLists.txt Näytä tiedosto

@@ -0,0 +1,59 @@
cmake_minimum_required(VERSION 2.8.11)
include_directories(../../include)

set(ASM_EXT S)
enable_language(ASM)

# Platform specific sources
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" AND NOT OPENSL_NO_ASM)
set(
SIKE_PLATFORM_SOURCES

src/asm/fp_x64_asm.${ASM_EXT}
)
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64" AND NOT OPENSSL_NO_ASM)
set(
SIKE_PLATFORM_SOURCES

src/asm/fp_arm64_asm.${ASM_EXT}
)
else()
set(
SIKE_PLATFORM_SOURCES

src/asm/fp_generic.c
)
add_definitions(-DSIKE_NO_ASM)
endif()

# Compile to object files, we will link them with libssl
add_library(
sike

OBJECT

src/isogeny.c
src/fpx.c
src/P503.c
src/sike.c

${SIKE_PLATFORM_SOURCES}
)

target_include_directories(sike PUBLIC
include
)

add_executable(
sike_test

src/sike_test.cc

$<TARGET_OBJECTS:boringssl_gtest_main>
)

target_include_directories(sike_test PUBLIC
include
)

target_link_libraries(sike_test test_support_lib boringssl_gtest crypto)

+ 21
- 0
third_party/sike/LICENSE Näytä tiedosto

@@ -0,0 +1,21 @@
MIT License

Copyright (c) Microsoft Corporation. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

+ 68
- 0
third_party/sike/include/sike/sike.h Näytä tiedosto

@@ -0,0 +1,68 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: API header file for SIKE
*********************************************************************************************/

#ifndef SIKE_H_
#define SIKE_H_

#include <stdint.h>
#include <openssl/base.h>

#ifdef __cplusplus
extern "C" {
#endif

/* SIKEp503
*
* SIKE is a isogeny based post-quantum key encapsulation mechanism. Description of the
* algorithm is provided in [SIKE]. This implementation uses 503-bit field size. The code
* is based on "Additional_Implementations" from PQC NIST submission package which can
* be found here:
* https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip
*
* [SIKE] https://sike.org/files/SIDH-spec.pdf
*/

// SIKEp503_PUB_BYTESZ is the number of bytes in a public key.
#define SIKEp503_PUB_BYTESZ 378
// SIKEp503_PRV_BYTESZ is the number of bytes in a private key.
#define SIKEp503_PRV_BYTESZ 32
// SIKEp503_SS_BYTESZ is the number of bytes in a shared key.
#define SIKEp503_SS_BYTESZ 16
// SIKEp503_MSG_BYTESZ is the number of bytes in a random bit string concatenated
// with the public key (see 1.4 of SIKE).
#define SIKEp503_MSG_BYTESZ 24
// SIKEp503_SS_BYTESZ is the number of bytes in a ciphertext.
#define SIKEp503_CT_BYTESZ (SIKEp503_PUB_BYTESZ + SIKEp503_MSG_BYTESZ)

// SIKE_keypair outputs a public and secret key. Internally it uses BN_rand() as
// a entropy source. In case of success function returns 0, otherwise negative
// value.s
OPENSSL_EXPORT int SIKE_keypair(
uint8_t sk[SIKEp503_PRV_BYTESZ],
uint8_t pk[SIKEp503_PUB_BYTESZ]);

// SIKE_encaps is a encapsulation function of a SIKE key encapsulation mechnism used to
// fix random session key. It writes ephemeral session key to the |ss| and ciphertext
// to |ct|.
OPENSSL_EXPORT void SIKE_encaps(
uint8_t ss[SIKEp503_SS_BYTESZ],
uint8_t ct[SIKEp503_CT_BYTESZ],
const uint8_t pk[SIKEp503_PUB_BYTESZ]);

// SIKE_decaps is a decapsulation function of a SIKE key encapsulation mechnism used
// by possessor of secret key |sk| and corresponding public key |pk| to decapsulate
// ciphertext |ct| in order to get session key written to |ss|.
OPENSSL_EXPORT void SIKE_decaps(
uint8_t ss[SIKEp503_SS_BYTESZ],
const uint8_t ct[SIKEp503_CT_BYTESZ],
const uint8_t pk[SIKEp503_PUB_BYTESZ],
const uint8_t sk[SIKEp503_PRV_BYTESZ]);

#ifdef __cplusplus
}
#endif

#endif

+ 70
- 0
third_party/sike/src/P503.c Näytä tiedosto

@@ -0,0 +1,70 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny parameters and generation of functions for P503
*********************************************************************************************/

#include "utils.h"

// Parameters for isogeny system "SIKEp503"
const struct params_t p503 = {
.prime = {
0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
},
.prime_p1 = {
0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
},
.prime_x2 = {
0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
},
.A_gen = {
0xE7EF4AA786D855AF, 0xED5758F03EB34D3B, 0x09AE172535A86AA9, 0x237B9CC07D622723,
0xE3A284CBA4E7932D, 0x27481D9176C5E63F, 0x6A323FF55C6E71BF, 0x002ECC31A6FB8773, // XPA0
0x64D02E4E90A620B8, 0xDAB8128537D4B9F1, 0x4BADF77B8A228F98, 0x0F5DBDF9D1FB7D1B,
0xBEC4DB288E1A0DCC, 0xE76A8665E80675DB, 0x6D6F252E12929463, 0x003188BD1463FACC, // XPA1
0xB79D41025DE85D56, 0x0B867DA9DF169686, 0x740E5368021C827D, 0x20615D72157BF25C,
0xFF1590013C9B9F5B, 0xC884DCADE8C16CEA, 0xEBD05E53BF724E01, 0x0032FEF8FDA5748C, // XQA0
0x12E2E849AA0A8006, 0x41CF47008635A1E8, 0x9CD720A70798AED7, 0x42A820B42FCF04CF,
0x7BF9BAD32AAE88B1, 0xF619127A54090BBE, 0x1CB10D8F56408EAA, 0x001D6B54C3C0EDEB, // XRA0
0x34DB54931CBAAC36, 0x420A18CB8DD5F0C4, 0x32008C1A48C0F44D, 0x3B3BA772B1CFD44D,
0xA74B058FDAF13515, 0x095FC9CA7EEC17B4, 0x448E829D28F120F8, 0x00261EC3ED16A489 // XRA1
},
.B_gen = {
0x7EDE37F4FA0BC727, 0xF7F8EC5C8598941C, 0xD15519B516B5F5C8, 0xF6D5AC9B87A36282,
0x7B19F105B30E952E, 0x13BD8B2025B4EBEE, 0x7B96D27F4EC579A2, 0x00140850CAB7E5DE, // XPB0
0x7764909DAE7B7B2D, 0x578ABB16284911AB, 0x76E2BFD146A6BF4D, 0x4824044B23AA02F0,
0x1105048912A321F3, 0xB8A2E482CF0F10C1, 0x42FF7D0BE2152085, 0x0018E599C5223352, // XPB1
0x4256C520FB388820, 0x744FD7C3BAAF0A13, 0x4B6A2DDDB12CBCB8, 0xE46826E27F427DF8,
0xFE4A663CD505A61B, 0xD6B3A1BAF025C695, 0x7C3BB62B8FCC00BD, 0x003AFDDE4A35746C, // XQB0
0x75601CD1E6C0DFCB, 0x1A9007239B58F93E, 0xC1F1BE80C62107AC, 0x7F513B898F29FF08,
0xEA0BEDFF43E1F7B2, 0x2C6D94018CBAE6D0, 0x3A430D31BCD84672, 0x000D26892ECCFE83, // XRB0
0x1119D62AEA3007A1, 0xE3702AA4E04BAE1B, 0x9AB96F7D59F990E7, 0xF58440E8B43319C0,
0xAF8134BEE1489775, 0xE7F7774E905192AA, 0xF54AE09308E98039, 0x001EF7A041A86112 // XRB1
},
.mont_R2 = {
0x5289A0CF641D011F, 0x9B88257189FED2B9, 0xA3B365D58DC8F17A, 0x5BC57AB6EFF168EC,
0x9E51998BD84D4423, 0xBF8999CBAC3B5695, 0x46E9127BCE14CDB6, 0x003F6CFCE8B81771
},
.mont_one = {
0x00000000000003F9, 0x0000000000000000, 0x0000000000000000, 0xB400000000000000,
0x63CB1A6EA6DED2B4, 0x51689D8D667EB37D, 0x8ACD77C71AB24142, 0x0026FBAEC60F5953
},
.A_strat = {
61, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1,
4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1,
1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 29, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1,
1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2,
1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1
},
.B_strat = {
71, 38, 21, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1,
1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9,
5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1,
1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1,
2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2,
1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1
}
};

+ 813
- 0
third_party/sike/src/asm/fp_arm64_asm.S Näytä tiedosto

@@ -0,0 +1,813 @@
//*******************************************************************************************
// SIDH: an efficient supersingular isogeny cryptography library
//
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux
//*******************************************************************************************

.section .rodata

.p503p1_nz_s8:
.quad 0x085BDA2211E7A0AC, 0x9BF6C87B7E7DAF13
.quad 0x45C6BDDA77A4D01B, 0x4066F541811E1E60

.p503x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0x57FFFFFFFFFFFFFF, 0x2610B7B44423CF41
.quad 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0
.quad 0x0080CDEA83023C3C

.text
//***********************************************************************
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd
fpadd:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]

// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adc x10, x10, x18

// Subtract 2xp503
adrp x18, .p503x2 // pc-relative addressing
add x18, x18, :lo12:.p503x2
ldp x11, x12, [x18, #0]
ldp x13, x14, [x18, #16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14

ldp x15, x16, [x18, #32]
ldr x17, [x18, #48]
sbcs x8, x8, x15
sbcs x9, x9, x16
sbcs x10, x10, x17
sbc x18, xzr, xzr

// Add 2xp503 anded with the mask in x18
and x11, x11, x18
and x12, x12, x18
and x13, x13, x18
and x14, x14, x18
and x15, x15, x18
and x16, x16, x18
and x17, x17, x18

adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adcs x9, x9, x16
adc x10, x10, x17

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret


//***********************************************************************
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub
fpsub:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]

// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbcs x10, x10, x18
sbc x18, xzr, xzr

// Add 2xp503 anded with the mask in x18
adrp x19, .p503x2 // pc-relative addressing
add x19, x19, :lo12:.p503x2
ldp x11, x12, [x19, #0]
ldp x13, x14, [x19, #16]
and x11, x11, x18
and x12, x12, x18
and x13, x13, x18
and x14, x14, x18
ldp x15, x16, [x19, #32]
ldr x17, [x19, #48]
and x15, x15, x18
and x16, x16, x18
and x17, x17, x18

adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adcs x9, x9, x16
adc x10, x10, x17

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret


//////////////////////////////////////////// MACRO
.macro MUL128_COMBA_CUT A0, A1, B0, B1, C0, C1, C2, C3, T0
mul \A0, \A1, \B0
umulh \B0, \A1, \B0
adds \C1, \C1, \C3
adc \C2, \C2, xzr

mul \T0, \A1, \B1
umulh \B1, \A1, \B1
adds \C1, \C1, \A0
adcs \C2, \C2, \B0
adc \C3, xzr, xzr

adds \C2, \C2, \T0
adc \C3, \C3, \B1
.endm


//////////////////////////////////////////// MACRO
.macro MUL256_KARATSUBA_COMBA M,A0,A1,A2,A3,B0,B1,B2,B3,C0,C1,C2,C3,C4,C5,C6,C7,T0,T1

// A0-A1 <- AH + AL, T0 <- mask
adds \A0, \A0, \A2
adcs \A1, \A1, \A3
adc \T0, xzr, xzr

// C6, T1 <- BH + BL, C7 <- mask
adds \C6, \B0, \B2
adcs \T1, \B1, \B3
adc \C7, xzr, xzr

// C0-C1 <- masked (BH + BL)
sub \C2, xzr, \T0
sub \C3, xzr, \C7
and \C0, \C6, \C2
and \C1, \T1, \C2

// C4-C5 <- masked (AH + AL), T0 <- combined carry
and \C4, \A0, \C3
and \C5, \A1, \C3
mul \C2, \A0, \C6
mul \C3, \A0, \T1
and \T0, \T0, \C7

// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds \C0, \C4, \C0
umulh \C4, \A0, \T1
adcs \C1, \C5, \C1
umulh \C5, \A0, \C6
adc \T0, \T0, xzr

// C2-C5 <- (AH+AL) x (BH+BL), low part
MUL128_COMBA_CUT \A0, \A1, \C6, \T1, \C2, \C3, \C4, \C5, \C7
ldp \A0, \A1, [\M,#0]

// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds \C4, \C0, \C4
umulh \C7, \A0, \B0
umulh \T1, \A0, \B1
adcs \C5, \C1, \C5
mul \C0, \A0, \B0
mul \C1, \A0, \B1
adc \T0, \T0, xzr

// C0-C1, T1, C7 <- AL x BL
MUL128_COMBA_CUT \A0, \A1, \B0, \B1, \C0, \C1, \T1, \C7, \C6

// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul \A0, \A2, \B2
umulh \B0, \A2, \B2
subs \C2, \C2, \C0
sbcs \C3, \C3, \C1
sbcs \C4, \C4, \T1
mul \A1, \A2, \B3
umulh \C6, \A2, \B3
sbcs \C5, \C5, \C7
sbc \T0, \T0, xzr

// A0, A1, C6, B0 <- AH x BH
MUL128_COMBA_CUT \A2, \A3, \B2, \B3, \A0, \A1, \C6, \B0, \B1

// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs \C2, \C2, \A0
sbcs \C3, \C3, \A1
sbcs \C4, \C4, \C6
sbcs \C5, \C5, \B0
sbc \T0, \T0, xzr

adds \C2, \C2, \T1
adcs \C3, \C3, \C7
adcs \C4, \C4, \A0
adcs \C5, \C5, \A1
adcs \C6, \T0, \C6
adc \C7, \B0, xzr
.endm


//***********************************************************************************
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mp_mul
mp_mul:
sub sp, sp, #96
stp x19, x20, [sp,#0]
stp x21, x22, [sp,#16]
stp x23, x24, [sp,#32]
stp x25, x26, [sp,#48]
stp x27, x28, [sp,#64]
str x29, [sp, #80]

ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]

// x26-x29 <- AH + AL, x7 <- mask
adds x26, x3, x7
adcs x27, x4, x8
adcs x28, x5, x9
adcs x29, x6, x10
adc x7, xzr, xzr

// x11-x14 <- BH + BL, x8 <- mask
adds x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, x17
adcs x14, x14, x18
adc x8, xzr, xzr

// x15-x18 <- masked (BH + BL)
sub x9, xzr, x7
sub x10, xzr, x8
and x15, x11, x9
and x16, x12, x9
and x17, x13, x9
and x18, x14, x9

// x19-x22 <- masked (AH + AL), x7 <- combined carry
and x19, x26, x10
and x20, x27, x10
and x21, x28, x10
and x22, x29, x10
and x7, x7, x8

// x15-x18, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x15, x15, x19
adcs x16, x16, x20
adcs x17, x17, x21
adcs x18, x18, x22
adc x7, x7, xzr

// x8-x10,x19-x23 <- (AH+AL) x (BH+BL), low part
stp x26, x27, [x2,#0]
MUL256_KARATSUBA_COMBA x2, x26, x27, x28, x29, x11, x12, x13, x14, x8, x9, x10, x19, x20, x21, x22, x23, x24, x25

// x15-x18, x7 <- (AH+AL) x (BH+BL), final step
adds x15, x15, x20
adcs x16, x16, x21
adcs x17, x17, x22
adcs x18, x18, x23
adc x7, x7, xzr

// x20-x27 <- AL x BL
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
MUL256_KARATSUBA_COMBA x0, x3, x4, x5, x6, x11, x12, x13, x14, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29

// x13-x14, x3-x5 <- (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x20
sbcs x9, x9, x21
sbcs x10, x10, x22
sbcs x19, x19, x23
sbcs x15, x15, x24
sbcs x16, x16, x25
sbcs x17, x17, x26
sbcs x18, x18, x27
sbc x7, x7, xzr

stp x20, x21, [x2]
stp x22, x23, [x2,#16]

ldp x3, x4, [x0,#32]
ldp x5, x6, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]

adds x8, x8, x24
adcs x9, x9, x25
adcs x10, x10, x26
adcs x19, x19, x27
adc x1, xzr, xzr

// x20-x27 <- AH x BH
add x0, x0, #32
MUL256_KARATSUBA_COMBA x0, x3, x4, x5, x6, x11, x12, x13, x14, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29
neg x1, x1

// x13-x14, x3-x5 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x20
sbcs x9, x9, x21
sbcs x10, x10, x22
sbcs x19, x19, x23
sbcs x15, x15, x24
sbcs x16, x16, x25
sbcs x17, x17, x26
sbcs x18, x18, x27
sbc x7, x7, xzr

stp x8, x9, [x2,#32]
stp x10, x19, [x2,#48]

adds x1, x1, #1
adcs x15, x15, x20
adcs x16, x16, x21
adcs x17, x17, x22
adcs x18, x18, x23
adcs x24, x7, x24
adcs x25, x25, xzr
adcs x26, x26, xzr
adc x27, x27, xzr

stp x15, x16, [x2,#64]
stp x17, x18, [x2,#80]
stp x24, x25, [x2,#96]
stp x26, x27, [x2,#112]

ldp x19, x20, [sp,#0]
ldp x21, x22, [sp,#16]
ldp x23, x24, [sp,#32]
ldp x25, x26, [sp,#48]
ldp x27, x28, [sp,#64]
ldr x29, [sp,#80]
add sp, sp, #96
ret


//////////////////////////////////////////// MACRO
.macro MUL128x256_COMBA_CUT A0, A1, B0, B1, B2, B3, C0, C1, C2, C3, C4, C5, T0, T1, T2, T3
mul \T0, \A1, \B0
umulh \T1, \A1, \B0
adds \C1, \C1, \C3
adc \C2, \C2, xzr

mul \T2, \A0, \B2
umulh \T3, \A0, \B2
adds \C1, \C1, \T0
adcs \C2, \C2, \T1
adc \C3, xzr, xzr

mul \T0, \A1, \B1
umulh \T1, \A1, \B1
adds \C2, \C2, \T2
adcs \C3, \C3, \T3
adc \C4, xzr, xzr

mul \T2, \A0, \B3
umulh \T3, \A0, \B3
adds \C2, \C2, \T0
adcs \C3, \C3, \T1
adc \C4, \C4, xzr

mul \T0, \A1, \B2
umulh \T1, \A1, \B2
adds \C3, \C3, \T2
adcs \C4, \C4, \T3
adc \C5, xzr, xzr

mul \T2, \A1, \B3
umulh \T3, \A1, \B3
adds \C3, \C3, \T0
adcs \C4, \C4, \T1
adc \C5, \C5, xzr
adds \C4, \C4, \T2
adc \C5, \C5, \T3
.endm


//**************************************************************************************
// Montgomery reduction
// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc_mont
rdc_mont:
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]

ldp x2, x3, [x0,#0] // a[0-1]

// Load the prime constant
adrp x23, .p503p1_nz_s8 // pc-relative addressing
add x23, x23, :lo12:.p503p1_nz_s8
ldp x24, x25, [x23, #0]
ldp x26, x27, [x23, #16]

// a[0-1] x p503p1_nz_s8 --> result: x4:x9
mul x4, x2, x24 // a[0] x p503p1_nz_s8[0]
umulh x7, x2, x24
mul x5, x2, x25 // a[0] x p503p1_nz_s8[1]
umulh x6, x2, x25
MUL128x256_COMBA_CUT x2, x3, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10

ldp x3, x11, [x0,#16] // a[2]
ldp x12, x13, [x0,#32]
ldp x14, x15, [x0,#48]

orr x10, xzr, x9, lsr #8
lsl x9, x9, #56
orr x9, x9, x8, lsr #8
lsl x8, x8, #56
orr x8, x8, x7, lsr #8
lsl x7, x7, #56
orr x7, x7, x6, lsr #8
lsl x6, x6, #56
orr x6, x6, x5, lsr #8
lsl x5, x5, #56
orr x5, x5, x4, lsr #8
lsl x4, x4, #56

adds x11, x4, x11 // a[3]
adcs x12, x5, x12 // a[4]
adcs x13, x6, x13
adcs x14, x7, x14
adcs x15, x8, x15
ldp x16, x17, [x0,#64]
ldp x18, x19, [x0,#80]
mul x4, x3, x24 // a[2] x p503p1_nz_s8[0]
umulh x7, x3, x24
adcs x16, x9, x16
adcs x17, x10, x17
adcs x18, xzr, x18
adcs x19, xzr, x19
ldp x20, x21, [x0,#96]
ldp x22, x23, [x0,#112]
mul x5, x3, x25 // a[2] x p503p1_nz_s8[1]
umulh x6, x3, x25
adcs x20, xzr, x20
adcs x21, xzr, x21
adcs x22, xzr, x22
adc x23, xzr, x23

// a[2-3] x p503p1_nz_s8 --> result: x4:x9
MUL128x256_COMBA_CUT x3, x11, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10

orr x10, xzr, x9, lsr #8
lsl x9, x9, #56
orr x9, x9, x8, lsr #8
lsl x8, x8, #56
orr x8, x8, x7, lsr #8
lsl x7, x7, #56
orr x7, x7, x6, lsr #8
lsl x6, x6, #56
orr x6, x6, x5, lsr #8
lsl x5, x5, #56
orr x5, x5, x4, lsr #8
lsl x4, x4, #56

adds x13, x4, x13 // a[5]
adcs x14, x5, x14 // a[6]
adcs x15, x6, x15
adcs x16, x7, x16
mul x4, x12, x24 // a[4] x p503p1_nz_s8[0]
umulh x7, x12, x24
adcs x17, x8, x17
adcs x18, x9, x18
adcs x19, x10, x19
adcs x20, xzr, x20
mul x5, x12, x25 // a[4] x p503p1_nz_s8[1]
umulh x6, x12, x25
adcs x21, xzr, x21
adcs x22, xzr, x22
adc x23, xzr, x23

// a[4-5] x p503p1_nz_s8 --> result: x4:x9
MUL128x256_COMBA_CUT x12, x13, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10

orr x10, xzr, x9, lsr #8
lsl x9, x9, #56
orr x9, x9, x8, lsr #8
lsl x8, x8, #56
orr x8, x8, x7, lsr #8
lsl x7, x7, #56
orr x7, x7, x6, lsr #8
lsl x6, x6, #56
orr x6, x6, x5, lsr #8
lsl x5, x5, #56
orr x5, x5, x4, lsr #8
lsl x4, x4, #56

adds x15, x4, x15 // a[7]
adcs x16, x5, x16 // a[8]
adcs x17, x6, x17
adcs x18, x7, x18
mul x4, x14, x24 // a[6] x p503p1_nz_s8[0]
umulh x7, x14, x24
adcs x19, x8, x19
adcs x20, x9, x20
adcs x21, x10, x21
mul x5, x14, x25 // a[6] x p503p1_nz_s8[1]
umulh x6, x14, x25
adcs x22, xzr, x22
adc x23, xzr, x23

// a[6-7] x p503p1_nz_s8 --> result: x4:x9
MUL128x256_COMBA_CUT x14, x15, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10

orr x10, xzr, x9, lsr #8
lsl x9, x9, #56
orr x9, x9, x8, lsr #8
lsl x8, x8, #56
orr x8, x8, x7, lsr #8
lsl x7, x7, #56
orr x7, x7, x6, lsr #8
lsl x6, x6, #56
orr x6, x6, x5, lsr #8
lsl x5, x5, #56
orr x5, x5, x4, lsr #8
lsl x4, x4, #56

adds x17, x4, x17
adcs x18, x5, x18
adcs x19, x6, x19
adcs x20, x7, x20
stp x16, x17, [x1,#0] // Final result
stp x18, x19, [x1,#16]
adcs x21, x8, x21
adcs x22, x9, x22
adc x23, x10, x23
stp x20, x21, [x1,#32]
stp x22, x23, [x1,#48]

ldp x19, x20, [sp]
ldp x21, x22, [sp, #16]
ldp x23, x24, [sp, #32]
ldp x25, x26, [sp, #48]
ldp x27, x28, [sp, #64]
ldp x29, x30, [sp, #80]
add sp, sp, #96
ret


//***********************************************************************
// 503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add_asm
mp_add_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]

adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adc x10, x10, x18

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret


//***********************************************************************
// 2x503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add503x2_asm
mp_add503x2_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adcs x10, x10, x18

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]

ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
adcs x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x9, x10, [x0,#112]
ldp x15, x16, [x1,#96]
ldp x17, x18, [x1,#112]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adc x10, x10, x18

stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
stp x9, x10, [x2,#112]
ret


//***********************************************************************
// 2x503-bit multiprecision subtraction
// Operation: c [x2] = a [x0] - b [x1]. Returns borrow mask
//***********************************************************************
.global mp_subx2_asm
mp_subx2_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbcs x10, x10, x18

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]

ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x9, x10, [x0,#112]
ldp x15, x16, [x1,#96]
ldp x17, x18, [x1,#112]
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbcs x10, x10, x18
sbc x0, xzr, xzr

stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
stp x9, x10, [x2,#112]
ret


//***********************************************************************
// Double 2x503-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//***********************************************************************
.global mp_dblsubx2_asm
mp_dblsubx2_asm:
sub sp, sp, #32
stp x27, x28, [sp, #0]
stp x29, x30, [sp, #16]
ldp x3, x4, [x2,#0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x9, x10, [x2,#48]
ldp x11, x12, [x2,#64]
ldp x13, x14, [x2,#80]
ldp x15, x16, [x2,#96]
ldp x17, x18, [x2,#112]

ldp x27, x28, [x0,#0]
ldp x29, x30, [x0,#16]
subs x3, x3, x27
sbcs x4, x4, x28
sbcs x5, x5, x29
sbcs x6, x6, x30
ldp x27, x28, [x0,#32]
ldp x29, x30, [x0,#48]
sbcs x7, x7, x27
sbcs x8, x8, x28
sbcs x9, x9, x29
sbcs x10, x10, x30
ldp x27, x28, [x0,#64]
ldp x29, x30, [x0,#80]
sbcs x11, x11, x27
sbcs x12, x12, x28
sbcs x13, x13, x29
sbcs x14, x14, x30
ldp x27, x28, [x0,#96]
ldp x29, x30, [x0,#112]
sbcs x15, x15, x27
sbcs x16, x16, x28
sbcs x17, x17, x29
sbc x18, x18, x30

ldp x27, x28, [x1,#0]
ldp x29, x30, [x1,#16]
subs x3, x3, x27
sbcs x4, x4, x28
sbcs x5, x5, x29
sbcs x6, x6, x30
ldp x27, x28, [x1,#32]
ldp x29, x30, [x1,#48]
sbcs x7, x7, x27
sbcs x8, x8, x28
sbcs x9, x9, x29
sbcs x10, x10, x30
ldp x27, x28, [x1,#64]
ldp x29, x30, [x1,#80]
sbcs x11, x11, x27
sbcs x12, x12, x28
sbcs x13, x13, x29
sbcs x14, x14, x30
ldp x27, x28, [x1,#96]
ldp x29, x30, [x1,#112]
sbcs x15, x15, x27
sbcs x16, x16, x28
sbcs x17, x17, x29
sbc x18, x18, x30

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
stp x11, x12, [x2,#64]
stp x13, x14, [x2,#80]
stp x15, x16, [x2,#96]
stp x17, x18, [x2,#112]

ldp x27, x28, [sp, #0]
ldp x29, x30, [sp, #16]
add sp, sp, #32
ret

+ 176
- 0
third_party/sike/src/asm/fp_generic.c Näytä tiedosto

@@ -0,0 +1,176 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: portable modular arithmetic for P503
*********************************************************************************************/

#include "../utils.h"
#include "../fpx.h"

// Global constants
extern const struct params_t p503;

static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
{ // Digit multiplication, digit * digit -> 2-digit result
register crypto_word_t al, ah, bl, bh, temp;
crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);
crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4);

al = a & mask_low; // Low part
ah = a >> (sizeof(crypto_word_t) * 4); // High part
bl = b & mask_low;
bh = b >> (sizeof(crypto_word_t) * 4);

albl = al*bl;
albh = al*bh;
ahbl = ah*bl;
ahbh = ah*bh;
c[0] = albl & mask_low; // C00

res1 = albl >> (sizeof(crypto_word_t) * 4);
res2 = ahbl & mask_low;
res3 = albh & mask_low;
temp = res1 + res2 + res3;
carry = temp >> (sizeof(crypto_word_t) * 4);
c[0] ^= temp << (sizeof(crypto_word_t) * 4); // C01

res1 = ahbl >> (sizeof(crypto_word_t) * 4);
res2 = albh >> (sizeof(crypto_word_t) * 4);
res3 = ahbh & mask_low;
temp = res1 + res2 + res3 + carry;
c[1] = temp & mask_low; // C10
carry = temp & mask_high;
c[1] ^= (ahbh & mask_high) + carry; // C11
}


void fpadd(const felm_t a, const felm_t b, felm_t c)
{ // Modular addition, c = a+b mod p503.
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
unsigned int i, carry = 0;
crypto_word_t mask;

for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, a[i], b[i], carry, c[i]);
}

carry = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
SUBC(carry, c[i], ((crypto_word_t*)p503.prime_x2)[i], carry, c[i]);
}
mask = 0 - (crypto_word_t)carry;

carry = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, c[i], ((crypto_word_t*)p503.prime_x2)[i] & mask, carry, c[i]);
}
}


void fpsub(const felm_t a, const felm_t b, felm_t c)
{ // Modular subtraction, c = a-b mod p503.
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
unsigned int i, borrow = 0;
crypto_word_t mask;

for (i = 0; i < NWORDS_FIELD; i++) {
SUBC(borrow, a[i], b[i], borrow, c[i]);
}
mask = 0 - (crypto_word_t)borrow;

borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
ADDC(borrow, c[i], ((crypto_word_t*)p503.prime_x2)[i] & mask, borrow, c[i]);
}
}

void mp_mul(const felm_t a, const felm_t b, dfelm_t c)
{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
unsigned int i, j;
crypto_word_t t = 0, u = 0, v = 0, UV[2];
unsigned int carry = 0;

for (i = 0; i < NWORDS_FIELD; i++) {
for (j = 0; j <= i; j++) {
MUL(a[j], b[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
c[i] = v;
v = u;
u = t;
t = 0;
}

for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
MUL(a[j], b[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
c[i] = v;
v = u;
u = t;
t = 0;
}
c[2*NWORDS_FIELD-1] = v;
}

void rdc_mont(const felm_t ma, felm_t mc)
{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.
unsigned int i, j, carry, count = p503_ZERO_WORDS;
crypto_word_t UV[2], t = 0, u = 0, v = 0;

for (i = 0; i < NWORDS_FIELD; i++) {
mc[i] = 0;
}

for (i = 0; i < NWORDS_FIELD; i++) {
for (j = 0; j < i; j++) {
if (j < (i-p503_ZERO_WORDS+1)) {
MUL(mc[j], ((crypto_word_t*)p503.prime_p1)[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i] = v;
v = u;
u = t;
t = 0;
}

for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
if (count > 0) {
count -= 1;
}
for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
if (j < (NWORDS_FIELD-count)) {
MUL(mc[j], ((crypto_word_t*)p503.prime_p1)[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i-NWORDS_FIELD] = v;
v = u;
u = t;
t = 0;
}
ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);
mc[NWORDS_FIELD-1] = v;
}

+ 1642
- 0
third_party/sike/src/asm/fp_x64_asm.S
File diff suppressed because it is too large
Näytä tiedosto


+ 311
- 0
third_party/sike/src/fpx.c Näytä tiedosto

@@ -0,0 +1,311 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: core functions over GF(p) and GF(p^2)
*********************************************************************************************/

#include "utils.h"
#include "fpx.h"

extern const struct params_t p503;

// Double 2x503-bit multiprecision subtraction, c = c-a-b
void mp_dblsubx2_asm(const felm_t a, const felm_t b, felm_t c);
// Multiprecision subtraction, c = a-b
crypto_word_t mp_subx2_asm(const felm_t a, const felm_t b, felm_t c);
// 503-bit multiprecision addition, c = a+b
void mp_add_asm(const felm_t a, const felm_t b, felm_t c);

// Multiprecision squaring, c = a^2 mod p.
static void fpsqr_mont(const felm_t ma, felm_t mc)
{
dfelm_t temp = {0};
mp_mul(ma, ma, temp);
rdc_mont(temp, mc);
}

// Chain to compute a^(p-3)/4 using Montgomery arithmetic.
static void fpinv_chain_mont(felm_t a)
{
unsigned int i, j;
felm_t t[15], tt;

// Precomputed table
fpsqr_mont(a, tt);
fpmul_mont(a, tt, t[0]);
for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]);

fpcopy(a, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(a, tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[8], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[9], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[0], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
fpmul_mont(a, tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[2], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[8], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
fpmul_mont(a, tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[10], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[0], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[10], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[10], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[5], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[2], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[3], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[5], tt, tt);
for (i = 0; i < 12; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[12], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[8], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[12], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[11], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[5], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[14], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[14], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[5], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[8], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(a, tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[4], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[6], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[5], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[7], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(a, tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[0], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[11], tt, tt);
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[13], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[1], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[10], tt, tt);
for (j = 0; j < 49; j++) {
for (i = 0; i < 5; i++) fpsqr_mont(tt, tt);
fpmul_mont(t[14], tt, tt);
}
fpcopy(tt, a);
}

// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p.
static void fpinv_mont(felm_t a)
{
felm_t tt = {0};
fpcopy(a, tt);
fpinv_chain_mont(tt);
fpsqr_mont(tt, tt);
fpsqr_mont(tt, tt);
fpmul_mont(a, tt, a);
}

// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
#if defined(SIKE_NO_ASM)
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
uint8_t carry = 0;
for (size_t i = 0; i < nwords; i++) {
ADDC(carry, a[i], b[i], carry, c[i]);
}
return carry;
}

// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit.
inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
uint32_t borrow = 0;
for (size_t i = 0; i < nwords; i++) {
SUBC(borrow, a[i], b[i], borrow, c[i]);
}
return borrow;
}
#endif

// Multiprecision addition, c = a+b.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#if defined(SIKE_NO_ASM)
mp_add(a, b, c, NWORDS_FIELD);
#else
mp_add_asm(a, b, c);
#endif
}

// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
inline static crypto_word_t mp_subfast(const felm_t a, const felm_t b, felm_t c) {
#if defined(SIKE_NO_ASM)
return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
#else
return mp_subx2_asm(a, b, c);
#endif
}

// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
// Inputs should be s.t. c > a and c > b
inline static void mp_dblsubfast(const felm_t a, const felm_t b, felm_t c) {
#if defined(SIKE_NO_ASM)
mp_sub(c, a, c, 2*NWORDS_FIELD);
mp_sub(c, b, c, 2*NWORDS_FIELD);
#else
mp_dblsubx2_asm(a, b, c);
#endif
}

// Copy a field element, c = a.
void fpcopy(const felm_t a, felm_t c) {
for (size_t i = 0; i < NWORDS_FIELD; i++) {
c[i] = a[i];
}
}

// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768
void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{
dfelm_t temp = {0};
mp_mul(ma, mb, temp);
rdc_mont(temp, mc);
}

// Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
void from_mont(const felm_t ma, felm_t c)
{
felm_t one = {0};
one[0] = 1;

fpmul_mont(ma, one, c);
fpcorrection(c);
}

// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
void fp2sqr_mont(const f2elm_t a, f2elm_t c) {
felm_t t1, t2, t3;

mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1
fpsub(a->c0, a->c1, t2); // t2 = a0-a1
mp_addfast(a->c0, a->c0, t3); // t3 = 2a0
fpmul_mont(t1, t2, c->c0); // c0 = (a0+a1)(a0-a1)
fpmul_mont(t3, a->c1, c->c1); // c1 = 2a0*a1
}

// Modular negation, a = -a mod p503.
// Input/output: a in [0, 2*p503-1]
void fpneg(felm_t a) {
uint32_t borrow = 0;
for (size_t i = 0; i < NWORDS_FIELD; i++) {
SUBC(borrow, ((crypto_word_t*)p503.prime_x2)[i], a[i], borrow, a[i]);
}
}

// Modular division by two, c = a/2 mod p503.
// Input : a in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
void fpdiv2(const felm_t a, felm_t c) {
uint32_t carry = 0;
crypto_word_t mask;

mask = 0 - (crypto_word_t)(a[0] & 1); // If a is odd compute a+p503
for (size_t i = 0; i < NWORDS_FIELD; i++) {
ADDC(carry, a[i], ((crypto_word_t*)p503.prime)[i] & mask, carry, c[i]);
}

// Multiprecision right shift by one.
for (size_t i = 0; i < NWORDS_FIELD-1; i++) {
c[i] = (c[i] >> 1) ^ (c[i+1] << (RADIX - 1));
}
c[NWORDS_FIELD-1] >>= 1;
}

// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1].
void fpcorrection(felm_t a) {
uint32_t borrow = 0;
crypto_word_t mask;

for (size_t i = 0; i < NWORDS_FIELD; i++) {
SUBC(borrow, a[i], ((crypto_word_t*)p503.prime)[i], borrow, a[i]);
}
mask = 0 - (crypto_word_t)borrow;

borrow = 0;
for (size_t i = 0; i < NWORDS_FIELD; i++) {
ADDC(borrow, a[i], ((crypto_word_t*)p503.prime)[i] & mask, borrow, a[i]);
}
}

// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2).
// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
void fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
felm_t t1, t2;
dfelm_t tt1, tt2, tt3;
crypto_word_t mask;

mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1
mp_addfast(b->c0, b->c1, t2); // t2 = b0+b1
mp_mul(a->c0, b->c0, tt1); // tt1 = a0*b0
mp_mul(a->c1, b->c1, tt2); // tt2 = a1*b1
mp_mul(t1, t2, tt3); // tt3 = (a0+a1)*(b0+b1)
mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
mask = mp_subfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1. If tt1 < 0 then mask = 0xFF..F, else if tt1 >= 0 then mask = 0x00..0

for (size_t i = 0; i < NWORDS_FIELD; i++) {
t1[i] = ((crypto_word_t*)p503.prime)[i] & mask;
}

rdc_mont(tt3, c->c1); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
mp_addfast((crypto_word_t*)&tt1[NWORDS_FIELD], t1, (crypto_word_t*)&tt1[NWORDS_FIELD]);
rdc_mont(tt1, c->c0); // c[0] = a0*b0 - a1*b1
}

// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2).
void fp2inv_mont(f2elm_t a) {
f2elm_t t1;

fpsqr_mont(a->c0, t1->c0); // t10 = a0^2
fpsqr_mont(a->c1, t1->c1); // t11 = a1^2
fpadd(t1->c0, t1->c1, t1->c0); // t10 = a0^2+a1^2
fpinv_mont(t1->c0); // t10 = (a0^2+a1^2)^-1
fpneg(a->c1); // a = a0-i*a1
fpmul_mont(a->c0, t1->c0, a->c0);
fpmul_mont(a->c1, t1->c0, a->c1); // a = (a0-i*a1)*(a0^2+a1^2)^-1
}

+ 98
- 0
third_party/sike/src/fpx.h Näytä tiedosto

@@ -0,0 +1,98 @@
#ifndef FPX_H_
#define FPX_H_

#include "utils.h"

// Modular addition, c = a+b mod p503.
void fpadd(const felm_t a, const felm_t b, felm_t c);
// Modular subtraction, c = a-b mod p503.
void fpsub(const felm_t a, const felm_t b, felm_t c);
// Modular division by two, c = a/2 mod p503.
void fpdiv2(const felm_t a, felm_t c);
// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1].
void fpcorrection(felm_t a);
// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
void mp_mul(const felm_t a, const felm_t b, dfelm_t c);
// 503-bit Montgomery reduction, c = a mod p
void rdc_mont(const dfelm_t a, felm_t c);
// Modular negation, a = -a mod p503.
void fpneg(felm_t a);
// Copy of a field element, c = a
void fpcopy(const felm_t a, felm_t c);
// Copy a field element, c = a.
void fpzero(felm_t a);
// If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time.
void cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2)
void fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
void fp2inv_mont(f2elm_t a);
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
void fp2sqr_mont(const f2elm_t a, f2elm_t c);
// Modular correction, a = a in GF(p^2).
void fp2correction(f2elm_t a);
// Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
void from_mont(const felm_t ma, felm_t c);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768
void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);

// GF(p^2) addition, c = a+b in GF(p^2).
#define fp2add(a, b, c) \
do { \
fpadd(a->c0, b->c0, c->c0); \
fpadd(a->c1, b->c1, c->c1); \
} while(0)

// GF(p^2) subtraction, c = a-b in GF(p^2).
#define fp2sub(a,b,c) \
do { \
fpsub(a->c0, b->c0, c->c0); \
fpsub(a->c1, b->c1, c->c1); \
} while(0)

// Copy a GF(p^2) element, c = a.
#define fp2copy(a, c) \
do { \
fpcopy(a->c0, c->c0); \
fpcopy(a->c1, c->c1); \
} while(0)

// GF(p^2) negation, a = -a in GF(p^2).
#define fp2neg(a) \
do { \
fpneg(a->c0); \
fpneg(a->c1); \
} while(0)

// GF(p^2) division by two, c = a/2 in GF(p^2).
#define fp2div2(a, c) \
do { \
fpdiv2(a->c0, c->c0); \
fpdiv2(a->c1, c->c1); \
} while(0)

// Modular correction, a = a in GF(p^2).
#define fp2correction(a) \
do { \
fpcorrection(a->c0); \
fpcorrection(a->c1); \
} while(0)

// Conversion of a GF(p^2) element to Montgomery representation,
// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
#define to_fp2mont(a, mc) \
do { \
fpmul_mont(a->c0, (crypto_word_t*)&p503.mont_R2, mc->c0); \
fpmul_mont(a->c1, (crypto_word_t*)&p503.mont_R2, mc->c1); \
} while(0)

// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
// c_i = ma_i*R^(-1) = a_i in GF(p^2).
#define from_fp2mont(ma, c) \
do { \
from_mont(ma->c0, c->c0); \
from_mont(ma->c1, c->c1); \
} while(0)

#endif // FPX_H_

+ 260
- 0
third_party/sike/src/isogeny.c Näytä tiedosto

@@ -0,0 +1,260 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: elliptic curve and isogeny functions
*********************************************************************************************/
#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
{ // Doubling of a Montgomery point in projective coordinates (X:Z).
// Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C.
// Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
f2elm_t t0, t1;

fp2sub(P->X, P->Z, t0); // t0 = X1-Z1
fp2add(P->X, P->Z, t1); // t1 = X1+Z1
fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2
fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2
fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2
fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2
fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
fp2add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
}

void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
// Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C.
// Output: projective Montgomery x-coordinates Q <- (2^e)*P.

memcpy(Q, P, sizeof(*P));
for (size_t i = 0; i < e; i++) {
xDBL(Q, Q, A24plus, C24);
}
}

void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
// Input: projective point of order four P = (X4:Z4).
// Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients
// that are used to evaluate the isogeny at a point in eval_4_isog().

fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4
fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4
fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2
fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2
fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4
fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2
fp2sqr_mont(P->X, A24plus); // A24plus = X4^2
fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2
fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4
}

void eval_4_isog(point_proj_t P, f2elm_t* coeff)
{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
// by the 3 coefficients in coeff (computed in the function get_4_isog()).
// Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
// Output: the projective point P = phi(P) = (X:Z) in the codomain.
f2elm_t t0, t1;

fp2add(P->X, P->Z, t0); // t0 = X+Z
fp2sub(P->X, P->Z, t1); // t1 = X-Z
fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1]
fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2]
fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z)
fp2mul_mont(t0, coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z)
fp2add(P->X, P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
fp2sub(P->X, P->Z, P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1]
fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2
fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
fp2sub(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z)
fp2mul_mont(P->X, t1, P->X); // Xfinal
fp2mul_mont(P->Z, t0, P->Z); // Zfinal
}


void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
{ // Tripling of a Montgomery point in projective coordinates (X:Z).
// Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
// Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
f2elm_t t0, t1, t2, t3, t4, t5, t6;

fp2sub(P->X, P->Z, t0); // t0 = X-Z
fp2sqr_mont(t0, t2); // t2 = (X-Z)^2
fp2add(P->X, P->Z, t1); // t1 = X+Z
fp2sqr_mont(t1, t3); // t3 = (X+Z)^2
fp2add(t0, t1, t4); // t4 = 2*X
fp2sub(t1, t0, t0); // t0 = 2*Z
fp2sqr_mont(t4, t1); // t1 = 4*X^2
fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2
fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2
fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^3
fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2
fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^3
fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3
fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3
fp2sqr_mont(t2, t2); // t2 = t2^2
fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2
fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
fp2sqr_mont(t1, t1); // t1 = t1^2
fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1
}

void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
// Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
// Output: projective Montgomery x-coordinates Q <- (3^e)*P.
memcpy(Q, P, sizeof(*P));
for (size_t i = 0; i < e; i++) {
xTPL(Q, Q, A24minus, A24plus);
}
}

void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
// Input: projective point of order three P = (X3:Z3).
// Output: the 3-isogenous Montgomery curve with projective coefficient A/C.
f2elm_t t0, t1, t2, t3, t4;

fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z
fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2
fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z
fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2
fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2
fp2add(coeff[0], coeff[1], t3); // t3 = 2*X
fp2sqr_mont(t3, t3); // t3 = 4*X^2
fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2
fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2
fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
fp2add(A24minus, t0, A24plus); // A24plus = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
}


void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and
// a point P with 2 coefficients in coeff (computed in the function get_3_isog()).
// Inputs: projective points P = (X3:Z3) and Q = (X:Z).
// Output: the projective point Q <- phi(Q) = (X3:Z3).
f2elm_t t0, t1, t2;

fp2add(Q->X, Q->Z, t0); // t0 = X+Z
fp2sub(Q->X, Q->Z, t1); // t1 = X-Z
fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z)
fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z)
fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z)
fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z)
fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
}


void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
{ // 3-way simultaneous inversion
// Input: z1,z2,z3
// Output: 1/z1,1/z2,1/z3 (override inputs).
f2elm_t t0, t1, t2, t3;

fp2mul_mont(z1, z2, t0); // t0 = z1*z2
fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3
fp2inv_mont(t1); // t1 = 1/(z1*z2*z3)
fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2)
fp2mul_mont(t2, z2, t3); // t3 = 1/z1
fp2mul_mont(t2, z1, z2); // z2 = 1/z2
fp2mul_mont(t0, t1, z3); // z3 = 1/z3
fp2copy(t3, z1); // z1 = 1/z1
}


void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
// Input: the x-coordinates xP, xQ, and xR of the points P, Q and R.
// Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
f2elm_t t0, t1, one = F2ELM_INIT;

extern const struct params_t p503;
fpcopy((crypto_word_t*)&p503.mont_one, one->c0);
fp2add(xP, xQ, t1); // t1 = xP+xQ
fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ
fp2mul_mont(xR, t1, A); // A = xR*t1
fp2add(t0, A, A); // A = A+t0
fp2mul_mont(t0, xR, t0); // t0 = t0*xR
fp2sub(A, one, A); // A = A-1
fp2add(t0, t0, t0); // t0 = t0+t0
fp2add(t1, xR, t1); // t1 = t1+xR
fp2add(t0, t0, t0); // t0 = t0+t0
fp2sqr_mont(A, A); // A = A^2
fp2inv_mont(t0); // t0 = 1/t0
fp2mul_mont(A, t0, A); // A = A*t0
fp2sub(A, t1, A); // Afinal = A-t1
}


void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
{ // Computes the j-invariant of a Montgomery curve with projective constant.
// Input: A,C in GF(p^2).
// Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
f2elm_t t0, t1;

fp2sqr_mont(A, jinv); // jinv = A^2
fp2sqr_mont(C, t1); // t1 = C^2
fp2add(t1, t1, t0); // t0 = t1+t1
fp2sub(jinv, t0, t0); // t0 = jinv-t0
fp2sub(t0, t1, t0); // t0 = t0-t1
fp2sub(t0, t1, jinv); // jinv = t0-t1
fp2sqr_mont(t1, t1); // t1 = t1^2
fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1
fp2add(t0, t0, t0); // t0 = t0+t0
fp2add(t0, t0, t0); // t0 = t0+t0
fp2sqr_mont(t0, t1); // t1 = t0^2
fp2mul_mont(t0, t1, t0); // t0 = t0*t1
fp2add(t0, t0, t0); // t0 = t0+t0
fp2add(t0, t0, t0); // t0 = t0+t0
fp2inv_mont(jinv); // jinv = 1/jinv
fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv
}


void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
{ // Simultaneous doubling and differential addition.
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
// Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
f2elm_t t0, t1, t2;

fp2add(P->X, P->Z, t0); // t0 = XP+ZP
fp2sub(P->X, P->Z, t1); // t1 = XP-ZP
fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2
fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ
fp2correction(t2);
fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ
fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ)
fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2
fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ)
fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2
fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
}

+ 49
- 0
third_party/sike/src/isogeny.h Näytä tiedosto

@@ -0,0 +1,49 @@
#ifndef ISOGENY_H_
#define ISOGENY_H_

// Computes [2^e](X:Z) on Montgomery curve with projective
// constant via e repeated doublings.
void xDBLe(
const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
const f2elm_t C24, size_t e);
// Simultaneous doubling and differential addition.
void xDBLADD(
point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
const f2elm_t A24);
// Tripling of a Montgomery point in projective coordinates (X:Z).
void xTPL(
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
const f2elm_t A24plus);
// Computes [3^e](X:Z) on Montgomery curve with projective constant
// via e repeated triplings.
void xTPLe(
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
const f2elm_t A24plus, size_t e);
// Given the x-coordinates of P, Q, and R, returns the value A
// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
void get_A(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
// Computes the j-invariant of a Montgomery curve with projective constant.
void j_inv(
const f2elm_t A, const f2elm_t C, f2elm_t jinv);
// Computes the corresponding 4-isogeny of a projective Montgomery
// point (X4:Z4) of order 4.
void get_4_isog(
const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
// Computes the corresponding 3-isogeny of a projective Montgomery
// point (X3:Z3) of order 3.
void get_3_isog(
const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
f2elm_t* coeff);
// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3)
// of order 3 on a Montgomery curve and a point P with coefficients given in coeff.
void eval_3_isog(
point_proj_t Q, f2elm_t* coeff);
// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
void eval_4_isog(
point_proj_t P, f2elm_t* coeff);
// 3-way simultaneous inversion
void inv_3_way(
f2elm_t z1, f2elm_t z2, f2elm_t z3);

#endif // ISOGENY_H_

+ 592
- 0
third_party/sike/src/sike.c Näytä tiedosto

@@ -0,0 +1,592 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
*********************************************************************************************/

#include <stdint.h>
#include <string.h>
#include <openssl/bn.h>
#include <openssl/base.h>
#include <openssl/rand.h>
#include <openssl/mem.h>
#include <openssl/hmac.h>
#include <openssl/sha.h>

#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

extern const struct params_t p503;

// Domain separation parameters for HMAC
static const uint8_t G[2] = {0,0};
static const uint8_t H[2] = {1,0};
static const uint8_t F[2] = {2,0};

// SIDHp503_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
#define SIDHp503_JINV_BYTESZ 126U
// SIDHp503_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
#define SIDHp503_PRV_A_BITSZ 250U
// SIDHp503_PRV_A_BITSZ is a number of bits of SIDH private key (3-isogeny)
#define SIDHp503_PRV_B_BITSZ 253U
// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
#define MAX_INT_POINTS_ALICE 7U
// MAX_INT_POINTS_ALICE is a number of points used in 3-isogeny tree computation
#define MAX_INT_POINTS_BOB 8U

// Produces HMAC-SHA256 of data |S| mac'ed with the key |key|. Result is stored in |out|
// which must have size of at least |outsz| bytes and must be not bigger than
// SHA256_DIGEST_LENGTH. The output of a HMAC may be truncated.
// The |key| buffer is reused by the hmac_sum and hence, it's size must be equal
// to SHA256_CBLOCK. The HMAC key provided in |key| buffer must be smaller or equal
// to SHA256_DIGHEST_LENTH. |key| can overlap |out|.
static void hmac_sum(
uint8_t *out, size_t outsz, const uint8_t S[2], uint8_t key[SHA256_CBLOCK]) {
for(size_t i=0; i<SHA256_DIGEST_LENGTH; i++) {
key[i] = key[i] ^ 0x36;
}
// set rest of the buffer to ipad = 0x36
memset(&key[SHA256_DIGEST_LENGTH], 0x36, SHA256_CBLOCK - SHA256_DIGEST_LENGTH);

SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, key, SHA256_CBLOCK);
SHA256_Update(&ctx, S, 2);
uint8_t digest[SHA256_DIGEST_LENGTH] = {0};
SHA256_Final(digest, &ctx);

// XOR key with an opad = 0x5C
for(size_t i=0; i<SHA256_CBLOCK; i++) {
key[i] = key[i] ^ 0x36 ^ 0x5C;
}

SHA256_Init(&ctx);
SHA256_Update(&ctx, key, SHA256_CBLOCK);
SHA256_Update(&ctx, digest, SHA256_DIGEST_LENGTH);
SHA256_Final(digest, &ctx);
memcpy(out, digest, outsz);

// clear sensitive data
OPENSSL_cleanse(digest, sizeof(digest));
}

// Swap points.
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
#if !defined(OPENSSL_X86_64) || defined(OPENSSL_NO_ASM)
static void cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
crypto_word_t temp;
for (size_t i = 0; i < NWORDS_FIELD; i++) {
temp = option & (P->X->c0[i] ^ Q->X->c0[i]);
P->X->c0[i] = temp ^ P->X->c0[i];
Q->X->c0[i] = temp ^ Q->X->c0[i];
temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]);
P->Z->c0[i] = temp ^ P->Z->c0[i];
Q->Z->c0[i] = temp ^ Q->Z->c0[i];
temp = option & (P->X->c1[i] ^ Q->X->c1[i]);
P->X->c1[i] = temp ^ P->X->c1[i];
Q->X->c1[i] = temp ^ Q->X->c1[i];
temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]);
P->Z->c1[i] = temp ^ P->Z->c1[i];
Q->Z->c1[i] = temp ^ Q->Z->c1[i];
}
}
#endif

// Swap points.
// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
static inline void fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
cswap_asm(P, Q, option);
#else
cswap(P, Q, option);
#endif
}

static void LADDER3PT(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const crypto_word_t* m,
int is_A, point_proj_t R, const f2elm_t A) {
point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
f2elm_t A24 = F2ELM_INIT;
crypto_word_t mask;
int bit, swap, prevbit = 0;

const size_t nbits = is_A?SIDHp503_PRV_A_BITSZ:SIDHp503_PRV_B_BITSZ;

// Initializing constant
fpcopy((crypto_word_t*)&p503.mont_one, A24[0].c0);
fp2add(A24, A24, A24);
fp2add(A, A24, A24);
fp2div2(A24, A24);
fp2div2(A24, A24); // A24 = (A+2)/4

// Initializing points
fp2copy(xQ, R0->X);
fpcopy((crypto_word_t*)&p503.mont_one, R0->Z[0].c0);
fp2copy(xPQ, R2->X);
fpcopy((crypto_word_t*)&p503.mont_one, R2->Z[0].c0);
fp2copy(xP, R->X);
fpcopy((crypto_word_t*)&p503.mont_one, R->Z[0].c0);
memset(R->Z->c1, 0, sizeof(R->Z->c1));

// Main loop
for (size_t i = 0; i < nbits; i++) {
bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1;
swap = bit ^ prevbit;
prevbit = bit;
mask = 0 - (crypto_word_t)swap;

fp2cswap(R, R2, mask);
xDBLADD(R0, R2, R->X, A24);
fp2mul_mont(R2->X, R->Z, R2->X);
}
}

// Initialization of basis points
static inline void init_basis(crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
fpcopy(gen, XP->c0);
fpcopy(gen + NWORDS_FIELD, XP->c1);
fpcopy(gen + 2*NWORDS_FIELD, XQ->c0);
memset(XQ->c1, 0, sizeof(XQ->c1));
fpcopy(gen + 3*NWORDS_FIELD, XR->c0);
fpcopy(gen + 4*NWORDS_FIELD, XR->c1);
}

// Conversion of GF(p^2) element from Montgomery to standard representation.
static inline void fp2_encode(const f2elm_t x, uint8_t *enc) {
f2elm_t t;
from_fp2mont(x, t);

// convert to bytes in little endian form
for (size_t i=0; i<FIELD_BYTESZ; i++) {
enc[i+ 0] = (t[0].c0[i/LSZ] >> (LSZ*(i%LSZ))) & 0xFF;
enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (LSZ*(i%LSZ))) & 0xFF;
}
}

// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation.
// Elements over GF(p503) are encoded in 63 octets in little endian format
// (i.e., the least significant octet is located in the lowest memory address).
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
memset(t[0].c0, 0, sizeof(t[0].c0));
memset(t[0].c1, 0, sizeof(t[0].c1));
// convert bytes in little endian form to f2elm_t
for (size_t i = 0; i < FIELD_BYTESZ; i++) {
t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+ 0]) << (LSZ*(i%LSZ));
t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (LSZ*(i%LSZ));
}
to_fp2mont(t, t);
}

// Alice's ephemeral public key generation
// Input: a private key prA in the range [0, 2^250 - 1], stored in 32 bytes.
// Output: the public key pkA consisting of 3 GF(p503^2) elements encoded in 378 bytes.
static int gen_iso_A(const uint8_t* skA, uint8_t* pkA)
{
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
point_proj_t phiP = POINT_PROJ_INIT;
point_proj_t phiQ = POINT_PROJ_INIT;
point_proj_t phiR = POINT_PROJ_INIT;
f2elm_t XPA, XQA, XRA, coeff[3];
f2elm_t A24plus = F2ELM_INIT;
f2elm_t C24 = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;

// Initialize basis points
init_basis((crypto_word_t*)p503.A_gen, XPA, XQA, XRA);
init_basis((crypto_word_t*)p503.B_gen, phiP->X, phiQ->X, phiR->X);
fpcopy((crypto_word_t*)&p503.mont_one, (phiP->Z)->c0);
fpcopy((crypto_word_t*)&p503.mont_one, (phiQ->Z)->c0);
fpcopy((crypto_word_t*)&p503.mont_one, (phiR->Z)->c0);

// Initialize constants
fpcopy((crypto_word_t*)&p503.mont_one, A24plus->c0);
fp2add(A24plus, A24plus, C24);

// Retrieve kernel point
LADDER3PT(XPA, XQA, XRA, (crypto_word_t*)skA, 1, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < A_max; row++) {
while (index < A_max-row) {
fp2copy(R->X, pts[npts]->X);
fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = p503.A_strat[ii++];
xDBLe(R, R, A24plus, C24, (2*m));
index += m;
}
get_4_isog(R, A24plus, C24, coeff);

for (size_t i = 0; i < npts; i++) {
eval_4_isog(pts[i], coeff);
}
eval_4_isog(phiP, coeff);
eval_4_isog(phiQ, coeff);
eval_4_isog(phiR, coeff);

fp2copy(pts[npts-1]->X, R->X);
fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_4_isog(R, A24plus, C24, coeff);
eval_4_isog(phiP, coeff);
eval_4_isog(phiQ, coeff);
eval_4_isog(phiR, coeff);

inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
fp2mul_mont(phiP->X, phiP->Z, phiP->X);
fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
fp2mul_mont(phiR->X, phiR->Z, phiR->X);

// Format public key
fp2_encode(phiP->X, pkA);
fp2_encode(phiQ->X, pkA + SIDHp503_JINV_BYTESZ);
fp2_encode(phiR->X, pkA + 2*SIDHp503_JINV_BYTESZ);
return 0;
}

// Bob's ephemeral key-pair generation
// It produces a private key skB and computes the public key pkB.
// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes.
static int gen_iso_B(const uint8_t* skB, uint8_t* pkB)
{
point_proj_t R, pts[MAX_INT_POINTS_BOB];
point_proj_t phiP = POINT_PROJ_INIT;
point_proj_t phiQ = POINT_PROJ_INIT;
point_proj_t phiR = POINT_PROJ_INIT;
f2elm_t XPB, XQB, XRB, coeff[3];
f2elm_t A24plus = F2ELM_INIT;
f2elm_t A24minus = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;

// Initialize basis points
init_basis((crypto_word_t*)p503.B_gen, XPB, XQB, XRB);
init_basis((crypto_word_t*)p503.A_gen, phiP->X, phiQ->X, phiR->X);
fpcopy((crypto_word_t*)&p503.mont_one, (phiP->Z)->c0);
fpcopy((crypto_word_t*)&p503.mont_one, (phiQ->Z)->c0);
fpcopy((crypto_word_t*)&p503.mont_one, (phiR->Z)->c0);

// Initialize constants
fpcopy((crypto_word_t*)&p503.mont_one, A24plus->c0);
fp2add(A24plus, A24plus, A24plus);
fp2copy(A24plus, A24minus);
fp2neg(A24minus);

// Retrieve kernel point
LADDER3PT(XPB, XQB, XRB, (crypto_word_t*)skB, 0, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < B_max; row++) {
while (index < B_max-row) {
fp2copy(R->X, pts[npts]->X);
fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = p503.B_strat[ii++];
xTPLe(R, R, A24minus, A24plus, m);
index += m;
}
get_3_isog(R, A24minus, A24plus, coeff);

for (size_t i = 0; i < npts; i++) {
eval_3_isog(pts[i], coeff);
}
eval_3_isog(phiP, coeff);
eval_3_isog(phiQ, coeff);
eval_3_isog(phiR, coeff);

fp2copy(pts[npts-1]->X, R->X);
fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_3_isog(R, A24minus, A24plus, coeff);
eval_3_isog(phiP, coeff);
eval_3_isog(phiQ, coeff);
eval_3_isog(phiR, coeff);

inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
fp2mul_mont(phiP->X, phiP->Z, phiP->X);
fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
fp2mul_mont(phiR->X, phiR->Z, phiR->X);

// Format public key
fp2_encode(phiP->X, pkB);
fp2_encode(phiQ->X, pkB + SIDHp503_JINV_BYTESZ);
fp2_encode(phiR->X, pkB + 2*SIDHp503_JINV_BYTESZ);
return 0;
}

// Alice's ephemeral shared secret computation
// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes.
// Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes.
// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes.
static int ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
{
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
f2elm_t coeff[3], PKB[3], jinv;
f2elm_t A24plus = F2ELM_INIT;
f2elm_t C24 = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;

// Initialize images of Bob's basis
fp2_decode(pkB, PKB[0]);
fp2_decode(pkB + SIDHp503_JINV_BYTESZ, PKB[1]);
fp2_decode(pkB + 2*SIDHp503_JINV_BYTESZ, PKB[2]);

// Initialize constants
get_A(PKB[0], PKB[1], PKB[2], A); // TODO: Can return projective A?
fpadd((crypto_word_t*)&p503.mont_one, (crypto_word_t*)&p503.mont_one, C24->c0);
fp2add(A, C24, A24plus);
fpadd(C24->c0, C24->c0, C24->c0);

// Retrieve kernel point
LADDER3PT(PKB[0], PKB[1], PKB[2], (crypto_word_t*)skA, 1, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < A_max; row++) {
while (index < A_max-row) {
fp2copy(R->X, pts[npts]->X);
fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = p503.A_strat[ii++];
xDBLe(R, R, A24plus, C24, (2*m));
index += m;
}
get_4_isog(R, A24plus, C24, coeff);

for (size_t i = 0; i < npts; i++) {
eval_4_isog(pts[i], coeff);
}

fp2copy(pts[npts-1]->X, R->X);
fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_4_isog(R, A24plus, C24, coeff);
fp2div2(C24, C24);
fp2sub(A24plus, C24, A24plus);
fp2div2(C24, C24);
j_inv(A24plus, C24, jinv);
fp2_encode(jinv, ssA);
return 0;
}

// Bob's ephemeral shared secret computation
// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
// Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes.
// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes.
static int ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
{
point_proj_t R, pts[MAX_INT_POINTS_BOB];
f2elm_t coeff[3], PKB[3], jinv;
f2elm_t A24plus = F2ELM_INIT;
f2elm_t A24minus = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;

// Initialize images of Alice's basis
fp2_decode(pkA, PKB[0]);
fp2_decode(pkA + SIDHp503_JINV_BYTESZ, PKB[1]);
fp2_decode(pkA + 2*SIDHp503_JINV_BYTESZ, PKB[2]);

// Initialize constants
get_A(PKB[0], PKB[1], PKB[2], A);
fpadd((crypto_word_t*)&p503.mont_one, (crypto_word_t*)&p503.mont_one, A24minus->c0);
fp2add(A, A24minus, A24plus);
fp2sub(A, A24minus, A24minus);

// Retrieve kernel point
LADDER3PT(PKB[0], PKB[1], PKB[2], (crypto_word_t*)skB, 0, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < B_max; row++) {
while (index < B_max-row) {
fp2copy(R->X, pts[npts]->X);
fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = p503.B_strat[ii++];
xTPLe(R, R, A24minus, A24plus, m);
index += m;
}
get_3_isog(R, A24minus, A24plus, coeff);

for (size_t i = 0; i < npts; i++) {
eval_3_isog(pts[i], coeff);
}

fp2copy(pts[npts-1]->X, R->X);
fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_3_isog(R, A24minus, A24plus, coeff);
fp2add(A24plus, A24minus, A);
fp2add(A, A, A);
fp2sub(A24plus, A24minus, A24plus);
j_inv(A, A24plus, jinv);
fp2_encode(jinv, ssB);
return 0;
}

int SIKE_keypair(uint8_t sk[SIKEp503_PRV_BYTESZ], uint8_t pk[SIKEp503_PUB_BYTESZ]) {
int ret = -1;

BN_CTX *ctx = BN_CTX_new();
if (!ctx) {
goto end;
}

// Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and < 253 bits
BIGNUM *bn_sidh_prv = BN_CTX_get(ctx);
if (!bn_sidh_prv) {
goto end;
}

if (!BN_rand(bn_sidh_prv, SIDHp503_PRV_B_BITSZ, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) {
goto end;
}

// Convert to little endian
if (!BN_bn2le_padded(sk, BITS_TO_BYTES(SIDHp503_PRV_B_BITSZ), bn_sidh_prv)) {
goto end;
}

// Never fails
gen_iso_B(sk, pk);

// All good
ret = 0;

end:
BN_CTX_free(ctx);
return ret;
}

void SIKE_encaps(
uint8_t ss[SIKEp503_SS_BYTESZ],
uint8_t ct[SIKEp503_CT_BYTESZ],
const uint8_t pkB[SIKEp503_PUB_BYTESZ])
{
// Secret buffer is reused by the function to store some ephemeral
// secret data. It's size must be maximum of SHA256_CBLOCK,
// SIKEp503_MSG_BYTESZ and SIDHp503_PRV_A_BITSZ in bytes.
uint8_t secret[SHA256_CBLOCK] = {0};
uint8_t j[SIDHp503_JINV_BYTESZ] = {0};
uint8_t temp[SIKEp503_MSG_BYTESZ + SIKEp503_CT_BYTESZ] = {0};
SHA256_CTX ctx;

// Generate secret key for A
// secret key A = HMAC({0,1}^n || pkB), G) mod SIDHp503_PRV_A_BITSZ
(void)RAND_bytes(temp, SIKEp503_MSG_BYTESZ);

SHA256_Init(&ctx);
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
SHA256_Update(&ctx, pkB, SIKEp503_PUB_BYTESZ);
SHA256_Final(secret, &ctx);
hmac_sum(secret, BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ), G, secret);
secret[BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ) - 1] &= (1 << (SIDHp503_PRV_A_BITSZ%8)) - 1;

// Generate public key for A - first part of the ciphertext
gen_iso_A(secret, ct);

// Generate c1:
// h = HMAC(j-invariant(secret key A, public key B), F)
// c1 = h ^ m
ex_iso_A(secret, pkB, j);
SHA256_Init(&ctx);
SHA256_Update(&ctx, j, sizeof(j));
SHA256_Final(secret, &ctx);
hmac_sum(secret, SIKEp503_MSG_BYTESZ, F, secret);

// c1 = h ^ m
uint8_t *c1 = &ct[SIKEp503_PUB_BYTESZ];
for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) {
c1[i] = temp[i] ^ secret[i];
}

SHA256_Init(&ctx);
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
SHA256_Update(&ctx, ct, SIKEp503_CT_BYTESZ);
SHA256_Final(secret, &ctx);
// Generate shared secret ss = HMAC(m||ct, F)
hmac_sum(ss, SIKEp503_SS_BYTESZ, H, secret);
OPENSSL_cleanse(j, sizeof(j));
OPENSSL_cleanse(secret, sizeof(secret));
}

void SIKE_decaps(
uint8_t ss[SIKEp503_SS_BYTESZ],
const uint8_t ct[SIKEp503_CT_BYTESZ],
const uint8_t pkB[SIKEp503_PUB_BYTESZ],
const uint8_t skB[SIKEp503_PRV_BYTESZ])
{
// Secret buffer is reused by the function to store some ephemeral
// secret data. It's size must be maximum of SHA256_CBLOCK,
// SIKEp503_MSG_BYTESZ and SIDHp503_PRV_A_BITSZ in bytes.
uint8_t secret[SHA256_CBLOCK] = {0};
uint8_t j[SIDHp503_JINV_BYTESZ] = {0};
uint8_t c0[SIKEp503_PUB_BYTESZ] = {0};
uint8_t temp[SIKEp503_CT_BYTESZ + SIKEp503_MSG_BYTESZ] = {0};
uint8_t shared_nok[SIKEp503_CT_BYTESZ + SIKEp503_MSG_BYTESZ] = {0};
SHA256_CTX ctx;

(void)RAND_bytes(shared_nok, SIKEp503_MSG_BYTESZ);

// Recover m
// Let ct = c0 || c1 - both have fixed sizes
// m = F(j-invariant(c0, skB)) ^ c1
ex_iso_B(skB, ct, j);

SHA256_Init(&ctx);
SHA256_Update(&ctx, j, sizeof(j));
SHA256_Final(secret, &ctx);
hmac_sum(secret, SIKEp503_MSG_BYTESZ, F, secret);

const uint8_t *c1 = &ct[sizeof(c0)];
for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) {
temp[i] = c1[i] ^ secret[i];
}

SHA256_Init(&ctx);
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
SHA256_Update(&ctx, pkB, SIKEp503_PUB_BYTESZ);
SHA256_Final(secret, &ctx);
hmac_sum(secret, BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ), G, secret);

// Recover secret key A = G(m||pkB) mod
secret[BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ) - 1] &= (1 << (SIDHp503_PRV_A_BITSZ%8)) - 1;

// Recover c0 = public key A
gen_iso_A(secret, c0);
crypto_word_t ok = constant_time_is_zero_w(CRYPTO_memcmp(c0, ct, SIKEp503_PUB_BYTESZ));
for (size_t i=0; i<SIKEp503_MSG_BYTESZ; i++) {
temp[i] = constant_time_select_8(ok, temp[i], shared_nok[i]);
}

SHA256_Init(&ctx);
SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ);
SHA256_Update(&ctx, ct, SIKEp503_CT_BYTESZ);
SHA256_Final(secret, &ctx);
hmac_sum(ss, SIKEp503_SS_BYTESZ, H, secret);
OPENSSL_cleanse(j, sizeof(j));
OPENSSL_cleanse(secret, sizeof(secret));
}

+ 190
- 0
third_party/sike/src/sike_test.cc Näytä tiedosto

@@ -0,0 +1,190 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

#include <stdint.h>
#include <gtest/gtest.h>
#include "../include/sike/sike.h"

TEST(SIKE, RoundTrip) {
uint8_t sk[SIKEp503_PRV_BYTESZ] = {0};
uint8_t pk[SIKEp503_PUB_BYTESZ] = {0};
uint8_t ct[SIKEp503_CT_BYTESZ] = {0};
uint8_t ss_enc[SIKEp503_SS_BYTESZ] = {0};
uint8_t ss_dec[SIKEp503_SS_BYTESZ] = {0};

SIKE_keypair(sk, pk);
SIKE_encaps(ss_enc, ct, pk);
SIKE_decaps(ss_dec, ct, pk, sk);

EXPECT_EQ(memcmp(ss_enc, ss_dec, SIKEp503_SS_BYTESZ), 0);
}

TEST(SIKE, Decapsulation) {
const uint8_t sk[SIKEp503_PRV_BYTESZ] = {
0xDB, 0xAF, 0x2C, 0x89, 0xCA, 0x5A, 0xD4, 0x9D, 0x4F, 0x13,
0x40, 0xDF, 0x2D, 0xB1, 0x5F, 0x4C, 0x91, 0xA7, 0x1F, 0x0B,
0x29, 0x15, 0x01, 0x59, 0xBC, 0x5F, 0x0B, 0x4A, 0x03, 0x27,
0x6F, 0x18};

const uint8_t pk[SIKEp503_PUB_BYTESZ] = {
0x07, 0xAA, 0x51, 0x45, 0x3E, 0x1F, 0x53, 0x2A, 0x0A, 0x05,
0x46, 0xF6, 0x54, 0x7F, 0x5D, 0x56, 0xD6, 0x76, 0xD3, 0xEA,
0x4B, 0x6B, 0x01, 0x9B, 0x11, 0x72, 0x6F, 0x75, 0xEA, 0x34,
0x3C, 0x28, 0x2C, 0x36, 0xFD, 0x77, 0xDA, 0xBE, 0xB6, 0x20,
0x18, 0xC1, 0x93, 0x98, 0x18, 0x86, 0x30, 0x2F, 0x2E, 0xD2,
0x00, 0x61, 0xFF, 0xAE, 0x78, 0xAE, 0xFB, 0x6F, 0x32, 0xAC,
0x06, 0xBF, 0x35, 0xF6, 0xF7, 0x5B, 0x98, 0x26, 0x95, 0xC2,
0xD8, 0xD6, 0x1C, 0x0E, 0x47, 0xDA, 0x76, 0xCE, 0xB5, 0xF1,
0x19, 0xCC, 0x01, 0xE1, 0x17, 0xA9, 0x62, 0xF7, 0x82, 0x6C,
0x25, 0x51, 0x25, 0xAE, 0xFE, 0xE3, 0xE2, 0xE1, 0x35, 0xAE,
0x2E, 0x8F, 0x38, 0xE0, 0x7C, 0x74, 0x3C, 0x1D, 0x39, 0x91,
0x1B, 0xC7, 0x9F, 0x8E, 0x33, 0x4E, 0x84, 0x19, 0xB8, 0xD9,
0xC2, 0x71, 0x35, 0x02, 0x47, 0x3E, 0x79, 0xEF, 0x47, 0xE1,
0xD8, 0x21, 0x96, 0x1F, 0x11, 0x59, 0x39, 0x34, 0x76, 0xEF,
0x3E, 0xB7, 0x4E, 0xFB, 0x7C, 0x55, 0xA1, 0x85, 0xAA, 0xAB,
0xAD, 0xF0, 0x09, 0xCB, 0xD1, 0xE3, 0x7C, 0x4F, 0x5D, 0x2D,
0xE1, 0x13, 0xF0, 0x71, 0xD9, 0xE5, 0xF6, 0xAF, 0x7F, 0xC1,
0x27, 0x95, 0x8D, 0x52, 0xD5, 0x96, 0x42, 0x38, 0x41, 0xF7,
0x24, 0x3F, 0x3A, 0xB5, 0x7E, 0x11, 0xE4, 0xF9, 0x33, 0xEE,
0x4D, 0xBE, 0x74, 0x48, 0xF9, 0x98, 0x04, 0x01, 0x16, 0xEB,
0xA9, 0x0D, 0x61, 0xC6, 0xFD, 0x4C, 0xCF, 0x98, 0x84, 0x4A,
0x94, 0xAC, 0x69, 0x2C, 0x02, 0x8B, 0xE3, 0xD1, 0x41, 0x0D,
0xF2, 0x2D, 0x46, 0x1F, 0x57, 0x1C, 0x77, 0x86, 0x18, 0xE3,
0x63, 0xDE, 0xF3, 0xE3, 0x02, 0x30, 0x54, 0x73, 0xAE, 0xC2,
0x32, 0xA2, 0xCE, 0xEB, 0xCF, 0x81, 0x46, 0x54, 0x5C, 0xF4,
0x5D, 0x2A, 0x03, 0x5D, 0x9C, 0xAE, 0xE0, 0x60, 0x03, 0x80,
0x11, 0x30, 0xA5, 0xAA, 0xD1, 0x75, 0x67, 0xE0, 0x1C, 0x2B,
0x6B, 0x5D, 0x83, 0xDE, 0x92, 0x9B, 0x0E, 0xD7, 0x11, 0x0F,
0x00, 0xC4, 0x59, 0xE4, 0x81, 0x04, 0x3B, 0xEE, 0x5C, 0x04,
0xD1, 0x0E, 0xD0, 0x67, 0xF5, 0xCC, 0xAA, 0x72, 0x73, 0xEA,
0xC4, 0x76, 0x99, 0x3B, 0x4C, 0x90, 0x2F, 0xCB, 0xD8, 0x0A,
0x5B, 0xEC, 0x0E, 0x0E, 0x1F, 0x59, 0xEA, 0x14, 0x8D, 0x34,
0x53, 0x65, 0x4C, 0x1A, 0x59, 0xA8, 0x95, 0x66, 0x60, 0xBB,
0xC4, 0xCC, 0x32, 0xA9, 0x8D, 0x2A, 0xAA, 0x14, 0x6F, 0x0F,
0x81, 0x4D, 0x32, 0x02, 0xFD, 0x33, 0x58, 0x42, 0xCF, 0xF3,
0x67, 0xD0, 0x9F, 0x0B, 0xB1, 0xCC, 0x18, 0xA5, 0xC4, 0x19,
0xB6, 0x00, 0xED, 0xFA, 0x32, 0x1A, 0x5F, 0x67, 0xC8, 0xC3,
0xEB, 0x0D, 0xB5, 0x9A, 0x36, 0x47, 0x82, 0x00};

const uint8_t ct_exp[SIKEp503_CT_BYTESZ] = {
0xE6, 0xB7, 0xE5, 0x7B, 0xA9, 0x19, 0xD1, 0x2C, 0xB8, 0x5C,
0x7B, 0x66, 0x74, 0xB0, 0x71, 0xA1, 0xFF, 0x71, 0x7F, 0x4B,
0xB5, 0xA6, 0xAF, 0x48, 0x32, 0x52, 0xD5, 0x82, 0xEE, 0x8A,
0xBB, 0x08, 0x1E, 0xF6, 0xAC, 0x91, 0xA2, 0xCB, 0x6B, 0x6A,
0x09, 0x2B, 0xD9, 0xC6, 0x27, 0xD6, 0x3A, 0x6B, 0x8D, 0xFC,
0xB8, 0x90, 0x8F, 0x72, 0xB3, 0xFA, 0x7D, 0x34, 0x7A, 0xC4,
0x7E, 0xE3, 0x30, 0xC5, 0xA0, 0xFE, 0x3D, 0x43, 0x14, 0x4E,
0x3A, 0x14, 0x76, 0x3E, 0xFB, 0xDF, 0xE3, 0xA8, 0xE3, 0x5E,
0x38, 0xF2, 0xE0, 0x39, 0x67, 0x60, 0xFD, 0xFB, 0xB4, 0x19,
0xCD, 0xE1, 0x93, 0xA2, 0x06, 0xCC, 0x65, 0xCD, 0x6E, 0xC8,
0xB4, 0x5E, 0x41, 0x4B, 0x6C, 0xA5, 0xF4, 0xE4, 0x9D, 0x52,
0x8C, 0x25, 0x60, 0xDD, 0x3D, 0xA9, 0x7F, 0xF2, 0x88, 0xC1,
0x0C, 0xEE, 0x97, 0xE0, 0xE7, 0x3B, 0xB7, 0xD3, 0x6F, 0x28,
0x79, 0x2F, 0x50, 0xB2, 0x4F, 0x74, 0x3A, 0x0C, 0x88, 0x27,
0x98, 0x3A, 0x27, 0xD3, 0x26, 0x83, 0x59, 0x49, 0x81, 0x5B,
0x0D, 0xA7, 0x0C, 0x4F, 0xEF, 0xFB, 0x1E, 0xAF, 0xE9, 0xD2,
0x1C, 0x10, 0x25, 0xEC, 0x9E, 0xFA, 0x57, 0x36, 0xAA, 0x3F,
0xC1, 0xA3, 0x2C, 0xE9, 0xB5, 0xC9, 0xED, 0x72, 0x51, 0x4C,
0x02, 0xB4, 0x7B, 0xB3, 0xED, 0x9F, 0x45, 0x03, 0x34, 0xAC,
0x9A, 0x9E, 0x62, 0x5F, 0x82, 0x7A, 0x77, 0x34, 0xF9, 0x21,
0x94, 0xD2, 0x38, 0x3D, 0x05, 0xF0, 0x8A, 0x60, 0x1C, 0xB7,
0x1D, 0xF5, 0xB7, 0x53, 0x77, 0xD3, 0x9D, 0x3D, 0x70, 0x6A,
0xCB, 0x18, 0x20, 0x6B, 0x29, 0x17, 0x3A, 0x6D, 0xA1, 0xB2,
0x64, 0xDB, 0x6C, 0xE6, 0x1A, 0x95, 0xA7, 0xF4, 0x1A, 0x78,
0x1D, 0xA2, 0x40, 0x15, 0x41, 0x59, 0xDD, 0xEE, 0x23, 0x57,
0xCE, 0x36, 0x0D, 0x55, 0xBD, 0xB8, 0xFD, 0x0F, 0x35, 0xBD,
0x5B, 0x92, 0xD6, 0x1C, 0x84, 0x8C, 0x32, 0x64, 0xA6, 0x5C,
0x45, 0x18, 0x07, 0x6B, 0xF9, 0xA9, 0x43, 0x9A, 0x83, 0xCD,
0xB5, 0xB3, 0xD9, 0x17, 0x99, 0x2C, 0x2A, 0x8B, 0xE0, 0x8E,
0xAF, 0xA6, 0x4C, 0x95, 0xBB, 0x70, 0x60, 0x1A, 0x3A, 0x97,
0xAA, 0x2F, 0x3D, 0x22, 0x83, 0xB7, 0x4F, 0x59, 0xED, 0x3F,
0x4E, 0xF4, 0x19, 0xC6, 0x25, 0x0B, 0x0A, 0x5E, 0x21, 0xB9,
0x91, 0xB8, 0x19, 0x84, 0x48, 0x78, 0xCE, 0x27, 0xBF, 0x41,
0x89, 0xF6, 0x30, 0xFD, 0x6B, 0xD9, 0xB8, 0x1D, 0x72, 0x8A,
0x56, 0xCC, 0x2F, 0x82, 0xE4, 0x46, 0x4D, 0x75, 0xD8, 0x92,
0xE6, 0x9C, 0xCC, 0xD2, 0xCD, 0x35, 0xE4, 0xFC, 0x2A, 0x85,
0x6B, 0xA9, 0xB2, 0x27, 0xC9, 0xA1, 0xFF, 0xB3, 0x96, 0x3E,
0x59, 0xF6, 0x4C, 0x66, 0x56, 0x2E, 0xF5, 0x1B, 0x97, 0x32,
0xB0, 0x71, 0x5A, 0x9C, 0x50, 0x4B, 0x6F, 0xC4, 0xCA, 0x94,
0x75, 0x37, 0x46, 0x10, 0x12, 0x2F, 0x4F, 0xA3, 0x82, 0xCD,
0xBD, 0x7C};

const uint8_t ss_exp[SIKEp503_SS_BYTESZ] = {
0x74, 0x3D, 0x25, 0x36, 0x00, 0x24, 0x63, 0x1A, 0x39, 0x1A,
0xB4, 0xAD, 0x01, 0x17, 0x78, 0xE9};

uint8_t ss_dec[SIKEp503_SS_BYTESZ] = {0};
SIKE_decaps(ss_dec, ct_exp, pk, sk);
EXPECT_EQ(memcmp(ss_dec, ss_exp, sizeof(ss_exp)), 0);
}

// SIKE_encaps and SIKE_keypair doesn't return zeros.
TEST(SIKE, NonZero) {
uint8_t sk[SIKEp503_PRV_BYTESZ] = {0};
uint8_t pk[SIKEp503_PUB_BYTESZ] = {0};
uint8_t ct[SIKEp503_CT_BYTESZ] = {0};
uint8_t ss[SIKEp503_SS_BYTESZ] = {0};

// Check secret and public key returned by SIKE_keypair
SIKE_keypair(sk, pk);
uint8_t tmp = 0;
for (size_t i=0; i<sizeof(sk); i++) tmp|=sk[i];
EXPECT_NE(tmp, 0);

tmp = 0;
for (size_t i=0; i<sizeof(pk); i++) tmp|=pk[i];
EXPECT_NE(tmp, 0);

// Check shared secret and ciphertext returned by SIKE_encaps
SIKE_encaps(ss, ct, pk);
tmp = 0;
for (size_t i=0; i<sizeof(ct); i++) tmp|=ct[i];
EXPECT_NE(tmp, 0);

tmp = 0;
for (size_t i=0; i<sizeof(ss); i++) tmp|=ss[i];
EXPECT_NE(tmp, 0);
}

TEST(SIKE, Negative) {
uint8_t sk[SIKEp503_PRV_BYTESZ] = {0};
uint8_t pk[SIKEp503_PUB_BYTESZ] = {0};
uint8_t ct[SIKEp503_CT_BYTESZ] = {0};
uint8_t ss_enc[SIKEp503_SS_BYTESZ] = {0};
uint8_t ss_dec[SIKEp503_SS_BYTESZ] = {0};

SIKE_keypair(sk, pk);
SIKE_encaps(ss_enc, ct, pk);

// Change cipertext
uint8_t ct_tmp[SIKEp503_CT_BYTESZ] = {0};
memcpy(ct_tmp, ct, sizeof(ct));
ct_tmp[0] = ~ct_tmp[0];
SIKE_decaps(ss_dec, ct_tmp, pk, sk);
EXPECT_NE(memcmp(ss_enc, ss_dec, SIKEp503_SS_BYTESZ), 0);

// Change secret key
uint8_t sk_tmp[SIKEp503_PRV_BYTESZ] = {0};
memcpy(sk_tmp, sk, sizeof(sk));
sk_tmp[0] = ~sk_tmp[0];
SIKE_decaps(ss_dec, ct, pk, sk_tmp);
EXPECT_NE(memcmp(ss_enc, ss_dec, SIKEp503_SS_BYTESZ), 0);

// Change public key
uint8_t pk_tmp[SIKEp503_PUB_BYTESZ] = {0};
memcpy(pk_tmp, pk, sizeof(pk));
pk_tmp[0] = ~pk_tmp[0];
SIKE_decaps(ss_dec, ct, pk_tmp, sk);
EXPECT_NE(memcmp(ss_enc, ss_dec, SIKEp503_SS_BYTESZ), 0);
}

+ 144
- 0
third_party/sike/src/utils.h Näytä tiedosto

@@ -0,0 +1,144 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: internal header file for P503
*********************************************************************************************/

#ifndef UTILS_H_
#define UTILS_H_

#include <stdbool.h>
#include "openssl/base.h"
#include "../crypto/internal.h"
#include "sike/sike.h"

// Conversion macro from number of bits to number of bytes
#define BITS_TO_BYTES(nbits) (((nbits)+7)/8)

// Bit size of the field
#define BITS_FIELD 503
// Byte size of the field
#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD)
// Number of 64-bit words of a 503-bit field element
#define NWORDS64_FIELD ((BITS_FIELD+63)/64)
// Number of 64-bit words of a 256-bit element
#define NBITS_ORDER 256
#define NWORDS64_ORDER ((NBITS_ORDER+63)/64)
// Number of elements in Alice's strategy
#define A_max 125
// Number of elements in Bob's strategy
#define B_max 159
// Word size size
#define RADIX sizeof(crypto_word_t)*8
// Byte size of a limb
#define LSZ sizeof(crypto_word_t)

#if defined(OPENSSL_64_BIT)
// Number of words of a 503-bit field element
#define NWORDS_FIELD 8
// Number of "0" digits in the least significant part of p503 + 1
#define p503_ZERO_WORDS 3
// log_2(RADIX)
#define LOG2RADIX 6
#else
// Number of words of a 503-bit field element
#define NWORDS_FIELD 16
// Number of "0" digits in the least significant part of p503 + 1
#define p503_ZERO_WORDS 7
// log_2(RADIX)
#define LOG2RADIX 5
#endif

// Extended datatype support
#if !defined(BORINGSSL_HAS_UINT128)
typedef uint64_t uint128_t[2];
#endif

// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
// Digit multiplication
#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));

// If mask |x|==0xff.ff set |x| to 1, otherwise 0
#define M2B(x) ((x)>>(RADIX-1))

// Digit addition with carry
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
do { \
crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \
(sumOut) = (addend2) + tempReg; \
(carryOut) = M2B(constant_time_lt_w(tempReg, (crypto_word_t)(carryIn)) | \
constant_time_lt_w((sumOut), tempReg)); \
} while(0)

// Digit subtraction with borrow
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
do { \
crypto_word_t tempReg = (minuend) - (subtrahend); \
crypto_word_t borrowReg = M2B(constant_time_lt_w((minuend), (subtrahend))); \
borrowReg |= ((borrowIn) & constant_time_is_zero_w(tempReg)); \
(differenceOut) = tempReg - (crypto_word_t)(borrowIn); \
(borrowOut) = borrowReg; \
} while(0)

/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
which violates C11 as described in 6.7.9, 21 (similarily C99, 6.7.8).
Defines below are used to work around the bug, and provide a way
to initialize f2elem_t and point_proj_t structs.
Bug has been fixed in GCC6 (debian stretch).
*/
#define F2ELM_INIT {{ {0}, {0} }}
#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}

// Datatype for representing 503-bit field elements (512-bit max.)
// Elements over GF(p503) are encoded in 63 octets in little endian format
// (i.e., the least significant octet is located in the lowest memory address).
typedef crypto_word_t felm_t[NWORDS_FIELD];

// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e.
// Fp2 element = c0 + c1*i in F_{p^2}
// Datatype for representing double-precision 2x503-bit field elements (512-bit max.)
// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are
// encoded as {a, b}, with a in the lowest memory portion.
typedef struct {
felm_t c0;
felm_t c1;
} fp2;

// Our F_{p^2} element type is a pointer to the struct.
typedef fp2 f2elm_t[1];

// Datatype for representing double-precision 2x503-bit
// field elements in contiguous memory.
typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];

// Constants used during SIKEp503 computation.
struct params_t {
// Stores P503 prime
const uint64_t prime[NWORDS64_FIELD];
// Stores P503 + 1
const uint64_t prime_p1[NWORDS64_FIELD];
// Stores P503 * 2
const uint64_t prime_x2[NWORDS64_FIELD];
// Alice's generator values {XPA0 + XPA1*i, XQA0, XRA0 + XRA1*i}
// in GF(p503^2), expressed in Montgomery representation
const uint64_t A_gen[5*NWORDS64_FIELD];
// Bob's generator values {XPB0 + XPB1*i, XQB0, XRB0 + XRB1*i}
// in GF(p503^2), expressed in Montgomery representation
const uint64_t B_gen[5*NWORDS64_FIELD];
// Montgomery constant mont_R2 = (2^512)^2 mod p503
const uint64_t mont_R2[NWORDS64_FIELD];
// Value 'one' in Montgomery representation
const uint64_t mont_one[NWORDS64_FIELD];
// Fixed parameters for isogeny tree computation
const unsigned int A_strat[A_max-1];
const unsigned int B_strat[B_max-1];
};

// Point representation in projective XZ Montgomery coordinates.
typedef struct {
f2elm_t X;
f2elm_t Z;
} point_proj;
typedef point_proj point_proj_t[1];

#endif // UTILS_H_

+ 62
- 0
tool/speed.cc Näytä tiedosto

@@ -51,6 +51,8 @@ OPENSSL_MSVC_PRAGMA(warning(pop))
#include "../crypto/internal.h"
#include "internal.h"

#include "../third_party/sike/include/sike/sike.h"


// TimeResults represents the results of benchmarking a function.
struct TimeResults {
@@ -294,6 +296,65 @@ static bool SpeedRSAKeyGen(const std::string &selected) {
return true;
}

static bool SpeedSIKEP503(const std::string &selected) {
if (!selected.empty() && selected.find("SIKE") == std::string::npos) {
return true;
}
// speed generation
uint8_t public_SIKE[SIKEp503_PUB_BYTESZ];
uint8_t private_SIKE[SIKEp503_PRV_BYTESZ];
uint8_t ct[SIKEp503_CT_BYTESZ];
bool res;

{
TimeResults results;
res = TimeFunction(&results,
[&private_SIKE, &public_SIKE]() -> bool {
(void)SIKE_keypair(private_SIKE, public_SIKE);
return true;
});
results.Print("SIKE/P503 generate");
}

if (!res) {
fprintf(stderr, "Failed to time SIKE_keypair.\n");
return false;
}

{
TimeResults results;
TimeFunction(&results,
[&ct, &public_SIKE]() -> bool {
uint8_t ss[SIKEp503_SS_BYTESZ];
(void)SIKE_encaps(ss, ct, public_SIKE);
return true;
});
results.Print("SIKE/P503 encap");
}

if (!res) {
fprintf(stderr, "Failed to time SIKE_encaps.\n");
return false;
}

{
TimeResults results;
TimeFunction(&results,
[&ct, &public_SIKE, &private_SIKE]() -> bool {
uint8_t ss[SIKEp503_SS_BYTESZ];
(void)SIKE_decaps(ss, ct, public_SIKE, private_SIKE);
return true;
});
results.Print("SIKE/P503 decap");
}

if (!res) {
fprintf(stderr, "Failed to time SIKE_decaps.\n");
return false;
}
return true;
}

static uint8_t *align(uint8_t *in, unsigned alignment) {
return reinterpret_cast<uint8_t *>(
(reinterpret_cast<uintptr_t>(in) + alignment) &
@@ -938,6 +999,7 @@ bool Speed(const std::vector<std::string> &args) {
!SpeedECDH(selected) ||
!SpeedECDSA(selected) ||
!Speed25519(selected) ||
!SpeedSIKEP503(selected) ||
!SpeedSPAKE2(selected) ||
!SpeedScrypt(selected) ||
!SpeedRSAKeyGen(selected) ||


Ladataan…
Peruuta
Tallenna