@@ -1,14 +0,0 @@ | |||
#--unpad-paren | |||
# disable backup files |
@@ -0,0 +1,45 @@ | |||
# Builds libc++ and libc++abi from llvm-project with | |||
# -DLLVM_USE_SANITIZER=MemoryWithOrigins and exposes the resulting static | |||
# archives as the imported targets `cxx` and `cxxabi`. | |||
include(ExternalProject) | |||
find_program(MAKE_PROGRAM make) | |||
if(NOT MAKE_PROGRAM) | |||
  # Fail at configure time instead of running "MAKE_PROGRAM-NOTFOUND" at build time. | |||
  message(FATAL_ERROR "make is required to build the instrumented libc++ and libc++abi") | |||
endif() | |||
# The project list is written with $<SEMICOLON> instead of ';' | |||
# (presumably so it reaches the nested cmake call as one argument - confirm). | |||
string(REPLACE " " "$<SEMICOLON>" LLVM_PROJECT_TARGETS "libcxx libcxxabi") | |||
set(PREFIX ${CMAKE_CURRENT_BINARY_DIR}/3rd/llvm-project) | |||
# The install step runs with DESTDIR=${PREFIX}; the archives are expected | |||
# under ${PREFIX}/usr/local/lib. | |||
set(LLVM_LIB_CXX | |||
  ${PREFIX}/usr/local/lib/libc++${CMAKE_STATIC_LIBRARY_SUFFIX}) | |||
set(LLVM_LIB_CXXABI | |||
  ${PREFIX}/usr/local/lib/libc++abi${CMAKE_STATIC_LIBRARY_SUFFIX}) | |||
ExternalProject_Add( | |||
  llvm-project | |||
  GIT_REPOSITORY https://github.com/llvm/llvm-project.git | |||
  GIT_TAG llvmorg-12.0.0 | |||
  GIT_SHALLOW TRUE | |||
  # NOTE(review): the positional ../llvm-project/llvm argument appears to point | |||
  # the configure step at the llvm/ subdirectory of the checkout - confirm. | |||
  CMAKE_ARGS | |||
    -DCMAKE_BUILD_TYPE=Release | |||
    -DLLVM_ENABLE_PROJECTS=${LLVM_PROJECT_TARGETS} | |||
    -DLLVM_USE_SANITIZER=MemoryWithOrigins | |||
    -DCMAKE_C_COMPILER=clang | |||
    -DCMAKE_CXX_COMPILER=clang++ | |||
    ../llvm-project/llvm | |||
    -DLLVM_INCLUDE_BENCHMARKS=OFF | |||
  BUILD_COMMAND ${MAKE_PROGRAM} cxx cxxabi | |||
  # DESTDIR is passed through `cmake -E env` so the step does not depend on | |||
  # the command being run by a POSIX shell; the same make as the build step is used. | |||
  INSTALL_COMMAND ${CMAKE_COMMAND} -E env DESTDIR=${PREFIX} ${MAKE_PROGRAM} install-cxx-headers install-cxx install-cxxabi | |||
  # Declare the archives so generators that track outputs (e.g. Ninja) know | |||
  # which rule produces the files used by the imported targets below. | |||
  BUILD_BYPRODUCTS ${LLVM_LIB_CXX} ${LLVM_LIB_CXXABI} | |||
  COMMENT "Building memcheck instrumented libc++ and libc++abi" | |||
  PREFIX ${PREFIX} | |||
  # Don't try updating the source. This prevents running update when calling 'make' (not sure why update step is run during make). | |||
  # It also means the source is not updated when 'cmake' is called again. But we use a fixed tag, so this shouldn't be needed | |||
  UPDATE_DISCONNECTED TRUE | |||
) | |||
# IMPORTED_LOCATION points at static archives, so the targets are declared STATIC. | |||
add_library(cxx STATIC IMPORTED GLOBAL) | |||
add_library(cxxabi STATIC IMPORTED GLOBAL) | |||
# The archives exist only after the external project has been built and installed. | |||
add_dependencies(cxx llvm-project) | |||
add_dependencies(cxxabi llvm-project) | |||
set_target_properties( | |||
  cxx PROPERTIES IMPORTED_LOCATION ${LLVM_LIB_CXX}) | |||
set_target_properties( | |||
  cxxabi PROPERTIES IMPORTED_LOCATION ${LLVM_LIB_CXXABI}) | |||
# Publish the install location in a global property. | |||
set_property( | |||
  GLOBAL PROPERTY llvmproject_build_install_dir_property | |||
  ${PREFIX}/usr/local) |
@@ -1,6 +0,0 @@ | |||
* text=auto | |||
*.[ch] text whitespacestrict | |||
*.yaml text whitespacestrict | |||
Makefile text whitespace="tabwidth=4,-tab-in-indent,indent-with-non-tab" | |||
[attr]whitespacestrict whitespace="trailing-space,tab-in-indent,space-before-tab,tabwidth=4" |
@@ -8,6 +8,62 @@ jobs: | |||
# Builds the library and runs the unit tests once per matrix entry | |||
# (gcc/clang, Release/Debug, plus a clang build with -DADDRSAN=1), | |||
# then builds the Rust bindings. | |||
unit-test: | |||
name: Unit tests | |||
runs-on: [ubuntu-20.04] | |||
# Compiler and CMake flags come from the matrix entry and are consumed by the build step. | |||
env: | |||
CC: ${{ matrix.cc }} | |||
CXX: ${{ matrix.cxx }} | |||
CMAKE_FLAGS: ${{matrix.flags}} | |||
strategy: | |||
# Keep running the remaining matrix entries when one of them fails. | |||
fail-fast: false | |||
max-parallel: 4 | |||
matrix: | |||
name: [ | |||
gcc-release-build, | |||
clang-release-build, | |||
gcc-debug-build, | |||
clang-debug-build, | |||
clang-release-asan-build, | |||
] | |||
# Per-name compiler and flag settings. | |||
include: | |||
- name: gcc-release-build | |||
cc: gcc | |||
cxx: g++ | |||
flags: -DCMAKE_BUILD_TYPE=Release | |||
- name: gcc-debug-build | |||
cc: gcc | |||
cxx: g++ | |||
flags: -DCMAKE_BUILD_TYPE=Debug | |||
- name: clang-release-build | |||
cc: clang | |||
cxx: clang++ | |||
flags: -DCMAKE_BUILD_TYPE=Release | |||
# NOTE(review): this entry uses absolute compiler paths while the other clang entries use bare names - confirm this is intended. | |||
- name: clang-debug-build | |||
cc: /usr/bin/clang | |||
cxx: /usr/bin/clang++ | |||
flags: -DCMAKE_BUILD_TYPE=Debug | |||
- name: clang-release-asan-build | |||
cc: clang | |||
cxx: clang++ | |||
flags: -DCMAKE_BUILD_TYPE=Release -DADDRSAN=1 | |||
steps: | |||
- uses: actions/checkout@v1 | |||
with: | |||
submodules: true | |||
# Out-of-source build in ./build using the matrix compiler and flags. | |||
- name: build | |||
run: | | |||
mkdir -p build | |||
cd build | |||
CC=${CC} CXX=${CXX} cmake ${CMAKE_FLAGS} .. | |||
make | |||
# Runs the `ut` binary produced by the build step. | |||
- name: run tests | |||
run: | | |||
cd build && ./ut | |||
- name: Build Rust bindings | |||
run: | | |||
cd src/rustapi/pqc-sys && cargo build | |||
KAT: | |||
name: Known Answer Tests | |||
runs-on: [ubuntu-20.04] | |||
steps: | |||
- uses: actions/checkout@v1 | |||
with: | |||
@@ -16,7 +72,7 @@ jobs: | |||
run: | | |||
mkdir -p build | |||
cd build | |||
cmake -DCMAKE_BUILD_TYPE=Release .. | |||
CC=clang CXX=clang++ cmake -DCMAKE_BUILD_TYPE=Release .. | |||
make | |||
- name: run tests | |||
run: | | |||
@@ -29,4 +85,20 @@ jobs: | |||
cd test/katrunner && | |||
curl http://amongbytes.com/~flowher/permalinks/kat.zip --output kat.zip | |||
unzip kat.zip | |||
cargo run -- --katdir KAT | |||
cargo run --release -- --katdir KAT | |||
# Builds with clang and -DMEMSAN=1 -DCTSAN=1 (memory sanitizer plus | |||
# constant-time sanitizer), then runs the unit tests. | |||
MEMSAN: | |||
name: Memory Sanitizer build | |||
runs-on: [ubuntu-20.04] | |||
steps: | |||
- uses: actions/checkout@v1 | |||
with: | |||
submodules: true | |||
- name: build | |||
run: | | |||
mkdir -p build | |||
cd build | |||
CC=clang CXX=clang++ cmake -DCMAKE_BUILD_TYPE=Release -DMEMSAN=1 -DCTSAN=1 .. | |||
make | |||
# Runs the `ut` binary produced by the build step. | |||
- name: run tests | |||
run: | | |||
cd build && ./ut |
@@ -7,7 +7,4 @@ bin/ | |||
# Object and library files on Windows | |||
*.lib | |||
*.obj | |||
__pycache__ | |||
testcases/ | |||
*.obj |
@@ -1,9 +0,0 @@ | |||
[submodule "test/pycparser"] | |||
path = test/pycparser | |||
url = https://github.com/eliben/pycparser.git | |||
[submodule "3rd/gtest"] | |||
path = 3rd/gtest | |||
url = https://github.com/google/googletest.git | |||
[submodule "3rd/gbench"] | |||
path = 3rd/gbench | |||
url = https://github.com/henrydcase/benchmark.git |
@@ -1 +0,0 @@ | |||
Subproject commit e45fcc64e02489f718df499a162b41f742a1b7e5 |
@@ -1 +0,0 @@ | |||
Subproject commit 1a8ecf1813d022cc7914e04564b92decff6161fc |
@@ -1,10 +1,73 @@ | |||
cmake_minimum_required(VERSION 3.13) | |||
project(cryptocore NONE) | |||
project(cryptocore VERSION 0.0.1 LANGUAGES C) | |||
include(FetchContent) | |||
include(ExternalProject) | |||
set(CMAKE_CXX_STANDARD 20) | |||
set(CMAKE_C_STANDARD 99) | |||
set(CMAKE_POSITION_INDEPENDENT_CODE ON) | |||
enable_language(C) | |||
enable_language(CXX) | |||
enable_language(ASM) | |||
set_property(GLOBAL PROPERTY obj_libs "") | |||
# Build with the address, leak and undefined-behaviour sanitizers (-DADDRSAN=1). | |||
# The compile flags are appended to EXTRA_CXX_FLAGS; the matching | |||
# -fsanitize option for linking is stored in EXTRA_LDFLAGS. | |||
if(ADDRSAN) | |||
string(APPEND EXTRA_CXX_FLAGS " -fsanitize=undefined,address,leak -fno-omit-frame-pointer") | |||
set(EXTRA_LDFLAGS " -fsanitize=undefined,address,leak") | |||
endif() | |||
# Build with MemorySanitizer (-DMEMSAN=1). Requires clang, cannot be combined | |||
# with ADDRSAN, and uses the instrumented libc++/libc++abi built by | |||
# .cmake/libstd-memory_sanitizer.mk. | |||
if(MEMSAN) | |||
  # PQC_MEMSAN enables usage of some internals from clang | |||
  if(NOT CMAKE_C_COMPILER_ID MATCHES "Clang") | |||
    message(FATAL_ERROR "Must use clang if compiled with memory sanitizer.") | |||
  endif() | |||
  if(ADDRSAN) | |||
    message(FATAL_ERROR "Can't use MSAN and ASAN") | |||
  endif() | |||
  include(.cmake/libstd-memory_sanitizer.mk) | |||
  # LLVM project location | |||
  set(LLVM_PRJ ${CMAKE_CURRENT_BINARY_DIR}/3rd/llvm-project) | |||
  set(LLVM_PRJ_LIB ${LLVM_PRJ}/usr/local/lib) | |||
  set(LLVM_PRJ_INC ${LLVM_PRJ}/usr/local/include) | |||
  # Add memory sanitizer instrumented libraries | |||
  set(CMAKE_ARGS_MEMCHECK_LIB "-stdlib=libc++") | |||
  # -isystem takes the directory as its own argument; it used to be followed | |||
  # by "-I<dir>", which made the compiler treat the literal "-I<dir>" as the path. | |||
  set(CMAKE_ARGS_MEMCHECK_INC "-isystem ${LLVM_PRJ_INC} -I${LLVM_PRJ_INC}/c++/v1") | |||
  set(CMAKE_ARGS_MEMCHECK_FLAGS "-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer -Wno-unused-command-line-argument") | |||
  # Enabling the "keep-going" flag allows two things: | |||
  # 1. Enables CT_EXPECT_UMR()/CT_REQUIRE_UMR() in tests. For some reason MSan will halt | |||
  # on error even if it expects UMR. And hence, CT can't be tested. This is probably a bug. | |||
  # 2. Reports all the errors from the run, not only the first one (don't fail-fast) | |||
  string(APPEND CMAKE_ARGS_MEMCHECK_FLAGS " -mllvm -msan-keep-going=1") | |||
  set(EXTRA_CXX_FLAGS "${CMAKE_ARGS_MEMCHECK_FLAGS} ${CMAKE_ARGS_MEMCHECK_LIB} ${CMAKE_ARGS_MEMCHECK_INC} -DPQC_MEMSAN_BUILD") | |||
  # Imported targets defined by .cmake/libstd-memory_sanitizer.mk | |||
  set(CXXLIBS_FOR_MEMORY_SANITIZER cxx cxxabi) | |||
endif() | |||
# Constant-time checks based on the memory sanitizer (-DCTSAN=1). | |||
# Requires clang and -DMEMSAN; defines PQC_USE_CTSANITIZER for the build. | |||
if(CTSAN) | |||
if (NOT MEMSAN) | |||
message(FATAL_ERROR "Constant time sanitizer requires -DMEMSAN") | |||
endif() | |||
if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang") | |||
message(FATAL_ERROR "Constant time sanitizer requires Clang") | |||
endif() | |||
string(APPEND EXTRA_CXX_FLAGS " -DPQC_USE_CTSANITIZER") | |||
endif() | |||
# Constant-time memory checks with CTGRIND (requires valgrind). | |||
# Mutually exclusive with MEMSAN/CTSAN; defines PQC_USE_CTGRIND for the build. | |||
if (CTGRIND) | |||
if (MEMSAN OR CTSAN) | |||
message(FATAL_ERROR "Can't use memory sanitizer (MEMSAN) and CTGRIND") | |||
endif() | |||
string(APPEND EXTRA_CXX_FLAGS " -DPQC_USE_CTGRIND") | |||
endif() | |||
set(CMAKE_VERBOSE_MAKEFILE ON) | |||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "~/.cmake/Modules") | |||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "3rd/cmake-modules") | |||
@@ -33,8 +96,6 @@ else() | |||
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR}) | |||
endif() | |||
add_subdirectory(3rd/gtest) | |||
# Arch settings | |||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") | |||
@@ -42,7 +103,8 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") | |||
endif() | |||
if(CMAKE_C_COMPILER_ID MATCHES "Clang") | |||
set(CLANG 1) | |||
# Additional flags only useful when compiling with clang | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wconditional-uninitialized -Wno-missing-variable-declarations -Wno-unused-command-line-argument") | |||
endif() | |||
if (MACOSX) | |||
@@ -52,55 +114,99 @@ endif() | |||
# Global configuration | |||
set(C_CXX_FLAGS | |||
"-Wno-ignored-qualifiers \ | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wno-ignored-qualifiers \ | |||
-Wall \ | |||
-Werror \ | |||
-Wextra \ | |||
-Wpedantic \ | |||
-Wshadow \ | |||
-Wno-variadic-macros \ | |||
-Wundef \ | |||
-Wunused-result") | |||
if(CLANG) | |||
set(C_CXX_FLAGS | |||
"-Wconditional-uninitialized \ | |||
-Wmissing-variable-declarations") | |||
-Wunused-result \ | |||
-Wno-unused-command-line-argument \ | |||
-Wno-undef") | |||
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 11.0) | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wno-stringop-overread \ | |||
-Wno-stringop-overflow \ | |||
-Wno-array-parameter") | |||
endif() | |||
include(.cmake/common.mk) | |||
# Control Debug/Release mode | |||
if(CMAKE_BUILD_TYPE_LOWER STREQUAL "debug") | |||
set(C_CXX_FLAGS "${C_CXX_FLAGS} -g3 -O0 -Wno-unused") | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -g3 -O0 -Wno-unused") | |||
else() | |||
set(C_CXX_FLAGS "${C_CXX_FLAGS} -O3") | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -O3") | |||
endif() | |||
include_directories( | |||
public | |||
src/common/ | |||
src | |||
) | |||
# Set CPU architecture | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -D${ARCH}") | |||
set_property(GLOBAL PROPERTY obj_libs "") | |||
# Build for haswell if on x86_64. | |||
# ARCH is quoted so the comparison stays well-formed when it is empty and the | |||
# expanded value is never re-interpreted as a variable name. | |||
if("${ARCH}" STREQUAL "ARCH_x86_64") | |||
  add_compile_options("-march=haswell") | |||
endif() | |||
# Dependencies | |||
# googletest is built as an external project at a pinned commit: sources are | |||
# placed in 3rd/gtest of the source tree and the install goes to 3rd/gtest | |||
# of the build tree. The project's compilers and EXTRA_CXX_FLAGS (the | |||
# sanitizer flags set above) are forwarded to it. | |||
ExternalProject_Add( | |||
gtest_project | |||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/gtest | |||
GIT_REPOSITORY https://github.com/google/googletest.git | |||
GIT_TAG a3460d1aeeaa43fdf137a6adefef10ba0b59fe4b | |||
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/3rd/gtest | |||
INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/3rd/gtest | |||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/3rd/gtest -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS=${EXTRA_CXX_FLAGS} -DCMAKE_C_FLAGS=${EXTRA_CXX_FLAGS} -Dgtest_disable_pthreads=ON | |||
) | |||
# With MEMSAN, gtest is compiled against the instrumented libc++, so the | |||
# cxx/cxxabi targets must be built first. | |||
if(MEMSAN) | |||
add_dependencies(gtest_project ${CXXLIBS_FOR_MEMORY_SANITIZER}) | |||
endif() | |||
# Set CPU architecture | |||
set(CMAKE_C_FLAGS "${C_CXX_FLAGS} -D${ARCH}") | |||
set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -D${ARCH}") | |||
# Benchmark library sources, pinned to a fixed commit and populated into | |||
# 3rd/gbench of the source tree at configure time. | |||
FetchContent_Declare( | |||
gbench | |||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/gbench | |||
GIT_REPOSITORY https://github.com/kriskwiatkowski/benchmark.git | |||
GIT_TAG 49862ab56b6b7c3afd87b80bd5d787ed78ce3b96 | |||
) | |||
FetchContent_Populate(gbench) | |||
# cpu_features sources, pinned to a fixed commit and populated into | |||
# 3rd/cpu_features of the source tree at configure time. | |||
FetchContent_Declare( | |||
cpu_features | |||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/cpu_features | |||
GIT_REPOSITORY https://github.com/kriskwiatkowski/cpu_features.git | |||
GIT_TAG 38f4324533390b09079a38b524be8b178be8e435 | |||
) | |||
FetchContent_Populate(cpu_features) | |||
if(PQC_WEAK_RANDOMBYTES) | |||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DPQC_WEAK_RANDOMBYTES") | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -DPQC_WEAK_RANDOMBYTES") | |||
endif() | |||
# Build CPU features | |||
# The subdirectory is configured with the project flags plus the extra | |||
# (sanitizer) flags. The CXX line used to read "$${...} {EXTRA_CXX_FLAGS}", | |||
# which put a stray '$' and the literal text "{EXTRA_CXX_FLAGS}" into the flags. | |||
set(CMAKE_C_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") | |||
set(CMAKE_CXX_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") | |||
set(BUILD_PIC ON CACHE BOOL "") | |||
add_subdirectory(3rd/cpu_features) | |||
# PQC library | |||
# Set C, CXX, and LD flags | |||
# -Wpedantic is appended only here, after 3rd/cpu_features has been added. | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wpedantic") | |||
set(CMAKE_C_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") | |||
set(CMAKE_CXX_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") | |||
# NOTE(review): LDFLAGS is not a variable CMake consumes by itself; confirm it is | |||
# used elsewhere, otherwise EXTRA_LDFLAGS never reaches the linker through this line. | |||
string(APPEND LDFLAGS "${EXTRA_LDFLAGS}") | |||
# Directory-scoped include paths for the targets defined below. | |||
include_directories( | |||
public | |||
src/common/ | |||
src | |||
3rd/cpu_features/include | |||
) | |||
# Define sources of the components | |||
add_subdirectory(src/sign/dilithium/dilithium2/clean) | |||
add_subdirectory(src/sign/dilithium/dilithium3/clean) | |||
add_subdirectory(src/sign/dilithium/dilithium5/clean) | |||
add_subdirectory(src/sign/falcon/falcon-1024/clean) | |||
add_subdirectory(src/sign/falcon/falcon-512/clean) | |||
add_subdirectory(src/sign/falcon) | |||
add_subdirectory(src/sign/rainbow/rainbowV-classic/clean) | |||
add_subdirectory(src/sign/rainbow/rainbowI-classic/clean) | |||
add_subdirectory(src/sign/rainbow/rainbowIII-classic/clean) | |||
@@ -148,19 +254,23 @@ add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean) | |||
add_subdirectory(src/kem/sike) | |||
add_subdirectory(src/kem/mceliece/mceliece348864/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece460896/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece6688128/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece6960119/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece8192128/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece348864f/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece460896f/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece6688128f/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece6960119f/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece8192128f/clean) | |||
# Hardware optimized targets | |||
if(${ARCH} STREQUAL "ARCH_x86_64") | |||
set(CMAKE_C_FLAGS | |||
"${CMAKE_C_FLAGS} -march=native -mtune=native") | |||
set(SRC_COMMON_AVX2 | |||
src/common/keccak4x/KeccakP-1600-times4-SIMD256.c | |||
) | |||
if(${ARCH} STREQUAL "ARCH_x86_64") | |||
set(COMMON_EXTRA_SRC "src/common/keccak4x/KeccakP-1600-times4-SIMD256.c") | |||
# Sign | |||
add_subdirectory(src/sign/falcon/falcon-512/avx2) | |||
add_subdirectory(src/sign/falcon/falcon-1024/avx2) | |||
add_subdirectory(src/sign/dilithium/dilithium2/avx2) | |||
add_subdirectory(src/sign/dilithium/dilithium3/avx2) | |||
add_subdirectory(src/sign/dilithium/dilithium5/avx2) | |||
@@ -188,7 +298,6 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/avx2) | |||
# KEMs | |||
add_subdirectory(src/kem/kyber/kyber512/avx2) | |||
add_subdirectory(src/kem/kyber/kyber768/avx2) | |||
@@ -208,25 +317,20 @@ add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2) | |||
endif() | |||
# The rest of the library | |||
set(SRC_COMMON_GENERIC | |||
add_library( | |||
common | |||
OBJECT | |||
src/common/aes.c | |||
src/common/fips202.c | |||
src/common/sp800-185.c | |||
src/common/randombytes.c | |||
src/common/sha2.c | |||
src/common/nistseedexpander.c | |||
src/common/utils.c | |||
src/capi/pqapi.c | |||
) | |||
add_library( | |||
common | |||
OBJECT | |||
${SRC_COMMON_GENERIC} | |||
${SRC_COMMON_AVX2} | |||
) | |||
${COMMON_EXTRA_SRC}) | |||
add_library( | |||
pqc | |||
@@ -241,33 +345,77 @@ get_property(OBJ_LIBS GLOBAL PROPERTY obj_libs) | |||
target_link_libraries( | |||
pqc | |||
common | |||
${OBJ_LIBS} | |||
cpu_features | |||
common | |||
) | |||
target_link_libraries( | |||
pqc_s | |||
cpu_features | |||
common | |||
${OBJ_LIBS} | |||
) | |||
SET(UT_SRC test/ut.cpp) | |||
if(CTGRIND OR CTSAN) | |||
SET(UT_SRC ${UT_SRC} test/ct.cpp) | |||
endif() | |||
add_executable( | |||
ut | |||
test/ut.cpp | |||
${UT_SRC} | |||
) | |||
target_link_libraries( | |||
ut | |||
gtest | |||
gtest_main | |||
pqc_s) | |||
pqc_s | |||
${CXXLIBS_FOR_MEMORY_SANITIZER}) | |||
ExternalProject_Get_Property(gtest_project INSTALL_DIR) | |||
target_include_directories( | |||
ut PRIVATE | |||
${CMAKE_SOURCE_DIR}) | |||
${CMAKE_SOURCE_DIR} | |||
${INSTALL_DIR}/include) | |||
target_link_directories( | |||
ut | |||
PRIVATE | |||
${INSTALL_DIR}/lib) | |||
# github CI requires that | |||
add_dependencies(ut gtest_project) | |||
# Benchmarks are configured only for non-debug builds. The cache entries below | |||
# configure the benchmark library in 3rd/gbench before it is added as a subdirectory. | |||
if(NOT CMAKE_BUILD_TYPE_LOWER STREQUAL "debug") | |||
# settings below are required by benchmark library | |||
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) | |||
# Target for benchmark - it also builds gtest library | |||
set(BENCHMARK_ENABLE_GTEST_TESTS ON CACHE BOOL "Enable testing of the benchmark library." FORCE) | |||
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark tests" FORCE) | |||
set(GOOGLETEST_PATH "${CMAKE_SOURCE_DIR}/3rd/gtest" CACHE PATH "Path to the gtest sources" FORCE) | |||
#if (NOT MACOSX) | |||
# set(BENCHMARK_ENABLE_LTO ON CACHE BOOL "Enable link time optim" FORCE) | |||
#endif() | |||
set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) | |||
set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE) | |||
# From here on only EXTRA_CXX_FLAGS are used as compile flags; the project | |||
# flags in PQC_CMAKE_C_CXX_FLAGS are dropped. | |||
set(CMAKE_C_FLAGS "${EXTRA_CXX_FLAGS}") | |||
set(CMAKE_CXX_FLAGS "${EXTRA_CXX_FLAGS}") | |||
if (MEMSAN) | |||
set(BENCHMARK_USE_LIBCXX ON CACHE BOOL "" FORCE) | |||
# Since build requires C++20 it is safe to assume that std::regex is available. | |||
# It seems I need to force it as benchmark build doesn't work very well with libc++ | |||
set(HAVE_STD_REGEX ON CACHE BOOL "OK" FORCE) | |||
endif() | |||
add_subdirectory(${CMAKE_SOURCE_DIR}/3rd/gbench) | |||
add_subdirectory(test/bench) | |||
endif() | |||
install(TARGETS pqc pqc_s | |||
PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE | |||
@@ -1,23 +1,27 @@ | |||
# PQ Crypto Catalog | |||
This is a repository of post-quantum schemes copied from either the submission to the NIST Post-Quantum Standardization or [PQClean](https://github.com/PQClean/PQClean) project. The goal of the library is to provide easy to use API which enables quick experimentation with some post-quantum cryptographic schemes. | |||
Implementation of quantum-safe signature and KEM schemes submitted to NIST PQC Standardization Process. | |||
The goal is to provide an easy-to-use API in C and Rust to enable experimentation. The code is derived from the submissions to the NIST Post-Quantum Standardization process, either directly or via the [PQClean](https://github.com/PQClean/PQClean) project. | |||
Users shouldn't expect any level of security provided by this code. The library is not meant to be used on live production systems. | |||
## Schemes support | |||
## Supported schemes | |||
| Name | NIST Round | x86 optimized | | |||
|--------------------------|------------|---------------| | |||
| Kyber | 3 | x | | |||
| NTRU | 3 | x | | |||
| SABER | 3 | x | | |||
| FrodoKEM | 3 | | | |||
| Dilithium | 3 | x | | |||
| Falcon | 3 | | | |||
| SPHINCS+ SHA256/SHAKE256 | 3 | x | | |||
| NTRU | 3 | x | | |||
| NTRU Prime | 3 | x | | |||
| HQC-RMRS | 3 | x | | |||
| Dilithium | 3 | x | | |||
| Falcon | 2 | | | |||
| Rainbow | 3 | | | |||
| SPHINCS+ SHA256/SHAKE256 | 3 | x | | |||
| SIKE/p434 | 3 | x | | |||
| McEliece | 3 | | | |||
## Building | |||
@@ -38,13 +42,13 @@ Library provides simple API, wrapping PQClean. For example to use KEM, one shoul | |||
```c | |||
#include <pqc/pqc.h> | |||
const params_t *p = pqc_kem_alg_by_id(KYBER512); | |||
std::vector<uint8_t> ct(ciphertext_bsz(p)); | |||
std::vector<uint8_t> ss1(shared_secret_bsz(p)); | |||
std::vector<uint8_t> ss2(shared_secret_bsz(p)); | |||
std::vector<uint8_t> sk(private_key_bsz(p)); | |||
std::vector<uint8_t> pk(public_key_bsz(p)); | |||
const params_t *p = pqc_kem_alg_by_id(KYBER512); | |||
pqc_keygen(p, pk.data(), sk.data()); | |||
pqc_kem_encapsulate(p, ct.data(), ss1.data(), pk.data()); | |||
pqc_kem_decapsulate(p, ss2.data(), ct.data(), sk.data()); | |||
@@ -0,0 +1,9 @@ | |||
# Security Policy | |||
## Supported Versions | |||
No security guaranteed. | |||
## Reporting a Vulnerability | |||
Any comments welcome: contact (at) amongbytes.com |
@@ -8,73 +8,88 @@ extern "C" { | |||
#include <stdint.h> | |||
#include <stdbool.h> | |||
// defines supported signature algorithm list | |||
#define PQC_SUPPORTED_SIGS(_) \ | |||
// Defines supported signature algorithm list. The resulting | |||
// ID of an algorithm is PQC_ALG_SIG_(NAME_AS_BELOW) | |||
#define PQC_SUPPORTED_SIGS(_) \ | |||
_(DILITHIUM2) \ | |||
_(DILITHIUM3) \ | |||
_(DILITHIUM5) \ | |||
_(FALCON1024) \ | |||
_(FALCON512) \ | |||
_(RAINBOWVCLASSIC) \ | |||
_(FALCON1024) \ | |||
_(RAINBOWICLASSIC) \ | |||
_(RAINBOWIIICLASSIC) \ | |||
_(SPHINCSSHA256192FSIMPLE) \ | |||
_(SPHINCSSHAKE256256FSIMPLE) \ | |||
_(SPHINCSSHAKE256192FROBUST) \ | |||
_(RAINBOWVCLASSIC) \ | |||
_(SPHINCSSHAKE256128FSIMPLE) \ | |||
_(SPHINCSSHAKE256256SSIMPLE) \ | |||
_(SPHINCSSHAKE256128SSIMPLE) \ | |||
_(SPHINCSSHA256128FROBUST) \ | |||
_(SPHINCSSHA256192SROBUST) \ | |||
_(SPHINCSSHAKE256128FROBUST) \ | |||
_(SPHINCSSHAKE256128SROBUST) \ | |||
_(SPHINCSSHAKE256256SROBUST) \ | |||
_(SPHINCSSHA256192SSIMPLE) \ | |||
_(SPHINCSSHAKE256192FSIMPLE) \ | |||
_(SPHINCSSHAKE256192SSIMPLE) \ | |||
_(SPHINCSSHAKE256192FROBUST) \ | |||
_(SPHINCSSHAKE256192SROBUST) \ | |||
_(SPHINCSSHAKE256192FSIMPLE) \ | |||
_(SPHINCSSHA256256SSIMPLE) \ | |||
_(SPHINCSSHA256128SSIMPLE) \ | |||
_(SPHINCSSHAKE256256FSIMPLE) \ | |||
_(SPHINCSSHAKE256256SSIMPLE) \ | |||
_(SPHINCSSHAKE256256FROBUST) \ | |||
_(SPHINCSSHA256256FROBUST) \ | |||
_(SPHINCSSHA256256FSIMPLE) \ | |||
_(SPHINCSSHA256256SROBUST) \ | |||
_(SPHINCSSHA256128SROBUST) \ | |||
_(SPHINCSSHAKE256256SROBUST) \ | |||
_(SPHINCSSHA256128FSIMPLE) \ | |||
_(SPHINCSSHA256192FROBUST) | |||
_(SPHINCSSHA256128SSIMPLE) \ | |||
_(SPHINCSSHA256128FROBUST) \ | |||
_(SPHINCSSHA256128SROBUST) \ | |||
_(SPHINCSSHA256192FSIMPLE) \ | |||
_(SPHINCSSHA256192SSIMPLE) \ | |||
_(SPHINCSSHA256192FROBUST) \ | |||
_(SPHINCSSHA256192SROBUST) \ | |||
_(SPHINCSSHA256256FSIMPLE) \ | |||
_(SPHINCSSHA256256SSIMPLE) \ | |||
_(SPHINCSSHA256256FROBUST) \ | |||
_(SPHINCSSHA256256SROBUST) | |||
// defines supported kem algorithm list | |||
// Defines supported kem algorithm list. The resulting | |||
// ID of an algorithm is PQC_ALG_KEM_(NAME_AS_BELOW) | |||
#define PQC_SUPPORTED_KEMS(_)\ | |||
_(FRODOKEM640SHAKE) \ | |||
_(FRODOKEM976SHAKE) \ | |||
_(FRODOKEM1344SHAKE) \ | |||
_(FRODOKEM640SHAKE) \ | |||
_(KYBER512) \ | |||
_(KYBER768) \ | |||
_(KYBER1024) \ | |||
_(KYBER512) \ | |||
_(NTRUHPS4096821) \ | |||
_(NTRUHPS2048509) \ | |||
_(NTRUHPS4096821) \ | |||
_(NTRUHRSS701) \ | |||
_(NTRUHPS2048677) \ | |||
_(NTRULPR761) \ | |||
_(NTRULPR653) \ | |||
_(NTRULPR857) \ | |||
_(LIGHTSABER) \ | |||
_(FIRESABER) \ | |||
_(SABER) \ | |||
_(FIRESABER) \ | |||
_(HQCRMRS128) \ | |||
_(HQCRMRS192) \ | |||
_(HQCRMRS256) | |||
_(HQCRMRS256) \ | |||
_(SIKE434) \ | |||
_(MCELIECE348864) \ | |||
_(MCELIECE460896) \ | |||
_(MCELIECE6688128) \ | |||
_(MCELIECE6960119) \ | |||
_(MCELIECE8192128) \ | |||
_(MCELIECE348864F) \ | |||
_(MCELIECE460896F) \ | |||
_(MCELIECE6688128F) \ | |||
_(MCELIECE6960119F) \ | |||
_(MCELIECE8192128F) | |||
// Defines IDs for each algorithm. The | |||
// PQC_ALG_SIG/KEM_MAX indicates number | |||
// of KEM and signature schemes supported. | |||
#define DEFNUM(N) N, | |||
enum { PQC_SUPPORTED_SIGS(DEFNUM) PQC_ALG_SIG_MAX }; | |||
enum { PQC_SUPPORTED_KEMS(DEFNUM) PQC_ALG_KEM_MAX }; | |||
#undef DEFNUM | |||
#define DEFNUM_SIG(N) PQC_ALG_SIG_##N, | |||
#define DEFNUM_KEM(N) PQC_ALG_KEM_##N, | |||
enum { PQC_SUPPORTED_SIGS(DEFNUM_SIG) PQC_ALG_SIG_MAX }; | |||
enum { PQC_SUPPORTED_KEMS(DEFNUM_KEM) PQC_ALG_KEM_MAX }; | |||
#undef DEFNUM_SIG | |||
#undef DEFNUM_KEM | |||
// Parameters of the scheme | |||
typedef struct params_t { | |||
typedef struct pqc_ctx_t { | |||
const uint8_t alg_id; | |||
const char* alg_name; | |||
const uint32_t prv_key_bsz; | |||
@@ -82,73 +97,59 @@ typedef struct params_t { | |||
const bool is_kem; | |||
int (*keygen)(uint8_t *sk, uint8_t *pk); | |||
} params_t; | |||
} pqc_ctx_t; | |||
typedef struct kem_params_t { | |||
params_t p; | |||
typedef struct pqc_kem_ctx_t { | |||
pqc_ctx_t p; | |||
const uint32_t ciphertext_bsz; | |||
const uint32_t secret_bsz; | |||
int (*encapsulate)(uint8_t *ct, uint8_t *ss, const uint8_t *pk); | |||
int (*decapsulate)(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); | |||
} kem_params_t; | |||
} pqc_kem_ctx_t; | |||
typedef struct sig_params_t { | |||
params_t p; | |||
typedef struct pqc_sig_ctx_t { | |||
pqc_ctx_t p; | |||
const uint32_t sign_bsz; | |||
int (*sign)(uint8_t *sig, uint64_t *siglen, const uint8_t *m, uint64_t mlen, const uint8_t *sk); | |||
int (*verify)(const uint8_t *sig, uint64_t siglen, const uint8_t *m, uint64_t mlen, const uint8_t *pk); | |||
} sig_params_t; | |||
inline uint32_t ciphertext_bsz(const params_t *p) { | |||
return ((kem_params_t *)p)->ciphertext_bsz; | |||
} | |||
inline uint32_t shared_secret_bsz(const params_t *p) { | |||
return ((kem_params_t *)p)->secret_bsz; | |||
} | |||
inline uint32_t signature_bsz(const params_t *p) { | |||
return ((sig_params_t *)p)->sign_bsz; | |||
} | |||
inline uint32_t public_key_bsz(const params_t *p) { | |||
return p->pub_key_bsz; | |||
} | |||
inline uint32_t private_key_bsz(const params_t *p) { | |||
return p->prv_key_bsz; | |||
} | |||
} pqc_sig_ctx_t; | |||
bool pqc_keygen( | |||
const params_t *p, | |||
const pqc_ctx_t *p, | |||
uint8_t *pk, uint8_t *sk); | |||
bool pqc_kem_encapsulate( | |||
const params_t *p, | |||
const pqc_ctx_t *p, | |||
uint8_t *ct, uint8_t *ss, | |||
const uint8_t *pk); | |||
bool pqc_kem_decapsulate( | |||
const params_t *p, | |||
const pqc_ctx_t *p, | |||
uint8_t *ss, const uint8_t *ct, | |||
const uint8_t *sk); | |||
bool pqc_sig_create( | |||
const params_t *p, | |||
const pqc_ctx_t *p, | |||
uint8_t *sig, uint64_t *siglen, | |||
const uint8_t *m, uint64_t mlen, | |||
const uint8_t *sk); | |||
bool pqc_sig_verify( | |||
const params_t *p, | |||
const pqc_ctx_t *p, | |||
const uint8_t *sig, uint64_t siglen, | |||
const uint8_t *m, uint64_t mlen, | |||
const uint8_t *pk); | |||
const params_t *pqc_kem_alg_by_id(uint8_t id); | |||
const params_t *pqc_sig_alg_by_id(uint8_t id); | |||
// Look up the context of a KEM / signature algorithm by its ID | |||
// (see the PQC_ALG_KEM_* and PQC_ALG_SIG_* enumerators above). | |||
// NOTE(review): behaviour for an unknown ID is not visible in this header - confirm. | |||
const pqc_ctx_t *pqc_kem_alg_by_id(uint8_t id); | |||
const pqc_ctx_t *pqc_sig_alg_by_id(uint8_t id); | |||
// Size accessors for an algorithm context. | |||
// NOTE(review): presumably these return the byte sizes stored in the context | |||
// structures declared above; the definitions are not in this header - confirm. | |||
uint32_t pqc_ciphertext_bsz(const pqc_ctx_t *p); | |||
uint32_t pqc_shared_secret_bsz(const pqc_ctx_t *p); | |||
uint32_t pqc_signature_bsz(const pqc_ctx_t *p); | |||
uint32_t pqc_public_key_bsz(const pqc_ctx_t *p); | |||
uint32_t pqc_private_key_bsz(const pqc_ctx_t *p); | |||
#ifdef __cplusplus | |||
} | |||
@@ -1,138 +1,14 @@ | |||
#include <stdint.h> | |||
#include <stdbool.h> | |||
#include <pqc/pqc.h> | |||
#include <cpuinfo_x86.h> | |||
#include <common/utils.h> | |||
// PQClean include | |||
#include "sign/rainbow/rainbowV-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowI-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowIII-classic/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h" | |||
#include "sign/falcon/falcon-1024/clean/api.h" | |||
#include "sign/falcon/falcon-1024/avx2/api.h" | |||
#include "sign/falcon/falcon-512/clean/api.h" | |||
#include "sign/falcon/falcon-512/avx2/api.h" | |||
#include "sign/dilithium/dilithium2/clean/api.h" | |||
#include "sign/dilithium/dilithium2/avx2/api.h" | |||
#include "sign/dilithium/dilithium3/clean/api.h" | |||
#include "sign/dilithium/dilithium3/avx2/api.h" | |||
#include "sign/dilithium/dilithium5/clean/api.h" | |||
#include "sign/dilithium/dilithium5/avx2/api.h" | |||
#include "kem/ntru/ntruhps4096821/clean/api.h" | |||
#include "kem/ntru/ntruhps4096821/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048509/clean/api.h" | |||
#include "kem/ntru/ntruhps2048509/avx2/api.h" | |||
#include "kem/ntru/ntruhrss701/clean/api.h" | |||
#include "kem/ntru/ntruhrss701/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048677/clean/api.h" | |||
#include "kem/ntru/ntruhps2048677/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/avx2/api.h" | |||
#include "kem/kyber/kyber768/clean/api.h" | |||
#include "kem/kyber/kyber768/avx2/api.h" | |||
#include "kem/kyber/kyber1024/clean/api.h" | |||
#include "kem/kyber/kyber1024/avx2/api.h" | |||
#include "kem/kyber/kyber512/clean/api.h" | |||
#include "kem/kyber/kyber512/avx2/api.h" | |||
#include "kem/mceliece/mceliece460896f/avx/api.h" | |||
#include "kem/mceliece/mceliece460896f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128f/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128f/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119f/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119f/clean/api.h" | |||
#include "kem/mceliece/mceliece460896/avx/api.h" | |||
#include "kem/mceliece/mceliece460896/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128/clean/api.h" | |||
#include "kem/mceliece/mceliece348864f/avx/api.h" | |||
#include "kem/mceliece/mceliece348864f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119/clean/api.h" | |||
#include "kem/mceliece/mceliece348864/avx/api.h" | |||
#include "kem/mceliece/mceliece348864/clean/api.h" | |||
#include "kem/frodo/frodokem976shake/clean/api.h" | |||
#include "kem/frodo/frodokem1344shake/clean/api.h" | |||
#include "kem/frodo/frodokem640shake/clean/api.h" | |||
#include "kem/saber/lightsaber/clean/api.h" | |||
#include "kem/saber/lightsaber/avx2/api.h" | |||
#include "kem/saber/firesaber/clean/api.h" | |||
#include "kem/saber/firesaber/avx2/api.h" | |||
#include "kem/saber/saber/clean/api.h" | |||
#include "kem/saber/saber/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/avx2/api.h" | |||
#include "schemes.h" | |||
// not proud of this thingy | |||
#define OPT_VERSION _CLEAN_ | |||
// Helper to stringify constants | |||
#define STR(x) STR_(x) | |||
#define STR_(x) #x | |||
/* Concatenate tokens X and Y. Can be done by the "##" operator in | |||
* simple cases, but has some side effects in more complicated cases. | |||
*/ | |||
#define GLUE(a, b) GLUE_(a, b) | |||
#define GLUE_(a, b) a##b | |||
// Returns prefix defined by PQClean, depending | |||
// on OPT_VERSION setting. | |||
// Something like: "PQCLEAN_KYBER512_CLEAN_" | |||
@@ -153,9 +29,9 @@ | |||
#define PQC_FN_SIGN(x) GLUE(A(x), crypto_sign_signature) | |||
#define PQC_FN_VERIFY(x) GLUE(A(x), crypto_sign_verify) | |||
#define REG_ALG(ID) \ | |||
#define REG_ALG(PFX,ID) \ | |||
{ \ | |||
.alg_id = ID, \ | |||
.alg_id = GLUE(PFX,ID), \ | |||
.alg_name = STR(ID), \ | |||
.prv_key_bsz = PQC_PRV_KEY_BSZ(ID), \ | |||
.pub_key_bsz = PQC_PUB_KEY_BSZ(ID), \ | |||
@@ -164,7 +40,7 @@ | |||
// Macro magic needed to initialize parameters for a scheme | |||
#define REG_KEM(ID) \ | |||
{ \ | |||
.p = REG_ALG(ID), \ | |||
.p = REG_ALG(PQC_ALG_KEM_,ID), \ | |||
.p.keygen = PQC_FN_KEM_KEYGEN(ID),\ | |||
.ciphertext_bsz = PQC_CT_BSZ(ID), \ | |||
.secret_bsz = PQC_KEM_BSZ(ID), \ | |||
@@ -175,7 +51,7 @@ | |||
// Macro magic needed to initialize parameters for a scheme | |||
#define REG_SIG(ID) \ | |||
{ \ | |||
.p = REG_ALG(ID), \ | |||
.p = REG_ALG(PQC_ALG_SIG_,ID), \ | |||
.p.keygen = PQC_FN_SIG_KEYGEN(ID),\ | |||
.sign_bsz = PQC_SIGN_BSZ(ID), \ | |||
.sign = PQC_FN_SIGN(ID), \ | |||
@@ -183,62 +59,94 @@ | |||
}, | |||
// Registers supported KEMs | |||
const kem_params_t kems[] = { | |||
const pqc_kem_ctx_t kems[] = { | |||
PQC_SUPPORTED_KEMS(REG_KEM) | |||
}; | |||
// Registers supported signatures | |||
const sig_params_t sigs[] = { | |||
const pqc_sig_ctx_t sigs[] = { | |||
PQC_SUPPORTED_SIGS(REG_SIG) | |||
}; | |||
const params_t *pqc_kem_alg_by_id(uint8_t id) { | |||
// Contains capabilities on x86 CPU on which implementation is running | |||
X86Features CPU_CAPS; | |||
const X86Features * get_cpu_caps(void) { | |||
return &CPU_CAPS; | |||
} | |||
const pqc_ctx_t *pqc_kem_alg_by_id(uint8_t id) { | |||
int i; | |||
for(i=0; i<PQC_ALG_KEM_MAX; i++) { | |||
if (kems[i].p.alg_id == id) { | |||
return (params_t*)&kems[i]; | |||
return (pqc_ctx_t*)&kems[i]; | |||
} | |||
} | |||
return 0; | |||
} | |||
const params_t *pqc_sig_alg_by_id(uint8_t id) { | |||
const pqc_ctx_t *pqc_sig_alg_by_id(uint8_t id) { | |||
int i; | |||
for(i=0; i<PQC_ALG_SIG_MAX; i++) { | |||
if (sigs[i].p.alg_id == id) { | |||
return (params_t*)&sigs[i]; | |||
return (pqc_ctx_t*)&sigs[i]; | |||
} | |||
} | |||
return 0; | |||
} | |||
bool pqc_keygen(const params_t *p, | |||
bool pqc_keygen(const pqc_ctx_t *p, | |||
uint8_t *pk, uint8_t *sk) { | |||
return !p->keygen(pk, sk); | |||
} | |||
bool pqc_kem_encapsulate(const params_t *p, | |||
bool pqc_kem_encapsulate(const pqc_ctx_t *p, | |||
uint8_t *ct, uint8_t *ss, | |||
const uint8_t *pk) { | |||
return !((kem_params_t*)p)->encapsulate(ct, ss, pk); | |||
return !((pqc_kem_ctx_t*)p)->encapsulate(ct, ss, pk); | |||
} | |||
bool pqc_kem_decapsulate(const params_t *p, | |||
bool pqc_kem_decapsulate(const pqc_ctx_t *p, | |||
uint8_t *ss, const uint8_t *ct, | |||
const uint8_t *sk) { | |||
return !((kem_params_t*)p)->decapsulate(ss, ct, sk); | |||
return !((pqc_kem_ctx_t*)p)->decapsulate(ss, ct, sk); | |||
} | |||
bool pqc_sig_create(const params_t *p, | |||
bool pqc_sig_create(const pqc_ctx_t *p, | |||
uint8_t *sig, uint64_t *siglen, | |||
const uint8_t *m, uint64_t mlen, | |||
const uint8_t *sk) { | |||
return !((sig_params_t *)p)->sign(sig, siglen, m, mlen, sk); | |||
return !((pqc_sig_ctx_t *)p)->sign(sig, siglen, m, mlen, sk); | |||
} | |||
bool pqc_sig_verify(const params_t *p, | |||
bool pqc_sig_verify(const pqc_ctx_t *p, | |||
const uint8_t *sig, uint64_t siglen, | |||
const uint8_t *m, uint64_t mlen, | |||
const uint8_t *pk) { | |||
return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk); | |||
return !((pqc_sig_ctx_t *)p)->verify(sig, siglen, m, mlen, pk); | |||
} | |||
uint32_t pqc_ciphertext_bsz(const pqc_ctx_t *p) { | |||
return ((pqc_kem_ctx_t *)p)->ciphertext_bsz; | |||
} | |||
uint32_t pqc_shared_secret_bsz(const pqc_ctx_t *p) { | |||
return ((pqc_kem_ctx_t *)p)->secret_bsz; | |||
} | |||
uint32_t pqc_signature_bsz(const pqc_ctx_t *p) { | |||
return ((pqc_sig_ctx_t *)p)->sign_bsz; | |||
} | |||
uint32_t pqc_public_key_bsz(const pqc_ctx_t *p) { | |||
return p->pub_key_bsz; | |||
} | |||
uint32_t pqc_private_key_bsz(const pqc_ctx_t *p) { | |||
return p->prv_key_bsz; | |||
} | |||
void static_initialization(void) __attribute__((constructor)); | |||
void static_initialization(void) { | |||
CPU_CAPS = GetX86Info().features; | |||
} |
@@ -0,0 +1,120 @@ | |||
#ifndef PQC_SCHEMES_ | |||
#define PQC_SCHEMES_ | |||
// PQClean include | |||
#include "sign/rainbow/rainbowV-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowI-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowIII-classic/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h" | |||
#include "sign/dilithium/dilithium2/clean/api.h" | |||
#include "sign/dilithium/dilithium2/avx2/api.h" | |||
#include "sign/dilithium/dilithium3/clean/api.h" | |||
#include "sign/dilithium/dilithium3/avx2/api.h" | |||
#include "sign/dilithium/dilithium5/clean/api.h" | |||
#include "sign/dilithium/dilithium5/avx2/api.h" | |||
#include "sign/falcon/api.h" | |||
#include "kem/ntru/ntruhps4096821/clean/api.h" | |||
#include "kem/ntru/ntruhps4096821/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048509/clean/api.h" | |||
#include "kem/ntru/ntruhps2048509/avx2/api.h" | |||
#include "kem/ntru/ntruhrss701/clean/api.h" | |||
#include "kem/ntru/ntruhrss701/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048677/clean/api.h" | |||
#include "kem/ntru/ntruhps2048677/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/avx2/api.h" | |||
#include "kem/kyber/kyber768/clean/api.h" | |||
#include "kem/kyber/kyber768/avx2/api.h" | |||
#include "kem/kyber/kyber1024/clean/api.h" | |||
#include "kem/kyber/kyber1024/avx2/api.h" | |||
#include "kem/kyber/kyber512/clean/api.h" | |||
#include "kem/kyber/kyber512/avx2/api.h" | |||
#include "kem/mceliece/mceliece460896f/avx/api.h" | |||
#include "kem/mceliece/mceliece460896f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128f/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128f/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119f/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119f/clean/api.h" | |||
#include "kem/mceliece/mceliece460896/avx/api.h" | |||
#include "kem/mceliece/mceliece460896/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128/clean/api.h" | |||
#include "kem/mceliece/mceliece348864f/avx/api.h" | |||
#include "kem/mceliece/mceliece348864f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119/clean/api.h" | |||
#include "kem/mceliece/mceliece348864/avx/api.h" | |||
#include "kem/mceliece/mceliece348864/clean/api.h" | |||
#include "kem/frodo/frodokem976shake/clean/api.h" | |||
#include "kem/frodo/frodokem1344shake/clean/api.h" | |||
#include "kem/frodo/frodokem640shake/clean/api.h" | |||
#include "kem/saber/lightsaber/clean/api.h" | |||
#include "kem/saber/lightsaber/avx2/api.h" | |||
#include "kem/saber/firesaber/clean/api.h" | |||
#include "kem/saber/firesaber/avx2/api.h" | |||
#include "kem/saber/saber/clean/api.h" | |||
#include "kem/saber/saber/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/avx2/api.h" | |||
#include "kem/sike/includes/sike/sike.h" | |||
#endif |
@@ -1,22 +0,0 @@ | |||
# This Makefile can be used with GNU Make or BSD Make | |||
LIB=libcommon.a | |||
HEADERS= fips202.h aes.h sha2.h randombytes.h sp800-185.h nistseedexpander.h cpucycles.h speed_print.h | |||
OBJECTS= fips202.o aes.o sha2.o randombytes.o sp800-185.o nistseedexpander.o cpucycles.o speed_print.o | |||
CFLAGS=-O3 -march=native -mtune=native -flto -mavx2 -maes -mbmi2 -Wall -Wextra -Wpedantic -Wvla -Wredundant-decls -Wmissing-prototypes -std=gnu99 $(EXTRAFLAGS) | |||
all: $(LIB) | |||
%.o: %.s $(HEADERS) | |||
$(AS) -o $@ $< | |||
%.o: %.c $(HEADERS) | |||
$(CC) $(CFLAGS) -c -o $@ $< | |||
$(LIB): $(OBJECTS) | |||
$(AR) -r $@ $(OBJECTS) | |||
clean: | |||
$(RM) $(OBJECTS) | |||
$(RM) $(LIB) |
@@ -0,0 +1,55 @@ | |||
#ifndef CT_CHECK_H
#define CT_CHECK_H

// size_t appears in the signatures below in every configuration, so the
// header that declares it is included unconditionally.
#include <stddef.h>

// Helper: evaluates V and discards the result (silences "unused" warnings).
#define VOID(V) ((void)(V))

// PQC_CT_CHECK_MSAN is defined when the build asked for the sanitizer-based
// constant-time check (PQC_USE_CTSANITIZER) and Clang's MemorySanitizer is
// actually enabled. __has_feature() is tested in a nested #if on purpose: a
// preprocessor without __has_feature still has to parse a whole #if
// expression, and `__has_feature(memory_sanitizer)` is a syntax error there
// even when guarded by `defined(__has_feature) &&` on the same line.
#if defined(PQC_USE_CTSANITIZER) && defined(__clang__) && defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define PQC_CT_CHECK_MSAN 1
#endif
#endif

// Uses Clang's Memory Sanitizer, otherwise Valgrind (ctgrind) if requested.
#if defined(PQC_CT_CHECK_MSAN)
#include <sanitizer/msan_interface.h>
#elif defined(PQC_USE_CTGRIND)
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#endif

// Marks sz bytes of memory starting at address p as uninitialized.
// Switches on constant-time checks for that region. No-op when neither
// backend is enabled.
static inline void ct_poison(const volatile void *p, size_t sz) {
#if defined(PQC_CT_CHECK_MSAN)
    __msan_allocated_memory(p, sz);
#elif defined(PQC_USE_CTGRIND)
    VALGRIND_MAKE_MEM_UNDEFINED(p, sz);
#else
    VOID(p), VOID(sz);
#endif
}

// Marks sz bytes of memory starting at p as initialized.
// Switches off constant-time checks for that region. No-op when neither
// backend is enabled.
static inline void ct_purify(const volatile void *p, size_t sz) {
#if defined(PQC_CT_CHECK_MSAN)
    __msan_unpoison(p, sz);
#elif defined(PQC_USE_CTGRIND)
    VALGRIND_MAKE_MEM_DEFINED(p, sz);
#else
    VOID(p), VOID(sz);
#endif
}

// Tells the memory sanitizer that the code is about to operate on
// uninitialized memory on purpose. Only has an effect in MSan builds.
static inline void ct_expect_umr(void) {
#if defined(PQC_CT_CHECK_MSAN)
    __msan_set_expect_umr(1);
#endif
}

// Checks whether an operation on uninitialized memory has occurred since
// ct_expect_umr(); if it has not, an error is reported. Works in tandem with
// ct_expect_umr(). In the current version of MSan the code needs to be
// compiled with `-mllvm -msan-keep-going=1` for this to work correctly.
// Only has an effect in MSan builds.
static inline void ct_require_umr(void) {
#if defined(PQC_CT_CHECK_MSAN)
    __msan_set_expect_umr(0);
#endif
}

#endif // CT_CHECK_H
@@ -542,6 +542,10 @@ void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state) | |||
keccak_inc_squeeze(output, outlen, state->ctx, SHAKE128_RATE); | |||
} | |||
void shake128_inc_reset(shake128incctx *state) { | |||
keccak_inc_init(state->ctx); | |||
} | |||
void shake128_inc_ctx_clone(shake128incctx *dest, const shake128incctx *src) { | |||
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES); | |||
if (dest->ctx == NULL) { | |||
@@ -566,6 +570,10 @@ void shake256_inc_absorb(shake256incctx *state, const uint8_t *input, size_t inl | |||
keccak_inc_absorb(state->ctx, SHAKE256_RATE, input, inlen); | |||
} | |||
void shake256_inc_reset(shake256incctx *state) { | |||
keccak_inc_init(state->ctx); | |||
} | |||
void shake256_inc_finalize(shake256incctx *state) { | |||
keccak_inc_finalize(state->ctx, SHAKE256_RATE, 0x1F); | |||
} | |||
@@ -72,6 +72,8 @@ void shake128_inc_init(shake128incctx *state); | |||
* Can be called multiple times. | |||
*/ | |||
void shake128_inc_absorb(shake128incctx *state, const uint8_t *input, size_t inlen); | |||
// Reset the state | |||
void shake128_inc_reset(shake128incctx *state); | |||
/* Finalize the XOF for squeezing */ | |||
void shake128_inc_finalize(shake128incctx *state); | |||
/* Squeeze output out of the sponge. | |||
@@ -95,6 +97,8 @@ void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen); | |||
* Supports being called multiple times | |||
*/ | |||
void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state); | |||
// Reset the state | |||
void shake256_inc_reset(shake256incctx *state); | |||
/* Free the context held by this XOF */ | |||
void shake256_ctx_release(shake256ctx *state); | |||
/* Copy the context held by this XOF */ | |||
@@ -301,6 +301,10 @@ static int randombytes_js_randombytes_nodejs(void *buf, size_t n) { | |||
#endif /* defined(__EMSCRIPTEN__) */ | |||
int randombytes(uint8_t *buf, size_t n) { | |||
#ifdef PQC_MEMSAN_BUILD | |||
size_t i; | |||
for (i=0; i<n; i++) buf[i]=0; | |||
#endif | |||
#if defined(__EMSCRIPTEN__) | |||
return randombytes_js_randombytes_nodejs(buf, n); | |||
#elif defined(__linux__) | |||
@@ -0,0 +1,48 @@ | |||
#ifndef PQC_COMMON_UTILS_ | |||
#define PQC_COMMON_UTILS_ | |||
#include <cpuinfo_x86.h> | |||
#include <stdint.h> | |||
#include <stddef.h> | |||
// Helper to stringify constants | |||
#define STR(x) STR_(x) | |||
#define STR_(x) #x | |||
/* Concatenate tokens X and Y. Can be done by the "##" operator in | |||
* simple cases, but has some side effects in more complicated cases. | |||
*/ | |||
#define GLUE(a, b) GLUE_(a, b) | |||
#define GLUE_(a, b) a##b | |||
// Number of elements in the array x. x must be a true array, not a pointer.
// The whole expansion is parenthesised so that it composes correctly with
// neighbouring operators (e.g. `n / ARRAY_LEN(a)`).
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))

// Reads 4 bytes starting at x as a little-endian 32-bit value.
#define LOAD32L(x)                      \
    (((uint32_t)((x)[0]) <<  0) |       \
     ((uint32_t)((x)[1]) <<  8) |       \
     ((uint32_t)((x)[2]) << 16) |       \
     ((uint32_t)((x)[3]) << 24))

// Reads 8 bytes starting at x as a little-endian 64-bit value.
// Outer parentheses keep the two halves together when the macro is used
// inside a larger expression (e.g. `LOAD64L(p) & mask`).
#define LOAD64L(x)                              \
    ((((uint64_t)LOAD32L((x) + 4)) << 32) |     \
     (((uint64_t)LOAD32L((x) + 0)) <<  0))

// Writes the low 16 bits of y to x[0..1] in big-endian order.
#define STORE16B(x, y) do {             \
    (x)[0] = (((y) >> 8) & 0xFF);       \
    (x)[1] = (((y) >> 0) & 0xFF);       \
} while (0)

// Reads 2 bytes starting at x as a big-endian 16-bit value.
// Fully parenthesised, and without a trailing line continuation, so the
// definition ends on its last line.
#define LOAD16B(x)                      \
    ((((uint16_t)(x)[0]) << 8) |        \
     (((uint16_t)(x)[1]) << 0))
/** | |||
* \brief Compares two arrays in constant time. | |||
* \param [in] a first array | |||
* \param [in] b second array
* \param [in] sz number of bytes to compare | |||
* \returns 0 if arrays are equal, otherwise 1. | |||
*/ | |||
uint8_t ct_memcmp(const void *a, const void *b, size_t sz); | |||
const X86Features * get_cpu_caps(void); | |||
#endif |
@@ -14,6 +14,9 @@ | |||
#include "common.h" | |||
#include "params.h" | |||
#include "common/ct_check.h" | |||
#include "common/utils.h" | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
// FrodoKEM's key generation | |||
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes) | |||
@@ -139,7 +142,6 @@ int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, cons | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { | |||
// FrodoKEM's key decapsulation | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
@@ -218,9 +220,25 @@ int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct | |||
// Needs to avoid branching on secret data as per: | |||
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum | |||
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020. | |||
int8_t selector = PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR); | |||
#if 0 | |||
int8_t selector = ct_memcmp(Bp, BBp, PARAMS_N * PARAMS_NBAR) | ct_memcmp(C, CC, PARAMS_NBAR * PARAMS_NBAR); | |||
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s) | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector); | |||
#else | |||
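// Accept the ciphertext only if Bp == BBp and C == CC.
// NOTE(review): this check is resolved with an ordinary `if`, so the branch
// taken depends on secret data -- exactly what the comment above (Guo,
// Johansson, Nilsson, CRYPTO 2020) says must be avoided. The constant-time
// ct_select path is disabled by `#if 0`; confirm this is intentional.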
//ct_poison(Bp, sizeof(Bp)); | |||
//ct_poison(BBp, sizeof(BBp)); | |||
if (ct_memcmp(Bp, BBp, 2*PARAMS_N*PARAMS_NBAR) == 0 && ct_memcmp(C, CC, 2*PARAMS_NBAR*PARAMS_NBAR) == 0) { | |||
// Load k' to do ss = F(ct || k') | |||
memcpy(Fin_k, kprime, CRYPTO_BYTES); | |||
} else { | |||
// Load s to do ss = F(ct || s) | |||
// This branch is executed when a malicious ciphertext is decapsulated | |||
// and is necessary for security. Note that the known answer tests | |||
// will not exercise this line of code but it should not be removed. | |||
memcpy(Fin_k, sk_s, CRYPTO_BYTES); | |||
} | |||
#endif | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
@@ -11,6 +11,8 @@ | |||
#include "common.h" | |||
#include "params.h" | |||
#include "common/ct_check.h" | |||
static inline uint8_t min(uint8_t x, uint8_t y) { | |||
if (x < y) { | |||
return x; | |||
@@ -246,9 +248,9 @@ int8_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_ | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Select one of the two input arrays to be moved to r, without branching
    // on selector.
    // If (selector == 0) then load r with a, otherwise load r with b.
    // Any non-zero selector picks b: both -1 (the value this function's
    // original contract documents) and 1 (the "not equal" result documented
    // for ct_memcmp) work. A plain `0 - selector` would turn -1 into 0x01
    // and mix only the lowest bit of b into r.
    const uint8_t s = (uint8_t)selector;
    // (s | -s) has its top bit set exactly when s != 0.
    const uint8_t nonzero = (uint8_t)((s | (uint8_t)(0U - s)) >> 7);
    // 0x00 when selector == 0, 0xFF otherwise.
    const uint8_t mask = (uint8_t)(0U - nonzero);

    for (size_t i = 0; i < len; i++) {
        r[i] = (uint8_t)((~mask & a[i]) | (mask & b[i]));
    }
}
@@ -3,7 +3,7 @@ | |||
#include <stdint.h> | |||
/************************************************* | |||
* Name: PQCLEAN_KYBER512_CLEAN_montgomery_reduce | |||
* Name: kyber_montgomery_reduce | |||
* | |||
* Description: Montgomery reduction; given a 32-bit integer a, computes | |||
* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 | |||
@@ -13,7 +13,7 @@ | |||
* | |||
* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. | |||
**************************************************/ | |||
int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a) { | |||
int16_t kyber_montgomery_reduce(int32_t a) { | |||
int32_t t; | |||
int16_t u; | |||
@@ -25,20 +25,18 @@ int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a) { | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_KYBER512_CLEAN_barrett_reduce | |||
* Name: kyber_barrett_reduce | |||
* | |||
* Description: Barrett reduction; given a 16-bit integer a, computes | |||
* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} | |||
* centered representative congruent to a mod q in {0,q} | |||
* | |||
* Arguments: - int16_t a: input integer to be reduced | |||
* | |||
* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. | |||
* Returns: integer in {0,q} congruent to a modulo q. | |||
**************************************************/ | |||
int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a) { | |||
int16_t kyber_barrett_reduce(int16_t a) { | |||
int16_t t; | |||
const int16_t v = ((1U << 26) + KYBER_Q / 2) / KYBER_Q; | |||
t = ((int32_t)v * a + (1 << 25)) >> 26; | |||
t *= KYBER_Q; | |||
return a - t; | |||
static const int32_t v = 20159; | |||
t = ((v * a) + (1 << 25)) >> 26; | |||
return a - (t*KYBER_Q); | |||
} |
@@ -0,0 +1,22 @@ | |||
#ifndef KYBER_REDUCE_H | |||
#define KYBER_REDUCE_H | |||
#include <stdint.h> | |||
// TODO: Remove those once not used | |||
#define PQCLEAN_KYBER512_CLEAN_montgomery_reduce kyber_montgomery_reduce | |||
#define PQCLEAN_KYBER768_CLEAN_montgomery_reduce kyber_montgomery_reduce | |||
#define PQCLEAN_KYBER1024_CLEAN_montgomery_reduce kyber_montgomery_reduce | |||
#define PQCLEAN_KYBER512_CLEAN_barrett_reduce kyber_barrett_reduce | |||
#define PQCLEAN_KYBER768_CLEAN_barrett_reduce kyber_barrett_reduce | |||
#define PQCLEAN_KYBER1024_CLEAN_barrett_reduce kyber_barrett_reduce | |||
#define MONT 2285 // 2^16 mod q | |||
#define QINV 62209 // q^-1 mod 2^16 | |||
int16_t kyber_montgomery_reduce(int32_t a); | |||
int16_t kyber_barrett_reduce(int16_t a); | |||
#endif |
@@ -6,7 +6,7 @@ set( | |||
ntt.c | |||
poly.c | |||
polyvec.c | |||
reduce.c | |||
../../common/reduce.c | |||
symmetric-shake.c | |||
verify.c | |||
) | |||
@@ -1,6 +1,6 @@ | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "reduce.h" | |||
#include "../../common/reduce.h" | |||
#include <stdint.h> | |||
/* Code to generate PQCLEAN_KYBER1024_CLEAN_zetas and zetas_inv used in the number-theoretic transform: | |||
@@ -2,7 +2,7 @@ | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "../../common/reduce.h" | |||
#include "symmetric.h" | |||
#include <stdint.h> | |||
@@ -1,44 +0,0 @@ | |||
#include "params.h" | |||
#include "reduce.h" | |||
#include <stdint.h> | |||
/************************************************* | |||
* Name: PQCLEAN_KYBER1024_CLEAN_montgomery_reduce | |||
* | |||
* Description: Montgomery reduction; given a 32-bit integer a, computes | |||
* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 | |||
* | |||
* Arguments: - int32_t a: input integer to be reduced; | |||
* has to be in {-q2^15,...,q2^15-1} | |||
* | |||
* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. | |||
**************************************************/ | |||
int16_t PQCLEAN_KYBER1024_CLEAN_montgomery_reduce(int32_t a) { | |||
int32_t t; | |||
int16_t u; | |||
u = (int16_t)(a * (int64_t)QINV); | |||
t = (int32_t)u * KYBER_Q; | |||
t = a - t; | |||
t >>= 16; | |||
return (int16_t)t; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_KYBER1024_CLEAN_barrett_reduce | |||
* | |||
* Description: Barrett reduction; given a 16-bit integer a, computes | |||
* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} | |||
* | |||
* Arguments: - int16_t a: input integer to be reduced | |||
* | |||
* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. | |||
**************************************************/ | |||
int16_t PQCLEAN_KYBER1024_CLEAN_barrett_reduce(int16_t a) { | |||
int16_t t; | |||
const int16_t v = ((1U << 26) + KYBER_Q / 2) / KYBER_Q; | |||
t = ((int32_t)v * a + (1 << 25)) >> 26; | |||
t *= KYBER_Q; | |||
return a - t; | |||
} |
@@ -1,13 +0,0 @@ | |||
#ifndef PQCLEAN_KYBER1024_CLEAN_REDUCE_H | |||
#define PQCLEAN_KYBER1024_CLEAN_REDUCE_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
#define MONT 2285 // 2^16 mod q | |||
#define QINV 62209 // q^-1 mod 2^16 | |||
int16_t PQCLEAN_KYBER1024_CLEAN_montgomery_reduce(int32_t a); | |||
int16_t PQCLEAN_KYBER1024_CLEAN_barrett_reduce(int16_t a); | |||
#endif |
@@ -289,7 +289,7 @@ void PQCLEAN_KYBER512_AVX2_indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], | |||
const uint8_t coins[KYBER_SYMBYTES]) { | |||
unsigned int i; | |||
uint8_t seed[KYBER_SYMBYTES]; | |||
polyvec sp, pkpv, ep, at[KYBER_K], b; | |||
polyvec sp, pkpv, ep, at[KYBER_K], b = {0}; | |||
poly v, k, epp; | |||
unpack_pk(&pkpv, seed, pk); | |||
@@ -51,9 +51,9 @@ int PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(unsigned char pk[KYBER_PUBLICKEYBYT | |||
int PQCLEAN_KYBER512_AVX2_crypto_kem_enc(unsigned char ct[KYBER_CIPHERTEXTBYTES], | |||
unsigned char ss[KYBER_SSBYTES], | |||
const unsigned char pk[KYBER_PUBLICKEYBYTES]) { | |||
uint8_t buf[2 * KYBER_SYMBYTES]; | |||
uint8_t buf[2 * KYBER_SYMBYTES] = {0}; | |||
/* Will contain key, coins */ | |||
uint8_t kr[2 * KYBER_SYMBYTES]; | |||
uint8_t kr[2 * KYBER_SYMBYTES] = {0}; | |||
randombytes(buf, KYBER_SYMBYTES); | |||
/* Don't release system RNG output */ | |||
@@ -182,7 +182,7 @@ void PQCLEAN_KYBER512_AVX2_polyvec_invntt_tomont(polyvec *r) { | |||
**************************************************/ | |||
void PQCLEAN_KYBER512_AVX2_polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) { | |||
size_t i; | |||
poly tmp; | |||
poly tmp = {0}; | |||
PQCLEAN_KYBER512_AVX2_poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); | |||
for (i = 1; i < KYBER_K; i++) { | |||
@@ -6,7 +6,6 @@ set( | |||
ntt.c | |||
poly.c | |||
polyvec.c | |||
reduce.c | |||
symmetric-shake.c | |||
verify.c | |||
) | |||
@@ -1,6 +1,6 @@ | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "reduce.h" | |||
#include "../../common/reduce.h" | |||
#include <stdint.h> | |||
/* Code to generate PQCLEAN_KYBER512_CLEAN_zetas and zetas_inv used in the number-theoretic transform: | |||
@@ -2,7 +2,7 @@ | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "../../common/reduce.h" | |||
#include "symmetric.h" | |||
#include <stdint.h> | |||
@@ -1,13 +0,0 @@ | |||
#ifndef PQCLEAN_KYBER512_CLEAN_REDUCE_H | |||
#define PQCLEAN_KYBER512_CLEAN_REDUCE_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
#define MONT 2285 // 2^16 mod q | |||
#define QINV 62209 // q^-1 mod 2^16 | |||
int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a); | |||
int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a); | |||
#endif |
@@ -6,7 +6,6 @@ set( | |||
ntt.c | |||
poly.c | |||
polyvec.c | |||
reduce.c | |||
symmetric-shake.c | |||
verify.c | |||
) | |||
@@ -1,6 +1,6 @@ | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "reduce.h" | |||
#include "../../common/reduce.h" | |||
#include <stdint.h> | |||
/* Code to generate PQCLEAN_KYBER768_CLEAN_zetas and zetas_inv used in the number-theoretic transform: | |||
@@ -2,7 +2,7 @@ | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "../../common/reduce.h" | |||
#include "symmetric.h" | |||
#include <stdint.h> | |||
@@ -1,44 +0,0 @@ | |||
#include "params.h" | |||
#include "reduce.h" | |||
#include <stdint.h> | |||
/************************************************* | |||
* Name: PQCLEAN_KYBER768_CLEAN_montgomery_reduce | |||
* | |||
* Description: Montgomery reduction; given a 32-bit integer a, computes | |||
* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 | |||
* | |||
* Arguments: - int32_t a: input integer to be reduced; | |||
* has to be in {-q2^15,...,q2^15-1} | |||
* | |||
* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. | |||
**************************************************/ | |||
int16_t PQCLEAN_KYBER768_CLEAN_montgomery_reduce(int32_t a) { | |||
int32_t t; | |||
int16_t u; | |||
u = (int16_t)(a * (int64_t)QINV); | |||
t = (int32_t)u * KYBER_Q; | |||
t = a - t; | |||
t >>= 16; | |||
return (int16_t)t; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_KYBER768_CLEAN_barrett_reduce | |||
* | |||
* Description: Barrett reduction; given a 16-bit integer a, computes | |||
* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} | |||
* | |||
* Arguments: - int16_t a: input integer to be reduced | |||
* | |||
* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. | |||
**************************************************/ | |||
int16_t PQCLEAN_KYBER768_CLEAN_barrett_reduce(int16_t a) { | |||
int16_t t; | |||
const int16_t v = ((1U << 26) + KYBER_Q / 2) / KYBER_Q; | |||
t = ((int32_t)v * a + (1 << 25)) >> 26; | |||
t *= KYBER_Q; | |||
return a - t; | |||
} |
@@ -1,13 +0,0 @@ | |||
/* Declarations for the Kyber768 "clean" modular-reduction helpers. */
#ifndef PQCLEAN_KYBER768_CLEAN_REDUCE_H | |||
#define PQCLEAN_KYBER768_CLEAN_REDUCE_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
/* Reduction constants; their meaning is given by the trailing comment on each line. */
#define MONT 2285 // 2^16 mod q | |||
#define QINV 62209 // q^-1 mod 2^16 | |||
/* montgomery_reduce: 32-bit a -> 16-bit value congruent to a * 2^-16 mod q.
 * barrett_reduce:    16-bit a -> centered representative of a mod q. */
int16_t PQCLEAN_KYBER768_CLEAN_montgomery_reduce(int32_t a); | |||
int16_t PQCLEAN_KYBER768_CLEAN_barrett_reduce(int16_t a); | |||
#endif |
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece348864 "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE348864
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece348864_clean
  PQCLEAN_MCELIECE348864_OPT
  "${SRC_CLEAN_MCELIECE348864}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece348864f "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE348864F
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece348864f_clean
  PQCLEAN_MCELIECE348864F_OPT
  "${SRC_CLEAN_MCELIECE348864F}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece460896 "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE460896
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece460896_clean
  PQCLEAN_MCELIECE460896_OPT
  "${SRC_CLEAN_MCELIECE460896}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece460896f "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE460896F
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece460896f_clean
  PQCLEAN_MCELIECE460896F_OPT
  "${SRC_CLEAN_MCELIECE460896F}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece6688128 "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE6688128
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece6688128_clean
  PQCLEAN_MCELIECE6688128_OPT
  "${SRC_CLEAN_MCELIECE6688128}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece6688128f "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE6688128F
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece6688128f_clean
  PQCLEAN_MCELIECE6688128F_OPT
  "${SRC_CLEAN_MCELIECE6688128F}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece6960119 "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE6960119
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece6960119_clean
  PQCLEAN_MCELIECE6960119_OPT
  "${SRC_CLEAN_MCELIECE6960119}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece6960119f "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE6960119F
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece6960119f_clean
  PQCLEAN_MCELIECE6960119F_OPT
  "${SRC_CLEAN_MCELIECE6960119F}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece8192128 "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE8192128
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece8192128_clean
  PQCLEAN_MCELIECE8192128_OPT
  "${SRC_CLEAN_MCELIECE8192128}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# C sources of the mceliece8192128f "clean" implementation, relative to this directory.
set(SRC_CLEAN_MCELIECE8192128F
  aes256ctr.c benes.c bm.c controlbits.c
  decrypt.c encrypt.c gf.c operations.c
  pk_gen.c root.c sk_gen.c synd.c
  transpose.c util.c
)

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  mceliece8192128f_clean
  PQCLEAN_MCELIECE8192128F_OPT
  "${SRC_CLEAN_MCELIECE8192128F}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -0,0 +1,20 @@ | |||
# Portable C sources of the SIKE/p434 implementation, relative to this directory.
set(
  SRC_CLEAN_SIKE_P434
  p434/fpx.c
  p434/fp_generic.c
  p434/isogeny.c
  p434/params.c
  p434/sike.c)

# On x86-64, additionally build the assembly file and define PQC_ASM so the
# C code calls into it. ARCH is expanded inside quotes: an unset or empty ARCH
# then compares as an empty string instead of producing a malformed if().
# NOTE(review): add_definitions() is directory-scoped, so PQC_ASM reaches every
# target declared in this directory; consider a target-scoped definition once
# the target created by define_kem_alg() is known.
if("${ARCH}" STREQUAL "ARCH_x86_64")
  add_definitions(-DPQC_ASM=1)
  list(APPEND SRC_CLEAN_SIKE_P434 p434/fp-x86_64.S)
endif()

# Hand the source list to the project's define_kem_alg() helper.
define_kem_alg(
  sike_p434_clean
  PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")
@@ -0,0 +1,81 @@ | |||
#ifndef SIKE_H_ | |||
#define SIKE_H_ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "randombytes.h" | |||
/* SIKE | |||
* | |||
* SIKE is an isogeny-based post-quantum key encapsulation mechanism. A description of the | |||
* algorithm is provided in [SIKE]. This implementation uses 434-bit field size. The code | |||
* is based on "Additional_Implementations" from PQC NIST submission package which can | |||
* be found here: | |||
* https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip | |||
* | |||
* [SIKE] https://sike.org/files/SIDH-spec.pdf | |||
*/ | |||
// SIKE_PUB_BYTESZ is the number of bytes in a public key. | |||
#define SIKE_PUB_BYTESZ 330 | |||
// SIKE_PRV_BYTESZ is the number of bytes in a private key. | |||
#define SIKE_PRV_BYTESZ 28 | |||
// SIKE_SS_BYTESZ is the number of bytes in a shared key. | |||
#define SIKE_SS_BYTESZ 16 | |||
// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated | |||
// with the public key (see 1.4 of SIKE). | |||
#define SIKE_MSG_BYTESZ 16 | |||
// SIKE_CT_BYTESZ is the number of bytes in a ciphertext. | |||
#define SIKE_CT_BYTESZ (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ) | |||
// SIKE_keypair outputs a public and secret key. In case of success | |||
// function returns 1, otherwise 0. | |||
int SIKE_keypair( | |||
uint8_t out_priv[SIKE_PRV_BYTESZ], | |||
uint8_t out_pub[SIKE_PUB_BYTESZ]); | |||
// SIKE_encaps generates and encrypts a random session key, writing those values to | |||
// |out_shared_key| and |out_ciphertext|, respectively. | |||
void SIKE_encaps( | |||
uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
uint8_t out_ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ]); | |||
// SIKE_decaps outputs a random session key, writing it to |out_shared_key|. | |||
void SIKE_decaps( | |||
uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
const uint8_t ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ], | |||
const uint8_t priv_key[SIKE_PRV_BYTESZ]); | |||
// boilerplate needed for integration
//
// The secret-key size is a sum of three sizes, so it is wrapped in
// parentheses: without them an expression such as
// `2 * PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES` would only multiply the
// first term.
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME "SIKE/p434"
// The AVX2 names carry the same values as the CLEAN ones.
#define PQCLEAN_SIKE434_AVX2_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_AVX2_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_ALGNAME "SIKE/p434"
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
SIKE_keypair(sk, pk); | |||
// KATs require the public key to be concatenated after private key | |||
memcpy(&sk[SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ], pk, SIKE_PUB_BYTESZ); | |||
return 0; | |||
} | |||
// Encapsulation wrapper: forwards to SIKE_encaps, which writes the shared
// secret to ss and the ciphertext to ct for the public key pk.
// Always returns 0 (SIKE_encaps has no status to report).
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // note the argument order: SIKE_encaps takes the shared key first
    SIKE_encaps(ss, ct, pk);

    return 0;
}
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { | |||
SIKE_decaps(ss, ct, &sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], sk); | |||
return 0; | |||
} | |||
#endif |
@@ -0,0 +1,926 @@ | |||
.text | |||
/*
 * .Lp434x2: six 64-bit words used as the modulus operand by the add/sub
 * routines below. Those routines apply the word at offset 8 to two
 * consecutive limbs, so the table covers seven limbs with six entries.
 * The C fallback uses params.prime_x2 (2*p434) for the same purpose.
 */
.Lp434x2: | |||
.quad 0xFFFFFFFFFFFFFFFE | |||
.quad 0xFFFFFFFFFFFFFFFF | |||
.quad 0xFB82ECF5C5FFFFFF | |||
.quad 0xF78CB8F062B15D47 | |||
.quad 0xD9F8BFAD038A40AC | |||
.quad 0x0004683E4E2EE688 | |||
/*
 * .Lp434p1: four 64-bit words used as the multiplier in sike_fprdc_asm.
 * NOTE(review): presumably the non-zero upper words of p434 + 1 - confirm.
 */
.Lp434p1: | |||
.quad 0xFDC1767AE3000000 | |||
.quad 0x7BC65C783158AEA3 | |||
.quad 0x6CFC5FD681C52056 | |||
.quad 0x0002341F27177344 | |||
/*
 * sike_fpadd_asm: seven-limb (7 x 64-bit) addition with conditional correction.
 * Operands: %rdi and %rsi point to the inputs, %rdx to the output
 * (a, b, c of the C prototype, assuming the System V AMD64 calling convention).
 * Computes a + b - .Lp434x2 and adds .Lp434x2 back when the subtraction
 * borrowed. The choice is made with an all-ones/zero mask in %rax, not a branch.
 */
.globl sike_fpadd_asm | |||
.hidden sike_fpadd_asm | |||
.type sike_fpadd_asm,@function | |||
sike_fpadd_asm: | |||
.cfi_startproc | |||
/* save the callee-saved registers used as limbs 4..6 */
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
/* %rax = 0; it later becomes the correction mask */
xorq %rax,%rax | |||
/* r8..r14 = a + b, carry chained through adcq */
movq 0(%rdi),%r8 | |||
addq 0(%rsi),%r8 | |||
movq 8(%rdi),%r9 | |||
adcq 8(%rsi),%r9 | |||
movq 16(%rdi),%r10 | |||
adcq 16(%rsi),%r10 | |||
movq 24(%rdi),%r11 | |||
adcq 24(%rsi),%r11 | |||
movq 32(%rdi),%r12 | |||
adcq 32(%rsi),%r12 | |||
movq 40(%rdi),%r13 | |||
adcq 40(%rsi),%r13 | |||
movq 48(%rdi),%r14 | |||
adcq 48(%rsi),%r14 | |||
/* r8..r14 -= .Lp434x2; the word at offset 8 is used for limbs 1 and 2 */
movq .Lp434x2(%rip),%rcx | |||
subq %rcx,%r8 | |||
movq 8+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r9 | |||
sbbq %rcx,%r10 | |||
movq 16+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r11 | |||
movq 24+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r12 | |||
movq 32+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r13 | |||
movq 40+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r14 | |||
/* %rax = 0 - borrow: all ones if the subtraction borrowed, else zero */
sbbq $0,%rax | |||
/* add back (.Lp434x2 & mask), limbs 0..3 first */
movq .Lp434x2(%rip),%rdi | |||
andq %rax,%rdi | |||
movq 8+.Lp434x2(%rip),%rsi | |||
andq %rax,%rsi | |||
movq 16+.Lp434x2(%rip),%rcx | |||
andq %rax,%rcx | |||
addq %rdi,%r8 | |||
movq %r8,0(%rdx) | |||
adcq %rsi,%r9 | |||
movq %r9,8(%rdx) | |||
adcq %rsi,%r10 | |||
movq %r10,16(%rdx) | |||
adcq %rcx,%r11 | |||
movq %r11,24(%rdx) | |||
/* park the carry in %cl: the andq instructions below overwrite the flags */
setc %cl | |||
movq 24+.Lp434x2(%rip),%r8 | |||
andq %rax,%r8 | |||
movq 32+.Lp434x2(%rip),%r9 | |||
andq %rax,%r9 | |||
movq 40+.Lp434x2(%rip),%r10 | |||
andq %rax,%r10 | |||
/* reload the parked carry into CF and finish limbs 4..6 */
btq $0,%rcx | |||
adcq %r8,%r12 | |||
movq %r12,32(%rdx) | |||
adcq %r9,%r13 | |||
movq %r13,40(%rdx) | |||
adcq %r10,%r14 | |||
movq %r14,48(%rdx) | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_fpsub_asm: seven-limb (7 x 64-bit) subtraction with conditional correction.
 * Operands: %rdi and %rsi point to the inputs, %rdx to the output
 * (a, b, c of the C prototype, assuming the System V AMD64 calling convention).
 * Computes a - b and adds .Lp434x2 when the subtraction borrowed. The choice
 * is made with an all-ones/zero mask in %rax, not a branch.
 */
.globl sike_fpsub_asm | |||
.hidden sike_fpsub_asm | |||
.type sike_fpsub_asm,@function | |||
sike_fpsub_asm: | |||
.cfi_startproc | |||
/* save the callee-saved registers used as limbs 4..6 */
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
/* %rax = 0; it later becomes the correction mask */
xorq %rax,%rax | |||
/* r8..r14 = a - b, borrow chained through sbbq */
movq 0(%rdi),%r8 | |||
subq 0(%rsi),%r8 | |||
movq 8(%rdi),%r9 | |||
sbbq 8(%rsi),%r9 | |||
movq 16(%rdi),%r10 | |||
sbbq 16(%rsi),%r10 | |||
movq 24(%rdi),%r11 | |||
sbbq 24(%rsi),%r11 | |||
movq 32(%rdi),%r12 | |||
sbbq 32(%rsi),%r12 | |||
movq 40(%rdi),%r13 | |||
sbbq 40(%rsi),%r13 | |||
movq 48(%rdi),%r14 | |||
sbbq 48(%rsi),%r14 | |||
/* %rax = 0 - borrow: all ones if a < b, else zero */
sbbq $0x0,%rax | |||
/* add (.Lp434x2 & mask); the word at offset 8 is used for limbs 1 and 2 */
movq .Lp434x2(%rip),%rdi | |||
andq %rax,%rdi | |||
movq 8+.Lp434x2(%rip),%rsi | |||
andq %rax,%rsi | |||
movq 16+.Lp434x2(%rip),%rcx | |||
andq %rax,%rcx | |||
addq %rdi,%r8 | |||
movq %r8,0(%rdx) | |||
adcq %rsi,%r9 | |||
movq %r9,8(%rdx) | |||
adcq %rsi,%r10 | |||
movq %r10,16(%rdx) | |||
adcq %rcx,%r11 | |||
movq %r11,24(%rdx) | |||
/* park the carry in %cl: the andq instructions below overwrite the flags */
setc %cl | |||
movq 24+.Lp434x2(%rip),%r8 | |||
andq %rax,%r8 | |||
movq 32+.Lp434x2(%rip),%r9 | |||
andq %rax,%r9 | |||
movq 40+.Lp434x2(%rip),%r10 | |||
andq %rax,%r10 | |||
/* reload the parked carry into CF and finish limbs 4..6 */
btq $0x0,%rcx | |||
adcq %r8,%r12 | |||
adcq %r9,%r13 | |||
adcq %r10,%r14 | |||
movq %r12,32(%rdx) | |||
movq %r13,40(%rdx) | |||
movq %r14,48(%rdx) | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_mpadd_asm: plain seven-limb (7 x 64-bit) addition, no modular correction.
 * Reads the operands at %rdi and %rsi and writes the sum to %rdx.
 * The carry out of the top limb is not stored or returned.
 */
.globl sike_mpadd_asm | |||
.hidden sike_mpadd_asm | |||
.type sike_mpadd_asm,@function | |||
sike_mpadd_asm: | |||
.cfi_startproc | |||
/* limbs 0..4 */
movq 0(%rdi),%r8; | |||
movq 8(%rdi),%r9 | |||
movq 16(%rdi),%r10 | |||
movq 24(%rdi),%r11 | |||
movq 32(%rdi),%rcx | |||
addq 0(%rsi),%r8 | |||
adcq 8(%rsi),%r9 | |||
adcq 16(%rsi),%r10 | |||
adcq 24(%rsi),%r11 | |||
adcq 32(%rsi),%rcx | |||
movq %r8,0(%rdx) | |||
movq %r9,8(%rdx) | |||
movq %r10,16(%rdx) | |||
movq %r11,24(%rdx) | |||
movq %rcx,32(%rdx) | |||
/* limbs 5..6; movq leaves CF untouched, so the carry chain continues */
movq 40(%rdi),%r8 | |||
movq 48(%rdi),%r9 | |||
adcq 40(%rsi),%r8 | |||
adcq 48(%rsi),%r9 | |||
movq %r8,40(%rdx) | |||
movq %r9,48(%rdx) | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_mpsubx2_asm: fourteen-limb (14 x 64-bit) subtraction, no correction.
 * Writes (%rdi) - (%rsi) to (%rdx), processed in groups of 5, 5 and 4 limbs.
 * On return %rax is 0 when there was no final borrow and all ones otherwise.
 */
.globl sike_mpsubx2_asm | |||
.hidden sike_mpsubx2_asm | |||
.type sike_mpsubx2_asm,@function | |||
sike_mpsubx2_asm: | |||
.cfi_startproc | |||
xorq %rax,%rax | |||
/* limbs 0..4 */
movq 0(%rdi),%r8 | |||
movq 8(%rdi),%r9 | |||
movq 16(%rdi),%r10 | |||
movq 24(%rdi),%r11 | |||
movq 32(%rdi),%rcx | |||
subq 0(%rsi),%r8 | |||
sbbq 8(%rsi),%r9 | |||
sbbq 16(%rsi),%r10 | |||
sbbq 24(%rsi),%r11 | |||
sbbq 32(%rsi),%rcx | |||
movq %r8,0(%rdx) | |||
movq %r9,8(%rdx) | |||
movq %r10,16(%rdx) | |||
movq %r11,24(%rdx) | |||
movq %rcx,32(%rdx) | |||
/* limbs 5..9; movq leaves CF untouched, so the borrow chain continues */
movq 40(%rdi),%r8 | |||
movq 48(%rdi),%r9 | |||
movq 56(%rdi),%r10 | |||
movq 64(%rdi),%r11 | |||
movq 72(%rdi),%rcx | |||
sbbq 40(%rsi),%r8 | |||
sbbq 48(%rsi),%r9 | |||
sbbq 56(%rsi),%r10 | |||
sbbq 64(%rsi),%r11 | |||
sbbq 72(%rsi),%rcx | |||
movq %r8,40(%rdx) | |||
movq %r9,48(%rdx) | |||
movq %r10,56(%rdx) | |||
movq %r11,64(%rdx) | |||
movq %rcx,72(%rdx) | |||
/* limbs 10..13 */
movq 80(%rdi),%r8 | |||
movq 88(%rdi),%r9 | |||
movq 96(%rdi),%r10 | |||
movq 104(%rdi),%r11 | |||
sbbq 80(%rsi),%r8 | |||
sbbq 88(%rsi),%r9 | |||
sbbq 96(%rsi),%r10 | |||
sbbq 104(%rsi),%r11 | |||
/* %rax = 0 - final borrow */
sbbq $0x0,%rax | |||
movq %r8,80(%rdx) | |||
movq %r9,88(%rdx) | |||
movq %r10,96(%rdx) | |||
movq %r11,104(%rdx) | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_mpdblsubx2_asm: in-place double subtraction over fourteen 64-bit limbs.
 * Updates the buffer at %rdx to (%rdx) - (%rdi) - (%rsi). The work is split
 * into two seven-limb halves; the borrows of the lower half are counted in
 * %rax (0, 1 or 2) and subtracted from the first limb of the upper half.
 * Borrows out of the upper half are discarded.
 */
.globl sike_mpdblsubx2_asm | |||
.hidden sike_mpdblsubx2_asm | |||
.type sike_mpdblsubx2_asm,@function | |||
sike_mpdblsubx2_asm: | |||
.cfi_startproc | |||
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
/* %rax = borrow counter for the lower half */
xorq %rax,%rax | |||
/* lower half: load limbs 0..6 of the in/out buffer */
movq 0(%rdx),%r8 | |||
movq 8(%rdx),%r9 | |||
movq 16(%rdx),%r10 | |||
movq 24(%rdx),%r11 | |||
movq 32(%rdx),%r12 | |||
movq 40(%rdx),%r13 | |||
movq 48(%rdx),%rcx | |||
/* subtract the first operand, then count its borrow */
subq 0(%rdi),%r8 | |||
sbbq 8(%rdi),%r9 | |||
sbbq 16(%rdi),%r10 | |||
sbbq 24(%rdi),%r11 | |||
sbbq 32(%rdi),%r12 | |||
sbbq 40(%rdi),%r13 | |||
sbbq 48(%rdi),%rcx | |||
adcq $0x0,%rax | |||
/* subtract the second operand, then count its borrow */
subq 0(%rsi),%r8 | |||
sbbq 8(%rsi),%r9 | |||
sbbq 16(%rsi),%r10 | |||
sbbq 24(%rsi),%r11 | |||
sbbq 32(%rsi),%r12 | |||
sbbq 40(%rsi),%r13 | |||
sbbq 48(%rsi),%rcx | |||
adcq $0x0,%rax | |||
movq %r8,0(%rdx) | |||
movq %r9,8(%rdx) | |||
movq %r10,16(%rdx) | |||
movq %r11,24(%rdx) | |||
movq %r12,32(%rdx) | |||
movq %r13,40(%rdx) | |||
movq %rcx,48(%rdx) | |||
/* upper half: load limbs 7..13 of the in/out buffer */
movq 56(%rdx),%r8 | |||
movq 64(%rdx),%r9 | |||
movq 72(%rdx),%r10 | |||
movq 80(%rdx),%r11 | |||
movq 88(%rdx),%r12 | |||
movq 96(%rdx),%r13 | |||
movq 104(%rdx),%rcx | |||
/*
 * Apply the counted borrows to limb 7, then subtract the first operand.
 * NOTE(review): the borrow produced by this subq is consumed by the sbbq on
 * the same limb (%r8) rather than carried into limb 8 - confirm that limb 7
 * can never be smaller than %rax here, or that this is intended.
 */
subq %rax,%r8 | |||
sbbq 56(%rdi),%r8 | |||
sbbq 64(%rdi),%r9 | |||
sbbq 72(%rdi),%r10 | |||
sbbq 80(%rdi),%r11 | |||
sbbq 88(%rdi),%r12 | |||
sbbq 96(%rdi),%r13 | |||
sbbq 104(%rdi),%rcx | |||
/* subtract the second operand */
subq 56(%rsi),%r8 | |||
sbbq 64(%rsi),%r9 | |||
sbbq 72(%rsi),%r10 | |||
sbbq 80(%rsi),%r11 | |||
sbbq 88(%rsi),%r12 | |||
sbbq 96(%rsi),%r13 | |||
sbbq 104(%rsi),%rcx | |||
movq %r8,56(%rdx) | |||
movq %r9,64(%rdx) | |||
movq %r10,72(%rdx) | |||
movq %r11,80(%rdx) | |||
movq %r12,88(%rdx) | |||
movq %r13,96(%rdx) | |||
movq %rcx,104(%rdx) | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_fprdc_asm: reduces the fourteen-limb value at %rdi to a seven-limb
 * result written to %rsi (ma and mc of the C prototype, assuming the
 * System V AMD64 calling convention).
 *
 * The work is done in four rounds. Each round multiplies one or two input
 * limbs by the four-word constant .Lp434p1 and adds the product into the
 * value at a higher limb offset.
 *
 * Notes:
 *  - Uses mulx, adcx and adox, so the CPU must support BMI2 and ADX.
 *  - The buffer at %rdi is used as scratch space and is overwritten, even
 *    though the C prototype declares it const.
 *  - %r14 and %r15 are saved and restored but not otherwise referenced.
 */
.globl sike_fprdc_asm | |||
.hidden sike_fprdc_asm | |||
.type sike_fprdc_asm,@function | |||
sike_fprdc_asm: | |||
.cfi_startproc | |||
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
pushq %r15 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r15, -40 | |||
/* round 1: (limb 0, limb 1) x .Lp434p1 -> r8..r13; xorq also clears CF/OF */
xorq %rax,%rax | |||
movq 0+0(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
xorq %rax,%rax | |||
movq 0+8(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r13,%rcx | |||
adcxq %r13,%r9 | |||
adcxq %rcx,%r10 | |||
mulxq 8+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r11 | |||
adoxq %rcx,%r10 | |||
mulxq 16+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r12 | |||
adoxq %rcx,%r11 | |||
mulxq 24+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %rax,%r13 | |||
adoxq %rcx,%r12 | |||
adoxq %rax,%r13 | |||
/* add the round-1 product into limbs 3..9 and propagate the carry to 10..13 */
xorq %rcx,%rcx | |||
addq 24(%rdi),%r8 | |||
adcq 32(%rdi),%r9 | |||
adcq 40(%rdi),%r10 | |||
adcq 48(%rdi),%r11 | |||
adcq 56(%rdi),%r12 | |||
adcq 64(%rdi),%r13 | |||
adcq 72(%rdi),%rcx | |||
movq %r8,24(%rdi) | |||
movq %r9,32(%rdi) | |||
movq %r10,40(%rdi) | |||
movq %r11,48(%rdi) | |||
movq %r12,56(%rdi) | |||
movq %r13,64(%rdi) | |||
movq %rcx,72(%rdi) | |||
movq 80(%rdi),%r8 | |||
movq 88(%rdi),%r9 | |||
movq 96(%rdi),%r10 | |||
movq 104(%rdi),%r11 | |||
adcq $0x0,%r8 | |||
adcq $0x0,%r9 | |||
adcq $0x0,%r10 | |||
adcq $0x0,%r11 | |||
movq %r8,80(%rdi) | |||
movq %r9,88(%rdi) | |||
movq %r10,96(%rdi) | |||
movq %r11,104(%rdi) | |||
/* round 2: (limb 2, limb 3) x .Lp434p1 -> r8..r13 */
xorq %rax,%rax | |||
movq 16+0(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
xorq %rax,%rax | |||
movq 16+8(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r13,%rcx | |||
adcxq %r13,%r9 | |||
adcxq %rcx,%r10 | |||
mulxq 8+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r11 | |||
adoxq %rcx,%r10 | |||
mulxq 16+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r12 | |||
adoxq %rcx,%r11 | |||
mulxq 24+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %rax,%r13 | |||
adoxq %rcx,%r12 | |||
adoxq %rax,%r13 | |||
/* add the round-2 product into limbs 5..11 and propagate the carry to 12..13 */
xorq %rcx,%rcx | |||
addq 40(%rdi),%r8 | |||
adcq 48(%rdi),%r9 | |||
adcq 56(%rdi),%r10 | |||
adcq 64(%rdi),%r11 | |||
adcq 72(%rdi),%r12 | |||
adcq 80(%rdi),%r13 | |||
adcq 88(%rdi),%rcx | |||
movq %r8,40(%rdi) | |||
movq %r9,48(%rdi) | |||
movq %r10,56(%rdi) | |||
movq %r11,64(%rdi) | |||
movq %r12,72(%rdi) | |||
movq %r13,80(%rdi) | |||
movq %rcx,88(%rdi) | |||
movq 96(%rdi),%r8 | |||
movq 104(%rdi),%r9 | |||
adcq $0x0,%r8 | |||
adcq $0x0,%r9 | |||
movq %r8,96(%rdi) | |||
movq %r9,104(%rdi) | |||
/* round 3: (limb 4, limb 5) x .Lp434p1 -> r8..r13 */
xorq %rax,%rax | |||
movq 32+0(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
xorq %rax,%rax | |||
movq 32+8(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r13,%rcx | |||
adcxq %r13,%r9 | |||
adcxq %rcx,%r10 | |||
mulxq 8+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r11 | |||
adoxq %rcx,%r10 | |||
mulxq 16+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r12 | |||
adoxq %rcx,%r11 | |||
mulxq 24+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %rax,%r13 | |||
adoxq %rcx,%r12 | |||
adoxq %rax,%r13 | |||
/* add the round-3 product to limbs 7..13; the two lowest sums are output limbs 0 and 1 */
xorq %rcx,%rcx | |||
addq 56(%rdi),%r8 | |||
adcq 64(%rdi),%r9 | |||
adcq 72(%rdi),%r10 | |||
adcq 80(%rdi),%r11 | |||
adcq 88(%rdi),%r12 | |||
adcq 96(%rdi),%r13 | |||
adcq 104(%rdi),%rcx | |||
movq %r8,0(%rsi) | |||
movq %r9,8(%rsi) | |||
movq %r10,72(%rdi) | |||
movq %r11,80(%rdi) | |||
movq %r12,88(%rdi) | |||
movq %r13,96(%rdi) | |||
movq %rcx,104(%rdi) | |||
/* round 4: limb 6 x .Lp434p1, added to limbs 9..13 -> output limbs 2..6 */
xorq %rax,%rax | |||
movq 48(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
addq 72(%rdi),%r8 | |||
adcq 80(%rdi),%r9 | |||
adcq 88(%rdi),%r10 | |||
adcq 96(%rdi),%r11 | |||
adcq 104(%rdi),%r12 | |||
movq %r8,16(%rsi) | |||
movq %r9,24(%rsi) | |||
movq %r10,32(%rsi) | |||
movq %r11,40(%rsi) | |||
movq %r12,48(%rsi) | |||
popq %r15 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_mpmul_asm: multiplies the seven-limb values at %rdi and %rsi and
 * writes the fourteen-limb product to the buffer passed in %rdx
 * (a, b, c of the C prototype, assuming the System V AMD64 calling convention).
 *
 * Structure: each operand is split into a low part (limbs 0..3) and a high
 * part (limbs 4..6). Three products are formed - (lo+hi of a) x (lo+hi of b)
 * on the stack, lo x lo into output limbs 0..7, hi x hi into output limbs
 * 8..13 - and then combined.
 *
 * Uses mulx, adcx and adox, so the CPU must support BMI2 and ADX.
 * Stack scratch area (96 bytes): 0..31 sum of a's halves, 32..63 sum of b's
 * halves, 64..95 masked cross terms for the carries of those sums.
 */
.globl sike_mpmul_asm | |||
.hidden sike_mpmul_asm | |||
.type sike_mpmul_asm,@function | |||
sike_mpmul_asm: | |||
.cfi_startproc | |||
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
pushq %r15 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r15, -40 | |||
/* keep the output pointer in %rcx: mulx takes one factor implicitly from %rdx */
movq %rdx,%rcx | |||
xorq %rax,%rax | |||
movq 0(%rdi),%r8 | |||
movq 8(%rdi),%r9 | |||
movq 16(%rdi),%r10 | |||
movq 24(%rdi),%r11 | |||
pushq %rbx | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset rbx, -48 | |||
pushq %rbp | |||
.cfi_offset rbp, -56 | |||
.cfi_adjust_cfa_offset 8 | |||
subq $96,%rsp | |||
.cfi_adjust_cfa_offset 96 | |||
/* r8..r11 = a[0..3] + a[4..6]; %rax = 0 - carry (mask); stored at 0..24(%rsp) */
addq 32(%rdi),%r8 | |||
adcq 40(%rdi),%r9 | |||
adcq 48(%rdi),%r10 | |||
adcq $0x0,%r11 | |||
sbbq $0x0,%rax | |||
movq %r8,0(%rsp) | |||
movq %r9,8(%rsp) | |||
movq %r10,16(%rsp) | |||
movq %r11,24(%rsp) | |||
/* r12..r15 = b[0..3] + b[4..6]; %rbx = 0 - carry (mask); stored at 32..56(%rsp) */
xorq %rbx,%rbx | |||
movq 0(%rsi),%r12 | |||
movq 8(%rsi),%r13 | |||
movq 16(%rsi),%r14 | |||
movq 24(%rsi),%r15 | |||
addq 32(%rsi),%r12 | |||
adcq 40(%rsi),%r13 | |||
adcq 48(%rsi),%r14 | |||
adcq $0x0,%r15 | |||
sbbq $0x0,%rbx | |||
movq %r12,32(%rsp) | |||
movq %r13,40(%rsp) | |||
movq %r14,48(%rsp) | |||
movq %r15,56(%rsp) | |||
/* cross terms: (b-sum & a-mask) + (a-sum & b-mask), stored at 64..88(%rsp) */
andq %rax,%r12 | |||
andq %rax,%r13 | |||
andq %rax,%r14 | |||
andq %rax,%r15 | |||
andq %rbx,%r8 | |||
andq %rbx,%r9 | |||
andq %rbx,%r10 | |||
andq %rbx,%r11 | |||
addq %r12,%r8 | |||
adcq %r13,%r9 | |||
adcq %r14,%r10 | |||
adcq %r15,%r11 | |||
movq %r8,64(%rsp) | |||
movq %r9,72(%rsp) | |||
movq %r10,80(%rsp) | |||
movq %r11,88(%rsp) | |||
/* product 1: 0..24(%rsp) x 32..56(%rsp) -> eight words at 0..56(%rsp) */
movq 0+0(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r9,%r8 | |||
movq %r9,0+0(%rsp) | |||
mulxq 32+8(%rsp),%r10,%r9 | |||
xorq %rax,%rax | |||
adoxq %r10,%r8 | |||
mulxq 32+16(%rsp),%r11,%r10 | |||
adoxq %r11,%r9 | |||
mulxq 32+24(%rsp),%r12,%r11 | |||
adoxq %r12,%r10 | |||
movq 0+8(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r12,%r13 | |||
adoxq %rax,%r11 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsp),%r15,%r14 | |||
adoxq %r8,%r12 | |||
movq %r12,0+8(%rsp) | |||
adcxq %r15,%r13 | |||
mulxq 32+16(%rsp),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r9,%r13 | |||
mulxq 32+24(%rsp),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r10,%r14 | |||
movq 0+16(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r8,%r9 | |||
adoxq %r11,%r15 | |||
adoxq %rax,%rbx | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsp),%r11,%r10 | |||
adoxq %r13,%r8 | |||
movq %r8,0+16(%rsp) | |||
adcxq %r11,%r9 | |||
mulxq 32+16(%rsp),%r12,%r11 | |||
adcxq %r12,%r10 | |||
adoxq %r14,%r9 | |||
mulxq 32+24(%rsp),%rbp,%r12 | |||
adcxq %rbp,%r11 | |||
adcxq %rax,%r12 | |||
adoxq %r15,%r10 | |||
adoxq %rbx,%r11 | |||
adoxq %rax,%r12 | |||
movq 0+24(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r8,%r13 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsp),%r15,%r14 | |||
adcxq %r15,%r13 | |||
adoxq %r8,%r9 | |||
mulxq 32+16(%rsp),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r13,%r10 | |||
mulxq 32+24(%rsp),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r14,%r11 | |||
adoxq %r15,%r12 | |||
adoxq %rax,%rbx | |||
movq %r9,0+24(%rsp) | |||
movq %r10,0+32(%rsp) | |||
movq %r11,0+40(%rsp) | |||
movq %r12,0+48(%rsp) | |||
movq %rbx,0+56(%rsp) | |||
/* product 2: a[0..3] x b[0..3] -> output limbs 0..7 */
movq 0+0(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r9,%r8 | |||
movq %r9,0+0(%rcx) | |||
mulxq 0+8(%rsi),%r10,%r9 | |||
xorq %rax,%rax | |||
adoxq %r10,%r8 | |||
mulxq 0+16(%rsi),%r11,%r10 | |||
adoxq %r11,%r9 | |||
mulxq 0+24(%rsi),%r12,%r11 | |||
adoxq %r12,%r10 | |||
movq 0+8(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r12,%r13 | |||
adoxq %rax,%r11 | |||
xorq %rax,%rax | |||
mulxq 0+8(%rsi),%r15,%r14 | |||
adoxq %r8,%r12 | |||
movq %r12,0+8(%rcx) | |||
adcxq %r15,%r13 | |||
mulxq 0+16(%rsi),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r9,%r13 | |||
mulxq 0+24(%rsi),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r10,%r14 | |||
movq 0+16(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r8,%r9 | |||
adoxq %r11,%r15 | |||
adoxq %rax,%rbx | |||
xorq %rax,%rax | |||
mulxq 0+8(%rsi),%r11,%r10 | |||
adoxq %r13,%r8 | |||
movq %r8,0+16(%rcx) | |||
adcxq %r11,%r9 | |||
mulxq 0+16(%rsi),%r12,%r11 | |||
adcxq %r12,%r10 | |||
adoxq %r14,%r9 | |||
mulxq 0+24(%rsi),%rbp,%r12 | |||
adcxq %rbp,%r11 | |||
adcxq %rax,%r12 | |||
adoxq %r15,%r10 | |||
adoxq %rbx,%r11 | |||
adoxq %rax,%r12 | |||
movq 0+24(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r8,%r13 | |||
xorq %rax,%rax | |||
mulxq 0+8(%rsi),%r15,%r14 | |||
adcxq %r15,%r13 | |||
adoxq %r8,%r9 | |||
mulxq 0+16(%rsi),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r13,%r10 | |||
mulxq 0+24(%rsi),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r14,%r11 | |||
adoxq %r15,%r12 | |||
adoxq %rax,%rbx | |||
movq %r9,0+24(%rcx) | |||
movq %r10,0+32(%rcx) | |||
movq %r11,0+40(%rcx) | |||
movq %r12,0+48(%rcx) | |||
movq %rbx,0+56(%rcx) | |||
/* product 3: a[4..6] x b[4..6] -> output limbs 8..13 */
movq 32+0(%rdi),%rdx | |||
mulxq 32+0(%rsi),%r9,%r8 | |||
movq %r9,64+0(%rcx) | |||
mulxq 32+8(%rsi),%r10,%r9 | |||
xorq %rax,%rax | |||
adoxq %r10,%r8 | |||
mulxq 32+16(%rsi),%r11,%r10 | |||
adoxq %r11,%r9 | |||
movq 32+8(%rdi),%rdx | |||
mulxq 32+0(%rsi),%r12,%r11 | |||
adoxq %rax,%r10 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsi),%r14,%r13 | |||
adoxq %r8,%r12 | |||
movq %r12,64+8(%rcx) | |||
adcxq %r14,%r11 | |||
mulxq 32+16(%rsi),%r8,%r14 | |||
adoxq %r9,%r11 | |||
adcxq %r8,%r13 | |||
adcxq %rax,%r14 | |||
adoxq %r10,%r13 | |||
movq 32+16(%rdi),%rdx | |||
mulxq 32+0(%rsi),%r8,%r9 | |||
adoxq %rax,%r14 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsi),%r10,%r12 | |||
adoxq %r11,%r8 | |||
movq %r8,64+16(%rcx) | |||
adcxq %r13,%r9 | |||
mulxq 32+16(%rsi),%r11,%r8 | |||
adcxq %r14,%r12 | |||
adcxq %rax,%r8 | |||
adoxq %r10,%r9 | |||
adoxq %r12,%r11 | |||
adoxq %rax,%r8 | |||
movq %r9,64+24(%rcx) | |||
movq %r11,64+32(%rcx) | |||
movq %r8,64+40(%rcx) | |||
/* combine: r8..r11 = cross terms + upper half of product 1 */
movq 64(%rsp),%r8 | |||
movq 72(%rsp),%r9 | |||
movq 80(%rsp),%r10 | |||
movq 88(%rsp),%r11 | |||
movq 32(%rsp),%rax | |||
addq %rax,%r8 | |||
movq 40(%rsp),%rax | |||
adcq %rax,%r9 | |||
movq 48(%rsp),%rax | |||
adcq %rax,%r10 | |||
movq 56(%rsp),%rax | |||
adcq %rax,%r11 | |||
/* r12..r15 = lower half of product 1 */
movq 0(%rsp),%r12 | |||
movq 8(%rsp),%r13 | |||
movq 16(%rsp),%r14 | |||
movq 24(%rsp),%r15 | |||
/* middle term = product 1 - product 2 (output limbs 0..7) */
subq 0(%rcx),%r12 | |||
sbbq 8(%rcx),%r13 | |||
sbbq 16(%rcx),%r14 | |||
sbbq 24(%rcx),%r15 | |||
sbbq 32(%rcx),%r8 | |||
sbbq 40(%rcx),%r9 | |||
sbbq 48(%rcx),%r10 | |||
sbbq 56(%rcx),%r11 | |||
/* ... - product 3 (output limbs 8..13) */
subq 64(%rcx),%r12 | |||
sbbq 72(%rcx),%r13 | |||
sbbq 80(%rcx),%r14 | |||
sbbq 88(%rcx),%r15 | |||
sbbq 96(%rcx),%r8 | |||
sbbq 104(%rcx),%r9 | |||
sbbq $0x0,%r10 | |||
sbbq $0x0,%r11 | |||
/* add the middle term into output limbs 4..11, carry into limbs 12..13 */
addq 32(%rcx),%r12 | |||
movq %r12,32(%rcx) | |||
adcq 40(%rcx),%r13 | |||
movq %r13,40(%rcx) | |||
adcq 48(%rcx),%r14 | |||
movq %r14,48(%rcx) | |||
adcq 56(%rcx),%r15 | |||
movq %r15,56(%rcx) | |||
adcq 64(%rcx),%r8 | |||
movq %r8,64(%rcx) | |||
adcq 72(%rcx),%r9 | |||
movq %r9,72(%rcx) | |||
adcq 80(%rcx),%r10 | |||
movq %r10,80(%rcx) | |||
adcq 88(%rcx),%r11 | |||
movq %r11,88(%rcx) | |||
movq 96(%rcx),%r12 | |||
adcq $0x0,%r12 | |||
movq %r12,96(%rcx) | |||
movq 104(%rcx),%r13 | |||
adcq $0x0,%r13 | |||
movq %r13,104(%rcx) | |||
/* release the scratch area and restore callee-saved registers */
addq $96,%rsp | |||
.cfi_adjust_cfa_offset -96 | |||
popq %rbp | |||
.cfi_adjust_cfa_offset -8 | |||
.cfi_same_value rbp | |||
popq %rbx | |||
.cfi_adjust_cfa_offset -8 | |||
.cfi_same_value rbx | |||
popq %r15 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3 0xc3 is the encoding of "rep ret" */
.byte 0xf3,0xc3 | |||
.cfi_endproc |
@@ -0,0 +1,207 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: portable modular arithmetic for P434 | |||
*********************************************************************************************/ | |||
#include "common/utils.h" | |||
#include "utils.h" | |||
#include "fpx.h" | |||
#ifndef PQC_NOASM | |||
void sike_fprdc_asm(const felm_t ma, felm_t mc); | |||
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c); | |||
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c); | |||
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c); | |||
#endif | |||
// Global constants | |||
extern const struct params_t params; | |||
// Full-width product of two digits: c[0] receives the low digit and c[1]
// the high digit of a * b. Built from four half-digit by half-digit partial
// products so that no wider integer type is needed.
static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
{
    // number of bits in half a digit
    const unsigned int half = (unsigned int)(sizeof(crypto_word_t) * 4);
    const crypto_word_t lo_mask = (crypto_word_t)(-1) >> half;
    const crypto_word_t hi_mask = (crypto_word_t)(-1) << half;

    // split both operands into half digits
    const crypto_word_t a_lo = a & lo_mask;
    const crypto_word_t a_hi = a >> half;
    const crypto_word_t b_lo = b & lo_mask;
    const crypto_word_t b_hi = b >> half;

    // the four partial products
    const crypto_word_t lo_lo = a_lo * b_lo;
    const crypto_word_t lo_hi = a_lo * b_hi;
    const crypto_word_t hi_lo = a_hi * b_lo;
    const crypto_word_t hi_hi = a_hi * b_hi;

    crypto_word_t column, spill;

    // second quarter of the result; anything above half a digit spills over
    column = (lo_lo >> half) + (hi_lo & lo_mask) + (lo_hi & lo_mask);
    spill = column >> half;
    c[0] = (lo_lo & lo_mask) ^ (column << half);          // C00 | C01

    // third quarter, then the top quarter including what spilled out of it
    column = (hi_lo >> half) + (lo_hi >> half) + (hi_hi & lo_mask) + spill;
    spill = column & hi_mask;
    c[1] = (column & lo_mask) ^ ((hi_hi & hi_mask) + spill);  // C10 | C11
}
// Field addition, c = a+b mod p434.
// Inputs: a, b in [0, 2*p434-1]
// Output: c    in [0, 2*p434-1]
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpadd_asm(a, b, c);
#else
    unsigned int k, flag;
    crypto_word_t select;

    // c = a + b as a plain multiprecision sum.
    flag = 0;
    for (k = 0; k != NWORDS_FIELD; ++k) {
        ADDC(flag, a[k], b[k], flag, c[k]);
    }

    // Unconditionally subtract params.prime_x2 ...
    flag = 0;
    for (k = 0; k != NWORDS_FIELD; ++k) {
        SUBC(flag, c[k], params.prime_x2[k], flag, c[k]);
    }

    // ... and add it back through an all-ones/all-zeros mask when that
    // subtraction borrowed, so no branch depends on the operands.
    select = 0 - (crypto_word_t)flag;
    flag = 0;
    for (k = 0; k != NWORDS_FIELD; ++k) {
        ADDC(flag, c[k], params.prime_x2[k] & select, flag, c[k]);
    }
#endif
}
// Field subtraction, c = a-b mod p434.
// Inputs: a, b in [0, 2*p434-1]
// Output: c    in [0, 2*p434-1]
void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpsub_asm(a, b, c);
#else
    unsigned int k, flag;
    crypto_word_t select;

    // c = a - b; flag ends up as the final borrow.
    flag = 0;
    for (k = 0; k != NWORDS_FIELD; ++k) {
        SUBC(flag, a[k], b[k], flag, c[k]);
    }

    // When the difference went negative, add params.prime_x2 back; the
    // mask keeps the correction branch-free.
    select = 0 - (crypto_word_t)flag;
    flag = 0;
    for (k = 0; k != NWORDS_FIELD; ++k) {
        ADDC(flag, c[k], params.prime_x2[k] & select, flag, c[k]);
    }
#endif
}
// Comba (column-wise) multiprecision multiplication, c = a*b, where both
// operands are NWORDS_FIELD words long and c is 2*NWORDS_FIELD words long.
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
{
#ifdef PQC_ASM
    if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
        sike_mpmul_asm(a,b,c);
        return;
    }
#endif
    unsigned int col, row, first, last;
    unsigned int cy = 0;
    crypto_word_t prod[2];
    // Three-word running accumulator (acc2:acc1:acc0) for the current column.
    crypto_word_t acc0 = 0, acc1 = 0, acc2 = 0;

    for (col = 0; col < 2*NWORDS_FIELD-1; col++) {
        // Range of a-indices that contribute to this output column.
        if (col < NWORDS_FIELD) {
            first = 0;
            last = col;
        } else {
            first = col-NWORDS_FIELD+1;
            last = NWORDS_FIELD-1;
        }

        for (row = first; row <= last; row++) {
            MUL(a[row], b[col-row], prod+1, prod[0]);
            ADDC(0, prod[0], acc0, cy, acc0);
            ADDC(cy, prod[1], acc1, cy, acc1);
            acc2 += cy;
        }

        // Emit the finished word and shift the accumulator down by one word.
        c[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }
    c[2*NWORDS_FIELD-1] = acc0;
}
// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434. | |||
// mc = ma*R^-1 mod p434x2, where R = 2^448. | |||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. | |||
// ma is assumed to be in Montgomery representation. | |||
void sike_fprdc(const felm_t ma, felm_t mc) | |||
{ | |||
#ifdef PQC_ASM | |||
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | |||
sike_fprdc_asm(ma, mc); | |||
return; | |||
} | |||
#endif | |||
unsigned int i, j, carry, count = ZERO_WORDS; | |||
crypto_word_t UV[2], t = 0, u = 0, v = 0; | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
mc[i] = 0; | |||
} | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
for (j = 0; j < i; j++) { | |||
if (j < (i-ZERO_WORDS+1)) { | |||
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]); | |||
ADDC(0, UV[0], v, carry, v); | |||
ADDC(carry, UV[1], u, carry, u); | |||
t += carry; | |||
} | |||
} | |||
ADDC(0, v, ma[i], carry, v); | |||
ADDC(carry, u, 0, carry, u); | |||
t += carry; | |||
mc[i] = v; | |||
v = u; | |||
u = t; | |||
t = 0; | |||
} | |||
for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { | |||
if (count > 0) { | |||
count -= 1; | |||
} | |||
for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { | |||
if (j < (NWORDS_FIELD-count)) { | |||
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]); | |||
ADDC(0, UV[0], v, carry, v); | |||
ADDC(carry, UV[1], u, carry, u); | |||
t += carry; | |||
} | |||
} | |||
ADDC(0, v, ma[i], carry, v); | |||
ADDC(carry, u, 0, carry, u); | |||
t += carry; | |||
mc[i-NWORDS_FIELD] = v; | |||
v = u; | |||
u = t; | |||
t = 0; | |||
} | |||
ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); | |||
mc[NWORDS_FIELD-1] = v; | |||
} |
@@ -0,0 +1,282 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: core functions over GF(p) and GF(p^2) | |||
*********************************************************************************************/ | |||
#include <stddef.h> | |||
#include "utils.h" | |||
#include "fpx.h" | |||
extern const struct params_t params; | |||
// Montgomery squaring: multiplies ma by itself into a double-length buffer
// and reduces the product into mc.
static void fpsqr_mont(const felm_t ma, felm_t mc)
{
    dfelm_t wide = {0};  // holds the unreduced 2*NWORDS_FIELD-word square

    sike_mpmul(ma, ma, wide);
    sike_fprdc(wide, mc);
}
// One step of the inversion chain: square acc nsqr times in place, then
// multiply it by x (acc = acc^(2^nsqr) * x in Montgomery arithmetic).
static void fpinv_chain_step(felm_t acc, const felm_t x, unsigned int nsqr)
{
    while (nsqr-- > 0) {
        fpsqr_mont(acc, acc);
    }
    sike_fpmul_mont(x, acc, acc);
}

// Addition chain computing a^((p-3)/4) in place using Montgomery arithmetic.
static void fpinv_chain_mont(felm_t a)
{
    unsigned int k;
    felm_t pw[31], acc;

    // Precomputed table of odd powers: pw[0] = a^3 and pw[k] = pw[k-1]*a^2.
    fpsqr_mont(a, acc);
    sike_fpmul_mont(a, acc, pw[0]);
    for (k = 1; k <= 30; k++) {
        sike_fpmul_mont(pw[k-1], acc, pw[k]);
    }

    // Fixed sequence of "square n times, multiply by a table entry" steps.
    sike_fpcopy(a, acc);
    fpinv_chain_step(acc, pw[5], 7);
    fpinv_chain_step(acc, pw[14], 10);
    fpinv_chain_step(acc, pw[3], 6);
    fpinv_chain_step(acc, pw[23], 6);
    fpinv_chain_step(acc, pw[13], 6);
    fpinv_chain_step(acc, pw[24], 6);
    fpinv_chain_step(acc, pw[7], 6);
    fpinv_chain_step(acc, pw[12], 8);
    fpinv_chain_step(acc, pw[30], 8);
    fpinv_chain_step(acc, pw[1], 6);
    fpinv_chain_step(acc, pw[30], 6);
    fpinv_chain_step(acc, pw[21], 7);
    fpinv_chain_step(acc, pw[2], 9);
    fpinv_chain_step(acc, pw[19], 9);
    fpinv_chain_step(acc, pw[1], 9);
    fpinv_chain_step(acc, pw[24], 7);
    fpinv_chain_step(acc, pw[26], 6);
    fpinv_chain_step(acc, pw[16], 6);
    fpinv_chain_step(acc, pw[10], 7);
    fpinv_chain_step(acc, pw[6], 7);
    fpinv_chain_step(acc, pw[0], 7);
    fpinv_chain_step(acc, pw[20], 9);
    fpinv_chain_step(acc, pw[9], 8);
    fpinv_chain_step(acc, pw[25], 6);
    fpinv_chain_step(acc, pw[30], 9);
    fpinv_chain_step(acc, pw[26], 6);
    fpinv_chain_step(acc, a, 6);       // this step multiplies by a itself
    fpinv_chain_step(acc, pw[28], 7);
    fpinv_chain_step(acc, pw[6], 6);
    fpinv_chain_step(acc, pw[10], 6);
    fpinv_chain_step(acc, pw[22], 9);

    // Tail of the chain: the same step repeated 35 times.
    for (k = 0; k < 35; k++) {
        fpinv_chain_step(acc, pw[30], 6);
    }

    sike_fpcopy(acc, a);
}
// Field inversion in Montgomery arithmetic, a = a^(-1)*R mod p.
// Runs the addition chain on a copy, squares the result twice and multiplies
// by the original value.
static void fpinv_mont(felm_t a)
{
    felm_t pw = {0};

    sike_fpcopy(a, pw);
    fpinv_chain_mont(pw);       // pw = chain(a)
    fpsqr_mont(pw, pw);         // pw = pw^2
    fpsqr_mont(pw, pw);         // pw = pw^4
    sike_fpmul_mont(a, pw, a);  // a  = a * pw
}
// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. | |||
#ifndef PQC_ASM | |||
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    // Word-by-word addition with carry propagation; the final carry bit is
    // the return value.
    uint8_t cy = 0;
    size_t k = 0;

    while (k < nwords) {
        ADDC(cy, a[k], b[k], cy, c[k]);
        k++;
    }
    return cy;
}
// Multiprecision subtraction over nwords words, c = a-b.
// The return value is the borrow out of the most significant word.
inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint32_t bw = 0;
    size_t k = 0;

    while (k < nwords) {
        SUBC(bw, a[k], b[k], bw, c[k]);
        k++;
    }
    return bw;
}
#endif | |||
// Field-length multiprecision addition without reduction, c = a+b.
// Dispatches to the assembly routine when PQC_ASM is defined and to the
// portable mp_add otherwise; the carry out is discarded.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#ifndef PQC_ASM
    mp_add(a, b, c, NWORDS_FIELD);
#else
    sike_mpadd_asm(a, b, c);
#endif
}
// Double-length multiprecision subtraction, c = a-b over 2*NWORDS_FIELD words.
// Returns an all-ones mask (0xFF..F) when the result is negative and zero
// (0x00..0) otherwise.
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    // Stretch the 0/1 borrow bit into a full-word mask.
    const crypto_word_t bw = (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD);
    return (0 - bw);
#else
    return sike_mpsubx2_asm(a, b, c);
#endif
}
// Double-length multiprecision double subtraction, c = c-a-b over
// 2*NWORDS_FIELD words. Callers are expected to pass c > a and c > b;
// borrows are not reported.
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    mp_sub(c, a, c, 2*NWORDS_FIELD);  // c -= a
    mp_sub(c, b, c, 2*NWORDS_FIELD);  // c -= b
#else
    sike_mpdblsubx2_asm(a, b, c);
#endif
}
// Copies the NWORDS_FIELD words of field element a into c, lowest word first.
void sike_fpcopy(const felm_t a, felm_t c) {
    size_t k = 0;

    while (k < NWORDS_FIELD) {
        c[k] = a[k];
        k++;
    }
}
// Montgomery field multiplication, mc = ma*mb*R^-1 mod prime: a full
// multiprecision product followed by a Montgomery reduction.
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{
    dfelm_t wide = {0};  // unreduced double-length product

    sike_mpmul(ma, mb, wide);
    sike_fprdc(wide, mc);
}
// Leaves the Montgomery domain: c = ma*R^(-1) mod p = a mod p, with the
// result brought into [0, p-1]. Implemented as a Montgomery multiplication
// by the plain integer 1 followed by the final correction.
void sike_from_mont(const felm_t ma, felm_t c)
{
    felm_t unit = {1};  // lowest word 1, every other word zero

    sike_fpmul_mont(ma, unit, c);
    sike_fpcorrection(c);
}
// Squaring in GF(p^2) using Montgomery arithmetic, c = a^2.
// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Uses c0 = (a0+a1)*(a0-a1) and c1 = (2*a0)*a1.
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
    felm_t sum = {0}, diff = {0}, twice = {0};

    // All three temporaries are formed before c is written.
    mp_addfast(a->c0, a->c1, sum);           // sum   = a0+a1
    sike_fpsub(a->c0, a->c1, diff);          // diff  = a0-a1
    mp_addfast(a->c0, a->c0, twice);         // twice = 2*a0

    sike_fpmul_mont(sum, diff, c->c0);       // c0 = (a0+a1)*(a0-a1)
    sike_fpmul_mont(twice, a->c1, c->c1);    // c1 = 2*a0*a1
}
// Modular negation in place, a = -a mod p, computed as params.prime_x2 - a.
// Input/output: a in [0, 2*p-1]
void sike_fpneg(felm_t a) {
    uint32_t bw = 0;
    size_t k = 0;

    while (k < NWORDS_FIELD) {
        SUBC(bw, params.prime_x2[k], a[k], bw, a[k]);
        k++;
    }
}
// Modular halving, c = a/2 mod p.
// Input : a in [0, 2*p-1]
// Output: c in [0, 2*p-1]
void sike_fpdiv2(const felm_t a, felm_t c) {
    uint32_t cy = 0;
    size_t k;
    // All ones when a is odd, zero when it is even.
    const crypto_word_t odd = 0 - (crypto_word_t)(a[0] & 1);

    // c = a + (p if a is odd, else 0); the sum is even either way.
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(cy, a[k], params.prime[k] & odd, cy, c[k]);
    }

    // Shift the whole multiprecision value right by one bit, pulling the low
    // bit of each higher word into the top bit of the word below it.
    for (k = 0; k < NWORDS_FIELD-1; k++) {
        c[k] = (c[k] >> 1) ^ (c[k+1] << (RADIX - 1));
    }
    c[NWORDS_FIELD-1] >>= 1;
}
// Final reduction in place: brings a from [0, 2*p-1] into [0, p-1].
void sike_fpcorrection(felm_t a) {
    uint32_t flag = 0;
    crypto_word_t select;
    size_t k;

    // a -= p, remembering whether the subtraction borrowed.
    for (k = 0; k < NWORDS_FIELD; k++) {
        SUBC(flag, a[k], params.prime[k], flag, a[k]);
    }

    // Undo the subtraction through a mask when it went negative.
    select = 0 - (crypto_word_t)flag;
    flag = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(flag, a[k], params.prime[k] & select, flag, a[k]);
    }
}
// Multiplication in GF(p^2) using Montgomery arithmetic, c = a*b.
// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Three multiprecision products are used:
//   c1 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1,   c0 = a0*b0 - a1*b1.
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
    felm_t sum_a = {0}, sum_b = {0};
    dfelm_t prod00, prod11, cross;
    crypto_word_t negative;
    size_t k;

    mp_addfast(a->c0, a->c1, sum_a);                 // sum_a  = a0+a1
    mp_addfast(b->c0, b->c1, sum_b);                 // sum_b  = b0+b1
    sike_mpmul(a->c0, b->c0, prod00);                // prod00 = a0*b0
    sike_mpmul(a->c1, b->c1, prod11);                // prod11 = a1*b1
    sike_mpmul(sum_a, sum_b, cross);                 // cross  = (a0+a1)*(b0+b1)
    mp_dblsubfast(prod00, prod11, cross);            // cross -= a0*b0 + a1*b1

    // prod00 = a0*b0 - a1*b1; negative is all ones when that went below zero.
    negative = mp_subfast(prod00, prod11, prod00);

    // sum_a is reused to hold p (or zero) as the correction term.
    for (k = 0; k < NWORDS_FIELD; k++) {
        sum_a[k] = params.prime[k] & negative;
    }

    sike_fprdc(cross, c->c1);                        // c1 = reduced cross term
    // Add the correction to the upper half of the difference before reducing.
    mp_addfast(&prod00[NWORDS_FIELD], sum_a, &prod00[NWORDS_FIELD]);
    sike_fprdc(prod00, c->c0);                       // c0 = reduced a0*b0 - a1*b1
}
// Inversion in GF(p^2) using Montgomery arithmetic, performed in place:
// a = (a0-i*a1)/(a0^2+a1^2).
void sike_fp2inv_mont(f2elm_t a) {
    f2elm_t norm = {0};

    fpsqr_mont(a->c0, norm->c0);                     // norm.c0 = a0^2
    fpsqr_mont(a->c1, norm->c1);                     // norm.c1 = a1^2
    sike_fpadd(norm->c0, norm->c1, norm->c0);        // norm.c0 = a0^2+a1^2
    fpinv_mont(norm->c0);                            // norm.c0 = 1/(a0^2+a1^2)

    sike_fpneg(a->c1);                               // a = a0 - i*a1 (conjugate)
    sike_fpmul_mont(a->c0, norm->c0, a->c0);         // scale the real part
    sike_fpmul_mont(a->c1, norm->c0, a->c1);         // scale the imaginary part
}
@@ -0,0 +1,110 @@ | |||
#ifndef FPX_H_ | |||
#define FPX_H_ | |||
#include "utils.h" | |||
#if defined(__cplusplus) | |||
extern "C" { | |||
#endif | |||
// Modular addition, c = a+b mod p. | |||
void sike_fpadd(const felm_t a, const felm_t b, felm_t c); | |||
// Modular subtraction, c = a-b mod p. | |||
void sike_fpsub(const felm_t a, const felm_t b, felm_t c); | |||
// Modular division by two, c = a/2 mod p. | |||
void sike_fpdiv2(const felm_t a, felm_t c); | |||
// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1]. | |||
void sike_fpcorrection(felm_t a); | |||
// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. | |||
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c); | |||
// 443-bit Montgomery reduction, c = a mod p | |||
void sike_fprdc(const dfelm_t a, felm_t c); | |||
// Double 2x443-bit multiprecision subtraction, c = c-a-b | |||
void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c); | |||
// Multiprecision subtraction, c = a-b | |||
crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c); | |||
// 443-bit multiprecision addition, c = a+b | |||
void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c); | |||
// Modular negation, a = -a mod p. | |||
void sike_fpneg(felm_t a); | |||
// Copy of a field element, c = a | |||
void sike_fpcopy(const felm_t a, felm_t c); | |||
// Zero a field element, a = 0.
void sike_fpzero(felm_t a); | |||
// Conversion from Montgomery representation to standard representation, | |||
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. | |||
void sike_from_mont(const felm_t ma, felm_t c); | |||
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p443, where R=2^768 | |||
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc); | |||
// GF(p443^2) multiplication using Montgomery arithmetic, c = a*b in GF(p443^2) | |||
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); | |||
// GF(p443^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) | |||
void sike_fp2inv_mont(f2elm_t a); | |||
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). | |||
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c); | |||
// Modular correction, a = a in GF(p^2). | |||
void sike_fp2correction(f2elm_t a); | |||
#if defined(__cplusplus) | |||
} // extern C | |||
#endif | |||
// GF(p^2) addition, c = a+b in GF(p^2).
// Applies sike_fpadd to the c0 and c1 components. Arguments are parenthesized
// so that any pointer expression may be passed; each argument is expanded
// twice, so pass expressions without side effects.
#define sike_fp2add(a, b, c)                    \
do {                                            \
    sike_fpadd((a)->c0, (b)->c0, (c)->c0);      \
    sike_fpadd((a)->c1, (b)->c1, (c)->c1);      \
} while(0)
// GF(p^2) subtraction, c = a-b in GF(p^2).
// Applies sike_fpsub to the c0 and c1 components. Arguments are parenthesized
// so that any pointer expression may be passed; each argument is expanded
// twice, so pass expressions without side effects.
#define sike_fp2sub(a, b, c)                    \
do {                                            \
    sike_fpsub((a)->c0, (b)->c0, (c)->c0);      \
    sike_fpsub((a)->c1, (b)->c1, (c)->c1);      \
} while(0)
// Copy a GF(p^2) element, c = a.
// Applies sike_fpcopy to the c0 and c1 components. Arguments are parenthesized
// so that any pointer expression may be passed; each argument is expanded
// twice, so pass expressions without side effects.
#define sike_fp2copy(a, c)                      \
do {                                            \
    sike_fpcopy((a)->c0, (c)->c0);              \
    sike_fpcopy((a)->c1, (c)->c1);              \
} while(0)
// GF(p^2) negation, a = -a in GF(p^2).
// Applies sike_fpneg in place to the c0 and c1 components. The argument is
// parenthesized so that any pointer expression may be passed; it is expanded
// twice, so pass an expression without side effects.
#define sike_fp2neg(a)                          \
do {                                            \
    sike_fpneg((a)->c0);                        \
    sike_fpneg((a)->c1);                        \
} while(0)
// GF(p^2) division by two, c = a/2 in GF(p^2).
// Applies sike_fpdiv2 to the c0 and c1 components. Arguments are parenthesized
// so that any pointer expression may be passed; each argument is expanded
// twice, so pass expressions without side effects.
#define sike_fp2div2(a, c)                      \
do {                                            \
    sike_fpdiv2((a)->c0, (c)->c0);              \
    sike_fpdiv2((a)->c1, (c)->c1);              \
} while(0)
// Modular correction, a = a in GF(p^2).
// Applies sike_fpcorrection in place to the c0 and c1 components. The argument
// is parenthesized so that any pointer expression may be passed; it is
// expanded twice, so pass an expression without side effects.
#define sike_fp2correction(a)                   \
do {                                            \
    sike_fpcorrection((a)->c0);                 \
    sike_fpcorrection((a)->c1);                 \
} while(0)
// Conversion of a GF(p^2) element to Montgomery representation,
// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
// Multiplies each component by params.mont_R2, so an object named `params`
// must be visible where the macro is expanded. Arguments are parenthesized so
// that any pointer expression may be passed; each is expanded twice, so pass
// expressions without side effects.
#define sike_to_fp2mont(a, mc)                              \
do {                                                        \
    sike_fpmul_mont((a)->c0, params.mont_R2, (mc)->c0);     \
    sike_fpmul_mont((a)->c1, params.mont_R2, (mc)->c1);     \
} while(0)
// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
// c_i = ma_i*R^(-1) = a_i in GF(p^2).
// Applies sike_from_mont to the c0 and c1 components. Arguments are
// parenthesized so that any pointer expression may be passed; each is expanded
// twice, so pass expressions without side effects.
#define sike_from_fp2mont(ma, c)                \
do {                                            \
    sike_from_mont((ma)->c0, (c)->c0);          \
    sike_from_mont((ma)->c1, (c)->c1);          \
} while(0)
#endif // FPX_H_ |
@@ -0,0 +1,262 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: elliptic curve and isogeny functions | |||
*********************************************************************************************/ | |||
#include <stddef.h> | |||
#include <string.h> | |||
#include "utils.h" | |||
#include "isogeny.h" | |||
#include "fpx.h" | |||
static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
{
    // Projective x-only doubling on a Montgomery curve.
    // Input:  P = (X1:Z1) with x1 = X1/Z1, and the curve constants
    //         A24plus = A+2C and C24 = 4C.
    // Output: Q = 2*P = (X2:Z2). P is fully read before Q is written.
    f2elm_t dif = {0}, sum = {0};

    sike_fp2sub(P->X, P->Z, dif);                   // dif = X1-Z1
    sike_fp2add(P->X, P->Z, sum);                   // sum = X1+Z1
    sike_fp2sqr_mont(dif, dif);                     // dif = (X1-Z1)^2
    sike_fp2sqr_mont(sum, sum);                     // sum = (X1+Z1)^2
    sike_fp2mul_mont(C24, dif, Q->Z);               // Z2  = C24*(X1-Z1)^2
    sike_fp2mul_mont(sum, Q->Z, Q->X);              // X2  = C24*(X1-Z1)^2*(X1+Z1)^2
    sike_fp2sub(sum, dif, sum);                     // sum = (X1+Z1)^2-(X1-Z1)^2
    sike_fp2mul_mont(A24plus, sum, dif);            // dif = A24plus*sum
    sike_fp2add(Q->Z, dif, Q->Z);                   // Z2  = A24plus*sum + C24*(X1-Z1)^2
    sike_fp2mul_mont(Q->Z, sum, Q->Z);              // Z2  = Z2*sum
}
void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
{
    // Repeated doubling: Q <- (2^e)*P on a Montgomery curve with projective
    // constants A24plus = A+2C and C24 = 4C.
    // Input:  P = (XP:ZP) with xP = XP/ZP.
    // Output: Q, obtained by copying P and doubling it in place e times.
    memmove(Q, P, sizeof(*P));

    while (e > 0) {
        xDBL(Q, Q, A24plus, C24);
        e--;
    }
}
void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
{
    // Derives the 4-isogeny associated with a point of order four.
    // Input:  P = (X4:Z4), a projective point of order 4.
    // Output: A24plus and C24, projective constants of the 4-isogenous
    //         Montgomery curve, and coeff[0..2], the three values that
    //         eval_4_isog() needs to evaluate the isogeny.

    // Evaluation coefficients taken directly from the kernel point.
    sike_fp2sub(P->X, P->Z, coeff[1]);              // coeff[1] = X4-Z4
    sike_fp2add(P->X, P->Z, coeff[2]);              // coeff[2] = X4+Z4

    // coeff[0] and C24 are both built from Z4^2.
    sike_fp2sqr_mont(P->Z, coeff[0]);               // coeff[0] = Z4^2
    sike_fp2add(coeff[0], coeff[0], coeff[0]);      // coeff[0] = 2*Z4^2
    sike_fp2sqr_mont(coeff[0], C24);                // C24      = 4*Z4^4
    sike_fp2add(coeff[0], coeff[0], coeff[0]);      // coeff[0] = 4*Z4^2

    // A24plus is built from X4^2.
    sike_fp2sqr_mont(P->X, A24plus);                // A24plus  = X4^2
    sike_fp2add(A24plus, A24plus, A24plus);         // A24plus  = 2*X4^2
    sike_fp2sqr_mont(A24plus, A24plus);             // A24plus  = 4*X4^4
}
void eval_4_isog(point_proj_t P, f2elm_t* coeff)
{
    // Pushes a point through a 4-isogeny, in place.
    // Inputs: coeff[0..2] as produced by get_4_isog(), and the projective
    //         point P = (X:Z) in the domain of the isogeny.
    // Output: P <- phi(P) = (X:Z) in the codomain.
    f2elm_t u = {0}, v = {0};

    sike_fp2add(P->X, P->Z, u);                     // u = X+Z
    sike_fp2sub(P->X, P->Z, v);                     // v = X-Z
    sike_fp2mul_mont(u, coeff[1], P->X);            // X = (X+Z)*coeff[1]
    sike_fp2mul_mont(v, coeff[2], P->Z);            // Z = (X-Z)*coeff[2]
    sike_fp2mul_mont(u, v, u);                      // u = (X+Z)*(X-Z)
    sike_fp2mul_mont(u, coeff[0], u);               // u = coeff[0]*(X+Z)*(X-Z)
    sike_fp2add(P->X, P->Z, v);                     // v = X + Z   (updated X, Z)
    sike_fp2sub(P->X, P->Z, P->Z);                  // Z = X - Z   (updated X, Z)
    sike_fp2sqr_mont(v, v);                         // v = v^2
    sike_fp2sqr_mont(P->Z, P->Z);                   // Z = Z^2
    sike_fp2add(v, u, P->X);                        // X = v + u
    sike_fp2sub(P->Z, u, u);                        // u = Z - u
    sike_fp2mul_mont(P->X, v, P->X);                // final X = X*v
    sike_fp2mul_mont(P->Z, u, P->Z);                // final Z = Z*u
}
void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
{
    // Projective x-only tripling on a Montgomery curve.
    // Input:  P = (X:Z) with x = X/Z, and the curve constants
    //         A24plus = A+2C and A24minus = A-2C.
    // Output: Q = 3*P = (X3:Z3). P is fully read before Q is written.
    f2elm_t w0, w1, w2, w3, w4, w5, w6;

    sike_fp2sub(P->X, P->Z, w0);                    // w0 = X-Z
    sike_fp2sqr_mont(w0, w2);                       // w2 = (X-Z)^2
    sike_fp2add(P->X, P->Z, w1);                    // w1 = X+Z
    sike_fp2sqr_mont(w1, w3);                       // w3 = (X+Z)^2
    sike_fp2add(w0, w1, w4);                        // w4 = 2*X
    sike_fp2sub(w1, w0, w0);                        // w0 = 2*Z
    sike_fp2sqr_mont(w4, w1);                       // w1 = 4*X^2
    sike_fp2sub(w1, w3, w1);                        // w1 = 4*X^2 - (X+Z)^2
    sike_fp2sub(w1, w2, w1);                        // w1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
    sike_fp2mul_mont(w3, A24plus, w5);              // w5 = A24plus*(X+Z)^2
    sike_fp2mul_mont(w3, w5, w3);                   // w3 = (X+Z)^2 * w5
    sike_fp2mul_mont(A24minus, w2, w6);             // w6 = A24minus*(X-Z)^2
    sike_fp2mul_mont(w2, w6, w2);                   // w2 = (X-Z)^2 * w6
    sike_fp2sub(w2, w3, w3);                        // w3 = w2 - w3
    sike_fp2sub(w5, w6, w2);                        // w2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
    sike_fp2mul_mont(w1, w2, w1);                   // w1 = w1*w2
    sike_fp2add(w3, w1, w2);                        // w2 = w3 + w1
    sike_fp2sqr_mont(w2, w2);                       // w2 = w2^2
    sike_fp2mul_mont(w4, w2, Q->X);                 // X3 = 2*X*w2
    sike_fp2sub(w3, w1, w1);                        // w1 = w3 - w1
    sike_fp2sqr_mont(w1, w1);                       // w1 = w1^2
    sike_fp2mul_mont(w0, w1, Q->Z);                 // Z3 = 2*Z*w1
}
void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
{
    // Repeated tripling: Q <- (3^e)*P on a Montgomery curve with projective
    // constants A24plus = A+2C and A24minus = A-2C.
    // Input:  P = (XP:ZP) with xP = XP/ZP.
    // Output: Q, obtained by copying P and tripling it in place e times.
    memmove(Q, P, sizeof(*P));

    while (e > 0) {
        xTPL(Q, Q, A24minus, A24plus);
        e--;
    }
}
void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
{
    // Derives the 3-isogeny associated with a point of order three.
    // Input:  P = (X3:Z3), a projective point of order 3 (written X, Z below).
    // Output: A24minus and A24plus, projective constants of the 3-isogenous
    //         Montgomery curve, and coeff[0..1] for use by eval_3_isog().
    f2elm_t r0 = {0}, r1 = {0}, r2 = {0}, r3 = {0}, r4 = {0};

    sike_fp2sub(P->X, P->Z, coeff[0]);              // coeff[0] = X-Z
    sike_fp2sqr_mont(coeff[0], r0);                 // r0 = (X-Z)^2
    sike_fp2add(P->X, P->Z, coeff[1]);              // coeff[1] = X+Z
    sike_fp2sqr_mont(coeff[1], r1);                 // r1 = (X+Z)^2
    sike_fp2add(r0, r1, r2);                        // r2 = (X+Z)^2 + (X-Z)^2
    sike_fp2add(coeff[0], coeff[1], r3);            // r3 = 2*X
    sike_fp2sqr_mont(r3, r3);                       // r3 = 4*X^2
    sike_fp2sub(r3, r2, r3);                        // r3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
    sike_fp2add(r1, r3, r2);                        // r2 = 4*X^2 - (X-Z)^2
    sike_fp2add(r3, r0, r3);                        // r3 = 4*X^2 - (X+Z)^2
    sike_fp2add(r0, r3, r4);                        // r4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
    sike_fp2add(r4, r4, r4);                        // r4 = 2*r4
    sike_fp2add(r1, r4, r4);                        // r4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
    sike_fp2mul_mont(r2, r4, A24minus);             // A24minus = r2*r4
    sike_fp2add(r1, r2, r4);                        // r4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
    sike_fp2add(r4, r4, r4);                        // r4 = 2*r4
    sike_fp2add(r0, r4, r4);                        // r4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
    sike_fp2mul_mont(r3, r4, r4);                   // r4 = r3*r4
    sike_fp2sub(r4, A24minus, r0);                  // r0 = r4 - A24minus
    sike_fp2add(A24minus, r0, A24plus);             // A24plus = A24minus + r0
}
void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
{
    // Pushes a point through a 3-isogeny, in place.
    // Inputs: coeff[0..1] as produced by get_3_isog(), and the projective
    //         point Q = (X:Z).
    // Output: Q <- phi(Q).
    f2elm_t s, d, m;

    sike_fp2add(Q->X, Q->Z, s);                     // s = X+Z
    sike_fp2sub(Q->X, Q->Z, d);                     // d = X-Z
    sike_fp2mul_mont(s, coeff[0], s);               // s = coeff[0]*(X+Z)
    sike_fp2mul_mont(d, coeff[1], d);               // d = coeff[1]*(X-Z)
    sike_fp2add(s, d, m);                           // m = s + d
    sike_fp2sub(d, s, s);                           // s = d - s
    sike_fp2sqr_mont(m, m);                         // m = (coeff[0]*(X+Z) + coeff[1]*(X-Z))^2
    sike_fp2sqr_mont(s, s);                         // s = (coeff[1]*(X-Z) - coeff[0]*(X+Z))^2
    sike_fp2mul_mont(Q->X, m, Q->X);                // final X = X*m
    sike_fp2mul_mont(Q->Z, s, Q->Z);                // final Z = Z*s
}
void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
{
    // Simultaneous inversion of three GF(p^2) elements with one field
    // inversion.
    // Input:  z1, z2, z3
    // Output: 1/z1, 1/z2, 1/z3, overwriting the inputs.
    f2elm_t p12, inv, inv12, inv1;

    sike_fp2mul_mont(z1, z2, p12);                  // p12   = z1*z2
    sike_fp2mul_mont(z3, p12, inv);                 // inv   = z1*z2*z3
    sike_fp2inv_mont(inv);                          // inv   = 1/(z1*z2*z3)
    sike_fp2mul_mont(z3, inv, inv12);               // inv12 = 1/(z1*z2)
    sike_fp2mul_mont(inv12, z2, inv1);              // inv1  = 1/z1 (held back: z1 is still needed)
    sike_fp2mul_mont(inv12, z1, z2);                // z2    = 1/z2
    sike_fp2mul_mont(p12, inv, z3);                 // z3    = 1/z3
    sike_fp2copy(inv1, z1);                         // z1    = 1/z1
}
void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
{
    // Recovers the Montgomery coefficient A of E_A: y^2 = x^3 + A*x^2 + x
    // from three x-coordinates such that R = Q-P on E_A.
    // Input:  the x-coordinates xP, xQ and xR of the points P, Q and R.
    // Output: the coefficient A.
    f2elm_t prod = F2ELM_INIT, sum = F2ELM_INIT, unit = F2ELM_INIT;
    extern const struct params_t params;

    sike_fpcopy(params.mont_one, unit->c0);         // unit = 1 (c1 stays as initialized)
    sike_fp2add(xP, xQ, sum);                       // sum  = xP+xQ
    sike_fp2mul_mont(xP, xQ, prod);                 // prod = xP*xQ
    sike_fp2mul_mont(xR, sum, A);                   // A    = xR*(xP+xQ)
    sike_fp2add(prod, A, A);                        // A    = A + xP*xQ
    sike_fp2mul_mont(prod, xR, prod);               // prod = xP*xQ*xR
    sike_fp2sub(A, unit, A);                        // A    = A - 1
    sike_fp2add(prod, prod, prod);                  // prod = 2*xP*xQ*xR
    sike_fp2add(sum, xR, sum);                      // sum  = xP+xQ+xR
    sike_fp2add(prod, prod, prod);                  // prod = 4*xP*xQ*xR
    sike_fp2sqr_mont(A, A);                         // A    = A^2
    sike_fp2inv_mont(prod);                         // prod = 1/(4*xP*xQ*xR)
    sike_fp2mul_mont(A, prod, A);                   // A    = A*prod
    sike_fp2sub(A, sum, A);                         // A    = A - (xP+xQ+xR)
}
void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
{
    // j-invariant of a Montgomery curve given by projective constants.
    // Input:  A, C in GF(p^2).
    // Output: jinv = 256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), the j-invariant of
    //         B*y^2 = x^3+(A/C)*x^2+x, or equivalently of
    //         B'*y^2 = C*x^3+A*x^2+C*x.
    f2elm_t num = F2ELM_INIT, csq = F2ELM_INIT;

    sike_fp2sqr_mont(A, jinv);                      // jinv = A^2
    sike_fp2sqr_mont(C, csq);                       // csq  = C^2
    sike_fp2add(csq, csq, num);                     // num  = 2*C^2
    sike_fp2sub(jinv, num, num);                    // num  = A^2 - 2*C^2
    sike_fp2sub(num, csq, num);                     // num  = A^2 - 3*C^2
    sike_fp2sub(num, csq, jinv);                    // jinv = A^2 - 4*C^2
    sike_fp2sqr_mont(csq, csq);                     // csq  = C^4
    sike_fp2mul_mont(jinv, csq, jinv);              // jinv = C^4*(A^2 - 4*C^2)
    sike_fp2add(num, num, num);                     // num  = 2*(A^2 - 3*C^2)
    sike_fp2add(num, num, num);                     // num  = 4*(A^2 - 3*C^2)
    sike_fp2sqr_mont(num, csq);                     // csq  = num^2 (csq reused as scratch)
    sike_fp2mul_mont(num, csq, num);                // num  = 64*(A^2 - 3*C^2)^3
    sike_fp2add(num, num, num);                     // num  = 128*(A^2 - 3*C^2)^3
    sike_fp2add(num, num, num);                     // num  = 256*(A^2 - 3*C^2)^3
    sike_fp2inv_mont(jinv);                         // jinv = 1/(C^4*(A^2 - 4*C^2))
    sike_fp2mul_mont(jinv, num, jinv);              // jinv = num*jinv
}
void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
{
    // Combined doubling and differential addition, in place.
    // Input:  projective points P = (XP:ZP) and Q = (XQ:ZQ) with xP = XP/ZP
    //         and xQ = XQ/ZQ, the affine difference xPQ = x(P-Q), and the
    //         Montgomery curve constant A24 = (A+2)/4.
    // Output: P <- 2*P = (X2P:Z2P) and Q <- P+Q = (XQP:ZQP).
    f2elm_t u0 = F2ELM_INIT, u1 = F2ELM_INIT, u2 = F2ELM_INIT;

    sike_fp2add(P->X, P->Z, u0);                    // u0 = XP+ZP
    sike_fp2sub(P->X, P->Z, u1);                    // u1 = XP-ZP
    sike_fp2sqr_mont(u0, P->X);                     // XP = (XP+ZP)^2
    sike_fp2sub(Q->X, Q->Z, u2);                    // u2 = XQ-ZQ
    sike_fp2correction(u2);                         // bring u2 into [0, p-1]
    sike_fp2add(Q->X, Q->Z, Q->X);                  // XQ = XQ+ZQ
    sike_fp2mul_mont(u0, u2, u0);                   // u0 = (XP+ZP)*(XQ-ZQ)
    sike_fp2sqr_mont(u1, P->Z);                     // ZP = (XP-ZP)^2
    sike_fp2mul_mont(u1, Q->X, u1);                 // u1 = (XP-ZP)*(XQ+ZQ)
    sike_fp2sub(P->X, P->Z, u2);                    // u2 = (XP+ZP)^2-(XP-ZP)^2
    sike_fp2mul_mont(P->X, P->Z, P->X);             // XP = (XP+ZP)^2*(XP-ZP)^2
    sike_fp2mul_mont(u2, A24, Q->X);                // XQ = A24*u2 (scratch use of XQ)
    sike_fp2sub(u0, u1, Q->Z);                      // ZQ = u0 - u1
    sike_fp2add(Q->X, P->Z, P->Z);                  // ZP = A24*u2 + (XP-ZP)^2
    sike_fp2add(u0, u1, Q->X);                      // XQ = u0 + u1
    sike_fp2mul_mont(P->Z, u2, P->Z);               // ZP = ZP*u2
    sike_fp2sqr_mont(Q->Z, Q->Z);                   // ZQ = (u0 - u1)^2
    sike_fp2sqr_mont(Q->X, Q->X);                   // XQ = (u0 + u1)^2
    sike_fp2mul_mont(Q->Z, xPQ, Q->Z);              // ZQ = xPQ*(u0 - u1)^2
}
@@ -0,0 +1,49 @@ | |||
#ifndef ISOGENY_H_
#define ISOGENY_H_

// size_t is used by the xDBLe/xTPLe prototypes below.
#include <stddef.h>

// NOTE: the point_proj_t and f2elm_t types used below are not declared here;
// the translation unit must include "utils.h" before this header.

// Computes [2^e](X:Z) on Montgomery curve with projective
// constant via e repeated doublings.
void xDBLe(
    const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
    const f2elm_t C24, size_t e);

// Simultaneous doubling and differential addition:
// P <- 2*P and Q <- P+Q, where xPQ is the affine x-coordinate of P-Q and
// A24 = (A+2)/4.
void xDBLADD(
    point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
    const f2elm_t A24);

// Tripling of a Montgomery point in projective coordinates (X:Z).
void xTPL(
    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
    const f2elm_t A24plus);

// Computes [3^e](X:Z) on Montgomery curve with projective constant
// via e repeated triplings.
void xTPLe(
    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
    const f2elm_t A24plus, size_t e);

// Given the x-coordinates of P, Q, and R, returns the value A
// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x
// such that R=Q-P on E_A.
void get_A(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);

// Computes the j-invariant of a Montgomery curve with projective constant:
// jinv = 256*(A^2-3*C^2)^3 / (C^4*(A^2-4*C^2)).
void j_inv(
    const f2elm_t A, const f2elm_t C, f2elm_t jinv);

// Computes the corresponding 4-isogeny of a projective Montgomery
// point (X4:Z4) of order 4. Writes the codomain constants to A24plus and C24
// and the isogeny coefficients to coeff.
void get_4_isog(
    const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);

// Computes the corresponding 3-isogeny of a projective Montgomery
// point (X3:Z3) of order 3. Writes the codomain constants to A24minus and
// A24plus and the isogeny coefficients to coeff.
void get_3_isog(
    const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
    f2elm_t* coeff);

// Evaluates a 3-isogeny at the point Q=(X:Z), in place, using the
// coefficients in coeff (as produced by get_3_isog).
void eval_3_isog(
    point_proj_t Q, f2elm_t* coeff);

// Evaluates a 4-isogeny at the point P=(X:Z) in the domain of the isogeny,
// in place, using the coefficients in coeff (as produced by get_4_isog).
void eval_4_isog(
    point_proj_t P, f2elm_t* coeff);

// 3-way simultaneous inversion: z1, z2 and z3 are each replaced by their
// inverse.
void inv_3_way(
    f2elm_t z1, f2elm_t z2, f2elm_t z3);

#endif // ISOGENY_H_
@@ -0,0 +1,128 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: supersingular isogeny parameters and generation of functions for P434 | |||
*********************************************************************************************/ | |||
#include "utils.h" | |||
// Parameters for isogeny system "SIKE" (P434 instance).
// Multi-limb values are written as U64_TO_WORDS literals with the least
// significant 64-bit chunk first (the last literal of each 7-chunk group is
// the most significant one).
const struct params_t params = { | |||
// The field prime p.
.prime = { | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF), | |||
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056), | |||
U64_TO_WORDS(0x0002341F27177344) | |||
}, | |||
// p + 1 (its low 216 bits are zero).
.prime_p1 = { | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000), | |||
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056), | |||
U64_TO_WORDS(0x0002341F27177344) | |||
}, | |||
// 2 * p.
.prime_x2 = { | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF), | |||
U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC), | |||
U64_TO_WORDS(0x0004683E4E2EE688) | |||
}, | |||
// Alice's generators: x-coordinates of P, Q and R as GF(p^2) elements, laid
// out as six consecutive field elements (c0 then c1 for each coordinate).
// NOTE(review): presumably stored in Montgomery form - confirm.
.A_gen = { | |||
U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B), | |||
U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1), | |||
U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F), | |||
U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0 | |||
U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B), | |||
U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA), | |||
U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C), | |||
U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1 | |||
U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6), | |||
U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03), | |||
U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215), | |||
U64_TO_WORDS(0x0001C4CB77542876), // XQA0 | |||
U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050), | |||
U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374), | |||
U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508), | |||
U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1 | |||
U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611), | |||
U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD), | |||
U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3), | |||
U64_TO_WORDS(0x00022A81D8D55643), // XRA0 | |||
U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED), | |||
U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4), | |||
U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2), | |||
U64_TO_WORDS(0x0000B87FC716C0C6) // XRA1 | |||
}, | |||
// Bob's generators, same layout as A_gen. The c1 parts of XPB and XQB are
// zero.
.B_gen = { | |||
U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05), | |||
U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F), | |||
U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9), | |||
U64_TO_WORDS(0x0001BED4772E551F), // XPB0 | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), // XPB1 | |||
U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C), | |||
U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62), | |||
U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F), | |||
U64_TO_WORDS(0x000034080181D8AE), // XQB0 | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), // XQB1 | |||
U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647), | |||
U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D), | |||
U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504), | |||
U64_TO_WORDS(0x00007E8A50F02E37), // XRB0 | |||
U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230), | |||
U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53), | |||
U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B), | |||
U64_TO_WORDS(0x000173FA910377D3) // XRB1 | |||
}, | |||
// NOTE(review): presumably R^2 mod p, the constant used to convert into
// Montgomery form - confirm against the conversion routines.
.mont_R2 = { | |||
U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2), | |||
U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B), | |||
U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A), | |||
U64_TO_WORDS(0x000025A89BCDD12A) | |||
}, | |||
// The field element 1 in Montgomery representation.
.mont_one = { | |||
U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000), | |||
U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C), | |||
U64_TO_WORDS(0x0000ECEEA7BD2EDA) | |||
}, | |||
// NOTE(review): by its name, the field element 6 in Montgomery
// representation - confirm.
.mont_six = { | |||
U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000), | |||
U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0), | |||
U64_TO_WORDS(0x00012559A0403298) | |||
}, | |||
// Alice's isogeny-tree traversal strategy (107 entries); consumed one entry
// at a time by the 2-isogeny tree walks.
.A_strat = { | |||
0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, | |||
0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, | |||
0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, | |||
0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, | |||
0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, | |||
0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01, | |||
0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03, | |||
0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, | |||
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01 | |||
}, | |||
// Bob's isogeny-tree traversal strategy (136 entries); consumed one entry
// at a time by the 3-isogeny tree walks.
.B_strat = { | |||
0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01, | |||
0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01, | |||
0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, | |||
0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10, | |||
0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, | |||
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, | |||
0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01, | |||
0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, | |||
0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, | |||
0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01, | |||
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, | |||
0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01 | |||
} | |||
}; | |||
@@ -0,0 +1,505 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol | |||
*********************************************************************************************/ | |||
#include <assert.h> | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include <randombytes.h> | |||
#include <common/fips202.h> | |||
#include "utils.h" | |||
#include "isogeny.h" | |||
#include "fpx.h" | |||
// Parameter set instance used throughout this file; defined in another
// translation unit.
extern const struct params_t params; | |||
// SIDH_JINV_BYTESZ is the number of bytes used for encoding a j-invariant.
// It is also the stride between the three GF(p^2) elements of a public key.
#define SIDH_JINV_BYTESZ 110U | |||
// SIDH_PRV_A_BITSZ is the number of bits of an SIDH private key (2-isogeny)
#define SIDH_PRV_A_BITSZ 216U | |||
// SIDH_PRV_B_BITSZ is the number of bits of an SIDH private key (3-isogeny)
#define SIDH_PRV_B_BITSZ 217U | |||
// MAX_INT_POINTS_ALICE is the number of points used in 2-isogeny tree computation
#define MAX_INT_POINTS_ALICE 7U | |||
// MAX_INT_POINTS_BOB is the number of points used in 3-isogeny tree computation
#define MAX_INT_POINTS_BOB 8U | |||
// Conditionally swaps two projective points using a mask (no branches on
// |option|).
// If option = 0 the points are left as they are; if option = 0xFF...FF then
// P <- Q and Q <- P. Both coordinates (X and Z) and both GF(p^2) components
// (c0 and c1) are processed limb by limb.
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
    size_t limb = 0;

    while (limb < NWORDS_FIELD) {
        crypto_word_t diff;

        // X, real part
        diff = (P->X->c0[limb] ^ Q->X->c0[limb]) & option;
        P->X->c0[limb] ^= diff;
        Q->X->c0[limb] ^= diff;

        // Z, real part
        diff = (P->Z->c0[limb] ^ Q->Z->c0[limb]) & option;
        P->Z->c0[limb] ^= diff;
        Q->Z->c0[limb] ^= diff;

        // X, imaginary part
        diff = (P->X->c1[limb] ^ Q->X->c1[limb]) & option;
        P->X->c1[limb] ^= diff;
        Q->X->c1[limb] ^= diff;

        // Z, imaginary part
        diff = (P->Z->c1[limb] ^ Q->Z->c1[limb]) & option;
        P->Z->c1[limb] ^= diff;
        Q->Z->c1[limb] ^= diff;

        limb++;
    }
}
// Three-point ladder driven by the bits of the scalar |m|.
// Inputs:  affine x-coordinates xP, xQ and xPQ, the scalar m (read bit by
//          bit, least significant bit of m[0] first), the curve constant A,
//          and is_A selecting the scalar length: SIDH_PRV_A_BITSZ bits when
//          non-zero, SIDH_PRV_B_BITSZ bits otherwise.
// Output:  the projective point R, fully overwritten.
// NOTE(review): presumably R = P + [m]Q (callers use R as the kernel point);
// confirm against the SIKE specification.
static void ladder3Pt(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
    int is_A, point_proj_t R, const f2elm_t A) {
    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
    f2elm_t A24 = F2ELM_INIT;
    size_t nbits;
    int prevbit = 0;

    if (is_A) {
        nbits = SIDH_PRV_A_BITSZ;
    } else {
        nbits = SIDH_PRV_B_BITSZ;
    }

    // A24 = (A+2)/4, built as ((1+1) + A) halved twice.
    sike_fpcopy(params.mont_one, A24[0].c0);
    sike_fp2add(A24, A24, A24);
    sike_fp2add(A, A24, A24);
    sike_fp2div2(A24, A24);
    sike_fp2div2(A24, A24);

    // R0 = (xQ : 1), R2 = (xPQ : 1), R = (xP : 1).
    // R0 and R2 start zeroed, so only the real part of Z needs to be set;
    // R comes from the caller, so the imaginary part of its Z is cleared.
    sike_fp2copy(xQ, R0->X);
    sike_fpcopy(params.mont_one, R0->Z[0].c0);
    sike_fp2copy(xPQ, R2->X);
    sike_fpcopy(params.mont_one, R2->Z[0].c0);
    sike_fp2copy(xP, R->X);
    sike_fpcopy(params.mont_one, R->Z[0].c0);
    memset(R->Z->c1, 0, sizeof(R->Z->c1));

    // One ladder step per scalar bit. R and R2 are swapped only when the
    // current bit differs from the previous one.
    for (size_t i = 0; i < nbits; i++) {
        const int bit = (m[i >> 3] >> (i & 7)) & 1;
        const crypto_word_t mask = 0 - (crypto_word_t)(bit ^ prevbit);

        prevbit = bit;
        sike_fp2cswap(R, R2, mask);
        xDBLADD(R0, R2, R->X, A24);
        sike_fp2mul_mont(R2->X, R->Z, R2->X);
    }

    // Undo the swap left pending by the last processed bit.
    sike_fp2cswap(R, R2, 0 - (crypto_word_t)prevbit);
}
// Initialization of basis points.
// |gen| holds six consecutive field elements of NWORDS_FIELD words each, in
// the order XP.c0, XP.c1, XQ.c0, XQ.c1, XR.c0, XR.c1; they are copied into
// the three GF(p^2) outputs.
static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
    const crypto_word_t *src = gen;

    sike_fpcopy(src, XP->c0);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XP->c1);
    src += NWORDS_FIELD;

    sike_fpcopy(src, XQ->c0);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XQ->c1);
    src += NWORDS_FIELD;

    sike_fpcopy(src, XR->c0);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XR->c1);
}
// Conversion of GF(p^2) element from Montgomery to standard representation. | |||
static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) { | |||
f2elm_t t={0}; | |||
sike_from_fp2mont(x, t); | |||
// convert to bytes in little endian form | |||
for (size_t i=0; i<FIELD_BYTESZ; i++) { | |||
enc[i+ 0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF; | |||
enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF; | |||
} | |||
} | |||
// Parses a byte sequence back into a GF(p^2) element and converts it to
// Montgomery representation.
// Input layout: FIELD_BYTESZ bytes of c0 followed by FIELD_BYTESZ bytes of
// c1, each in little endian format (i.e., the least significant octet is
// located in the lowest memory address).
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
    const uint8_t *const in_c0 = enc;
    const uint8_t *const in_c1 = enc + FIELD_BYTESZ;

    // Start from zero so that bytes can simply be OR-ed into place.
    memset(t->c0, 0, sizeof(t->c0));
    memset(t->c1, 0, sizeof(t->c1));

    for (size_t i = 0; i < FIELD_BYTESZ; i++) {
        const size_t limb = i / LSZ;
        const size_t shift = 8 * (i % LSZ);

        t->c0[limb] |= ((crypto_word_t)in_c0[i]) << shift;
        t->c1[limb] |= ((crypto_word_t)in_c1[i]) << shift;
    }

    sike_to_fp2mont(t, t);
}
// Alice's ephemeral public key generation | |||
// Input: a private key prA in the range [0, 2^250 - 1], stored in 32 bytes. | |||
// Output: the public key pkA consisting of 3 GF(p503^2) elements encoded in 378 bytes. | |||
static void gen_iso_A(const uint8_t* skA, uint8_t* pkA) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_ALICE]; | |||
point_proj_t phiP = POINT_PROJ_INIT; | |||
point_proj_t phiQ = POINT_PROJ_INIT; | |||
point_proj_t phiR = POINT_PROJ_INIT; | |||
f2elm_t XPA, XQA, XRA, coeff[3] = {0}; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t C24 = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE] = {0}, npts = 0, ii = 0; | |||
// Initialize basis points | |||
sike_init_basis(params.A_gen, XPA, XQA, XRA); | |||
sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X); | |||
sike_fpcopy(params.mont_one, (phiP->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiQ->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiR->Z)->c0); | |||
// Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1 | |||
sike_fpcopy(params.mont_one, A24plus->c0); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
sike_fp2add(A24plus, A24plus, C24); | |||
sike_fp2add(A24plus, C24, A); | |||
sike_fp2add(C24, C24, A24plus); | |||
// Retrieve kernel point | |||
ladder3Pt(XPA, XQA, XRA, skA, 1, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < A_max; row++) { | |||
while (index < A_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.A_strat[ii++]; | |||
xDBLe(R, R, A24plus, C24, (2*m)); | |||
index += m; | |||
} | |||
get_4_isog(R, A24plus, C24, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_4_isog(pts[i], coeff); | |||
} | |||
eval_4_isog(phiP, coeff); | |||
eval_4_isog(phiQ, coeff); | |||
eval_4_isog(phiR, coeff); | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_4_isog(R, A24plus, C24, coeff); | |||
eval_4_isog(phiP, coeff); | |||
eval_4_isog(phiQ, coeff); | |||
eval_4_isog(phiR, coeff); | |||
inv_3_way(phiP->Z, phiQ->Z, phiR->Z); | |||
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); | |||
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); | |||
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); | |||
// Format public key | |||
sike_fp2_encode(phiP->X, pkA); | |||
sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ); | |||
sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ); | |||
} | |||
// Bob's ephemeral key-pair generation | |||
// It produces a private key skB and computes the public key pkB. | |||
// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. | |||
// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. | |||
static void gen_iso_B(const uint8_t* skB, uint8_t* pkB) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_BOB]; | |||
point_proj_t phiP = POINT_PROJ_INIT; | |||
point_proj_t phiQ = POINT_PROJ_INIT; | |||
point_proj_t phiR = POINT_PROJ_INIT; | |||
f2elm_t XPB, XQB, XRB, coeff[3] = {0}; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t A24minus = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB] = {0}, npts = 0, ii = 0; | |||
// Initialize basis points | |||
sike_init_basis(params.B_gen, XPB, XQB, XRB); | |||
sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X); | |||
sike_fpcopy(params.mont_one, (phiP->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiQ->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiR->Z)->c0); | |||
// Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1 | |||
sike_fpcopy(params.mont_one, A24plus->c0); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
sike_fp2add(A24plus, A24plus, A24minus); | |||
sike_fp2add(A24plus, A24minus, A); | |||
sike_fp2add(A24minus, A24minus, A24plus); | |||
// Retrieve kernel point | |||
ladder3Pt(XPB, XQB, XRB, skB, 0, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < B_max; row++) { | |||
while (index < B_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.B_strat[ii++]; | |||
xTPLe(R, R, A24minus, A24plus, m); | |||
index += m; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_3_isog(pts[i], coeff); | |||
} | |||
eval_3_isog(phiP, coeff); | |||
eval_3_isog(phiQ, coeff); | |||
eval_3_isog(phiR, coeff); | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
eval_3_isog(phiP, coeff); | |||
eval_3_isog(phiQ, coeff); | |||
eval_3_isog(phiR, coeff); | |||
inv_3_way(phiP->Z, phiQ->Z, phiR->Z); | |||
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); | |||
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); | |||
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); | |||
// Format public key | |||
sike_fp2_encode(phiP->X, pkB); | |||
sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ); | |||
sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ); | |||
} | |||
// Alice's ephemeral shared secret computation | |||
// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB | |||
// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. | |||
// Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes. | |||
// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes. | |||
static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_ALICE]; | |||
f2elm_t coeff[3], PKB[3], jinv; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t C24 = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; | |||
// Initialize images of Bob's basis | |||
fp2_decode(pkB, PKB[0]); | |||
fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]); | |||
fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]); | |||
// Initialize constants | |||
get_A(PKB[0], PKB[1], PKB[2], A); | |||
sike_fpadd(params.mont_one, params.mont_one, C24->c0); | |||
sike_fp2add(A, C24, A24plus); | |||
sike_fpadd(C24->c0, C24->c0, C24->c0); | |||
// Retrieve kernel point | |||
ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < A_max; row++) { | |||
while (index < A_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.A_strat[ii++]; | |||
xDBLe(R, R, A24plus, C24, (2*m)); | |||
index += m; | |||
} | |||
get_4_isog(R, A24plus, C24, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_4_isog(pts[i], coeff); | |||
} | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_4_isog(R, A24plus, C24, coeff); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
sike_fp2sub(A24plus, C24, A24plus); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
j_inv(A24plus, C24, jinv); | |||
sike_fp2_encode(jinv, ssA); | |||
} | |||
// Bob's ephemeral shared secret computation | |||
// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA | |||
// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. | |||
// Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes. | |||
// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes. | |||
static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_BOB] = {0}; | |||
f2elm_t coeff[3] = {0}, PKB[3] = {0}, jinv; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t A24minus = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB] = {0}, npts = 0, ii = 0; | |||
// Initialize images of Alice's basis | |||
fp2_decode(pkA, PKB[0]); | |||
fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]); | |||
fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]); | |||
// Initialize constants | |||
get_A(PKB[0], PKB[1], PKB[2], A); | |||
sike_fpadd(params.mont_one, params.mont_one, A24minus->c0); | |||
sike_fp2add(A, A24minus, A24plus); | |||
sike_fp2sub(A, A24minus, A24minus); | |||
// Retrieve kernel point | |||
ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < B_max; row++) { | |||
while (index < B_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.B_strat[ii++]; | |||
xTPLe(R, R, A24minus, A24plus, m); | |||
index += m; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_3_isog(pts[i], coeff); | |||
} | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
sike_fp2add(A24plus, A24minus, A); | |||
sike_fp2add(A, A, A); | |||
sike_fp2sub(A24plus, A24minus, A24plus); | |||
j_inv(A, A24plus, jinv); | |||
sike_fp2_encode(jinv, ssB); | |||
} | |||
int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ], | |||
uint8_t out_pub[SIKE_PUB_BYTESZ]) { | |||
// Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and < | |||
// 253 bits | |||
randombytes(out_priv, SIKE_MSG_BYTESZ); | |||
randombytes(&out_priv[SIKE_MSG_BYTESZ], SIKE_PRV_BYTESZ); | |||
out_priv[SIKE_MSG_BYTESZ+28-1] = (out_priv[SIKE_MSG_BYTESZ+28-1] & 0x01); | |||
gen_iso_B(&out_priv[SIKE_MSG_BYTESZ], out_pub); | |||
return 1; | |||
} | |||
void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
uint8_t out_ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ]) { | |||
// Secret buffer is reused by the function to store some ephemeral | |||
// secret data. It's size must be maximum of 64, | |||
// SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes. | |||
uint8_t secret[32]; // OZAPTF, why? | |||
uint8_t j[SIDH_JINV_BYTESZ] = {0}; | |||
uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ]; | |||
shake256incctx ctx; | |||
// Generate secret key for A | |||
// secret key A = SHAKE256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ | |||
randombytes(temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Generate public key for A - first part of the ciphertext | |||
gen_iso_A(secret, out_ciphertext); | |||
// Generate c1: | |||
// h = SHAKE256(j-invariant) | |||
// c1 = h ^ m | |||
ex_iso_A(secret, pub_key, j); | |||
shake256(secret, sizeof secret, j, sizeof j); | |||
// c1 = h ^ m | |||
uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ]; | |||
for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { | |||
c1[i] = temp[i] ^ secret[i]; | |||
} | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, out_ciphertext, SIKE_CT_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Generate shared secret out_shared_key = SHAKE256(m||out_ciphertext) | |||
memcpy(out_shared_key, secret, SIKE_SS_BYTESZ); | |||
} | |||
void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
const uint8_t ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ], | |||
const uint8_t priv_key[SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ]) { | |||
// Secret buffer is reused by the function to store some ephemeral | |||
// secret data. It's size must be maximum of 64, | |||
// SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes. | |||
uint8_t secret[32]; | |||
uint8_t j[SIDH_JINV_BYTESZ] = {0}; | |||
uint8_t c0[SIKE_PUB_BYTESZ] = {0}; | |||
uint8_t temp[SIKE_MSG_BYTESZ] = {0}; | |||
shake256incctx ctx; | |||
// Recover m | |||
// Let ciphertext = c0 || c1 - both have fixed sizes | |||
// m = F(j-invariant(c0, priv_key)) ^ c1 | |||
ex_iso_B(&priv_key[SIKE_MSG_BYTESZ], ciphertext, j); | |||
shake256(secret, sizeof secret, j, sizeof j); | |||
const uint8_t *c1 = &ciphertext[sizeof(c0)]; | |||
for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { | |||
temp[i] = c1[i] ^ secret[i]; | |||
} | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Recover c0 = public key A | |||
gen_iso_A(secret, c0); | |||
crypto_word_t ok = ct_uint_eq( | |||
ct_mem_eq(c0, ciphertext, SIKE_PUB_BYTESZ), 1); | |||
for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { | |||
temp[i] = ct_select_8(ok, temp[i], priv_key[i]); | |||
} | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, ciphertext, SIKE_CT_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Generate shared secret out_shared_key = SHAKE256(m||ciphertext) | |||
memcpy(out_shared_key, secret, SIKE_SS_BYTESZ); | |||
} |
@@ -0,0 +1,214 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: internal header file for P434 | |||
*********************************************************************************************/ | |||
#ifndef UTILS_H_ | |||
#define UTILS_H_ | |||
#include <stddef.h> | |||
#include <kem/sike/includes/sike/sike.h> | |||
// Conversion macro from number of bits to number of bytes (rounds up)
#define BITS_TO_BYTES(nbits) (((nbits)+7)/8)
// Bit size of the field
#define BITS_FIELD 434
// Byte size of the field
#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD)
// Number of 64-bit words of a 224-bit element
#define NBITS_ORDER 224
#define NWORDS64_ORDER ((NBITS_ORDER+63)/64)
// Number of elements in Alice's strategy
#define A_max 108
// Number of elements in Bob's strategy
#define B_max 137
// Word size in bits.
// The expansion is parenthesized so that the macro behaves as a single
// operand in any expression (e.g. x / RADIX or x % RADIX).
#define RADIX (sizeof(crypto_word_t)*8)
// Byte size of a limb (parenthesized for the same reason as RADIX)
#define LSZ (sizeof(crypto_word_t))
// Limb (machine word) configuration: 64-bit limbs when CPU_64_BIT is
// defined, 32-bit limbs otherwise.
#ifdef CPU_64_BIT

typedef uint64_t crypto_word_t;
// Number of words of a 434-bit field element
#define NWORDS_FIELD 7
// Number of "0" digits in the least significant part of p434 + 1
#define ZERO_WORDS 3
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal
// (one 64-bit word).
#define U64_TO_WORDS(x) UINT64_C(x)

#else

typedef uint32_t crypto_word_t;
// Number of words of a 434-bit field element
#define NWORDS_FIELD 14
// Number of "0" digits in the least significant part of p434 + 1
#define ZERO_WORDS 6
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal
// (two 32-bit words, low half first).
#define U64_TO_WORDS(x)                      \
    (uint32_t)(UINT64_C(x) & 0xffffffff),    \
    (uint32_t)(UINT64_C(x) >> 32)

#endif

// Extended datatype support: without a native 128-bit type, a 128-bit value
// is represented as a pair of 64-bit words.
#ifndef HAS_UINT128
typedef uint64_t uint128_t[2];
#endif
// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
// NOTE(review): no such functions follow this comment in the visible part of
// the file; it may be a leftover - confirm.
// Digit multiplication: forwards to digit_x_digit(multiplier, multiplicand, &lo).
// The |hi| argument is accepted but not used by the expansion, and the
// expansion already ends with a semicolon.
#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo)); | |||
// Converts a mask to a bit by shifting |x| right by RADIX-1: for an unsigned
// word this yields 1 when |x| is 0xff..ff and 0 when |x| is 0.
#define M2B(x) ((x)>>(RADIX-1)) | |||
// Digit addition with carry.
// sumOut   = addend1 + addend2 + carryIn (wrapping at the word size).
// carryOut = 1 if either of the two additions wrapped around, 0 otherwise.
// NOTE(review): relies on ct_uint_lt(a, b) returning an all-ones mask when
// a < b (M2B then reduces that mask to 0/1) - confirm against its definition.
// sumOut is evaluated more than once, so it must be free of side effects.
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ | |||
do { \ | |||
crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \ | |||
(sumOut) = (addend2) + tempReg; \ | |||
(carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) | \ | |||
ct_uint_lt((sumOut), tempReg)); \ | |||
} while(0) | |||
// Digit subtraction with borrow.
// Stores minuend - subtrahend - borrowIn (truncated to one word) in
// differenceOut and the resulting borrow bit in borrowOut.
// Arguments are expanded more than once, so they must not have side effects.
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)         \
do {                                                                          \
    crypto_word_t tempReg = (minuend) - (subtrahend);                         \
    /* borrow if minuend < subtrahend, or if the difference is 0 and */       \
    /* there is an incoming borrow still to be taken                 */       \
    crypto_word_t borrowReg =                                                 \
        M2B(ct_uint_lt((minuend), (subtrahend))) |                            \
        ((borrowIn) & ct_uint_eq(tempReg, 0));                                \
    /* outputs are written last, after every input has been read */           \
    (differenceOut) = tempReg - (crypto_word_t)(borrowIn);                    \
    (borrowOut) = borrowReg;                                                  \
} while(0)
/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
   which violates C11 as described in 6.7.9, 21 (similarly C99, 6.7.8).
   The defines below are used to work around the bug, and provide a way
   to initialize f2elm_t and point_proj_t structs.
   The bug has been fixed in GCC 6 (debian stretch).
*/
#define F2ELM_INIT {{ {0}, {0} }} | |||
#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }} | |||
// Datatype for representing 434-bit field elements (448-bit max.) | |||
// Elements over GF(p434) are encoded in 55 octets (FIELD_BYTESZ) in little endian format
// (i.e., the least significant octet is located in the lowest memory address). | |||
typedef crypto_word_t felm_t[NWORDS_FIELD]; | |||
// An element in F_{p^2} is composed of two coefficients from F_p, i.e.
// Fp2 element = c0 + c1*i in F_{p^2}
// Datatype for representing double-precision 2x434-bit field elements (448-bit max.) | |||
// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are | |||
// encoded as {a, b}, with a in the lowest memory portion. | |||
typedef struct { | |||
felm_t c0; | |||
felm_t c1; | |||
} fp2; | |||
// Our F_{p^2} element type is a one-element array of the struct, so it
// decays to a pointer to the struct when passed to a function.
typedef fp2 f2elm_t[1]; | |||
// Datatype for representing double-precision 2x434-bit | |||
// field elements in contiguous memory. | |||
typedef crypto_word_t dfelm_t[2*NWORDS_FIELD]; | |||
// Constants used during SIKE computation. | |||
struct params_t { | |||
// Stores a prime | |||
const crypto_word_t prime[NWORDS_FIELD]; | |||
// Stores prime + 1 | |||
const crypto_word_t prime_p1[NWORDS_FIELD]; | |||
// Stores prime * 2 | |||
const crypto_word_t prime_x2[NWORDS_FIELD]; | |||
// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} | |||
// in GF(prime^2), expressed in Montgomery representation | |||
const crypto_word_t A_gen[6*NWORDS_FIELD]; | |||
// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i} | |||
// in GF(prime^2), expressed in Montgomery representation | |||
const crypto_word_t B_gen[6*NWORDS_FIELD]; | |||
// Montgomery constant mont_R2 = (2^448)^2 mod prime | |||
const crypto_word_t mont_R2[NWORDS_FIELD]; | |||
// Value 'one' in Montgomery representation | |||
const crypto_word_t mont_one[NWORDS_FIELD]; | |||
// Value '6' in Montgomery representation | |||
const crypto_word_t mont_six[NWORDS_FIELD]; | |||
// Fixed parameters for isogeny tree computation | |||
const unsigned int A_strat[A_max-1]; | |||
const unsigned int B_strat[B_max-1]; | |||
}; | |||
// Point representation in projective XZ Montgomery coordinates. | |||
typedef struct { | |||
f2elm_t X; | |||
f2elm_t Z; | |||
} point_proj; | |||
typedef point_proj point_proj_t[1]; | |||
// Checks whether two words are equal without branching.
// Returns 1 if x == y, otherwise 0.
static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
{
    // diff is 0 exactly when x == y
    const crypto_word_t diff = x ^ y;
    // (diff >> 1) - diff stays 0 for diff == 0; for any non-zero diff the
    // subtraction wraps around and the most significant bit ends up set
    const crypto_word_t folded = (diff >> 1) - diff;
    // invert and move the most significant bit down to bit 0
    return (~folded) >> (RADIX-1);
}
// Constant time select.
// if flag == 1 returns in1
// if flag == 0 returns in2
// Only the least significant bit of flag is used; callers should pass 0 or 1.
static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
    // Expand bit 0 of flag into an all-ones (0xFF) or all-zeros mask using
    // unsigned arithmetic only. This avoids the implementation-defined
    // narrowing to int8_t and right shift of a negative value.
    const uint8_t mask = (uint8_t)(0u - (unsigned)(flag & 1u));
    return (uint8_t)((in1 & mask) | (in2 & (uint8_t)~mask));
}
// Constant time memcmp. Returns 1 if the n bytes at p and q are equal
// (this includes n == 0), otherwise 0.
static inline int ct_mem_eq(const void *p, const void *q, size_t n)
{
    // The buffers are only read, so the const qualifier is kept on the casts.
    const uint8_t *pp = (const uint8_t *)p;
    const uint8_t *qq = (const uint8_t *)q;
    // OR together the XOR of every byte pair; stays 0 only if all pairs match.
    uint8_t acc = 0;
    while (n--) {
        acc |= (uint8_t)(*pp++ ^ *qq++);
    }
    return (int)ct_uint_eq(acc, 0);
}
// Returns 0xff..f if the most significant bit of a is set, 0 otherwise.
static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
    // isolate the top bit as 0 or 1 ...
    const crypto_word_t top_bit = a >> (sizeof(a) * 8 - 1);
    // ... and negate it: 0 - 1 wraps to all ones, 0 - 0 stays 0
    return 0u - top_bit;
}
// ct_uint_lt returns 0xff..f if x < y and 0 otherwise.
static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
{
    // Consider the two cases of the problem:
    //   msb(x) == msb(y): x < y iff the MSB of x - y is set.
    //   msb(x) != msb(y): x < y iff the MSB of y is set.
    //
    // If msb(x) == msb(y) then the following evaluates as:
    //   msb(x^((x^y)|((x-y)^x))) ==
    //   msb(x^((x-y) ^ x))       ==   (because msb(x^y) == 0)
    //   msb(x^x^(x-y))           ==   (rearranging)
    //   msb(x-y)                      (because for all v, v^v == 0)
    //
    // Else, if msb(x) != msb(y) then the following evaluates as:
    //   msb(x^((x^y)|((x-y)^x))) ==
    //   msb(x^(T | ((x-y)^x)))   ==   (because msb(x^y) == 1; T stands
    //                                  for a value such that msb(T) == 1)
    //   msb(x^T)                 ==   (because ORing with 1 results in 1)
    //   msb(y)                        (because msb(x) != msb(y))
    //
    //
    // Here is an SMT-LIB verification of this formula:
    //
    // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
    //   (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
    // )
    //
    // (declare-fun a () (_ BitVec 32))
    // (declare-fun b () (_ BitVec 32))
    //
    // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
    // (check-sat)
    // (get-model)
    const crypto_word_t differing_bits = x ^ y;
    const crypto_word_t subtraction_bits = (x - y) ^ x;
    return constant_time_msb_w(x ^ (differing_bits | subtraction_bits));
}
#endif // UTILS_H_ |
@@ -203,64 +203,75 @@ pub type uint_fast32_t = ::std::os::raw::c_ulong; | |||
pub type uint_fast64_t = ::std::os::raw::c_ulong; | |||
pub type intmax_t = __intmax_t; | |||
pub type uintmax_t = __uintmax_t; | |||
pub const DILITHIUM2: ::std::os::raw::c_uint = 0; | |||
pub const DILITHIUM3: ::std::os::raw::c_uint = 1; | |||
pub const DILITHIUM5: ::std::os::raw::c_uint = 2; | |||
pub const FALCON1024: ::std::os::raw::c_uint = 3; | |||
pub const FALCON512: ::std::os::raw::c_uint = 4; | |||
pub const RAINBOWVCLASSIC: ::std::os::raw::c_uint = 5; | |||
pub const RAINBOWICLASSIC: ::std::os::raw::c_uint = 6; | |||
pub const RAINBOWIIICLASSIC: ::std::os::raw::c_uint = 7; | |||
pub const SPHINCSSHA256192FSIMPLE: ::std::os::raw::c_uint = 8; | |||
pub const SPHINCSSHAKE256256FSIMPLE: ::std::os::raw::c_uint = 9; | |||
pub const SPHINCSSHAKE256192FROBUST: ::std::os::raw::c_uint = 10; | |||
pub const SPHINCSSHAKE256128FSIMPLE: ::std::os::raw::c_uint = 11; | |||
pub const SPHINCSSHAKE256256SSIMPLE: ::std::os::raw::c_uint = 12; | |||
pub const SPHINCSSHAKE256128SSIMPLE: ::std::os::raw::c_uint = 13; | |||
pub const SPHINCSSHA256128FROBUST: ::std::os::raw::c_uint = 14; | |||
pub const SPHINCSSHA256192SROBUST: ::std::os::raw::c_uint = 15; | |||
pub const SPHINCSSHAKE256128FROBUST: ::std::os::raw::c_uint = 16; | |||
pub const SPHINCSSHAKE256128SROBUST: ::std::os::raw::c_uint = 17; | |||
pub const SPHINCSSHAKE256256SROBUST: ::std::os::raw::c_uint = 18; | |||
pub const SPHINCSSHA256192SSIMPLE: ::std::os::raw::c_uint = 19; | |||
pub const SPHINCSSHAKE256192SSIMPLE: ::std::os::raw::c_uint = 20; | |||
pub const SPHINCSSHAKE256192SROBUST: ::std::os::raw::c_uint = 21; | |||
pub const SPHINCSSHAKE256192FSIMPLE: ::std::os::raw::c_uint = 22; | |||
pub const SPHINCSSHA256256SSIMPLE: ::std::os::raw::c_uint = 23; | |||
pub const SPHINCSSHA256128SSIMPLE: ::std::os::raw::c_uint = 24; | |||
pub const SPHINCSSHAKE256256FROBUST: ::std::os::raw::c_uint = 25; | |||
pub const SPHINCSSHA256256FROBUST: ::std::os::raw::c_uint = 26; | |||
pub const SPHINCSSHA256256FSIMPLE: ::std::os::raw::c_uint = 27; | |||
pub const SPHINCSSHA256256SROBUST: ::std::os::raw::c_uint = 28; | |||
pub const SPHINCSSHA256128SROBUST: ::std::os::raw::c_uint = 29; | |||
pub const SPHINCSSHA256128FSIMPLE: ::std::os::raw::c_uint = 30; | |||
pub const SPHINCSSHA256192FROBUST: ::std::os::raw::c_uint = 31; | |||
pub const PQC_ALG_SIG_DILITHIUM2: ::std::os::raw::c_uint = 0; | |||
pub const PQC_ALG_SIG_DILITHIUM3: ::std::os::raw::c_uint = 1; | |||
pub const PQC_ALG_SIG_DILITHIUM5: ::std::os::raw::c_uint = 2; | |||
pub const PQC_ALG_SIG_FALCON512: ::std::os::raw::c_uint = 3; | |||
pub const PQC_ALG_SIG_FALCON1024: ::std::os::raw::c_uint = 4; | |||
pub const PQC_ALG_SIG_RAINBOWICLASSIC: ::std::os::raw::c_uint = 5; | |||
pub const PQC_ALG_SIG_RAINBOWIIICLASSIC: ::std::os::raw::c_uint = 6; | |||
pub const PQC_ALG_SIG_RAINBOWVCLASSIC: ::std::os::raw::c_uint = 7; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256128FSIMPLE: ::std::os::raw::c_uint = 8; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256128SSIMPLE: ::std::os::raw::c_uint = 9; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256128FROBUST: ::std::os::raw::c_uint = 10; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256128SROBUST: ::std::os::raw::c_uint = 11; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256192FSIMPLE: ::std::os::raw::c_uint = 12; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256192SSIMPLE: ::std::os::raw::c_uint = 13; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256192FROBUST: ::std::os::raw::c_uint = 14; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256192SROBUST: ::std::os::raw::c_uint = 15; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256256FSIMPLE: ::std::os::raw::c_uint = 16; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256256SSIMPLE: ::std::os::raw::c_uint = 17; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256256FROBUST: ::std::os::raw::c_uint = 18; | |||
pub const PQC_ALG_SIG_SPHINCSSHAKE256256SROBUST: ::std::os::raw::c_uint = 19; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256128FSIMPLE: ::std::os::raw::c_uint = 20; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256128SSIMPLE: ::std::os::raw::c_uint = 21; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256128FROBUST: ::std::os::raw::c_uint = 22; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256128SROBUST: ::std::os::raw::c_uint = 23; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256192FSIMPLE: ::std::os::raw::c_uint = 24; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256192SSIMPLE: ::std::os::raw::c_uint = 25; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256192FROBUST: ::std::os::raw::c_uint = 26; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256192SROBUST: ::std::os::raw::c_uint = 27; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256256FSIMPLE: ::std::os::raw::c_uint = 28; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256256SSIMPLE: ::std::os::raw::c_uint = 29; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256256FROBUST: ::std::os::raw::c_uint = 30; | |||
pub const PQC_ALG_SIG_SPHINCSSHA256256SROBUST: ::std::os::raw::c_uint = 31; | |||
pub const PQC_ALG_SIG_MAX: ::std::os::raw::c_uint = 32; | |||
pub type _bindgen_ty_1 = ::std::os::raw::c_uint; | |||
pub const FRODOKEM976SHAKE: ::std::os::raw::c_uint = 0; | |||
pub const FRODOKEM1344SHAKE: ::std::os::raw::c_uint = 1; | |||
pub const FRODOKEM640SHAKE: ::std::os::raw::c_uint = 2; | |||
pub const KYBER768: ::std::os::raw::c_uint = 3; | |||
pub const KYBER1024: ::std::os::raw::c_uint = 4; | |||
pub const KYBER512: ::std::os::raw::c_uint = 5; | |||
pub const NTRUHPS4096821: ::std::os::raw::c_uint = 6; | |||
pub const NTRUHPS2048509: ::std::os::raw::c_uint = 7; | |||
pub const NTRUHRSS701: ::std::os::raw::c_uint = 8; | |||
pub const NTRUHPS2048677: ::std::os::raw::c_uint = 9; | |||
pub const NTRULPR761: ::std::os::raw::c_uint = 10; | |||
pub const NTRULPR653: ::std::os::raw::c_uint = 11; | |||
pub const NTRULPR857: ::std::os::raw::c_uint = 12; | |||
pub const LIGHTSABER: ::std::os::raw::c_uint = 13; | |||
pub const FIRESABER: ::std::os::raw::c_uint = 14; | |||
pub const SABER: ::std::os::raw::c_uint = 15; | |||
pub const HQCRMRS128: ::std::os::raw::c_uint = 16; | |||
pub const HQCRMRS192: ::std::os::raw::c_uint = 17; | |||
pub const HQCRMRS256: ::std::os::raw::c_uint = 18; | |||
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 19; | |||
pub const PQC_ALG_KEM_FRODOKEM640SHAKE: ::std::os::raw::c_uint = 0; | |||
pub const PQC_ALG_KEM_FRODOKEM976SHAKE: ::std::os::raw::c_uint = 1; | |||
pub const PQC_ALG_KEM_FRODOKEM1344SHAKE: ::std::os::raw::c_uint = 2; | |||
pub const PQC_ALG_KEM_KYBER512: ::std::os::raw::c_uint = 3; | |||
pub const PQC_ALG_KEM_KYBER768: ::std::os::raw::c_uint = 4; | |||
pub const PQC_ALG_KEM_KYBER1024: ::std::os::raw::c_uint = 5; | |||
pub const PQC_ALG_KEM_NTRUHPS2048509: ::std::os::raw::c_uint = 6; | |||
pub const PQC_ALG_KEM_NTRUHPS4096821: ::std::os::raw::c_uint = 7; | |||
pub const PQC_ALG_KEM_NTRUHRSS701: ::std::os::raw::c_uint = 8; | |||
pub const PQC_ALG_KEM_NTRUHPS2048677: ::std::os::raw::c_uint = 9; | |||
pub const PQC_ALG_KEM_NTRULPR761: ::std::os::raw::c_uint = 10; | |||
pub const PQC_ALG_KEM_NTRULPR653: ::std::os::raw::c_uint = 11; | |||
pub const PQC_ALG_KEM_NTRULPR857: ::std::os::raw::c_uint = 12; | |||
pub const PQC_ALG_KEM_LIGHTSABER: ::std::os::raw::c_uint = 13; | |||
pub const PQC_ALG_KEM_SABER: ::std::os::raw::c_uint = 14; | |||
pub const PQC_ALG_KEM_FIRESABER: ::std::os::raw::c_uint = 15; | |||
pub const PQC_ALG_KEM_HQCRMRS128: ::std::os::raw::c_uint = 16; | |||
pub const PQC_ALG_KEM_HQCRMRS192: ::std::os::raw::c_uint = 17; | |||
pub const PQC_ALG_KEM_HQCRMRS256: ::std::os::raw::c_uint = 18; | |||
pub const PQC_ALG_KEM_SIKE434: ::std::os::raw::c_uint = 19; | |||
pub const PQC_ALG_KEM_MCELIECE348864: ::std::os::raw::c_uint = 20; | |||
pub const PQC_ALG_KEM_MCELIECE460896: ::std::os::raw::c_uint = 21; | |||
pub const PQC_ALG_KEM_MCELIECE6688128: ::std::os::raw::c_uint = 22; | |||
pub const PQC_ALG_KEM_MCELIECE6960119: ::std::os::raw::c_uint = 23; | |||
pub const PQC_ALG_KEM_MCELIECE8192128: ::std::os::raw::c_uint = 24; | |||
pub const PQC_ALG_KEM_MCELIECE348864F: ::std::os::raw::c_uint = 25; | |||
pub const PQC_ALG_KEM_MCELIECE460896F: ::std::os::raw::c_uint = 26; | |||
pub const PQC_ALG_KEM_MCELIECE6688128F: ::std::os::raw::c_uint = 27; | |||
pub const PQC_ALG_KEM_MCELIECE6960119F: ::std::os::raw::c_uint = 28; | |||
pub const PQC_ALG_KEM_MCELIECE8192128F: ::std::os::raw::c_uint = 29; | |||
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 30; | |||
pub type _bindgen_ty_2 = ::std::os::raw::c_uint; | |||
#[repr(C)] | |||
#[derive(Debug, Copy, Clone)] | |||
pub struct params_t { | |||
pub struct pqc_ctx_t { | |||
pub alg_id: u8, | |||
pub alg_name: *const ::std::os::raw::c_char, | |||
pub prv_key_bsz: u32, | |||
@@ -271,87 +282,87 @@ pub struct params_t { | |||
>, | |||
} | |||
#[test] | |||
fn bindgen_test_layout_params_t() { | |||
fn bindgen_test_layout_pqc_ctx_t() { | |||
assert_eq!( | |||
::std::mem::size_of::<params_t>(), | |||
::std::mem::size_of::<pqc_ctx_t>(), | |||
40usize, | |||
concat!("Size of: ", stringify!(params_t)) | |||
concat!("Size of: ", stringify!(pqc_ctx_t)) | |||
); | |||
assert_eq!( | |||
::std::mem::align_of::<params_t>(), | |||
::std::mem::align_of::<pqc_ctx_t>(), | |||
8usize, | |||
concat!("Alignment of ", stringify!(params_t)) | |||
concat!("Alignment of ", stringify!(pqc_ctx_t)) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<params_t>())).alg_id as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).alg_id as *const _ as usize }, | |||
0usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(params_t), | |||
stringify!(pqc_ctx_t), | |||
"::", | |||
stringify!(alg_id) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<params_t>())).alg_name as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).alg_name as *const _ as usize }, | |||
8usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(params_t), | |||
stringify!(pqc_ctx_t), | |||
"::", | |||
stringify!(alg_name) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<params_t>())).prv_key_bsz as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).prv_key_bsz as *const _ as usize }, | |||
16usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(params_t), | |||
stringify!(pqc_ctx_t), | |||
"::", | |||
stringify!(prv_key_bsz) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<params_t>())).pub_key_bsz as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).pub_key_bsz as *const _ as usize }, | |||
20usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(params_t), | |||
stringify!(pqc_ctx_t), | |||
"::", | |||
stringify!(pub_key_bsz) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<params_t>())).is_kem as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).is_kem as *const _ as usize }, | |||
24usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(params_t), | |||
stringify!(pqc_ctx_t), | |||
"::", | |||
stringify!(is_kem) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<params_t>())).keygen as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).keygen as *const _ as usize }, | |||
32usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(params_t), | |||
stringify!(pqc_ctx_t), | |||
"::", | |||
stringify!(keygen) | |||
) | |||
); | |||
} | |||
impl Default for params_t { | |||
impl Default for pqc_ctx_t { | |||
fn default() -> Self { | |||
unsafe { ::std::mem::zeroed() } | |||
} | |||
} | |||
#[repr(C)] | |||
#[derive(Debug, Copy, Clone)] | |||
pub struct kem_params_t { | |||
pub p: params_t, | |||
pub struct pqc_kem_ctx_t { | |||
pub p: pqc_ctx_t, | |||
pub ciphertext_bsz: u32, | |||
pub secret_bsz: u32, | |||
pub encapsulate: ::std::option::Option< | |||
@@ -362,77 +373,77 @@ pub struct kem_params_t { | |||
>, | |||
} | |||
#[test] | |||
fn bindgen_test_layout_kem_params_t() { | |||
fn bindgen_test_layout_pqc_kem_ctx_t() { | |||
assert_eq!( | |||
::std::mem::size_of::<kem_params_t>(), | |||
::std::mem::size_of::<pqc_kem_ctx_t>(), | |||
64usize, | |||
concat!("Size of: ", stringify!(kem_params_t)) | |||
concat!("Size of: ", stringify!(pqc_kem_ctx_t)) | |||
); | |||
assert_eq!( | |||
::std::mem::align_of::<kem_params_t>(), | |||
::std::mem::align_of::<pqc_kem_ctx_t>(), | |||
8usize, | |||
concat!("Alignment of ", stringify!(kem_params_t)) | |||
concat!("Alignment of ", stringify!(pqc_kem_ctx_t)) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<kem_params_t>())).p as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).p as *const _ as usize }, | |||
0usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(kem_params_t), | |||
stringify!(pqc_kem_ctx_t), | |||
"::", | |||
stringify!(p) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<kem_params_t>())).ciphertext_bsz as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).ciphertext_bsz as *const _ as usize }, | |||
40usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(kem_params_t), | |||
stringify!(pqc_kem_ctx_t), | |||
"::", | |||
stringify!(ciphertext_bsz) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<kem_params_t>())).secret_bsz as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).secret_bsz as *const _ as usize }, | |||
44usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(kem_params_t), | |||
stringify!(pqc_kem_ctx_t), | |||
"::", | |||
stringify!(secret_bsz) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<kem_params_t>())).encapsulate as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).encapsulate as *const _ as usize }, | |||
48usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(kem_params_t), | |||
stringify!(pqc_kem_ctx_t), | |||
"::", | |||
stringify!(encapsulate) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<kem_params_t>())).decapsulate as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).decapsulate as *const _ as usize }, | |||
56usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(kem_params_t), | |||
stringify!(pqc_kem_ctx_t), | |||
"::", | |||
stringify!(decapsulate) | |||
) | |||
); | |||
} | |||
impl Default for kem_params_t { | |||
impl Default for pqc_kem_ctx_t { | |||
fn default() -> Self { | |||
unsafe { ::std::mem::zeroed() } | |||
} | |||
} | |||
#[repr(C)] | |||
#[derive(Debug, Copy, Clone)] | |||
pub struct sig_params_t { | |||
pub p: params_t, | |||
pub struct pqc_sig_ctx_t { | |||
pub p: pqc_ctx_t, | |||
pub sign_bsz: u32, | |||
pub sign: ::std::option::Option< | |||
unsafe extern "C" fn( | |||
@@ -454,73 +465,77 @@ pub struct sig_params_t { | |||
>, | |||
} | |||
#[test] | |||
fn bindgen_test_layout_sig_params_t() { | |||
fn bindgen_test_layout_pqc_sig_ctx_t() { | |||
assert_eq!( | |||
::std::mem::size_of::<sig_params_t>(), | |||
::std::mem::size_of::<pqc_sig_ctx_t>(), | |||
64usize, | |||
concat!("Size of: ", stringify!(sig_params_t)) | |||
concat!("Size of: ", stringify!(pqc_sig_ctx_t)) | |||
); | |||
assert_eq!( | |||
::std::mem::align_of::<sig_params_t>(), | |||
::std::mem::align_of::<pqc_sig_ctx_t>(), | |||
8usize, | |||
concat!("Alignment of ", stringify!(sig_params_t)) | |||
concat!("Alignment of ", stringify!(pqc_sig_ctx_t)) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<sig_params_t>())).p as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).p as *const _ as usize }, | |||
0usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(sig_params_t), | |||
stringify!(pqc_sig_ctx_t), | |||
"::", | |||
stringify!(p) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<sig_params_t>())).sign_bsz as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).sign_bsz as *const _ as usize }, | |||
40usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(sig_params_t), | |||
stringify!(pqc_sig_ctx_t), | |||
"::", | |||
stringify!(sign_bsz) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<sig_params_t>())).sign as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).sign as *const _ as usize }, | |||
48usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(sig_params_t), | |||
stringify!(pqc_sig_ctx_t), | |||
"::", | |||
stringify!(sign) | |||
) | |||
); | |||
assert_eq!( | |||
unsafe { &(*(::std::ptr::null::<sig_params_t>())).verify as *const _ as usize }, | |||
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).verify as *const _ as usize }, | |||
56usize, | |||
concat!( | |||
"Offset of field: ", | |||
stringify!(sig_params_t), | |||
stringify!(pqc_sig_ctx_t), | |||
"::", | |||
stringify!(verify) | |||
) | |||
); | |||
} | |||
impl Default for sig_params_t { | |||
impl Default for pqc_sig_ctx_t { | |||
fn default() -> Self { | |||
unsafe { ::std::mem::zeroed() } | |||
} | |||
} | |||
extern "C" { | |||
pub fn pqc_keygen(p: *const params_t, pk: *mut u8, sk: *mut u8) -> bool; | |||
pub fn pqc_keygen(p: *const pqc_ctx_t, pk: *mut u8, sk: *mut u8) -> bool; | |||
} | |||
extern "C" { | |||
pub fn pqc_kem_encapsulate(p: *const params_t, ct: *mut u8, ss: *mut u8, pk: *const u8) | |||
-> bool; | |||
pub fn pqc_kem_encapsulate( | |||
p: *const pqc_ctx_t, | |||
ct: *mut u8, | |||
ss: *mut u8, | |||
pk: *const u8, | |||
) -> bool; | |||
} | |||
extern "C" { | |||
pub fn pqc_kem_decapsulate( | |||
p: *const params_t, | |||
p: *const pqc_ctx_t, | |||
ss: *mut u8, | |||
ct: *const u8, | |||
sk: *const u8, | |||
@@ -528,7 +543,7 @@ extern "C" { | |||
} | |||
extern "C" { | |||
pub fn pqc_sig_create( | |||
p: *const params_t, | |||
p: *const pqc_ctx_t, | |||
sig: *mut u8, | |||
siglen: *mut u64, | |||
m: *const u8, | |||
@@ -538,7 +553,7 @@ extern "C" { | |||
} | |||
extern "C" { | |||
pub fn pqc_sig_verify( | |||
p: *const params_t, | |||
p: *const pqc_ctx_t, | |||
sig: *const u8, | |||
siglen: u64, | |||
m: *const u8, | |||
@@ -547,8 +562,23 @@ extern "C" { | |||
) -> bool; | |||
} | |||
extern "C" { | |||
pub fn pqc_kem_alg_by_id(id: u8) -> *const params_t; | |||
pub fn pqc_kem_alg_by_id(id: u8) -> *const pqc_ctx_t; | |||
} | |||
extern "C" { | |||
pub fn pqc_sig_alg_by_id(id: u8) -> *const pqc_ctx_t; | |||
} | |||
extern "C" { | |||
pub fn pqc_ciphertext_bsz(p: *const pqc_ctx_t) -> u32; | |||
} | |||
extern "C" { | |||
pub fn pqc_shared_secret_bsz(p: *const pqc_ctx_t) -> u32; | |||
} | |||
extern "C" { | |||
pub fn pqc_signature_bsz(p: *const pqc_ctx_t) -> u32; | |||
} | |||
extern "C" { | |||
pub fn pqc_public_key_bsz(p: *const pqc_ctx_t) -> u32; | |||
} | |||
extern "C" { | |||
pub fn pqc_sig_alg_by_id(id: u8) -> *const params_t; | |||
pub fn pqc_private_key_bsz(p: *const pqc_ctx_t) -> u32; | |||
} |
@@ -4,12 +4,14 @@ extern crate bindgen; | |||
fn main() { | |||
let dst = Config::new("../../../") | |||
.profile("Release") | |||
.profile("Debug") | |||
.very_verbose(true) | |||
.build(); | |||
.build(); | |||
println!("cargo:rustc-link-search=native={}/lib", dst.display()); | |||
println!("cargo:rustc-link-lib=static=pqc_s"); | |||
// For some reason GetX86Info symbol is undefined in the pqc_s. Hence this line | |||
println!("cargo:rustc-link-lib=static=cpu_features"); | |||
println!("cargo:rerun-if-changed=../../../capi/*,../../../kem/*,../../../sign/*,../../../../public/pqc/pqc.h"); | |||
// The bindgen::Builder is the main entry point | |||
@@ -0,0 +1,17 @@ | |||
# Source files of this Falcon implementation, relative to this directory.
set(SRC_CLEAN_FALCON
  api.c
  codec.c
  common.c
  falcon.c
  fft.c
  fpr.c
  keygen.c
  rng.c
  sign.c
  vrfy.c)

# Register the signature algorithm through the project's define_sig_alg()
# helper. The source list is quoted so it is passed as a single list argument.
# NOTE(review): the meaning of each positional argument is defined by
# define_sig_alg(), which is not visible here -- confirm against its definition.
define_sig_alg(
  falcon1024_clean
  PQCLEAN_FALCON_CLEAN
  "${SRC_CLEAN_FALCON}"
  "${CMAKE_CURRENT_SOURCE_DIR}")
@@ -0,0 +1,77 @@ | |||
/* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
*/ | |||
#include <common/utils.h> | |||
#include "inner.h" | |||
#include "api.h" | |||
// Forward declarations of signature API | |||
int Zf(keypair)(uint8_t *pk, size_t pk_sz, uint8_t *sk, size_t sk_sz, size_t logn); | |||
int Zf(sign)(uint8_t *sm, size_t *smsz, const uint8_t *m, size_t msz, | |||
const uint8_t *sk, size_t sk_sz, size_t logn); | |||
int Zf(verify)(const uint8_t *m, size_t msz, const uint8_t *sm, size_t smsz, | |||
const uint8_t *pk, size_t pk_sz, size_t logn, size_t sig_sz); | |||
// Integration wrappers | |||
// Falcon 512 | |||
int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
return Zf(keypair)(pk, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES, | |||
sk, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES, 9); | |||
} | |||
int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
return Zf(sign)(sig, siglen, m, mlen, sk, | |||
PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES, 9); | |||
} | |||
int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
return Zf(verify)(m,mlen,sig,siglen,pk, | |||
PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES,9, | |||
PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES); | |||
} | |||
// Falcon 1024 | |||
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
return Zf(keypair)(pk, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES, | |||
sk, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES, 10); | |||
} | |||
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
return Zf(sign)(sig, siglen, m, mlen, sk, | |||
PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES, 10); | |||
} | |||
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
return Zf(verify)(m,mlen,sig,siglen,pk, | |||
PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES,10, | |||
PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES); | |||
} |
@@ -0,0 +1,37 @@ | |||
#ifndef PQCLEAN_FALCON_CLEAN_API_H | |||
#define PQCLEAN_FALCON_CLEAN_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES 897 | |||
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES 1281 | |||
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES 690 | |||
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_ALGNAME "Falcon512" | |||
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES 1793 | |||
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES 2305 | |||
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES 1330 | |||
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME "Falcon1024" | |||
int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
#endif |
@@ -0,0 +1,570 @@ | |||
/* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
*/ | |||
/* | |||
* Encoding/decoding of keys and signatures. | |||
*/ | |||
#include "inner.h" | |||
/* see inner.h */ | |||
size_t | |||
Zf(modq_encode)( | |||
void *out, size_t max_out_len, | |||
const uint16_t *x, unsigned logn) | |||
{ | |||
size_t n, out_len, u; | |||
uint8_t *buf; | |||
uint32_t acc; | |||
int acc_len; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
if (x[u] >= 12289) { | |||
return 0; | |||
} | |||
} | |||
out_len = ((n * 14) + 7) >> 3; | |||
if (out == NULL) { | |||
return out_len; | |||
} | |||
if (out_len > max_out_len) { | |||
return 0; | |||
} | |||
buf = out; | |||
acc = 0; | |||
acc_len = 0; | |||
for (u = 0; u < n; u ++) { | |||
acc = (acc << 14) | x[u]; | |||
acc_len += 14; | |||
while (acc_len >= 8) { | |||
acc_len -= 8; | |||
*buf ++ = (uint8_t)(acc >> acc_len); | |||
} | |||
} | |||
if (acc_len > 0) { | |||
*buf = (uint8_t)(acc << (8 - acc_len)); | |||
} | |||
return out_len; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(modq_decode)( | |||
uint16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len) | |||
{ | |||
size_t n, in_len, u; | |||
const uint8_t *buf; | |||
uint32_t acc; | |||
int acc_len; | |||
n = (size_t)1 << logn; | |||
in_len = ((n * 14) + 7) >> 3; | |||
if (in_len > max_in_len) { | |||
return 0; | |||
} | |||
buf = in; | |||
acc = 0; | |||
acc_len = 0; | |||
u = 0; | |||
while (u < n) { | |||
acc = (acc << 8) | (*buf ++); | |||
acc_len += 8; | |||
if (acc_len >= 14) { | |||
unsigned w; | |||
acc_len -= 14; | |||
w = (acc >> acc_len) & 0x3FFF; | |||
if (w >= 12289) { | |||
return 0; | |||
} | |||
x[u ++] = (uint16_t)w; | |||
} | |||
} | |||
if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { | |||
return 0; | |||
} | |||
return in_len; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(trim_i16_encode)( | |||
void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn, unsigned bits) | |||
{ | |||
size_t n, u, out_len; | |||
int minv, maxv; | |||
uint8_t *buf; | |||
uint32_t acc, mask; | |||
unsigned acc_len; | |||
n = (size_t)1 << logn; | |||
maxv = (1 << (bits - 1)) - 1; | |||
minv = -maxv; | |||
for (u = 0; u < n; u ++) { | |||
if (x[u] < minv || x[u] > maxv) { | |||
return 0; | |||
} | |||
} | |||
out_len = ((n * bits) + 7) >> 3; | |||
if (out == NULL) { | |||
return out_len; | |||
} | |||
if (out_len > max_out_len) { | |||
return 0; | |||
} | |||
buf = out; | |||
acc = 0; | |||
acc_len = 0; | |||
mask = ((uint32_t)1 << bits) - 1; | |||
for (u = 0; u < n; u ++) { | |||
acc = (acc << bits) | ((uint16_t)x[u] & mask); | |||
acc_len += bits; | |||
while (acc_len >= 8) { | |||
acc_len -= 8; | |||
*buf ++ = (uint8_t)(acc >> acc_len); | |||
} | |||
} | |||
if (acc_len > 0) { | |||
*buf ++ = (uint8_t)(acc << (8 - acc_len)); | |||
} | |||
return out_len; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(trim_i16_decode)( | |||
int16_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len) | |||
{ | |||
size_t n, in_len; | |||
const uint8_t *buf; | |||
size_t u; | |||
uint32_t acc, mask1, mask2; | |||
unsigned acc_len; | |||
n = (size_t)1 << logn; | |||
in_len = ((n * bits) + 7) >> 3; | |||
if (in_len > max_in_len) { | |||
return 0; | |||
} | |||
buf = in; | |||
u = 0; | |||
acc = 0; | |||
acc_len = 0; | |||
mask1 = ((uint32_t)1 << bits) - 1; | |||
mask2 = (uint32_t)1 << (bits - 1); | |||
while (u < n) { | |||
acc = (acc << 8) | *buf ++; | |||
acc_len += 8; | |||
while (acc_len >= bits && u < n) { | |||
uint32_t w; | |||
acc_len -= bits; | |||
w = (acc >> acc_len) & mask1; | |||
w |= -(w & mask2); | |||
if (w == -mask2) { | |||
/* | |||
* The -2^(bits-1) value is forbidden. | |||
*/ | |||
return 0; | |||
} | |||
w |= -(w & mask2); | |||
x[u ++] = (int16_t)*(int32_t *)&w; | |||
} | |||
} | |||
if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { | |||
/* | |||
* Extra bits in the last byte must be zero. | |||
*/ | |||
return 0; | |||
} | |||
return in_len; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(trim_i8_encode)( | |||
void *out, size_t max_out_len, | |||
const int8_t *x, unsigned logn, unsigned bits) | |||
{ | |||
size_t n, u, out_len; | |||
int minv, maxv; | |||
uint8_t *buf; | |||
uint32_t acc, mask; | |||
unsigned acc_len; | |||
n = (size_t)1 << logn; | |||
maxv = (1 << (bits - 1)) - 1; | |||
minv = -maxv; | |||
for (u = 0; u < n; u ++) { | |||
if (x[u] < minv || x[u] > maxv) { | |||
return 0; | |||
} | |||
} | |||
out_len = ((n * bits) + 7) >> 3; | |||
if (out == NULL) { | |||
return out_len; | |||
} | |||
if (out_len > max_out_len) { | |||
return 0; | |||
} | |||
buf = out; | |||
acc = 0; | |||
acc_len = 0; | |||
mask = ((uint32_t)1 << bits) - 1; | |||
for (u = 0; u < n; u ++) { | |||
acc = (acc << bits) | ((uint8_t)x[u] & mask); | |||
acc_len += bits; | |||
while (acc_len >= 8) { | |||
acc_len -= 8; | |||
*buf ++ = (uint8_t)(acc >> acc_len); | |||
} | |||
} | |||
if (acc_len > 0) { | |||
*buf ++ = (uint8_t)(acc << (8 - acc_len)); | |||
} | |||
return out_len; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(trim_i8_decode)( | |||
int8_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len) | |||
{ | |||
size_t n, in_len; | |||
const uint8_t *buf; | |||
size_t u; | |||
uint32_t acc, mask1, mask2; | |||
unsigned acc_len; | |||
n = (size_t)1 << logn; | |||
in_len = ((n * bits) + 7) >> 3; | |||
if (in_len > max_in_len) { | |||
return 0; | |||
} | |||
buf = in; | |||
u = 0; | |||
acc = 0; | |||
acc_len = 0; | |||
mask1 = ((uint32_t)1 << bits) - 1; | |||
mask2 = (uint32_t)1 << (bits - 1); | |||
while (u < n) { | |||
acc = (acc << 8) | *buf ++; | |||
acc_len += 8; | |||
while (acc_len >= bits && u < n) { | |||
uint32_t w; | |||
acc_len -= bits; | |||
w = (acc >> acc_len) & mask1; | |||
w |= -(w & mask2); | |||
if (w == -mask2) { | |||
/* | |||
* The -2^(bits-1) value is forbidden. | |||
*/ | |||
return 0; | |||
} | |||
x[u ++] = (int8_t)*(int32_t *)&w; | |||
} | |||
} | |||
if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { | |||
/* | |||
* Extra bits in the last byte must be zero. | |||
*/ | |||
return 0; | |||
} | |||
return in_len; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(comp_encode)( | |||
void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn) | |||
{ | |||
uint8_t *buf; | |||
size_t n, u, v; | |||
uint32_t acc; | |||
unsigned acc_len; | |||
n = (size_t)1 << logn; | |||
buf = out; | |||
/* | |||
* Make sure that all values are within the -2047..+2047 range. | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
if (x[u] < -2047 || x[u] > +2047) { | |||
return 0; | |||
} | |||
} | |||
acc = 0; | |||
acc_len = 0; | |||
v = 0; | |||
for (u = 0; u < n; u ++) { | |||
int t; | |||
unsigned w; | |||
/* | |||
* Get sign and absolute value of next integer; push the | |||
* sign bit. | |||
*/ | |||
acc <<= 1; | |||
t = x[u]; | |||
if (t < 0) { | |||
t = -t; | |||
acc |= 1; | |||
} | |||
w = (unsigned)t; | |||
/* | |||
* Push the low 7 bits of the absolute value. | |||
*/ | |||
acc <<= 7; | |||
acc |= w & 127u; | |||
w >>= 7; | |||
/* | |||
* We pushed exactly 8 bits. | |||
*/ | |||
acc_len += 8; | |||
/* | |||
* Push as many zeros as necessary, then a one. Since the | |||
* absolute value is at most 2047, w can only range up to | |||
* 15 at this point, thus we will add at most 16 bits | |||
* here. With the 8 bits above and possibly up to 7 bits | |||
* from previous iterations, we may go up to 31 bits, which | |||
* will fit in the accumulator, which is an uint32_t. | |||
*/ | |||
acc <<= (w + 1); | |||
acc |= 1; | |||
acc_len += w + 1; | |||
/* | |||
* Produce all full bytes. | |||
*/ | |||
while (acc_len >= 8) { | |||
acc_len -= 8; | |||
if (buf != NULL) { | |||
if (v >= max_out_len) { | |||
return 0; | |||
} | |||
buf[v] = (uint8_t)(acc >> acc_len); | |||
} | |||
v ++; | |||
} | |||
} | |||
/* | |||
* Flush remaining bits (if any). | |||
*/ | |||
if (acc_len > 0) { | |||
if (buf != NULL) { | |||
if (v >= max_out_len) { | |||
return 0; | |||
} | |||
buf[v] = (uint8_t)(acc << (8 - acc_len)); | |||
} | |||
v ++; | |||
} | |||
return v; | |||
} | |||
/* see inner.h */ | |||
size_t | |||
Zf(comp_decode)( | |||
int16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len) | |||
{ | |||
const uint8_t *buf; | |||
size_t n, u, v; | |||
uint32_t acc; | |||
unsigned acc_len; | |||
n = (size_t)1 << logn; | |||
buf = in; | |||
acc = 0; | |||
acc_len = 0; | |||
v = 0; | |||
for (u = 0; u < n; u ++) { | |||
unsigned b, s, m; | |||
/* | |||
* Get next eight bits: sign and low seven bits of the | |||
* absolute value. | |||
*/ | |||
if (v >= max_in_len) { | |||
return 0; | |||
} | |||
acc = (acc << 8) | (uint32_t)buf[v ++]; | |||
b = acc >> acc_len; | |||
s = b & 128; | |||
m = b & 127; | |||
/* | |||
* Get next bits until a 1 is reached. | |||
*/ | |||
for (;;) { | |||
if (acc_len == 0) { | |||
if (v >= max_in_len) { | |||
return 0; | |||
} | |||
acc = (acc << 8) | (uint32_t)buf[v ++]; | |||
acc_len = 8; | |||
} | |||
acc_len --; | |||
if (((acc >> acc_len) & 1) != 0) { | |||
break; | |||
} | |||
m += 128; | |||
if (m > 2047) { | |||
return 0; | |||
} | |||
} | |||
/* | |||
* "-0" is forbidden. | |||
*/ | |||
if (s && m == 0) { | |||
return 0; | |||
} | |||
x[u] = (int16_t)(s ? -(int)m : (int)m); | |||
} | |||
/* | |||
* Unused bits in the last byte must be zero. | |||
*/ | |||
if ((acc & ((1u << acc_len) - 1u)) != 0) { | |||
return 0; | |||
} | |||
return v; | |||
} | |||
/* | |||
* Key elements and signatures are polynomials with small integer | |||
* coefficients. Here are some statistics gathered over many | |||
* generated key pairs (10000 or more for each degree): | |||
* | |||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||
* 1 2 129 56.31 143 60.02 | |||
* 2 4 123 40.93 160 46.52 | |||
* 3 8 97 28.97 159 38.01 | |||
* 4 16 100 21.48 154 32.50 | |||
* 5 32 71 15.41 151 29.36 | |||
* 6 64 59 11.07 138 27.77 | |||
* 7 128 39 7.91 144 27.00 | |||
* 8 256 32 5.63 148 26.61 | |||
* 9 512 22 4.00 137 26.46 | |||
* 10 1024 15 2.84 146 26.41 | |||
* | |||
* We want a compact storage format for private key, and, as part of | |||
* key generation, we are allowed to reject some keys which would | |||
* otherwise be fine (this does not induce any noticeable vulnerability | |||
* as long as we reject only a small proportion of possible keys). | |||
* Hence, we enforce at key generation time maximum values for the | |||
* elements of f, g, F and G, so that their encoding can be expressed | |||
* in fixed-width values. Limits have been chosen so that generated | |||
* keys are almost always within bounds, thus impacting neither | |||
* security nor performance. | |||
* | |||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||
*/ | |||
const uint8_t Zf(max_fg_bits)[] = { | |||
0, /* unused */ | |||
8, | |||
8, | |||
8, | |||
8, | |||
8, | |||
7, | |||
7, | |||
6, | |||
6, | |||
5 | |||
}; | |||
const uint8_t Zf(max_FG_bits)[] = { | |||
0, /* unused */ | |||
8, | |||
8, | |||
8, | |||
8, | |||
8, | |||
8, | |||
8, | |||
8, | |||
8, | |||
8 | |||
}; | |||
/* | |||
* When generating a new key pair, we can always reject keys which | |||
* feature an abnormally large coefficient. This can also be done for | |||
* signatures, albeit with some care: in case the signature process is | |||
* used in a derandomized setup (explicitly seeded with the message and | |||
* private key), we have to follow the specification faithfully, and the | |||
* specification only enforces a limit on the L2 norm of the signature | |||
* vector. The limit on the L2 norm implies that the absolute value of | |||
* a coefficient of the signature cannot be more than the following: | |||
* | |||
* log(n) n max sig coeff (theoretical) | |||
* 1 2 412 | |||
* 2 4 583 | |||
* 3 8 824 | |||
* 4 16 1166 | |||
* 5 32 1649 | |||
* 6 64 2332 | |||
* 7 128 3299 | |||
* 8 256 4665 | |||
* 9 512 6598 | |||
* 10 1024 9331 | |||
* | |||
* However, the largest signature coefficient observed during our | |||
* experiments was 1077 (in absolute value), hence we can assume that, | |||
* with overwhelming probability, signature coefficients will fit | |||
* in -2047..2047, i.e. 12 bits. | |||
*/ | |||
const uint8_t Zf(max_sig_bits)[] = { | |||
0, /* unused */ | |||
10, | |||
11, | |||
11, | |||
12, | |||
12, | |||
12, | |||
12, | |||
12, | |||
12, | |||
12 | |||
}; |
@@ -0,0 +1,298 @@ | |||
/* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
*/ | |||
#include "inner.h" | |||
/* see inner.h */ | |||
void | |||
Zf(hash_to_point_vartime)( | |||
shake256incctx *sc, | |||
uint16_t *x, unsigned logn) | |||
{ | |||
/* | |||
* This is the straightforward per-the-spec implementation. It | |||
* is not constant-time, thus it might reveal information on the | |||
* plaintext (at least, enough to check the plaintext against a | |||
* list of potential plaintexts) in a scenario where the | |||
* attacker does not have access to the signature value or to | |||
* the public key, but knows the nonce (without knowledge of the | |||
* nonce, the hashed output cannot be matched against potential | |||
* plaintexts). | |||
*/ | |||
size_t n; | |||
n = (size_t)1 << logn; | |||
while (n > 0) { | |||
uint8_t buf[2]; | |||
uint32_t w; | |||
shake256_inc_squeeze((void *)buf, sizeof buf, sc); | |||
w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; | |||
if (w < 61445) { | |||
while (w >= 12289) { | |||
w -= 12289; | |||
} | |||
*x ++ = (uint16_t)w; | |||
n --; | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
Zf(hash_to_point_ct)( | |||
shake256incctx *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp) | |||
{ | |||
/* | |||
* Each 16-bit sample is a value in 0..65535. The value is | |||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||
* and rejected otherwise; thus, each sample has probability | |||
* about 0.93758 of being selected. | |||
* | |||
* We want to oversample enough to be sure that we will | |||
* have enough values with probability at least 1 - 2^(-256). | |||
* Depending on degree N, this leads to the following | |||
* required oversampling: | |||
* | |||
* logn n oversampling | |||
* 1 2 65 | |||
* 2 4 67 | |||
* 3 8 71 | |||
* 4 16 77 | |||
* 5 32 86 | |||
* 6 64 100 | |||
* 7 128 122 | |||
* 8 256 154 | |||
* 9 512 205 | |||
* 10 1024 287 | |||
* | |||
* If logn >= 7, then the provided temporary buffer is large | |||
* enough. Otherwise, we use a stack buffer of 63 entries | |||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||
*/ | |||
static const uint16_t overtab[] = { | |||
0, /* unused */ | |||
65, | |||
67, | |||
71, | |||
77, | |||
86, | |||
100, | |||
122, | |||
154, | |||
205, | |||
287 | |||
}; | |||
unsigned n, n2, u, m, p, over; | |||
uint16_t *tt1, tt2[63]; | |||
/* | |||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||
* We also reduce modulo q the values; rejected values are set | |||
* to 0xFFFF. | |||
*/ | |||
n = 1U << logn; | |||
n2 = n << 1; | |||
over = overtab[logn]; | |||
m = n + over; | |||
tt1 = (uint16_t *)tmp; | |||
for (u = 0; u < m; u ++) { | |||
uint8_t buf[2]; | |||
uint32_t w, wr; | |||
shake256_inc_squeeze(buf, sizeof buf, sc); | |||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||
wr |= ((w - 61445) >> 31) - 1; | |||
if (u < n) { | |||
x[u] = (uint16_t)wr; | |||
} else if (u < n2) { | |||
tt1[u - n] = (uint16_t)wr; | |||
} else { | |||
tt2[u - n2] = (uint16_t)wr; | |||
} | |||
} | |||
/* | |||
* Now we must "squeeze out" the invalid values. We do this in | |||
* a logarithmic sequence of passes; each pass computes where a | |||
* value should go, and moves it down by 'p' slots if necessary, | |||
* where 'p' uses an increasing powers-of-two scale. It can be | |||
* shown that in all cases where the loop decides that a value | |||
* has to be moved down by p slots, the destination slot is | |||
* "free" (i.e. contains an invalid value). | |||
*/ | |||
for (p = 1; p <= over; p <<= 1) { | |||
unsigned v; | |||
/* | |||
* In the loop below: | |||
* | |||
* - v contains the index of the final destination of | |||
* the value; it is recomputed dynamically based on | |||
* whether values are valid or not. | |||
* | |||
* - u is the index of the value we consider ("source"); | |||
* its address is s. | |||
* | |||
* - The loop may swap the value with the one at index | |||
* u-p. The address of the swap destination is d. | |||
*/ | |||
v = 0; | |||
for (u = 0; u < m; u ++) { | |||
uint16_t *s, *d; | |||
unsigned j, sv, dv, mk; | |||
if (u < n) { | |||
s = &x[u]; | |||
} else if (u < n2) { | |||
s = &tt1[u - n]; | |||
} else { | |||
s = &tt2[u - n2]; | |||
} | |||
sv = *s; | |||
/* | |||
* The value in sv should ultimately go to | |||
* address v, i.e. jump back by u-v slots. | |||
*/ | |||
j = u - v; | |||
/* | |||
* We increment v for the next iteration, but | |||
* only if the source value is valid. The mask | |||
* 'mk' is -1 if the value is valid, 0 otherwise, | |||
* so we _subtract_ mk. | |||
*/ | |||
mk = (sv >> 15) - 1U; | |||
v -= mk; | |||
/* | |||
* In this loop we consider jumps by p slots; if | |||
* u < p then there is nothing more to do. | |||
*/ | |||
if (u < p) { | |||
continue; | |||
} | |||
/* | |||
* Destination for the swap: value at address u-p. | |||
*/ | |||
if ((u - p) < n) { | |||
d = &x[u - p]; | |||
} else if ((u - p) < n2) { | |||
d = &tt1[(u - p) - n]; | |||
} else { | |||
d = &tt2[(u - p) - n2]; | |||
} | |||
dv = *d; | |||
/* | |||
* The swap should be performed only if the source | |||
* is valid AND the jump j has its 'p' bit set. | |||
*/ | |||
mk &= -(((j & p) + 0x1FF) >> 9); | |||
*s = (uint16_t)(sv ^ (mk & (sv ^ dv))); | |||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv))); | |||
} | |||
} | |||
} | |||
/* | |||
* Acceptance bound for the (squared) l2-norm of the signature depends | |||
* on the degree. This array is indexed by logn (1 to 10). These bounds | |||
* are _inclusive_ (they are equal to floor(beta^2)). | |||
*/ | |||
static const uint32_t l2bound[] = {
    0,         /* logn = 0: unused */
    101498,    /* logn = 1  */
    208714,    /* logn = 2  */
    428865,    /* logn = 3  */
    892039,    /* logn = 4  */
    1852696,   /* logn = 5  */
    3842630,   /* logn = 6  */
    7959734,   /* logn = 7  */
    16468416,  /* logn = 8  */
    34034726,  /* logn = 9  */
    70265242   /* logn = 10 */
};
/* see inner.h */ | |||
int | |||
Zf(is_short)( | |||
const int16_t *s1, const int16_t *s2, unsigned logn) | |||
{ | |||
/* | |||
* We use the l2-norm. Code below uses only 32-bit operations to | |||
* compute the square of the norm with saturation to 2^32-1 if | |||
* the value exceeds 2^31-1. | |||
*/ | |||
size_t n, u; | |||
uint32_t s, ng; | |||
n = (size_t)1 << logn; | |||
s = 0; | |||
ng = 0; | |||
for (u = 0; u < n; u ++) { | |||
int32_t z; | |||
z = s1[u]; | |||
s += (uint32_t)(z * z); | |||
ng |= s; | |||
z = s2[u]; | |||
s += (uint32_t)(z * z); | |||
ng |= s; | |||
} | |||
s |= -(ng >> 31); | |||
return s <= l2bound[logn]; | |||
} | |||
/* see inner.h */ | |||
int | |||
Zf(is_short_half)( | |||
uint32_t sqn, const int16_t *s2, unsigned logn) | |||
{ | |||
size_t n, u; | |||
uint32_t ng; | |||
n = (size_t)1 << logn; | |||
ng = -(sqn >> 31); | |||
for (u = 0; u < n; u ++) { | |||
int32_t z; | |||
z = s2[u]; | |||
sqn += (uint32_t)(z * z); | |||
ng |= sqn; | |||
} | |||
sqn |= -(ng >> 31); | |||
return sqn <= l2bound[logn]; | |||
} |
@@ -1,15 +0,0 @@ | |||
# Source files of the AVX2 implementation of Falcon-1024, relative to this
# directory.
set(SRC_AVX2_FALCON1024
    codec.c
    common.c
    fft.c
    fpr.c
    keygen.c
    pqclean.c
    rng.c
    sign.c
    vrfy.c
)

# Hand the algorithm over to the project helper: target name, symbol
# namespace, source list (passed as one quoted list) and source directory.
# NOTE(review): define_sig_alg is defined elsewhere; its exact behaviour is
# not visible from this file.
define_sig_alg(
    falcon1024_avx2
    PQCLEAN_FALCON1024_AVX2
    "${SRC_AVX2_FALCON1024}"
    "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -1,80 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON1024_AVX2_API_H | |||
#define PQCLEAN_FALCON1024_AVX2_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
/* Exact size of an encoded private key (sk), in bytes. */
#define PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES 2305 | |||
/* Exact size of an encoded public key (pk), in bytes. */
#define PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES 1793 | |||
/* Maximum signature length, in bytes (signature length is variable). */
#define PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES 1330 | |||
/* Algorithm name string. */
#define PQCLEAN_FALCON1024_AVX2_CRYPTO_ALGNAME "Falcon-1024" | |||
/* | |||
* Generate a new key pair. Public key goes into pk[], private key in sk[]. | |||
* Key sizes are exact (in bytes): | |||
* public (pk): PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES | |||
* private (sk): PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk); | |||
/* | |||
* Compute a signature on a provided message (m, mlen), with a given | |||
* private key (sk). Signature is written in sig[], with length written | |||
* into *siglen. Signature length is variable; maximum signature length | |||
* (in bytes) is PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES. | |||
* | |||
* sig[], m[] and sk[] may overlap each other arbitrarily. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
/* | |||
* Verify a signature (sig, siglen) on a message (m, mlen) with a given | |||
* public key (pk). | |||
* | |||
* sig[], m[] and pk[] may overlap each other arbitrarily. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
/* | |||
* Compute a signature on a message and pack the signature and message | |||
* into a single object, written into sm[]. The length of that output is | |||
* written in *smlen; that length may be larger than the message length | |||
* (mlen) by up to PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES. | |||
* | |||
* sm[] and m[] may overlap each other arbitrarily; however, sm[] shall | |||
* not overlap with sk[]. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
/* | |||
* Open a signed message object (sm, smlen) and verify the signature; | |||
* on success, the message itself is written into m[] and its length | |||
* into *mlen. The message is shorter than the signed message object, | |||
* but the size difference depends on the signature value; the difference | |||
* may range up to PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES. | |||
* | |||
* m[], sm[] and pk[] may overlap each other arbitrarily. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk); | |||
#endif |
@@ -1,555 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Encoding/decoding of keys and signatures. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    /*
     * Pack the 2^logn values of x[] as consecutive 14-bit fields, most
     * significant bit first. Every value must be below 12289.
     *
     * Returns the encoded length in bytes. If out is NULL, only the
     * length is computed. Returns 0 if a value is out of range or if
     * max_out_len is too small.
     */
    const size_t count = (size_t)1 << logn;
    const size_t packed_len = ((count * 14) + 7) >> 3;
    uint8_t *dst;
    uint32_t window = 0;
    int pending = 0;
    size_t i;

    for (i = 0; i < count; i ++) {
        if (x[i] >= 12289) {
            return 0;
        }
    }
    if (out == NULL) {
        return packed_len;
    }
    if (max_out_len < packed_len) {
        return 0;
    }

    dst = out;
    for (i = 0; i < count; i ++) {
        window = (window << 14) | x[i];
        pending += 14;
        /* Emit every complete byte currently held in the window. */
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Left-align the leftover bits; the low bits are zero padding. */
        *dst = (uint8_t)(window << (8 - pending));
    }
    return packed_len;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    /*
     * Inverse of the 14-bit packing: read 2^logn fields of 14 bits each
     * from in[] into x[].
     *
     * Returns the number of bytes consumed, or 0 if the input is too
     * short, a decoded value is >= 12289, or the padding bits of the
     * last byte are not all zero.
     */
    const size_t count = (size_t)1 << logn;
    const size_t packed_len = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0;
    int avail = 0;
    size_t i = 0;

    if (max_in_len < packed_len) {
        return 0;
    }
    while (i < count) {
        unsigned coeff;

        window = (window << 8) | (*src ++);
        avail += 8;
        if (avail < 14) {
            /* Not enough bits yet for a full field. */
            continue;
        }
        avail -= 14;
        coeff = (window >> avail) & 0x3FFF;
        if (coeff >= 12289) {
            return 0;
        }
        x[i ++] = (uint16_t)coeff;
    }
    /* Whatever is left in the window is padding and must be zero. */
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        return 0;
    }
    return packed_len;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    /*
     * Pack the 2^logn signed values of x[] as fixed-width fields of
     * 'bits' bits each (two's complement, truncated), most significant
     * bit first. Accepted range is -(2^(bits-1)-1) .. +(2^(bits-1)-1).
     *
     * Returns the encoded length in bytes (length only if out is NULL),
     * or 0 if a value is out of range or the output buffer is too small.
     */
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    size_t packed_len, i;
    uint8_t *dst;
    uint32_t window, field_mask;
    unsigned pending;

    for (i = 0; i < count; i ++) {
        if (x[i] < lo || x[i] > hi) {
            return 0;
        }
    }
    packed_len = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return packed_len;
    }
    if (max_out_len < packed_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    field_mask = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint16_t)x[i] & field_mask);
        pending += bits;
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Final partial byte, padded with zero bits on the right. */
        *dst = (uint8_t)(window << (8 - pending));
    }
    return packed_len;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    /*
     * Decode 2^logn signed fields of 'bits' bits each (most significant
     * bit first) from in[] into x[].
     *
     * Returns the number of bytes consumed, or 0 if the input is too
     * short, a field holds the forbidden value -2^(bits-1), or the
     * padding bits of the last byte are not all zero.
     *
     * 'bits' must be at least 1: the code shifts by (bits - 1).
     */
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;  /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);  /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            /*
             * Sign-extend to 32 bits. This is done once; the
             * operation is idempotent, so the second copy that
             * used to follow the range check was redundant.
             */
            w |= -(w & mask2);
            if (w == -mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            x[u ++] = (int16_t) * (int32_t *)&w;
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    /*
     * Same packing as the 16-bit variant, for int8_t input: 2^logn
     * fields of 'bits' bits each, most significant bit first. Accepted
     * range is -(2^(bits-1)-1) .. +(2^(bits-1)-1).
     *
     * Returns the encoded length in bytes (length only if out is NULL),
     * or 0 if a value is out of range or the output buffer is too small.
     */
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    size_t packed_len, i;
    uint8_t *dst;
    uint32_t window, field_mask;
    unsigned pending;

    for (i = 0; i < count; i ++) {
        if (x[i] < lo || x[i] > hi) {
            return 0;
        }
    }
    packed_len = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return packed_len;
    }
    if (max_out_len < packed_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    field_mask = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint8_t)x[i] & field_mask);
        pending += bits;
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Final partial byte, padded with zero bits on the right. */
        *dst = (uint8_t)(window << (8 - pending));
    }
    return packed_len;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    /*
     * Decode 2^logn signed fields of 'bits' bits each (most significant
     * bit first) from in[] into x[].
     *
     * Returns the number of bytes consumed, or 0 if the input is too
     * short, a field holds the forbidden value -2^(bits-1), or the
     * padding bits of the last byte are not all zero.
     */
    const size_t count = (size_t)1 << logn;
    const size_t packed_len = ((count * bits) + 7) >> 3;
    const uint8_t *src;
    uint32_t window, field_mask, sign_bit;
    unsigned avail;
    size_t i;

    if (max_in_len < packed_len) {
        return 0;
    }
    src = in;
    i = 0;
    window = 0;
    avail = 0;
    field_mask = ((uint32_t)1 << bits) - 1;
    sign_bit = (uint32_t)1 << (bits - 1);
    while (i < count) {
        window = (window << 8) | *src ++;
        avail += 8;
        /* Extract as many fields as the window currently holds. */
        while (avail >= bits && i < count) {
            uint32_t field;

            avail -= bits;
            field = (window >> avail) & field_mask;
            /* Sign-extend the field to 32 bits. */
            field |= -(field & sign_bit);
            if (field == -sign_bit) {
                /* -2^(bits-1) is not a valid encoded value. */
                return 0;
            }
            x[i ++] = (int8_t) * (int32_t *)&field;
        }
    }
    /* Leftover bits are padding and must all be zero. */
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        return 0;
    }
    return packed_len;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    /*
     * Variable-length encoding of 2^logn values in -2047..+2047. Each
     * value is written as: one sign bit, the low 7 bits of the absolute
     * value, then (absolute value >> 7) zero bits followed by a one.
     *
     * Returns the number of bytes produced (or that would be produced,
     * if out is NULL). Returns 0 if a value is out of range or if the
     * output does not fit in max_out_len bytes.
     */
    const size_t count = (size_t)1 << logn;
    uint8_t *dst = out;
    uint32_t window = 0;
    unsigned pending = 0;
    size_t i, written = 0;

    for (i = 0; i < count; i ++) {
        if (x[i] < -2047 || x[i] > +2047) {
            return 0;
        }
    }

    for (i = 0; i < count; i ++) {
        int val = x[i];
        unsigned sign = (val < 0);
        unsigned mag = (unsigned)(sign ? -val : val);
        unsigned high = mag >> 7;

        /* Sign bit followed by the low 7 bits: exactly 8 bits. */
        window = (window << 8) | (sign << 7) | (mag & 127u);

        /*
         * 'high' zeros then a one. Since mag <= 2047, high <= 15, so
         * at most 16 bits are added here; with the 8 bits above and
         * up to 7 bits carried over, the 32-bit window holds at most
         * 31 live bits.
         */
        window = (window << (high + 1)) | 1;
        pending += 8 + high + 1;

        /* Flush all complete bytes. */
        while (pending >= 8) {
            pending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(window >> pending);
            }
            written ++;
        }
    }

    /* Flush the final partial byte, zero-padded on the right. */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(window << (8 - pending));
        }
        written ++;
    }
    return written;
}
/* see inner.h */ | |||
size_t
PQCLEAN_FALCON1024_AVX2_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    /*
     * Decode 2^logn values in compressed format from in[] into x[].
     * Each value is: one sign bit, the seven low bits of the absolute
     * value, then the high bits in unary (zeros followed by a one).
     *
     * Returned value is the number of bytes consumed, or 0 on error:
     *  - input exhausted before all values were read;
     *  - absolute value greater than 2047;
     *  - "-0" (sign bit set with a zero magnitude);
     *  - non-zero padding bits in the last byte.
     *
     * The last two checks make the encoding unique: the matching
     * encoder never produces them, and accepting them would let
     * several distinct byte strings decode to the same vector.
     */
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached; each 0 adds 128 to
         * the magnitude.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero has exactly one encoding.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}
/* | |||
* Key elements and signatures are polynomials with small integer | |||
* coefficients. Here are some statistics gathered over many | |||
* generated key pairs (10000 or more for each degree): | |||
* | |||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||
* 1 2 129 56.31 143 60.02 | |||
* 2 4 123 40.93 160 46.52 | |||
* 3 8 97 28.97 159 38.01 | |||
* 4 16 100 21.48 154 32.50 | |||
* 5 32 71 15.41 151 29.36 | |||
* 6 64 59 11.07 138 27.77 | |||
* 7 128 39 7.91 144 27.00 | |||
* 8 256 32 5.63 148 26.61 | |||
* 9 512 22 4.00 137 26.46 | |||
* 10 1024 15 2.84 146 26.41 | |||
* | |||
* We want a compact storage format for private key, and, as part of | |||
* key generation, we are allowed to reject some keys which would | |||
* otherwise be fine (this does not induce any noticeable vulnerability | |||
* as long as we reject only a small proportion of possible keys). | |||
* Hence, we enforce at key generation time maximum values for the | |||
* elements of f, g, F and G, so that their encoding can be expressed | |||
* in fixed-width values. Limits have been chosen so that generated | |||
 * keys are almost always within bounds, thus impacting neither
 * security nor performance.
* | |||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||
*/ | |||
/*
 * Fixed encoding width, in bits, for the coefficients of f and g.
 * One entry per logn value from 0 to 10; entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_AVX2_max_fg_bits[] = {
    0,              /* logn = 0 (unused) */
    8, 8, 8, 8, 8,  /* logn = 1..5 */
    7, 7,           /* logn = 6..7 */
    6, 6,           /* logn = 8..9 */
    5               /* logn = 10 */
};
/*
 * Fixed encoding width, in bits, for the coefficients of F and G.
 * One entry per logn value from 0 to 10; entry 0 is unused and every
 * other entry is 8.
 */
const uint8_t PQCLEAN_FALCON1024_AVX2_max_FG_bits[] = {
    0,              /* logn = 0 (unused) */
    8, 8, 8, 8, 8,  /* logn = 1..5 */
    8, 8, 8, 8, 8   /* logn = 6..10 */
};
/* | |||
* When generating a new key pair, we can always reject keys which | |||
* feature an abnormally large coefficient. This can also be done for | |||
* signatures, albeit with some care: in case the signature process is | |||
* used in a derandomized setup (explicitly seeded with the message and | |||
* private key), we have to follow the specification faithfully, and the | |||
* specification only enforces a limit on the L2 norm of the signature | |||
* vector. The limit on the L2 norm implies that the absolute value of | |||
* a coefficient of the signature cannot be more than the following: | |||
* | |||
* log(n) n max sig coeff (theoretical) | |||
* 1 2 412 | |||
* 2 4 583 | |||
* 3 8 824 | |||
* 4 16 1166 | |||
* 5 32 1649 | |||
* 6 64 2332 | |||
* 7 128 3299 | |||
* 8 256 4665 | |||
* 9 512 6598 | |||
* 10 1024 9331 | |||
* | |||
 * However, the largest signature coefficient observed during our
 * experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit | |||
* in -2047..2047, i.e. 12 bits. | |||
*/ | |||
/*
 * Bit width used for signature coefficients.
 * One entry per logn value from 0 to 10; entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_AVX2_max_sig_bits[] = {
    0,                          /* logn = 0 (unused) */
    10,                         /* logn = 1 */
    11, 11,                     /* logn = 2..3 */
    12, 12, 12, 12, 12, 12, 12  /* logn = 4..10 */
};
@@ -1,294 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Support functions for signatures (hash-to-point, norm). | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime( | |||
inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn) { | |||
/* | |||
* This is the straightforward per-the-spec implementation. It | |||
* is not constant-time, thus it might reveal information on the | |||
* plaintext (at least, enough to check the plaintext against a | |||
* list of potential plaintexts) in a scenario where the | |||
* attacker does not have access to the signature value or to | |||
* the public key, but knows the nonce (without knowledge of the | |||
* nonce, the hashed output cannot be matched against potential | |||
* plaintexts). | |||
*/ | |||
size_t n; | |||
n = (size_t)1 << logn; | |||
while (n > 0) { | |||
uint8_t buf[2]; | |||
uint32_t w; | |||
inner_shake256_extract(sc, (void *)buf, sizeof buf); | |||
w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; | |||
if (w < 61445) { | |||
while (w >= 12289) { | |||
w -= 12289; | |||
} | |||
*x ++ = (uint16_t)w; | |||
n --; | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_AVX2_hash_to_point_ct( | |||
inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||
/* | |||
* Each 16-bit sample is a value in 0..65535. The value is | |||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||
* and rejected otherwise; thus, each sample has probability | |||
* about 0.93758 of being selected. | |||
* | |||
* We want to oversample enough to be sure that we will | |||
* have enough values with probability at least 1 - 2^(-256). | |||
* Depending on degree N, this leads to the following | |||
* required oversampling: | |||
* | |||
* logn n oversampling | |||
* 1 2 65 | |||
* 2 4 67 | |||
* 3 8 71 | |||
* 4 16 77 | |||
* 5 32 86 | |||
* 6 64 100 | |||
* 7 128 122 | |||
* 8 256 154 | |||
* 9 512 205 | |||
* 10 1024 287 | |||
* | |||
* If logn >= 7, then the provided temporary buffer is large | |||
* enough. Otherwise, we use a stack buffer of 63 entries | |||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||
*/ | |||
static const uint16_t overtab[] = { | |||
0, /* unused */ | |||
65, | |||
67, | |||
71, | |||
77, | |||
86, | |||
100, | |||
122, | |||
154, | |||
205, | |||
287 | |||
}; | |||
unsigned n, n2, u, m, p, over; | |||
uint16_t *tt1, tt2[63]; | |||
/* | |||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||
* We also reduce modulo q the values; rejected values are set | |||
* to 0xFFFF. | |||
*/ | |||
n = 1U << logn; | |||
n2 = n << 1; | |||
over = overtab[logn]; | |||
m = n + over; | |||
tt1 = (uint16_t *)tmp; | |||
for (u = 0; u < m; u ++) { | |||
uint8_t buf[2]; | |||
uint32_t w, wr; | |||
inner_shake256_extract(sc, buf, sizeof buf); | |||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||
wr |= ((w - 61445) >> 31) - 1; | |||
if (u < n) { | |||
x[u] = (uint16_t)wr; | |||
} else if (u < n2) { | |||
tt1[u - n] = (uint16_t)wr; | |||
} else { | |||
tt2[u - n2] = (uint16_t)wr; | |||
} | |||
} | |||
/* | |||
* Now we must "squeeze out" the invalid values. We do this in | |||
* a logarithmic sequence of passes; each pass computes where a | |||
* value should go, and moves it down by 'p' slots if necessary, | |||
* where 'p' uses an increasing powers-of-two scale. It can be | |||
* shown that in all cases where the loop decides that a value | |||
* has to be moved down by p slots, the destination slot is | |||
* "free" (i.e. contains an invalid value). | |||
*/ | |||
for (p = 1; p <= over; p <<= 1) { | |||
unsigned v; | |||
/* | |||
* In the loop below: | |||
* | |||
* - v contains the index of the final destination of | |||
* the value; it is recomputed dynamically based on | |||
* whether values are valid or not. | |||
* | |||
* - u is the index of the value we consider ("source"); | |||
* its address is s. | |||
* | |||
* - The loop may swap the value with the one at index | |||
* u-p. The address of the swap destination is d. | |||
*/ | |||
v = 0; | |||
for (u = 0; u < m; u ++) { | |||
uint16_t *s, *d; | |||
unsigned j, sv, dv, mk; | |||
if (u < n) { | |||
s = &x[u]; | |||
} else if (u < n2) { | |||
s = &tt1[u - n]; | |||
} else { | |||
s = &tt2[u - n2]; | |||
} | |||
sv = *s; | |||
/* | |||
* The value in sv should ultimately go to | |||
* address v, i.e. jump back by u-v slots. | |||
*/ | |||
j = u - v; | |||
/* | |||
* We increment v for the next iteration, but | |||
* only if the source value is valid. The mask | |||
* 'mk' is -1 if the value is valid, 0 otherwise, | |||
* so we _subtract_ mk. | |||
*/ | |||
mk = (sv >> 15) - 1U; | |||
v -= mk; | |||
/* | |||
* In this loop we consider jumps by p slots; if | |||
* u < p then there is nothing more to do. | |||
*/ | |||
if (u < p) { | |||
continue; | |||
} | |||
/* | |||
* Destination for the swap: value at address u-p. | |||
*/ | |||
if ((u - p) < n) { | |||
d = &x[u - p]; | |||
} else if ((u - p) < n2) { | |||
d = &tt1[(u - p) - n]; | |||
} else { | |||
d = &tt2[(u - p) - n2]; | |||
} | |||
dv = *d; | |||
/* | |||
* The swap should be performed only if the source | |||
* is valid AND the jump j has its 'p' bit set. | |||
*/ | |||
mk &= -(((j & p) + 0x1FF) >> 9); | |||
*s = (uint16_t)(sv ^ (mk & (sv ^ dv))); | |||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv))); | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
int
PQCLEAN_FALCON1024_AVX2_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * Returns 1 if the squared l2-norm of (s1, s2) is strictly below
     * the acceptance bound, 0 otherwise.
     *
     * Only 32-bit operations are used: 'overflow' accumulates the OR
     * of every partial sum, so if any of them reached 2^31 the final
     * value is saturated to 2^32-1 and the test fails.
     */
    const size_t count = (size_t)1 << logn;
    uint32_t norm, overflow, bound;
    size_t i;

    norm = 0;
    overflow = 0;
    for (i = 0; i < count; i ++) {
        int32_t a, b;

        a = s1[i];
        norm += (uint32_t)(a * a);
        overflow |= norm;
        b = s2[i];
        norm += (uint32_t)(b * b);
        overflow |= norm;
    }
    norm |= -(overflow >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024); the shift scales
     * the squared bound from degree 1024 down to degree 2^logn.
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return norm < bound;
}
/* see inner.h */ | |||
int
PQCLEAN_FALCON1024_AVX2_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    /*
     * Same test as is_short(), but the first half of the squared
     * norm is supplied in 'sqn'. An input 'sqn' with its top bit set
     * is treated as already saturated.
     */
    const size_t count = (size_t)1 << logn;
    uint32_t overflow, bound;
    size_t i;

    overflow = -(sqn >> 31);
    for (i = 0; i < count; i ++) {
        int32_t c;

        c = s2[i];
        sqn += (uint32_t)(c * c);
        overflow |= sqn;
    }
    /* Saturate to 2^32-1 if any partial sum reached 2^31. */
    sqn |= -(overflow >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqn < bound;
}
@@ -1,349 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON1024_AVX2_FPR_H | |||
#define PQCLEAN_FALCON1024_AVX2_FPR_H | |||
/* | |||
* Floating-point operations. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* ====================================================================== */ | |||
#include <immintrin.h> | |||
#include <math.h> | |||
/*
 * Multiply-then-add and multiply-then-subtract on vectors of four
 * doubles: FMADD(a, b, c) is a*b + c, FMSUB(a, b, c) is a*b - c.
 * Despite the names, each expands to a separate multiply intrinsic
 * followed by an add/subtract intrinsic, not to an FMA intrinsic.
 */
#define FMADD(a, b, c) \
    _mm256_add_pd(_mm256_mul_pd(a, b), c)
#define FMSUB(a, b, c) \
    _mm256_sub_pd(_mm256_mul_pd(a, b), c)
/* | |||
* We wrap the native 'double' type into a structure so that the C compiler | |||
* complains if we inadvertently use raw arithmetic operators on the 'fpr' | |||
* type instead of using the inline functions below. This should have no | |||
* extra runtime cost, since all the functions below are 'inline'. | |||
*/ | |||
/* Floating-point value: a 'double' wrapped in a one-member struct. */
typedef struct {
    double v;
} fpr;

/* Wrap a raw double into an fpr. */
static inline fpr
FPR(double v) {
    fpr wrapped = { v };

    return wrapped;
}

/* Convert a signed 64-bit integer to fpr (plain integer-to-double cast). */
static inline fpr
fpr_of(int64_t i) {
    fpr wrapped;

    wrapped.v = (double)i;
    return wrapped;
}
/* The value 12289 and its reciprocal. */
static const fpr fpr_q              = { 12289.0 };
static const fpr fpr_inverse_of_q   = { 1.0 / 12289.0 };

/*
 * Tuning constants used elsewhere in the implementation.
 * NOTE(review): their meaning (sigma-related parameters, norm bound)
 * is inferred from the names only -- confirm against the users.
 */
static const fpr fpr_inv_2sqrsigma0 = { 0.150865048875372721532312163019 };
static const fpr fpr_inv_sigma      = { 0.005819826392951607426919370871 };
static const fpr fpr_sigma_min_9    = { 1.291500756233514568549480827642 };
static const fpr fpr_sigma_min_10   = { 1.311734375905083682667395805765 };
static const fpr fpr_bnorm_max      = { 16822.4121 };

/* Natural logarithm of 2, and its reciprocal. */
static const fpr fpr_log2           = { 0.69314718055994530941723212146 };
static const fpr fpr_inv_log2       = { 1.4426950408889634073599246810 };

/* Small constants. */
static const fpr fpr_zero           = { 0.0 };
static const fpr fpr_one            = { 1.0 };
static const fpr fpr_two            = { 2.0 };
static const fpr fpr_onehalf        = { 0.5 };

/* 1/sqrt(2) and 1/sqrt(8). */
static const fpr fpr_invsqrt2       = { 0.707106781186547524400844362105 };
static const fpr fpr_invsqrt8       = { 0.353553390593273762200422181052 };

/*
 * Values around 2^31 and 2^63.
 * NOTE(review): 2^63-1 is not exactly representable as a double, so
 * the two "63m1" literals presumably round to +/-2^63 -- confirm that
 * the users tolerate this.
 */
static const fpr fpr_ptwo31         = { 2147483648.0 };
static const fpr fpr_ptwo31m1       = { 2147483647.0 };
static const fpr fpr_mtwo31m1       = { -2147483647.0 };
static const fpr fpr_ptwo63m1       = { 9223372036854775807.0 };
static const fpr fpr_mtwo63m1       = { -9223372036854775807.0 };
static const fpr fpr_ptwo63         = { 9223372036854775808.0 };
static inline int64_t | |||
fpr_rint(fpr x) { | |||
/* | |||
* We do not want to use llrint() since it might be not | |||
* constant-time. | |||
* | |||
* Suppose that x >= 0. If x >= 2^52, then it is already an | |||
* integer. Otherwise, if x < 2^52, then computing x+2^52 will | |||
* yield a value that will be rounded to the nearest integer | |||
* with exactly the right rules (round-to-nearest-even). | |||
* | |||
* In order to have constant-time processing, we must do the | |||
* computation for both x >= 0 and x < 0 cases, and use a | |||
* cast to an integer to access the sign and select the proper | |||
* value. Such casts also allow us to find out if |x| < 2^52. | |||
*/ | |||
int64_t sx, tx, rp, rn, m; | |||
uint32_t ub; | |||
sx = (int64_t)(x.v - 1.0); | |||
tx = (int64_t)x.v; | |||
rp = (int64_t)(x.v + 4503599627370496.0) - 4503599627370496; | |||
rn = (int64_t)(x.v - 4503599627370496.0) + 4503599627370496; | |||
/* | |||
* If tx >= 2^52 or tx < -2^52, then result is tx. | |||
* Otherwise, if sx >= 0, then result is rp. | |||
* Otherwise, result is rn. We use the fact that when x is | |||
* close to 0 (|x| <= 0.25) then both rp and rn are correct; | |||
* and if x is not close to 0, then trunc(x-1.0) yields the | |||
* appropriate sign. | |||
*/ | |||
/* | |||
* Clamp rp to zero if tx < 0. | |||
* Clamp rn to zero if tx >= 0. | |||
*/ | |||
m = sx >> 63; | |||
rn &= m; | |||
rp &= ~m; | |||
/* | |||
* Get the 12 upper bits of tx; if they are not all zeros or | |||
* all ones, then tx >= 2^52 or tx < -2^52, and we clamp both | |||
* rp and rn to zero. Otherwise, we clamp tx to zero. | |||
*/ | |||
ub = (uint32_t)((uint64_t)tx >> 52); | |||
m = -(int64_t)((((ub + 1) & 0xFFF) - 2) >> 31); | |||
rp &= m; | |||
rn &= m; | |||
tx &= ~m; | |||
/* | |||
* Only one of tx, rn or rp (at most) can be non-zero at this | |||
* point. | |||
*/ | |||
return tx | rn | rp; | |||
} | |||
static inline int64_t | |||
fpr_floor(fpr x) { | |||
int64_t r; | |||
/* | |||
* The cast performs a trunc() (rounding toward 0) and thus is | |||
* wrong by 1 for most negative values. The correction below is | |||
* constant-time as long as the compiler turns the | |||
* floating-point conversion result into a 0/1 integer without a | |||
* conditional branch or another non-constant-time construction. | |||
* This should hold on all modern architectures with an FPU (and | |||
* if it is false on a given arch, then chances are that the FPU | |||
* itself is not constant-time, making the point moot). | |||
*/ | |||
r = (int64_t)x.v; | |||
return r - (x.v < (double)r); | |||
} | |||
static inline int64_t | |||
fpr_trunc(fpr x) { | |||
return (int64_t)x.v; | |||
} | |||
static inline fpr | |||
fpr_add(fpr x, fpr y) { | |||
return FPR(x.v + y.v); | |||
} | |||
static inline fpr | |||
fpr_sub(fpr x, fpr y) { | |||
return FPR(x.v - y.v); | |||
} | |||
static inline fpr | |||
fpr_neg(fpr x) { | |||
return FPR(-x.v); | |||
} | |||
static inline fpr | |||
fpr_half(fpr x) { | |||
return FPR(x.v * 0.5); | |||
} | |||
static inline fpr | |||
fpr_double(fpr x) { | |||
return FPR(x.v + x.v); | |||
} | |||
static inline fpr | |||
fpr_mul(fpr x, fpr y) { | |||
return FPR(x.v * y.v); | |||
} | |||
static inline fpr | |||
fpr_sqr(fpr x) { | |||
return FPR(x.v * x.v); | |||
} | |||
static inline fpr | |||
fpr_inv(fpr x) { | |||
return FPR(1.0 / x.v); | |||
} | |||
static inline fpr | |||
fpr_div(fpr x, fpr y) { | |||
return FPR(x.v / y.v); | |||
} | |||
/*
 * Replace *t with its square root, in place, using SSE2 intrinsics:
 * the value is broadcast to both lanes, the packed square root is
 * taken, and the low lane is stored back.
 */
static inline void
fpr_sqrt_avx2(double *t) {
    _mm_storel_pd(t, _mm_sqrt_pd(_mm_load1_pd(t)));
}
static inline fpr | |||
fpr_sqrt(fpr x) { | |||
/* | |||
* We prefer not to have a dependency on libm when it can be | |||
* avoided. On x86, calling the sqrt() libm function inlines | |||
* the relevant opcode (fsqrt or sqrtsd, depending on whether | |||
* the 387 FPU or SSE2 is used for floating-point operations) | |||
* but then makes an optional call to the library function | |||
* for proper error handling, in case the operand is negative. | |||
* | |||
* To avoid this dependency, we use intrinsics or inline assembly | |||
* on recognized platforms: | |||
* | |||
* - If AVX2 is explicitly enabled, then we use SSE2 intrinsics. | |||
* | |||
* - On GCC/Clang with SSE maths, we use SSE2 intrinsics. | |||
* | |||
* - On GCC/Clang on i386, or MSVC on i386, we use inline assembly | |||
* to call the 387 FPU fsqrt opcode. | |||
* | |||
* - On GCC/Clang/XLC on PowerPC, we use inline assembly to call | |||
* the fsqrt opcode (Clang needs a special hack). | |||
* | |||
* - On GCC/Clang on ARM with hardware floating-point, we use | |||
* inline assembly to call the vqsrt.f64 opcode. Due to a | |||
* complex ecosystem of compilers and assembly syntaxes, we | |||
* have to call it "fsqrt" or "fsqrtd", depending on case. | |||
* | |||
* If the platform is not recognized, a call to the system | |||
* library function sqrt() is performed. On some compilers, this | |||
* may actually inline the relevant opcode, and call the library | |||
* function only when the input is invalid (e.g. negative); | |||
* Falcon never actually calls sqrt() on a negative value, but | |||
* the dependency to libm will still be there. | |||
*/ | |||
fpr_sqrt_avx2(&x.v); | |||
return x; | |||
} | |||
static inline int | |||
fpr_lt(fpr x, fpr y) { | |||
return x.v < y.v; | |||
} | |||
static inline uint64_t | |||
fpr_expm_p63(fpr x, fpr ccs) { | |||
/* | |||
* Polynomial approximation of exp(-x) is taken from FACCT: | |||
* https://eprint.iacr.org/2018/1234 | |||
* Specifically, values are extracted from the implementation | |||
* referenced from the FACCT article, and available at: | |||
* https://github.com/raykzhao/gaussian | |||
* Tests over more than 24 billions of random inputs in the | |||
* 0..log(2) range have never shown a deviation larger than | |||
* 2^(-50) from the true mathematical value. | |||
*/ | |||
/* | |||
* AVX2 implementation uses more operations than Horner's method, | |||
* but with a lower expression tree depth. This helps because | |||
* additions and multiplications have a latency of 4 cycles on | |||
* a Skylake, but the CPU can issue two of them per cycle. | |||
*/ | |||
static const union { | |||
double d[12]; | |||
__m256d v[3]; | |||
} c = { | |||
{ | |||
0.999999999999994892974086724280, | |||
0.500000000000019206858326015208, | |||
0.166666666666984014666397229121, | |||
0.041666666666110491190622155955, | |||
0.008333333327800835146903501993, | |||
0.001388888894063186997887560103, | |||
0.000198412739277311890541063977, | |||
0.000024801566833585381209939524, | |||
0.000002755586350219122514855659, | |||
0.000000275607356160477811864927, | |||
0.000000025299506379442070029551, | |||
0.000000002073772366009083061987 | |||
} | |||
}; | |||
double d1, d2, d4, d8, y; | |||
__m256d d14, d58, d9c; | |||
d1 = -x.v; | |||
d2 = d1 * d1; | |||
d4 = d2 * d2; | |||
d8 = d4 * d4; | |||
d14 = _mm256_set_pd(d4, d2 * d1, d2, d1); | |||
d58 = _mm256_mul_pd(d14, _mm256_set1_pd(d4)); | |||
d9c = _mm256_mul_pd(d14, _mm256_set1_pd(d8)); | |||
d14 = _mm256_mul_pd(d14, _mm256_loadu_pd(&c.d[0])); | |||
d58 = FMADD(d58, _mm256_loadu_pd(&c.d[4]), d14); | |||
d9c = FMADD(d9c, _mm256_loadu_pd(&c.d[8]), d58); | |||
d9c = _mm256_hadd_pd(d9c, d9c); | |||
y = 1.0 + _mm_cvtsd_f64(_mm256_castpd256_pd128(d9c)) // _mm256_cvtsd_f64(d9c) | |||
+ _mm_cvtsd_f64(_mm256_extractf128_pd(d9c, 1)); | |||
y *= ccs.v; | |||
/* | |||
* Final conversion goes through int64_t first, because that's what | |||
* the underlying opcode (vcvttsd2si) will do, and we know that the | |||
* result will fit, since x >= 0 and ccs < 1. If we did the | |||
* conversion directly to uint64_t, then the compiler would add some | |||
* extra code to cover the case of a source value of 2^63 or more, | |||
* and though the alternate path would never be exercised, the | |||
* extra comparison would cost us some cycles. | |||
*/ | |||
return (uint64_t)(int64_t)(y * fpr_ptwo63.v); | |||
} | |||
#define fpr_gm_tab PQCLEAN_FALCON1024_AVX2_fpr_gm_tab | |||
extern const fpr fpr_gm_tab[]; | |||
#define fpr_p2_tab PQCLEAN_FALCON1024_AVX2_fpr_p2_tab | |||
extern const fpr fpr_p2_tab[]; | |||
/* ====================================================================== */ | |||
#endif |
@@ -1,826 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON1024_AVX2_INNER_H | |||
#define PQCLEAN_FALCON1024_AVX2_INNER_H | |||
/* | |||
* Internal functions for Falcon. This is not the API intended to be | |||
* used by applications; instead, this internal API provides all the | |||
* primitives on which wrappers build to provide external APIs. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* | |||
* IMPORTANT API RULES | |||
* ------------------- | |||
* | |||
* This API has some non-trivial usage rules: | |||
* | |||
* | |||
* - All public functions (i.e. the non-static ones) must be referenced | |||
* with the PQCLEAN_FALCON1024_AVX2_ macro (e.g. PQCLEAN_FALCON1024_AVX2_verify_raw for the verify_raw() | |||
* function). That macro adds a prefix to the name, which is | |||
* configurable with the FALCON_PREFIX macro. This allows compiling | |||
* the code into a specific "namespace" and potentially including | |||
* several versions of this code into a single application (e.g. to | |||
* have an AVX2 and a non-AVX2 variants and select the one to use at | |||
* runtime based on availability of AVX2 opcodes). | |||
* | |||
 * - Functions that need temporary buffers expect them as a final
* tmp[] array of type uint8_t*, with a size which is documented for | |||
* each function. However, most have some alignment requirements, | |||
* because they will use the array to store 16-bit, 32-bit or 64-bit | |||
* values (e.g. uint64_t or double). The caller must ensure proper | |||
* alignment. What happens on unaligned access depends on the | |||
* underlying architecture, ranging from a slight time penalty | |||
* to immediate termination of the process. | |||
* | |||
* - Some functions rely on specific rounding rules and precision for | |||
* floating-point numbers. On some systems (in particular 32-bit x86 | |||
 *   with the 387 FPU), this requires setting a hardware control
* word. The caller MUST use set_fpu_cw() to ensure proper precision: | |||
* | |||
* oldcw = set_fpu_cw(2); | |||
* PQCLEAN_FALCON1024_AVX2_sign_dyn(...); | |||
* set_fpu_cw(oldcw); | |||
* | |||
* On systems where the native floating-point precision is already | |||
* proper, or integer-based emulation is used, the set_fpu_cw() | |||
* function does nothing, so it can be called systematically. | |||
*/ | |||
#include "fips202.h" | |||
#include "fpr.h" | |||
#include <stdint.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
/* | |||
* Some computations with floating-point elements, in particular | |||
* rounding to the nearest integer, rely on operations using _exactly_ | |||
* the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit | |||
* x86, the 387 FPU may be used (depending on the target OS) and, in | |||
* that case, may use more precision bits (i.e. 64 bits, for an 80-bit | |||
* total type length); to prevent miscomputations, we define an explicit | |||
* function that modifies the precision in the FPU control word. | |||
* | |||
* set_fpu_cw() sets the precision to the provided value, and returns | |||
* the previously set precision; callers are supposed to restore the | |||
* previous precision on exit. The correct (52-bit) precision is | |||
* configured with the value "2". On unsupported compilers, or on | |||
* targets other than 32-bit x86, or when the native 'double' type is | |||
* not used, the set_fpu_cw() function does nothing at all. | |||
*/ | |||
/*
 * No-op variant: the FPU control word is left untouched and the
 * requested precision value is handed back as the "previous" one.
 */
static inline unsigned
set_fpu_cw(unsigned x) {
    const unsigned previous = x;

    return previous;
}
/* ==================================================================== */ | |||
/* | |||
* SHAKE256 implementation (shake.c). | |||
* | |||
* API is defined to be easily replaced with the fips202.h API defined | |||
* as part of PQClean. | |||
*/ | |||
/*
 * Map the inner SHAKE256 names onto the incremental SHAKE256 API of
 * fips202.h. Note that 'extract' reorders its arguments: the context
 * comes first here but last in shake256_inc_squeeze().
 */
#define inner_shake256_context \
    shake256incctx
#define inner_shake256_init(sc) \
    shake256_inc_init(sc)
#define inner_shake256_inject(sc, in, len) \
    shake256_inc_absorb(sc, in, len)
#define inner_shake256_flip(sc) \
    shake256_inc_finalize(sc)
#define inner_shake256_extract(sc, out, len) \
    shake256_inc_squeeze(out, len, sc)
#define inner_shake256_ctx_release(sc) \
    shake256_inc_ctx_release(sc)
/* ==================================================================== */ | |||
/* | |||
* Encoding/decoding functions (codec.c). | |||
* | |||
* Encoding functions take as parameters an output buffer (out) with | |||
* a given maximum length (max_out_len); returned value is the actual | |||
* number of bytes which have been written. If the output buffer is | |||
* not large enough, then 0 is returned (some bytes may have been | |||
* written to the buffer). If 'out' is NULL, then 'max_out_len' is | |||
* ignored; instead, the function computes and returns the actual | |||
* required output length (in bytes). | |||
* | |||
* Decoding functions take as parameters an input buffer (in) with | |||
* its maximum length (max_in_len); returned value is the actual number | |||
* of bytes that have been read from the buffer. If the provided length | |||
* is too short, then 0 is returned. | |||
* | |||
* Values to encode or decode are vectors of integers, with N = 2^logn | |||
* elements. | |||
* | |||
* Three encoding formats are defined: | |||
* | |||
* - modq: sequence of values modulo 12289, each encoded over exactly | |||
* 14 bits. The encoder and decoder verify that integers are within | |||
* the valid range (0..12288). Values are arrays of uint16. | |||
* | |||
* - trim: sequence of signed integers, a specified number of bits | |||
* each. The number of bits is provided as parameter and includes | |||
* the sign bit. Each integer x must be such that |x| < 2^(bits-1) | |||
* (which means that the -2^(bits-1) value is forbidden); encode and | |||
* decode functions check that property. Values are arrays of | |||
* int16_t or int8_t, corresponding to names 'trim_i16' and | |||
* 'trim_i8', respectively. | |||
* | |||
* - comp: variable-length encoding for signed integers; each integer | |||
* uses a minimum of 9 bits, possibly more. This is normally used | |||
* only for signatures. | |||
* | |||
*/ | |||
size_t PQCLEAN_FALCON1024_AVX2_modq_encode(void *out, size_t max_out_len, | |||
const uint16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON1024_AVX2_trim_i16_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON1024_AVX2_trim_i8_encode(void *out, size_t max_out_len, | |||
const int8_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON1024_AVX2_comp_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON1024_AVX2_modq_decode(uint16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_AVX2_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_AVX2_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_AVX2_comp_decode(int16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
/* | |||
* Number of bits for key elements, indexed by logn (1 to 10). This | |||
* is at most 8 bits for all degrees, but some degrees may have shorter | |||
* elements. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_fg_bits[]; | |||
extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_FG_bits[]; | |||
/* | |||
* Maximum size, in bits, of elements in a signature, indexed by logn | |||
* (1 to 10). The size includes the sign bit. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_sig_bits[]; | |||
/* ==================================================================== */ | |||
/* | |||
* Support functions used for both signature generation and signature | |||
* verification (common.c). | |||
*/ | |||
/* | |||
* From a SHAKE256 context (must be already flipped), produce a new | |||
* point. This is the non-constant-time version, which may leak enough | |||
* information to serve as a stop condition on a brute force attack on | |||
* the hashed message (provided that the nonce value is known). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn); | |||
/* | |||
* From a SHAKE256 context (must be already flipped), produce a new | |||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes. | |||
* This function is constant-time but is typically more expensive than | |||
* PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(). | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. This compares the appropriate norm of the | |||
* vector with the acceptance bound. Returned value is 1 on success | |||
* (vector is short enough to be acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. Instead of the first half s1, this | |||
* function receives the "saturated squared norm" of s1, i.e. the | |||
* sum of the squares of the coordinates of s1 (saturated at 2^32-1 | |||
* if the sum exceeds 2^31-1). | |||
* | |||
* Returned value is 1 on success (vector is short enough to be | |||
* acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature verification functions (vrfy.c). | |||
*/ | |||
/* | |||
* Convert a public key to NTT + Montgomery format. Conversion is done | |||
* in place. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_to_ntt_monty(uint16_t *h, unsigned logn); | |||
/* | |||
* Internal signature verification code: | |||
* c0[] contains the hashed nonce+message | |||
* s2[] is the decoded signature | |||
* h[] contains the public key, in NTT + Montgomery format | |||
* logn is the degree log | |||
* tmp[] temporary, must have at least 2*2^logn bytes | |||
* Returned value is 1 on success, 0 on error. | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute the public key h[], given the private key elements f[] and | |||
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial | |||
* modulus. This function returns 1 on success, 0 on error (an error is | |||
* reported if f is not invertible mod phi mod q). | |||
* | |||
* The tmp[] array must have room for at least 2*2^logn elements. | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_compute_public(uint16_t *h, | |||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Recompute the fourth private key element. Private key consists in | |||
* four polynomials with small coefficients f, g, F and G, which are | |||
* such that fG - gF = q mod phi; furthermore, f is invertible modulo | |||
* phi and modulo q. This function recomputes G from f, g and F. | |||
* | |||
* The tmp[] array must have room for at least 4*2^logn bytes. | |||
* | |||
* Returned value is 1 on success, 0 on error (f not invertible). | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp); | |||
/* | |||
* Test whether a given polynomial is invertible modulo phi and q. | |||
* Polynomial coefficients are small integers. | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_is_invertible( | |||
const int16_t *s2, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Count the number of elements of value zero in the NTT representation | |||
* of the given polynomial: this is the number of primitive 2n-th roots | |||
* of unity (modulo q = 12289) that are roots of the provided polynomial | |||
* (taken modulo q). | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Internal signature verification with public key recovery: | |||
* h[] receives the public key (NOT in NTT/Montgomery format) | |||
* c0[] contains the hashed nonce+message | |||
* s1[] is the first signature half | |||
* s2[] is the second signature half | |||
* logn is the degree log | |||
* tmp[] temporary, must have at least 2*2^logn bytes | |||
* Returned value is 1 on success, 0 on error. Success is returned if | |||
* the signature is a short enough vector; in that case, the public | |||
* key has been written to h[]. However, the caller must still | |||
* verify that h[] is the correct value (e.g. with regard to a known | |||
* hash of the public key). | |||
* | |||
* h[] may not overlap with any of the other arrays. | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_verify_recover(uint16_t *h, | |||
const uint16_t *c0, const int16_t *s1, const int16_t *s2, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Implementation of floating-point real numbers (fpr.h, fpr.c). | |||
*/ | |||
/* | |||
* Real numbers are implemented by an extra header file, included below. | |||
* This is meant to support pluggable implementations. The default | |||
* implementation relies on the C type 'double'. | |||
* | |||
* The included file must define the following types, functions and | |||
* constants: | |||
* | |||
* fpr | |||
* type for a real number | |||
* | |||
* fpr fpr_of(int64_t i) | |||
* cast an integer into a real number; source must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_scaled(int64_t i, int sc) | |||
* compute i*2^sc as a real number; source 'i' must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_ldexp(fpr x, int e) | |||
* compute x*2^e | |||
* | |||
* int64_t fpr_rint(fpr x) | |||
* round x to the nearest integer; x must be in the -(2^63-1) | |||
* to +(2^63-1) range | |||
* | |||
* int64_t fpr_trunc(fpr x) | |||
* round to an integer; this rounds towards zero; value must | |||
* be in the -(2^63-1) to +(2^63-1) range | |||
* | |||
* fpr fpr_add(fpr x, fpr y) | |||
* compute x + y | |||
* | |||
* fpr fpr_sub(fpr x, fpr y) | |||
* compute x - y | |||
* | |||
* fpr fpr_neg(fpr x) | |||
* compute -x | |||
* | |||
* fpr fpr_half(fpr x) | |||
* compute x/2 | |||
* | |||
* fpr fpr_double(fpr x) | |||
* compute x*2 | |||
* | |||
* fpr fpr_mul(fpr x, fpr y) | |||
* compute x * y | |||
* | |||
* fpr fpr_sqr(fpr x) | |||
* compute x * x | |||
* | |||
* fpr fpr_inv(fpr x) | |||
* compute 1/x | |||
* | |||
* fpr fpr_div(fpr x, fpr y) | |||
* compute x/y | |||
* | |||
* fpr fpr_sqrt(fpr x) | |||
* compute the square root of x | |||
* | |||
* int fpr_lt(fpr x, fpr y) | |||
* return 1 if x < y, 0 otherwise | |||
* | |||
* uint64_t fpr_expm_p63(fpr x) | |||
* return exp(-x), assuming that 0 <= x < log(2). Returned value | |||
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), | |||
* rounded to the nearest integer). Computation should have a | |||
* precision of at least 45 bits. | |||
* | |||
* const fpr fpr_gm_tab[] | |||
* array of constants for FFT / iFFT | |||
* | |||
* const fpr fpr_p2_tab[] | |||
* precomputed powers of 2 (by index, 0 to 10) | |||
* | |||
* Constants of type 'fpr': | |||
* | |||
* fpr fpr_q 12289 | |||
* fpr fpr_inverse_of_q 1/12289 | |||
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) | |||
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) | |||
* fpr fpr_sigma_min_9 1.291500756233514568549480827642 | |||
* fpr fpr_sigma_min_10 1.311734375905083682667395805765 | |||
* fpr fpr_log2 log(2) | |||
* fpr fpr_inv_log2 1/log(2) | |||
* fpr fpr_bnorm_max 16822.4121 | |||
* fpr fpr_zero 0 | |||
* fpr fpr_one 1 | |||
* fpr fpr_two 2 | |||
* fpr fpr_onehalf 0.5 | |||
* fpr fpr_ptwo31 2^31 | |||
* fpr fpr_ptwo31m1 2^31-1 | |||
* fpr fpr_mtwo31m1 -(2^31-1) | |||
* fpr fpr_ptwo63m1 2^63-1 | |||
* fpr fpr_mtwo63m1 -(2^63-1) | |||
* fpr fpr_ptwo63 2^63 | |||
*/ | |||
/* ==================================================================== */ | |||
/* | |||
* RNG (rng.c). | |||
* | |||
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 | |||
* context (flipped) and is used for bulk pseudorandom generation. | |||
* A system-dependent seed generator is also provided. | |||
*/ | |||
/* | |||
* Obtain a random seed from the system RNG. | |||
* | |||
* Returned value is 1 on success, 0 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_AVX2_get_seed(void *seed, size_t seed_len); | |||
/* | |||
* Structure for a PRNG. This includes a large buffer so that values | |||
* get generated in advance. The 'state' is used to keep the current | |||
* PRNG algorithm state (contents depend on the selected algorithm). | |||
* | |||
* The unions with 'dummy_u64' are there to ensure proper alignment for | |||
* 64-bit direct access. | |||
*/ | |||
typedef struct { | |||
union { | |||
uint8_t d[512]; /* MUST be 512, exactly */ | |||
uint64_t dummy_u64; /* alignment-only member: forces 64-bit alignment of d[] */ | |||
} buf; /* pre-generated output bytes, consumed by prng_get_u64() and prng_get_u8() */ | |||
size_t ptr; /* index into buf.d of the next unread byte */ | |||
union { | |||
uint8_t d[256]; /* current PRNG algorithm state; contents depend on the selected algorithm */ | |||
uint64_t dummy_u64; /* alignment-only member: forces 64-bit alignment of d[] */ | |||
} state; | |||
int type; /* NOTE(review): presumably identifies the selected PRNG algorithm -- confirm in rng.c */ | |||
} prng; | |||
/* | |||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 | |||
* context (in "flipped" state) to obtain its initial state. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_prng_init(prng *p, inner_shake256_context *src); | |||
/* | |||
* Refill the PRNG buffer. This is normally invoked automatically, and | |||
* is declared here only so that prng_get_u64() may be inlined. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_prng_refill(prng *p); | |||
/* | |||
* Get some bytes from a PRNG. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_prng_get_bytes(prng *p, void *dst, size_t len); | |||
/* | |||
* Get a 64-bit random value from a PRNG. | |||
*/ | |||
static inline uint64_t | |||
prng_get_u64(prng *p) { | |||
size_t u; | |||
/* | |||
* If there are less than 9 bytes in the buffer, we refill it. | |||
* This means that we may drop the last few bytes, but this allows | |||
* for faster extraction code. Also, it means that we never leave | |||
* an empty buffer. | |||
*/ | |||
u = p->ptr; | |||
if (u >= (sizeof p->buf.d) - 9) { | |||
PQCLEAN_FALCON1024_AVX2_prng_refill(p); | |||
u = 0; | |||
} | |||
p->ptr = u + 8; | |||
return (uint64_t)p->buf.d[u + 0] | |||
| ((uint64_t)p->buf.d[u + 1] << 8) | |||
| ((uint64_t)p->buf.d[u + 2] << 16) | |||
| ((uint64_t)p->buf.d[u + 3] << 24) | |||
| ((uint64_t)p->buf.d[u + 4] << 32) | |||
| ((uint64_t)p->buf.d[u + 5] << 40) | |||
| ((uint64_t)p->buf.d[u + 6] << 48) | |||
| ((uint64_t)p->buf.d[u + 7] << 56); | |||
} | |||
/* | |||
* Get an 8-bit random value from a PRNG. | |||
*/ | |||
static inline unsigned | |||
prng_get_u8(prng *p) { | |||
unsigned v; | |||
v = p->buf.d[p->ptr ++]; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON1024_AVX2_prng_refill(p); | |||
} | |||
return v; | |||
} | |||
/* ==================================================================== */ | |||
/* | |||
* FFT (falcon-fft.c). | |||
* | |||
* A real polynomial is represented as an array of N 'fpr' elements. | |||
* The FFT representation of a real polynomial contains N/2 complex | |||
* elements; each is stored as two real numbers, for the real and | |||
* imaginary parts, respectively. See falcon-fft.c for details on the | |||
* internal representation. | |||
*/ | |||
/* | |||
* Compute FFT in-place: the source array should contain a real | |||
* polynomial (N coefficients); its storage area is reused to store | |||
* the FFT representation of that polynomial (N/2 complex numbers). | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_FFT(fpr *f, unsigned logn); | |||
/* | |||
* Compute the inverse FFT in-place: the source array should contain the | |||
* FFT representation of a real polynomial (N/2 elements); the resulting | |||
* real polynomial (N coefficients of type 'fpr') is written over the | |||
* array. | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_iFFT(fpr *f, unsigned logn); | |||
/* | |||
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_add(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_sub(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Negate polynomial a. This function works in both normal and FFT | |||
* representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_neg(fpr *a, unsigned logn); | |||
/* | |||
* Compute adjoint of polynomial a. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_adj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap. | |||
* This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT | |||
* overlap. This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial with its own adjoint. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial with a real constant. This function works in both | |||
* normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_mulconst(fpr *a, fpr x, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_div_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) | |||
* (also in FFT representation). Since the result is auto-adjoint, all its | |||
* coordinates in FFT representation are real; as such, only the first N/2 | |||
* values of d[] are filled (the imaginary parts are skipped). | |||
* | |||
* Array d MUST NOT overlap with either a or b. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) | |||
* (also in FFT representation). Destination d MUST NOT overlap with | |||
* any of the source arrays. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn); | |||
/* | |||
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_div_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. On input, g00, g01 and g11 are provided (where the | |||
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 | |||
* and d11 values are written in g00, g01 and g11, respectively | |||
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). | |||
* (In fact, d00 = g00, so the g00 operand is left unmodified.) | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_LDL_fft(const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. This is identical to poly_LDL_fft() except that | |||
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written | |||
* in two other separate buffers provided as extra parameters. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_LDLmv_fft(fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn); | |||
/* | |||
* Apply "split" operation on a polynomial in FFT representation: | |||
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 | |||
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_split_fft(fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn); | |||
/* | |||
* Apply "merge" operation on two polynomials in FFT representation: | |||
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes | |||
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. | |||
* f MUST NOT overlap with either f0 or f1. | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_poly_merge_fft(fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Key pair generation. | |||
*/ | |||
/* | |||
* Required sizes of the temporary buffer (in bytes). | |||
* | |||
* This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1 | |||
* or 2) where it is slightly greater. | |||
*/ | |||
#define FALCON_KEYGEN_TEMP_1 136 /* logn = 1: larger than 28*2^1 = 56 */ | |||
#define FALCON_KEYGEN_TEMP_2 272 /* logn = 2: larger than 28*2^2 = 112 */ | |||
#define FALCON_KEYGEN_TEMP_3 224 /* logn = 3: 28*2^3 */ | |||
#define FALCON_KEYGEN_TEMP_4 448 /* logn = 4: 28*2^4 */ | |||
#define FALCON_KEYGEN_TEMP_5 896 /* logn = 5: 28*2^5 */ | |||
#define FALCON_KEYGEN_TEMP_6 1792 /* logn = 6: 28*2^6 */ | |||
#define FALCON_KEYGEN_TEMP_7 3584 /* logn = 7: 28*2^7 */ | |||
#define FALCON_KEYGEN_TEMP_8 7168 /* logn = 8: 28*2^8 */ | |||
#define FALCON_KEYGEN_TEMP_9 14336 /* logn = 9: 28*2^9 */ | |||
#define FALCON_KEYGEN_TEMP_10 28672 /* logn = 10: 28*2^10 */ | |||
/* | |||
* Generate a new key pair. Randomness is extracted from the provided | |||
* SHAKE256 context, which must have already been seeded and flipped. | |||
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* | |||
* macros) and be aligned for the uint32_t, uint64_t and fpr types. | |||
* | |||
* The private key elements are written in f, g, F and G, and the | |||
* public key is written in h. Either or both of G and h may be NULL, | |||
* in which case the corresponding element is not returned (they can | |||
* be recomputed from f, g and F). | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_keygen(inner_shake256_context *rng, | |||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature generation. | |||
*/ | |||
/* | |||
* Expand a private key into the B0 matrix in FFT representation and | |||
* the LDL tree. All the values are written in 'expanded_key', for | |||
* a total of (8*logn+40)*2^logn bytes. | |||
* | |||
* The tmp[] array must have room for at least 48*2^logn bytes. | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_expand_privkey(fpr *expanded_key, | |||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, | |||
unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses an | |||
* expanded key (as generated by PQCLEAN_FALCON1024_AVX2_expand_privkey()). | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* On successful output, the start of the tmp[] buffer contains the s1 | |||
* vector (as int16_t elements). | |||
* | |||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes. | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_sign_tree(int16_t *sig, inner_shake256_context *rng, | |||
const fpr *expanded_key, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses a raw | |||
* key and dynamically recomputes the B0 matrix and LDL tree; this | |||
* saves RAM since there is no need for an expanded key, but | |||
* increases the signature cost. | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* On successful output, the start of the tmp[] buffer contains the s1 | |||
* vector (as int16_t elements). | |||
* | |||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes. | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_AVX2_sign_dyn(int16_t *sig, inner_shake256_context *rng, | |||
const int8_t *f, const int8_t *g, | |||
const int8_t *F, const int8_t *G, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Internal sampler engine. Exported for tests. | |||
* | |||
* sampler_context wraps around a source of random numbers (PRNG) and | |||
* the sigma_min value (nominally dependent on the degree). | |||
* | |||
* sampler() takes as parameters: | |||
* ctx pointer to the sampler_context structure | |||
* mu center for the distribution | |||
* isigma inverse of the distribution standard deviation | |||
* It returns an integer sampled along the Gaussian distribution centered | |||
* on mu and of standard deviation sigma = 1/isigma. | |||
* | |||
* gaussian0_sampler() takes as parameter a pointer to a PRNG, and | |||
* returns an integer sampled along a half-Gaussian with standard | |||
* deviation sigma0 = 1.8205 (center is 0, returned value is | |||
* nonnegative). | |||
*/ | |||
typedef struct { | |||
prng p; /* source of random numbers wrapped by the sampler */ | |||
fpr sigma_min; /* sigma_min value (nominally dependent on the degree) */ | |||
} sampler_context; | |||
int PQCLEAN_FALCON1024_AVX2_sampler(void *ctx, fpr mu, fpr isigma); | |||
int PQCLEAN_FALCON1024_AVX2_gaussian0_sampler(prng *p); | |||
/* ==================================================================== */ | |||
#endif |
@@ -1,386 +0,0 @@ | |||
#include "api.h" | |||
#include "inner.h" | |||
#include "randombytes.h" | |||
#include <stddef.h> | |||
#include <string.h> | |||
/* | |||
* Wrapper for implementing the PQClean API. | |||
*/ | |||
#define NONCELEN 40 | |||
#define SEEDLEN 48 | |||
/* | |||
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) | |||
* | |||
* private key: | |||
* header byte: 0101nnnn | |||
* private f (6 or 5 bits by element, depending on degree) | |||
* private g (6 or 5 bits by element, depending on degree) | |||
* private F (8 bits by element) | |||
* | |||
* public key: | |||
* header byte: 0000nnnn | |||
* public h (14 bits by element) | |||
* | |||
* signature: | |||
* header byte: 0011nnnn | |||
* nonce 40 bytes | |||
* value (12 bits by element) | |||
* | |||
* message + signature: | |||
* signature length (2 bytes, big-endian) | |||
* nonce 40 bytes | |||
* message | |||
* header byte: 0010nnnn | |||
* value (12 bits by element) | |||
* (signature length is 1+len(value), not counting the nonce) | |||
*/ | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { | |||
union { | |||
uint8_t b[28 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[1024], g[1024], F[1024], G[1024]; | |||
uint16_t h[1024]; | |||
unsigned char seed[SEEDLEN]; | |||
inner_shake256_context rng; | |||
size_t u, v; | |||
/* | |||
* Generate key pair. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
inner_shake256_init(&rng); | |||
inner_shake256_inject(&rng, seed, sizeof seed); | |||
inner_shake256_flip(&rng); | |||
PQCLEAN_FALCON1024_AVX2_keygen(&rng, f, g, F, G, h, 10, tmp.b); | |||
inner_shake256_ctx_release(&rng); | |||
/* | |||
* Encode private key. | |||
*/ | |||
sk[0] = 0x50 + 10; | |||
u = 1; | |||
v = PQCLEAN_FALCON1024_AVX2_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u, | |||
f, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_AVX2_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u, | |||
g, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_AVX2_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u, | |||
F, 10, PQCLEAN_FALCON1024_AVX2_max_FG_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
/* | |||
* Encode public key. | |||
*/ | |||
pk[0] = 0x00 + 10; | |||
v = PQCLEAN_FALCON1024_AVX2_modq_encode( | |||
pk + 1, PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1, | |||
h, 10); | |||
if (v != PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* | |||
* Compute the signature. nonce[] receives the nonce and must have length | |||
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce | |||
* or header byte), with *sigbuflen providing the maximum value length and | |||
* receiving the actual value length. | |||
* | |||
* If a signature could be computed but not encoded because it would | |||
* exceed the output buffer size, then a new signature is computed. If | |||
* the provided buffer size is too low, this could loop indefinitely, so | |||
* the caller must provide a size that can accommodate signatures with a | |||
* large enough probability. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
union { | |||
uint8_t b[72 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[1024], g[1024], F[1024], G[1024]; | |||
union { | |||
int16_t sig[1024]; | |||
uint16_t hm[1024]; | |||
} r; | |||
unsigned char seed[SEEDLEN]; | |||
inner_shake256_context sc; | |||
size_t u, v; | |||
/* | |||
* Decode the private key. | |||
*/ | |||
if (sk[0] != 0x50 + 10) { | |||
return -1; | |||
} | |||
u = 1; | |||
v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode( | |||
f, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode( | |||
g, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode( | |||
F, 10, PQCLEAN_FALCON1024_AVX2_max_FG_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
if (!PQCLEAN_FALCON1024_AVX2_complete_private(G, f, g, F, 10, tmp.b)) { | |||
return -1; | |||
} | |||
/* | |||
* Create a random nonce (40 bytes). | |||
*/ | |||
randombytes(nonce, NONCELEN); | |||
/* | |||
* Hash message nonce + message into a vector. | |||
*/ | |||
inner_shake256_init(&sc); | |||
inner_shake256_inject(&sc, nonce, NONCELEN); | |||
inner_shake256_inject(&sc, m, mlen); | |||
inner_shake256_flip(&sc); | |||
PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(&sc, r.hm, 10); | |||
inner_shake256_ctx_release(&sc); | |||
/* | |||
* Initialize a RNG. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
inner_shake256_init(&sc); | |||
inner_shake256_inject(&sc, seed, sizeof seed); | |||
inner_shake256_flip(&sc); | |||
/* | |||
* Compute and return the signature. This loops until a signature | |||
* value is found that fits in the provided buffer. | |||
*/ | |||
for (;;) { | |||
PQCLEAN_FALCON1024_AVX2_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b); | |||
v = PQCLEAN_FALCON1024_AVX2_comp_encode(sigbuf, *sigbuflen, r.sig, 10); | |||
if (v != 0) { | |||
inner_shake256_ctx_release(&sc); | |||
*sigbuflen = v; | |||
return 0; | |||
} | |||
} | |||
} | |||
/* | |||
* Verify a signature. The nonce has size NONCELEN bytes. sigbuf[] | |||
* (of size sigbuflen) contains the signature value, not including the | |||
* header byte or nonce. Return value is 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_verify( | |||
const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
union { | |||
uint8_t b[2 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
uint16_t h[1024], hm[1024]; | |||
int16_t sig[1024]; | |||
inner_shake256_context sc; | |||
/* | |||
* Decode public key. | |||
*/ | |||
if (pk[0] != 0x00 + 10) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON1024_AVX2_modq_decode(h, 10, | |||
pk + 1, PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) | |||
!= PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
PQCLEAN_FALCON1024_AVX2_to_ntt_monty(h, 10); | |||
/* | |||
* Decode signature. | |||
*/ | |||
if (sigbuflen == 0) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON1024_AVX2_comp_decode(sig, 10, sigbuf, sigbuflen) != sigbuflen) { | |||
return -1; | |||
} | |||
/* | |||
* Hash nonce + message into a vector. | |||
*/ | |||
inner_shake256_init(&sc); | |||
inner_shake256_inject(&sc, nonce, NONCELEN); | |||
inner_shake256_inject(&sc, m, mlen); | |||
inner_shake256_flip(&sc); | |||
PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(&sc, hm, 10, tmp.b); | |||
inner_shake256_ctx_release(&sc); | |||
/* | |||
* Verify signature. | |||
*/ | |||
if (!PQCLEAN_FALCON1024_AVX2_verify_raw(hm, sig, h, 10, tmp.b)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
/* | |||
* The PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES constant is used for | |||
* the signed message object (as produced by PQCLEAN_FALCON1024_AVX2_crypto_sign()) | |||
* and includes a two-byte length value, so we take care here | |||
* to only generate signatures that are two bytes shorter than | |||
* the maximum. This is done to ensure that PQCLEAN_FALCON1024_AVX2_crypto_sign() | |||
* and PQCLEAN_FALCON1024_AVX2_crypto_sign_signature() produce the exact same signature | |||
* value, if used on the same message, with the same private key, | |||
* and using the same output from randombytes() (this is for | |||
* reproducibility of tests). | |||
*/ | |||
size_t vlen; | |||
vlen = PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
sig[0] = 0x30 + 10; | |||
*siglen = 1 + NONCELEN + vlen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
if (siglen < 1 + NONCELEN) { | |||
return -1; | |||
} | |||
if (sig[0] != 0x30 + 10) { | |||
return -1; | |||
} | |||
return do_verify(sig + 1, | |||
sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
uint8_t *pm, *sigbuf; | |||
size_t sigbuflen; | |||
/* | |||
* Move the message to its final location; this is a memmove() so | |||
* it handles overlaps properly. | |||
*/ | |||
memmove(sm + 2 + NONCELEN, m, mlen); | |||
pm = sm + 2 + NONCELEN; | |||
sigbuf = pm + 1 + mlen; | |||
sigbuflen = PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
pm[mlen] = 0x20 + 10; | |||
sigbuflen ++; | |||
sm[0] = (uint8_t)(sigbuflen >> 8); | |||
sm[1] = (uint8_t)sigbuflen; | |||
*smlen = mlen + 2 + NONCELEN + sigbuflen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||
const uint8_t *sigbuf; | |||
size_t pmlen, sigbuflen; | |||
if (smlen < 3 + NONCELEN) { | |||
return -1; | |||
} | |||
sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; | |||
if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { | |||
return -1; | |||
} | |||
sigbuflen --; | |||
pmlen = smlen - NONCELEN - 3 - sigbuflen; | |||
if (sm[2 + NONCELEN + pmlen] != 0x20 + 10) { | |||
return -1; | |||
} | |||
sigbuf = sm + 2 + NONCELEN + pmlen + 1; | |||
/* | |||
* The 2-byte length header and the one-byte signature header | |||
* have been verified. Nonce is at sm+2, followed by the message | |||
* itself. Message length is in pmlen. sigbuf/sigbuflen point to | |||
* the signature value (excluding the header byte). | |||
*/ | |||
if (do_verify(sm + 2, sigbuf, sigbuflen, | |||
sm + 2 + NONCELEN, pmlen, pk) < 0) { | |||
return -1; | |||
} | |||
/* | |||
* Signature is correct, we just have to copy/move the message | |||
* to its final destination. The memmove() properly handles | |||
* overlaps. | |||
*/ | |||
memmove(m, sm + 2 + NONCELEN, pmlen); | |||
*mlen = pmlen; | |||
return 0; | |||
} |
@@ -1,195 +0,0 @@ | |||
#include "inner.h" | |||
#include <assert.h> | |||
/* | |||
* PRNG and interface to the system RNG. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* | |||
* Include relevant system header files. For Win32, this will also need | |||
* linking with advapi32.dll, which we trigger with an appropriate #pragma. | |||
*/ | |||
/* see inner.h */ | |||
/*
 * Stub seed source: this build has no system RNG behind it, so the
 * destination buffer is never written. The call reports success (1)
 * only for the trivial zero-length request, and failure (0) for any
 * other length.
 */
int
PQCLEAN_FALCON1024_AVX2_get_seed(void *seed, size_t len) {
    (void)seed;
    return (len == 0) ? 1 : 0;
}
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_AVX2_prng_init(prng *p, inner_shake256_context *src) { | |||
inner_shake256_extract(src, p->state.d, 56); | |||
PQCLEAN_FALCON1024_AVX2_prng_refill(p); | |||
} | |||
/* | |||
* PRNG based on ChaCha20. | |||
* | |||
* State consists in key (32 bytes) then IV (16 bytes) and block counter | |||
* (8 bytes). Normally, we should not care about local endianness (this | |||
* is for a PRNG), but for the NIST competition we need reproducible KAT | |||
* vectors that work across architectures, so we enforce little-endian | |||
* interpretation where applicable. Moreover, output words are "spread | |||
* out" over the output buffer with the interleaving pattern that is | |||
* naturally obtained from the AVX2 implementation that runs eight | |||
* ChaCha20 instances in parallel. | |||
* | |||
* The block counter is XORed into the last 8 bytes of the IV (32-bit | |||
* state words 10 and 11, which feed ChaCha20 words 14 and 15). | |||
*/ | |||
void | |||
PQCLEAN_FALCON1024_AVX2_prng_refill(prng *p) { | |||
static const uint32_t CW[] = { | |||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 | |||
}; | |||
uint64_t cc; | |||
size_t u; | |||
int i; | |||
uint32_t *sw; | |||
union { | |||
uint32_t w[16]; | |||
__m256i y[2]; /* for alignment */ | |||
} t; | |||
__m256i state[16], init[16]; | |||
sw = (uint32_t *)p->state.d; | |||
/* | |||
* XOR next counter values into state. | |||
*/ | |||
cc = *(uint64_t *)(p->state.d + 48); | |||
for (u = 0; u < 8; u ++) { | |||
t.w[u] = (uint32_t)(cc + u); | |||
t.w[u + 8] = (uint32_t)((cc + u) >> 32); | |||
} | |||
*(uint64_t *)(p->state.d + 48) = cc + 8; | |||
/* | |||
* Load state. | |||
*/ | |||
for (u = 0; u < 4; u ++) { | |||
state[u] = init[u] = | |||
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)CW[u])); | |||
} | |||
for (u = 0; u < 10; u ++) { | |||
state[u + 4] = init[u + 4] = | |||
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[u])); | |||
} | |||
state[14] = init[14] = _mm256_xor_si256( | |||
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[10])), | |||
_mm256_loadu_si256((__m256i *)&t.w[0])); | |||
state[15] = init[15] = _mm256_xor_si256( | |||
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[11])), | |||
_mm256_loadu_si256((__m256i *)&t.w[8])); | |||
/* | |||
* Do all rounds. | |||
*/ | |||
for (i = 0; i < 10; i ++) { | |||
#define QROUND(a, b, c, d) do { \ | |||
state[a] = _mm256_add_epi32(state[a], state[b]); \ | |||
state[d] = _mm256_xor_si256(state[d], state[a]); \ | |||
state[d] = _mm256_or_si256( \ | |||
_mm256_slli_epi32(state[d], 16), \ | |||
_mm256_srli_epi32(state[d], 16)); \ | |||
state[c] = _mm256_add_epi32(state[c], state[d]); \ | |||
state[b] = _mm256_xor_si256(state[b], state[c]); \ | |||
state[b] = _mm256_or_si256( \ | |||
_mm256_slli_epi32(state[b], 12), \ | |||
_mm256_srli_epi32(state[b], 20)); \ | |||
state[a] = _mm256_add_epi32(state[a], state[b]); \ | |||
state[d] = _mm256_xor_si256(state[d], state[a]); \ | |||
state[d] = _mm256_or_si256( \ | |||
_mm256_slli_epi32(state[d], 8), \ | |||
_mm256_srli_epi32(state[d], 24)); \ | |||
state[c] = _mm256_add_epi32(state[c], state[d]); \ | |||
state[b] = _mm256_xor_si256(state[b], state[c]); \ | |||
state[b] = _mm256_or_si256( \ | |||
_mm256_slli_epi32(state[b], 7), \ | |||
_mm256_srli_epi32(state[b], 25)); \ | |||
} while (0) | |||
QROUND( 0, 4, 8, 12); | |||
QROUND( 1, 5, 9, 13); | |||
QROUND( 2, 6, 10, 14); | |||
QROUND( 3, 7, 11, 15); | |||
QROUND( 0, 5, 10, 15); | |||
QROUND( 1, 6, 11, 12); | |||
QROUND( 2, 7, 8, 13); | |||
QROUND( 3, 4, 9, 14); | |||
#undef QROUND | |||
} | |||
/* | |||
* Add initial state back and encode the result in the destination | |||
* buffer. We can dump the AVX2 values "as is" because the non-AVX2 | |||
* code uses a compatible order of values. | |||
*/ | |||
for (u = 0; u < 16; u ++) { | |||
_mm256_storeu_si256((__m256i *)&p->buf.d[u << 5], | |||
_mm256_add_epi32(state[u], init[u])); | |||
} | |||
p->ptr = 0; | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_AVX2_prng_get_bytes(prng *p, void *dst, size_t len) { | |||
uint8_t *buf; | |||
buf = dst; | |||
while (len > 0) { | |||
size_t clen; | |||
clen = (sizeof p->buf.d) - p->ptr; | |||
if (clen > len) { | |||
clen = len; | |||
} | |||
memcpy(buf, p->buf.d, clen); | |||
buf += clen; | |||
len -= clen; | |||
p->ptr += clen; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON1024_AVX2_prng_refill(p); | |||
} | |||
} | |||
} |
@@ -1,853 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Falcon signature verification. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* ===================================================================== */ | |||
/* | |||
* Constants for NTT. | |||
* | |||
* n = 2^logn (2 <= n <= 1024) | |||
* phi = X^n + 1 | |||
* q = 12289 | |||
* q0i = -1/q mod 2^16 | |||
* R = 2^16 mod q | |||
* R2 = 2^32 mod q | |||
*/ | |||
#define Q 12289 | |||
#define Q0I 12287 | |||
#define R 4091 | |||
#define R2 10952 | |||
/* | |||
* Table for NTT, binary case: | |||
* GMb[x] = R*(g^rev(x)) mod q | |||
* where g = 7 (it is a 2048-th primitive root of 1 modulo q) | |||
* and rev() is the bit-reversal function over 10 bits. | |||
*/ | |||
static const uint16_t GMb[] = { | |||
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, | |||
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, | |||
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, | |||
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, | |||
12210, 6240, 997, 117, 4783, 4407, 1549, 7072, | |||
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, | |||
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, | |||
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, | |||
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, | |||
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, | |||
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, | |||
9277, 6130, 3323, 883, 10469, 489, 1502, 2851, | |||
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, | |||
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, | |||
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, | |||
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, | |||
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, | |||
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, | |||
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, | |||
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, | |||
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, | |||
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, | |||
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, | |||
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, | |||
737, 3698, 4699, 5753, 9046, 3687, 16, 914, | |||
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, | |||
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, | |||
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, | |||
932, 10229, 8927, 7642, 351, 9298, 237, 5858, | |||
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, | |||
4602, 1748, 11300, 340, 3711, 4614, 300, 10993, | |||
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, | |||
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, | |||
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, | |||
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, | |||
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, | |||
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, | |||
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, | |||
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328, | |||
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, | |||
2523, 4339, 6115, 619, 937, 2834, 7775, 3279, | |||
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, | |||
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, | |||
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, | |||
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, | |||
11192, 315, 4511, 1158, 6061, 6751, 11865, 357, | |||
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, | |||
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, | |||
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, | |||
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, | |||
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, | |||
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, | |||
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, | |||
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, | |||
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, | |||
10438, 9471, 1271, 408, 6911, 3079, 360, 8276, | |||
11535, 9156, 9049, 11539, 850, 8617, 784, 7919, | |||
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, | |||
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, | |||
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, | |||
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, | |||
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, | |||
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, | |||
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, | |||
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, | |||
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, | |||
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, | |||
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, | |||
11736, 6813, 6979, 819, 8903, 6271, 10843, 348, | |||
7514, 8339, 6439, 694, 852, 5659, 2781, 3716, | |||
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, | |||
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, | |||
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, | |||
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, | |||
10923, 4918, 128, 7312, 725, 9157, 5006, 6393, | |||
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, | |||
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, | |||
5110, 45, 2400, 1921, 4377, 2720, 1695, 51, | |||
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, | |||
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, | |||
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, | |||
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, | |||
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, | |||
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, | |||
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, | |||
862, 3158, 477, 7279, 5678, 7914, 4254, 302, | |||
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, | |||
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, | |||
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, | |||
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, | |||
1397, 10678, 103, 7420, 7976, 936, 764, 632, | |||
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, | |||
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, | |||
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, | |||
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, | |||
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, | |||
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, | |||
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, | |||
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, | |||
8192, 986, 7527, 1401, 870, 3615, 8465, 2756, | |||
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, | |||
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, | |||
2567, 708, 893, 6465, 4962, 10024, 2090, 5718, | |||
10743, 780, 4733, 4623, 2134, 2087, 4802, 884, | |||
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, | |||
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, | |||
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, | |||
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, | |||
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, | |||
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, | |||
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, | |||
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, | |||
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, | |||
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, | |||
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, | |||
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, | |||
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, | |||
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509, | |||
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, | |||
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, | |||
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, | |||
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, | |||
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, | |||
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, | |||
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, | |||
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, | |||
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, | |||
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 | |||
}; | |||
/* | |||
* Table for inverse NTT, binary case: | |||
* iGMb[x] = R*((1/g)^rev(x)) mod q | |||
* Since g = 7, 1/g = 8778 mod 12289. | |||
*/ | |||
static const uint16_t iGMb[] = { | |||
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, | |||
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, | |||
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, | |||
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, | |||
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, | |||
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, | |||
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, | |||
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, | |||
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, | |||
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, | |||
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, | |||
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, | |||
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, | |||
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, | |||
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, | |||
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, | |||
6635, 6543, 1582, 4868, 42, 673, 2240, 7219, | |||
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, | |||
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, | |||
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, | |||
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, | |||
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, | |||
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, | |||
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, | |||
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, | |||
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609, | |||
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, | |||
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, | |||
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, | |||
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, | |||
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, | |||
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, | |||
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, | |||
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, | |||
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, | |||
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, | |||
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, | |||
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, | |||
6689, 386, 4462, 105, 2076, 10443, 119, 3955, | |||
4370, 11505, 3672, 11439, 750, 3240, 3133, 754, | |||
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, | |||
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, | |||
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, | |||
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, | |||
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, | |||
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, | |||
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, | |||
101, 1911, 9483, 3608, 11997, 10536, 812, 8915, | |||
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, | |||
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, | |||
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, | |||
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, | |||
7769, 136, 617, 3157, 5889, 9219, 6855, 120, | |||
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, | |||
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, | |||
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, | |||
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, | |||
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, | |||
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, | |||
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, | |||
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, | |||
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, | |||
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, | |||
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, | |||
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028, | |||
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, | |||
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, | |||
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, | |||
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, | |||
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, | |||
9956, 2702, 6656, 735, 2243, 11656, 833, 3107, | |||
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, | |||
3513, 9769, 3025, 779, 9433, 3392, 7437, 668, | |||
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, | |||
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, | |||
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, | |||
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, | |||
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, | |||
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, | |||
707, 1088, 4936, 678, 10245, 18, 5684, 960, | |||
4459, 7957, 226, 2451, 6, 8874, 320, 6298, | |||
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, | |||
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, | |||
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, | |||
5227, 952, 4319, 9810, 4356, 3088, 11118, 840, | |||
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, | |||
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, | |||
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, | |||
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, | |||
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, | |||
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, | |||
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, | |||
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, | |||
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, | |||
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, | |||
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, | |||
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, | |||
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, | |||
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, | |||
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, | |||
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, | |||
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, | |||
11489, 8833, 2393, 15, 10830, 5003, 17, 565, | |||
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, | |||
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020, | |||
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, | |||
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, | |||
104, 6348, 9643, 6757, 12110, 5617, 10935, 541, | |||
135, 3041, 7200, 6526, 5085, 12136, 842, 4129, | |||
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, | |||
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, | |||
1770, 273, 8377, 2271, 5225, 10283, 116, 11807, | |||
91, 11699, 757, 1304, 7524, 6451, 8032, 8154, | |||
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, | |||
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, | |||
3924, 3188, 367, 2077, 336, 5384, 5631, 8596, | |||
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, | |||
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, | |||
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, | |||
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, | |||
9763, 12191, 459, 2966, 3166, 405, 5000, 9311, | |||
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, | |||
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, | |||
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, | |||
9474, 2586, 1431, 2741, 473, 11383, 4745, 836, | |||
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, | |||
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, | |||
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 | |||
}; | |||
/* | |||
* Reduce a small signed integer modulo q. The source integer MUST | |||
* be between -q/2 and +q/2. | |||
*/ | |||
static inline uint32_t | |||
mq_conv_small(int x) { | |||
/* | |||
* If x < 0, the cast to uint32_t will set the high bit to 1. | |||
*/ | |||
uint32_t y; | |||
y = (uint32_t)x; | |||
y += Q & -(y >> 31); | |||
return y; | |||
} | |||
/* | |||
* Addition modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_add(uint32_t x, uint32_t y) { | |||
/* | |||
* We compute x + y - q. If the result is negative, then the | |||
* high bit will be set, and 'd >> 31' will be equal to 1; | |||
* thus '-(d >> 31)' will be an all-one pattern. Otherwise, | |||
* it will be an all-zero pattern. In other words, this | |||
* implements a conditional addition of q. | |||
*/ | |||
uint32_t d; | |||
d = x + y - Q; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Subtraction modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_sub(uint32_t x, uint32_t y) { | |||
/* | |||
* As in mq_add(), we use a conditional addition to ensure the | |||
* result is in the 0..q-1 range. | |||
*/ | |||
uint32_t d; | |||
d = x - y; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Division by 2 modulo q. Operand must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_rshift1(uint32_t x) { | |||
x += Q & -(x & 1); | |||
return (x >> 1); | |||
} | |||
/* | |||
* Montgomery multiplication modulo q. If we set R = 2^16 mod q, then | |||
* this function computes: x * y / R mod q | |||
* Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_montymul(uint32_t x, uint32_t y) { | |||
uint32_t z, w; | |||
/* | |||
* We compute x*y + k*q with a value of k chosen so that the 16 | |||
* low bits of the result are 0. We can then shift the value. | |||
* After the shift, result may still be larger than q, but it | |||
* will be lower than 2*q, so a conditional subtraction works. | |||
*/ | |||
z = x * y; | |||
w = ((z * Q0I) & 0xFFFF) * Q; | |||
/* | |||
* When adding z and w, the result will have its low 16 bits | |||
* equal to 0. Since x, y and z are lower than q, the sum will | |||
* be no more than (2^15 - 1) * q + (q - 1)^2, which will | |||
* fit on 29 bits. | |||
*/ | |||
z = (z + w) >> 16; | |||
/* | |||
* After the shift, analysis shows that the value will be less | |||
* than 2q. We do a subtraction then conditional subtraction to | |||
* ensure the result is in the expected range. | |||
*/ | |||
z -= Q; | |||
z += Q & -(z >> 31); | |||
return z; | |||
} | |||
/*
 * Montgomery squaring: returns (x^2)/R mod q, computed as the
 * Montgomery product of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
	uint32_t squared = mq_montymul(x, x);

	return squared;
}
/* | |||
* Divide x by y modulo q = 12289. | |||
*/ | |||
static inline uint32_t | |||
mq_div_12289(uint32_t x, uint32_t y) { | |||
/* | |||
* We invert y by computing y^(q-2) mod q. | |||
* | |||
* We use the following addition chain for exponent e = 12287: | |||
* | |||
* e0 = 1 | |||
* e1 = 2 * e0 = 2 | |||
* e2 = e1 + e0 = 3 | |||
* e3 = e2 + e1 = 5 | |||
* e4 = 2 * e3 = 10 | |||
* e5 = 2 * e4 = 20 | |||
* e6 = 2 * e5 = 40 | |||
* e7 = 2 * e6 = 80 | |||
* e8 = 2 * e7 = 160 | |||
* e9 = e8 + e2 = 163 | |||
* e10 = e9 + e8 = 323 | |||
* e11 = 2 * e10 = 646 | |||
* e12 = 2 * e11 = 1292 | |||
* e13 = e12 + e9 = 1455 | |||
* e14 = 2 * e13 = 2910 | |||
* e15 = 2 * e14 = 5820 | |||
* e16 = e15 + e10 = 6143 | |||
* e17 = 2 * e16 = 12286 | |||
* e18 = e17 + e0 = 12287 | |||
* | |||
* Additions on exponents are converted to Montgomery | |||
* multiplications. We define all intermediate results as so | |||
* many local variables, and let the C compiler work out which | |||
* must be kept around. | |||
*/ | |||
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; | |||
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; | |||
y0 = mq_montymul(y, R2); | |||
y1 = mq_montysqr(y0); | |||
y2 = mq_montymul(y1, y0); | |||
y3 = mq_montymul(y2, y1); | |||
y4 = mq_montysqr(y3); | |||
y5 = mq_montysqr(y4); | |||
y6 = mq_montysqr(y5); | |||
y7 = mq_montysqr(y6); | |||
y8 = mq_montysqr(y7); | |||
y9 = mq_montymul(y8, y2); | |||
y10 = mq_montymul(y9, y8); | |||
y11 = mq_montysqr(y10); | |||
y12 = mq_montysqr(y11); | |||
y13 = mq_montymul(y12, y9); | |||
y14 = mq_montysqr(y13); | |||
y15 = mq_montysqr(y14); | |||
y16 = mq_montymul(y15, y10); | |||
y17 = mq_montysqr(y16); | |||
y18 = mq_montymul(y17, y0); | |||
/* | |||
* Final multiplication with x, which is not in Montgomery | |||
* representation, computes the correct division result. | |||
*/ | |||
return mq_montymul(y18, x); | |||
} | |||
/* | |||
* Compute NTT on a ring element. | |||
*/ | |||
static void | |||
mq_NTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
n = (size_t)1 << logn; | |||
t = n; | |||
for (m = 1; m < n; m <<= 1) { | |||
size_t ht, i, j1; | |||
ht = t >> 1; | |||
for (i = 0, j1 = 0; i < m; i ++, j1 += t) { | |||
size_t j, j2; | |||
uint32_t s; | |||
s = GMb[m + i]; | |||
j2 = j1 + ht; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v; | |||
u = a[j]; | |||
v = mq_montymul(a[j + ht], s); | |||
a[j] = (uint16_t)mq_add(u, v); | |||
a[j + ht] = (uint16_t)mq_sub(u, v); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* | |||
* Compute the inverse NTT on a ring element, binary case. | |||
*/ | |||
static void | |||
mq_iNTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
uint32_t ni; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
while (m > 1) { | |||
size_t hm, dt, i, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { | |||
size_t j, j2; | |||
uint32_t s; | |||
j2 = j1 + t; | |||
s = iGMb[hm + i]; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v, w; | |||
u = a[j]; | |||
v = a[j + t]; | |||
a[j] = (uint16_t)mq_add(u, v); | |||
w = mq_sub(u, v); | |||
a[j + t] = (uint16_t) | |||
mq_montymul(w, s); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* To complete the inverse NTT, we must now divide all values by | |||
* n (the vector size). We thus need the inverse of n, i.e. we | |||
* need to divide 1 by 2 logn times. But we also want it in | |||
* Montgomery representation, i.e. we also want to multiply it | |||
* by R = 2^16. In the common case, this should be a simple right | |||
* shift. The loop below is generic and works also in corner cases; | |||
* its computation time is negligible. | |||
*/ | |||
ni = R; | |||
for (m = n; m > 1; m >>= 1) { | |||
ni = mq_rshift1(ni); | |||
} | |||
for (m = 0; m < n; m ++) { | |||
a[m] = (uint16_t)mq_montymul(a[m], ni); | |||
} | |||
} | |||
/* | |||
* Convert a polynomial (mod q) to Montgomery representation. | |||
*/ | |||
static void | |||
mq_poly_tomonty(uint16_t *f, unsigned logn) { | |||
size_t u, n; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = (uint16_t)mq_montymul(f[u], R2); | |||
} | |||
} | |||
/*
 * Coefficient-wise Montgomery product of two polynomials given in NTT
 * representation. The product f*g overwrites f.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
	size_t left;

	for (left = (size_t)1 << logn; left > 0; left --, f ++, g ++) {
		*f = (uint16_t)mq_montymul(*f, *g);
	}
}
/*
 * Coefficient-wise modular subtraction: f <- f - g.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
	size_t left;

	for (left = (size_t)1 << logn; left > 0; left --, f ++, g ++) {
		*f = (uint16_t)mq_sub(*f, *g);
	}
}
/* ===================================================================== */ | |||
/*
 * see inner.h
 *
 * Converts h in place: NTT first, then switch of every coefficient
 * to Montgomery representation.
 */
void
PQCLEAN_FALCON1024_AVX2_to_ntt_monty(uint16_t *h, unsigned logn) {
	uint16_t *poly = h;

	mq_NTT(poly, logn);
	mq_poly_tomonty(poly, logn);
}
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
n = (size_t)1 << logn; | |||
tt = (uint16_t *)tmp; | |||
/* | |||
* Reduce s2 elements modulo q ([0..q-1] range). | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
} | |||
/* | |||
* Compute -s1 = s2*h - c0 mod phi mod q (in tt[]). | |||
*/ | |||
mq_NTT(tt, logn); | |||
mq_poly_montymul_ntt(tt, h, logn); | |||
mq_iNTT(tt, logn); | |||
mq_poly_sub(tt, c0, logn); | |||
/* | |||
* Normalize -s1 elements into the [-q/2..q/2] range. | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
int32_t w; | |||
w = (int32_t)tt[u]; | |||
w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); | |||
((int16_t *)tt)[u] = (int16_t)w; | |||
} | |||
/* | |||
* Signature is valid if and only if the aggregate (-s1,s2) vector | |||
* is short enough. | |||
*/ | |||
return PQCLEAN_FALCON1024_AVX2_is_short((int16_t *)tt, s2, logn); | |||
} | |||
/*
 * see inner.h
 *
 * Computes h = g / f modulo q. Returns 1 on success, or 0 as soon as
 * a coefficient of f in NTT representation is found to be zero (f is
 * then not invertible).
 */
int
PQCLEAN_FALCON1024_AVX2_compute_public(uint16_t *h,
                                       const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
	uint16_t *fq = (uint16_t *)tmp;
	size_t n = (size_t)1 << logn;
	size_t k;

	/* Reduce f (into tmp[]) and g (into h[]) modulo q. */
	for (k = 0; k < n; k ++) {
		fq[k] = (uint16_t)mq_conv_small(f[k]);
		h[k] = (uint16_t)mq_conv_small(g[k]);
	}
	mq_NTT(h, logn);
	mq_NTT(fq, logn);

	/* Coefficient-wise division in NTT representation. */
	for (k = 0; k < n; k ++) {
		uint32_t den = fq[k];

		if (den == 0) {
			return 0;
		}
		h[k] = (uint16_t)mq_div_12289(h[k], den);
	}
	mq_iNTT(h, logn);
	return 1;
}
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *t1, *t2; | |||
n = (size_t)1 << logn; | |||
t1 = (uint16_t *)tmp; | |||
t2 = t1 + n; | |||
for (u = 0; u < n; u ++) { | |||
t1[u] = (uint16_t)mq_conv_small(g[u]); | |||
t2[u] = (uint16_t)mq_conv_small(F[u]); | |||
} | |||
mq_NTT(t1, logn); | |||
mq_NTT(t2, logn); | |||
mq_poly_tomonty(t1, logn); | |||
mq_poly_montymul_ntt(t1, t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
t2[u] = (uint16_t)mq_conv_small(f[u]); | |||
} | |||
mq_NTT(t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
if (t2[u] == 0) { | |||
return 0; | |||
} | |||
t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); | |||
} | |||
mq_iNTT(t1, logn); | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
int32_t gi; | |||
w = t1[u]; | |||
w -= (Q & ~ -((w - (Q >> 1)) >> 31)); | |||
gi = *(int32_t *)&w; | |||
if (gi < -127 || gi > +127) { | |||
return 0; | |||
} | |||
G[u] = (int8_t)gi; | |||
} | |||
return 1; | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_is_invertible( | |||
const int16_t *s2, unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
uint32_t r; | |||
n = (size_t)1 << logn; | |||
tt = (uint16_t *)tmp; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
} | |||
mq_NTT(tt, logn); | |||
r = 0; | |||
for (u = 0; u < n; u ++) { | |||
r |= (uint32_t)(tt[u] - 1); | |||
} | |||
return (int)(1u - (r >> 31)); | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_verify_recover(uint16_t *h, | |||
const uint16_t *c0, const int16_t *s1, const int16_t *s2, | |||
unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
uint32_t r; | |||
n = (size_t)1 << logn; | |||
/* | |||
* Reduce elements of s1 and s2 modulo q; then write s2 into tt[] | |||
* and c0 - s1 into h[]. | |||
*/ | |||
tt = (uint16_t *)tmp; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
w = (uint32_t)s1[u]; | |||
w += Q & -(w >> 31); | |||
w = mq_sub(c0[u], w); | |||
h[u] = (uint16_t)w; | |||
} | |||
/* | |||
* Compute h = (c0 - s1) / s2. If one of the coefficients of s2 | |||
* is zero (in NTT representation) then the operation fails. We | |||
* keep that information into a flag so that we do not deviate | |||
* from strict constant-time processing; if all coefficients of | |||
* s2 are non-zero, then the high bit of r will be zero. | |||
*/ | |||
mq_NTT(tt, logn); | |||
mq_NTT(h, logn); | |||
r = 0; | |||
for (u = 0; u < n; u ++) { | |||
r |= (uint32_t)(tt[u] - 1); | |||
h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); | |||
} | |||
mq_iNTT(h, logn); | |||
/* | |||
* Signature is acceptable if and only if it is short enough, | |||
* and s2 was invertible mod phi mod q. The caller must still | |||
* check that the rebuilt public key matches the expected | |||
* value (e.g. through a hash). | |||
*/ | |||
r = ~r & (uint32_t) - PQCLEAN_FALCON1024_AVX2_is_short(s1, s2, logn); | |||
return (int)(r >> 31); | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) { | |||
uint16_t *s2; | |||
size_t u, n; | |||
uint32_t r; | |||
n = (size_t)1 << logn; | |||
s2 = (uint16_t *)tmp; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)sig[u]; | |||
w += Q & -(w >> 31); | |||
s2[u] = (uint16_t)w; | |||
} | |||
mq_NTT(s2, logn); | |||
r = 0; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u] - 1u; | |||
r += (w >> 31); | |||
} | |||
return (int)r; | |||
} |
@@ -1,15 +0,0 @@ | |||
# Source files of the Falcon-1024 "clean" implementation, relative to
# this directory.
set(SRC_CLEAN_FALCON1024
    codec.c
    common.c
    fft.c
    fpr.c
    keygen.c
    pqclean.c
    rng.c
    sign.c
    vrfy.c
)

# define_sig_alg() is provided elsewhere in the project; it receives the
# algorithm name, what looks like the PQClean symbol prefix (TODO confirm
# against its definition), the source list and the source directory.
define_sig_alg(
    falcon1024_clean
    PQCLEAN_FALCON1024_CLEAN
    "${SRC_CLEAN_FALCON1024}"
    "${CMAKE_CURRENT_SOURCE_DIR}"
)
@@ -1,80 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON1024_CLEAN_API_H
#define PQCLEAN_FALCON1024_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/* Exact private key size, exact public key size and maximum signature
 * size (all in bytes), followed by the algorithm name. */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES   2305
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES   1793
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES            1330
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME          "Falcon-1024"

/*
 * Key pair generation.
 *
 * The public key is written to pk[] and the private key to sk[]. Both
 * have an exact size, in bytes:
 *   pk: PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES
 *   sk: PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(
    uint8_t *pk,
    uint8_t *sk);

/*
 * Detached signature generation.
 *
 * Signs the message (m, mlen) with the private key sk. The signature
 * is written to sig[] and its length to *siglen. The length varies
 * from one signature to the next but never exceeds
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES bytes.
 *
 * sig[], m[] and sk[] are allowed to overlap in any way.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
    uint8_t *sig,
    size_t *siglen,
    const uint8_t *m,
    size_t mlen,
    const uint8_t *sk);

/*
 * Detached signature verification.
 *
 * Checks the signature (sig, siglen) over the message (m, mlen)
 * against the public key pk.
 *
 * sig[], m[] and pk[] are allowed to overlap in any way.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
    const uint8_t *sig,
    size_t siglen,
    const uint8_t *m,
    size_t mlen,
    const uint8_t *pk);

/*
 * Signed-message generation.
 *
 * Signs the message and packs signature and message together into a
 * single object written to sm[]; its length goes to *smlen and may
 * exceed mlen by up to PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
 *
 * sm[] and m[] are allowed to overlap in any way, but sm[] shall not
 * overlap with sk[].
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign(
    uint8_t *sm,
    size_t *smlen,
    const uint8_t *m,
    size_t mlen,
    const uint8_t *sk);

/*
 * Signed-message opening.
 *
 * Verifies the signature carried by the signed message object
 * (sm, smlen). On success the message is written to m[] and its
 * length to *mlen. The message is shorter than the signed object by
 * an amount that depends on the signature value, at most
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
 *
 * m[], sm[] and pk[] are allowed to overlap in any way.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_open(
    uint8_t *m,
    size_t *mlen,
    const uint8_t *sm,
    size_t smlen,
    const uint8_t *pk);

#endif
@@ -1,555 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Encoding/decoding of keys and signatures. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/*
 * see inner.h
 *
 * Packs n = 2^logn values of 14 bits each, most significant bit
 * first. Returns the encoded length in bytes, or 0 if a value is
 * 12289 or more, or if the output buffer is too small. With out ==
 * NULL, only the length is computed and returned.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
	const size_t n = (size_t)1 << logn;
	const size_t out_len = ((n * 14) + 7) >> 3;
	uint8_t *dst = out;
	uint32_t bitbuf = 0;
	int pending = 0;
	size_t k;

	/* Every value must be a valid residue. */
	for (k = 0; k < n; k ++) {
		if (x[k] >= 12289) {
			return 0;
		}
	}
	if (out == NULL) {
		return out_len;
	}
	if (max_out_len < out_len) {
		return 0;
	}

	for (k = 0; k < n; k ++) {
		bitbuf = (bitbuf << 14) | x[k];
		for (pending += 14; pending >= 8;) {
			pending -= 8;
			*dst ++ = (uint8_t)(bitbuf >> pending);
		}
	}

	/* Left-align leftover bits in a final, zero-padded byte. */
	if (pending > 0) {
		*dst = (uint8_t)(bitbuf << (8 - pending));
	}
	return out_len;
}
/*
 * see inner.h
 *
 * Unpacks n = 2^logn values of 14 bits each. Returns the number of
 * bytes consumed, or 0 if the input is too short, if a value is 12289
 * or more, or if the unused bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
	const size_t n = (size_t)1 << logn;
	const size_t in_len = ((n * 14) + 7) >> 3;
	const uint8_t *src = in;
	uint32_t bitbuf = 0;
	int avail = 0;
	size_t k;

	if (max_in_len < in_len) {
		return 0;
	}
	for (k = 0; k < n; k ++) {
		unsigned w;

		/* Refill byte by byte until 14 bits are available. */
		while (avail < 14) {
			bitbuf = (bitbuf << 8) | (*src ++);
			avail += 8;
		}
		avail -= 14;
		w = (bitbuf >> avail) & 0x3FFF;
		if (w >= 12289) {
			return 0;
		}
		x[k] = (uint16_t)w;
	}

	/* Leftover bits of the last byte must be zero. */
	if ((bitbuf & (((uint32_t)1 << avail) - 1)) != 0) {
		return 0;
	}
	return in_len;
}
/*
 * see inner.h
 *
 * Packs n = 2^logn signed 16-bit values over 'bits' bits each (two's
 * complement, truncated). Returns the encoded length in bytes, or 0
 * if a value is outside of -(2^(bits-1) - 1)..+(2^(bits-1) - 1) or if
 * the output buffer is too small. With out == NULL, only the length
 * is computed and returned.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
	const size_t n = (size_t)1 << logn;
	const int maxv = (1 << (bits - 1)) - 1;
	const int minv = -maxv;
	size_t out_len, k;
	uint8_t *dst;
	uint32_t bitbuf, value_mask;
	unsigned pending;

	for (k = 0; k < n; k ++) {
		if (!(x[k] >= minv && x[k] <= maxv)) {
			return 0;
		}
	}
	out_len = ((n * bits) + 7) >> 3;
	if (out == NULL) {
		return out_len;
	}
	if (max_out_len < out_len) {
		return 0;
	}

	dst = out;
	bitbuf = 0;
	pending = 0;
	value_mask = ((uint32_t)1 << bits) - 1;
	for (k = 0; k < n; k ++) {
		bitbuf = (bitbuf << bits) | ((uint16_t)x[k] & value_mask);
		for (pending += bits; pending >= 8;) {
			pending -= 8;
			*dst ++ = (uint8_t)(bitbuf >> pending);
		}
	}

	/* Left-align leftover bits in a final, zero-padded byte. */
	if (pending > 0) {
		*dst = (uint8_t)(bitbuf << (8 - pending));
	}
	return out_len;
}
/*
 * see inner.h
 *
 * Unpacks n = 2^logn signed values of 'bits' bits each into 16-bit
 * integers. Returns the number of bytes consumed, or 0 if the input
 * is too short, if a value equals the forbidden -2^(bits-1), or if
 * the unused bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
	const size_t n = (size_t)1 << logn;
	const size_t in_len = ((n * bits) + 7) >> 3;
	const uint8_t *src = in;
	uint32_t bitbuf = 0;
	uint32_t value_mask, sign_bit;
	unsigned avail = 0;
	size_t k;

	if (max_in_len < in_len) {
		return 0;
	}
	value_mask = ((uint32_t)1 << bits) - 1;
	sign_bit = (uint32_t)1 << (bits - 1);
	for (k = 0; k < n; k ++) {
		uint32_t w;

		/* Refill byte by byte until a full value is available. */
		while (avail < bits) {
			bitbuf = (bitbuf << 8) | *src ++;
			avail += 8;
		}
		avail -= bits;

		/* Extract, then sign-extend over the full 32 bits. */
		w = (bitbuf >> avail) & value_mask;
		w |= -(w & sign_bit);
		if (w == -sign_bit) {
			/*
			 * The -2^(bits-1) value is forbidden.
			 */
			return 0;
		}
		x[k] = (int16_t) * (int32_t *)&w;
	}
	if ((bitbuf & (((uint32_t)1 << avail) - 1)) != 0) {
		/*
		 * Extra bits in the last byte must be zero.
		 */
		return 0;
	}
	return in_len;
}
/*
 * see inner.h
 *
 * Packs n = 2^logn signed 8-bit values over 'bits' bits each (two's
 * complement, truncated). Returns the encoded length in bytes, or 0
 * if a value is outside of -(2^(bits-1) - 1)..+(2^(bits-1) - 1) or if
 * the output buffer is too small. With out == NULL, only the length
 * is computed and returned.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
	const size_t n = (size_t)1 << logn;
	const int maxv = (1 << (bits - 1)) - 1;
	const int minv = -maxv;
	size_t out_len, k;
	uint8_t *dst;
	uint32_t bitbuf, value_mask;
	unsigned pending;

	for (k = 0; k < n; k ++) {
		if (!(x[k] >= minv && x[k] <= maxv)) {
			return 0;
		}
	}
	out_len = ((n * bits) + 7) >> 3;
	if (out == NULL) {
		return out_len;
	}
	if (max_out_len < out_len) {
		return 0;
	}

	dst = out;
	bitbuf = 0;
	pending = 0;
	value_mask = ((uint32_t)1 << bits) - 1;
	for (k = 0; k < n; k ++) {
		bitbuf = (bitbuf << bits) | ((uint8_t)x[k] & value_mask);
		for (pending += bits; pending >= 8;) {
			pending -= 8;
			*dst ++ = (uint8_t)(bitbuf >> pending);
		}
	}

	/* Left-align leftover bits in a final, zero-padded byte. */
	if (pending > 0) {
		*dst = (uint8_t)(bitbuf << (8 - pending));
	}
	return out_len;
}
/*
 * see inner.h
 *
 * Unpacks n = 2^logn signed values of 'bits' bits each into 8-bit
 * integers. Returns the number of bytes consumed, or 0 if the input
 * is too short, if a value equals the forbidden -2^(bits-1), or if
 * the unused bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
	const size_t n = (size_t)1 << logn;
	const size_t in_len = ((n * bits) + 7) >> 3;
	const uint8_t *src = in;
	uint32_t bitbuf = 0;
	uint32_t value_mask, sign_bit;
	unsigned avail = 0;
	size_t k;

	if (max_in_len < in_len) {
		return 0;
	}
	value_mask = ((uint32_t)1 << bits) - 1;
	sign_bit = (uint32_t)1 << (bits - 1);
	for (k = 0; k < n; k ++) {
		uint32_t w;

		/* Refill byte by byte until a full value is available. */
		while (avail < bits) {
			bitbuf = (bitbuf << 8) | *src ++;
			avail += 8;
		}
		avail -= bits;

		/* Extract, then sign-extend over the full 32 bits. */
		w = (bitbuf >> avail) & value_mask;
		w |= -(w & sign_bit);
		if (w == -sign_bit) {
			/*
			 * The -2^(bits-1) value is forbidden.
			 */
			return 0;
		}
		x[k] = (int8_t) * (int32_t *)&w;
	}
	if ((bitbuf & (((uint32_t)1 << avail) - 1)) != 0) {
		/*
		 * Extra bits in the last byte must be zero.
		 */
		return 0;
	}
	return in_len;
}
/*
 * see inner.h
 *
 * Compressed encoding of n = 2^logn signed values. Each value is
 * written as: one sign bit, the 7 low bits of the absolute value,
 * then the remaining high part in unary (that many zeros followed by
 * a one). Returns the encoded length in bytes, or 0 if a value is
 * outside of -2047..+2047 or if the output does not fit in
 * max_out_len bytes. With out == NULL nothing is written and only the
 * length is computed.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
	uint8_t *dst = out;
	const size_t n = (size_t)1 << logn;
	uint32_t bitbuf = 0;
	unsigned pending = 0;
	size_t written = 0;
	size_t k;

	/*
	 * Refuse anything outside of the -2047..+2047 range.
	 */
	for (k = 0; k < n; k ++) {
		if (!(x[k] >= -2047 && x[k] <= +2047)) {
			return 0;
		}
	}

	for (k = 0; k < n; k ++) {
		int val = x[k];
		unsigned sign = (val < 0) ? 1u : 0u;
		unsigned mag = (unsigned)(sign ? -val : val);
		unsigned high = mag >> 7;

		/*
		 * Sign bit followed by the 7 low bits of the absolute
		 * value: exactly 8 bits.
		 */
		bitbuf = (bitbuf << 8) | (sign << 7) | (mag & 127u);
		pending += 8;

		/*
		 * Unary part: 'high' zeros, then a one. The absolute
		 * value is at most 2047, so high is at most 15 and at
		 * most 16 bits are added here. Together with the 8
		 * bits above and up to 7 bits left from earlier
		 * values, that is at most 31 bits, which fits in the
		 * 32-bit accumulator.
		 */
		bitbuf = (bitbuf << (high + 1)) | 1;
		pending += high + 1;

		/*
		 * Emit every complete byte.
		 */
		while (pending >= 8) {
			pending -= 8;
			if (dst != NULL) {
				if (written >= max_out_len) {
					return 0;
				}
				dst[written] = (uint8_t)(bitbuf >> pending);
			}
			written ++;
		}
	}

	/*
	 * Leftover bits, if any, go left-aligned into a last byte.
	 */
	if (pending > 0) {
		if (dst != NULL) {
			if (written >= max_out_len) {
				return 0;
			}
			dst[written] = (uint8_t)(bitbuf << (8 - pending));
		}
		written ++;
	}
	return written;
}
/*
 * see inner.h
 *
 * Decodes n = 2^logn signed values from the compressed format: for
 * each value, one sign bit, the 7 low bits of the absolute value,
 * then the high part in unary (zeros terminated by a one).
 *
 * Returns the number of bytes consumed, or 0 on failure. Decoding
 * fails if the input is shorter than needed, if an absolute value
 * exceeds 2047, or if the encoding is not canonical: a "minus zero"
 * (sign bit set, value zero) or non-zero unused bits in the last
 * byte. Rejecting non-canonical encodings ensures that a given
 * sequence of values has a single valid byte representation.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
	const uint8_t *buf;
	size_t n, u, v;
	uint32_t acc;
	unsigned acc_len;

	n = (size_t)1 << logn;
	buf = in;
	acc = 0;
	acc_len = 0;
	v = 0;
	for (u = 0; u < n; u ++) {
		unsigned b, s, m;

		/*
		 * Get next eight bits: sign and low seven bits of the
		 * absolute value.
		 */
		if (v >= max_in_len) {
			return 0;
		}
		acc = (acc << 8) | (uint32_t)buf[v ++];
		b = acc >> acc_len;
		s = b & 128;
		m = b & 127;

		/*
		 * Get next bits until a 1 is reached. Each zero bit
		 * stands for 128 more in the absolute value.
		 */
		for (;;) {
			if (acc_len == 0) {
				if (v >= max_in_len) {
					return 0;
				}
				acc = (acc << 8) | (uint32_t)buf[v ++];
				acc_len = 8;
			}
			acc_len --;
			if (((acc >> acc_len) & 1) != 0) {
				break;
			}
			m += 128;
			if (m > 2047) {
				return 0;
			}
		}

		/*
		 * "-0" is forbidden: zero has a single valid encoding,
		 * the one with a cleared sign bit.
		 */
		if (s && m == 0) {
			return 0;
		}

		x[u] = (int16_t)(s ? -(int)m : (int)m);
	}

	/*
	 * Unused bits in the last byte must be zero.
	 */
	if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
		return 0;
	}
	return v;
}
/*
 * Key elements and signatures are polynomials with small integer
 * coefficients. The statistics below were gathered over many
 * generated key pairs (10000 or more for each degree):
 *
 *   log(n)     n   max(f,g)   std(f,g)   max(F,G)   std(F,G)
 *      1       2     129       56.31       143       60.02
 *      2       4     123       40.93       160       46.52
 *      3       8      97       28.97       159       38.01
 *      4      16     100       21.48       154       32.50
 *      5      32      71       15.41       151       29.36
 *      6      64      59       11.07       138       27.77
 *      7     128      39        7.91       144       27.00
 *      8     256      32        5.63       148       26.61
 *      9     512      22        4.00       137       26.46
 *     10    1024      15        2.84       146       26.41
 *
 * The private key should have a compact storage format. Key
 * generation is allowed to reject some keys that would otherwise be
 * fine (this does not induce any noticeable vulnerability as long as
 * only a small proportion of possible keys is rejected). Maximum
 * values are therefore enforced at key generation time on the
 * elements of f, g, F and G, so that they can be encoded as
 * fixed-width values. The limits were chosen so that generated keys
 * are almost always within bounds, which means that neither security
 * nor performance is impacted.
 *
 * IMPORTANT: the code assumes that all coefficients of f, g, F and G
 * ultimately fit in the -127..+127 range. Thus, none of the elements
 * of max_fg_bits[] and max_FG_bits[] shall be greater than 8.
 *
 * The table is indexed by logn; entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[] = {
	0,              /* logn = 0: unused */
	8, 8, 8, 8, 8,  /* logn = 1..5 */
	7, 7,           /* logn = 6..7 */
	6, 6,           /* logn = 8..9 */
	5               /* logn = 10 */
};
/*
 * Encoding width, in bits, for the coefficients of F and G. The table
 * is indexed by logn; entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[] = {
	0,              /* logn = 0: unused */
	8, 8, 8, 8, 8,  /* logn = 1..5 */
	8, 8, 8, 8, 8   /* logn = 6..10 */
};
/*
 * Keys with an abnormally large coefficient can always be rejected
 * when a new key pair is generated. The same can be done for
 * signatures, with some care: if the signature process is used in a
 * derandomized setup (explicitly seeded with the message and private
 * key), the specification must be followed faithfully, and it only
 * enforces a limit on the L2 norm of the signature vector. That limit
 * implies the following bounds on the absolute value of a signature
 * coefficient:
 *
 *   log(n)     n   max sig coeff (theoretical)
 *      1       2       412
 *      2       4       583
 *      3       8       824
 *      4      16      1166
 *      5      32      1649
 *      6      64      2332
 *      7     128      3299
 *      8     256      4665
 *      9     512      6598
 *     10    1024      9331
 *
 * However, the largest signature coefficient observed during
 * experiments was 1077 (in absolute value); it can thus be assumed
 * that, with overwhelming probability, signature coefficients fit in
 * -2047..2047, i.e. 12 bits.
 *
 * The table is indexed by logn; entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[] = {
	0,                          /* logn = 0: unused */
	10,                         /* logn = 1 */
	11, 11,                     /* logn = 2..3 */
	12, 12, 12, 12, 12, 12, 12  /* logn = 4..10 */
};
@@ -1,294 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Support functions for signatures (hash-to-point, norm). | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime( | |||
inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn) { | |||
/* | |||
* This is the straightforward per-the-spec implementation. It | |||
* is not constant-time, thus it might reveal information on the | |||
* plaintext (at least, enough to check the plaintext against a | |||
* list of potential plaintexts) in a scenario where the | |||
* attacker does not have access to the signature value or to | |||
* the public key, but knows the nonce (without knowledge of the | |||
* nonce, the hashed output cannot be matched against potential | |||
* plaintexts). | |||
*/ | |||
size_t n; | |||
n = (size_t)1 << logn; | |||
while (n > 0) { | |||
uint8_t buf[2]; | |||
uint32_t w; | |||
inner_shake256_extract(sc, (void *)buf, sizeof buf); | |||
w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; | |||
if (w < 61445) { | |||
while (w >= 12289) { | |||
w -= 12289; | |||
} | |||
*x ++ = (uint16_t)w; | |||
n --; | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct( | |||
inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||
/* | |||
* Each 16-bit sample is a value in 0..65535. The value is | |||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||
* and rejected otherwise; thus, each sample has probability | |||
* about 0.93758 of being selected. | |||
* | |||
* We want to oversample enough to be sure that we will | |||
* have enough values with probability at least 1 - 2^(-256). | |||
* Depending on degree N, this leads to the following | |||
* required oversampling: | |||
* | |||
* logn n oversampling | |||
* 1 2 65 | |||
* 2 4 67 | |||
* 3 8 71 | |||
* 4 16 77 | |||
* 5 32 86 | |||
* 6 64 100 | |||
* 7 128 122 | |||
* 8 256 154 | |||
* 9 512 205 | |||
* 10 1024 287 | |||
* | |||
* If logn >= 7, then the provided temporary buffer is large | |||
* enough. Otherwise, we use a stack buffer of 63 entries | |||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||
*/ | |||
static const uint16_t overtab[] = { | |||
0, /* unused */ | |||
65, | |||
67, | |||
71, | |||
77, | |||
86, | |||
100, | |||
122, | |||
154, | |||
205, | |||
287 | |||
}; | |||
unsigned n, n2, u, m, p, over; | |||
uint16_t *tt1, tt2[63]; | |||
/* | |||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||
* We also reduce modulo q the values; rejected values are set | |||
* to 0xFFFF. | |||
*/ | |||
n = 1U << logn; | |||
n2 = n << 1; | |||
over = overtab[logn]; | |||
m = n + over; | |||
tt1 = (uint16_t *)tmp; | |||
for (u = 0; u < m; u ++) { | |||
uint8_t buf[2]; | |||
uint32_t w, wr; | |||
inner_shake256_extract(sc, buf, sizeof buf); | |||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||
wr |= ((w - 61445) >> 31) - 1; | |||
if (u < n) { | |||
x[u] = (uint16_t)wr; | |||
} else if (u < n2) { | |||
tt1[u - n] = (uint16_t)wr; | |||
} else { | |||
tt2[u - n2] = (uint16_t)wr; | |||
} | |||
} | |||
/* | |||
* Now we must "squeeze out" the invalid values. We do this in | |||
* a logarithmic sequence of passes; each pass computes where a | |||
* value should go, and moves it down by 'p' slots if necessary, | |||
* where 'p' uses an increasing powers-of-two scale. It can be | |||
* shown that in all cases where the loop decides that a value | |||
* has to be moved down by p slots, the destination slot is | |||
* "free" (i.e. contains an invalid value). | |||
*/ | |||
for (p = 1; p <= over; p <<= 1) { | |||
unsigned v; | |||
/* | |||
* In the loop below: | |||
* | |||
* - v contains the index of the final destination of | |||
* the value; it is recomputed dynamically based on | |||
* whether values are valid or not. | |||
* | |||
* - u is the index of the value we consider ("source"); | |||
* its address is s. | |||
* | |||
* - The loop may swap the value with the one at index | |||
* u-p. The address of the swap destination is d. | |||
*/ | |||
v = 0; | |||
for (u = 0; u < m; u ++) { | |||
uint16_t *s, *d; | |||
unsigned j, sv, dv, mk; | |||
if (u < n) { | |||
s = &x[u]; | |||
} else if (u < n2) { | |||
s = &tt1[u - n]; | |||
} else { | |||
s = &tt2[u - n2]; | |||
} | |||
sv = *s; | |||
/* | |||
* The value in sv should ultimately go to | |||
* address v, i.e. jump back by u-v slots. | |||
*/ | |||
j = u - v; | |||
/* | |||
* We increment v for the next iteration, but | |||
* only if the source value is valid. The mask | |||
* 'mk' is -1 if the value is valid, 0 otherwise, | |||
* so we _subtract_ mk. | |||
*/ | |||
mk = (sv >> 15) - 1U; | |||
v -= mk; | |||
/* | |||
* In this loop we consider jumps by p slots; if | |||
* u < p then there is nothing more to do. | |||
*/ | |||
if (u < p) { | |||
continue; | |||
} | |||
/* | |||
* Destination for the swap: value at address u-p. | |||
*/ | |||
if ((u - p) < n) { | |||
d = &x[u - p]; | |||
} else if ((u - p) < n2) { | |||
d = &tt1[(u - p) - n]; | |||
} else { | |||
d = &tt2[(u - p) - n2]; | |||
} | |||
dv = *d; | |||
/* | |||
* The swap should be performed only if the source | |||
* is valid AND the jump j has its 'p' bit set. | |||
*/ | |||
mk &= -(((j & p) + 0x1FF) >> 9); | |||
*s = (uint16_t)(sv ^ (mk & (sv ^ dv))); | |||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv))); | |||
} | |||
} | |||
} | |||
/* see inner.h */
int
PQCLEAN_FALCON1024_CLEAN_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * The squared l2-norm of (s1, s2) is accumulated with 32-bit
     * operations only. 'seen' collects every intermediate sum, so its
     * top bit records whether the running total ever reached 2^31;
     * in that case the result is saturated to 2^32-1.
     */
    size_t count, i;
    uint32_t acc, seen;

    count = (size_t)1 << logn;
    acc = 0;
    seen = 0;
    for (i = 0; i < count; i++) {
        int32_t c1, c2;

        c1 = s1[i];
        c2 = s2[i];
        acc += (uint32_t)(c1 * c1);
        seen |= acc;
        acc += (uint32_t)(c2 * c2);
        seen |= acc;
    }

    /* Saturate: -(seen >> 31) is all-ones when bit 31 was ever set. */
    acc |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024); the shift scales
     * the squared bound from degree 1024 down to degree 2^logn.
     */
    return acc < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}
/* see inner.h */
int
PQCLEAN_FALCON1024_CLEAN_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    /*
     * Same test as the full-vector variant, except that the squared
     * norm of the first half is supplied in 'sqn'. The overflow
     * tracker starts as all-ones when sqn already has its top bit
     * set, so a saturated input stays saturated.
     */
    const int16_t *cur, *end;
    uint32_t seen;

    seen = -(sqn >> 31);
    cur = s2;
    end = s2 + ((size_t)1 << logn);
    while (cur != end) {
        int32_t c;

        c = *cur++;
        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }

    /* Saturate to 2^32-1 if any partial sum reached 2^31. */
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}
@@ -1,700 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* FFT code. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* | |||
* Rules for complex number macros: | |||
* -------------------------------- | |||
* | |||
* Operand order is: destination, source1, source2... | |||
* | |||
* Each operand is a real and an imaginary part. | |||
* | |||
* All overlaps are allowed. | |||
*/ | |||
/*
 * Complex addition: (d_re, d_im) = (a_re, a_im) + (b_re, b_im).
 * Both results are computed before either destination is written,
 * so destinations may overlap the sources.
 */
#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_sum_re = fpr_add(a_re, b_re); \
        fpr fpct_sum_im = fpr_add(a_im, b_im); \
        (d_re) = fpct_sum_re; \
        (d_im) = fpct_sum_im; \
    } while (0)
/*
 * Complex subtraction: (d_re, d_im) = (a_re, a_im) - (b_re, b_im).
 * Both results are computed before either destination is written,
 * so destinations may overlap the sources.
 */
#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_diff_re = fpr_sub(a_re, b_re); \
        fpr fpct_diff_im = fpr_sub(a_im, b_im); \
        (d_re) = fpct_diff_re; \
        (d_im) = fpct_diff_im; \
    } while (0)
/*
 * Multiplication of two complex numbers (d = a * b):
 *   Re(d) = Re(a)*Re(b) - Im(a)*Im(b)
 *   Im(d) = Re(a)*Im(b) + Im(a)*Re(b)
 * Operands are copied to temporaries first, so destinations may
 * overlap the sources.
 */
#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_y_re = (b_re); \
        fpr fpct_y_im = (b_im); \
        fpr fpct_prod_re = fpr_sub( \
                           fpr_mul(fpct_x_re, fpct_y_re), \
                           fpr_mul(fpct_x_im, fpct_y_im)); \
        fpr fpct_prod_im = fpr_add( \
                           fpr_mul(fpct_x_re, fpct_y_im), \
                           fpr_mul(fpct_x_im, fpct_y_re)); \
        (d_re) = fpct_prod_re; \
        (d_im) = fpct_prod_im; \
    } while (0)
/*
 * Squaring of a complex number (d = a * a):
 *   Re(d) = Re(a)^2 - Im(a)^2
 *   Im(d) = 2 * Re(a) * Im(a)
 * The operand is copied to temporaries first, so the destination may
 * overlap the source.
 */
#define FPC_SQR(d_re, d_im, a_re, a_im)   do { \
        fpr fpct_in_re = (a_re); \
        fpr fpct_in_im = (a_im); \
        fpr fpct_out_re = fpr_sub(fpr_sqr(fpct_in_re), fpr_sqr(fpct_in_im)); \
        fpr fpct_out_im = fpr_double(fpr_mul(fpct_in_re, fpct_in_im)); \
        (d_re) = fpct_out_re; \
        (d_im) = fpct_out_im; \
    } while (0)
/*
 * Inversion of a complex number (d = 1 / a), computed as
 * conj(a) / |a|^2: the squared modulus is inverted once, then both
 * components of the conjugate are scaled by it.
 */
#define FPC_INV(d_re, d_im, a_re, a_im)   do { \
        fpr fpct_in_re = (a_re); \
        fpr fpct_in_im = (a_im); \
        fpr fpct_scale; \
        fpr fpct_out_re, fpct_out_im; \
        fpct_scale = fpr_add(fpr_sqr(fpct_in_re), fpr_sqr(fpct_in_im)); \
        fpct_scale = fpr_inv(fpct_scale); \
        fpct_out_re = fpr_mul(fpct_in_re, fpct_scale); \
        fpct_out_im = fpr_mul(fpr_neg(fpct_in_im), fpct_scale); \
        (d_re) = fpct_out_re; \
        (d_im) = fpct_out_im; \
    } while (0)
/*
 * Division of complex numbers (d = a / b). The divisor is first
 * inverted as conj(b) / |b|^2, then multiplied with a.
 */
#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_num_re = (a_re); \
        fpr fpct_num_im = (a_im); \
        fpr fpct_den_re = (b_re); \
        fpr fpct_den_im = (b_im); \
        fpr fpct_scale; \
        fpr fpct_inv_re, fpct_inv_im; \
        fpr fpct_quo_re, fpct_quo_im; \
        fpct_scale = fpr_add(fpr_sqr(fpct_den_re), fpr_sqr(fpct_den_im)); \
        fpct_scale = fpr_inv(fpct_scale); \
        fpct_inv_re = fpr_mul(fpct_den_re, fpct_scale); \
        fpct_inv_im = fpr_mul(fpr_neg(fpct_den_im), fpct_scale); \
        fpct_quo_re = fpr_sub( \
                      fpr_mul(fpct_num_re, fpct_inv_re), \
                      fpr_mul(fpct_num_im, fpct_inv_im)); \
        fpct_quo_im = fpr_add( \
                      fpr_mul(fpct_num_re, fpct_inv_im), \
                      fpr_mul(fpct_num_im, fpct_inv_re)); \
        (d_re) = fpct_quo_re; \
        (d_im) = fpct_quo_im; \
    } while (0)
/* | |||
* Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the | |||
* values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots | |||
* of X^N+1 in the field of complex numbers. A crucial property is that | |||
* w_{N-1-j} = conj(w_j) = 1/w_j for all j. | |||
* | |||
* FFT representation of a polynomial f (taken modulo X^N+1) is the | |||
* set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)), | |||
* thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, | |||
* for j = 0 to N/2-1; the other half can be recomputed easily when (if) | |||
* needed. A consequence is that FFT representation has the same size | |||
* as normal representation: N/2 complex numbers use N real numbers (each | |||
* complex number is the combination of a real and an imaginary part). | |||
* | |||
* We use a specific ordering which makes computations easier. Let rev() | |||
* be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we | |||
* store the real and imaginary parts of f(w_j) in slots: | |||
* | |||
* Re(f(w_j)) -> slot rev(j)/2 | |||
* Im(f(w_j)) -> slot rev(j)/2+N/2 | |||
* | |||
* (Note that rev(j) is even for j < N/2.) | |||
*/ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn) { | |||
/* | |||
* FFT algorithm in bit-reversal order uses the following | |||
* iterative algorithm: | |||
* | |||
* t = N | |||
* for m = 1; m < N; m *= 2: | |||
* ht = t/2 | |||
* for i1 = 0; i1 < m; i1 ++: | |||
* j1 = i1 * t | |||
* s = GM[m + i1] | |||
* for j = j1; j < (j1 + ht); j ++: | |||
* x = f[j] | |||
* y = s * f[j + ht] | |||
* f[j] = x + y | |||
* f[j + ht] = x - y | |||
* t = ht | |||
* | |||
* GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). | |||
* | |||
* In the description above, f[] is supposed to contain complex | |||
* numbers. In our in-memory representation, the real and | |||
* imaginary parts of f[k] are in array slots k and k+N/2. | |||
* | |||
* We only keep the first half of the complex numbers. We can | |||
* see that after the first iteration, the first and second halves | |||
* of the array of complex numbers have separate lives, so we | |||
* simply ignore the second part. | |||
*/ | |||
unsigned u; | |||
size_t t, n, hn, m; | |||
/* | |||
* First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 | |||
* (because GM[1] = w^rev(1) = w^(N/2) = i). | |||
* In our chosen representation, this is a no-op: everything is | |||
* already where it should be. | |||
*/ | |||
/* | |||
* Subsequent iterations are truncated to use only the first | |||
* half of values. | |||
*/ | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
t = hn; | |||
for (u = 1, m = 2; u < logn; u ++, m <<= 1) { | |||
size_t ht, hm, i1, j1; | |||
ht = t >> 1; | |||
hm = m >> 1; | |||
for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { | |||
size_t j, j2; | |||
j2 = j1 + ht; | |||
fpr s_re, s_im; | |||
s_re = fpr_gm_tab[((m + i1) << 1) + 0]; | |||
s_im = fpr_gm_tab[((m + i1) << 1) + 1]; | |||
for (j = j1; j < j2; j ++) { | |||
fpr x_re, x_im, y_re, y_im; | |||
x_re = f[j]; | |||
x_im = f[j + hn]; | |||
y_re = f[j + ht]; | |||
y_im = f[j + ht + hn]; | |||
FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im); | |||
FPC_ADD(f[j], f[j + hn], | |||
x_re, x_im, y_re, y_im); | |||
FPC_SUB(f[j + ht], f[j + ht + hn], | |||
x_re, x_im, y_re, y_im); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn) { | |||
/* | |||
* Inverse FFT algorithm in bit-reversal order uses the following | |||
* iterative algorithm: | |||
* | |||
* t = 1 | |||
* for m = N; m > 1; m /= 2: | |||
* hm = m/2 | |||
* dt = t*2 | |||
* for i1 = 0; i1 < hm; i1 ++: | |||
* j1 = i1 * dt | |||
* s = iGM[hm + i1] | |||
* for j = j1; j < (j1 + t); j ++: | |||
* x = f[j] | |||
* y = f[j + t] | |||
* f[j] = x + y | |||
* f[j + t] = s * (x - y) | |||
* t = dt | |||
* for i1 = 0; i1 < N; i1 ++: | |||
* f[i1] = f[i1] / N | |||
* | |||
* iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) | |||
* (actually, iGM[k] = 1/GM[k] = conj(GM[k])). | |||
* | |||
* In the main loop (not counting the final division loop), in | |||
* all iterations except the last, the first and second half of f[] | |||
* (as an array of complex numbers) are separate. In our chosen | |||
* representation, we do not keep the second half. | |||
* | |||
* The last iteration recombines the recomputed half with the | |||
* implicit half, and should yield only real numbers since the | |||
* target polynomial is real; moreover, s = i at that step. | |||
* Thus, when considering x and y: | |||
* y = conj(x) since the final f[j] must be real | |||
* Therefore, f[j] is filled with 2*Re(x), and f[j + t] is | |||
* filled with 2*Im(x). | |||
* But we already have Re(x) and Im(x) in array slots j and j+t | |||
* in our chosen representation. That last iteration is thus a | |||
* simple doubling of the values in all the array. | |||
* | |||
* We make the last iteration a no-op by tweaking the final | |||
* division into a division by N/2, not N. | |||
*/ | |||
size_t u, n, hn, t, m; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
hn = n >> 1; | |||
for (u = logn; u > 1; u --) { | |||
size_t hm, dt, i1, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { | |||
size_t j, j2; | |||
j2 = j1 + t; | |||
fpr s_re, s_im; | |||
s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; | |||
s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); | |||
for (j = j1; j < j2; j ++) { | |||
fpr x_re, x_im, y_re, y_im; | |||
x_re = f[j]; | |||
x_im = f[j + hn]; | |||
y_re = f[j + t]; | |||
y_im = f[j + t + hn]; | |||
FPC_ADD(f[j], f[j + hn], | |||
x_re, x_im, y_re, y_im); | |||
FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im); | |||
FPC_MUL(f[j + t], f[j + t + hn], | |||
x_re, x_im, s_re, s_im); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* Last iteration is a no-op, provided that we divide by N/2 | |||
* instead of N. We need to make a special case for logn = 0. | |||
*/ | |||
if (logn > 0) { | |||
fpr ni; | |||
ni = fpr_p2_tab[logn]; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = fpr_mul(f[u], ni); | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_add( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_add(a[u], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_sub( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_sub(a[u], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_neg(a[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = (n >> 1); u < n; u ++) { | |||
a[u] = fpr_neg(a[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mul_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = fpr_neg(b[u + hn]); | |||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) { | |||
/* | |||
* Since each coefficient is multiplied with its own conjugate, | |||
* the result contains only real values. | |||
*/ | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); | |||
a[u + hn] = fpr_zero; | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_mul(a[u], x); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_div_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im; | |||
fpr b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
d[u] = fpr_inv(fpr_add( | |||
fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), | |||
fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr F_re, F_im, G_re, G_im; | |||
fpr f_re, f_im, g_re, g_im; | |||
fpr a_re, a_im, b_re, b_im; | |||
F_re = F[u]; | |||
F_im = F[u + hn]; | |||
G_re = G[u]; | |||
G_im = G[u + hn]; | |||
f_re = f[u]; | |||
f_im = f[u + hn]; | |||
g_re = g[u]; | |||
g_im = g[u + hn]; | |||
FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); | |||
FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); | |||
d[u] = fpr_add(a_re, b_re); | |||
d[u + hn] = fpr_add(a_im, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
a[u] = fpr_mul(a[u], b[u]); | |||
a[u + hn] = fpr_mul(a[u + hn], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr ib; | |||
ib = fpr_inv(b[u]); | |||
a[u] = fpr_mul(a[u], ib); | |||
a[u + hn] = fpr_mul(a[u + hn], ib); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft( | |||
const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||
fpr mu_re, mu_im; | |||
g00_re = g00[u]; | |||
g00_im = g00[u + hn]; | |||
g01_re = g01[u]; | |||
g01_im = g01[u + hn]; | |||
g11_re = g11[u]; | |||
g11_im = g11[u + hn]; | |||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||
FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||
g01[u] = mu_re; | |||
g01[u + hn] = fpr_neg(mu_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft( | |||
fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||
fpr mu_re, mu_im; | |||
g00_re = g00[u]; | |||
g00_im = g00[u + hn]; | |||
g01_re = g01[u]; | |||
g01_im = g01[u + hn]; | |||
g11_re = g11[u]; | |||
g11_im = g11[u + hn]; | |||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||
FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||
l10[u] = mu_re; | |||
l10[u + hn] = fpr_neg(mu_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_split_fft( | |||
fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn) { | |||
/* | |||
* The FFT representation we use is in bit-reversed order | |||
* (element i contains f(w^(rev(i))), where rev() is the | |||
* bit-reversal function over the ring degree. This changes | |||
* indexes with regards to the Falcon specification. | |||
*/ | |||
size_t n, hn, qn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
qn = hn >> 1; | |||
/* | |||
* We process complex values by pairs. For logn = 1, there is only | |||
* one complex value (the other one is the implicit conjugate), | |||
* so we add the two lines below because the loop will be | |||
* skipped. | |||
*/ | |||
f0[0] = f[0]; | |||
f1[0] = f[hn]; | |||
for (u = 0; u < qn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
fpr t_re, t_im; | |||
a_re = f[(u << 1) + 0]; | |||
a_im = f[(u << 1) + 0 + hn]; | |||
b_re = f[(u << 1) + 1]; | |||
b_im = f[(u << 1) + 1 + hn]; | |||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f0[u] = fpr_half(t_re); | |||
f0[u + qn] = fpr_half(t_im); | |||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||
FPC_MUL(t_re, t_im, t_re, t_im, | |||
fpr_gm_tab[((u + hn) << 1) + 0], | |||
fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); | |||
f1[u] = fpr_half(t_re); | |||
f1[u + qn] = fpr_half(t_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_merge_fft( | |||
fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn) { | |||
size_t n, hn, qn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
qn = hn >> 1; | |||
/* | |||
* An extra copy to handle the special case logn = 1. | |||
*/ | |||
f[0] = f0[0]; | |||
f[hn] = f1[0]; | |||
for (u = 0; u < qn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
fpr t_re, t_im; | |||
a_re = f0[u]; | |||
a_im = f0[u + qn]; | |||
FPC_MUL(b_re, b_im, f1[u], f1[u + qn], | |||
fpr_gm_tab[((u + hn) << 1) + 0], | |||
fpr_gm_tab[((u + hn) << 1) + 1]); | |||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f[(u << 1) + 0] = t_re; | |||
f[(u << 1) + 0 + hn] = t_im; | |||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f[(u << 1) + 1] = t_re; | |||
f[(u << 1) + 1 + hn] = t_im; | |||
} | |||
} |
@@ -1,473 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON1024_CLEAN_FPR_H | |||
#define PQCLEAN_FALCON1024_CLEAN_FPR_H | |||
/* | |||
* Floating-point operations. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* ====================================================================== */ | |||
/* | |||
* Custom floating-point implementation with integer arithmetics. We | |||
* use IEEE-754 "binary64" format, with some simplifications: | |||
* | |||
* - Top bit is s = 1 for negative, 0 for positive. | |||
* | |||
* - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). | |||
* | |||
* - Mantissa m uses the 52 low bits. | |||
* | |||
* Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) | |||
* i.e. the mantissa really is a 53-bit number (less than 2.0, but not | |||
* less than 1.0), but the top bit (equal to 1 by definition) is omitted | |||
* in the encoding. | |||
* | |||
* In IEEE-754, there are some special values: | |||
* | |||
* - If e = 2047, then the value is either an infinite (m = 0) or | |||
* a NaN (m != 0). | |||
* | |||
* - If e = 0, then the value is either a zero (m = 0) or a subnormal, | |||
* aka "denormalized number" (m != 0). | |||
* | |||
* Of these, we only need the zeros. The caller is responsible for not | |||
* providing operands that would lead to infinites, NaNs or subnormals. | |||
* If inputs are such that values go out of range, then indeterminate | |||
* values are returned (it would still be deterministic, but no specific | |||
* value may be relied upon). | |||
* | |||
* At the C level, the three parts are stored in a 64-bit unsigned | |||
* word. | |||
* | |||
* One may note that a property of the IEEE-754 format is that order | |||
* is preserved for positive values: if two positive floating-point | |||
* values x and y are such that x < y, then their respective encodings | |||
* as _signed_ 64-bit integers i64(x) and i64(y) will be such that | |||
* i64(x) < i64(y). For negative values, order is reversed: if x < 0, | |||
* y < 0, and x < y, then i64(x) > i64(y). | |||
* | |||
* IMPORTANT ASSUMPTIONS: | |||
* ====================== | |||
* | |||
* For proper computations, and constant-time behaviour, we assume the | |||
* following: | |||
* | |||
* - 32x32->64 multiplication (unsigned) has an execution time that | |||
* is independent of its operands. This is true of most modern | |||
* x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ | |||
* and M3 (in the M0 and M0+, this is done in software, so it depends | |||
* on that routine), and the PowerPC cores from the G3/G4 lines. | |||
* For more info, see: https://www.bearssl.org/ctmul.html | |||
* | |||
* - Left-shifts and right-shifts of 32-bit values have an execution | |||
* time which does not depend on the shifted value nor on the | |||
* shift count. An historical exception is the Pentium IV, but most | |||
* modern CPU have barrel shifters. Some small microcontrollers | |||
* might have varying-time shifts (not the ARM Cortex M*, though). | |||
* | |||
* - Right-shift of a signed negative value performs a sign extension. | |||
* As per the C standard, this operation returns an | |||
* implementation-defined result (this is NOT an "undefined | |||
* behaviour"). On most/all systems, an arithmetic shift is | |||
* performed, because this is what makes most sense. | |||
*/ | |||
/*
 * Storage type for the emulated floating-point values: a bare 64-bit
 * unsigned integer holding the sign, exponent and mantissa fields.
 *
 * Normally the 'fpr' type should be a struct or union wrapped around
 * the internal 64-bit value; the direct integer type is used instead
 * to enable a lighter call convention on ARM platforms. As a
 * consequence, direct (invalid) use of operators such as '*' or '+'
 * on fpr values is not caught by the compiler. We rely on the
 * "normal" (non-emulated) code to detect such instances.
 */
typedef uint64_t fpr; | |||
/* | |||
* For computations, we split values into an integral mantissa in the | |||
* 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is | |||
* "sticky" (it is set to 1 if any of the bits below it is 1); when | |||
* re-encoding, the low two bits are dropped, but may induce an | |||
* increment in the value for proper rounding. | |||
*/ | |||
/* | |||
* Right-shift a 64-bit unsigned value by a possibly secret shift count. | |||
* We assumed that the underlying architecture had a barrel shifter for | |||
* 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will | |||
* typically invoke a software routine that is not necessarily | |||
* constant-time; hence the function below. | |||
* | |||
* Shift count n MUST be in the 0..63 range. | |||
*/ | |||
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    /*
     * The shift is done in two stages so that only shift counts below
     * 32 are ever applied: first an optional shift by exactly 32
     * (chosen with a mask rather than a branch), then the remaining
     * low five bits of the count.
     */
    uint64_t take_hi = -(uint64_t)(n >> 5); /* all-ones when n >= 32 */
    uint64_t by32 = x >> 32;

    x = (x & ~take_hi) | (by32 & take_hi);
    return x >> (n & 31);
}
/* | |||
* Right-shift a 64-bit signed value by a possibly secret shift count | |||
* (see fpr_ursh() for the rationale). | |||
* | |||
* Shift count n MUST be in the 0..63 range. | |||
*/ | |||
static inline int64_t
fpr_irsh(int64_t x, int n) {
    /*
     * Same two-stage scheme as the unsigned variant: a mask-selected
     * shift by 32, followed by a shift by the low five bits of n.
     * Both shifts are signed right-shifts, so the sign is propagated.
     */
    int64_t take_hi = -(int64_t)(n >> 5); /* all-ones when n >= 32 */
    int64_t by32 = x >> 32;

    x = (x & ~take_hi) | (by32 & take_hi);
    return x >> (n & 31);
}
/* | |||
* Left-shift a 64-bit unsigned value by a possibly secret shift count | |||
* (see fpr_ursh() for the rationale). | |||
* | |||
* Shift count n MUST be in the 0..63 range. | |||
*/ | |||
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    /*
     * Two-stage left shift: a mask-selected shift by 32, then a shift
     * by the low five bits of n, so no single shift exceeds 31 bits.
     */
    uint64_t take_hi = -(uint64_t)(n >> 5); /* all-ones when n >= 32 */
    uint64_t by32 = x << 32;

    x = (x & ~take_hi) | (by32 & take_hi);
    return x << (n & 31);
}
/* | |||
* Expectations: | |||
* s = 0 or 1 | |||
* exponent e is "arbitrary" and unbiased | |||
* 2^54 <= m < 2^55 | |||
* Numerical value is (-1)^s * m * 2^e | |||
* | |||
* Exponents which are too low lead to value zero. If the exponent is | |||
* too large, the returned value is indeterminate. | |||
* | |||
* If m = 0, then a zero is returned (using the provided sign). | |||
* If e < -1076, then a zero is returned (regardless of the value of m). | |||
* If e >= -1076 and m != 0, m must be within the expected range | |||
* (2^54 to 2^55-1). | |||
*/ | |||
static inline fpr | |||
FPR(int s, int e, uint64_t m) { | |||
fpr x; | |||
uint32_t t; | |||
unsigned f; | |||
/* | |||
* If e >= -1076, then the value is "normal"; otherwise, it | |||
* should be a subnormal, which we clamp down to zero. | |||
*/ | |||
e += 1076; | |||
t = (uint32_t)e >> 31; | |||
m &= (uint64_t)t - 1; | |||
/* | |||
* If m = 0 then we want a zero; make e = 0 too, but conserve | |||
* the sign. | |||
*/ | |||
t = (uint32_t)(m >> 54); | |||
e &= -(int)t; | |||
/* | |||
* The 52 mantissa bits come from m. Value m has its top bit set | |||
* (unless it is a zero); we leave it "as is": the top bit will | |||
* increment the exponent by 1, except when m = 0, which is | |||
* exactly what we want. | |||
*/ | |||
x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); | |||
/* | |||
* Rounding: if the low three bits of m are 011, 110 or 111, | |||
* then the value should be incremented to get the next | |||
* representable value. This implements the usual | |||
* round-to-nearest rule (with preference to even values in case | |||
* of a tie). Note that the increment may make a carry spill | |||
* into the exponent field, which is again exactly what we want | |||
* in that case. | |||
*/ | |||
f = (unsigned)m & 7U; | |||
x += (0xC8U >> f) & 1; | |||
return x; | |||
} | |||
#define fpr_scaled PQCLEAN_FALCON1024_CLEAN_fpr_scaled | |||
fpr fpr_scaled(int64_t i, int sc); | |||
static inline fpr | |||
fpr_of(int64_t i) { | |||
return fpr_scaled(i, 0); | |||
} | |||
/*
 * Constants of type 'fpr'. Each literal is the raw 64-bit encoding of
 * the value (sign in bit 63, exponent field starting at bit 52,
 * mantissa below, i.e. the layout assembled by FPR()). The per-line
 * values are the ones listed in the API description in inner.h. The
 * two negative constants carry a 'U' suffix because their encodings
 * have bit 63 set and thus exceed the signed 64-bit range.
 */
static const fpr fpr_q = 4667981563525332992; /* 12289 */ | |||
static const fpr fpr_inverse_of_q = 4545632735260551042; /* 1/12289 */ | |||
static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306; /* 1/(2*(1.8205^2)) */ | |||
static const fpr fpr_inv_sigma = 4573359825155195350; /* 1/(1.55*sqrt(12289)) */ | |||
static const fpr fpr_sigma_min_9 = 4608495221497168882; /* 1.291500756233514568549480827642 */ | |||
static const fpr fpr_sigma_min_10 = 4608586345619182117; /* 1.311734375905083682667395805765 */ | |||
static const fpr fpr_log2 = 4604418534313441775; /* log(2) */ | |||
static const fpr fpr_inv_log2 = 4609176140021203710; /* 1/log(2) */ | |||
static const fpr fpr_bnorm_max = 4670353323383631276; /* 16822.4121 */ | |||
static const fpr fpr_zero = 0; /* 0 */ | |||
static const fpr fpr_one = 4607182418800017408; /* 1 */ | |||
static const fpr fpr_two = 4611686018427387904; /* 2 */ | |||
static const fpr fpr_onehalf = 4602678819172646912; /* 0.5 */ | |||
static const fpr fpr_invsqrt2 = 4604544271217802189; /* presumably 1/sqrt(2), per the name -- TODO confirm */ | |||
static const fpr fpr_invsqrt8 = 4600040671590431693; /* presumably 1/sqrt(8), per the name -- TODO confirm */ | |||
static const fpr fpr_ptwo31 = 4746794007248502784; /* 2^31 */ | |||
static const fpr fpr_ptwo31m1 = 4746794007244308480; /* 2^31-1 */ | |||
static const fpr fpr_mtwo31m1 = 13970166044099084288U; /* -(2^31-1) */ | |||
/* NOTE(review): same literal as fpr_ptwo63 below; presumably 2^63-1 is not exactly representable and rounds to 2^63. */
static const fpr fpr_ptwo63m1 = 4890909195324358656; /* 2^63-1 */ | |||
static const fpr fpr_mtwo63m1 = 14114281232179134464U; /* -(2^63-1) */ | |||
static const fpr fpr_ptwo63 = 4890909195324358656; /* 2^63 */ | |||
static inline int64_t | |||
fpr_rint(fpr x) { | |||
uint64_t m, d; | |||
int e; | |||
uint32_t s, dd, f; | |||
/* | |||
* We assume that the value fits in -(2^63-1)..+(2^63-1). We can | |||
* thus extract the mantissa as a 63-bit integer, then right-shift | |||
* it as needed. | |||
*/ | |||
m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||
e = 1085 - ((int)(x >> 52) & 0x7FF); | |||
/* | |||
* If a shift of more than 63 bits is needed, then simply set m | |||
* to zero. This also covers the case of an input operand equal | |||
* to zero. | |||
*/ | |||
m &= -(uint64_t)((uint32_t)(e - 64) >> 31); | |||
e &= 63; | |||
/* | |||
* Right-shift m as needed. Shift count is e. Proper rounding | |||
* mandates that: | |||
* - If the highest dropped bit is zero, then round low. | |||
* - If the highest dropped bit is one, and at least one of the | |||
* other dropped bits is one, then round up. | |||
* - If the highest dropped bit is one, and all other dropped | |||
* bits are zero, then round up if the lowest kept bit is 1, | |||
* or low otherwise (i.e. ties are broken by "rounding to even"). | |||
* | |||
* We thus first extract a word consisting of all the dropped bit | |||
* AND the lowest kept bit; then we shrink it down to three bits, | |||
* the lowest being "sticky". | |||
*/ | |||
d = fpr_ulsh(m, 63 - e); | |||
dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); | |||
f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); | |||
m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); | |||
/* | |||
* Apply the sign bit. | |||
*/ | |||
s = (uint32_t)(x >> 63); | |||
return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; | |||
} | |||
static inline int64_t | |||
fpr_floor(fpr x) { | |||
uint64_t t; | |||
int64_t xi; | |||
int e, cc; | |||
/* | |||
* We extract the integer as a _signed_ 64-bit integer with | |||
* a scaling factor. Since we assume that the value fits | |||
* in the -(2^63-1)..+(2^63-1) range, we can left-shift the | |||
* absolute value to make it in the 2^62..2^63-1 range: we | |||
* will only need a right-shift afterwards. | |||
*/ | |||
e = (int)(x >> 52) & 0x7FF; | |||
t = x >> 63; | |||
xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) | |||
& (((uint64_t)1 << 63) - 1)); | |||
xi = (xi ^ -(int64_t)t) + (int64_t)t; | |||
cc = 1085 - e; | |||
/* | |||
* We perform an arithmetic right-shift on the value. This | |||
* applies floor() semantics on both positive and negative values | |||
* (rounding toward minus infinity). | |||
*/ | |||
xi = fpr_irsh(xi, cc & 63); | |||
/* | |||
* If the true shift count was 64 or more, then we should instead | |||
* replace xi with 0 (if nonnegative) or -1 (if negative). Edge | |||
* case: -0 will be floored to -1, not 0 (whether this is correct | |||
* is debatable; in any case, the other functions normalize zero | |||
* to +0). | |||
* | |||
* For an input of zero, the non-shifted xi was incorrect (we used | |||
* a top implicit bit of value 1, not 0), but this does not matter | |||
* since this operation will clamp it down. | |||
*/ | |||
xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); | |||
return xi; | |||
} | |||
static inline int64_t | |||
fpr_trunc(fpr x) { | |||
uint64_t t, xu; | |||
int e, cc; | |||
/* | |||
* Extract the absolute value. Since we assume that the value | |||
* fits in the -(2^63-1)..+(2^63-1) range, we can left-shift | |||
* the absolute value into the 2^62..2^63-1 range, and then | |||
* do a right shift afterwards. | |||
*/ | |||
e = (int)(x >> 52) & 0x7FF; | |||
xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||
cc = 1085 - e; | |||
xu = fpr_ursh(xu, cc & 63); | |||
/* | |||
* If the exponent is too low (cc > 63), then the shift was wrong | |||
* and we must clamp the value to 0. This also covers the case | |||
* of an input equal to zero. | |||
*/ | |||
xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); | |||
/* | |||
* Apply back the sign, if the source value is negative. | |||
*/ | |||
t = x >> 63; | |||
xu = (xu ^ -t) + t; | |||
return *(int64_t *)&xu; | |||
} | |||
#define fpr_add PQCLEAN_FALCON1024_CLEAN_fpr_add | |||
fpr fpr_add(fpr x, fpr y); | |||
static inline fpr | |||
fpr_sub(fpr x, fpr y) { | |||
y ^= (uint64_t)1 << 63; | |||
return fpr_add(x, y); | |||
} | |||
static inline fpr | |||
fpr_neg(fpr x) { | |||
x ^= (uint64_t)1 << 63; | |||
return x; | |||
} | |||
static inline fpr | |||
fpr_half(fpr x) { | |||
/* | |||
* To divide a value by 2, we just have to subtract 1 from its | |||
* exponent, but we have to take care of zero. | |||
*/ | |||
uint32_t t; | |||
x -= (uint64_t)1 << 52; | |||
t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; | |||
x &= (uint64_t)t - 1; | |||
return x; | |||
} | |||
static inline fpr | |||
fpr_double(fpr x) { | |||
/* | |||
* To double a value, we just increment by one the exponent. We | |||
* don't care about infinites or NaNs; however, 0 is a | |||
* special case. | |||
*/ | |||
x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; | |||
return x; | |||
} | |||
#define fpr_mul PQCLEAN_FALCON1024_CLEAN_fpr_mul | |||
fpr fpr_mul(fpr x, fpr y); | |||
static inline fpr | |||
fpr_sqr(fpr x) { | |||
return fpr_mul(x, x); | |||
} | |||
#define fpr_div PQCLEAN_FALCON1024_CLEAN_fpr_div | |||
fpr fpr_div(fpr x, fpr y); | |||
static inline fpr | |||
fpr_inv(fpr x) { | |||
return fpr_div(4607182418800017408u, x); | |||
} | |||
#define fpr_sqrt PQCLEAN_FALCON1024_CLEAN_fpr_sqrt | |||
fpr fpr_sqrt(fpr x); | |||
static inline int | |||
fpr_lt(fpr x, fpr y) { | |||
/* | |||
* If both x and y are positive, then a signed comparison yields | |||
* the proper result: | |||
* - For positive values, the order is preserved. | |||
* - The sign bit is at the same place as in integers, so | |||
* sign is preserved. | |||
* Moreover, we can compute [x < y] as sgn(x-y) and the computation | |||
* of x-y will not overflow. | |||
* | |||
* If the signs differ, then sgn(x) gives the proper result. | |||
* | |||
* If both x and y are negative, then the order is reversed. | |||
* Hence [x < y] = sgn(y-x). We must compute this separately from | |||
* sgn(x-y); simply inverting sgn(x-y) would not handle the edge | |||
* case x = y properly. | |||
*/ | |||
int cc0, cc1; | |||
int64_t sx; | |||
int64_t sy; | |||
sx = *(int64_t *)&x; | |||
sy = *(int64_t *)&y; | |||
sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ | |||
cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ | |||
cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ | |||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); | |||
} | |||
/* | |||
* Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 | |||
* bits or so. | |||
*/ | |||
#define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63 | |||
uint64_t fpr_expm_p63(fpr x, fpr ccs); | |||
#define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab | |||
extern const fpr fpr_gm_tab[]; | |||
#define fpr_p2_tab PQCLEAN_FALCON1024_CLEAN_fpr_p2_tab | |||
extern const fpr fpr_p2_tab[]; | |||
/* ====================================================================== */ | |||
#endif |
@@ -1,834 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON1024_CLEAN_INNER_H | |||
#define PQCLEAN_FALCON1024_CLEAN_INNER_H | |||
/* | |||
* Internal functions for Falcon. This is not the API intended to be | |||
* used by applications; instead, this internal API provides all the | |||
* primitives on which wrappers build to provide external APIs. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* | |||
* IMPORTANT API RULES | |||
* ------------------- | |||
* | |||
* This API has some non-trivial usage rules: | |||
* | |||
* | |||
* - All public functions (i.e. the non-static ones) must be referenced | |||
* with the PQCLEAN_FALCON1024_CLEAN_ macro (e.g. PQCLEAN_FALCON1024_CLEAN_verify_raw for the verify_raw() | |||
* function). That macro adds a prefix to the name, which is | |||
* configurable with the FALCON_PREFIX macro. This allows compiling | |||
* the code into a specific "namespace" and potentially including | |||
* several versions of this code into a single application (e.g. to | |||
* have an AVX2 and a non-AVX2 variants and select the one to use at | |||
* runtime based on availability of AVX2 opcodes). | |||
* | |||
* - Functions that need temporary buffers expect them as a final | |||
* tmp[] array of type uint8_t*, with a size which is documented for | |||
* each function. However, most have some alignment requirements, | |||
* because they will use the array to store 16-bit, 32-bit or 64-bit | |||
* values (e.g. uint64_t or double). The caller must ensure proper | |||
* alignment. What happens on unaligned access depends on the | |||
* underlying architecture, ranging from a slight time penalty | |||
* to immediate termination of the process. | |||
* | |||
* - Some functions rely on specific rounding rules and precision for | |||
* floating-point numbers. On some systems (in particular 32-bit x86 | |||
* with the 387 FPU), this requires setting a hardware control | |||
* word. The caller MUST use set_fpu_cw() to ensure proper precision: | |||
* | |||
* oldcw = set_fpu_cw(2); | |||
* PQCLEAN_FALCON1024_CLEAN_sign_dyn(...); | |||
* set_fpu_cw(oldcw); | |||
* | |||
* On systems where the native floating-point precision is already | |||
* proper, or integer-based emulation is used, the set_fpu_cw() | |||
* function does nothing, so it can be called systematically. | |||
*/ | |||
#include "fips202.h" | |||
#include "fpr.h" | |||
#include <stdint.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
/* | |||
* Some computations with floating-point elements, in particular | |||
* rounding to the nearest integer, rely on operations using _exactly_ | |||
* the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit | |||
* x86, the 387 FPU may be used (depending on the target OS) and, in | |||
* that case, may use more precision bits (i.e. 64 bits, for an 80-bit | |||
* total type length); to prevent miscomputations, we define an explicit | |||
* function that modifies the precision in the FPU control word. | |||
* | |||
* set_fpu_cw() sets the precision to the provided value, and returns | |||
* the previously set precision; callers are supposed to restore the | |||
* previous precision on exit. The correct (52-bit) precision is | |||
* configured with the value "2". On unsupported compilers, or on | |||
* targets other than 32-bit x86, or when the native 'double' type is | |||
* not used, the set_fpu_cw() function does nothing at all. | |||
*/ | |||
static inline unsigned
set_fpu_cw(unsigned x) {
    /*
     * No FPU control word is touched in this build: the requested
     * precision is handed back as the "previous" one, so that the
     * save/restore calling pattern keeps working.
     */
    unsigned previous = x;

    return previous;
}
/* ==================================================================== */ | |||
/* | |||
* SHAKE256 implementation (shake.c). | |||
* | |||
* API is defined to be easily replaced with the fips202.h API defined | |||
* as part of PQClean. | |||
*/ | |||
#define inner_shake256_context shake256incctx | |||
#define inner_shake256_init(sc) shake256_inc_init(sc) | |||
#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) | |||
#define inner_shake256_flip(sc) shake256_inc_finalize(sc) | |||
#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) | |||
#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc) | |||
/* ==================================================================== */ | |||
/* | |||
* Encoding/decoding functions (codec.c). | |||
* | |||
* Encoding functions take as parameters an output buffer (out) with | |||
* a given maximum length (max_out_len); returned value is the actual | |||
* number of bytes which have been written. If the output buffer is | |||
* not large enough, then 0 is returned (some bytes may have been | |||
* written to the buffer). If 'out' is NULL, then 'max_out_len' is | |||
* ignored; instead, the function computes and returns the actual | |||
* required output length (in bytes). | |||
* | |||
* Decoding functions take as parameters an input buffer (in) with | |||
* its maximum length (max_in_len); returned value is the actual number | |||
* of bytes that have been read from the buffer. If the provided length | |||
* is too short, then 0 is returned. | |||
* | |||
* Values to encode or decode are vectors of integers, with N = 2^logn | |||
* elements. | |||
* | |||
* Three encoding formats are defined: | |||
* | |||
* - modq: sequence of values modulo 12289, each encoded over exactly | |||
* 14 bits. The encoder and decoder verify that integers are within | |||
* the valid range (0..12288). Values are arrays of uint16. | |||
* | |||
* - trim: sequence of signed integers, a specified number of bits | |||
* each. The number of bits is provided as parameter and includes | |||
* the sign bit. Each integer x must be such that |x| < 2^(bits-1) | |||
* (which means that the -2^(bits-1) value is forbidden); encode and | |||
* decode functions check that property. Values are arrays of | |||
* int16_t or int8_t, corresponding to names 'trim_i16' and | |||
* 'trim_i8', respectively. | |||
* | |||
* - comp: variable-length encoding for signed integers; each integer | |||
* uses a minimum of 9 bits, possibly more. This is normally used | |||
* only for signatures. | |||
* | |||
*/ | |||
size_t PQCLEAN_FALCON1024_CLEAN_modq_encode(void *out, size_t max_out_len, | |||
const uint16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(void *out, size_t max_out_len, | |||
const int8_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON1024_CLEAN_comp_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON1024_CLEAN_modq_decode(uint16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_CLEAN_comp_decode(int16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
/* | |||
* Number of bits for key elements, indexed by logn (1 to 10). This | |||
* is at most 8 bits for all degrees, but some degrees may have shorter | |||
* elements. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[]; | |||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[]; | |||
/* | |||
* Maximum size, in bits, of elements in a signature, indexed by logn | |||
* (1 to 10). The size includes the sign bit. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[]; | |||
/* ==================================================================== */ | |||
/* | |||
* Support functions used for both signature generation and signature | |||
* verification (common.c). | |||
*/ | |||
/* | |||
* From a SHAKE256 context (must be already flipped), produce a new | |||
* point. This is the non-constant-time version, which may leak enough | |||
* information to serve as a stop condition on a brute force attack on | |||
* the hashed message (provided that the nonce value is known). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn); | |||
/* | |||
* From a SHAKE256 context (must be already flipped), produce a new | |||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes. | |||
* This function is constant-time but is typically more expensive than | |||
* PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(). | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. This compares the appropriate norm of the | |||
* vector with the acceptance bound. Returned value is 1 on success | |||
* (vector is short enough to be acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. Instead of the first half s1, this | |||
* function receives the "saturated squared norm" of s1, i.e. the | |||
* sum of the squares of the coordinates of s1 (saturated at 2^32-1 | |||
* if the sum exceeds 2^31-1). | |||
* | |||
* Returned value is 1 on success (vector is short enough to be | |||
* acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature verification functions (vrfy.c). | |||
*/ | |||
/* | |||
* Convert a public key to NTT + Montgomery format. Conversion is done | |||
* in place. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); | |||
/* | |||
* Internal signature verification code: | |||
* c0[] contains the hashed nonce+message | |||
* s2[] is the decoded signature | |||
* h[] contains the public key, in NTT + Montgomery format | |||
* logn is the degree log | |||
* tmp[] temporary, must have at least 2*2^logn bytes | |||
* Returned value is 1 on success, 0 on error. | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute the public key h[], given the private key elements f[] and | |||
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial | |||
* modulus. This function returns 1 on success, 0 on error (an error is | |||
* reported if f is not invertible mod phi mod q). | |||
* | |||
* The tmp[] array must have room for at least 2*2^logn elements. | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h, | |||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Recompute the fourth private key element. Private key consists in | |||
* four polynomials with small coefficients f, g, F and G, which are | |||
* such that fG - gF = q mod phi; furthermore, f is invertible modulo | |||
* phi and modulo q. This function recomputes G from f, g and F. | |||
* | |||
* The tmp[] array must have room for at least 4*2^logn bytes. | |||
* | |||
* Returned value is 1 on success, 0 on error (f not invertible). | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp); | |||
/* | |||
* Test whether a given polynomial is invertible modulo phi and q. | |||
* Polynomial coefficients are small integers. | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_is_invertible( | |||
const int16_t *s2, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Count the number of elements of value zero in the NTT representation | |||
* of the given polynomial: this is the number of primitive 2n-th roots | |||
* of unity (modulo q = 12289) that are roots of the provided polynomial | |||
* (taken modulo q). | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Internal signature verification with public key recovery: | |||
* h[] receives the public key (NOT in NTT/Montgomery format) | |||
* c0[] contains the hashed nonce+message | |||
* s1[] is the first signature half | |||
* s2[] is the second signature half | |||
* logn is the degree log | |||
* tmp[] temporary, must have at least 2*2^logn bytes | |||
* Returned value is 1 on success, 0 on error. Success is returned if | |||
* the signature is a short enough vector; in that case, the public | |||
* key has been written to h[]. However, the caller must still | |||
* verify that h[] is the correct value (e.g. with regards to a known | |||
* hash of the public key). | |||
* | |||
* h[] may not overlap with any of the other arrays. | |||
* | |||
* tmp[] must have 16-bit alignment. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h, | |||
const uint16_t *c0, const int16_t *s1, const int16_t *s2, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Implementation of floating-point real numbers (fpr.h, fpr.c). | |||
*/ | |||
/* | |||
* Real numbers are implemented by an extra header file (fpr.h), included above. | |||
* This is meant to support pluggable implementations. The default | |||
* implementation relies on the C type 'double'. | |||
* | |||
* The included file must define the following types, functions and | |||
* constants: | |||
* | |||
* fpr | |||
* type for a real number | |||
* | |||
* fpr fpr_of(int64_t i) | |||
* cast an integer into a real number; source must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_scaled(int64_t i, int sc) | |||
* compute i*2^sc as a real number; source 'i' must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_ldexp(fpr x, int e) | |||
* compute x*2^e | |||
* | |||
* int64_t fpr_rint(fpr x) | |||
* round x to the nearest integer; x must be in the -(2^63-1) | |||
* to +(2^63-1) range | |||
* | |||
* int64_t fpr_trunc(fpr x) | |||
* round to an integer; this rounds towards zero; value must | |||
* be in the -(2^63-1) to +(2^63-1) range | |||
* | |||
* fpr fpr_add(fpr x, fpr y) | |||
* compute x + y | |||
* | |||
* fpr fpr_sub(fpr x, fpr y) | |||
* compute x - y | |||
* | |||
* fpr fpr_neg(fpr x) | |||
* compute -x | |||
* | |||
* fpr fpr_half(fpr x) | |||
* compute x/2 | |||
* | |||
* fpr fpr_double(fpr x) | |||
* compute x*2 | |||
* | |||
* fpr fpr_mul(fpr x, fpr y) | |||
* compute x * y | |||
* | |||
* fpr fpr_sqr(fpr x) | |||
* compute x * x | |||
* | |||
* fpr fpr_inv(fpr x) | |||
* compute 1/x | |||
* | |||
* fpr fpr_div(fpr x, fpr y) | |||
* compute x/y | |||
* | |||
* fpr fpr_sqrt(fpr x) | |||
* compute the square root of x | |||
* | |||
* int fpr_lt(fpr x, fpr y) | |||
* return 1 if x < y, 0 otherwise | |||
* | |||
* uint64_t fpr_expm_p63(fpr x, fpr ccs) | |||
* return exp(-x), assuming that 0 <= x < log(2). Returned value | |||
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), | |||
* rounded to the nearest integer). Computation should have a | |||
* precision of at least 45 bits. See fpr.c for the role of 'ccs'. | |||
* | |||
* const fpr fpr_gm_tab[] | |||
* array of constants for FFT / iFFT | |||
* | |||
* const fpr fpr_p2_tab[] | |||
* precomputed powers of 2 (by index, 0 to 10) | |||
* | |||
* Constants of type 'fpr': | |||
* | |||
* fpr fpr_q 12289 | |||
* fpr fpr_inverse_of_q 1/12289 | |||
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) | |||
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) | |||
* fpr fpr_sigma_min_9 1.291500756233514568549480827642 | |||
* fpr fpr_sigma_min_10 1.311734375905083682667395805765 | |||
* fpr fpr_log2 log(2) | |||
* fpr fpr_inv_log2 1/log(2) | |||
* fpr fpr_bnorm_max 16822.4121 | |||
* fpr fpr_zero 0 | |||
* fpr fpr_one 1 | |||
* fpr fpr_two 2 | |||
* fpr fpr_onehalf 0.5 | |||
* fpr fpr_ptwo31 2^31 | |||
* fpr fpr_ptwo31m1 2^31-1 | |||
* fpr fpr_mtwo31m1 -(2^31-1) | |||
* fpr fpr_ptwo63m1 2^63-1 | |||
* fpr fpr_mtwo63m1 -(2^63-1) | |||
* fpr fpr_ptwo63 2^63 | |||
*/ | |||
/* ==================================================================== */ | |||
/* | |||
* RNG (rng.c). | |||
* | |||
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 | |||
* context (flipped) and is used for bulk pseudorandom generation. | |||
* A system-dependent seed generator is also provided. | |||
*/ | |||
/* | |||
* Obtain a random seed from the system RNG. | |||
* | |||
* Returned value is 1 on success, 0 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t seed_len); | |||
/* | |||
* Structure for a PRNG. This includes a large buffer so that values | |||
* get generated in advance. The 'state' is used to keep the current | |||
* PRNG algorithm state (contents depend on the selected algorithm). | |||
* | |||
* The unions with 'dummy_u64' are there to ensure proper alignment for | |||
* 64-bit direct access. | |||
*/ | |||
/*
 * PRNG context: a buffer of pre-generated bytes, a read position into
 * that buffer, the algorithm state, and a type tag.
 */
typedef struct { | |||
union { | |||
/* Pre-generated output bytes; prng_get_u64() and prng_get_u8() read from d[]. */
uint8_t d[512]; /* MUST be 512, exactly */ | |||
/* Never read; present only to force 64-bit alignment of the union. */
uint64_t dummy_u64; | |||
} buf; | |||
/* Index in buf.d[] of the next byte to hand out. */
size_t ptr; | |||
union { | |||
/* Current PRNG algorithm state; NOTE(review): layout is defined by the implementation, not visible here. */
uint8_t d[256]; | |||
/* Never read; present only to force 64-bit alignment of the union. */
uint64_t dummy_u64; | |||
} state; | |||
/* NOTE(review): presumably identifies the selected PRNG algorithm -- confirm against rng.c. */
int type; | |||
} prng; | |||
/* | |||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 | |||
* context (in "flipped" state) to obtain its initial state. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src); | |||
/* | |||
* Refill the PRNG buffer. This is normally invoked automatically, and | |||
* is declared here only so that prng_get_u64() may be inlined. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p); | |||
/* | |||
* Get some bytes from a PRNG. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); | |||
/* | |||
* Get a 64-bit random value from a PRNG. | |||
*/ | |||
static inline uint64_t | |||
prng_get_u64(prng *p) { | |||
size_t u; | |||
/* | |||
* If there are less than 9 bytes in the buffer, we refill it. | |||
* This means that we may drop the last few bytes, but this allows | |||
* for faster extraction code. Also, it means that we never leave | |||
* an empty buffer. | |||
*/ | |||
u = p->ptr; | |||
if (u >= (sizeof p->buf.d) - 9) { | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
u = 0; | |||
} | |||
p->ptr = u + 8; | |||
/* | |||
* On systems that use little-endian encoding and allow | |||
* unaligned accesses, we can simply read the data where it is. | |||
*/ | |||
return (uint64_t)p->buf.d[u + 0] | |||
| ((uint64_t)p->buf.d[u + 1] << 8) | |||
| ((uint64_t)p->buf.d[u + 2] << 16) | |||
| ((uint64_t)p->buf.d[u + 3] << 24) | |||
| ((uint64_t)p->buf.d[u + 4] << 32) | |||
| ((uint64_t)p->buf.d[u + 5] << 40) | |||
| ((uint64_t)p->buf.d[u + 6] << 48) | |||
| ((uint64_t)p->buf.d[u + 7] << 56); | |||
} | |||
/* | |||
* Get an 8-bit random value from a PRNG. | |||
*/ | |||
static inline unsigned | |||
prng_get_u8(prng *p) { | |||
unsigned v; | |||
v = p->buf.d[p->ptr ++]; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
} | |||
return v; | |||
} | |||
/* ==================================================================== */ | |||
/* | |||
* FFT (falcon-fft.c). | |||
* | |||
* A real polynomial is represented as an array of N 'fpr' elements. | |||
* The FFT representation of a real polynomial contains N/2 complex | |||
* elements; each is stored as two real numbers, for the real and | |||
* imaginary parts, respectively. See falcon-fft.c for details on the | |||
* internal representation. | |||
*/ | |||
/* | |||
* Compute FFT in-place: the source array should contain a real | |||
* polynomial (N coefficients); its storage area is reused to store | |||
* the FFT representation of that polynomial (N/2 complex numbers). | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn); | |||
/* | |||
* Compute the inverse FFT in-place: the source array should contain the | |||
* FFT representation of a real polynomial (N/2 elements); the resulting | |||
* real polynomial (N coefficients of type 'fpr') is written over the | |||
* array. | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn); | |||
/* | |||
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_add(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_sub(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Negate polynomial a. This function works in both normal and FFT | |||
* representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn); | |||
/* | |||
* Compute adjoint of polynomial a. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap. | |||
* This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT | |||
* overlap. This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial with its own adjoint. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial with a real constant. This function works in both | |||
* normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_div_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) | |||
* (also in FFT representation). Since the result is auto-adjoint, all its | |||
* coordinates in FFT representation are real; as such, only the first N/2 | |||
* values of d[] are filled (the imaginary parts are skipped). | |||
* | |||
* Array d MUST NOT overlap with either a or b. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) | |||
* (also in FFT representation). Destination d MUST NOT overlap with | |||
* any of the source arrays. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn); | |||
/* | |||
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. On input, g00, g01 and g11 are provided (where the | |||
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 | |||
* and d11 values are written in g00, g01 and g11, respectively | |||
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). | |||
* (In fact, d00 = g00, so the g00 operand is left unmodified.) | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. This is identical to poly_LDL_fft() except that | |||
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written | |||
* in two other separate buffers provided as extra parameters. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn); | |||
/* | |||
* Apply "split" operation on a polynomial in FFT representation: | |||
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 | |||
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_split_fft(fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn); | |||
/* | |||
* Apply "merge" operation on two polynomials in FFT representation: | |||
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes | |||
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. | |||
* f MUST NOT overlap with either f0 or f1. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Key pair generation. | |||
*/ | |||
/* | |||
* Required sizes of the temporary buffer (in bytes). | |||
* | |||
* This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1 | |||
* or 2) where it is slightly greater. | |||
*/ | |||
#define FALCON_KEYGEN_TEMP_1 136 | |||
#define FALCON_KEYGEN_TEMP_2 272 | |||
#define FALCON_KEYGEN_TEMP_3 224 | |||
#define FALCON_KEYGEN_TEMP_4 448 | |||
#define FALCON_KEYGEN_TEMP_5 896 | |||
#define FALCON_KEYGEN_TEMP_6 1792 | |||
#define FALCON_KEYGEN_TEMP_7 3584 | |||
#define FALCON_KEYGEN_TEMP_8 7168 | |||
#define FALCON_KEYGEN_TEMP_9 14336 | |||
#define FALCON_KEYGEN_TEMP_10 28672 | |||
/* | |||
* Generate a new key pair. Randomness is extracted from the provided | |||
* SHAKE256 context, which must have already been seeded and flipped. | |||
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* | |||
* macros) and be aligned for the uint32_t, uint64_t and fpr types. | |||
* | |||
* The private key elements are written in f, g, F and G, and the | |||
* public key is written in h. Either or both of G and h may be NULL, | |||
* in which case the corresponding element is not returned (they can | |||
* be recomputed from f, g and F). | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_keygen(inner_shake256_context *rng, | |||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature generation. | |||
*/ | |||
/* | |||
* Expand a private key into the B0 matrix in FFT representation and | |||
* the LDL tree. All the values are written in 'expanded_key', for | |||
* a total of (8*logn+40)*2^logn bytes. | |||
* | |||
* The tmp[] array must have room for at least 48*2^logn bytes. | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key, | |||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, | |||
unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses an | |||
* expanded key (as generated by PQCLEAN_FALCON1024_CLEAN_expand_privkey()). | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* On successful output, the start of the tmp[] buffer contains the s1 | |||
* vector (as int16_t elements). | |||
* | |||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes. | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng, | |||
const fpr *expanded_key, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses a raw | |||
* key and dynamically recomputes the B0 matrix and LDL tree; this | |||
* saves RAM since there is no need for an expanded key, but | |||
* increases the signature cost. | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* On successful output, the start of the tmp[] buffer contains the s1 | |||
* vector (as int16_t elements). | |||
* | |||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes. | |||
* | |||
* tmp[] must have 64-bit alignment. | |||
* This function uses floating-point rounding (see set_fpu_cw()). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng, | |||
const int8_t *f, const int8_t *g, | |||
const int8_t *F, const int8_t *G, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Internal sampler engine. Exported for tests. | |||
* | |||
* sampler_context wraps around a source of random numbers (PRNG) and | |||
* the sigma_min value (nominally dependent on the degree). | |||
* | |||
* sampler() takes as parameters: | |||
* ctx pointer to the sampler_context structure | |||
* mu center for the distribution | |||
* isigma inverse of the distribution standard deviation | |||
* It returns an integer sampled along the Gaussian distribution centered | |||
* on mu and of standard deviation sigma = 1/isigma. | |||
* | |||
* gaussian0_sampler() takes as parameter a pointer to a PRNG, and | |||
* returns an integer sampled along a half-Gaussian with standard | |||
* deviation sigma0 = 1.8205 (center is 0, returned value is | |||
* nonnegative). | |||
*/ | |||
typedef struct { | |||
/* Source of random numbers used by the sampler. */
prng p; | |||
/* The sigma_min value (nominally dependent on the degree). */
fpr sigma_min; | |||
} sampler_context; | |||
int PQCLEAN_FALCON1024_CLEAN_sampler(void *ctx, fpr mu, fpr isigma); | |||
int PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(prng *p); | |||
/* ==================================================================== */ | |||
#endif |
@@ -1,386 +0,0 @@ | |||
#include "api.h" | |||
#include "inner.h" | |||
#include "randombytes.h" | |||
#include <stddef.h> | |||
#include <string.h> | |||
/* | |||
* Wrapper for implementing the PQClean API. | |||
*/ | |||
#define NONCELEN 40 | |||
#define SEEDLEN 48 | |||
/* | |||
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) | |||
* | |||
* private key: | |||
* header byte: 0101nnnn | |||
* private f (6 or 5 bits by element, depending on degree) | |||
* private g (6 or 5 bits by element, depending on degree) | |||
* private F (8 bits by element) | |||
* | |||
* public key: | |||
* header byte: 0000nnnn | |||
* public h (14 bits by element) | |||
* | |||
* signature: | |||
* header byte: 0011nnnn | |||
* nonce 40 bytes | |||
* value (12 bits by element) | |||
* | |||
* message + signature: | |||
* signature length (2 bytes, big-endian) | |||
* nonce 40 bytes | |||
* message | |||
* header byte: 0010nnnn | |||
* value (12 bits by element) | |||
* (signature length is 1+len(value), not counting the nonce) | |||
*/ | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { | |||
union { | |||
uint8_t b[28 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[1024], g[1024], F[1024], G[1024]; | |||
uint16_t h[1024]; | |||
unsigned char seed[SEEDLEN]; | |||
inner_shake256_context rng; | |||
size_t u, v; | |||
/* | |||
* Generate key pair. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
inner_shake256_init(&rng); | |||
inner_shake256_inject(&rng, seed, sizeof seed); | |||
inner_shake256_flip(&rng); | |||
PQCLEAN_FALCON1024_CLEAN_keygen(&rng, f, g, F, G, h, 10, tmp.b); | |||
inner_shake256_ctx_release(&rng); | |||
/* | |||
* Encode private key. | |||
*/ | |||
sk[0] = 0x50 + 10; | |||
u = 1; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
/* | |||
* Encode public key. | |||
*/ | |||
pk[0] = 0x00 + 10; | |||
v = PQCLEAN_FALCON1024_CLEAN_modq_encode( | |||
pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, | |||
h, 10); | |||
if (v != PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* | |||
* Compute the signature. nonce[] receives the nonce and must have length | |||
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce | |||
* or header byte), with *sigbuflen providing the maximum value length and | |||
* receiving the actual value length. | |||
* | |||
* If a signature could be computed but not encoded because it would | |||
* exceed the output buffer size, then a new signature is computed. If | |||
* the provided buffer size is too low, this could loop indefinitely, so | |||
* the caller must provide a size that can accommodate signatures with a | |||
* large enough probability. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
union { | |||
uint8_t b[72 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[1024], g[1024], F[1024], G[1024]; | |||
union { | |||
int16_t sig[1024]; | |||
uint16_t hm[1024]; | |||
} r; | |||
unsigned char seed[SEEDLEN]; | |||
inner_shake256_context sc; | |||
size_t u, v; | |||
/* | |||
* Decode the private key. | |||
*/ | |||
if (sk[0] != 0x50 + 10) { | |||
return -1; | |||
} | |||
u = 1; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||
f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||
g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||
F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
if (!PQCLEAN_FALCON1024_CLEAN_complete_private(G, f, g, F, 10, tmp.b)) { | |||
return -1; | |||
} | |||
/* | |||
* Create a random nonce (40 bytes). | |||
*/ | |||
randombytes(nonce, NONCELEN); | |||
/* | |||
* Hash message nonce + message into a vector. | |||
*/ | |||
inner_shake256_init(&sc); | |||
inner_shake256_inject(&sc, nonce, NONCELEN); | |||
inner_shake256_inject(&sc, m, mlen); | |||
inner_shake256_flip(&sc); | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(&sc, r.hm, 10); | |||
inner_shake256_ctx_release(&sc); | |||
/* | |||
* Initialize a RNG. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
inner_shake256_init(&sc); | |||
inner_shake256_inject(&sc, seed, sizeof seed); | |||
inner_shake256_flip(&sc); | |||
/* | |||
* Compute and return the signature. This loops until a signature | |||
* value is found that fits in the provided buffer. | |||
*/ | |||
for (;;) { | |||
PQCLEAN_FALCON1024_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b); | |||
v = PQCLEAN_FALCON1024_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 10); | |||
if (v != 0) { | |||
inner_shake256_ctx_release(&sc); | |||
*sigbuflen = v; | |||
return 0; | |||
} | |||
} | |||
} | |||
/* | |||
* Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] | |||
* (of size sigbuflen) contains the signature value, not including the | |||
* header byte or nonce. Return value is 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_verify( | |||
const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
union { | |||
uint8_t b[2 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
uint16_t h[1024], hm[1024]; | |||
int16_t sig[1024]; | |||
inner_shake256_context sc; | |||
/* | |||
* Decode public key. | |||
*/ | |||
if (pk[0] != 0x00 + 10) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON1024_CLEAN_modq_decode(h, 10, | |||
pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) | |||
!= PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(h, 10); | |||
/* | |||
* Decode signature. | |||
*/ | |||
if (sigbuflen == 0) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON1024_CLEAN_comp_decode(sig, 10, sigbuf, sigbuflen) != sigbuflen) { | |||
return -1; | |||
} | |||
/* | |||
* Hash nonce + message into a vector. | |||
*/ | |||
inner_shake256_init(&sc); | |||
inner_shake256_inject(&sc, nonce, NONCELEN); | |||
inner_shake256_inject(&sc, m, mlen); | |||
inner_shake256_flip(&sc); | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(&sc, hm, 10, tmp.b); | |||
inner_shake256_ctx_release(&sc); | |||
/* | |||
* Verify signature. | |||
*/ | |||
if (!PQCLEAN_FALCON1024_CLEAN_verify_raw(hm, sig, h, 10, tmp.b)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
/* | |||
* The PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES constant is used for | |||
* the signed message object (as produced by PQCLEAN_FALCON1024_CLEAN_crypto_sign()) | |||
* and includes a two-byte length value, so we take care here | |||
* to only generate signatures that are two bytes shorter than | |||
* the maximum. This is done to ensure that PQCLEAN_FALCON1024_CLEAN_crypto_sign() | |||
* and PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature() produce the exact same signature | |||
* value, if used on the same message, with the same private key, | |||
* and using the same output from randombytes() (this is for | |||
* reproducibility of tests). | |||
*/ | |||
size_t vlen; | |||
vlen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
sig[0] = 0x30 + 10; | |||
*siglen = 1 + NONCELEN + vlen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
if (siglen < 1 + NONCELEN) { | |||
return -1; | |||
} | |||
if (sig[0] != 0x30 + 10) { | |||
return -1; | |||
} | |||
return do_verify(sig + 1, | |||
sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
uint8_t *pm, *sigbuf; | |||
size_t sigbuflen; | |||
/* | |||
* Move the message to its final location; this is a memmove() so | |||
* it handles overlaps properly. | |||
*/ | |||
memmove(sm + 2 + NONCELEN, m, mlen); | |||
pm = sm + 2 + NONCELEN; | |||
sigbuf = pm + 1 + mlen; | |||
sigbuflen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
pm[mlen] = 0x20 + 10; | |||
sigbuflen ++; | |||
sm[0] = (uint8_t)(sigbuflen >> 8); | |||
sm[1] = (uint8_t)sigbuflen; | |||
*smlen = mlen + 2 + NONCELEN + sigbuflen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||
const uint8_t *sigbuf; | |||
size_t pmlen, sigbuflen; | |||
if (smlen < 3 + NONCELEN) { | |||
return -1; | |||
} | |||
sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; | |||
if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { | |||
return -1; | |||
} | |||
sigbuflen --; | |||
pmlen = smlen - NONCELEN - 3 - sigbuflen; | |||
if (sm[2 + NONCELEN + pmlen] != 0x20 + 10) { | |||
return -1; | |||
} | |||
sigbuf = sm + 2 + NONCELEN + pmlen + 1; | |||
/* | |||
* The 2-byte length header and the one-byte signature header | |||
* have been verified. Nonce is at sm+2, followed by the message | |||
* itself. Message length is in pmlen. sigbuf/sigbuflen point to | |||
* the signature value (excluding the header byte). | |||
*/ | |||
if (do_verify(sm + 2, sigbuf, sigbuflen, | |||
sm + 2 + NONCELEN, pmlen, pk) < 0) { | |||
return -1; | |||
} | |||
/* | |||
* Signature is correct, we just have to copy/move the message | |||
* to its final destination. The memmove() properly handles | |||
* overlaps. | |||
*/ | |||
memmove(m, sm + 2 + NONCELEN, pmlen); | |||
*mlen = pmlen; | |||
return 0; | |||
} |
@@ -1,201 +0,0 @@ | |||
#include "inner.h" | |||
#include <assert.h> | |||
/* | |||
* PRNG and interface to the system RNG. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* | |||
* Include relevant system header files. For Win32, this will also need | |||
* linking with advapi32.dll, which we trigger with an appropriate #pragma. | |||
*/ | |||
/* see inner.h */
/*
 * Stub seed provider: the destination buffer is never written. A
 * request for zero bytes yields 1; any other length yields 0.
 * NOTE(review): presumably 1 means success and 0 failure, as declared
 * in inner.h -- confirm.
 */
int
PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t len) {
    (void)seed;
    return (len == 0) ? 1 : 0;
}
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src) { | |||
/* | |||
* To ensure reproducibility for a given seed, we | |||
* must enforce little-endian interpretation of | |||
* the state words. | |||
*/ | |||
uint8_t tmp[56]; | |||
uint64_t th, tl; | |||
int i; | |||
inner_shake256_extract(src, tmp, 56); | |||
for (i = 0; i < 14; i ++) { | |||
uint32_t w; | |||
w = (uint32_t)tmp[(i << 2) + 0] | |||
| ((uint32_t)tmp[(i << 2) + 1] << 8) | |||
| ((uint32_t)tmp[(i << 2) + 2] << 16) | |||
| ((uint32_t)tmp[(i << 2) + 3] << 24); | |||
*(uint32_t *)(p->state.d + (i << 2)) = w; | |||
} | |||
tl = *(uint32_t *)(p->state.d + 48); | |||
th = *(uint32_t *)(p->state.d + 52); | |||
*(uint64_t *)(p->state.d + 48) = tl + (th << 32); | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
} | |||
/* | |||
* PRNG based on ChaCha20. | |||
* | |||
* State consists in key (32 bytes) then IV (16 bytes) and block counter | |||
* (8 bytes). Normally, we should not care about local endianness (this | |||
* is for a PRNG), but for the NIST competition we need reproducible KAT | |||
* vectors that work across architectures, so we enforce little-endian | |||
* interpretation where applicable. Moreover, output words are "spread | |||
* out" over the output buffer with the interleaving pattern that is | |||
* naturally obtained from the AVX2 implementation that runs eight | |||
* ChaCha20 instances in parallel. | |||
* | |||
* The block counter is XORed into the first 8 bytes of the IV. | |||
*/ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p) { | |||
static const uint32_t CW[] = { | |||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 | |||
}; | |||
uint64_t cc; | |||
size_t u; | |||
/* | |||
* State uses local endianness. Only the output bytes must be | |||
* converted to little endian (if used on a big-endian machine). | |||
*/ | |||
cc = *(uint64_t *)(p->state.d + 48); | |||
for (u = 0; u < 8; u ++) { | |||
uint32_t state[16]; | |||
size_t v; | |||
int i; | |||
memcpy(&state[0], CW, sizeof CW); | |||
memcpy(&state[4], p->state.d, 48); | |||
state[14] ^= (uint32_t)cc; | |||
state[15] ^= (uint32_t)(cc >> 32); | |||
for (i = 0; i < 10; i ++) { | |||
#define QROUND(a, b, c, d) do { \ | |||
state[a] += state[b]; \ | |||
state[d] ^= state[a]; \ | |||
state[d] = (state[d] << 16) | (state[d] >> 16); \ | |||
state[c] += state[d]; \ | |||
state[b] ^= state[c]; \ | |||
state[b] = (state[b] << 12) | (state[b] >> 20); \ | |||
state[a] += state[b]; \ | |||
state[d] ^= state[a]; \ | |||
state[d] = (state[d] << 8) | (state[d] >> 24); \ | |||
state[c] += state[d]; \ | |||
state[b] ^= state[c]; \ | |||
state[b] = (state[b] << 7) | (state[b] >> 25); \ | |||
} while (0) | |||
QROUND( 0, 4, 8, 12); | |||
QROUND( 1, 5, 9, 13); | |||
QROUND( 2, 6, 10, 14); | |||
QROUND( 3, 7, 11, 15); | |||
QROUND( 0, 5, 10, 15); | |||
QROUND( 1, 6, 11, 12); | |||
QROUND( 2, 7, 8, 13); | |||
QROUND( 3, 4, 9, 14); | |||
#undef QROUND | |||
} | |||
for (v = 0; v < 4; v ++) { | |||
state[v] += CW[v]; | |||
} | |||
for (v = 4; v < 14; v ++) { | |||
state[v] += ((uint32_t *)p->state.d)[v - 4]; | |||
} | |||
state[14] += ((uint32_t *)p->state.d)[10] | |||
^ (uint32_t)cc; | |||
state[15] += ((uint32_t *)p->state.d)[11] | |||
^ (uint32_t)(cc >> 32); | |||
cc ++; | |||
/* | |||
* We mimic the interleaving that is used in the AVX2 | |||
* implementation. | |||
*/ | |||
for (v = 0; v < 16; v ++) { | |||
p->buf.d[(u << 2) + (v << 5) + 0] = | |||
(uint8_t)state[v]; | |||
p->buf.d[(u << 2) + (v << 5) + 1] = | |||
(uint8_t)(state[v] >> 8); | |||
p->buf.d[(u << 2) + (v << 5) + 2] = | |||
(uint8_t)(state[v] >> 16); | |||
p->buf.d[(u << 2) + (v << 5) + 3] = | |||
(uint8_t)(state[v] >> 24); | |||
} | |||
} | |||
*(uint64_t *)(p->state.d + 48) = cc; | |||
p->ptr = 0; | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { | |||
uint8_t *buf; | |||
buf = dst; | |||
while (len > 0) { | |||
size_t clen; | |||
clen = (sizeof p->buf.d) - p->ptr; | |||
if (clen > len) { | |||
clen = len; | |||
} | |||
memcpy(buf, p->buf.d, clen); | |||
buf += clen; | |||
len -= clen; | |||
p->ptr += clen; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
} | |||
} | |||
} |
@@ -1,853 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Falcon signature verification. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* ===================================================================== */ | |||
/* | |||
* Constants for NTT. | |||
* | |||
* n = 2^logn (2 <= n <= 1024) | |||
* phi = X^n + 1 | |||
* q = 12289 | |||
* q0i = -1/q mod 2^16 | |||
* R = 2^16 mod q | |||
* R2 = 2^32 mod q | |||
*/ | |||
#define Q 12289 | |||
#define Q0I 12287 | |||
#define R 4091 | |||
#define R2 10952 | |||
/* | |||
* Table for NTT, binary case: | |||
* GMb[x] = R*(g^rev(x)) mod q | |||
* where g = 7 (it is a 2048-th primitive root of 1 modulo q) | |||
* and rev() is the bit-reversal function over 10 bits. | |||
*/ | |||
static const uint16_t GMb[] = { | |||
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, | |||
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, | |||
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, | |||
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, | |||
12210, 6240, 997, 117, 4783, 4407, 1549, 7072, | |||
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, | |||
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, | |||
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, | |||
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, | |||
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, | |||
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, | |||
9277, 6130, 3323, 883, 10469, 489, 1502, 2851, | |||
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, | |||
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, | |||
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, | |||
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, | |||
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, | |||
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, | |||
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, | |||
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, | |||
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, | |||
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, | |||
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, | |||
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, | |||
737, 3698, 4699, 5753, 9046, 3687, 16, 914, | |||
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, | |||
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, | |||
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, | |||
932, 10229, 8927, 7642, 351, 9298, 237, 5858, | |||
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, | |||
4602, 1748, 11300, 340, 3711, 4614, 300, 10993, | |||
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, | |||
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, | |||
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, | |||
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, | |||
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, | |||
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, | |||
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, | |||
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328, | |||
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, | |||
2523, 4339, 6115, 619, 937, 2834, 7775, 3279, | |||
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, | |||
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, | |||
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, | |||
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, | |||
11192, 315, 4511, 1158, 6061, 6751, 11865, 357, | |||
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, | |||
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, | |||
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, | |||
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, | |||
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, | |||
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, | |||
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, | |||
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, | |||
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, | |||
10438, 9471, 1271, 408, 6911, 3079, 360, 8276, | |||
11535, 9156, 9049, 11539, 850, 8617, 784, 7919, | |||
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, | |||
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, | |||
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, | |||
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, | |||
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, | |||
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, | |||
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, | |||
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, | |||
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, | |||
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, | |||
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, | |||
11736, 6813, 6979, 819, 8903, 6271, 10843, 348, | |||
7514, 8339, 6439, 694, 852, 5659, 2781, 3716, | |||
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, | |||
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, | |||
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, | |||
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, | |||
10923, 4918, 128, 7312, 725, 9157, 5006, 6393, | |||
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, | |||
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, | |||
5110, 45, 2400, 1921, 4377, 2720, 1695, 51, | |||
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, | |||
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, | |||
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, | |||
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, | |||
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, | |||
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, | |||
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, | |||
862, 3158, 477, 7279, 5678, 7914, 4254, 302, | |||
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, | |||
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, | |||
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, | |||
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, | |||
1397, 10678, 103, 7420, 7976, 936, 764, 632, | |||
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, | |||
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, | |||
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, | |||
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, | |||
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, | |||
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, | |||
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, | |||
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, | |||
8192, 986, 7527, 1401, 870, 3615, 8465, 2756, | |||
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, | |||
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, | |||
2567, 708, 893, 6465, 4962, 10024, 2090, 5718, | |||
10743, 780, 4733, 4623, 2134, 2087, 4802, 884, | |||
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, | |||
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, | |||
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, | |||
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, | |||
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, | |||
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, | |||
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, | |||
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, | |||
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, | |||
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, | |||
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, | |||
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, | |||
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, | |||
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509, | |||
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, | |||
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, | |||
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, | |||
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, | |||
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, | |||
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, | |||
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, | |||
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, | |||
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, | |||
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 | |||
}; | |||
/* | |||
* Table for inverse NTT, binary case: | |||
* iGMb[x] = R*((1/g)^rev(x)) mod q | |||
* Since g = 7, 1/g = 8778 mod 12289. | |||
*/ | |||
static const uint16_t iGMb[] = { | |||
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, | |||
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, | |||
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, | |||
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, | |||
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, | |||
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, | |||
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, | |||
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, | |||
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, | |||
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, | |||
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, | |||
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, | |||
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, | |||
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, | |||
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, | |||
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, | |||
6635, 6543, 1582, 4868, 42, 673, 2240, 7219, | |||
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, | |||
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, | |||
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, | |||
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, | |||
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, | |||
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, | |||
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, | |||
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, | |||
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609, | |||
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, | |||
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, | |||
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, | |||
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, | |||
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, | |||
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, | |||
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, | |||
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, | |||
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, | |||
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, | |||
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, | |||
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, | |||
6689, 386, 4462, 105, 2076, 10443, 119, 3955, | |||
4370, 11505, 3672, 11439, 750, 3240, 3133, 754, | |||
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, | |||
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, | |||
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, | |||
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, | |||
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, | |||
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, | |||
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, | |||
101, 1911, 9483, 3608, 11997, 10536, 812, 8915, | |||
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, | |||
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, | |||
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, | |||
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, | |||
7769, 136, 617, 3157, 5889, 9219, 6855, 120, | |||
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, | |||
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, | |||
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, | |||
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, | |||
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, | |||
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, | |||
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, | |||
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, | |||
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, | |||
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, | |||
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, | |||
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028, | |||
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, | |||
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, | |||
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, | |||
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, | |||
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, | |||
9956, 2702, 6656, 735, 2243, 11656, 833, 3107, | |||
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, | |||
3513, 9769, 3025, 779, 9433, 3392, 7437, 668, | |||
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, | |||
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, | |||
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, | |||
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, | |||
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, | |||
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, | |||
707, 1088, 4936, 678, 10245, 18, 5684, 960, | |||
4459, 7957, 226, 2451, 6, 8874, 320, 6298, | |||
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, | |||
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, | |||
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, | |||
5227, 952, 4319, 9810, 4356, 3088, 11118, 840, | |||
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, | |||
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, | |||
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, | |||
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, | |||
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, | |||
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, | |||
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, | |||
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, | |||
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, | |||
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, | |||
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, | |||
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, | |||
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, | |||
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, | |||
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, | |||
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, | |||
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, | |||
11489, 8833, 2393, 15, 10830, 5003, 17, 565, | |||
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, | |||
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020, | |||
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, | |||
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, | |||
104, 6348, 9643, 6757, 12110, 5617, 10935, 541, | |||
135, 3041, 7200, 6526, 5085, 12136, 842, 4129, | |||
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, | |||
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, | |||
1770, 273, 8377, 2271, 5225, 10283, 116, 11807, | |||
91, 11699, 757, 1304, 7524, 6451, 8032, 8154, | |||
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, | |||
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, | |||
3924, 3188, 367, 2077, 336, 5384, 5631, 8596, | |||
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, | |||
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, | |||
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, | |||
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, | |||
9763, 12191, 459, 2966, 3166, 405, 5000, 9311, | |||
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, | |||
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, | |||
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, | |||
9474, 2586, 1431, 2741, 473, 11383, 4745, 836, | |||
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, | |||
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, | |||
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 | |||
}; | |||
/* | |||
* Reduce a small signed integer modulo q. The source integer MUST | |||
* be between -q/2 and +q/2. | |||
*/ | |||
static inline uint32_t | |||
mq_conv_small(int x) { | |||
/* | |||
* If x < 0, the cast to uint32_t will set the high bit to 1. | |||
*/ | |||
uint32_t y; | |||
y = (uint32_t)x; | |||
y += Q & -(y >> 31); | |||
return y; | |||
} | |||
/* | |||
* Addition modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_add(uint32_t x, uint32_t y) { | |||
/* | |||
* We compute x + y - q. If the result is negative, then the | |||
* high bit will be set, and 'd >> 31' will be equal to 1; | |||
* thus '-(d >> 31)' will be an all-one pattern. Otherwise, | |||
* it will be an all-zero pattern. In other words, this | |||
* implements a conditional addition of q. | |||
*/ | |||
uint32_t d; | |||
d = x + y - Q; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Subtraction modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_sub(uint32_t x, uint32_t y) { | |||
/* | |||
* As in mq_add(), we use a conditional addition to ensure the | |||
* result is in the 0..q-1 range. | |||
*/ | |||
uint32_t d; | |||
d = x - y; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Division by 2 modulo q. Operand must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_rshift1(uint32_t x) { | |||
x += Q & -(x & 1); | |||
return (x >> 1); | |||
} | |||
/* | |||
* Montgomery multiplication modulo q. If we set R = 2^16 mod q, then | |||
* this function computes: x * y / R mod q | |||
* Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_montymul(uint32_t x, uint32_t y) { | |||
uint32_t z, w; | |||
/* | |||
* We compute x*y + k*q with a value of k chosen so that the 16 | |||
* low bits of the result are 0. We can then shift the value. | |||
* After the shift, result may still be larger than q, but it | |||
* will be lower than 2*q, so a conditional subtraction works. | |||
*/ | |||
z = x * y; | |||
w = ((z * Q0I) & 0xFFFF) * Q; | |||
/* | |||
* When adding z and w, the result will have its low 16 bits | |||
* equal to 0. Since x, y and z are lower than q, the sum will | |||
* be no more than (2^15 - 1) * q + (q - 1)^2, which will | |||
* fit on 29 bits. | |||
*/ | |||
z = (z + w) >> 16; | |||
/* | |||
* After the shift, analysis shows that the value will be less | |||
* than 2q. We do a subtraction then conditional subtraction to | |||
* ensure the result is in the expected range. | |||
*/ | |||
z -= Q; | |||
z += Q & -(z >> 31); | |||
return z; | |||
} | |||
/*
 * Montgomery squaring: returns (x * x) / R mod q, computed as a
 * Montgomery multiplication of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    uint32_t sq;

    sq = mq_montymul(x, x);
    return sq;
}
/* | |||
* Divide x by y modulo q = 12289. | |||
*/ | |||
static inline uint32_t | |||
mq_div_12289(uint32_t x, uint32_t y) { | |||
/* | |||
* We invert y by computing y^(q-2) mod q. | |||
* | |||
* We use the following addition chain for exponent e = 12287: | |||
* | |||
* e0 = 1 | |||
* e1 = 2 * e0 = 2 | |||
* e2 = e1 + e0 = 3 | |||
* e3 = e2 + e1 = 5 | |||
* e4 = 2 * e3 = 10 | |||
* e5 = 2 * e4 = 20 | |||
* e6 = 2 * e5 = 40 | |||
* e7 = 2 * e6 = 80 | |||
* e8 = 2 * e7 = 160 | |||
* e9 = e8 + e2 = 163 | |||
* e10 = e9 + e8 = 323 | |||
* e11 = 2 * e10 = 646 | |||
* e12 = 2 * e11 = 1292 | |||
* e13 = e12 + e9 = 1455 | |||
* e14 = 2 * e13 = 2910 | |||
* e15 = 2 * e14 = 5820 | |||
* e16 = e15 + e10 = 6143 | |||
* e17 = 2 * e16 = 12286 | |||
* e18 = e17 + e0 = 12287 | |||
* | |||
* Additions on exponents are converted to Montgomery | |||
* multiplications. We define all intermediate results as so | |||
* many local variables, and let the C compiler work out which | |||
* must be kept around. | |||
*/ | |||
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; | |||
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; | |||
y0 = mq_montymul(y, R2); | |||
y1 = mq_montysqr(y0); | |||
y2 = mq_montymul(y1, y0); | |||
y3 = mq_montymul(y2, y1); | |||
y4 = mq_montysqr(y3); | |||
y5 = mq_montysqr(y4); | |||
y6 = mq_montysqr(y5); | |||
y7 = mq_montysqr(y6); | |||
y8 = mq_montysqr(y7); | |||
y9 = mq_montymul(y8, y2); | |||
y10 = mq_montymul(y9, y8); | |||
y11 = mq_montysqr(y10); | |||
y12 = mq_montysqr(y11); | |||
y13 = mq_montymul(y12, y9); | |||
y14 = mq_montysqr(y13); | |||
y15 = mq_montysqr(y14); | |||
y16 = mq_montymul(y15, y10); | |||
y17 = mq_montysqr(y16); | |||
y18 = mq_montymul(y17, y0); | |||
/* | |||
* Final multiplication with x, which is not in Montgomery | |||
* representation, computes the correct division result. | |||
*/ | |||
return mq_montymul(y18, x); | |||
} | |||
/* | |||
* Compute NTT on a ring element. | |||
*/ | |||
static void | |||
mq_NTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
n = (size_t)1 << logn; | |||
t = n; | |||
for (m = 1; m < n; m <<= 1) { | |||
size_t ht, i, j1; | |||
ht = t >> 1; | |||
for (i = 0, j1 = 0; i < m; i ++, j1 += t) { | |||
size_t j, j2; | |||
uint32_t s; | |||
s = GMb[m + i]; | |||
j2 = j1 + ht; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v; | |||
u = a[j]; | |||
v = mq_montymul(a[j + ht], s); | |||
a[j] = (uint16_t)mq_add(u, v); | |||
a[j + ht] = (uint16_t)mq_sub(u, v); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* | |||
* Compute the inverse NTT on a ring element, binary case. | |||
*/ | |||
static void | |||
mq_iNTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
uint32_t ni; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
while (m > 1) { | |||
size_t hm, dt, i, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { | |||
size_t j, j2; | |||
uint32_t s; | |||
j2 = j1 + t; | |||
s = iGMb[hm + i]; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v, w; | |||
u = a[j]; | |||
v = a[j + t]; | |||
a[j] = (uint16_t)mq_add(u, v); | |||
w = mq_sub(u, v); | |||
a[j + t] = (uint16_t) | |||
mq_montymul(w, s); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* To complete the inverse NTT, we must now divide all values by | |||
* n (the vector size). We thus need the inverse of n, i.e. we | |||
* need to divide 1 by 2 logn times. But we also want it in | |||
* Montgomery representation, i.e. we also want to multiply it | |||
* by R = 2^16. In the common case, this should be a simple right | |||
* shift. The loop below is generic and works also in corner cases; | |||
* its computation time is negligible. | |||
*/ | |||
ni = R; | |||
for (m = n; m > 1; m >>= 1) { | |||
ni = mq_rshift1(ni); | |||
} | |||
for (m = 0; m < n; m ++) { | |||
a[m] = (uint16_t)mq_montymul(a[m], ni); | |||
} | |||
} | |||
/* | |||
* Convert a polynomial (mod q) to Montgomery representation. | |||
*/ | |||
static void | |||
mq_poly_tomonty(uint16_t *f, unsigned logn) { | |||
size_t u, n; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = (uint16_t)mq_montymul(f[u], R2); | |||
} | |||
} | |||
/*
 * Coefficient-wise Montgomery product of two polynomials given in NTT
 * representation. The product overwrites f[]; g[] is only read.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t len, i;

    len = (size_t)1 << logn;
    i = 0;
    while (i < len) {
        f[i] = (uint16_t)mq_montymul(f[i], g[i]);
        i ++;
    }
}
/*
 * Coefficient-wise subtraction modulo q: f <- f - g, over 2^logn
 * coefficients. g[] is only read.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t len, i;

    len = (size_t)1 << logn;
    i = 0;
    while (i < len) {
        f[i] = (uint16_t)mq_sub(f[i], g[i]);
        i ++;
    }
}
/* ===================================================================== */ | |||
/*
 * see inner.h
 *
 * Transforms h[] in place: forward NTT first, then conversion of every
 * coefficient to Montgomery representation.
 */
void
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) {
    mq_NTT(h, logn);
    mq_poly_tomonty(h, logn);
}
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
n = (size_t)1 << logn; | |||
tt = (uint16_t *)tmp; | |||
/* | |||
* Reduce s2 elements modulo q ([0..q-1] range). | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
} | |||
/* | |||
* Compute -s1 = s2*h - c0 mod phi mod q (in tt[]). | |||
*/ | |||
mq_NTT(tt, logn); | |||
mq_poly_montymul_ntt(tt, h, logn); | |||
mq_iNTT(tt, logn); | |||
mq_poly_sub(tt, c0, logn); | |||
/* | |||
* Normalize -s1 elements into the [-q/2..q/2] range. | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
int32_t w; | |||
w = (int32_t)tt[u]; | |||
w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); | |||
((int16_t *)tt)[u] = (int16_t)w; | |||
} | |||
/* | |||
* Signature is valid if and only if the aggregate (-s1,s2) vector | |||
* is short enough. | |||
*/ | |||
return PQCLEAN_FALCON1024_CLEAN_is_short((int16_t *)tt, s2, logn); | |||
} | |||
/*
 * see inner.h
 *
 * Computes h = g/f mod phi mod q into h[]. Returns 1 on success, or 0
 * if an NTT coefficient of f is zero (f is then not invertible); in
 * that case h[] holds partial results. tmp[] is used as an array of
 * 2^logn 16-bit values.
 */
int
PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
                                        const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    uint16_t *f_ntt;
    size_t len, i;

    len = (size_t)1 << logn;
    f_ntt = (uint16_t *)tmp;

    /* Load f and g as values in the 0..q-1 range. */
    for (i = 0; i < len; i ++) {
        f_ntt[i] = (uint16_t)mq_conv_small(f[i]);
        h[i] = (uint16_t)mq_conv_small(g[i]);
    }
    mq_NTT(h, logn);
    mq_NTT(f_ntt, logn);

    /* Coefficient-wise division in the NTT domain. */
    i = 0;
    while (i < len) {
        if (f_ntt[i] == 0) {
            return 0;
        }
        h[i] = (uint16_t)mq_div_12289(h[i], f_ntt[i]);
        i ++;
    }
    mq_iNTT(h, logn);
    return 1;
}
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *t1, *t2; | |||
n = (size_t)1 << logn; | |||
t1 = (uint16_t *)tmp; | |||
t2 = t1 + n; | |||
for (u = 0; u < n; u ++) { | |||
t1[u] = (uint16_t)mq_conv_small(g[u]); | |||
t2[u] = (uint16_t)mq_conv_small(F[u]); | |||
} | |||
mq_NTT(t1, logn); | |||
mq_NTT(t2, logn); | |||
mq_poly_tomonty(t1, logn); | |||
mq_poly_montymul_ntt(t1, t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
t2[u] = (uint16_t)mq_conv_small(f[u]); | |||
} | |||
mq_NTT(t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
if (t2[u] == 0) { | |||
return 0; | |||
} | |||
t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); | |||
} | |||
mq_iNTT(t1, logn); | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
int32_t gi; | |||
w = t1[u]; | |||
w -= (Q & ~ -((w - (Q >> 1)) >> 31)); | |||
gi = *(int32_t *)&w; | |||
if (gi < -127 || gi > +127) { | |||
return 0; | |||
} | |||
G[u] = (int8_t)gi; | |||
} | |||
return 1; | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_is_invertible( | |||
const int16_t *s2, unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
uint32_t r; | |||
n = (size_t)1 << logn; | |||
tt = (uint16_t *)tmp; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
} | |||
mq_NTT(tt, logn); | |||
r = 0; | |||
for (u = 0; u < n; u ++) { | |||
r |= (uint32_t)(tt[u] - 1); | |||
} | |||
return (int)(1u - (r >> 31)); | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h, | |||
const uint16_t *c0, const int16_t *s1, const int16_t *s2, | |||
unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
uint32_t r; | |||
n = (size_t)1 << logn; | |||
/* | |||
* Reduce elements of s1 and s2 modulo q; then write s2 into tt[] | |||
* and c0 - s1 into h[]. | |||
*/ | |||
tt = (uint16_t *)tmp; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
w = (uint32_t)s1[u]; | |||
w += Q & -(w >> 31); | |||
w = mq_sub(c0[u], w); | |||
h[u] = (uint16_t)w; | |||
} | |||
/* | |||
* Compute h = (c0 - s1) / s2. If one of the coefficients of s2 | |||
* is zero (in NTT representation) then the operation fails. We | |||
* keep that information into a flag so that we do not deviate | |||
* from strict constant-time processing; if all coefficients of | |||
* s2 are non-zero, then the high bit of r will be zero. | |||
*/ | |||
mq_NTT(tt, logn); | |||
mq_NTT(h, logn); | |||
r = 0; | |||
for (u = 0; u < n; u ++) { | |||
r |= (uint32_t)(tt[u] - 1); | |||
h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); | |||
} | |||
mq_iNTT(h, logn); | |||
/* | |||
* Signature is acceptable if and only if it is short enough, | |||
* and s2 was invertible mod phi mod q. The caller must still | |||
* check that the rebuilt public key matches the expected | |||
* value (e.g. through a hash). | |||
*/ | |||
r = ~r & (uint32_t) - PQCLEAN_FALCON1024_CLEAN_is_short(s1, s2, logn); | |||
return (int)(r >> 31); | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) { | |||
uint16_t *s2; | |||
size_t u, n; | |||
uint32_t r; | |||
n = (size_t)1 << logn; | |||
s2 = (uint16_t *)tmp; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)sig[u]; | |||
w += Q & -(w >> 31); | |||
s2[u] = (uint16_t)w; | |||
} | |||
mq_NTT(s2, logn); | |||
r = 0; | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u] - 1u; | |||
r += (w >> 31); | |||
} | |||
return (int)r; | |||
} |
@@ -1,15 +0,0 @@ | |||
# Source files of the Falcon-512 AVX2 implementation in this directory.
set(SRC_AVX2_FALCON512
    codec.c common.c fft.c fpr.c keygen.c pqclean.c rng.c sign.c vrfy.c)

# define_sig_alg() is a project helper defined elsewhere; it receives the
# algorithm name, the PQClean symbol prefix, the source list (as a single
# ;-separated argument) and the directory holding those sources.
define_sig_alg(falcon512_avx2 PQCLEAN_FALCON512_AVX2
    "${SRC_AVX2_FALCON512}"
    "${CMAKE_CURRENT_SOURCE_DIR}")
@@ -1,80 +0,0 @@ | |||
#ifndef PQCLEAN_FALCON512_AVX2_API_H
#define PQCLEAN_FALCON512_AVX2_API_H

#include <stddef.h>
#include <stdint.h>

/*
 * Object sizes (in bytes) and algorithm name. Key sizes are exact;
 * PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES is the MAXIMUM signature size.
 */
#define PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES   1281
#define PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES   897
#define PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES            690

#define PQCLEAN_FALCON512_AVX2_CRYPTO_ALGNAME          "Falcon-512"

/*
 * Key pair generation.
 *
 *   pk   receives the public key
 *        (PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES bytes)
 *   sk   receives the private key
 *        (PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES bytes)
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign_keypair(
    uint8_t *pk, uint8_t *sk);

/*
 * Detached signature generation.
 *
 * Signs the message (m, mlen) with the private key sk. The signature
 * is written into sig[] and its length into *siglen; that length
 * varies from one signature to the next but never exceeds
 * PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES.
 *
 * sig[], m[] and sk[] are allowed to overlap in any way.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Detached signature verification.
 *
 * Checks the signature (sig, siglen) on the message (m, mlen) against
 * the public key pk.
 *
 * sig[], m[] and pk[] are allowed to overlap in any way.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
 * Signed-message generation.
 *
 * Signs the message (m, mlen) with the private key sk and packs
 * signature and message together into sm[]; the total length goes
 * into *smlen and may exceed mlen by up to
 * PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES.
 *
 * sm[] and m[] are allowed to overlap in any way, but sm[] must not
 * overlap sk[].
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Signed-message opening.
 *
 * Verifies the signed message object (sm, smlen) against the public
 * key pk. On success the embedded message is written into m[] and its
 * length into *mlen. The message is shorter than the signed object by
 * an amount that depends on the signature value, at most
 * PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES.
 *
 * m[], sm[] and pk[] are allowed to overlap in any way.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif
@@ -1,555 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Encoding/decoding of keys and signatures. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/*
 * see inner.h
 *
 * Packs the 2^logn values of x[] into out[], 14 bits per value, most
 * significant bits first; unused bits of the last byte are zero.
 *
 * Returns the encoded length in bytes. Returns 0 if a value of x[] is
 * not lower than 12289, or if the encoded length exceeds max_out_len.
 * When out is NULL, nothing is written and the encoded length is
 * returned (max_out_len is then ignored).
 */
size_t
PQCLEAN_FALCON512_AVX2_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    size_t count, need, i;
    uint8_t *dst;
    uint32_t bits;
    int nbits;

    count = (size_t)1 << logn;

    /* Every value must be a proper residue modulo q = 12289. */
    i = 0;
    while (i < count) {
        if (x[i] >= 12289) {
            return 0;
        }
        i ++;
    }

    need = (count * 14 + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    /*
     * 'bits' is a bit accumulator whose 'nbits' low bits are still
     * to be written; full bytes are flushed as soon as available.
     */
    dst = out;
    bits = 0;
    nbits = 0;
    for (i = 0; i < count; i ++) {
        bits = (bits << 14) | x[i];
        for (nbits += 14; nbits >= 8;) {
            nbits -= 8;
            *dst ++ = (uint8_t)(bits >> nbits);
        }
    }

    /* Left-align the remaining bits in a final, zero-padded byte. */
    if (nbits > 0) {
        *dst = (uint8_t)(bits << (8 - nbits));
    }
    return need;
}
/*
 * Unpack 2^logn coefficients stored as consecutive 14-bit fields, most
 * significant bit first (see inner.h).
 *
 *   x           receives the decoded coefficients
 *   logn        base-2 logarithm of the number of coefficients
 *   in          encoded input
 *   max_in_len  number of readable bytes in in[]
 *
 * Returns the number of bytes consumed, or 0 if the input is too short,
 * if a field is not lower than 12289, or if the unused bits of the last
 * byte are not all zero. x[] may have been partially written on failure.
 */
size_t
PQCLEAN_FALCON512_AVX2_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0;
    int avail = 0;
    size_t done = 0;

    if (needed > max_in_len) {
        return 0;
    }
    while (done < count) {
        unsigned coeff;

        window = (window << 8) | (*src ++);
        avail += 8;
        if (avail < 14) {
            /* Not enough bits for a whole field yet. */
            continue;
        }
        avail -= 14;
        coeff = (window >> avail) & 0x3FFF;
        if (coeff >= 12289) {
            return 0;
        }
        x[done ++] = (uint16_t)coeff;
    }
    /* Whatever is left in the window is padding and must be zero. */
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        return 0;
    }
    return needed;
}
/*
 * Pack 2^logn signed 16-bit values as consecutive two's-complement fields
 * of 'bits' bits each, most significant bit first (see inner.h).
 *
 *   out          destination buffer, or NULL to only query the length
 *   max_out_len  capacity of out[], in bytes
 *   x            values; each must lie in -(2^(bits-1)-1)..+(2^(bits-1)-1)
 *   logn         base-2 logarithm of the number of values
 *   bits         width of each encoded field
 *
 * Returns the encoded length in bytes, or 0 if a value is out of range or
 * if out[] is too small.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int limit = (1 << (bits - 1)) - 1;
    const uint32_t field_mask = ((uint32_t)1 << bits) - 1;
    size_t total, i;
    uint8_t *dst;
    uint32_t window = 0;
    unsigned pending = 0;

    /* The range is symmetric: -2^(bits-1) is deliberately excluded. */
    for (i = 0; i < count; i ++) {
        if (x[i] < -limit || x[i] > limit) {
            return 0;
        }
    }
    total = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return total;
    }
    if (total > max_out_len) {
        return 0;
    }

    dst = out;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint16_t)x[i] & field_mask);
        for (pending += bits; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Final partial byte, padded with zero bits. */
        *dst = (uint8_t)(window << (8 - pending));
    }
    return total;
}
/*
 * Unpack 2^logn signed values stored as consecutive two's-complement
 * fields of 'bits' bits each, most significant bit first (see inner.h).
 *
 *   x           receives the decoded values
 *   logn        base-2 logarithm of the number of values
 *   bits        width of each encoded field
 *   in          encoded input
 *   max_in_len  number of readable bytes in in[]
 *
 * Returns the number of bytes consumed, or 0 if the input is too short,
 * if a field holds the forbidden value -2^(bits-1), or if the unused bits
 * of the last byte are not all zero. x[] may have been partially written
 * on failure.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * bits) + 7) >> 3;
    const uint32_t field_mask = ((uint32_t)1 << bits) - 1;
    const uint32_t sign_bit = (uint32_t)1 << (bits - 1);
    const uint8_t *src = in;
    uint32_t window = 0;
    unsigned avail = 0;
    size_t done = 0;

    if (needed > max_in_len) {
        return 0;
    }
    while (done < count) {
        window = (window << 8) | *src ++;
        avail += 8;
        while (avail >= bits && done < count) {
            uint32_t raw;

            avail -= bits;
            raw = (window >> avail) & field_mask;
            if (raw == sign_bit) {
                /*
                 * Sign bit alone is -2^(bits-1), which is forbidden.
                 */
                return 0;
            }
            /*
             * Two's-complement value of the field: the bits below the
             * sign bit count positively, the sign bit negatively.
             */
            x[done ++] = (int16_t)((int32_t)(raw & (sign_bit - 1))
                                   - (int32_t)(raw & sign_bit));
        }
    }
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        /*
         * Padding bits in the last byte must be zero.
         */
        return 0;
    }
    return needed;
}
/*
 * Pack 2^logn signed 8-bit values as consecutive two's-complement fields
 * of 'bits' bits each, most significant bit first (see inner.h).
 *
 *   out          destination buffer, or NULL to only query the length
 *   max_out_len  capacity of out[], in bytes
 *   x            values; each must lie in -(2^(bits-1)-1)..+(2^(bits-1)-1)
 *   logn         base-2 logarithm of the number of values
 *   bits         width of each encoded field
 *
 * Returns the encoded length in bytes, or 0 if a value is out of range or
 * if out[] is too small.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int limit = (1 << (bits - 1)) - 1;
    const uint32_t field_mask = ((uint32_t)1 << bits) - 1;
    size_t total, i;
    uint8_t *dst;
    uint32_t window = 0;
    unsigned pending = 0;

    /* The range is symmetric: -2^(bits-1) is deliberately excluded. */
    for (i = 0; i < count; i ++) {
        if (x[i] < -limit || x[i] > limit) {
            return 0;
        }
    }
    total = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return total;
    }
    if (total > max_out_len) {
        return 0;
    }

    dst = out;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint8_t)x[i] & field_mask);
        for (pending += bits; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Final partial byte, padded with zero bits. */
        *dst = (uint8_t)(window << (8 - pending));
    }
    return total;
}
/*
 * Unpack 2^logn signed values stored as consecutive two's-complement
 * fields of 'bits' bits each, most significant bit first (see inner.h).
 *
 *   x           receives the decoded values
 *   logn        base-2 logarithm of the number of values
 *   bits        width of each encoded field
 *   in          encoded input
 *   max_in_len  number of readable bytes in in[]
 *
 * Returns the number of bytes consumed, or 0 if the input is too short,
 * if a field holds the forbidden value -2^(bits-1), or if the unused bits
 * of the last byte are not all zero. x[] may have been partially written
 * on failure.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * bits) + 7) >> 3;
    const uint32_t field_mask = ((uint32_t)1 << bits) - 1;
    const uint32_t sign_bit = (uint32_t)1 << (bits - 1);
    const uint8_t *src = in;
    uint32_t window = 0;
    unsigned avail = 0;
    size_t done = 0;

    if (needed > max_in_len) {
        return 0;
    }
    while (done < count) {
        window = (window << 8) | *src ++;
        avail += 8;
        while (avail >= bits && done < count) {
            uint32_t raw;

            avail -= bits;
            raw = (window >> avail) & field_mask;
            if (raw == sign_bit) {
                /*
                 * Sign bit alone is -2^(bits-1), which is forbidden.
                 */
                return 0;
            }
            /*
             * Two's-complement value of the field: the bits below the
             * sign bit count positively, the sign bit negatively.
             */
            x[done ++] = (int8_t)((int32_t)(raw & (sign_bit - 1))
                                  - (int32_t)(raw & sign_bit));
        }
    }
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        /*
         * Padding bits in the last byte must be zero.
         */
        return 0;
    }
    return needed;
}
/*
 * Compress 2^logn signed values into a variable-length bit string (see
 * inner.h). Each value is written as: one sign bit, the low 7 bits of the
 * absolute value, then (absolute value >> 7) zero bits followed by a
 * single one bit.
 *
 *   out          destination buffer, or NULL to only compute the length
 *   max_out_len  capacity of out[], in bytes
 *   x            values; each must lie in -2047..+2047
 *   logn         base-2 logarithm of the number of values
 *
 * Returns the encoded length in bytes, or 0 if a value is out of range or
 * if out[] is too small.
 */
size_t
PQCLEAN_FALCON512_AVX2_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    uint8_t *dst = out;
    const size_t count = (size_t)1 << logn;
    size_t i, written = 0;
    uint32_t window = 0;
    unsigned pending = 0;

    /*
     * All values must be within -2047..+2047 before anything is emitted.
     */
    for (i = 0; i < count; i ++) {
        if (x[i] < -2047 || x[i] > +2047) {
            return 0;
        }
    }

    for (i = 0; i < count; i ++) {
        int value = x[i];
        unsigned sign = (value < 0) ? 1u : 0u;
        unsigned mag = (unsigned)(sign ? -value : value);
        unsigned high = mag >> 7;

        /*
         * First eight bits: sign, then low seven bits of |value|.
         */
        window = (window << 8) | (sign << 7) | (mag & 127u);

        /*
         * Then 'high' zeros and a terminating one. |value| <= 2047
         * bounds high by 15, so at most 16 bits are appended; together
         * with the 8 bits above and at most 7 leftover bits this stays
         * within the 32-bit window.
         */
        window = (window << (high + 1)) | 1;
        pending += 8 + high + 1;

        /*
         * Flush complete bytes; with a NULL output only count them.
         */
        while (pending >= 8) {
            pending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(window >> pending);
            }
            written ++;
        }
    }

    /*
     * Final partial byte (if any), padded with zero bits.
     */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(window << (8 - pending));
        }
        written ++;
    }
    return written;
}
/*
 * Decompress 2^logn signed values from the variable-length format (see
 * inner.h): for each value one sign bit, the low 7 bits of the absolute
 * value, then a run of zero bits (each worth 128) closed by a one bit.
 *
 *   x           receives the decoded values
 *   logn        base-2 logarithm of the number of values
 *   in          encoded input
 *   max_in_len  number of readable bytes in in[]
 *
 * Returns the number of bytes consumed, or 0 on error. Errors are:
 * truncated input, an absolute value above 2047, and any non-canonical
 * encoding. Non-canonical means a "minus zero" (sign bit set with a zero
 * magnitude) or non-zero padding bits in the last byte consumed. Only
 * canonical encodings are accepted so that a given sequence of values has
 * exactly one valid byte representation. x[] may have been partially
 * written on failure.
 */
size_t
PQCLEAN_FALCON512_AVX2_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached; each 0 adds 128 to
         * the absolute value.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero has a single valid encoding.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }
    return v;
}
/* | |||
* Key elements and signatures are polynomials with small integer | |||
* coefficients. Here are some statistics gathered over many | |||
* generated key pairs (10000 or more for each degree): | |||
* | |||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||
* 1 2 129 56.31 143 60.02 | |||
* 2 4 123 40.93 160 46.52 | |||
* 3 8 97 28.97 159 38.01 | |||
* 4 16 100 21.48 154 32.50 | |||
* 5 32 71 15.41 151 29.36 | |||
* 6 64 59 11.07 138 27.77 | |||
* 7 128 39 7.91 144 27.00 | |||
* 8 256 32 5.63 148 26.61 | |||
* 9 512 22 4.00 137 26.46 | |||
* 10 1024 15 2.84 146 26.41 | |||
* | |||
* We want a compact storage format for private key, and, as part of | |||
* key generation, we are allowed to reject some keys which would | |||
* otherwise be fine (this does not induce any noticeable vulnerability | |||
* as long as we reject only a small proportion of possible keys). | |||
* Hence, we enforce at key generation time maximum values for the | |||
* elements of f, g, F and G, so that their encoding can be expressed | |||
* in fixed-width values. Limits have been chosen so that generated | |||
* keys are almost always within bounds, thus impacting neither
* security nor performance.
* | |||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||
*/ | |||
/*
 * Field width, in bits, for f and g coefficients; one entry per row of
 * the statistics table above (presumably indexed by logn).
 */
const uint8_t PQCLEAN_FALCON512_AVX2_max_fg_bits[] = {
    0, /* index  0: unused */
    8, /* index  1 */
    8, /* index  2 */
    8, /* index  3 */
    8, /* index  4 */
    8, /* index  5 */
    7, /* index  6 */
    7, /* index  7 */
    6, /* index  8 */
    6, /* index  9 */
    5  /* index 10 */
};
/*
 * Field width, in bits, for F and G coefficients (presumably indexed by
 * logn, like the table for f and g); every used entry is 8.
 */
const uint8_t PQCLEAN_FALCON512_AVX2_max_FG_bits[] = {
    0, /* index  0: unused */
    8, /* index  1 */
    8, /* index  2 */
    8, /* index  3 */
    8, /* index  4 */
    8, /* index  5 */
    8, /* index  6 */
    8, /* index  7 */
    8, /* index  8 */
    8, /* index  9 */
    8  /* index 10 */
};
/* | |||
* When generating a new key pair, we can always reject keys which | |||
* feature an abnormally large coefficient. This can also be done for | |||
* signatures, albeit with some care: in case the signature process is | |||
* used in a derandomized setup (explicitly seeded with the message and | |||
* private key), we have to follow the specification faithfully, and the | |||
* specification only enforces a limit on the L2 norm of the signature | |||
* vector. The limit on the L2 norm implies that the absolute value of | |||
* a coefficient of the signature cannot be more than the following: | |||
* | |||
* log(n) n max sig coeff (theoretical) | |||
* 1 2 412 | |||
* 2 4 583 | |||
* 3 8 824 | |||
* 4 16 1166 | |||
* 5 32 1649 | |||
* 6 64 2332 | |||
* 7 128 3299 | |||
* 8 256 4665 | |||
* 9 512 6598 | |||
* 10 1024 9331 | |||
* | |||
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that, | |||
* with overwhelming probability, signature coefficients will fit | |||
* in -2047..2047, i.e. 12 bits. | |||
*/ | |||
/*
 * Field width, in bits, for signature coefficients; one entry per row of
 * the table above (presumably indexed by logn), capped at 12.
 */
const uint8_t PQCLEAN_FALCON512_AVX2_max_sig_bits[] = {
    0,  /* index  0: unused */
    10, /* index  1 */
    11, /* index  2 */
    11, /* index  3 */
    12, /* index  4 */
    12, /* index  5 */
    12, /* index  6 */
    12, /* index  7 */
    12, /* index  8 */
    12, /* index  9 */
    12  /* index 10 */
};
@@ -1,294 +0,0 @@ | |||
#include "inner.h" | |||
/* | |||
* Support functions for signatures (hash-to-point, norm). | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_AVX2_hash_to_point_vartime( | |||
inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn) { | |||
/* | |||
* This is the straightforward per-the-spec implementation. It | |||
* is not constant-time, thus it might reveal information on the | |||
* plaintext (at least, enough to check the plaintext against a | |||
* list of potential plaintexts) in a scenario where the | |||
* attacker does not have access to the signature value or to | |||
* the public key, but knows the nonce (without knowledge of the | |||
* nonce, the hashed output cannot be matched against potential | |||
* plaintexts). | |||
*/ | |||
size_t n; | |||
n = (size_t)1 << logn; | |||
while (n > 0) { | |||
uint8_t buf[2]; | |||
uint32_t w; | |||
inner_shake256_extract(sc, (void *)buf, sizeof buf); | |||
w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; | |||
if (w < 61445) { | |||
while (w >= 12289) { | |||
w -= 12289; | |||
} | |||
*x ++ = (uint16_t)w; | |||
n --; | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_AVX2_hash_to_point_ct( | |||
inner_shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||
/* | |||
* Each 16-bit sample is a value in 0..65535. The value is | |||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||
* and rejected otherwise; thus, each sample has probability | |||
* about 0.93758 of being selected. | |||
* | |||
* We want to oversample enough to be sure that we will | |||
* have enough values with probability at least 1 - 2^(-256). | |||
* Depending on degree N, this leads to the following | |||
* required oversampling: | |||
* | |||
* logn n oversampling | |||
* 1 2 65 | |||
* 2 4 67 | |||
* 3 8 71 | |||
* 4 16 77 | |||
* 5 32 86 | |||
* 6 64 100 | |||
* 7 128 122 | |||
* 8 256 154 | |||
* 9 512 205 | |||
* 10 1024 287 | |||
* | |||
* If logn >= 7, then the provided temporary buffer is large | |||
* enough. Otherwise, we use a stack buffer of 63 entries | |||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||
*/ | |||
static const uint16_t overtab[] = { | |||
0, /* unused */ | |||
65, | |||
67, | |||
71, | |||
77, | |||
86, | |||
100, | |||
122, | |||
154, | |||
205, | |||
287 | |||
}; | |||
unsigned n, n2, u, m, p, over; | |||
uint16_t *tt1, tt2[63]; | |||
/* | |||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||
* We also reduce modulo q the values; rejected values are set | |||
* to 0xFFFF. | |||
*/ | |||
n = 1U << logn; | |||
n2 = n << 1; | |||
over = overtab[logn]; | |||
m = n + over; | |||
tt1 = (uint16_t *)tmp; | |||
for (u = 0; u < m; u ++) { | |||
uint8_t buf[2]; | |||
uint32_t w, wr; | |||
inner_shake256_extract(sc, buf, sizeof buf); | |||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||
wr |= ((w - 61445) >> 31) - 1; | |||
if (u < n) { | |||
x[u] = (uint16_t)wr; | |||
} else if (u < n2) { | |||
tt1[u - n] = (uint16_t)wr; | |||
} else { | |||
tt2[u - n2] = (uint16_t)wr; | |||
} | |||
} | |||
/* | |||
* Now we must "squeeze out" the invalid values. We do this in | |||
* a logarithmic sequence of passes; each pass computes where a | |||
* value should go, and moves it down by 'p' slots if necessary, | |||
* where 'p' uses an increasing powers-of-two scale. It can be | |||
* shown that in all cases where the loop decides that a value | |||
* has to be moved down by p slots, the destination slot is | |||
* "free" (i.e. contains an invalid value). | |||
*/ | |||
for (p = 1; p <= over; p <<= 1) { | |||
unsigned v; | |||
/* | |||
* In the loop below: | |||
* | |||
* - v contains the index of the final destination of | |||
* the value; it is recomputed dynamically based on | |||
* whether values are valid or not. | |||
* | |||
* - u is the index of the value we consider ("source"); | |||
* its address is s. | |||
* | |||
* - The loop may swap the value with the one at index | |||
* u-p. The address of the swap destination is d. | |||
*/ | |||
v = 0; | |||
for (u = 0; u < m; u ++) { | |||
uint16_t *s, *d; | |||
unsigned j, sv, dv, mk; | |||
if (u < n) { | |||
s = &x[u]; | |||
} else if (u < n2) { | |||
s = &tt1[u - n]; | |||
} else { | |||
s = &tt2[u - n2]; | |||
} | |||
sv = *s; | |||
/* | |||
* The value in sv should ultimately go to | |||
* address v, i.e. jump back by u-v slots. | |||
*/ | |||
j = u - v; | |||
/* | |||
* We increment v for the next iteration, but | |||
* only if the source value is valid. The mask | |||
* 'mk' is -1 if the value is valid, 0 otherwise, | |||
* so we _subtract_ mk. | |||
*/ | |||
mk = (sv >> 15) - 1U; | |||
v -= mk; | |||
/* | |||
* In this loop we consider jumps by p slots; if | |||
* u < p then there is nothing more to do. | |||
*/ | |||
if (u < p) { | |||
continue; | |||
} | |||
/* | |||
* Destination for the swap: value at address u-p. | |||
*/ | |||
if ((u - p) < n) { | |||
d = &x[u - p]; | |||
} else if ((u - p) < n2) { | |||
d = &tt1[(u - p) - n]; | |||
} else { | |||
d = &tt2[(u - p) - n2]; | |||
} | |||
dv = *d; | |||
/* | |||
* The swap should be performed only if the source | |||
* is valid AND the jump j has its 'p' bit set. | |||
*/ | |||
mk &= -(((j & p) + 0x1FF) >> 9); | |||
*s = (uint16_t)(sv ^ (mk & (sv ^ dv))); | |||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv))); | |||
} | |||
} | |||
} | |||
/*
 * Tell whether the vector (s1, s2) is short enough (see inner.h).
 *
 *   s1, s2  the two halves of the vector, 2^logn coefficients each
 *   logn    base-2 logarithm of the number of coefficients per half
 *
 * Returns 1 if the squared l2-norm is strictly below the acceptance
 * bound, 0 otherwise.
 *
 * Only 32-bit operations are used: the squared norm is accumulated
 * modulo 2^32 and forced to 2^32-1 if any partial sum had its top bit
 * set, i.e. exceeded 2^31-1.
 */
int
PQCLEAN_FALCON512_AVX2_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint32_t norm = 0;
    uint32_t seen = 0;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t a = s1[i];
        int32_t b = s2[i];

        norm += (uint32_t)(a * a);
        seen |= norm;
        norm += (uint32_t)(b * b);
        seen |= norm;
    }
    /* Saturate when bit 31 was ever set in a partial sum. */
    norm |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return norm < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}
/*
 * Same test as is_short(), but the squared norm of the first half is
 * supplied already computed (see inner.h).
 *
 *   sqn   squared norm of the first half; a value with its top bit set
 *         is treated as saturated
 *   s2    second half of the vector, 2^logn coefficients
 *   logn  base-2 logarithm of the number of coefficients
 *
 * Returns 1 if the total squared l2-norm is strictly below the
 * acceptance bound, 0 otherwise.
 */
int
PQCLEAN_FALCON512_AVX2_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    /* Starts as all-ones if the incoming norm is already saturated. */
    uint32_t seen = -(sqn >> 31);
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c = s2[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }
    /* Saturate when bit 31 was ever set in a partial sum. */
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}