Compare commits

...

Author SHA1 Message Date
  Henry Case a2cd3fb88d ct: adds chained memcmp test 3 years ago
  Henry Case be7a0bbdb8 CT checks for Frodo 3 years ago
  Henry Case 4f25353aa9 Change names of the tests 3 years ago
  Henry Case 55719e929c ct: use inline static instead of macros 3 years ago
  Henry Case caa97d8dfb Test CT sanitizer and CTGRIND functionality 3 years ago
  Henry Case e4eff10297 memsan: enable kyber in bench 3 years ago
  Henry Case ea54cd3ea9 use memory sanitizer in cpu_features build also 3 years ago
  Henry Case 0bb09a6e22 prevent updating llvm-project during 'make' 3 years ago
  Henry Case 175a5725b7 Enable all tests 3 years ago
  Henry Case 7ba897ed4d ensure sike doest use uinitialized reads 3 years ago
  Kris Kwiatkowski c1283aa979
Update README.md 3 years ago
  Henry Case bb3fe16bd5 Memory Sanitizer build 3 years ago
  Henry Case 2ce8a28e41 fix build 3 years ago
  Henry Case d9344d6956 improves makefile 3 years ago
  Henry Case 6d3550454a msan: in msan mode disable bench for kyber INDCPA encryption 3 years ago
  Henry Case ced21a0c79 makes MSan happy 3 years ago
  Henry Case 9b7b7277ce remove not needed flag 3 years ago
  Henry Case 77ca982b4c Redesign CMakeLists.txt for MemorySanitizer 3 years ago
  Henry Case 7be2562de5 Build libcxx and libcxxabi with Memory Sanitizer 3 years ago
  Henry Case 24881fade8 Run KAT in separated step 3 years ago
  Henry Case 74e87f1ae2 remove MSan build for now 3 years ago
  Henry Case af2cee5b17 adds address and memory sanitizer 3 years ago
  Henry Case a0e38afc59 Adds flags for memory and address sanitizer 3 years ago
  Henry Case 950479bdee adds fpic 3 years ago
  Henry Case 6cef14338a updates gbench 3 years ago
  Henry Case 5ce7524c1d multiple compilations 3 years ago
  Henry Case 944543c9b9 fix bug in kyber 3 years ago
  Henry Case c98780b4d5 adds McEliece 3 years ago
  Henry Case f3aa725c4c don't use submodules 3 years ago
  Kris Kwiatkowski 974f62bb26
Update README.md 3 years ago
  Henry Case e9249a2bee remove duplication 3 years ago
  Henry Case 1120727660 remove duplication 3 years ago
  Henry Case fd21b95a2d kat: run in release mode 3 years ago
  Henry Case 7ff8d8fcef Implelments Falcon 512/1024 Round3 3 years ago
  Henry Case 8bf02c41cd henrydcase -> kriskwiatkowski 3 years ago
  Henry Case f0c2436311 change comment 3 years ago
  Henry Case 791c59ef06 reorder scheme definitions 3 years ago
  Kris Kwiatkowski 7a20d33c15
Update README.md 3 years ago
  Henry Case 4e10c0925f prefix structs with pqc_ 3 years ago
  Henry Case 895d9c0abd bench ntt 3 years ago
  Henry Case 395896dc92 basemul bench 3 years ago
  Kris K 977d449ce3
Update README.md 3 years ago
  Henry Case 832da09aa8 fix build 3 years ago
  Henry Case d7ca0ddad6 fix memory overrun 3 years ago
  Henry Case 744461b0ff add drone.yml 3 years ago
  Henry Case 89a34ac04b SIKE: enable optimized version 3 years ago
  Henry Case 9cb7e5a265 SIKE/p434 3 years ago
  Henry Case 15b97bc74e Change variable name 3 years ago
  Henry Case 128b5406cc Add bench for rejection sampling 3 years ago
  Henry Case 40e3fff409 remove gtest header 3 years ago
  Henry Case 2e14f263b0 kyber512 benchmarks 3 years ago
  Henry Case 6e0b153ed3 kyber matrix generation bench 3 years ago
  Henry Case 56629c53f9 add benchmarking framework 3 years ago
  Henry Case 59df9a3f73
Create SECURITY.md 3 years ago
100 changed files with 4897 additions and 25435 deletions
Split View
  1. +0
    -2
      .astylerc
  2. +45
    -0
      .cmake/libstd-memory_sanitizer.mk
  3. +0
    -6
      .gitattributes
  4. +74
    -2
      .github/workflows/main.yml
  5. +1
    -4
      .gitignore
  6. +0
    -9
      .gitmodules
  7. +0
    -1
      3rd/gbench
  8. +0
    -1
      3rd/gtest
  9. +202
    -54
      CMakeLists.txt
  10. +11
    -7
      README.md
  11. +9
    -0
      SECURITY.md
  12. +66
    -65
      public/pqc/pqc.h
  13. +54
    -146
      src/capi/pqapi.c
  14. +120
    -0
      src/capi/schemes.h
  15. +0
    -22
      src/common/Makefile
  16. +55
    -0
      src/common/ct_check.h
  17. +8
    -0
      src/common/fips202.c
  18. +4
    -0
      src/common/fips202.h
  19. +4
    -0
      src/common/randombytes.c
  20. +48
    -0
      src/common/utils.h
  21. +20
    -2
      src/kem/frodo/frodokem640shake/clean/kem.c
  22. +4
    -2
      src/kem/frodo/frodokem640shake/clean/util.c
  23. +9
    -11
      src/kem/kyber/common/reduce.c
  24. +22
    -0
      src/kem/kyber/common/reduce.h
  25. +1
    -1
      src/kem/kyber/kyber1024/clean/CMakeLists.txt
  26. +1
    -1
      src/kem/kyber/kyber1024/clean/ntt.c
  27. +1
    -1
      src/kem/kyber/kyber1024/clean/poly.c
  28. +0
    -44
      src/kem/kyber/kyber1024/clean/reduce.c
  29. +0
    -13
      src/kem/kyber/kyber1024/clean/reduce.h
  30. +1
    -1
      src/kem/kyber/kyber512/avx2/indcpa.c
  31. +2
    -2
      src/kem/kyber/kyber512/avx2/kem.c
  32. +1
    -1
      src/kem/kyber/kyber512/avx2/polyvec.c
  33. +0
    -1
      src/kem/kyber/kyber512/clean/CMakeLists.txt
  34. +1
    -1
      src/kem/kyber/kyber512/clean/ntt.c
  35. +1
    -1
      src/kem/kyber/kyber512/clean/poly.c
  36. +0
    -13
      src/kem/kyber/kyber512/clean/reduce.h
  37. +0
    -1
      src/kem/kyber/kyber768/clean/CMakeLists.txt
  38. +1
    -1
      src/kem/kyber/kyber768/clean/ntt.c
  39. +1
    -1
      src/kem/kyber/kyber768/clean/poly.c
  40. +0
    -44
      src/kem/kyber/kyber768/clean/reduce.c
  41. +0
    -13
      src/kem/kyber/kyber768/clean/reduce.h
  42. +20
    -0
      src/kem/mceliece/mceliece348864/clean/CMakeLists.txt
  43. +20
    -0
      src/kem/mceliece/mceliece348864f/clean/CMakeLists.txt
  44. +20
    -0
      src/kem/mceliece/mceliece460896/clean/CMakeLists.txt
  45. +20
    -0
      src/kem/mceliece/mceliece460896f/clean/CMakeLists.txt
  46. +20
    -0
      src/kem/mceliece/mceliece6688128/clean/CMakeLists.txt
  47. +20
    -0
      src/kem/mceliece/mceliece6688128f/clean/CMakeLists.txt
  48. +20
    -0
      src/kem/mceliece/mceliece6960119/clean/CMakeLists.txt
  49. +20
    -0
      src/kem/mceliece/mceliece6960119f/clean/CMakeLists.txt
  50. +20
    -0
      src/kem/mceliece/mceliece8192128/clean/CMakeLists.txt
  51. +20
    -0
      src/kem/mceliece/mceliece8192128f/clean/CMakeLists.txt
  52. +20
    -0
      src/kem/sike/CMakeLists.txt
  53. +81
    -0
      src/kem/sike/includes/sike/sike.h
  54. +926
    -0
      src/kem/sike/p434/fp-x86_64.S
  55. +207
    -0
      src/kem/sike/p434/fp_generic.c
  56. +282
    -0
      src/kem/sike/p434/fpx.c
  57. +110
    -0
      src/kem/sike/p434/fpx.h
  58. +262
    -0
      src/kem/sike/p434/isogeny.c
  59. +49
    -0
      src/kem/sike/p434/isogeny.h
  60. +128
    -0
      src/kem/sike/p434/params.c
  61. +505
    -0
      src/kem/sike/p434/sike.c
  62. +214
    -0
      src/kem/sike/p434/utils.h
  63. +143
    -113
      src/rustapi/pqc-sys/src/bindings.rs
  64. +4
    -2
      src/rustapi/pqc-sys/src/build.rs
  65. +17
    -0
      src/sign/falcon/CMakeLists.txt
  66. +77
    -0
      src/sign/falcon/api.c
  67. +37
    -0
      src/sign/falcon/api.h
  68. +570
    -0
      src/sign/falcon/codec.c
  69. +298
    -0
      src/sign/falcon/common.c
  70. +0
    -15
      src/sign/falcon/falcon-1024/avx2/CMakeLists.txt
  71. +0
    -80
      src/sign/falcon/falcon-1024/avx2/api.h
  72. +0
    -555
      src/sign/falcon/falcon-1024/avx2/codec.c
  73. +0
    -294
      src/sign/falcon/falcon-1024/avx2/common.c
  74. +0
    -1109
      src/sign/falcon/falcon-1024/avx2/fft.c
  75. +0
    -1078
      src/sign/falcon/falcon-1024/avx2/fpr.c
  76. +0
    -349
      src/sign/falcon/falcon-1024/avx2/fpr.h
  77. +0
    -826
      src/sign/falcon/falcon-1024/avx2/inner.h
  78. +0
    -4231
      src/sign/falcon/falcon-1024/avx2/keygen.c
  79. +0
    -386
      src/sign/falcon/falcon-1024/avx2/pqclean.c
  80. +0
    -195
      src/sign/falcon/falcon-1024/avx2/rng.c
  81. +0
    -1312
      src/sign/falcon/falcon-1024/avx2/sign.c
  82. +0
    -853
      src/sign/falcon/falcon-1024/avx2/vrfy.c
  83. +0
    -15
      src/sign/falcon/falcon-1024/clean/CMakeLists.txt
  84. +0
    -80
      src/sign/falcon/falcon-1024/clean/api.h
  85. +0
    -555
      src/sign/falcon/falcon-1024/clean/codec.c
  86. +0
    -294
      src/sign/falcon/falcon-1024/clean/common.c
  87. +0
    -700
      src/sign/falcon/falcon-1024/clean/fft.c
  88. +0
    -1634
      src/sign/falcon/falcon-1024/clean/fpr.c
  89. +0
    -473
      src/sign/falcon/falcon-1024/clean/fpr.h
  90. +0
    -834
      src/sign/falcon/falcon-1024/clean/inner.h
  91. +0
    -4231
      src/sign/falcon/falcon-1024/clean/keygen.c
  92. +0
    -386
      src/sign/falcon/falcon-1024/clean/pqclean.c
  93. +0
    -201
      src/sign/falcon/falcon-1024/clean/rng.c
  94. +0
    -1254
      src/sign/falcon/falcon-1024/clean/sign.c
  95. +0
    -853
      src/sign/falcon/falcon-1024/clean/vrfy.c
  96. +0
    -15
      src/sign/falcon/falcon-512/avx2/CMakeLists.txt
  97. +0
    -80
      src/sign/falcon/falcon-512/avx2/api.h
  98. +0
    -555
      src/sign/falcon/falcon-512/avx2/codec.c
  99. +0
    -294
      src/sign/falcon/falcon-512/avx2/common.c
  100. +0
    -1109
      src/sign/falcon/falcon-512/avx2/fft.c

+ 0
- 2
.astylerc View File

@@ -1,14 +0,0 @@
#--unpad-paren
# disable backup files

+ 45
- 0
.cmake/libstd-memory_sanitizer.mk View File

@@ -0,0 +1,45 @@
include(ExternalProject)
find_program(MAKE_PROGRAM make)

string (REPLACE " " "$<SEMICOLON>" LLVM_PROJECT_TARGETS "libcxx libcxxabi")
set(PREFIX ${CMAKE_CURRENT_BINARY_DIR}/3rd/llvm-project)
set(LLVM_LIB_CXX
${PREFIX}/usr/local/lib/libc++${CMAKE_STATIC_LIBRARY_SUFFIX})
set(LLVM_LIB_CXXABI
${PREFIX}/usr/local/lib/libc++abi${CMAKE_STATIC_LIBRARY_SUFFIX})

ExternalProject_Add(
llvm-project
GIT_REPOSITORY https://github.com/llvm/llvm-project.git
GIT_TAG llvmorg-12.0.0
GIT_SHALLOW TRUE
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS=${LLVM_PROJECT_TARGETS} -DLLVM_USE_SANITIZER=MemoryWithOrigins -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ ../llvm-project/llvm -DLLVM_INCLUDE_BENCHMARKS=OFF
BUILD_COMMAND ${MAKE_PROGRAM} cxx cxxabi
INSTALL_COMMAND DESTDIR=${PREFIX} make install-cxx-headers install-cxx install-cxxabi
COMMENT "Building memcheck instrumented libc++ and libc++abi"
PREFIX ${PREFIX}
# Don't try updating the source. This prevents running update when calling 'make' (not sure why update step is run during make).
# It also means the source is not updated when 'cmake' is run again. But we use a fixed tag (llvmorg-12.0.0), so this shouldn't be needed
UPDATE_DISCONNECTED TRUE
)

add_library(
cxx SHARED IMPORTED GLOBAL)
add_library(
cxxabi SHARED IMPORTED GLOBAL)

add_dependencies(
cxx
llvm-project)
add_dependencies(
cxxabi
llvm-project)

set_target_properties(
cxx PROPERTIES IMPORTED_LOCATION ${LLVM_LIB_CXX})
set_target_properties(
cxxabi PROPERTIES IMPORTED_LOCATION ${LLVM_LIB_CXXABI})

set_property(
GLOBAL PROPERTY llvmproject_build_install_dir_property
${PREFIX}/usr/local)

+ 0
- 6
.gitattributes View File

@@ -1,6 +0,0 @@
* text=auto
*.[ch] text whitespacestrict
*.yaml text whitespacestrict
Makefile text whitespace="tabwidth=4,-tab-in-indent,indent-with-non-tab"

[attr]whitespacestrict whitespace="trailing-space,tab-in-indent,space-before-tab,tabwidth=4"

+ 74
- 2
.github/workflows/main.yml View File

@@ -8,6 +8,62 @@ jobs:
unit-test:
name: Unit tests
runs-on: [ubuntu-20.04]
env:
CC: ${{ matrix.cc }}
CXX: ${{ matrix.cxx }}
CMAKE_FLAGS: ${{matrix.flags}}
strategy:
fail-fast: false
max-parallel: 4
matrix:
name: [
gcc-release-build,
clang-release-build,
gcc-debug-build,
clang-debug-build,
clang-release-asan-build,
]

include:
- name: gcc-release-build
cc: gcc
cxx: g++
flags: -DCMAKE_BUILD_TYPE=Release
- name: gcc-debug-build
cc: gcc
cxx: g++
flags: -DCMAKE_BUILD_TYPE=Debug
- name: clang-release-build
cc: clang
cxx: clang++
flags: -DCMAKE_BUILD_TYPE=Release
- name: clang-debug-build
cc: /usr/bin/clang
cxx: /usr/bin/clang++
flags: -DCMAKE_BUILD_TYPE=Debug
- name: clang-release-asan-build
cc: clang
cxx: clang++
flags: -DCMAKE_BUILD_TYPE=Release -DADDRSAN=1
steps:
- uses: actions/checkout@v1
with:
submodules: true
- name: build
run: |
mkdir -p build
cd build
CC=${CC} CXX=${CXX} cmake ${CMAKE_FLAGS} ..
make
- name: run tests
run: |
cd build && ./ut
- name: Build Rust bindings
run: |
cd src/rustapi/pqc-sys && cargo build
KAT:
name: Known Answer Tests
runs-on: [ubuntu-20.04]
steps:
- uses: actions/checkout@v1
with:
@@ -16,7 +72,7 @@ jobs:
run: |
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
CC=clang CXX=clang++ cmake -DCMAKE_BUILD_TYPE=Release ..
make
- name: run tests
run: |
@@ -29,4 +85,20 @@ jobs:
cd test/katrunner &&
curl http://amongbytes.com/~flowher/permalinks/kat.zip --output kat.zip
unzip kat.zip
cargo run -- --katdir KAT
cargo run --release -- --katdir KAT
MEMSAN:
name: Memory Sanitizer build
runs-on: [ubuntu-20.04]
steps:
- uses: actions/checkout@v1
with:
submodules: true
- name: build
run: |
mkdir -p build
cd build
CC=clang CXX=clang++ cmake -DCMAKE_BUILD_TYPE=Release -DMEMSAN=1 -DCTSAN=1 ..
make
- name: run tests
run: |
cd build && ./ut

+ 1
- 4
.gitignore View File

@@ -7,7 +7,4 @@ bin/

# Object and library files on Windows
*.lib
*.obj

__pycache__
testcases/
*.obj

+ 0
- 9
.gitmodules View File

@@ -1,9 +0,0 @@
[submodule "test/pycparser"]
path = test/pycparser
url = https://github.com/eliben/pycparser.git
[submodule "3rd/gtest"]
path = 3rd/gtest
url = https://github.com/google/googletest.git
[submodule "3rd/gbench"]
path = 3rd/gbench
url = https://github.com/henrydcase/benchmark.git

+ 0
- 1
3rd/gbench

@@ -1 +0,0 @@
Subproject commit e45fcc64e02489f718df499a162b41f742a1b7e5

+ 0
- 1
3rd/gtest

@@ -1 +0,0 @@
Subproject commit 1a8ecf1813d022cc7914e04564b92decff6161fc

+ 202
- 54
CMakeLists.txt View File

@@ -1,10 +1,73 @@
cmake_minimum_required(VERSION 3.13)
project(cryptocore NONE)
project(cryptocore VERSION 0.0.1 LANGUAGES C)
include(FetchContent)
include(ExternalProject)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_C_STANDARD 99)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

enable_language(C)
enable_language(CXX)
enable_language(ASM)

set_property(GLOBAL PROPERTY obj_libs "")

# Build with address sanitizer
if(ADDRSAN)
string(APPEND EXTRA_CXX_FLAGS " -fsanitize=undefined,address,leak -fno-omit-frame-pointer")
set(EXTRA_LDFLAGS " -fsanitize=undefined,address,leak")
endif()

if(MEMSAN)
# PQC_MEMSAN enables usage of some internals from clang
if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang")
message(FATAL_ERROR "Must use clang if compiled with memory sanitizer.")
endif()
if(ADDRSAN)
message(FATAL_ERROR "Can't use MSAN and ASAN")
endif()
include(.cmake/libstd-memory_sanitizer.mk)

# LLVM project location
set(LLVM_PRJ ${CMAKE_CURRENT_BINARY_DIR}/3rd/llvm-project)
set(LLVM_PRJ_LIB ${LLVM_PRJ}/usr/local/lib)
set(LLVM_PRJ_INC ${LLVM_PRJ}/usr/local/include)

# Add memory sanitizer instrumented libraries
set(CMAKE_ARGS_MEMCHECK_LIB "-stdlib=libc++")
set(CMAKE_ARGS_MEMCHECK_INC "-isystem -I${LLVM_PRJ_INC} -I${LLVM_PRJ_INC}/c++/v1")
set(CMAKE_ARGS_MEMCHECK_FLAGS "-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer -Wno-unused-command-line-argument")
# Enabling the "keep-going" flag allows two things:
# 1. Enables CT_EXPECT_UMR()/CT_REQUIRE_UMR() in tests. For some reason MSan will halt
# on error even if it expects UMR. And hence, CT can't be tested. This is probably a bug.
# 2. reports all the errors from the run, not only the first one (don't fail-fast)
string(APPEND CMAKE_ARGS_MEMCHECK_FLAGS " -mllvm -msan-keep-going=1")
set(EXTRA_CXX_FLAGS "${CMAKE_ARGS_MEMCHECK_FLAGS} ${CMAKE_ARGS_MEMCHECK_LIB} ${CMAKE_ARGS_MEMCHECK_INC} -DPQC_MEMSAN_BUILD")
set(CXXLIBS_FOR_MEMORY_SANITIZER cxx cxxabi)
endif()

# Constant time memory checks with CTGRIND (requires clang and -DMEMSAN)
if(CTSAN)
if (NOT MEMSAN)
message(FATAL_ERROR "Constant time sanitizer requires -DMEMSAN")
endif()

if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang")
message(FATAL_ERROR "Constant time sanitizer requires Clang")
endif()

string(APPEND EXTRA_CXX_FLAGS " -DPQC_USE_CTSANITIZER")
endif()

# Constant time memory checks with CTGRIND (requires valgrind)
if (CTGRIND)
if (MEMSAN OR CTSAN)
message(FATAL_ERROR "Can't use memory sanitizer (MEMSAN) and CTGRIND")
endif()
string(APPEND EXTRA_CXX_FLAGS " -DPQC_USE_CTGRIND")
endif()

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "~/.cmake/Modules")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "3rd/cmake-modules")
@@ -33,8 +96,6 @@ else()
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()

add_subdirectory(3rd/gtest)

# Arch settings

if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
@@ -42,7 +103,8 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
endif()

if(CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CLANG 1)
# Additional flags only useful when compiling with clang
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wconditional-uninitialized -Wno-missing-variable-declarations -Wno-unused-command-line-argument")
endif()

if (MACOSX)
@@ -52,55 +114,99 @@ endif()

# Global configuration

set(C_CXX_FLAGS
"-Wno-ignored-qualifiers \
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wno-ignored-qualifiers \
-Wall \
-Werror \
-Wextra \
-Wpedantic \
-Wshadow \
-Wno-variadic-macros \
-Wundef \
-Wunused-result")

if(CLANG)
set(C_CXX_FLAGS
"-Wconditional-uninitialized \
-Wmissing-variable-declarations")
-Wunused-result \
-Wno-unused-command-line-argument \
-Wno-undef")

if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 11.0)
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wno-stringop-overread \
-Wno-stringop-overflow \
-Wno-array-parameter")
endif()

include(.cmake/common.mk)

# Control Debug/Release mode
if(CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
set(C_CXX_FLAGS "${C_CXX_FLAGS} -g3 -O0 -Wno-unused")
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -g3 -O0 -Wno-unused")
else()
set(C_CXX_FLAGS "${C_CXX_FLAGS} -O3")
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -O3")
endif()

include_directories(
public
src/common/
src
)
# Set CPU architecture
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -D${ARCH}")

set_property(GLOBAL PROPERTY obj_libs "")
# Build for haswell if on x86_64
if(${ARCH} STREQUAL "ARCH_x86_64")
add_compile_options("-march=haswell")
endif()

# Dependencies
ExternalProject_Add(
gtest_project
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/gtest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG a3460d1aeeaa43fdf137a6adefef10ba0b59fe4b
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/3rd/gtest
INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/3rd/gtest
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/3rd/gtest -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS=${EXTRA_CXX_FLAGS} -DCMAKE_C_FLAGS=${EXTRA_CXX_FLAGS} -Dgtest_disable_pthreads=ON
)
if(MEMSAN)
add_dependencies(gtest_project ${CXXLIBS_FOR_MEMORY_SANITIZER})
endif()

# Set CPU architecture
set(CMAKE_C_FLAGS "${C_CXX_FLAGS} -D${ARCH}")
set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -D${ARCH}")
FetchContent_Declare(
gbench
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/gbench
GIT_REPOSITORY https://github.com/kriskwiatkowski/benchmark.git
GIT_TAG 49862ab56b6b7c3afd87b80bd5d787ed78ce3b96
)
FetchContent_Populate(gbench)

FetchContent_Declare(
cpu_features
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/cpu_features
GIT_REPOSITORY https://github.com/kriskwiatkowski/cpu_features.git
GIT_TAG 38f4324533390b09079a38b524be8b178be8e435
)
FetchContent_Populate(cpu_features)

if(PQC_WEAK_RANDOMBYTES)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DPQC_WEAK_RANDOMBYTES")
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -DPQC_WEAK_RANDOMBYTES")
endif()

# Build CPU features
# The cpu_features sub-project is compiled with the project's base flags plus
# EXTRA_CXX_FLAGS (which carries the sanitizer options when ADDRSAN/MEMSAN is
# enabled). Both variables must be referenced as ${VAR}: a malformed reference
# such as "$${A} {B}" expands to a stray '$' and a literal "{B}" instead of
# the intended flag lists.
set(CMAKE_C_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
# Cached before add_subdirectory() so the sub-project can see it
# (presumably a cpu_features build option -- confirm against its CMakeLists.txt).
set(BUILD_PIC ON CACHE BOOL "")
add_subdirectory(3rd/cpu_features)

# PQC library

# Set C, CXX, and LD flags
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -Wpedantic")
set(CMAKE_C_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
string(APPEND LDFLAGS "${EXTRA_LDFLAGS}")

include_directories(
public
src/common/
src
3rd/cpu_features/include
)

# Define sources of the components
add_subdirectory(src/sign/dilithium/dilithium2/clean)
add_subdirectory(src/sign/dilithium/dilithium3/clean)
add_subdirectory(src/sign/dilithium/dilithium5/clean)
add_subdirectory(src/sign/falcon/falcon-1024/clean)
add_subdirectory(src/sign/falcon/falcon-512/clean)
add_subdirectory(src/sign/falcon)
add_subdirectory(src/sign/rainbow/rainbowV-classic/clean)
add_subdirectory(src/sign/rainbow/rainbowI-classic/clean)
add_subdirectory(src/sign/rainbow/rainbowIII-classic/clean)
@@ -148,19 +254,23 @@ add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean)

add_subdirectory(src/kem/sike)
add_subdirectory(src/kem/mceliece/mceliece348864/clean)
add_subdirectory(src/kem/mceliece/mceliece460896/clean)
add_subdirectory(src/kem/mceliece/mceliece6688128/clean)
add_subdirectory(src/kem/mceliece/mceliece6960119/clean)
add_subdirectory(src/kem/mceliece/mceliece8192128/clean)
add_subdirectory(src/kem/mceliece/mceliece348864f/clean)
add_subdirectory(src/kem/mceliece/mceliece460896f/clean)
add_subdirectory(src/kem/mceliece/mceliece6688128f/clean)
add_subdirectory(src/kem/mceliece/mceliece6960119f/clean)
add_subdirectory(src/kem/mceliece/mceliece8192128f/clean)
# Hardware optimized targets
if(${ARCH} STREQUAL "ARCH_x86_64")

set(CMAKE_C_FLAGS
"${CMAKE_C_FLAGS} -march=native -mtune=native")
set(SRC_COMMON_AVX2
src/common/keccak4x/KeccakP-1600-times4-SIMD256.c
)
if(${ARCH} STREQUAL "ARCH_x86_64")
set(COMMON_EXTRA_SRC "src/common/keccak4x/KeccakP-1600-times4-SIMD256.c")

# Sign
add_subdirectory(src/sign/falcon/falcon-512/avx2)
add_subdirectory(src/sign/falcon/falcon-1024/avx2)
add_subdirectory(src/sign/dilithium/dilithium2/avx2)
add_subdirectory(src/sign/dilithium/dilithium3/avx2)
add_subdirectory(src/sign/dilithium/dilithium5/avx2)
@@ -188,7 +298,6 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-simple/avx2)
add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-robust/avx2)
add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-simple/avx2)
add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/avx2)

# KEMs
add_subdirectory(src/kem/kyber/kyber512/avx2)
add_subdirectory(src/kem/kyber/kyber768/avx2)
@@ -208,25 +317,20 @@ add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2)
endif()



# The rest of the library
set(SRC_COMMON_GENERIC
add_library(
common
OBJECT

src/common/aes.c
src/common/fips202.c
src/common/sp800-185.c
src/common/randombytes.c
src/common/sha2.c
src/common/nistseedexpander.c
src/common/utils.c
src/capi/pqapi.c
)

add_library(
common
OBJECT
${SRC_COMMON_GENERIC}
${SRC_COMMON_AVX2}
)
${COMMON_EXTRA_SRC})

add_library(
pqc
@@ -241,33 +345,77 @@ get_property(OBJ_LIBS GLOBAL PROPERTY obj_libs)

target_link_libraries(
pqc
common
${OBJ_LIBS}
cpu_features
common
)

target_link_libraries(
pqc_s

cpu_features
common
${OBJ_LIBS}
)

SET(UT_SRC test/ut.cpp)
if(CTGRIND OR CTSAN)
SET(UT_SRC ${UT_SRC} test/ct.cpp)
endif()

add_executable(
ut

test/ut.cpp
${UT_SRC}
)

target_link_libraries(
ut

gtest
gtest_main
pqc_s)
pqc_s
${CXXLIBS_FOR_MEMORY_SANITIZER})

ExternalProject_Get_Property(gtest_project INSTALL_DIR)
target_include_directories(
ut PRIVATE

${CMAKE_SOURCE_DIR})
${CMAKE_SOURCE_DIR}
${INSTALL_DIR}/include)

target_link_directories(
ut
PRIVATE
${INSTALL_DIR}/lib)

# github CI requires that
add_dependencies(ut gtest_project)

if(NOT CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
# settings below are required by benchmark library
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
# Target for benchmark - it also builds gtest library
set(BENCHMARK_ENABLE_GTEST_TESTS ON CACHE BOOL "Enable testing of the benchmark library." FORCE)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark tests" FORCE)
set(GOOGLETEST_PATH "${CMAKE_SOURCE_DIR}/3rd/gtest" CACHE PATH "Path to the gtest sources" FORCE)
#if (NOT MACOSX)
# set(BENCHMARK_ENABLE_LTO ON CACHE BOOL "Enable link time optim" FORCE)
#endif()
set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE)
set(CMAKE_C_FLAGS "${EXTRA_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${EXTRA_CXX_FLAGS}")
if (MEMSAN)
set(BENCHMARK_USE_LIBCXX ON CACHE BOOL "" FORCE)
# Since build requires C++20 it is safe to assume that std::regex is available.
# It seems I need to force it as benchmark build doesn't work very well with libc++
set(HAVE_STD_REGEX ON CACHE BOOL "OK" FORCE)
endif()

add_subdirectory(${CMAKE_SOURCE_DIR}/3rd/gbench)
add_subdirectory(test/bench)
endif()

install(TARGETS pqc pqc_s
PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE


+ 11
- 7
README.md View File

@@ -1,23 +1,27 @@
# PQ Crypto Catalog

This is a repository of post-quantum schemes copied from either the submission to the NIST Post-Quantum Standardization or [PQClean](https://github.com/PQClean/PQClean) project. The goal of the library is to provide easy to use API which enables quick experimentation with some post-quantum cryptographic schemes.
Implementation of quantum-safe signature and KEM schemes submitted to NIST PQC Standardization Process.

The goal is to provide an easy-to-use API in C and Rust to enable experimentation. The code is derived from the submission to the NIST Post-Quantum Standardization, either directly or by leveraging [PQClean](https://github.com/PQClean/PQClean) project.

Users shouldn't expect any level of security provided by this code. The library is not meant to be used on live production systems.

## Schemes support
## Supported schemes

| Name | NIST Round | x86 optimized |
|--------------------------|------------|---------------|
| Kyber | 3 | x |
| NTRU | 3 | x |
| SABER | 3 | x |
| FrodoKEM | 3 | |
| Dilithium | 3 | x |
| Falcon | 3 | |
| SPHINCS+ SHA256/SHAKE256 | 3 | x |
| NTRU | 3 | x |
| NTRU Prime | 3 | x |
| HQC-RMRS | 3 | x |
| Dilithium | 3 | x |
| Falcon | 2 | |
| Rainbow | 3 | |
| SPHINCS+ SHA256/SHAKE256 | 3 | x |
| SIKE/p434 | 3 | x |
| McEliece | 3 | |

## Building

@@ -38,13 +42,13 @@ Library provides simple API, wrapping PQClean. For example to use KEM, one shoul
```c
#include <pqc/pqc.h>

const params_t *p = pqc_kem_alg_by_id(KYBER512);
std::vector<uint8_t> ct(ciphertext_bsz(p));
std::vector<uint8_t> ss1(shared_secret_bsz(p));
std::vector<uint8_t> ss2(shared_secret_bsz(p));
std::vector<uint8_t> sk(private_key_bsz(p));
std::vector<uint8_t> pk(public_key_bsz(p));

const params_t *p = pqc_kem_alg_by_id(KYBER512);
pqc_keygen(p, pk.data(), sk.data());
pqc_kem_encapsulate(p, ct.data(), ss1.data(), pk.data());
pqc_kem_decapsulate(p, ss2.data(), ct.data(), sk.data());


+ 9
- 0
SECURITY.md View File

@@ -0,0 +1,9 @@
# Security Policy

## Supported Versions

No security is guaranteed.

## Reporting a Vulnerability

Any comments welcome: contact (at) amongbytes.com

+ 66
- 65
public/pqc/pqc.h View File

@@ -8,73 +8,88 @@ extern "C" {
#include <stdint.h>
#include <stdbool.h>

// defines supported signature algorithm list
#define PQC_SUPPORTED_SIGS(_) \
// Defines supported signature algorithm list. The resulting
// ID of an algorithm is PQC_ALG_SIG_(NAME_AS_BELOW)
#define PQC_SUPPORTED_SIGS(_) \
_(DILITHIUM2) \
_(DILITHIUM3) \
_(DILITHIUM5) \
_(FALCON1024) \
_(FALCON512) \
_(RAINBOWVCLASSIC) \
_(FALCON1024) \
_(RAINBOWICLASSIC) \
_(RAINBOWIIICLASSIC) \
_(SPHINCSSHA256192FSIMPLE) \
_(SPHINCSSHAKE256256FSIMPLE) \
_(SPHINCSSHAKE256192FROBUST) \
_(RAINBOWVCLASSIC) \
_(SPHINCSSHAKE256128FSIMPLE) \
_(SPHINCSSHAKE256256SSIMPLE) \
_(SPHINCSSHAKE256128SSIMPLE) \
_(SPHINCSSHA256128FROBUST) \
_(SPHINCSSHA256192SROBUST) \
_(SPHINCSSHAKE256128FROBUST) \
_(SPHINCSSHAKE256128SROBUST) \
_(SPHINCSSHAKE256256SROBUST) \
_(SPHINCSSHA256192SSIMPLE) \
_(SPHINCSSHAKE256192FSIMPLE) \
_(SPHINCSSHAKE256192SSIMPLE) \
_(SPHINCSSHAKE256192FROBUST) \
_(SPHINCSSHAKE256192SROBUST) \
_(SPHINCSSHAKE256192FSIMPLE) \
_(SPHINCSSHA256256SSIMPLE) \
_(SPHINCSSHA256128SSIMPLE) \
_(SPHINCSSHAKE256256FSIMPLE) \
_(SPHINCSSHAKE256256SSIMPLE) \
_(SPHINCSSHAKE256256FROBUST) \
_(SPHINCSSHA256256FROBUST) \
_(SPHINCSSHA256256FSIMPLE) \
_(SPHINCSSHA256256SROBUST) \
_(SPHINCSSHA256128SROBUST) \
_(SPHINCSSHAKE256256SROBUST) \
_(SPHINCSSHA256128FSIMPLE) \
_(SPHINCSSHA256192FROBUST)
_(SPHINCSSHA256128SSIMPLE) \
_(SPHINCSSHA256128FROBUST) \
_(SPHINCSSHA256128SROBUST) \
_(SPHINCSSHA256192FSIMPLE) \
_(SPHINCSSHA256192SSIMPLE) \
_(SPHINCSSHA256192FROBUST) \
_(SPHINCSSHA256192SROBUST) \
_(SPHINCSSHA256256FSIMPLE) \
_(SPHINCSSHA256256SSIMPLE) \
_(SPHINCSSHA256256FROBUST) \
_(SPHINCSSHA256256SROBUST)

// defines supported kem algorithm list
// Defines supported kem algorithm list. The resulting
// ID of an algorithm is PQC_ALG_KEM_(NAME_AS_BELOW)
#define PQC_SUPPORTED_KEMS(_)\
_(FRODOKEM640SHAKE) \
_(FRODOKEM976SHAKE) \
_(FRODOKEM1344SHAKE) \
_(FRODOKEM640SHAKE) \
_(KYBER512) \
_(KYBER768) \
_(KYBER1024) \
_(KYBER512) \
_(NTRUHPS4096821) \
_(NTRUHPS2048509) \
_(NTRUHPS4096821) \
_(NTRUHRSS701) \
_(NTRUHPS2048677) \
_(NTRULPR761) \
_(NTRULPR653) \
_(NTRULPR857) \
_(LIGHTSABER) \
_(FIRESABER) \
_(SABER) \
_(FIRESABER) \
_(HQCRMRS128) \
_(HQCRMRS192) \
_(HQCRMRS256)
_(HQCRMRS256) \
_(SIKE434) \
_(MCELIECE348864) \
_(MCELIECE460896) \
_(MCELIECE6688128) \
_(MCELIECE6960119) \
_(MCELIECE8192128) \
_(MCELIECE348864F) \
_(MCELIECE460896F) \
_(MCELIECE6688128F) \
_(MCELIECE6960119F) \
_(MCELIECE8192128F)

// Defines IDs for each algorithm. The
// PQC_ALG_SIG/KEM_MAX indicates number
// of KEM and signature schemes supported.
#define DEFNUM(N) N,
enum { PQC_SUPPORTED_SIGS(DEFNUM) PQC_ALG_SIG_MAX };
enum { PQC_SUPPORTED_KEMS(DEFNUM) PQC_ALG_KEM_MAX };
#undef DEFNUM
#define DEFNUM_SIG(N) PQC_ALG_SIG_##N,
#define DEFNUM_KEM(N) PQC_ALG_KEM_##N,
enum { PQC_SUPPORTED_SIGS(DEFNUM_SIG) PQC_ALG_SIG_MAX };
enum { PQC_SUPPORTED_KEMS(DEFNUM_KEM) PQC_ALG_KEM_MAX };
#undef DEFNUM_SIG
#undef DEFNUM_KEM

// Parameters of the scheme
typedef struct params_t {
typedef struct pqc_ctx_t {
const uint8_t alg_id;
const char* alg_name;
const uint32_t prv_key_bsz;
@@ -82,73 +97,59 @@ typedef struct params_t {
const bool is_kem;

int (*keygen)(uint8_t *sk, uint8_t *pk);
} params_t;
} pqc_ctx_t;

typedef struct kem_params_t {
params_t p;
typedef struct pqc_kem_ctx_t {
pqc_ctx_t p;
const uint32_t ciphertext_bsz;
const uint32_t secret_bsz;

int (*encapsulate)(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
int (*decapsulate)(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
} kem_params_t;
} pqc_kem_ctx_t;

typedef struct sig_params_t {
params_t p;
typedef struct pqc_sig_ctx_t {
pqc_ctx_t p;
const uint32_t sign_bsz;
int (*sign)(uint8_t *sig, uint64_t *siglen, const uint8_t *m, uint64_t mlen, const uint8_t *sk);
int (*verify)(const uint8_t *sig, uint64_t siglen, const uint8_t *m, uint64_t mlen, const uint8_t *pk);
} sig_params_t;

inline uint32_t ciphertext_bsz(const params_t *p) {
return ((kem_params_t *)p)->ciphertext_bsz;
}

inline uint32_t shared_secret_bsz(const params_t *p) {
return ((kem_params_t *)p)->secret_bsz;
}

inline uint32_t signature_bsz(const params_t *p) {
return ((sig_params_t *)p)->sign_bsz;
}

inline uint32_t public_key_bsz(const params_t *p) {
return p->pub_key_bsz;
}

inline uint32_t private_key_bsz(const params_t *p) {
return p->prv_key_bsz;
}
} pqc_sig_ctx_t;

bool pqc_keygen(
const params_t *p,
const pqc_ctx_t *p,
uint8_t *pk, uint8_t *sk);

bool pqc_kem_encapsulate(
const params_t *p,
const pqc_ctx_t *p,
uint8_t *ct, uint8_t *ss,
const uint8_t *pk);

bool pqc_kem_decapsulate(
const params_t *p,
const pqc_ctx_t *p,
uint8_t *ss, const uint8_t *ct,
const uint8_t *sk);

bool pqc_sig_create(
const params_t *p,
const pqc_ctx_t *p,
uint8_t *sig, uint64_t *siglen,
const uint8_t *m, uint64_t mlen,
const uint8_t *sk);

bool pqc_sig_verify(
const params_t *p,
const pqc_ctx_t *p,
const uint8_t *sig, uint64_t siglen,
const uint8_t *m, uint64_t mlen,
const uint8_t *pk);


const params_t *pqc_kem_alg_by_id(uint8_t id);
const params_t *pqc_sig_alg_by_id(uint8_t id);
const pqc_ctx_t *pqc_kem_alg_by_id(uint8_t id);
const pqc_ctx_t *pqc_sig_alg_by_id(uint8_t id);

uint32_t pqc_ciphertext_bsz(const pqc_ctx_t *p);
uint32_t pqc_shared_secret_bsz(const pqc_ctx_t *p);
uint32_t pqc_signature_bsz(const pqc_ctx_t *p);
uint32_t pqc_public_key_bsz(const pqc_ctx_t *p);
uint32_t pqc_private_key_bsz(const pqc_ctx_t *p);

#ifdef __cplusplus
}


+ 54
- 146
src/capi/pqapi.c View File

@@ -1,138 +1,14 @@
#include <stdint.h>
#include <stdbool.h>
#include <pqc/pqc.h>
#include <cpuinfo_x86.h>
#include <common/utils.h>

// PQClean include
#include "sign/rainbow/rainbowV-classic/clean/api.h"
#include "sign/rainbow/rainbowI-classic/clean/api.h"
#include "sign/rainbow/rainbowIII-classic/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h"
#include "sign/falcon/falcon-1024/clean/api.h"
#include "sign/falcon/falcon-1024/avx2/api.h"
#include "sign/falcon/falcon-512/clean/api.h"
#include "sign/falcon/falcon-512/avx2/api.h"
#include "sign/dilithium/dilithium2/clean/api.h"
#include "sign/dilithium/dilithium2/avx2/api.h"
#include "sign/dilithium/dilithium3/clean/api.h"
#include "sign/dilithium/dilithium3/avx2/api.h"
#include "sign/dilithium/dilithium5/clean/api.h"
#include "sign/dilithium/dilithium5/avx2/api.h"
#include "kem/ntru/ntruhps4096821/clean/api.h"
#include "kem/ntru/ntruhps4096821/avx2/api.h"
#include "kem/ntru/ntruhps2048509/clean/api.h"
#include "kem/ntru/ntruhps2048509/avx2/api.h"
#include "kem/ntru/ntruhrss701/clean/api.h"
#include "kem/ntru/ntruhrss701/avx2/api.h"
#include "kem/ntru/ntruhps2048677/clean/api.h"
#include "kem/ntru/ntruhps2048677/avx2/api.h"
#include "kem/ntru_prime/ntrulpr761/clean/api.h"
#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
#include "kem/ntru_prime/ntrulpr653/clean/api.h"
#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
#include "kem/ntru_prime/ntrulpr857/clean/api.h"
#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
#include "kem/kyber/kyber768/clean/api.h"
#include "kem/kyber/kyber768/avx2/api.h"
#include "kem/kyber/kyber1024/clean/api.h"
#include "kem/kyber/kyber1024/avx2/api.h"
#include "kem/kyber/kyber512/clean/api.h"
#include "kem/kyber/kyber512/avx2/api.h"
#include "kem/mceliece/mceliece460896f/avx/api.h"
#include "kem/mceliece/mceliece460896f/clean/api.h"
#include "kem/mceliece/mceliece8192128/avx/api.h"
#include "kem/mceliece/mceliece8192128/clean/api.h"
#include "kem/mceliece/mceliece6688128f/avx/api.h"
#include "kem/mceliece/mceliece6688128f/clean/api.h"
#include "kem/mceliece/mceliece8192128f/avx/api.h"
#include "kem/mceliece/mceliece8192128f/clean/api.h"
#include "kem/mceliece/mceliece6960119f/avx/api.h"
#include "kem/mceliece/mceliece6960119f/clean/api.h"
#include "kem/mceliece/mceliece460896/avx/api.h"
#include "kem/mceliece/mceliece460896/clean/api.h"
#include "kem/mceliece/mceliece6688128/avx/api.h"
#include "kem/mceliece/mceliece6688128/clean/api.h"
#include "kem/mceliece/mceliece348864f/avx/api.h"
#include "kem/mceliece/mceliece348864f/clean/api.h"
#include "kem/mceliece/mceliece6960119/avx/api.h"
#include "kem/mceliece/mceliece6960119/clean/api.h"
#include "kem/mceliece/mceliece348864/avx/api.h"
#include "kem/mceliece/mceliece348864/clean/api.h"
#include "kem/frodo/frodokem976shake/clean/api.h"
#include "kem/frodo/frodokem1344shake/clean/api.h"
#include "kem/frodo/frodokem640shake/clean/api.h"
#include "kem/saber/lightsaber/clean/api.h"
#include "kem/saber/lightsaber/avx2/api.h"
#include "kem/saber/firesaber/clean/api.h"
#include "kem/saber/firesaber/avx2/api.h"
#include "kem/saber/saber/clean/api.h"
#include "kem/saber/saber/avx2/api.h"
#include "kem/hqc/hqc-rmrs-128/clean/api.h"
#include "kem/hqc/hqc-rmrs-192/clean/api.h"
#include "kem/hqc/hqc-rmrs-256/clean/api.h"
#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
#include "kem/hqc/hqc-rmrs-256/avx2/api.h"
#include "schemes.h"

// not proud of this thingy
#define OPT_VERSION _CLEAN_

// Helper to stringify constants
#define STR(x) STR_(x)
#define STR_(x) #x

/* Concatenate tokens X and Y. Can be done by the "##" operator in
* simple cases, but has some side effects in more complicated cases.
*/
#define GLUE(a, b) GLUE_(a, b)
#define GLUE_(a, b) a##b

// Returns prefix defined by PQClean, depending
// on OPT_VERSION setting.
// Something like: "PQCLEAN_KYBER512_CLEAN_"
@@ -153,9 +29,9 @@
#define PQC_FN_SIGN(x) GLUE(A(x), crypto_sign_signature)
#define PQC_FN_VERIFY(x) GLUE(A(x), crypto_sign_verify)

#define REG_ALG(ID) \
#define REG_ALG(PFX,ID) \
{ \
.alg_id = ID, \
.alg_id = GLUE(PFX,ID), \
.alg_name = STR(ID), \
.prv_key_bsz = PQC_PRV_KEY_BSZ(ID), \
.pub_key_bsz = PQC_PUB_KEY_BSZ(ID), \
@@ -164,7 +40,7 @@
// Macro magic needed to initialize parameters for a scheme
#define REG_KEM(ID) \
{ \
.p = REG_ALG(ID), \
.p = REG_ALG(PQC_ALG_KEM_,ID), \
.p.keygen = PQC_FN_KEM_KEYGEN(ID),\
.ciphertext_bsz = PQC_CT_BSZ(ID), \
.secret_bsz = PQC_KEM_BSZ(ID), \
@@ -175,7 +51,7 @@
// Macro magic needed to initialize parameters for a scheme
#define REG_SIG(ID) \
{ \
.p = REG_ALG(ID), \
.p = REG_ALG(PQC_ALG_SIG_,ID), \
.p.keygen = PQC_FN_SIG_KEYGEN(ID),\
.sign_bsz = PQC_SIGN_BSZ(ID), \
.sign = PQC_FN_SIGN(ID), \
@@ -183,62 +59,94 @@
},

// Registers supported KEMs
const kem_params_t kems[] = {
const pqc_kem_ctx_t kems[] = {
PQC_SUPPORTED_KEMS(REG_KEM)
};

// Registers supported signatures
const sig_params_t sigs[] = {
const pqc_sig_ctx_t sigs[] = {
PQC_SUPPORTED_SIGS(REG_SIG)
};

const params_t *pqc_kem_alg_by_id(uint8_t id) {
// Contains capabilities on x86 CPU on which implementation is running
X86Features CPU_CAPS;

// Returns a read-only pointer to the file-level CPU_CAPS object.
// The pointer always refers to the same global; callers must not free it.
const X86Features * get_cpu_caps(void) {
return &CPU_CAPS;
}

const pqc_ctx_t *pqc_kem_alg_by_id(uint8_t id) {
int i;
for(i=0; i<PQC_ALG_KEM_MAX; i++) {
if (kems[i].p.alg_id == id) {
return (params_t*)&kems[i];
return (pqc_ctx_t*)&kems[i];
}
}
return 0;
}

const params_t *pqc_sig_alg_by_id(uint8_t id) {
const pqc_ctx_t *pqc_sig_alg_by_id(uint8_t id) {
int i;
for(i=0; i<PQC_ALG_SIG_MAX; i++) {
if (sigs[i].p.alg_id == id) {
return (params_t*)&sigs[i];
return (pqc_ctx_t*)&sigs[i];
}
}
return 0;
}

bool pqc_keygen(const params_t *p,
bool pqc_keygen(const pqc_ctx_t *p,
uint8_t *pk, uint8_t *sk) {
return !p->keygen(pk, sk);
}

bool pqc_kem_encapsulate(const params_t *p,
bool pqc_kem_encapsulate(const pqc_ctx_t *p,
uint8_t *ct, uint8_t *ss,
const uint8_t *pk) {
return !((kem_params_t*)p)->encapsulate(ct, ss, pk);
return !((pqc_kem_ctx_t*)p)->encapsulate(ct, ss, pk);
}

bool pqc_kem_decapsulate(const params_t *p,
bool pqc_kem_decapsulate(const pqc_ctx_t *p,
uint8_t *ss, const uint8_t *ct,
const uint8_t *sk) {
return !((kem_params_t*)p)->decapsulate(ss, ct, sk);
return !((pqc_kem_ctx_t*)p)->decapsulate(ss, ct, sk);
}

bool pqc_sig_create(const params_t *p,
bool pqc_sig_create(const pqc_ctx_t *p,
uint8_t *sig, uint64_t *siglen,
const uint8_t *m, uint64_t mlen,
const uint8_t *sk) {
return !((sig_params_t *)p)->sign(sig, siglen, m, mlen, sk);
return !((pqc_sig_ctx_t *)p)->sign(sig, siglen, m, mlen, sk);
}

bool pqc_sig_verify(const params_t *p,
bool pqc_sig_verify(const pqc_ctx_t *p,
const uint8_t *sig, uint64_t siglen,
const uint8_t *m, uint64_t mlen,
const uint8_t *pk) {
return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk);
return !((pqc_sig_ctx_t *)p)->verify(sig, siglen, m, mlen, pk);
}

uint32_t pqc_ciphertext_bsz(const pqc_ctx_t *p) {
return ((pqc_kem_ctx_t *)p)->ciphertext_bsz;
}

uint32_t pqc_shared_secret_bsz(const pqc_ctx_t *p) {
return ((pqc_kem_ctx_t *)p)->secret_bsz;
}

uint32_t pqc_signature_bsz(const pqc_ctx_t *p) {
return ((pqc_sig_ctx_t *)p)->sign_bsz;
}

// Returns the pub_key_bsz field of the scheme context p.
// p is dereferenced unconditionally, so it must not be NULL.
uint32_t pqc_public_key_bsz(const pqc_ctx_t *p) {
return p->pub_key_bsz;
}

// Returns the prv_key_bsz field of the scheme context p.
// p is dereferenced unconditionally, so it must not be NULL.
uint32_t pqc_private_key_bsz(const pqc_ctx_t *p) {
return p->prv_key_bsz;
}

// Library initializer. The constructor attribute on the declaration makes the
// compiler arrange for this function to run automatically before main().
// It stores the `features` member of the value returned by GetX86Info() in
// CPU_CAPS, which get_cpu_caps() later exposes.
void static_initialization(void) __attribute__((constructor));
void static_initialization(void) {
CPU_CAPS = GetX86Info().features;
}

+ 120
- 0
src/capi/schemes.h View File

@@ -0,0 +1,120 @@
#ifndef PQC_SCHEMES_
#define PQC_SCHEMES_

// PQClean include
#include "sign/rainbow/rainbowV-classic/clean/api.h"
#include "sign/rainbow/rainbowI-classic/clean/api.h"
#include "sign/rainbow/rainbowIII-classic/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h"
#include "sign/dilithium/dilithium2/clean/api.h"
#include "sign/dilithium/dilithium2/avx2/api.h"
#include "sign/dilithium/dilithium3/clean/api.h"
#include "sign/dilithium/dilithium3/avx2/api.h"
#include "sign/dilithium/dilithium5/clean/api.h"
#include "sign/dilithium/dilithium5/avx2/api.h"
#include "sign/falcon/api.h"
#include "kem/ntru/ntruhps4096821/clean/api.h"
#include "kem/ntru/ntruhps4096821/avx2/api.h"
#include "kem/ntru/ntruhps2048509/clean/api.h"
#include "kem/ntru/ntruhps2048509/avx2/api.h"
#include "kem/ntru/ntruhrss701/clean/api.h"
#include "kem/ntru/ntruhrss701/avx2/api.h"
#include "kem/ntru/ntruhps2048677/clean/api.h"
#include "kem/ntru/ntruhps2048677/avx2/api.h"
#include "kem/ntru_prime/ntrulpr761/clean/api.h"
#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
#include "kem/ntru_prime/ntrulpr653/clean/api.h"
#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
#include "kem/ntru_prime/ntrulpr857/clean/api.h"
#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
#include "kem/kyber/kyber768/clean/api.h"
#include "kem/kyber/kyber768/avx2/api.h"
#include "kem/kyber/kyber1024/clean/api.h"
#include "kem/kyber/kyber1024/avx2/api.h"
#include "kem/kyber/kyber512/clean/api.h"
#include "kem/kyber/kyber512/avx2/api.h"
#include "kem/mceliece/mceliece460896f/avx/api.h"
#include "kem/mceliece/mceliece460896f/clean/api.h"
#include "kem/mceliece/mceliece8192128/avx/api.h"
#include "kem/mceliece/mceliece8192128/clean/api.h"
#include "kem/mceliece/mceliece6688128f/avx/api.h"
#include "kem/mceliece/mceliece6688128f/clean/api.h"
#include "kem/mceliece/mceliece8192128f/avx/api.h"
#include "kem/mceliece/mceliece8192128f/clean/api.h"
#include "kem/mceliece/mceliece6960119f/avx/api.h"
#include "kem/mceliece/mceliece6960119f/clean/api.h"
#include "kem/mceliece/mceliece460896/avx/api.h"
#include "kem/mceliece/mceliece460896/clean/api.h"
#include "kem/mceliece/mceliece6688128/avx/api.h"
#include "kem/mceliece/mceliece6688128/clean/api.h"
#include "kem/mceliece/mceliece348864f/avx/api.h"
#include "kem/mceliece/mceliece348864f/clean/api.h"
#include "kem/mceliece/mceliece6960119/avx/api.h"
#include "kem/mceliece/mceliece6960119/clean/api.h"
#include "kem/mceliece/mceliece348864/avx/api.h"
#include "kem/mceliece/mceliece348864/clean/api.h"
#include "kem/frodo/frodokem976shake/clean/api.h"
#include "kem/frodo/frodokem1344shake/clean/api.h"
#include "kem/frodo/frodokem640shake/clean/api.h"
#include "kem/saber/lightsaber/clean/api.h"
#include "kem/saber/lightsaber/avx2/api.h"
#include "kem/saber/firesaber/clean/api.h"
#include "kem/saber/firesaber/avx2/api.h"
#include "kem/saber/saber/clean/api.h"
#include "kem/saber/saber/avx2/api.h"
#include "kem/hqc/hqc-rmrs-128/clean/api.h"
#include "kem/hqc/hqc-rmrs-192/clean/api.h"
#include "kem/hqc/hqc-rmrs-256/clean/api.h"
#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
#include "kem/hqc/hqc-rmrs-256/avx2/api.h"
#include "kem/sike/includes/sike/sike.h"

#endif

+ 0
- 22
src/common/Makefile View File

@@ -1,22 +0,0 @@
# This Makefile can be used with GNU Make or BSD Make

LIB=libcommon.a
HEADERS= fips202.h aes.h sha2.h randombytes.h sp800-185.h nistseedexpander.h cpucycles.h speed_print.h
OBJECTS= fips202.o aes.o sha2.o randombytes.o sp800-185.o nistseedexpander.o cpucycles.o speed_print.o

CFLAGS=-O3 -march=native -mtune=native -flto -mavx2 -maes -mbmi2 -Wall -Wextra -Wpedantic -Wvla -Wredundant-decls -Wmissing-prototypes -std=gnu99 $(EXTRAFLAGS)

all: $(LIB)

%.o: %.s $(HEADERS)
$(AS) -o $@ $<

%.o: %.c $(HEADERS)
$(CC) $(CFLAGS) -c -o $@ $<

$(LIB): $(OBJECTS)
$(AR) -r $@ $(OBJECTS)

clean:
$(RM) $(OBJECTS)
$(RM) $(LIB)

+ 55
- 0
src/common/ct_check.h View File

@@ -0,0 +1,55 @@
#ifndef CT_CHECK_H
#define CT_CHECK_H

// Helper: explicitly discards a value (silences unused-parameter warnings).
#define VOID(V) ((void)(V))

// size_t appears in the signatures below in every build configuration.
#include <stddef.h>

// PQC_CT_USE_MSAN is defined when PQC_USE_CTSANITIZER is requested and the
// code is compiled by Clang with Memory Sanitizer enabled. __has_feature() is
// evaluated in a nested #if: compilers which do not provide it cannot parse
// "defined(__has_feature) && __has_feature(...)" as a single expression.
#if defined(PQC_USE_CTSANITIZER) && defined(__clang__) && defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define PQC_CT_USE_MSAN
#endif
#endif

#if defined(PQC_CT_USE_MSAN)
// Uses Clang's Memory Sanitizer
#include <sanitizer/msan_interface.h>
#elif defined(PQC_USE_CTGRIND)
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#endif

// Sets sz bytes of memory starting at address p as uninitialized. Switches on
// constant-time checks. Does nothing when neither MSan nor CTGRIND is enabled.
static inline void ct_poison(const volatile void *p, size_t sz) {
#if defined(PQC_CT_USE_MSAN)
    __msan_allocated_memory(p, sz);
#elif defined(PQC_USE_CTGRIND)
    VALGRIND_MAKE_MEM_UNDEFINED(p, sz);
#else
    VOID(p), VOID(sz);
#endif
}

// Sets sz bytes of memory starting at p as initialized. Switches off
// constant-time checks. Does nothing when neither MSan nor CTGRIND is enabled.
static inline void ct_purify(const volatile void *p, size_t sz) {
#if defined(PQC_CT_USE_MSAN)
    __msan_unpoison(p, sz);
#elif defined(PQC_USE_CTGRIND)
    VALGRIND_MAKE_MEM_DEFINED(p, sz);
#else
    VOID(p), VOID(sz);
#endif
}

// Instructs the memory sanitizer that the code expects to operate on
// uninitialized memory. Has an effect only in MSan builds.
static inline void ct_expect_umr(void) {
#if defined(PQC_CT_USE_MSAN)
    __msan_set_expect_umr(1);
#endif
}

// Checks if an action on uninitialized memory has occurred. If this is not the
// case then an error is reported. It works in tandem with ct_expect_umr(). In
// the current version of MSan, the code needs to be compiled with
// `-mllvm -msan-keep-going=1` flags in order to work correctly.
static inline void ct_require_umr(void) {
#if defined(PQC_CT_USE_MSAN)
    __msan_set_expect_umr(0);
#endif
}

#endif // CT_CHECK_H

+ 8
- 0
src/common/fips202.c View File

@@ -542,6 +542,10 @@ void shake128_inc_squeeze(uint8_t *output, size_t outlen, shake128incctx *state)
keccak_inc_squeeze(output, outlen, state->ctx, SHAKE128_RATE);
}

// Resets an incremental SHAKE128 state by re-running keccak_inc_init() on the
// context already held in state->ctx. No allocation happens here.
// NOTE(review): presumably state->ctx must already have been allocated (e.g.
// by shake128_inc_init) - confirm.
void shake128_inc_reset(shake128incctx *state) {
keccak_inc_init(state->ctx);
}

void shake128_inc_ctx_clone(shake128incctx *dest, const shake128incctx *src) {
dest->ctx = malloc(PQC_SHAKEINCCTX_BYTES);
if (dest->ctx == NULL) {
@@ -566,6 +570,10 @@ void shake256_inc_absorb(shake256incctx *state, const uint8_t *input, size_t inl
keccak_inc_absorb(state->ctx, SHAKE256_RATE, input, inlen);
}

// Resets an incremental SHAKE256 state by re-running keccak_inc_init() on the
// context already held in state->ctx. No allocation happens here.
// NOTE(review): presumably state->ctx must already have been allocated (e.g.
// by shake256_inc_init) - confirm.
void shake256_inc_reset(shake256incctx *state) {
keccak_inc_init(state->ctx);
}

void shake256_inc_finalize(shake256incctx *state) {
keccak_inc_finalize(state->ctx, SHAKE256_RATE, 0x1F);
}


+ 4
- 0
src/common/fips202.h View File

@@ -72,6 +72,8 @@ void shake128_inc_init(shake128incctx *state);
* Can be called multiple times.
*/
void shake128_inc_absorb(shake128incctx *state, const uint8_t *input, size_t inlen);
// Reset the state
void shake128_inc_reset(shake128incctx *state);
/* Finalize the XOF for squeezing */
void shake128_inc_finalize(shake128incctx *state);
/* Squeeze output out of the sponge.
@@ -95,6 +97,8 @@ void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen);
* Supports being called multiple times
*/
void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state);
// Reset the state
void shake256_inc_reset(shake256incctx *state);
/* Free the context held by this XOF */
void shake256_ctx_release(shake256ctx *state);
/* Copy the context held by this XOF */


+ 4
- 0
src/common/randombytes.c View File

@@ -301,6 +301,10 @@ static int randombytes_js_randombytes_nodejs(void *buf, size_t n) {
#endif /* defined(__EMSCRIPTEN__) */

int randombytes(uint8_t *buf, size_t n) {
#ifdef PQC_MEMSAN_BUILD
size_t i;
for (i=0; i<n; i++) buf[i]=0;
#endif
#if defined(__EMSCRIPTEN__)
return randombytes_js_randombytes_nodejs(buf, n);
#elif defined(__linux__)


+ 48
- 0
src/common/utils.h View File

@@ -0,0 +1,48 @@
#ifndef PQC_COMMON_UTILS_
#define PQC_COMMON_UTILS_

#include <cpuinfo_x86.h>
#include <stdint.h>
#include <stddef.h>

// Helper to stringify constants. The extra STR_ level makes the preprocessor
// macro-expand the argument first, so STR(SOME_MACRO) yields the text of the
// macro's value rather than the literal "SOME_MACRO".
#define STR(x) STR_(x)
#define STR_(x) #x

/* Concatenate tokens X and Y. Can be done by the "##" operator in
* simple cases, but "##" applied directly does not macro-expand its
* operands; the extra GLUE_ level expands both arguments before pasting.
*/
#define GLUE(a, b) GLUE_(a, b)
#define GLUE_(a, b) a##b

// Number of elements in array x. Only valid for true arrays, not pointers.
// The expansion is fully parenthesized so it can be used inside larger
// expressions (e.g. n / ARRAY_LEN(x)).
#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0]))

// Loads a 32-bit value stored in little-endian byte order at x.
#define LOAD32L(x) \
    (((uint32_t)((x)[0])<< 0) | \
     ((uint32_t)((x)[1])<< 8) | \
     ((uint32_t)((x)[2])<<16) | \
     ((uint32_t)((x)[3])<<24))

// Loads a 64-bit value stored in little-endian byte order at x.
// The outer parentheses keep operators that bind tighter than '|' from
// capturing only half of the expansion.
#define LOAD64L(x) \
    ((((uint64_t)LOAD32L((x)+4)) << 32) | \
     (((uint64_t)LOAD32L((x)+0)) <<  0))

// Stores the low 16 bits of y at x in big-endian byte order.
#define STORE16B(x,y) do { \
    (x)[0] = (((y) >> 8)&0xFF); \
    (x)[1] = (((y) >> 0)&0xFF); \
} while(0)

// Loads a 16-bit value stored in big-endian byte order at x.
#define LOAD16B(x) \
    ((((uint16_t)(x)[0])<<8) | \
     (((uint16_t)(x)[1])<<0))

/**
* \brief Compares two arrays in constant time.
* \param [in] a first array
* \param [in] b second arrray
* \param [in] sz number of bytes to compare
* \returns 0 if arrays are equal, otherwise 1.
*/
uint8_t ct_memcmp(const void *a, const void *b, size_t sz);

const X86Features * get_cpu_caps(void);

#endif

+ 20
- 2
src/kem/frodo/frodokem640shake/clean/kem.c View File

@@ -14,6 +14,9 @@
#include "common.h"
#include "params.h"

#include "common/ct_check.h"
#include "common/utils.h"

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
// FrodoKEM's key generation
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
@@ -139,7 +142,6 @@ int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, cons
return 0;
}


int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
// FrodoKEM's key decapsulation
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
@@ -218,9 +220,25 @@ int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct
// Needs to avoid branching on secret data as per:
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
int8_t selector = PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
#if 0
int8_t selector = ct_memcmp(Bp, BBp, PARAMS_N * PARAMS_NBAR) | ct_memcmp(C, CC, PARAMS_NBAR * PARAMS_NBAR);
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
#else
// Is (Bp == BBp & C == CC) = true
//ct_poison(Bp, sizeof(Bp));
//ct_poison(BBp, sizeof(BBp));
if (ct_memcmp(Bp, BBp, 2*PARAMS_N*PARAMS_NBAR) == 0 && ct_memcmp(C, CC, 2*PARAMS_NBAR*PARAMS_NBAR) == 0) {
// Load k' to do ss = F(ct || k')
memcpy(Fin_k, kprime, CRYPTO_BYTES);
} else {
// Load s to do ss = F(ct || s)
// This branch is executed when a malicious ciphertext is decapsulated
// and is necessary for security. Note that the known answer tests
// will not exercise this line of code but it should not be removed.
memcpy(Fin_k, sk_s, CRYPTO_BYTES);
}
#endif
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:


+ 4
- 2
src/kem/frodo/frodokem640shake/clean/util.c View File

@@ -11,6 +11,8 @@
#include "common.h"
#include "params.h"

#include "common/ct_check.h"

static inline uint8_t min(uint8_t x, uint8_t y) {
if (x < y) {
return x;
@@ -246,9 +248,9 @@ int8_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Select one of the two input arrays to be moved to r.
    // If (selector == 0) then load r with a, otherwise (any non-zero selector,
    // in particular 1 and -1) load r with b.
    //
    // The selector is collapsed to an all-zeros / all-ones byte mask without
    // branching: for a non-zero s, (s | -s) has its top bit set.
    uint32_t s = (uint8_t)selector;
    uint32_t nonzero = (s | ((uint32_t)0 - s)) >> 31;
    uint8_t mask = (uint8_t)((uint32_t)0 - nonzero);

    for (size_t i = 0; i < len; i++) {
        r[i] = (uint8_t)((~mask & a[i]) | (mask & b[i]));
    }
}



src/kem/kyber/kyber512/clean/reduce.c → src/kem/kyber/common/reduce.c View File

@@ -3,7 +3,7 @@
#include <stdint.h>

/*************************************************
* Name: PQCLEAN_KYBER512_CLEAN_montgomery_reduce
* Name: kyber_montgomery_reduce
*
* Description: Montgomery reduction; given a 32-bit integer a, computes
* 16-bit integer congruent to a * R^-1 mod q, where R=2^16
@@ -13,7 +13,7 @@
*
* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
**************************************************/
int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a) {
int16_t kyber_montgomery_reduce(int32_t a) {
int32_t t;
int16_t u;

@@ -25,20 +25,18 @@ int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a) {
}

/*************************************************
* Name: PQCLEAN_KYBER512_CLEAN_barrett_reduce
* Name: kyber_barrett_reduce
*
* Description: Barrett reduction; given a 16-bit integer a, computes
* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2}
* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2}
*
* Arguments: - int16_t a: input integer to be reduced
*
* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
**************************************************/
int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a) {
int16_t kyber_barrett_reduce(int16_t a) {
int16_t t;
const int16_t v = ((1U << 26) + KYBER_Q / 2) / KYBER_Q;

t = ((int32_t)v * a + (1 << 25)) >> 26;
t *= KYBER_Q;
return a - t;
static const int32_t v = 20159;
t = ((v * a) + (1 << 25)) >> 26;
return a - (t*KYBER_Q);
}

+ 22
- 0
src/kem/kyber/common/reduce.h View File

@@ -0,0 +1,22 @@
#ifndef KYBER_REDUCE_H
#define KYBER_REDUCE_H

#include <stdint.h>

// TODO: Remove those once not used
#define PQCLEAN_KYBER512_CLEAN_montgomery_reduce kyber_montgomery_reduce
#define PQCLEAN_KYBER768_CLEAN_montgomery_reduce kyber_montgomery_reduce
#define PQCLEAN_KYBER1024_CLEAN_montgomery_reduce kyber_montgomery_reduce

#define PQCLEAN_KYBER512_CLEAN_barrett_reduce kyber_barrett_reduce
#define PQCLEAN_KYBER768_CLEAN_barrett_reduce kyber_barrett_reduce
#define PQCLEAN_KYBER1024_CLEAN_barrett_reduce kyber_barrett_reduce

#define MONT 2285 // 2^16 mod q
#define QINV 62209 // q^-1 mod 2^16

int16_t kyber_montgomery_reduce(int32_t a);

int16_t kyber_barrett_reduce(int16_t a);

#endif

+ 1
- 1
src/kem/kyber/kyber1024/clean/CMakeLists.txt View File

@@ -6,7 +6,7 @@ set(
ntt.c
poly.c
polyvec.c
reduce.c
../../common/reduce.c
symmetric-shake.c
verify.c
)


+ 1
- 1
src/kem/kyber/kyber1024/clean/ntt.c View File

@@ -1,6 +1,6 @@
#include "ntt.h"
#include "params.h"
#include "reduce.h"
#include "../../common/reduce.h"
#include <stdint.h>

/* Code to generate PQCLEAN_KYBER1024_CLEAN_zetas and zetas_inv used in the number-theoretic transform:


+ 1
- 1
src/kem/kyber/kyber1024/clean/poly.c View File

@@ -2,7 +2,7 @@
#include "ntt.h"
#include "params.h"
#include "poly.h"
#include "reduce.h"
#include "../../common/reduce.h"
#include "symmetric.h"
#include <stdint.h>



+ 0
- 44
src/kem/kyber/kyber1024/clean/reduce.c View File

@@ -1,44 +0,0 @@
#include "params.h"
#include "reduce.h"
#include <stdint.h>

/*************************************************
* Name: PQCLEAN_KYBER1024_CLEAN_montgomery_reduce
*
* Description: Montgomery reduction; given a 32-bit integer a, computes
* 16-bit integer congruent to a * R^-1 mod q, where R=2^16
*
* Arguments: - int32_t a: input integer to be reduced;
* has to be in {-q2^15,...,q2^15-1}
*
* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
**************************************************/
// Montgomery reduction of a 32-bit value a with R = 2^16.
// The statement order and the intermediate casts matter: each step relies on
// a specific truncation or widening.
int16_t PQCLEAN_KYBER1024_CLEAN_montgomery_reduce(int32_t a) {
int32_t t;
int16_t u;

// u = a * QINV, truncated to 16 bits. The product is formed in 64 bits so
// the multiplication itself cannot overflow before the truncation.
u = (int16_t)(a * (int64_t)QINV);
// t = u * q, computed in 32 bits.
t = (int32_t)u * KYBER_Q;
// With QINV being q^-1 mod 2^16, u*q agrees with a in the low 16 bits, so
// the difference is a multiple of 2^16.
t = a - t;
// Divide by R = 2^16.
t >>= 16;
return (int16_t)t;
}

/*************************************************
* Name: PQCLEAN_KYBER1024_CLEAN_barrett_reduce
*
* Description: Barrett reduction; given a 16-bit integer a, computes
* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2}
*
* Arguments: - int16_t a: input integer to be reduced
*
* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
**************************************************/
// Barrett reduction of a 16-bit value a modulo KYBER_Q.
int16_t PQCLEAN_KYBER1024_CLEAN_barrett_reduce(int16_t a) {
int16_t t;
// v = 2^26 / q rounded to the nearest integer (q/2 is added before dividing).
const int16_t v = ((1U << 26) + KYBER_Q / 2) / KYBER_Q;

// t = (v * a) / 2^26 rounded to nearest: 2^25 is added before the shift,
// which makes t an estimate of round(a / q). The product is formed in 32 bits.
t = ((int32_t)v * a + (1 << 25)) >> 26;
// Turn the quotient estimate into the multiple of q to subtract.
t *= KYBER_Q;
return a - t;
}

+ 0
- 13
src/kem/kyber/kyber1024/clean/reduce.h View File

@@ -1,13 +0,0 @@
#ifndef PQCLEAN_KYBER1024_CLEAN_REDUCE_H
#define PQCLEAN_KYBER1024_CLEAN_REDUCE_H
#include "params.h"
#include <stdint.h>

#define MONT 2285 // 2^16 mod q
#define QINV 62209 // q^-1 mod 2^16

int16_t PQCLEAN_KYBER1024_CLEAN_montgomery_reduce(int32_t a);

int16_t PQCLEAN_KYBER1024_CLEAN_barrett_reduce(int16_t a);

#endif

+ 1
- 1
src/kem/kyber/kyber512/avx2/indcpa.c View File

@@ -289,7 +289,7 @@ void PQCLEAN_KYBER512_AVX2_indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
const uint8_t coins[KYBER_SYMBYTES]) {
unsigned int i;
uint8_t seed[KYBER_SYMBYTES];
polyvec sp, pkpv, ep, at[KYBER_K], b;
polyvec sp, pkpv, ep, at[KYBER_K], b = {0};
poly v, k, epp;

unpack_pk(&pkpv, seed, pk);


+ 2
- 2
src/kem/kyber/kyber512/avx2/kem.c View File

@@ -51,9 +51,9 @@ int PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(unsigned char pk[KYBER_PUBLICKEYBYT
int PQCLEAN_KYBER512_AVX2_crypto_kem_enc(unsigned char ct[KYBER_CIPHERTEXTBYTES],
unsigned char ss[KYBER_SSBYTES],
const unsigned char pk[KYBER_PUBLICKEYBYTES]) {
uint8_t buf[2 * KYBER_SYMBYTES];
uint8_t buf[2 * KYBER_SYMBYTES] = {0};
/* Will contain key, coins */
uint8_t kr[2 * KYBER_SYMBYTES];
uint8_t kr[2 * KYBER_SYMBYTES] = {0};

randombytes(buf, KYBER_SYMBYTES);
/* Don't release system RNG output */


+ 1
- 1
src/kem/kyber/kyber512/avx2/polyvec.c View File

@@ -182,7 +182,7 @@ void PQCLEAN_KYBER512_AVX2_polyvec_invntt_tomont(polyvec *r) {
**************************************************/
void PQCLEAN_KYBER512_AVX2_polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) {
size_t i;
poly tmp;
poly tmp = {0};

PQCLEAN_KYBER512_AVX2_poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]);
for (i = 1; i < KYBER_K; i++) {


+ 0
- 1
src/kem/kyber/kyber512/clean/CMakeLists.txt View File

@@ -6,7 +6,6 @@ set(
ntt.c
poly.c
polyvec.c
reduce.c
symmetric-shake.c
verify.c
)


+ 1
- 1
src/kem/kyber/kyber512/clean/ntt.c View File

@@ -1,6 +1,6 @@
#include "ntt.h"
#include "params.h"
#include "reduce.h"
#include "../../common/reduce.h"
#include <stdint.h>

/* Code to generate PQCLEAN_KYBER512_CLEAN_zetas and zetas_inv used in the number-theoretic transform:


+ 1
- 1
src/kem/kyber/kyber512/clean/poly.c View File

@@ -2,7 +2,7 @@
#include "ntt.h"
#include "params.h"
#include "poly.h"
#include "reduce.h"
#include "../../common/reduce.h"
#include "symmetric.h"
#include <stdint.h>



+ 0
- 13
src/kem/kyber/kyber512/clean/reduce.h View File

@@ -1,13 +0,0 @@
/* Modular-reduction helpers for the Kyber512 "clean" implementation. */
#ifndef PQCLEAN_KYBER512_CLEAN_REDUCE_H
#define PQCLEAN_KYBER512_CLEAN_REDUCE_H
#include "params.h"
#include <stdint.h>

/* Precomputed constants for reduction modulo q. */
#define MONT 2285 // 2^16 mod q
#define QINV 62209 // q^-1 mod 2^16

/* Montgomery reduction of a 32-bit value to 16 bits; see reduce.c for the
 * accepted input range and the exact output range. */
int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a);

/* Barrett reduction of a 16-bit value modulo q; see reduce.c for the exact
 * output range. */
int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a);

#endif

+ 0
- 1
src/kem/kyber/kyber768/clean/CMakeLists.txt View File

@@ -6,7 +6,6 @@ set(
ntt.c
poly.c
polyvec.c
reduce.c
symmetric-shake.c
verify.c
)


+ 1
- 1
src/kem/kyber/kyber768/clean/ntt.c View File

@@ -1,6 +1,6 @@
#include "ntt.h"
#include "params.h"
#include "reduce.h"
#include "../../common/reduce.h"
#include <stdint.h>

/* Code to generate PQCLEAN_KYBER768_CLEAN_zetas and zetas_inv used in the number-theoretic transform:


+ 1
- 1
src/kem/kyber/kyber768/clean/poly.c View File

@@ -2,7 +2,7 @@
#include "ntt.h"
#include "params.h"
#include "poly.h"
#include "reduce.h"
#include "../../common/reduce.h"
#include "symmetric.h"
#include <stdint.h>



+ 0
- 44
src/kem/kyber/kyber768/clean/reduce.c View File

@@ -1,44 +0,0 @@
#include "params.h"
#include "reduce.h"
#include <stdint.h>

/*************************************************
* Name:        PQCLEAN_KYBER768_CLEAN_montgomery_reduce
*
* Description: Montgomery reduction. For a 32-bit integer a, produces a
*              16-bit integer congruent to a * R^-1 mod q with R = 2^16.
*
* Arguments:   - int32_t a: value to be reduced;
*                           must lie in {-q2^15,...,q2^15-1}
*
* Returns:     integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
**************************************************/
int16_t PQCLEAN_KYBER768_CLEAN_montgomery_reduce(int32_t a) {
    /* low = a * q^-1 mod 2^16, taken as a signed 16-bit value. */
    const int16_t low = (int16_t)(a * (int64_t)QINV);
    /* Subtracting low * q clears the low 16 bits, so the shift below is exact. */
    const int32_t cleared = a - (int32_t)low * KYBER_Q;

    return (int16_t)(cleared >> 16);
}

/*************************************************
* Name:        PQCLEAN_KYBER768_CLEAN_barrett_reduce
*
* Description: Barrett reduction. Maps a 16-bit integer a to the centered
*              representative of a modulo q (q = KYBER_Q), i.e. a value in
*              {-(q-1)/2,...,(q-1)/2}.
*
* Arguments:   - int16_t a: value to be reduced
*
* Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
**************************************************/
int16_t PQCLEAN_KYBER768_CLEAN_barrett_reduce(int16_t a) {
    /* v = round(2^26 / q): fixed-point approximation of 1/q. */
    const int16_t v = ((1U << 26) + KYBER_Q / 2) / KYBER_Q;
    /* Rounded quotient estimate: (v * a + 2^25) >> 26. */
    int16_t quotient = (int16_t)(((int32_t)v * a + (1 << 25)) >> 26);

    /* Multiply back by q (truncated to 16 bits) and remove it from a. */
    quotient = (int16_t)(quotient * KYBER_Q);
    return (int16_t)(a - quotient);
}

+ 0
- 13
src/kem/kyber/kyber768/clean/reduce.h View File

@@ -1,13 +0,0 @@
/* Modular-reduction helpers for the Kyber768 "clean" implementation. */
#ifndef PQCLEAN_KYBER768_CLEAN_REDUCE_H
#define PQCLEAN_KYBER768_CLEAN_REDUCE_H
#include "params.h"
#include <stdint.h>

/* Precomputed constants for reduction modulo q. */
#define MONT 2285 // 2^16 mod q
#define QINV 62209 // q^-1 mod 2^16

/* Montgomery reduction: for a in {-q2^15,...,q2^15-1} returns a value in
 * {-q+1,...,q-1} congruent to a * 2^-16 modulo q. */
int16_t PQCLEAN_KYBER768_CLEAN_montgomery_reduce(int32_t a);

/* Barrett reduction: returns the centered representative of a modulo q,
 * i.e. a value in {-(q-1)/2,...,(q-1)/2}. */
int16_t PQCLEAN_KYBER768_CLEAN_barrett_reduce(int16_t a);

#endif

+ 20
- 0
src/kem/mceliece/mceliece348864/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece348864 "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE348864
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece348864_clean
PQCLEAN_MCELIECE348864_OPT "${SRC_CLEAN_MCELIECE348864}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece348864f/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece348864f "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE348864F
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece348864f_clean
PQCLEAN_MCELIECE348864F_OPT "${SRC_CLEAN_MCELIECE348864F}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece460896/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece460896 "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE460896
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece460896_clean
PQCLEAN_MCELIECE460896_OPT "${SRC_CLEAN_MCELIECE460896}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece460896f/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece460896f "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE460896F
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece460896f_clean
PQCLEAN_MCELIECE460896F_OPT "${SRC_CLEAN_MCELIECE460896F}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece6688128/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece6688128 "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE6688128
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece6688128_clean
PQCLEAN_MCELIECE6688128_OPT "${SRC_CLEAN_MCELIECE6688128}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece6688128f/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece6688128f "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE6688128F
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece6688128f_clean
PQCLEAN_MCELIECE6688128F_OPT "${SRC_CLEAN_MCELIECE6688128F}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece6960119/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece6960119 "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE6960119
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece6960119_clean
PQCLEAN_MCELIECE6960119_OPT "${SRC_CLEAN_MCELIECE6960119}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece6960119f/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece6960119f "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE6960119F
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece6960119f_clean
PQCLEAN_MCELIECE6960119F_OPT "${SRC_CLEAN_MCELIECE6960119F}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece8192128/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece8192128 "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE8192128
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece8192128_clean
PQCLEAN_MCELIECE8192128_OPT "${SRC_CLEAN_MCELIECE8192128}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/mceliece/mceliece8192128f/clean/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Source files of the mceliece8192128f "clean" implementation (paths relative to this directory).
set(
SRC_CLEAN_MCELIECE8192128F
aes256ctr.c
benes.c
bm.c
controlbits.c
decrypt.c
encrypt.c
gf.c
operations.c
pk_gen.c
root.c
sk_gen.c
synd.c
transpose.c
util.c
)

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
# NOTE(review): the identifier ends in _OPT although target and variable are named "clean" - confirm.
define_kem_alg(mceliece8192128f_clean
PQCLEAN_MCELIECE8192128F_OPT "${SRC_CLEAN_MCELIECE8192128F}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 20
- 0
src/kem/sike/CMakeLists.txt View File

@@ -0,0 +1,20 @@
# Portable C sources of the SIKE/p434 implementation (paths relative to this directory).
set(
  SRC_CLEAN_SIKE_P434
  p434/fpx.c
  p434/fp_generic.c
  p434/isogeny.c
  p434/params.c
  p434/sike.c)

# On x86-64 also build the assembly field arithmetic and define PQC_ASM, which
# the C sources test to dispatch to the *_asm routines.
# ARCH is expanded inside quotes so that an unset or empty ARCH makes the
# comparison false instead of producing a malformed if() condition.
if("${ARCH}" STREQUAL "ARCH_x86_64")
  add_definitions(-DPQC_ASM=1)
  list(APPEND SRC_CLEAN_SIKE_P434 p434/fp-x86_64.S)
endif()

# Pass the target name, an identifier, the source list and this directory to
# define_kem_alg (defined elsewhere in the project).
define_kem_alg(
  sike_p434_clean
  PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 81
- 0
src/kem/sike/includes/sike/sike.h View File

@@ -0,0 +1,81 @@
#ifndef SIKE_H_
#define SIKE_H_

#include <stdint.h>
#include <string.h>
#include "randombytes.h"

/* SIKE
*
* SIKE is an isogeny-based post-quantum key encapsulation mechanism. Description of the
* algorithm is provided in [SIKE]. This implementation uses 434-bit field size. The code
* is based on "Additional_Implementations" from PQC NIST submission package which can
* be found here:
* https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip
*
* [SIKE] https://sike.org/files/SIDH-spec.pdf
*/

// SIKE_PUB_BYTESZ is the number of bytes in a public key.
#define SIKE_PUB_BYTESZ 330
// SIKE_PRV_BYTESZ is the number of bytes in a private key.
#define SIKE_PRV_BYTESZ 28
// SIKE_SS_BYTESZ is the number of bytes in a shared key.
#define SIKE_SS_BYTESZ 16
// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated
// with the public key (see 1.4 of SIKE).
#define SIKE_MSG_BYTESZ 16
// SIKE_CT_BYTESZ is the number of bytes in a ciphertext: a public-key-sized
// part followed by SIKE_MSG_BYTESZ bytes.
#define SIKE_CT_BYTESZ (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ)

// SIKE_keypair outputs a public and secret key. In case of success
// function returns 1, otherwise 0.
//   out_priv: receives the private key (SIKE_PRV_BYTESZ bytes)
//   out_pub:  receives the public key (SIKE_PUB_BYTESZ bytes)
int SIKE_keypair(
uint8_t out_priv[SIKE_PRV_BYTESZ],
uint8_t out_pub[SIKE_PUB_BYTESZ]);

// SIKE_encaps generates and encrypts a random session key, writing those values to
// |out_shared_key| and |out_ciphertext|, respectively.
//   out_shared_key: receives the session key (SIKE_SS_BYTESZ bytes)
//   out_ciphertext: receives the ciphertext (SIKE_CT_BYTESZ bytes)
//   pub_key:        recipient's public key (SIKE_PUB_BYTESZ bytes)
// The function has no return value, so no failure can be reported to the caller.
void SIKE_encaps(
uint8_t out_shared_key[SIKE_SS_BYTESZ],
uint8_t out_ciphertext[SIKE_CT_BYTESZ],
const uint8_t pub_key[SIKE_PUB_BYTESZ]);

// SIKE_decaps outputs a random session key, writing it to |out_shared_key|.
//   out_shared_key: receives the session key (SIKE_SS_BYTESZ bytes)
//   ciphertext:     ciphertext to decapsulate (SIKE_CT_BYTESZ bytes)
//   pub_key:        public key belonging to |priv_key| (SIKE_PUB_BYTESZ bytes)
//   priv_key:       private key (SIKE_PRV_BYTESZ bytes)
// The function has no return value, so no failure can be reported to the caller.
void SIKE_decaps(
uint8_t out_shared_key[SIKE_SS_BYTESZ],
const uint8_t ciphertext[SIKE_CT_BYTESZ],
const uint8_t pub_key[SIKE_PUB_BYTESZ],
const uint8_t priv_key[SIKE_PRV_BYTESZ]);

// boilerplate needed for integration
// The secret-key buffer holds SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ bytes followed by
// a copy of the public key. The sums are parenthesized so the macros expand
// correctly inside larger expressions such as 2 * ..._SECRETKEYBYTES.
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ+SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME "SIKE/p434"

// The AVX2 names carry the same sizes as the CLEAN names above.
#define PQCLEAN_SIKE434_AVX2_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ+SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_AVX2_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_ALGNAME "SIKE/p434"

// Generates a SIKE/p434 key pair.
//   pk: receives the public key (SIKE_PUB_BYTESZ bytes)
//   sk: receives the secret key; the private key goes to the start of the buffer
//       and a copy of the public key is placed at offset
//       SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ.
// Returns 0 on success and -1 when SIKE_keypair reports failure.
// NOTE(review): this wrapper never writes sk[SIKE_PRV_BYTESZ .. SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ);
// confirm whether SIKE_keypair fills that range or whether it is unused.
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // SIKE_keypair returns 1 on success and 0 otherwise; propagate a failure
    // instead of unconditionally reporting success.
    if (SIKE_keypair(sk, pk) != 1) {
        return -1;
    }
    // KATs require the public key to be concatenated after private key
    memcpy(&sk[SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ], pk, SIKE_PUB_BYTESZ);
    return 0;
}
// Encapsulation wrapper: writes the ciphertext to |ct| and the shared secret to
// |ss| for public key |pk|. Always returns 0 because SIKE_encaps returns nothing.
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // SIKE_encaps takes its outputs in the order (shared key, ciphertext).
    SIKE_encaps(ss, ct, pk);

    return 0;
}

// Decapsulation wrapper: writes the shared secret for ciphertext |ct| to |ss|.
// |sk| has the layout produced by the keypair wrapper: the private key at the
// start and the public key at offset SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ.
// Always returns 0 because SIKE_decaps returns nothing.
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    const uint8_t *stored_pk = &sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ];

    SIKE_decaps(ss, ct, stored_pk, sk);
    return 0;
}


#endif

+ 926
- 0
src/kem/sike/p434/fp-x86_64.S View File

@@ -0,0 +1,926 @@
.text

/*
 * Constant used by sike_fpadd_asm and sike_fpsub_asm as the modulus to
 * subtract / add back (2*p434 according to the label name), least significant
 * limb first. The value has seven 64-bit limbs but only six quads are stored:
 * the routines use the entry at offset 8 for both limb 1 and limb 2.
 */
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE
.quad 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF
.quad 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC
.quad 0x0004683E4E2EE688


/*
 * Four limbs that sike_fprdc_asm multiplies with the input limbs; the products
 * are accumulated 24 bytes (three limbs) above the limb being multiplied.
 * NOTE(review): presumably the non-zero upper limbs of p434+1, whose three low
 * limbs would be zero - confirm against params.prime_p1.
 */
.Lp434p1:
.quad 0xFDC1767AE3000000
.quad 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056
.quad 0x0002341F27177344

/*
 * sike_fpadd_asm: seven-limb field addition with conditional correction.
 * C prototype (fp_generic.c): void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c)
 *   %rdi = a, %rsi = b (read), %rdx = c (written)
 * Computes a + b, subtracts .Lp434x2, and adds .Lp434x2 back under an all-ones
 * mask when the subtraction borrowed. No data-dependent branches are used.
 * Uses %rax, %rcx, %rdi, %rsi, %r8-%r11 as scratch; %r12-%r14 are saved/restored.
 */
.globl sike_fpadd_asm
.hidden sike_fpadd_asm
.type sike_fpadd_asm,@function
sike_fpadd_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32

/* rax will become the borrow mask. */
xorq %rax,%rax

/* r8..r14 = a + b (movq does not modify the carry flag, so the adc chain is unbroken). */
movq 0(%rdi),%r8
addq 0(%rsi),%r8
movq 8(%rdi),%r9
adcq 8(%rsi),%r9
movq 16(%rdi),%r10
adcq 16(%rsi),%r10
movq 24(%rdi),%r11
adcq 24(%rsi),%r11
movq 32(%rdi),%r12
adcq 32(%rsi),%r12
movq 40(%rdi),%r13
adcq 40(%rsi),%r13
movq 48(%rdi),%r14
adcq 48(%rsi),%r14

/* r8..r14 -= .Lp434x2; the quad at offset 8 serves limbs 1 and 2. */
movq .Lp434x2(%rip),%rcx
subq %rcx,%r8
movq 8+.Lp434x2(%rip),%rcx
sbbq %rcx,%r9
sbbq %rcx,%r10
movq 16+.Lp434x2(%rip),%rcx
sbbq %rcx,%r11
movq 24+.Lp434x2(%rip),%rcx
sbbq %rcx,%r12
movq 32+.Lp434x2(%rip),%rcx
sbbq %rcx,%r13
movq 40+.Lp434x2(%rip),%rcx
sbbq %rcx,%r14

/* rax = 0 if no borrow, all ones if the subtraction borrowed. */
sbbq $0,%rax

/* Masked low limbs of .Lp434x2 (a and b pointers are no longer needed). */
movq .Lp434x2(%rip),%rdi
andq %rax,%rdi
movq 8+.Lp434x2(%rip),%rsi
andq %rax,%rsi
movq 16+.Lp434x2(%rip),%rcx
andq %rax,%rcx

/* Add the masked constant back and store limbs 0..3 of the result. */
addq %rdi,%r8
movq %r8,0(%rdx)
adcq %rsi,%r9
movq %r9,8(%rdx)
adcq %rsi,%r10
movq %r10,16(%rdx)
adcq %rcx,%r11
movq %r11,24(%rdx)

/* The andq instructions below clobber CF: park the carry in cl and restore it with bt. */
setc %cl
movq 24+.Lp434x2(%rip),%r8
andq %rax,%r8
movq 32+.Lp434x2(%rip),%r9
andq %rax,%r9
movq 40+.Lp434x2(%rip),%r10
andq %rax,%r10
btq $0,%rcx

/* Finish the masked addition and store limbs 4..6. */
adcq %r8,%r12
movq %r12,32(%rdx)
adcq %r9,%r13
movq %r13,40(%rdx)
adcq %r10,%r14
movq %r14,48(%rdx)

popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc

/*
 * sike_fpsub_asm: seven-limb field subtraction with conditional correction.
 * C prototype (fp_generic.c): void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c)
 *   %rdi = a, %rsi = b (read), %rdx = c (written)
 * Computes a - b and adds .Lp434x2 under an all-ones mask when the subtraction
 * borrowed. No data-dependent branches are used.
 * Uses %rax, %rcx, %rdi, %rsi, %r8-%r11 as scratch; %r12-%r14 are saved/restored.
 */
.globl sike_fpsub_asm
.hidden sike_fpsub_asm
.type sike_fpsub_asm,@function
sike_fpsub_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32

/* rax will become the borrow mask. */
xorq %rax,%rax

/* r8..r14 = a - b */
movq 0(%rdi),%r8
subq 0(%rsi),%r8
movq 8(%rdi),%r9
sbbq 8(%rsi),%r9
movq 16(%rdi),%r10
sbbq 16(%rsi),%r10
movq 24(%rdi),%r11
sbbq 24(%rsi),%r11
movq 32(%rdi),%r12
sbbq 32(%rsi),%r12
movq 40(%rdi),%r13
sbbq 40(%rsi),%r13
movq 48(%rdi),%r14
sbbq 48(%rsi),%r14

/* rax = 0 if no borrow, all ones if the subtraction borrowed. */
sbbq $0x0,%rax

/* Masked low limbs of .Lp434x2; the quad at offset 8 serves limbs 1 and 2. */
movq .Lp434x2(%rip),%rdi
andq %rax,%rdi
movq 8+.Lp434x2(%rip),%rsi
andq %rax,%rsi
movq 16+.Lp434x2(%rip),%rcx
andq %rax,%rcx

/* Add the masked constant and store limbs 0..3 of the result. */
addq %rdi,%r8
movq %r8,0(%rdx)
adcq %rsi,%r9
movq %r9,8(%rdx)
adcq %rsi,%r10
movq %r10,16(%rdx)
adcq %rcx,%r11
movq %r11,24(%rdx)

/* The andq instructions below clobber CF: park the carry in cl and restore it with bt. */
setc %cl
movq 24+.Lp434x2(%rip),%r8
andq %rax,%r8
movq 32+.Lp434x2(%rip),%r9
andq %rax,%r9
movq 40+.Lp434x2(%rip),%r10
andq %rax,%r10
btq $0x0,%rcx

/* Finish the masked addition and store limbs 4..6. */
adcq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
movq %r12,32(%rdx)
movq %r13,40(%rdx)
movq %r14,48(%rdx)

popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * sike_mpadd_asm: plain seven-limb addition without any modular correction.
 *   %rdi = first operand, %rsi = second operand (read), %rdx = result (written)
 * The carry out of the most significant limb is not stored anywhere.
 * Uses only caller-saved scratch registers (%rcx, %r8-%r11).
 */
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.type sike_mpadd_asm,@function
sike_mpadd_asm:
.cfi_startproc
/* Limbs 0..4 */
movq 0(%rdi),%r8;
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%rcx
addq 0(%rsi),%r8
adcq 8(%rsi),%r9
adcq 16(%rsi),%r10
adcq 24(%rsi),%r11
adcq 32(%rsi),%rcx
movq %r8,0(%rdx)
movq %r9,8(%rdx)
movq %r10,16(%rdx)
movq %r11,24(%rdx)
movq %rcx,32(%rdx)

/* Limbs 5..6; the carry from limb 4 is still live because movq leaves flags untouched. */
movq 40(%rdi),%r8
movq 48(%rdi),%r9
adcq 40(%rsi),%r8
adcq 48(%rsi),%r9
movq %r8,40(%rdx)
movq %r9,48(%rdx)
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * sike_mpsubx2_asm: fourteen-limb (double-length) subtraction.
 *   %rdi = minuend, %rsi = subtrahend (read), %rdx = result (written)
 * On return %rax is 0 when the subtraction did not borrow and all ones when it did.
 * Uses only caller-saved scratch registers (%rcx, %r8-%r11).
 */
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.type sike_mpsubx2_asm,@function
sike_mpsubx2_asm:
.cfi_startproc
xorq %rax,%rax

/* Limbs 0..4 */
movq 0(%rdi),%r8
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%rcx
subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%rcx
movq %r8,0(%rdx)
movq %r9,8(%rdx)
movq %r10,16(%rdx)
movq %r11,24(%rdx)
movq %rcx,32(%rdx)

/* Limbs 5..9; the borrow is carried over in CF across the movq instructions. */
movq 40(%rdi),%r8
movq 48(%rdi),%r9
movq 56(%rdi),%r10
movq 64(%rdi),%r11
movq 72(%rdi),%rcx
sbbq 40(%rsi),%r8
sbbq 48(%rsi),%r9
sbbq 56(%rsi),%r10
sbbq 64(%rsi),%r11
sbbq 72(%rsi),%rcx
movq %r8,40(%rdx)
movq %r9,48(%rdx)
movq %r10,56(%rdx)
movq %r11,64(%rdx)
movq %rcx,72(%rdx)

/* Limbs 10..13, then turn the final borrow into a 0 / all-ones value in rax. */
movq 80(%rdi),%r8
movq 88(%rdi),%r9
movq 96(%rdi),%r10
movq 104(%rdi),%r11
sbbq 80(%rsi),%r8
sbbq 88(%rsi),%r9
sbbq 96(%rsi),%r10
sbbq 104(%rsi),%r11
sbbq $0x0,%rax
movq %r8,80(%rdx)
movq %r9,88(%rdx)
movq %r10,96(%rdx)
movq %r11,104(%rdx)
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * sike_mpdblsubx2_asm: fourteen-limb in-place double subtraction.
 *   %rdi, %rsi = the two subtrahends (read), %rdx = value updated in place
 * Computes [rdx] = [rdx] - [rdi] - [rsi], processed as two seven-limb halves.
 * The borrows produced by the low half are counted in %rax and removed from
 * the first limb of the high half. The borrow out of the high half is not
 * returned. %r12 and %r13 are saved/restored.
 */
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.type sike_mpdblsubx2_asm,@function
sike_mpdblsubx2_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24

/* rax counts the borrows of the low half (0, 1 or 2). */
xorq %rax,%rax


/* Low half: load limbs 0..6 of the in/out operand and subtract [rdi]. */
movq 0(%rdx),%r8
movq 8(%rdx),%r9
movq 16(%rdx),%r10
movq 24(%rdx),%r11
movq 32(%rdx),%r12
movq 40(%rdx),%r13
movq 48(%rdx),%rcx
subq 0(%rdi),%r8
sbbq 8(%rdi),%r9
sbbq 16(%rdi),%r10
sbbq 24(%rdi),%r11
sbbq 32(%rdi),%r12
sbbq 40(%rdi),%r13
sbbq 48(%rdi),%rcx
adcq $0x0,%rax


/* Low half: subtract [rsi] and record its borrow as well. */
subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%r12
sbbq 40(%rsi),%r13
sbbq 48(%rsi),%rcx
adcq $0x0,%rax


/* Store limbs 0..6. */
movq %r8,0(%rdx)
movq %r9,8(%rdx)
movq %r10,16(%rdx)
movq %r11,24(%rdx)
movq %r12,32(%rdx)
movq %r13,40(%rdx)
movq %rcx,48(%rdx)


/* High half: load limbs 7..13. */
movq 56(%rdx),%r8
movq 64(%rdx),%r9
movq 72(%rdx),%r10
movq 80(%rdx),%r11
movq 88(%rdx),%r12
movq 96(%rdx),%r13
movq 104(%rdx),%rcx

/* Remove the accumulated low-half borrows, then subtract the high limbs of [rdi]. */
subq %rax,%r8
sbbq 56(%rdi),%r8
sbbq 64(%rdi),%r9
sbbq 72(%rdi),%r10
sbbq 80(%rdi),%r11
sbbq 88(%rdi),%r12
sbbq 96(%rdi),%r13
sbbq 104(%rdi),%rcx


/* Subtract the high limbs of [rsi]. */
subq 56(%rsi),%r8
sbbq 64(%rsi),%r9
sbbq 72(%rsi),%r10
sbbq 80(%rsi),%r11
sbbq 88(%rsi),%r12
sbbq 96(%rsi),%r13
sbbq 104(%rsi),%rcx


/* Store limbs 7..13. */
movq %r8,56(%rdx)
movq %r9,64(%rdx)
movq %r10,72(%rdx)
movq %r11,80(%rdx)
movq %r12,88(%rdx)
movq %r13,96(%rdx)
movq %rcx,104(%rdx)

popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc

/*
 * sike_fprdc_asm: reduction of a fourteen-limb value to seven limbs using the
 * .Lp434p1 constant (called from sike_fprdc in fp_generic.c).
 * C prototype (fp_generic.c): void sike_fprdc_asm(const felm_t ma, felm_t mc)
 *   %rdi = ma, fourteen limbs; %rsi = mc, seven limbs (written)
 * NOTE: intermediate sums are stored back into the buffer at %rdi (offsets
 * 24..104), so the input is modified even though the C prototype marks it const.
 * Uses MULX, ADCX and ADOX; the C caller only takes this path when the CPU
 * reports BMI2 and ADX.
 * The input is consumed in four steps (limbs 0-1, 2-3, 4-5, 6); each step
 * multiplies those limbs by the four .Lp434p1 limbs and adds the product three
 * limbs above the first multiplied limb.
 * %r12-%r15 are saved/restored, although %r14 and %r15 are not otherwise used here.
 */
.globl sike_fprdc_asm
.hidden sike_fprdc_asm
.type sike_fprdc_asm,@function
sike_fprdc_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15, -40

/* Step 1: (limb 0, limb 1) x .Lp434p1 -> six limbs in r8..r13. */
xorq %rax,%rax
movq 0+0(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

xorq %rax,%rax
movq 0+8(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r13,%rcx
adcxq %r13,%r9
adcxq %rcx,%r10

mulxq 8+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r11
adoxq %rcx,%r10

mulxq 16+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r12
adoxq %rcx,%r11

mulxq 24+.Lp434p1(%rip),%rcx,%r13
adcxq %rax,%r13
adoxq %rcx,%r12
adoxq %rax,%r13

/* Add the product into limbs 3..9 of the buffer and ripple the carry through limbs 10..13. */
xorq %rcx,%rcx
addq 24(%rdi),%r8
adcq 32(%rdi),%r9
adcq 40(%rdi),%r10
adcq 48(%rdi),%r11
adcq 56(%rdi),%r12
adcq 64(%rdi),%r13
adcq 72(%rdi),%rcx
movq %r8,24(%rdi)
movq %r9,32(%rdi)
movq %r10,40(%rdi)
movq %r11,48(%rdi)
movq %r12,56(%rdi)
movq %r13,64(%rdi)
movq %rcx,72(%rdi)
movq 80(%rdi),%r8
movq 88(%rdi),%r9
movq 96(%rdi),%r10
movq 104(%rdi),%r11
adcq $0x0,%r8
adcq $0x0,%r9
adcq $0x0,%r10
adcq $0x0,%r11
movq %r8,80(%rdi)
movq %r9,88(%rdi)
movq %r10,96(%rdi)
movq %r11,104(%rdi)

/* Step 2: (limb 2, limb 3) x .Lp434p1 -> six limbs in r8..r13. */
xorq %rax,%rax
movq 16+0(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

xorq %rax,%rax
movq 16+8(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r13,%rcx
adcxq %r13,%r9
adcxq %rcx,%r10

mulxq 8+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r11
adoxq %rcx,%r10

mulxq 16+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r12
adoxq %rcx,%r11

mulxq 24+.Lp434p1(%rip),%rcx,%r13
adcxq %rax,%r13
adoxq %rcx,%r12
adoxq %rax,%r13

/* Add the product into limbs 5..11 and ripple the carry through limbs 12..13. */
xorq %rcx,%rcx
addq 40(%rdi),%r8
adcq 48(%rdi),%r9
adcq 56(%rdi),%r10
adcq 64(%rdi),%r11
adcq 72(%rdi),%r12
adcq 80(%rdi),%r13
adcq 88(%rdi),%rcx
movq %r8,40(%rdi)
movq %r9,48(%rdi)
movq %r10,56(%rdi)
movq %r11,64(%rdi)
movq %r12,72(%rdi)
movq %r13,80(%rdi)
movq %rcx,88(%rdi)
movq 96(%rdi),%r8
movq 104(%rdi),%r9
adcq $0x0,%r8
adcq $0x0,%r9
movq %r8,96(%rdi)
movq %r9,104(%rdi)

/* Step 3: (limb 4, limb 5) x .Lp434p1 -> six limbs in r8..r13. */
xorq %rax,%rax
movq 32+0(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

xorq %rax,%rax
movq 32+8(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r13,%rcx
adcxq %r13,%r9
adcxq %rcx,%r10

mulxq 8+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r11
adoxq %rcx,%r10

mulxq 16+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r12
adoxq %rcx,%r11

mulxq 24+.Lp434p1(%rip),%rcx,%r13
adcxq %rax,%r13
adoxq %rcx,%r12
adoxq %rax,%r13

/* Add limbs 7..13: the two lowest sums are final (output limbs 0 and 1), the rest go back to limbs 9..13. */
xorq %rcx,%rcx
addq 56(%rdi),%r8
adcq 64(%rdi),%r9
adcq 72(%rdi),%r10
adcq 80(%rdi),%r11
adcq 88(%rdi),%r12
adcq 96(%rdi),%r13
adcq 104(%rdi),%rcx
movq %r8,0(%rsi)
movq %r9,8(%rsi)
movq %r10,72(%rdi)
movq %r11,80(%rdi)
movq %r12,88(%rdi)
movq %r13,96(%rdi)
movq %rcx,104(%rdi)

/* Step 4: limb 6 x .Lp434p1 -> five limbs in r8..r12. */
xorq %rax,%rax
movq 48(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

/* Add limbs 9..13 and store output limbs 2..6. */
addq 72(%rdi),%r8
adcq 80(%rdi),%r9
adcq 88(%rdi),%r10
adcq 96(%rdi),%r11
adcq 104(%rdi),%r12
movq %r8,16(%rsi)
movq %r9,24(%rsi)
movq %r10,32(%rsi)
movq %r11,40(%rsi)
movq %r12,48(%rsi)


popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * sike_mpmul_asm: 7x7-limb multiplication producing a fourteen-limb product
 * (called from sike_mpmul in fp_generic.c).
 * C prototype (fp_generic.c): void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c)
 *   %rdi = a, %rsi = b (read), %rdx = c (written; copied to %rcx because MULX
 *   takes one factor implicitly from %rdx)
 * Uses MULX, ADCX and ADOX; the C caller only takes this path when the CPU
 * reports BMI2 and ADX.
 * Structure (one level of Karatsuba-style splitting, low part = limbs 0..3,
 * high part = limbs 4..6):
 *   1. a_lo+a_hi and b_lo+b_hi are formed on the stack together with masks for
 *      their carries, and the carry cross terms are precomputed.
 *   2. Three products are computed: (a_lo+a_hi)*(b_lo+b_hi) on the stack,
 *      a_lo*b_lo into c[0..7] and a_hi*b_hi into c[8..13].
 *   3. The middle term (first product minus the other two) is added into c at
 *      limb 4.
 * Stack frame: 96 bytes of scratch at 0..95(%rsp).
 * %rbx, %rbp and %r12-%r15 are saved/restored.
 */
.globl sike_mpmul_asm
.hidden sike_mpmul_asm
.type sike_mpmul_asm,@function
sike_mpmul_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15, -40


/* rcx = c; rax will hold the carry mask of a_lo+a_hi. */
movq %rdx,%rcx
xorq %rax,%rax


/* r8..r11 = a_lo */
movq 0(%rdi),%r8
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11

pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx, -48
pushq %rbp
.cfi_offset rbp, -56
.cfi_adjust_cfa_offset 8
subq $96,%rsp
.cfi_adjust_cfa_offset 96

/* 0..31(%rsp) = a_lo + a_hi; rax = 0 or all ones depending on the carry out. */
addq 32(%rdi),%r8
adcq 40(%rdi),%r9
adcq 48(%rdi),%r10
adcq $0x0,%r11
sbbq $0x0,%rax
movq %r8,0(%rsp)
movq %r9,8(%rsp)
movq %r10,16(%rsp)
movq %r11,24(%rsp)


/* 32..63(%rsp) = b_lo + b_hi; rbx = 0 or all ones depending on the carry out. */
xorq %rbx,%rbx
movq 0(%rsi),%r12
movq 8(%rsi),%r13
movq 16(%rsi),%r14
movq 24(%rsi),%r15
addq 32(%rsi),%r12
adcq 40(%rsi),%r13
adcq 48(%rsi),%r14
adcq $0x0,%r15
sbbq $0x0,%rbx
movq %r12,32(%rsp)
movq %r13,40(%rsp)
movq %r14,48(%rsp)
movq %r15,56(%rsp)


/* (b_lo+b_hi) masked by the carry of the a-sum */
andq %rax,%r12
andq %rax,%r13
andq %rax,%r14
andq %rax,%r15


/* (a_lo+a_hi) masked by the carry of the b-sum */
andq %rbx,%r8
andq %rbx,%r9
andq %rbx,%r10
andq %rbx,%r11


/* 64..95(%rsp) = sum of the two masked values (cross terms owed to the carries). */
addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
movq %r8,64(%rsp)
movq %r9,72(%rsp)
movq %r10,80(%rsp)
movq %r11,88(%rsp)


/* Product 1: (a_lo+a_hi) x (b_lo+b_hi), 4x4 limbs, result written to 0..63(%rsp).
   Each limb of the first factor is read before its slot is overwritten; the
   second factor at 32..63(%rsp) is overwritten only after its last use. */
movq 0+0(%rsp),%rdx
mulxq 32+0(%rsp),%r9,%r8
movq %r9,0+0(%rsp)
mulxq 32+8(%rsp),%r10,%r9
xorq %rax,%rax
adoxq %r10,%r8
mulxq 32+16(%rsp),%r11,%r10
adoxq %r11,%r9
mulxq 32+24(%rsp),%r12,%r11
adoxq %r12,%r10

movq 0+8(%rsp),%rdx
mulxq 32+0(%rsp),%r12,%r13
adoxq %rax,%r11
xorq %rax,%rax
mulxq 32+8(%rsp),%r15,%r14
adoxq %r8,%r12
movq %r12,0+8(%rsp)
adcxq %r15,%r13
mulxq 32+16(%rsp),%rbx,%r15
adcxq %rbx,%r14
adoxq %r9,%r13
mulxq 32+24(%rsp),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r10,%r14

movq 0+16(%rsp),%rdx
mulxq 32+0(%rsp),%r8,%r9
adoxq %r11,%r15
adoxq %rax,%rbx
xorq %rax,%rax
mulxq 32+8(%rsp),%r11,%r10
adoxq %r13,%r8
movq %r8,0+16(%rsp)
adcxq %r11,%r9
mulxq 32+16(%rsp),%r12,%r11
adcxq %r12,%r10
adoxq %r14,%r9
mulxq 32+24(%rsp),%rbp,%r12
adcxq %rbp,%r11
adcxq %rax,%r12

adoxq %r15,%r10
adoxq %rbx,%r11
adoxq %rax,%r12

movq 0+24(%rsp),%rdx
mulxq 32+0(%rsp),%r8,%r13
xorq %rax,%rax
mulxq 32+8(%rsp),%r15,%r14
adcxq %r15,%r13
adoxq %r8,%r9
mulxq 32+16(%rsp),%rbx,%r15
adcxq %rbx,%r14
adoxq %r13,%r10
mulxq 32+24(%rsp),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r14,%r11
adoxq %r15,%r12
adoxq %rax,%rbx
movq %r9,0+24(%rsp)
movq %r10,0+32(%rsp)
movq %r11,0+40(%rsp)
movq %r12,0+48(%rsp)
movq %rbx,0+56(%rsp)



/* Product 2: a_lo x b_lo, 4x4 limbs, result written to c[0..7]. */
movq 0+0(%rdi),%rdx
mulxq 0+0(%rsi),%r9,%r8
movq %r9,0+0(%rcx)
mulxq 0+8(%rsi),%r10,%r9
xorq %rax,%rax
adoxq %r10,%r8
mulxq 0+16(%rsi),%r11,%r10
adoxq %r11,%r9
mulxq 0+24(%rsi),%r12,%r11
adoxq %r12,%r10

movq 0+8(%rdi),%rdx
mulxq 0+0(%rsi),%r12,%r13
adoxq %rax,%r11
xorq %rax,%rax
mulxq 0+8(%rsi),%r15,%r14
adoxq %r8,%r12
movq %r12,0+8(%rcx)
adcxq %r15,%r13
mulxq 0+16(%rsi),%rbx,%r15
adcxq %rbx,%r14
adoxq %r9,%r13
mulxq 0+24(%rsi),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r10,%r14

movq 0+16(%rdi),%rdx
mulxq 0+0(%rsi),%r8,%r9
adoxq %r11,%r15
adoxq %rax,%rbx
xorq %rax,%rax
mulxq 0+8(%rsi),%r11,%r10
adoxq %r13,%r8
movq %r8,0+16(%rcx)
adcxq %r11,%r9
mulxq 0+16(%rsi),%r12,%r11
adcxq %r12,%r10
adoxq %r14,%r9
mulxq 0+24(%rsi),%rbp,%r12
adcxq %rbp,%r11
adcxq %rax,%r12

adoxq %r15,%r10
adoxq %rbx,%r11
adoxq %rax,%r12

movq 0+24(%rdi),%rdx
mulxq 0+0(%rsi),%r8,%r13
xorq %rax,%rax
mulxq 0+8(%rsi),%r15,%r14
adcxq %r15,%r13
adoxq %r8,%r9
mulxq 0+16(%rsi),%rbx,%r15
adcxq %rbx,%r14
adoxq %r13,%r10
mulxq 0+24(%rsi),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r14,%r11
adoxq %r15,%r12
adoxq %rax,%rbx
movq %r9,0+24(%rcx)
movq %r10,0+32(%rcx)
movq %r11,0+40(%rcx)
movq %r12,0+48(%rcx)
movq %rbx,0+56(%rcx)



/* Product 3: a_hi x b_hi, 3x3 limbs, result written to c[8..13]. */
movq 32+0(%rdi),%rdx
mulxq 32+0(%rsi),%r9,%r8
movq %r9,64+0(%rcx)
mulxq 32+8(%rsi),%r10,%r9
xorq %rax,%rax
adoxq %r10,%r8
mulxq 32+16(%rsi),%r11,%r10
adoxq %r11,%r9

movq 32+8(%rdi),%rdx
mulxq 32+0(%rsi),%r12,%r11
adoxq %rax,%r10
xorq %rax,%rax

mulxq 32+8(%rsi),%r14,%r13
adoxq %r8,%r12
movq %r12,64+8(%rcx)
adcxq %r14,%r11

mulxq 32+16(%rsi),%r8,%r14
adoxq %r9,%r11
adcxq %r8,%r13
adcxq %rax,%r14
adoxq %r10,%r13

movq 32+16(%rdi),%rdx
mulxq 32+0(%rsi),%r8,%r9
adoxq %rax,%r14
xorq %rax,%rax

mulxq 32+8(%rsi),%r10,%r12
adoxq %r11,%r8
movq %r8,64+16(%rcx)
adcxq %r13,%r9

mulxq 32+16(%rsi),%r11,%r8
adcxq %r14,%r12
adcxq %rax,%r8
adoxq %r10,%r9
adoxq %r12,%r11
adoxq %rax,%r8
movq %r9,64+24(%rcx)
movq %r11,64+32(%rcx)
movq %r8,64+40(%rcx)




/* r8..r11 = carry cross terms + upper four limbs of product 1. */
movq 64(%rsp),%r8
movq 72(%rsp),%r9
movq 80(%rsp),%r10
movq 88(%rsp),%r11

movq 32(%rsp),%rax
addq %rax,%r8
movq 40(%rsp),%rax
adcq %rax,%r9
movq 48(%rsp),%rax
adcq %rax,%r10
movq 56(%rsp),%rax
adcq %rax,%r11


/* r12..r15 = lower four limbs of product 1; subtract a_lo*b_lo (c[0..7]) from the eight-limb value. */
movq 0(%rsp),%r12
movq 8(%rsp),%r13
movq 16(%rsp),%r14
movq 24(%rsp),%r15
subq 0(%rcx),%r12
sbbq 8(%rcx),%r13
sbbq 16(%rcx),%r14
sbbq 24(%rcx),%r15
sbbq 32(%rcx),%r8
sbbq 40(%rcx),%r9
sbbq 48(%rcx),%r10
sbbq 56(%rcx),%r11


/* Subtract a_hi*b_hi (c[8..13], six limbs). */
subq 64(%rcx),%r12
sbbq 72(%rcx),%r13
sbbq 80(%rcx),%r14
sbbq 88(%rcx),%r15
sbbq 96(%rcx),%r8
sbbq 104(%rcx),%r9
sbbq $0x0,%r10
sbbq $0x0,%r11

/* Add the middle term into c starting at limb 4 and ripple the carry into limbs 12 and 13. */
addq 32(%rcx),%r12
movq %r12,32(%rcx)
adcq 40(%rcx),%r13
movq %r13,40(%rcx)
adcq 48(%rcx),%r14
movq %r14,48(%rcx)
adcq 56(%rcx),%r15
movq %r15,56(%rcx)
adcq 64(%rcx),%r8
movq %r8,64(%rcx)
adcq 72(%rcx),%r9
movq %r9,72(%rcx)
adcq 80(%rcx),%r10
movq %r10,80(%rcx)
adcq 88(%rcx),%r11
movq %r11,88(%rcx)
movq 96(%rcx),%r12
adcq $0x0,%r12
movq %r12,96(%rcx)
movq 104(%rcx),%r13
adcq $0x0,%r13
movq %r13,104(%rcx)

/* Release the scratch area and restore callee-saved registers. */
addq $96,%rsp
.cfi_adjust_cfa_offset -96
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_same_value rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_same_value rbx


popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc

+ 207
- 0
src/kem/sike/p434/fp_generic.c View File

@@ -0,0 +1,207 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: portable modular arithmetic for p434
*********************************************************************************************/
#include "common/utils.h"

#include "utils.h"
#include "fpx.h"

// Prototypes of the routines implemented in fp-x86_64.S.
// NOTE(review): these are declared unless PQC_NOASM is defined, whereas the
// calls below are compiled only when PQC_ASM is defined - confirm the two
// macros are meant to be used together.
#ifndef PQC_NOASM
// Reduction. The routine reads 2*NWORDS_FIELD words from ma and writes
// intermediate sums back into that buffer, despite the const qualifier.
void sike_fprdc_asm(const felm_t ma, felm_t mc);
// Full multiplication c = a*b; c receives 2*NWORDS_FIELD words.
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
// Field addition / subtraction with conditional correction by 2*p434.
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
#endif

// Global constants
extern const struct params_t params;

// Digit multiplication, digit * digit -> 2-digit result.
// c[0] receives the low word and c[1] the high word of a*b. The product is
// assembled from half-word partial products, so no double-width type is needed.
static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
{
    const unsigned int half = (unsigned int)(sizeof(crypto_word_t) * 4);  // bits in half a word
    const crypto_word_t lo_mask = (crypto_word_t)(-1) >> half;
    const crypto_word_t hi_mask = (crypto_word_t)(-1) << half;

    // Split both operands into half-words.
    const crypto_word_t a_lo = a & lo_mask, a_hi = a >> half;
    const crypto_word_t b_lo = b & lo_mask, b_hi = b >> half;

    // The four partial products.
    const crypto_word_t ll = a_lo * b_lo;
    const crypto_word_t lh = a_lo * b_hi;
    const crypto_word_t hl = a_hi * b_lo;
    const crypto_word_t hh = a_hi * b_hi;

    // Low word: bottom half of ll, plus the column sum shifted into the top half.
    crypto_word_t column = (ll >> half) + (hl & lo_mask) + (lh & lo_mask);
    crypto_word_t carry = column >> half;
    c[0] = (ll & lo_mask) ^ (column << half);

    // High word: next column in the bottom half, remaining bits of hh plus
    // the column overflow in the top half.
    column = (hl >> half) + (lh >> half) + (hh & lo_mask) + carry;
    c[1] = (column & lo_mask) ^ ((hh & hi_mask) + (column & hi_mask));
}

// Field addition, c = a+b mod p434 (result kept in the range below 2*p434).
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpadd_asm(a,b,c);
#else
    unsigned int k;
    unsigned int carry;
    crypto_word_t p2_mask;

    // c = a + b
    carry = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(carry, a[k], b[k], carry, c[k]);
    }

    // c = c - 2*p434; carry ends up holding the final borrow
    carry = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        SUBC(carry, c[k], params.prime_x2[k], carry, c[k]);
    }

    // All-ones when the subtraction borrowed, zero otherwise.
    p2_mask = 0 - (crypto_word_t)carry;

    // Add 2*p434 back only if the subtraction went negative (branch-free).
    carry = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(carry, c[k], params.prime_x2[k] & p2_mask, carry, c[k]);
    }
#endif
}

// Field subtraction, c = a-b mod p434 (result kept in the range below 2*p434).
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpsub_asm(a,b,c);
#else
    unsigned int k;
    unsigned int borrow;
    crypto_word_t p2_mask;

    // c = a - b; borrow ends up holding the final borrow
    borrow = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        SUBC(borrow, a[k], b[k], borrow, c[k]);
    }

    // All-ones when the subtraction borrowed, zero otherwise.
    p2_mask = 0 - (crypto_word_t)borrow;

    // Add 2*p434 only if the difference went negative (branch-free).
    borrow = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(borrow, c[k], params.prime_x2[k] & p2_mask, borrow, c[k]);
    }
#endif
}

// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
// c receives 2*NWORDS_FIELD words.
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
{
#ifdef PQC_ASM
    // The assembly routine uses MULX/ADCX/ADOX, so it is taken only when the
    // CPU reports BMI2 and ADX; otherwise fall through to the portable code.
    if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
        sike_mpmul_asm(a,b,c);
        return;
    }
#endif

    // (acc_hi, acc_mid, acc_lo) is a three-word column accumulator.
    crypto_word_t acc_hi = 0, acc_mid = 0, acc_lo = 0, prod[2];
    unsigned int col, row;
    unsigned int carry = 0;

    // Walk the output columns. Column col sums a[row]*b[col-row] over every
    // row for which both indices lie inside the operands.
    for (col = 0; col < 2*NWORDS_FIELD-1; col++) {
        const unsigned int first = (col < NWORDS_FIELD) ? 0 : col-NWORDS_FIELD+1;
        const unsigned int last = (col < NWORDS_FIELD) ? col : NWORDS_FIELD-1;

        for (row = first; row <= last; row++) {
            MUL(a[row], b[col-row], prod+1, prod[0]);
            ADDC(0, prod[0], acc_lo, carry, acc_lo);
            ADDC(carry, prod[1], acc_mid, carry, acc_mid);
            acc_hi += carry;
        }

        // Emit the finished word and shift the accumulator down by one word.
        c[col] = acc_lo;
        acc_lo = acc_mid;
        acc_mid = acc_hi;
        acc_hi = 0;
    }
    c[2*NWORDS_FIELD-1] = acc_lo;
}

// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.
// Although declared as felm_t, ma is read up to index 2*NWORDS_FIELD-1, i.e. it
// must point to a double-length value. mc receives NWORDS_FIELD words.
// The portable path below only reads ma; the assembly path also writes
// intermediate sums into ma.
void sike_fprdc(const felm_t ma, felm_t mc)
{
#ifdef PQC_ASM
// The assembly routine uses MULX/ADCX/ADOX, hence the BMI2 and ADX check.
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
sike_fprdc_asm(ma, mc);
return;
}
#endif
unsigned int i, j, carry, count = ZERO_WORDS;
// (t, u, v) is a three-word column accumulator, v being the least significant word.
crypto_word_t UV[2], t = 0, u = 0, v = 0;

for (i = 0; i < NWORDS_FIELD; i++) {
mc[i] = 0;
}

// First half: columns 0 .. NWORDS_FIELD-1. Each column adds the products
// mc[j]*prime_p1[i-j] and the input word ma[i]; the low word becomes mc[i].
for (i = 0; i < NWORDS_FIELD; i++) {
for (j = 0; j < i; j++) {
// NOTE(review): i is unsigned, so for i < ZERO_WORDS-1 the bound
// i-ZERO_WORDS+1 appears to wrap around and the test passes; presumably
// harmless because the low ZERO_WORDS words of params.prime_p1 are
// expected to be zero - confirm.
if (j < (i-ZERO_WORDS+1)) {
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i] = v;
// Shift the accumulator down by one word.
v = u;
u = t;
t = 0;
}

// Second half: columns NWORDS_FIELD .. 2*NWORDS_FIELD-2. The low word of each
// column overwrites mc[i-NWORDS_FIELD], producing the final result in place.
for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
// count starts at ZERO_WORDS and drops by one per column until it reaches
// zero; it trims the upper end of the j range below.
if (count > 0) {
count -= 1;
}
for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
if (j < (NWORDS_FIELD-count)) {
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i-NWORDS_FIELD] = v;
v = u;
u = t;
t = 0;
}
// Top word: add the most significant input word; the carry out is discarded.
ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);
mc[NWORDS_FIELD-1] = v;
}

+ 282
- 0
src/kem/sike/p434/fpx.c View File

@@ -0,0 +1,282 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: core functions over GF(p) and GF(p^2)
*********************************************************************************************/
#include <stddef.h>
#include "utils.h"
#include "fpx.h"

extern const struct params_t params;

// Montgomery squaring, mc = ma^2 * R^-1 mod p.
// Forms the double-width product ma*ma and passes it through the Montgomery
// reduction, so mc follows sike_fprdc's output range.
static void fpsqr_mont(const felm_t ma, felm_t mc)
{
    dfelm_t square = {0};

    sike_mpmul(ma, ma, square);  // square = ma * ma (double width)
    sike_fprdc(square, mc);      // mc = square * R^-1
}

// Chain to compute a^((p-3)/4) using Montgomery arithmetic.
// The input a is overwritten with the result.
//
// A table of odd powers is built first: tt = a^2, t[0] = a^3 and
// t[i+1] = t[i]*a^2, i.e. t[i] = a^(2*i+3) for i = 0..30. The chain then
// alternates runs of squarings with one multiplication by a table entry
// (or by a itself). The order and counts of these steps encode the exponent
// and must not be changed.
static void fpinv_chain_mont(felm_t a)
{
unsigned int i, j;
felm_t t[31], tt;

// Precomputed table
fpsqr_mont(a, tt);
sike_fpmul_mont(a, tt, t[0]);
for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]);

// tt now becomes the running accumulator, starting from a.
sike_fpcopy(a, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[5], tt, tt);
for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[14], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[3], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[23], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[13], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[24], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[7], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[12], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[1], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[21], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[2], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[19], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[1], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[24], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[26], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[16], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[10], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[6], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[0], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[20], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[9], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[25], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[26], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
// This step multiplies by a itself rather than by a table entry.
sike_fpmul_mont(a, tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[28], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[6], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[10], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[22], tt, tt);
// Tail: 35 repetitions of (6 squarings, multiply by t[30] = a^63).
for (j = 0; j < 35; j++) {
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
}
sike_fpcopy(tt, a);
}

// Field inversion in Montgomery form, a = a^(-1)*R mod p, performed in place.
// Raises a to (p-3)/4 with the fixed chain, squares twice to reach a^(p-3),
// and multiplies by a once more to obtain a^(p-2).
static void fpinv_mont(felm_t a)
{
    felm_t acc = {0};

    sike_fpcopy(a, acc);
    fpinv_chain_mont(acc);             // acc = a^((p-3)/4)
    for (unsigned int k = 0; k < 2; k++) {
        fpsqr_mont(acc, acc);          // after both rounds: acc = a^(p-3)
    }
    sike_fpmul_mont(a, acc, a);        // a = a * a^(p-3)
}

// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
#ifndef PQC_ASM
// Word-by-word addition with carry propagation over nwords words.
// The carry out of the most significant word is the return value.
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint8_t carry_bit = 0;
    size_t idx = 0;

    while (idx < nwords) {
        ADDC(carry_bit, a[idx], b[idx], carry_bit, c[idx]);
        idx++;
    }
    return carry_bit;
}

// Multiprecision subtraction over nwords words, c = a-b.
// The borrow out of the most significant word is the return value.
inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint32_t borrow_bit = 0;
    size_t idx = 0;

    while (idx < nwords) {
        SUBC(borrow_bit, a[idx], b[idx], borrow_bit, c[idx]);
        idx++;
    }
    return borrow_bit;
}
#endif

// Field-length multiprecision addition, c = a+b over NWORDS_FIELD words.
// No carry is reported to the caller on either path.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#ifndef PQC_ASM
    mp_add(a, b, c, NWORDS_FIELD);   // portable path; returned carry is dropped
#else
    sike_mpadd_asm(a, b, c);
#endif
}

// Double-length multiprecision subtraction, c = a-b over 2*NWORDS_FIELD words.
// Returns an all-ones mask when the subtraction borrowed (c < 0), otherwise 0.
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    const crypto_word_t borrow = (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD);
    return 0 - borrow;               // 0 stays 0x00..0, 1 becomes 0xFF..F
#else
    return sike_mpsubx2_asm(a, b, c);
#endif
}

// Double-length in-place subtraction, c = c-a-b over 2*NWORDS_FIELD words.
// Callers are expected to pass c > a and c > b; borrows are not reported.
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    (void)mp_sub(c, a, c, 2*NWORDS_FIELD);   // c = c - a
    (void)mp_sub(c, b, c, 2*NWORDS_FIELD);   // c = c - b
#else
    sike_mpdblsubx2_asm(a, b, c);
#endif
}

// Copies the NWORDS_FIELD words of field element a into c, lowest index first.
void sike_fpcopy(const felm_t a, felm_t c) {
    size_t idx = 0;

    while (idx < NWORDS_FIELD) {
        c[idx] = a[idx];
        idx++;
    }
}

// Field multiplication using Montgomery arithmetic, mc = ma*mb*R^-1 mod p,
// with R as used by sike_fprdc (2^448 for p434).
// Computes the double-width product and reduces it; mc follows sike_fprdc's
// output range.
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{
    dfelm_t product = {0};

    sike_mpmul(ma, mb, product);   // product = ma * mb (double width)
    sike_fprdc(product, mc);       // mc = product * R^-1
}

// Leaves Montgomery form: c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
// A Montgomery multiplication by the plain integer 1 strips the factor R; the
// final correction brings the result into [0, p-1].
void sike_from_mont(const felm_t ma, felm_t c)
{
    felm_t unit = {1};   // word 0 is 1, the remaining words are zero

    sike_fpmul_mont(ma, unit, c);
    sike_fpcorrection(c);
}

// GF(p^2) squaring in Montgomery form, c = a^2.
// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Uses (a0+a1*i)^2 = (a0+a1)(a0-a1) + (2*a0*a1)*i, i.e. two field multiplications.
// All three temporaries are formed before c is written.
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
    felm_t sum = {0}, diff = {0}, twice_re = {0};

    mp_addfast(a->c0, a->c0, twice_re);        // twice_re = 2*a0
    sike_fpsub(a->c0, a->c1, diff);            // diff = a0-a1
    mp_addfast(a->c0, a->c1, sum);             // sum = a0+a1
    sike_fpmul_mont(sum, diff, c->c0);         // c0 = (a0+a1)(a0-a1)
    sike_fpmul_mont(twice_re, a->c1, c->c1);   // c1 = 2*a0*a1
}

// Modular negation in place, a = params.prime_x2 - a (i.e. 2*p - a).
// Input/output: a in [0, 2*p-1]
// The borrow out of the top word is discarded.
void sike_fpneg(felm_t a) {
    uint32_t borrow_bit = 0;
    size_t idx = 0;

    while (idx < NWORDS_FIELD) {
        SUBC(borrow_bit, params.prime_x2[idx], a[idx], borrow_bit, a[idx]);
        idx++;
    }
}

// Modular division by two, c = a/2 mod p.
// Input : a in [0, 2*p-1]
// Output: c in [0, 2*p-1]
// When a is odd, p is added first so the value becomes even; the sum is then
// shifted right by one bit across all words. The parity test is a mask, not a
// branch.
void sike_fpdiv2(const felm_t a, felm_t c) {
    const crypto_word_t odd_mask = 0 - (crypto_word_t)(a[0] & 1);   // all ones iff a is odd
    uint32_t carry_bit = 0;
    size_t idx;

    // c = a + (p & odd_mask)
    for (idx = 0; idx != NWORDS_FIELD; idx++) {
        ADDC(carry_bit, a[idx], params.prime[idx] & odd_mask, carry_bit, c[idx]);
    }

    // Right shift by one: each word takes its new top bit from the next word up.
    for (idx = 0; idx + 1 < NWORDS_FIELD; idx++) {
        c[idx] = (c[idx + 1] << (RADIX - 1)) ^ (c[idx] >> 1);
    }
    c[NWORDS_FIELD - 1] >>= 1;
}

// Modular correction in place: reduces a from [0, 2*p-1] to [0, p-1].
// Subtracts p unconditionally, then adds back (p & mask), where mask is all
// ones only if the subtraction borrowed. No data-dependent branch is taken.
void sike_fpcorrection(felm_t a) {
    uint32_t flag = 0;
    size_t idx;

    // Trial subtraction a -= p; flag ends up holding the final borrow.
    for (idx = 0; idx != NWORDS_FIELD; idx++) {
        SUBC(flag, a[idx], params.prime[idx], flag, a[idx]);
    }
    const crypto_word_t restore = 0 - (crypto_word_t)flag;

    // Undo the subtraction when it went negative.
    flag = 0;
    for (idx = 0; idx != NWORDS_FIELD; idx++) {
        ADDC(flag, a[idx], params.prime[idx] & restore, flag, a[idx]);
    }
}

// GF(p^2) multiplication in Montgomery form, c = a*b.
// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Three double-width products are formed (Karatsuba style) and only two
// Montgomery reductions are performed. All reads of a and b happen before c
// is written.
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
    felm_t a_sum = {0}, b_sum = {0};
    dfelm_t re_prod, im_prod, cross;

    mp_addfast(a->c0, a->c1, a_sum);          // a_sum = a0+a1
    mp_addfast(b->c0, b->c1, b_sum);          // b_sum = b0+b1
    sike_mpmul(a->c0, b->c0, re_prod);        // re_prod = a0*b0
    sike_mpmul(a->c1, b->c1, im_prod);        // im_prod = a1*b1
    sike_mpmul(a_sum, b_sum, cross);          // cross = (a0+a1)*(b0+b1)
    mp_dblsubfast(re_prod, im_prod, cross);   // cross = a0*b1 + a1*b0

    // re_prod = a0*b0 - a1*b1; neg_mask is all ones when that went negative.
    const crypto_word_t neg_mask = mp_subfast(re_prod, im_prod, re_prod);

    // a_sum is no longer needed; reuse it for (p & neg_mask).
    for (size_t w = 0; w < NWORDS_FIELD; w++) {
        a_sum[w] = params.prime[w] & neg_mask;
    }

    sike_fprdc(cross, c->c1);                 // c1 = a0*b1 + a1*b0, reduced
    // Adding p to the upper NWORDS_FIELD words adds a multiple of p, which
    // compensates for the borrow without changing the value mod p.
    mp_addfast(&re_prod[NWORDS_FIELD], a_sum, &re_prod[NWORDS_FIELD]);
    sike_fprdc(re_prod, c->c0);               // c0 = a0*b0 - a1*b1, reduced
}

// GF(p^2) inversion in Montgomery form, performed in place:
// a = (a0-i*a1)/(a0^2+a1^2).
// A single field inversion of the norm a0^2+a1^2 is applied to the conjugate.
void sike_fp2inv_mont(f2elm_t a) {
    f2elm_t norm = {0};

    fpsqr_mont(a->c0, norm->c0);                 // norm.c0 = a0^2
    fpsqr_mont(a->c1, norm->c1);                 // norm.c1 = a1^2
    sike_fpadd(norm->c0, norm->c1, norm->c0);    // norm.c0 = a0^2+a1^2
    fpinv_mont(norm->c0);                        // norm.c0 = (a0^2+a1^2)^-1

    sike_fpneg(a->c1);                           // a = a0-i*a1 (conjugate)
    sike_fpmul_mont(a->c0, norm->c0, a->c0);     // scale the real part
    sike_fpmul_mont(a->c1, norm->c0, a->c1);     // scale the imaginary part
}

+ 110
- 0
src/kem/sike/p434/fpx.h View File

@@ -0,0 +1,110 @@
#ifndef FPX_H_
#define FPX_H_

#include "utils.h"

#if defined(__cplusplus)
extern "C" {
#endif

// Modular addition, c = a+b mod p.
void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
// Modular subtraction, c = a-b mod p.
void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
// Modular division by two, c = a/2 mod p.
void sike_fpdiv2(const felm_t a, felm_t c);
// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
void sike_fpcorrection(felm_t a);
// Multiprecision multiply, c = a*b, where a and b are field-length and c is
// double-length.
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
// Montgomery reduction of a double-length value, c = a*R^-1 mod 2*p434.
void sike_fprdc(const dfelm_t a, felm_t c);
// Double-length multiprecision subtraction, c = c-a-b.
// All three operands are double-length (dfelm_t), matching the call in fpx.c.
void sike_mpdblsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
// Double-length multiprecision subtraction, c = a-b.
// Returns an all-ones mask if the result is negative, 0 otherwise.
crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
// Field-length multiprecision addition, c = a+b
void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
// Modular negation, a = -a mod p.
void sike_fpneg(felm_t a);
// Copy of a field element, c = a
void sike_fpcopy(const felm_t a, felm_t c);
// NOTE(review): the previous comment here was a copy of sike_fpcopy's.
// Presumably sets the field element a to zero - definition not in this header.
void sike_fpzero(felm_t a);
// Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
void sike_from_mont(const felm_t ma, felm_t c);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2)
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
void sike_fp2inv_mont(f2elm_t a);
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
// Modular correction, a = a in GF(p^2).
// A function-like macro with this name is defined later in this header, so
// ordinary calls after that point expand the macro.
void sike_fp2correction(f2elm_t a);

#if defined(__cplusplus)
} // extern C
#endif

// GF(p^2) addition, c = a+b in GF(p^2).
// Applies sike_fpadd to the c0 and c1 coordinates separately.
// Each argument is parenthesized so any pointer-valued expression may be
// passed; each is evaluated twice, so arguments must have no side effects.
#define sike_fp2add(a, b, c)                     \
    do {                                         \
        sike_fpadd((a)->c0, (b)->c0, (c)->c0);   \
        sike_fpadd((a)->c1, (b)->c1, (c)->c1);   \
    } while(0)

// GF(p^2) subtraction, c = a-b in GF(p^2).
// Applies sike_fpsub to the c0 and c1 coordinates separately.
// Each argument is parenthesized so any pointer-valued expression may be
// passed; each is evaluated twice, so arguments must have no side effects.
#define sike_fp2sub(a, b, c)                     \
    do {                                         \
        sike_fpsub((a)->c0, (b)->c0, (c)->c0);   \
        sike_fpsub((a)->c1, (b)->c1, (c)->c1);   \
    } while(0)

// Copy a GF(p^2) element, c = a.
// Applies sike_fpcopy to the c0 and c1 coordinates separately.
// Each argument is parenthesized so any pointer-valued expression may be
// passed; each is evaluated twice, so arguments must have no side effects.
#define sike_fp2copy(a, c)               \
    do {                                 \
        sike_fpcopy((a)->c0, (c)->c0);   \
        sike_fpcopy((a)->c1, (c)->c1);   \
    } while(0)

// GF(p^2) negation, a = -a in GF(p^2).
// Applies sike_fpneg to the c0 and c1 coordinates separately, in place.
// The argument is parenthesized so any pointer-valued expression may be
// passed; it is evaluated twice, so it must have no side effects.
#define sike_fp2neg(a)           \
    do {                         \
        sike_fpneg((a)->c0);     \
        sike_fpneg((a)->c1);     \
    } while(0)

// GF(p^2) division by two, c = a/2 in GF(p^2).
// Applies sike_fpdiv2 to the c0 and c1 coordinates separately.
// Each argument is parenthesized so any pointer-valued expression may be
// passed; each is evaluated twice, so arguments must have no side effects.
#define sike_fp2div2(a, c)               \
    do {                                 \
        sike_fpdiv2((a)->c0, (c)->c0);   \
        sike_fpdiv2((a)->c1, (c)->c1);   \
    } while(0)

// Modular correction, a = a in GF(p^2).
// Applies sike_fpcorrection to the c0 and c1 coordinates separately, in place.
// The argument is parenthesized so any pointer-valued expression may be
// passed; it is evaluated twice, so it must have no side effects.
#define sike_fp2correction(a)          \
    do {                               \
        sike_fpcorrection((a)->c0);    \
        sike_fpcorrection((a)->c1);    \
    } while(0)

// Conversion of a GF(p^2) element to Montgomery representation,
// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
// Multiplies each coordinate by params.mont_R2 with sike_fpmul_mont; a
// `params` object must be visible where the macro is used.
// Each argument is parenthesized so any pointer-valued expression may be
// passed; each is evaluated twice, so arguments must have no side effects.
#define sike_to_fp2mont(a, mc)                                 \
    do {                                                       \
        sike_fpmul_mont((a)->c0, params.mont_R2, (mc)->c0);    \
        sike_fpmul_mont((a)->c1, params.mont_R2, (mc)->c1);    \
    } while(0)

// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
// c_i = ma_i*R^(-1) = a_i in GF(p^2).
// Applies sike_from_mont to the c0 and c1 coordinates separately.
// Each argument is parenthesized so any pointer-valued expression may be
// passed; each is evaluated twice, so arguments must have no side effects.
#define sike_from_fp2mont(ma, c)             \
    do {                                     \
        sike_from_mont((ma)->c0, (c)->c0);   \
        sike_from_mont((ma)->c1, (c)->c1);   \
    } while(0)

#endif // FPX_H_

+ 262
- 0
src/kem/sike/p434/isogeny.c View File

@@ -0,0 +1,262 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: elliptic curve and isogeny functions
*********************************************************************************************/
#include <stddef.h>
#include <string.h>
#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

// P is read only by the first two calls, before Q is first written, so P and Q
// may be the same point (xDBLe relies on this).
static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
{ // Doubling of a Montgomery point in projective coordinates (X:Z).
// Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C.
// Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
f2elm_t t0 = {0}, t1 = {0};

sike_fp2sub(P->X, P->Z, t0); // t0 = X1-Z1
sike_fp2add(P->X, P->Z, t1); // t1 = X1+Z1
sike_fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2
sike_fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2
sike_fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2
sike_fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
sike_fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2
sike_fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
sike_fp2add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
sike_fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
}

// Computes [2^e](X:Z) on a Montgomery curve with projective constants by
// doubling e times.
// Input:  P = (XP:ZP) with xP = XP/ZP, and curve constants A24plus = A+2C,
//         C24 = 4C.
// Output: Q <- (2^e)*P. For e = 0, Q is simply a copy of P.
void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
{
    // memmove rather than memcpy: the caller may pass the same point as P and Q.
    memmove(Q, P, sizeof(*P));

    for (size_t left = e; left != 0; left--) {
        xDBL(Q, Q, A24plus, C24);   // in-place doubling of the running point
    }
}

// Writes A24plus, C24 and coeff[0..2]; P is only read.
void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
// Input: projective point of order four P = (X4:Z4).
// Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients
// that are used to evaluate the isogeny at a point in eval_4_isog().
// On return: coeff[0] = 4*Z4^2, coeff[1] = X4-Z4, coeff[2] = X4+Z4.

sike_fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4
sike_fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4
sike_fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2
sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2
sike_fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4
sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2
sike_fp2sqr_mont(P->X, A24plus); // A24plus = X4^2
sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2
sike_fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4
}

// P is updated in place; coeff[0..2] are only read.
void eval_4_isog(point_proj_t P, f2elm_t* coeff)
{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
// by the 3 coefficients in coeff (computed in the function get_4_isog()).
// Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
// Output: the projective point P = phi(P) = (X:Z) in the codomain.
f2elm_t t0 = {0}, t1 = {0};

sike_fp2add(P->X, P->Z, t0); // t0 = X+Z
sike_fp2sub(P->X, P->Z, t1); // t1 = X-Z
sike_fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1]
sike_fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2]
sike_fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z)
sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z)
sike_fp2add(P->X, P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
sike_fp2sub(P->X, P->Z, P->Z); // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2] (squared two lines below, so the sign does not matter)
sike_fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
sike_fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2
sike_fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
sike_fp2sub(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z)
sike_fp2mul_mont(P->X, t1, P->X); // Xfinal
sike_fp2mul_mont(P->Z, t0, P->Z); // Zfinal
}


// P is read only in the first and third calls, before Q is first written, so P
// and Q may be the same point (xTPLe relies on this).
void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
{ // Tripling of a Montgomery point in projective coordinates (X:Z).
// Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
// Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
// The temporaries are left uninitialized; each is fully written before it is read.
f2elm_t t0, t1, t2, t3, t4, t5, t6;

sike_fp2sub(P->X, P->Z, t0); // t0 = X-Z
sike_fp2sqr_mont(t0, t2); // t2 = (X-Z)^2
sike_fp2add(P->X, P->Z, t1); // t1 = X+Z
sike_fp2sqr_mont(t1, t3); // t3 = (X+Z)^2
sike_fp2add(t0, t1, t4); // t4 = 2*X
sike_fp2sub(t1, t0, t0); // t0 = 2*Z
sike_fp2sqr_mont(t4, t1); // t1 = 4*X^2
sike_fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2
sike_fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
sike_fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2
sike_fp2mul_mont(t3, t5, t3); // t3 = (X+Z)^2 * t5 = A24plus*(X+Z)^4
sike_fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2
sike_fp2mul_mont(t2, t6, t2); // t2 = (X-Z)^2 * t6 = A24minus*(X-Z)^4
sike_fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
sike_fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
sike_fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
sike_fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^4 - A24plus*(X+Z)^4
sike_fp2sqr_mont(t2, t2); // t2 = t2^2
sike_fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2
sike_fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
sike_fp2sqr_mont(t1, t1); // t1 = t1^2
sike_fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1
}

// Computes [3^e](X:Z) on a Montgomery curve with projective constants by
// tripling e times.
// Input:  P = (XP:ZP) with xP = XP/ZP, and curve constants A24plus = A+2C,
//         A24minus = A-2C.
// Output: Q <- (3^e)*P. For e = 0, Q is simply a copy of P.
void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
{
    // memmove rather than memcpy: the caller may pass the same point as P and Q.
    memmove(Q, P, sizeof(*P));

    for (size_t left = e; left != 0; left--) {
        xTPL(Q, Q, A24minus, A24plus);   // in-place tripling of the running point
    }
}

// Writes A24minus, A24plus and coeff[0..1]; P is only read.
void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
// Input: projective point of order three P = (X3:Z3).
// Output: the 3-isogenous Montgomery curve with projective coefficient A/C.
// On return: coeff[0] = X-Z and coeff[1] = X+Z (consumed by eval_3_isog()).
f2elm_t t0 = {0}, t1 = {0}, t2 = {0}, t3 = {0}, t4 = {0};

sike_fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z
sike_fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2
sike_fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z
sike_fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2
sike_fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2
sike_fp2add(coeff[0], coeff[1], t3); // t3 = 2*X
sike_fp2sqr_mont(t3, t3); // t3 = 4*X^2
sike_fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
sike_fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2
sike_fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2
sike_fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
sike_fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
sike_fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
sike_fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
sike_fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
sike_fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
sike_fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
sike_fp2add(A24minus, t0, A24plus); // A24plus = A24minus + t0, i.e. the value of t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
}


// Q is updated in place; coeff[0] and coeff[1] are only read.
void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and
// a point P with 2 coefficients in coeff (computed in the function get_3_isog()).
// Inputs: projective points P = (X3:Z3) and Q = (X:Z).
// Output: the projective point Q <- phi(Q) = (X3:Z3).
// The temporaries are left uninitialized; each is fully written before it is read.
f2elm_t t0, t1, t2;

sike_fp2add(Q->X, Q->Z, t0); // t0 = X+Z
sike_fp2sub(Q->X, Q->Z, t1); // t1 = X-Z
sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z)
sike_fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z)
sike_fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z)
sike_fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z)
sike_fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
sike_fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
sike_fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
sike_fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
}


// Uses one GF(p^2) inversion of the product z1*z2*z3 plus multiplications.
void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
{ // 3-way simultaneous inversion
// Input: z1,z2,z3
// Output: 1/z1,1/z2,1/z3 (override inputs).
// The temporaries are left uninitialized; each is fully written before it is read.
f2elm_t t0, t1, t2, t3;

sike_fp2mul_mont(z1, z2, t0); // t0 = z1*z2
sike_fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3
sike_fp2inv_mont(t1); // t1 = 1/(z1*z2*z3)
sike_fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2)
sike_fp2mul_mont(t2, z2, t3); // t3 = 1/z1
sike_fp2mul_mont(t2, z1, z2); // z2 = 1/z2
sike_fp2mul_mont(t0, t1, z3); // z3 = 1/z3
// z1 is still needed as an input two lines up, so 1/z1 is held in t3 until here.
sike_fp2copy(t3, z1); // z1 = 1/z1
}


// Overall the steps below compute
//   A = (xP*xQ + xP*xR + xQ*xR - 1)^2 / (4*xP*xQ*xR) - (xP + xQ + xR).
void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
// Input: the x-coordinates xP, xQ, and xR of the points P, Q and R.
// Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
f2elm_t t0 = F2ELM_INIT, t1 = F2ELM_INIT, one = F2ELM_INIT;

extern const struct params_t params;
// one = 1 in Montgomery form: c0 is copied from params.mont_one; c1 keeps its
// initializer value (presumably zero - F2ELM_INIT is defined elsewhere).
sike_fpcopy(params.mont_one, one->c0);
sike_fp2add(xP, xQ, t1); // t1 = xP+xQ
sike_fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ
sike_fp2mul_mont(xR, t1, A); // A = xR*t1
sike_fp2add(t0, A, A); // A = A+t0
sike_fp2mul_mont(t0, xR, t0); // t0 = t0*xR
sike_fp2sub(A, one, A); // A = A-1
sike_fp2add(t0, t0, t0); // t0 = t0+t0
sike_fp2add(t1, xR, t1); // t1 = t1+xR
sike_fp2add(t0, t0, t0); // t0 = t0+t0
sike_fp2sqr_mont(A, A); // A = A^2
sike_fp2inv_mont(t0); // t0 = 1/t0
sike_fp2mul_mont(A, t0, A); // A = A*t0
sike_fp2sub(A, t1, A); // Afinal = A-t1
}


// jinv is written by the first call, before C is read, so jinv must not be the
// same object as C.
void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
{ // Computes the j-invariant of a Montgomery curve with projective constant.
// Input: A,C in GF(p^2).
// Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
f2elm_t t0 = F2ELM_INIT, t1 = F2ELM_INIT;

sike_fp2sqr_mont(A, jinv); // jinv = A^2
sike_fp2sqr_mont(C, t1); // t1 = C^2
sike_fp2add(t1, t1, t0); // t0 = 2*C^2
sike_fp2sub(jinv, t0, t0); // t0 = A^2-2*C^2
sike_fp2sub(t0, t1, t0); // t0 = A^2-3*C^2
sike_fp2sub(t0, t1, jinv); // jinv = A^2-4*C^2
sike_fp2sqr_mont(t1, t1); // t1 = C^4
sike_fp2mul_mont(jinv, t1, jinv); // jinv = C^4*(A^2-4*C^2)
sike_fp2add(t0, t0, t0); // t0 = 2*(A^2-3*C^2)
sike_fp2add(t0, t0, t0); // t0 = 4*(A^2-3*C^2)
sike_fp2sqr_mont(t0, t1); // t1 = t0^2
sike_fp2mul_mont(t0, t1, t0); // t0 = t0^3 = 64*(A^2-3*C^2)^3
sike_fp2add(t0, t0, t0); // t0 = 128*(A^2-3*C^2)^3
sike_fp2add(t0, t0, t0); // t0 = 256*(A^2-3*C^2)^3
sike_fp2inv_mont(jinv); // jinv = 1/jinv
sike_fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv
}


// Both P and Q are updated in place; xPQ and A24 are only read.
void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
{ // Simultaneous doubling and differential addition.
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
// Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
f2elm_t t0 = F2ELM_INIT, t1 = F2ELM_INIT, t2 = F2ELM_INIT;

sike_fp2add(P->X, P->Z, t0); // t0 = XP+ZP
sike_fp2sub(P->X, P->Z, t1); // t1 = XP-ZP
sike_fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2
sike_fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ
// Reduces both coordinates of t2 to [0, p-1] before it is used as a multiplicand.
// NOTE(review): the reason this particular value needs full reduction is not
// visible in this file - confirm before removing.
sike_fp2correction(t2);
sike_fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ
sike_fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ)
sike_fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2
sike_fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ)
sike_fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2
sike_fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
sike_fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
sike_fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
sike_fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
sike_fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
sike_fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
sike_fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
sike_fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
sike_fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
}

+ 49
- 0
src/kem/sike/p434/isogeny.h View File

@@ -0,0 +1,49 @@
#ifndef ISOGENY_H_
#define ISOGENY_H_

// NOTE(review): this header uses point_proj_t, f2elm_t and size_t without
// including anything itself; it relies on the includer (isogeny.c includes
// utils.h first) to have declared them.

// Computes [2^e](X:Z) on Montgomery curve with projective
// constant via e repeated doublings. Q receives (2^e)*P; P and Q may be the
// same point.
void xDBLe(
const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
const f2elm_t C24, size_t e);
// Simultaneous doubling and differential addition.
// Both points are updated in place: P <- 2*P and Q <- P+Q.
void xDBLADD(
point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
const f2elm_t A24);
// Tripling of a Montgomery point in projective coordinates (X:Z).
// Q receives 3*P.
void xTPL(
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
const f2elm_t A24plus);
// Computes [3^e](X:Z) on Montgomery curve with projective constant
// via e repeated triplings. Q receives (3^e)*P; P and Q may be the same point.
void xTPLe(
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
const f2elm_t A24plus, size_t e);
// Given the x-coordinates of P, Q, and R, returns the value A
// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
void get_A(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
// Computes the j-invariant of a Montgomery curve with projective constant.
void j_inv(
const f2elm_t A, const f2elm_t C, f2elm_t jinv);
// Computes the corresponding 4-isogeny of a projective Montgomery
// point (X4:Z4) of order 4. Writes A24plus, C24 and coeff[0..2].
void get_4_isog(
const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
// Computes the corresponding 3-isogeny of a projective Montgomery
// point (X3:Z3) of order 3. Writes A24minus, A24plus and coeff[0..1].
void get_3_isog(
const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
f2elm_t* coeff);
// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3)
// of order 3 on a Montgomery curve and a point P with coefficients given in coeff.
// Q is replaced by its image; coeff comes from get_3_isog().
void eval_3_isog(
point_proj_t Q, f2elm_t* coeff);
// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
// P is replaced by its image; coeff comes from get_4_isog().
void eval_4_isog(
point_proj_t P, f2elm_t* coeff);
// 3-way simultaneous inversion
// Each argument is overwritten with its own inverse.
void inv_3_way(
f2elm_t z1, f2elm_t z2, f2elm_t z3);

#endif // ISOGENY_H_

+ 128
- 0
src/kem/sike/p434/params.c View File

@@ -0,0 +1,128 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny parameters and generation of functions for P434
*********************************************************************************************/

#include "utils.h"

// Parameters for isogeny system "SIKE"
// Multi-word values are listed least-significant 64-bit chunk first (the small
// top chunk of the prime comes last).
// NOTE(review): U64_TO_WORDS is defined elsewhere; presumably it expands one
// 64-bit constant into as many crypto_word_t entries as the word size needs.
const struct params_t params = {
// The prime p434.
.prime = {
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF),
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
U64_TO_WORDS(0x0002341F27177344)
},
// p434 + 1; its three lowest 64-bit chunks are zero.
.prime_p1 = {
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000),
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
U64_TO_WORDS(0x0002341F27177344)
},
// 2 * p434.
.prime_x2 = {
U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF),
U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC),
U64_TO_WORDS(0x0004683E4E2EE688)
},
// Six field elements, labelled XPA0..XRA1 below.
// NOTE(review): presumably the x-coordinates (c0 then c1) of the 2-isogeny
// side's public generators P, Q, R - confirm against the users in sike.c.
.A_gen = {
U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B),
U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1),
U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F),
U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0
U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B),
U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA),
U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C),
U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1
U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6),
U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03),
U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215),
U64_TO_WORDS(0x0001C4CB77542876), // XQA0
U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050),
U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374),
U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508),
U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1
U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611),
U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD),
U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3),
U64_TO_WORDS(0x00022A81D8D55643), // XRA0
U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED),
U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4),
U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2),
U64_TO_WORDS(0x0000B87FC716C0C6) // XRA1
},
// Six field elements, labelled XPB0..XRB1 below; the XPB1 and XQB1 entries
// are all zero.
// NOTE(review): presumably the 3-isogeny side's counterpart of A_gen - confirm.
.B_gen = {
U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05),
U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F),
U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9),
U64_TO_WORDS(0x0001BED4772E551F), // XPB0
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), // XPB1
U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C),
U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62),
U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F),
U64_TO_WORDS(0x000034080181D8AE), // XQB0
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), // XQB1
U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647),
U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D),
U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504),
U64_TO_WORDS(0x00007E8A50F02E37), // XRB0
U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230),
U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53),
U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B),
U64_TO_WORDS(0x000173FA910377D3) // XRB1
},
// Multiplier used by sike_to_fp2mont() in fpx.h to enter Montgomery form
// (documented there as R^2).
.mont_R2 = {
U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2),
U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B),
U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A),
U64_TO_WORDS(0x000025A89BCDD12A)
},
// The value 1 in Montgomery form; get_A() in isogeny.c copies it to build "one".
.mont_one = {
U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000),
U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C),
U64_TO_WORDS(0x0000ECEEA7BD2EDA)
},
// NOTE(review): presumably the value 6 in Montgomery form - no user is visible
// in this chunk.
.mont_six = {
U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000),
U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0),
U64_TO_WORDS(0x00012559A0403298)
},
// NOTE(review): presumably the traversal strategy for the 2-isogeny tree
// computation - consumers are not visible in this chunk.
.A_strat = {
0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01,
0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02,
0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01,
0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01,
0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03,
0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04,
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01
},
// NOTE(review): presumably the traversal strategy for the 3-isogeny tree
// computation - consumers are not visible in this chunk.
.B_strat = {
0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01,
0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01,
0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02,
0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10,
0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01,
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01,
0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02,
0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01,
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01
}
};

+ 505
- 0
src/kem/sike/p434/sike.c View File

@@ -0,0 +1,505 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
*********************************************************************************************/

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <randombytes.h>
#include <common/fips202.h>

#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

extern const struct params_t params;

// SIDH_JINV_BYTESZ is the number of bytes used for encoding a j-invariant
// (one GF(p^2) element). It is also used below as the stride between the
// three encoded elements of a public key.
#define SIDH_JINV_BYTESZ 110U
// SIDH_PRV_A_BITSZ is the number of bits of the SIDH private key (2-isogeny)
#define SIDH_PRV_A_BITSZ 216U
// SIDH_PRV_B_BITSZ is the number of bits of the SIDH private key (3-isogeny)
#define SIDH_PRV_B_BITSZ 217U
// MAX_INT_POINTS_ALICE is the number of points used in 2-isogeny tree computation
#define MAX_INT_POINTS_ALICE 7U
// MAX_INT_POINTS_BOB is the number of points used in 3-isogeny tree computation
#define MAX_INT_POINTS_BOB 8U

// Conditionally exchange two projective points without branching on |option|.
// option == 0          : P and Q are left as they are.
// option == 0xFF...FF  : the contents of P and Q are exchanged.
// Any other value of |option| mixes the two points and must not be used.
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
    // The four limb arrays of each point, visited in the order
    // X.c0, Z.c0, X.c1, Z.c1 for every limb index.
    crypto_word_t *const lhs[4] = { P->X->c0, P->Z->c0, P->X->c1, P->Z->c1 };
    crypto_word_t *const rhs[4] = { Q->X->c0, Q->Z->c0, Q->X->c1, Q->Z->c1 };

    for (size_t limb = 0; limb < NWORDS_FIELD; limb++) {
        for (size_t k = 0; k < 4; k++) {
            // diff is (a ^ b) when swapping and 0 otherwise, so XOR-ing it
            // into both words either exchanges them or changes nothing.
            const crypto_word_t diff = option & (lhs[k][limb] ^ rhs[k][limb]);
            lhs[k][limb] ^= diff;
            rhs[k][limb] ^= diff;
        }
    }
}

// Three-point ladder driven by the bits of the scalar |m|.
// Inputs:  xP, xQ, xPQ - x-coordinates of the three input points,
//          m           - scalar, read bit by bit in little-endian bit order,
//          is_A        - non-zero: read SIDH_PRV_A_BITSZ bits of |m|,
//                        zero:     read SIDH_PRV_B_BITSZ bits of |m|,
//          A           - curve coefficient used to derive A24 = (A+2)/4.
// Output:  R           - resulting point in projective (X:Z) form.
// NOTE(review): presumably this is the usual x-only ladder returning
// x(P + [m]Q); the exact contract depends on xDBLADD, which is defined
// elsewhere - confirm there.
// The scalar bits only influence a mask passed to sike_fp2cswap; no branch
// in this function depends on them.
static void ladder3Pt(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
int is_A, point_proj_t R, const f2elm_t A) {
point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
f2elm_t A24 = F2ELM_INIT;
crypto_word_t mask;
int bit, swap, prevbit = 0;

// Number of scalar bits to process depends on which side is computing
const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;

// Initializing constant: A24 = 1, then 2, then A+2, then halved twice
sike_fpcopy(params.mont_one, A24[0].c0);
sike_fp2add(A24, A24, A24);
sike_fp2add(A, A24, A24);
sike_fp2div2(A24, A24);
sike_fp2div2(A24, A24); // A24 = (A+2)/4

// Initializing points: R0 = (xQ : 1), R2 = (xPQ : 1), R = (xP : 1).
// R0 and R2 start zeroed, so only the c0 part of Z needs to be set to one.
sike_fp2copy(xQ, R0->X);
sike_fpcopy(params.mont_one, R0->Z[0].c0);
sike_fp2copy(xPQ, R2->X);
sike_fpcopy(params.mont_one, R2->Z[0].c0);
sike_fp2copy(xP, R->X);
sike_fpcopy(params.mont_one, R->Z[0].c0);
// R is caller-provided and may be uninitialized, so Z.c1 is cleared explicitly
memset(R->Z->c1, 0, sizeof(R->Z->c1));

// Main loop
for (size_t i = 0; i < nbits; i++) {
// Extract bit i of the scalar (byte i/8, bit i%8)
bit = (m[i >> 3] >> (i & 7)) & 1;
// Swap only when the current bit differs from the previous one
swap = bit ^ prevbit;
prevbit = bit;
// Expand swap (0 or 1) into an all-zeros or all-ones word mask
mask = 0 - (crypto_word_t)swap;

sike_fp2cswap(R, R2, mask);
xDBLADD(R0, R2, R->X, A24);
sike_fp2mul_mont(R2->X, R->Z, R2->X);
}
// Undo the swap left pending by the last processed bit
swap = 0 ^ prevbit;
mask = 0 - (crypto_word_t)swap;
sike_fp2cswap(R, R2, mask);
}

// Load the x-coordinates of the three basis points from a flat table.
// |gen| holds six consecutive field elements of NWORDS_FIELD words each,
// laid out as XP.c0, XP.c1, XQ.c0, XQ.c1, XR.c0, XR.c1.
static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
    // Destination limb arrays, in the same order as the table layout.
    crypto_word_t *const coords[6] = {
        XP->c0, XP->c1,
        XQ->c0, XQ->c1,
        XR->c0, XR->c1
    };

    for (size_t k = 0; k < 6; k++) {
        sike_fpcopy(gen + k*NWORDS_FIELD, coords[k]);
    }
}

// Conversion of GF(p^2) element from Montgomery to standard representation.
static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
f2elm_t t={0};
sike_from_fp2mont(x, t);

// convert to bytes in little endian form
for (size_t i=0; i<FIELD_BYTESZ; i++) {
enc[i+ 0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
}
}

// Deserialize a GF(p^2) element and convert it to Montgomery representation.
// |enc| holds 2*FIELD_BYTESZ octets: the c0 coefficient followed by the c1
// coefficient, each in little endian byte order (the least significant octet
// is located in the lowest memory address).
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
    const uint8_t *const re = enc;
    const uint8_t *const im = enc + FIELD_BYTESZ;

    // Limbs are assembled with OR, so they must start out as zero
    memset(t->c0, 0, sizeof(t->c0));
    memset(t->c1, 0, sizeof(t->c1));

    for (size_t k = 0; k < FIELD_BYTESZ; k++) {
        // Byte k belongs to limb k/LSZ at bit offset 8*(k%LSZ)
        const size_t limb = k / LSZ;
        const size_t shift = 8 * (k % LSZ);
        t->c0[limb] |= ((crypto_word_t)re[k]) << shift;
        t->c1[limb] |= ((crypto_word_t)im[k]) << shift;
    }

    sike_to_fp2mont(t, t);
}

// Alice's ephemeral public key generation
// Input:  a private key skA; the ladder consumes its low SIDH_PRV_A_BITSZ
//         (216) bits, i.e. 27 bytes.
// Output: the public key pkA consisting of 3 GF(p^2) elements, each encoded
//         in SIDH_JINV_BYTESZ (110) bytes, 330 bytes in total.
static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
{
// R and pts are written before they are read (by ladder3Pt and by the
// tree traversal respectively), so they carry no initializer.
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
point_proj_t phiP = POINT_PROJ_INIT;
point_proj_t phiQ = POINT_PROJ_INIT;
point_proj_t phiR = POINT_PROJ_INIT;
f2elm_t XPA, XQA, XRA, coeff[3] = {0};
f2elm_t A24plus = F2ELM_INIT;
f2elm_t C24 = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE] = {0}, npts = 0, ii = 0;

// Initialize basis points: Alice's own generators drive the ladder, while
// Bob's generators (phiP, phiQ, phiR with Z = 1) are pushed through the isogeny
sike_init_basis(params.A_gen, XPA, XQA, XRA);
sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X);
sike_fpcopy(params.mont_one, (phiP->Z)->c0);
sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
sike_fpcopy(params.mont_one, (phiR->Z)->c0);

// Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
// (computed as 1 -> 2 -> C24 = 4 -> A = 2+4 = 6 -> A24plus = 4+4 = 8)
sike_fpcopy(params.mont_one, A24plus->c0);
sike_fp2add(A24plus, A24plus, A24plus);
sike_fp2add(A24plus, A24plus, C24);
sike_fp2add(A24plus, C24, A);
sike_fp2add(C24, C24, A24plus);

// Retrieve kernel point
ladder3Pt(XPA, XQA, XRA, skA, 1, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < A_max; row++) {
// Walk down: stash the current point, then advance R by the number of
// steps prescribed by the strategy table (2*m doublings per entry)
while (index < A_max-row) {
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.A_strat[ii++];
xDBLe(R, R, A24plus, C24, (2*m));
index += m;
}
// Derive the next isogeny from R and apply it to every stashed point
// and to the three basis images
get_4_isog(R, A24plus, C24, coeff);

for (size_t i = 0; i < npts; i++) {
eval_4_isog(pts[i], coeff);
}
eval_4_isog(phiP, coeff);
eval_4_isog(phiQ, coeff);
eval_4_isog(phiR, coeff);

// Pop the most recently stashed point and continue from there
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

// Last isogeny step: no stashed points remain to be evaluated
get_4_isog(R, A24plus, C24, coeff);
eval_4_isog(phiP, coeff);
eval_4_isog(phiQ, coeff);
eval_4_isog(phiR, coeff);

// Multiply each X by the processed Z value.
// NOTE(review): presumably inv_3_way inverts the three Z values in place,
// making the products affine x-coordinates - confirm against its definition.
inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);

// Format public key: three encoded elements at a stride of SIDH_JINV_BYTESZ
sike_fp2_encode(phiP->X, pkA);
sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ);
sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ);
}

// Bob's public key generation
// It computes the public key pkB that belongs to the given private key skB
// (skB is an input; it is not generated here).
// Input:  a private key skB; the ladder consumes its low SIDH_PRV_B_BITSZ
//         (217) bits, i.e. 28 bytes.
// Output: the public key pkB consisting of 3 GF(p^2) elements, each encoded
//         in SIDH_JINV_BYTESZ (110) bytes, 330 bytes in total.
static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
{
// R and pts are written before they are read (by ladder3Pt and by the
// tree traversal respectively), so they carry no initializer.
point_proj_t R, pts[MAX_INT_POINTS_BOB];
point_proj_t phiP = POINT_PROJ_INIT;
point_proj_t phiQ = POINT_PROJ_INIT;
point_proj_t phiR = POINT_PROJ_INIT;
f2elm_t XPB, XQB, XRB, coeff[3] = {0};
f2elm_t A24plus = F2ELM_INIT;
f2elm_t A24minus = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB] = {0}, npts = 0, ii = 0;

// Initialize basis points: Bob's own generators drive the ladder, while
// Alice's generators (phiP, phiQ, phiR with Z = 1) are pushed through the isogeny
sike_init_basis(params.B_gen, XPB, XQB, XRB);
sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X);
sike_fpcopy(params.mont_one, (phiP->Z)->c0);
sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
sike_fpcopy(params.mont_one, (phiR->Z)->c0);

// Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
// (computed as 1 -> 2 -> A24minus = 4 -> A = 2+4 = 6 -> A24plus = 4+4 = 8)
sike_fpcopy(params.mont_one, A24plus->c0);
sike_fp2add(A24plus, A24plus, A24plus);
sike_fp2add(A24plus, A24plus, A24minus);
sike_fp2add(A24plus, A24minus, A);
sike_fp2add(A24minus, A24minus, A24plus);

// Retrieve kernel point
ladder3Pt(XPB, XQB, XRB, skB, 0, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < B_max; row++) {
// Walk down: stash the current point, then advance R by the number of
// triplings prescribed by the strategy table
while (index < B_max-row) {
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.B_strat[ii++];
xTPLe(R, R, A24minus, A24plus, m);
index += m;
}
// Derive the next isogeny from R and apply it to every stashed point
// and to the three basis images
get_3_isog(R, A24minus, A24plus, coeff);

for (size_t i = 0; i < npts; i++) {
eval_3_isog(pts[i], coeff);
}
eval_3_isog(phiP, coeff);
eval_3_isog(phiQ, coeff);
eval_3_isog(phiR, coeff);

// Pop the most recently stashed point and continue from there
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

// Last isogeny step: no stashed points remain to be evaluated
get_3_isog(R, A24minus, A24plus, coeff);
eval_3_isog(phiP, coeff);
eval_3_isog(phiQ, coeff);
eval_3_isog(phiR, coeff);

// Multiply each X by the processed Z value.
// NOTE(review): presumably inv_3_way inverts the three Z values in place,
// making the products affine x-coordinates - confirm against its definition.
inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);

// Format public key: three encoded elements at a stride of SIDH_JINV_BYTESZ
sike_fp2_encode(phiP->X, pkB);
sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ);
sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ);
}

// Alice's ephemeral shared secret computation
// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
// Inputs: Alice's skA; the ladder consumes its low SIDH_PRV_A_BITSZ (216) bits,
//         i.e. 27 bytes.
//         Bob's pkB consists of 3 GF(p^2) elements, each encoded in
//         SIDH_JINV_BYTESZ (110) bytes, 330 bytes in total.
// Output: a shared secret ssA that consists of one element in GF(p^2) encoded
//         in SIDH_JINV_BYTESZ (110) bytes.
// NOTE(review): pkB is decoded and used without any validation in this function.
static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
{
// NOTE(review): unlike ex_iso_B, coeff, PKB, pts and pts_index are not
// zero-initialized here. PKB is fully written by fp2_decode and R by
// ladder3Pt; coeff and jinv are presumably fully written by get_4_isog and
// j_inv - confirm if a sanitizer reports uninitialized reads.
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
f2elm_t coeff[3], PKB[3], jinv;
f2elm_t A24plus = F2ELM_INIT;
f2elm_t C24 = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;

// Initialize images of Bob's basis
fp2_decode(pkB, PKB[0]);
fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]);
fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]);

// Initialize constants: C24 = 2, A24plus = A + 2, then C24 = 4
// (i.e. A24plus = A+2C and C24 = 4C with C=1)
get_A(PKB[0], PKB[1], PKB[2], A);
sike_fpadd(params.mont_one, params.mont_one, C24->c0);
sike_fp2add(A, C24, A24plus);
sike_fpadd(C24->c0, C24->c0, C24->c0);

// Retrieve kernel point
ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < A_max; row++) {
// Walk down: stash the current point, then advance R by the number of
// steps prescribed by the strategy table (2*m doublings per entry)
while (index < A_max-row) {
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.A_strat[ii++];
xDBLe(R, R, A24plus, C24, (2*m));
index += m;
}
// Derive the next isogeny from R and apply it to every stashed point
get_4_isog(R, A24plus, C24, coeff);

for (size_t i = 0; i < npts; i++) {
eval_4_isog(pts[i], coeff);
}

// Pop the most recently stashed point and continue from there
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_4_isog(R, A24plus, C24, coeff);
// A24plus <- 2*(2*A24plus - C24), the first argument handed to j_inv
sike_fp2add(A24plus, A24plus, A24plus);
sike_fp2sub(A24plus, C24, A24plus);
sike_fp2add(A24plus, A24plus, A24plus);
j_inv(A24plus, C24, jinv);
sike_fp2_encode(jinv, ssA);
}

// Bob's ephemeral shared secret computation
// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
// Inputs: Bob's skB; the ladder consumes its low SIDH_PRV_B_BITSZ (217) bits,
//         i.e. 28 bytes.
//         Alice's pkA consists of 3 GF(p^2) elements, each encoded in
//         SIDH_JINV_BYTESZ (110) bytes, 330 bytes in total.
// Output: a shared secret ssB that consists of one element in GF(p^2) encoded
//         in SIDH_JINV_BYTESZ (110) bytes.
// NOTE(review): pkA is decoded and used without any validation in this function.
static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
{
point_proj_t R, pts[MAX_INT_POINTS_BOB] = {0};
// Despite its name, PKB holds the three decoded elements of Alice's key pkA
f2elm_t coeff[3] = {0}, PKB[3] = {0}, jinv;
f2elm_t A24plus = F2ELM_INIT;
f2elm_t A24minus = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB] = {0}, npts = 0, ii = 0;

// Initialize images of Alice's basis
fp2_decode(pkA, PKB[0]);
fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]);
fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]);

// Initialize constants: A24minus = 2, A24plus = A + 2, then A24minus = A - 2
get_A(PKB[0], PKB[1], PKB[2], A);
sike_fpadd(params.mont_one, params.mont_one, A24minus->c0);
sike_fp2add(A, A24minus, A24plus);
sike_fp2sub(A, A24minus, A24minus);

// Retrieve kernel point
ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < B_max; row++) {
// Walk down: stash the current point, then advance R by the number of
// triplings prescribed by the strategy table
while (index < B_max-row) {
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.B_strat[ii++];
xTPLe(R, R, A24minus, A24plus, m);
index += m;
}
// Derive the next isogeny from R and apply it to every stashed point
get_3_isog(R, A24minus, A24plus, coeff);

for (size_t i = 0; i < npts; i++) {
eval_3_isog(pts[i], coeff);
}

// Pop the most recently stashed point and continue from there
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_3_isog(R, A24minus, A24plus, coeff);
// A <- 2*(A24plus + A24minus), A24plus <- A24plus - A24minus: the two
// arguments handed to j_inv
sike_fp2add(A24plus, A24minus, A);
sike_fp2add(A, A, A);
sike_fp2sub(A24plus, A24minus, A24plus);
j_inv(A, A24plus, jinv);
sike_fp2_encode(jinv, ssB);
}

int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
uint8_t out_pub[SIKE_PUB_BYTESZ]) {
// Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and <
// 253 bits
randombytes(out_priv, SIKE_MSG_BYTESZ);
randombytes(&out_priv[SIKE_MSG_BYTESZ], SIKE_PRV_BYTESZ);
out_priv[SIKE_MSG_BYTESZ+28-1] = (out_priv[SIKE_MSG_BYTESZ+28-1] & 0x01);
gen_iso_B(&out_priv[SIKE_MSG_BYTESZ], out_pub);
return 1;
}

// SIKE encapsulation.
// Picks a random message m, derives the ephemeral 2-isogeny secret from
// SHAKE256(m || pub_key), and outputs
//   out_ciphertext = c0 || c1, with c0 = ephemeral public key and
//                    c1 = SHAKE256(j-invariant) ^ m,
//   out_shared_key = first SIKE_SS_BYTESZ bytes of SHAKE256(m || out_ciphertext).
void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
                 uint8_t out_ciphertext[SIKE_CT_BYTESZ],
                 const uint8_t pub_key[SIKE_PUB_BYTESZ]) {
    // Scratch buffer for SHAKE256 output. It is reused for the ephemeral
    // secret scalar (SIDH_PRV_A_BITSZ bits are consumed), for the mask h
    // (SIKE_MSG_BYTESZ bytes are consumed) and for the final key
    // (SIKE_SS_BYTESZ bytes are consumed), so it must be at least as large
    // as the biggest of the three.
    uint8_t secret[32];
    uint8_t j[SIDH_JINV_BYTESZ] = {0};
    // Random message m; only SIKE_MSG_BYTESZ bytes are ever needed
    uint8_t m[SIKE_MSG_BYTESZ];
    shake256incctx ctx;

    // Generate secret key for A:
    // secret key A = SHAKE256(m || pub_key); the isogeny code reads only its
    // low SIDH_PRV_A_BITSZ bits
    randombytes(m, sizeof m);

    shake256_inc_init(&ctx);
    shake256_inc_absorb(&ctx, m, sizeof m);
    shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ);
    shake256_inc_finalize(&ctx);
    shake256_inc_squeeze(secret, sizeof secret, &ctx);
    shake256_inc_ctx_release(&ctx);

    // Generate public key for A - first part of the ciphertext
    gen_iso_A(secret, out_ciphertext);

    // Generate c1:
    //   h  = SHAKE256(j-invariant)
    //   c1 = h ^ m
    ex_iso_A(secret, pub_key, j);
    shake256(secret, sizeof secret, j, sizeof j);

    uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ];
    for (size_t i = 0; i < sizeof m; i++) {
        c1[i] = m[i] ^ secret[i];
    }

    // Generate shared secret out_shared_key = SHAKE256(m || out_ciphertext)
    shake256_inc_init(&ctx);
    shake256_inc_absorb(&ctx, m, sizeof m);
    shake256_inc_absorb(&ctx, out_ciphertext, SIKE_CT_BYTESZ);
    shake256_inc_finalize(&ctx);
    shake256_inc_squeeze(secret, sizeof secret, &ctx);
    shake256_inc_ctx_release(&ctx);

    memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
}

// SIKE decapsulation.
// ciphertext = c0 || c1, where c0 is the sender's ephemeral public key and
// c1 the masked message. priv_key = s || skB, where s is SIKE_MSG_BYTESZ
// bytes long. The message is recovered from c1, the sender's public key is
// recomputed from it and compared with c0; on mismatch s replaces the message
// before the final hash.
// out_shared_key receives the first SIKE_SS_BYTESZ bytes of
// SHAKE256(m_or_s || ciphertext).
void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
                 const uint8_t ciphertext[SIKE_CT_BYTESZ],
                 const uint8_t pub_key[SIKE_PUB_BYTESZ],
                 const uint8_t priv_key[SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ]) {
    // SHAKE256 output scratch, reused for the mask, the re-derived ephemeral
    // secret and the final key.
    uint8_t hash[32];
    uint8_t jinv[SIDH_JINV_BYTESZ] = {0};
    uint8_t c0_check[SIKE_PUB_BYTESZ] = {0};
    uint8_t msg[SIKE_MSG_BYTESZ] = {0};
    const uint8_t *const c1 = ciphertext + sizeof(c0_check);
    shake256incctx ctx;

    // Recover m = SHAKE256(j-invariant(c0, skB)) ^ c1
    ex_iso_B(priv_key + SIKE_MSG_BYTESZ, ciphertext, jinv);
    shake256(hash, sizeof hash, jinv, sizeof jinv);
    for (size_t k = 0; k < SIKE_MSG_BYTESZ; k++) {
        msg[k] = c1[k] ^ hash[k];
    }

    // Re-derive the sender's ephemeral secret = SHAKE256(m || pub_key)
    shake256_inc_init(&ctx);
    shake256_inc_absorb(&ctx, msg, SIKE_MSG_BYTESZ);
    shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ);
    shake256_inc_finalize(&ctx);
    shake256_inc_squeeze(hash, sizeof hash, &ctx);
    shake256_inc_ctx_release(&ctx);

    // Recompute c0 and compare it with the received one; ok is 1 on match.
    // On mismatch the first SIKE_MSG_BYTESZ bytes of priv_key take the place
    // of the message, selected byte by byte without branching.
    gen_iso_A(hash, c0_check);
    const crypto_word_t ok = ct_uint_eq(
        ct_mem_eq(c0_check, ciphertext, SIKE_PUB_BYTESZ), 1);
    for (size_t k = 0; k < SIKE_MSG_BYTESZ; k++) {
        msg[k] = ct_select_8(ok, msg[k], priv_key[k]);
    }

    // Generate shared secret out_shared_key = SHAKE256(m || ciphertext)
    shake256_inc_init(&ctx);
    shake256_inc_absorb(&ctx, msg, SIKE_MSG_BYTESZ);
    shake256_inc_absorb(&ctx, ciphertext, SIKE_CT_BYTESZ);
    shake256_inc_finalize(&ctx);
    shake256_inc_squeeze(hash, sizeof hash, &ctx);
    shake256_inc_ctx_release(&ctx);

    memcpy(out_shared_key, hash, SIKE_SS_BYTESZ);
}

+ 214
- 0
src/kem/sike/p434/utils.h View File

@@ -0,0 +1,214 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: internal header file for P434
*********************************************************************************************/

#ifndef UTILS_H_
#define UTILS_H_

#include <stddef.h>
#include <kem/sike/includes/sike/sike.h>

// Conversion macro from number of bits to number of bytes (rounds up)
#define BITS_TO_BYTES(nbits) (((nbits)+7)/8)

// Bit size of the field
#define BITS_FIELD 434
// Byte size of the field
#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD)
// Number of 64-bit words of a 224-bit element
#define NBITS_ORDER 224
#define NWORDS64_ORDER ((NBITS_ORDER+63)/64)
// Number of elements in Alice's strategy
#define A_max 108
// Number of elements in Bob's strategy
#define B_max 137
// Word size in bits. The expansion is parenthesized so that the macro can be
// used safely inside larger expressions (e.g. x % RADIX).
#define RADIX (sizeof(crypto_word_t)*8)
// Byte size of a limb
#define LSZ (sizeof(crypto_word_t))

#if defined(CPU_64_BIT)
typedef uint64_t crypto_word_t;
// Number of words of a 434-bit field element
#define NWORDS_FIELD 7
// Number of "0" digits in the least significant part of p434 + 1
#define ZERO_WORDS 3
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
#define U64_TO_WORDS(x) UINT64_C(x)
#else
typedef uint32_t crypto_word_t;
// Number of words of a 434-bit field element
#define NWORDS_FIELD 14
// Number of "0" digits in the least significant part of p434 + 1
#define ZERO_WORDS 6
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal:
// low 32-bit half first, then the high half.
#define U64_TO_WORDS(x) \
    (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32)
#endif

// Extended datatype support: when the compiler offers no native 128-bit
// integer (HAS_UINT128 undefined), a 128-bit value is a pair of 64-bit words.
#if !defined(HAS_UINT128)
typedef uint64_t uint128_t[2];
#endif

// Digit multiplication.
// Expands to a call digit_x_digit(multiplier, multiplicand, &lo).
// NOTE(review): the |hi| argument is not used by the expansion, and the
// expansion already ends with ';' - so "MUL(...);" yields an extra empty
// statement, which matters inside an unbraced if/else.
#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));

// Mask to bit: moves the most significant bit of |x| down to bit 0, so an
// all-ones mask (0xff..ff) becomes 1 and zero stays 0.
#define M2B(x) ((x)>>(RADIX-1))

// Digit addition with carry:
// sumOut = addend1 + addend2 + carryIn (modulo the word size), and carryOut
// is 1 if either of the two additions wrapped around, 0 otherwise.
// Arguments are evaluated more than once, so they must be free of side
// effects; the macro also declares a local named tempReg.
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
do { \
crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \
(sumOut) = (addend2) + tempReg; \
(carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) | \
ct_uint_lt((sumOut), tempReg)); \
} while(0)

// Digit subtraction with borrow:
// differenceOut = minuend - subtrahend - borrowIn (modulo the word size), and
// borrowOut is set when minuend < subtrahend, or when the two are equal and
// borrowIn is set.
// Arguments are evaluated more than once, so they must be free of side
// effects; the macro also declares locals named tempReg and borrowReg.
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
do { \
crypto_word_t tempReg = (minuend) - (subtrahend); \
crypto_word_t borrowReg = M2B(ct_uint_lt((minuend), (subtrahend))); \
borrowReg |= ((borrowIn) & ct_uint_eq(tempReg, 0)); \
(differenceOut) = tempReg - (crypto_word_t)(borrowIn); \
(borrowOut) = borrowReg; \
} while(0)

/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
which violates C11 as described in 6.7.9, 21 (similarly C99, 6.7.8).
The defines below are used to work around the bug, and provide a way
to zero-initialize f2elm_t and point_proj_t objects.
The bug has been fixed in GCC6 (debian stretch).
*/
#define F2ELM_INIT {{ {0}, {0} }}
#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}

// Datatype for representing 434-bit field elements (448-bit max.)
// In memory an element is an array of NWORDS_FIELD words. When serialized,
// elements over GF(p434) are encoded in FIELD_BYTESZ (55) octets in little
// endian format (i.e., the least significant octet is located in the lowest
// memory address).
typedef crypto_word_t felm_t[NWORDS_FIELD];

// An element in F_{p^2} is composed of two coefficients from F_p, i.e.
// Fp2 element = c0 + c1*i in F_{p^2}
// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are
// stored as {a, b}, with a in the lowest memory portion.
typedef struct {
felm_t c0;
felm_t c1;
} fp2;

// Our F_{p^2} element type is a one-element array of the struct, so that a
// variable owns its storage but is passed to functions as a pointer.
typedef fp2 f2elm_t[1];

// Datatype for representing double-precision 2x434-bit
// field elements in contiguous memory.
typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];

// Constants used during SIKE computation.
struct params_t {
// Stores a prime
const crypto_word_t prime[NWORDS_FIELD];
// Stores prime + 1
const crypto_word_t prime_p1[NWORDS_FIELD];
// Stores prime * 2
const crypto_word_t prime_x2[NWORDS_FIELD];
// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i}
// in GF(prime^2), expressed in Montgomery representation
const crypto_word_t A_gen[6*NWORDS_FIELD];
// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i}
// in GF(prime^2), expressed in Montgomery representation
const crypto_word_t B_gen[6*NWORDS_FIELD];
// Montgomery constant mont_R2 = (2^448)^2 mod prime
const crypto_word_t mont_R2[NWORDS_FIELD];
// Value 'one' in Montgomery representation
const crypto_word_t mont_one[NWORDS_FIELD];
// Value '6' in Montgomery representation
const crypto_word_t mont_six[NWORDS_FIELD];
// Fixed parameters for isogeny tree computation: one entry per step of the
// traversal strategy, A_max-1 entries for Alice and B_max-1 for Bob
const unsigned int A_strat[A_max-1];
const unsigned int B_strat[B_max-1];
};

// Point representation in projective XZ Montgomery coordinates.
typedef struct {
f2elm_t X;
f2elm_t Z;
} point_proj;
// Like f2elm_t, a one-element array so that it is passed by reference.
typedef point_proj point_proj_t[1];

// Constant time equality test for two words.
// Returns 1 if x == y, otherwise 0.
static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
{
    // diff is zero exactly when the inputs are equal
    const crypto_word_t diff = x ^ y;
    // (diff >> 1) - diff stays 0 for diff == 0 and wraps around to a value
    // with the top bit set for every non-zero diff
    const crypto_word_t wrapped = (diff >> 1) - diff;
    // The inverted top bit is therefore 1 for equal inputs and 0 otherwise
    return ((~wrapped) >> (RADIX-1));
}
// Constant time select.
// if flag == 1 the result is in1
// if flag == 0 the result is in2
// Only the least significant bit of |flag| is consulted; callers should pass
// 0 or 1.
static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
    // Expand bit 0 of flag into 0x00 or 0xFF using unsigned wrap-around only,
    // which is fully defined (no narrowing to a signed type and no right
    // shift of a negative value).
    const uint8_t mask = (uint8_t)(0U - (unsigned)(flag & 1U));
    return (uint8_t)((in1 & mask) | (in2 & (uint8_t)~mask));
}

// Constant time memcmp. Returns 1 if the first n bytes of p and q are equal,
// otherwise 0. For n == 0 the result is 1.
static inline int ct_mem_eq(const void *p, const void *q, size_t n)
{
    const uint8_t *lhs = (const uint8_t *)p;
    const uint8_t *rhs = (const uint8_t *)q;
    uint8_t acc = 0;

    // Fold every byte difference into acc; there is no early exit, so the
    // running time depends only on n, not on the position of a mismatch.
    for (size_t i = 0; i < n; i++) {
        acc |= (uint8_t)(lhs[i] ^ rhs[i]);
    }

    // acc - 1 wraps around (setting bit 8) only when acc == 0; for any other
    // byte value the result stays below 256 and bit 8 is clear.
    return (int)((((unsigned)acc - 1U) >> 8) & 1U);
}

// Broadcasts the most significant bit of |a| to the whole word:
// returns 0xff..f if the top bit of |a| is set and 0 otherwise.
static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
    // Isolate the top bit as 0 or 1 ...
    const crypto_word_t top = a >> (sizeof(a) * 8 - 1);
    // ... and negate it modulo the word size: 0 stays 0, 1 becomes all-ones
    return 0u - top;
}

// ct_uint_lt returns 0xff..f if x < y and 0 otherwise (unsigned comparison).
static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
{
    // Consider the two cases of the problem:
    // msb(x) == msb(y): x < y iff the MSB of x - y is set.
    // msb(x) != msb(y): x < y iff the MSB of y is set.
    //
    // If msb(x) == msb(y) then the expression below evaluates as:
    // msb(x^((x^y)|((x-y)^x))) ==
    // msb(x^((x-y) ^ x)) == (because msb(x^y) == 0)
    // msb(x^x^(x-y)) == (rearranging)
    // msb(x-y) (because v^v == 0 for every v)
    //
    // Else, if msb(x) != msb(y) then the expression evaluates as:
    // msb(x^((x^y)|((x-y)^x))) ==
    // msb(x^(T | ((x-y)^x))) == (because msb(x^y) == 1; T stands for
    // any value whose MSB is set)
    // msb(x^T) == (because ORing with a set MSB keeps it set)
    // msb(y)
    //
    //
    // Here is an SMT-LIB verification of this formula:
    //
    // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
    // (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
    // )
    //
    // (declare-fun a () (_ BitVec 32))
    // (declare-fun b () (_ BitVec 32))
    //
    // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
    // (check-sat)
    // (get-model)
    const crypto_word_t msb_differs = x ^ y;
    const crypto_word_t diff_vs_x = (x - y) ^ x;
    return constant_time_msb_w(x ^ (msb_differs | diff_vs_x));
}
#endif // UTILS_H_

+ 143
- 113
src/rustapi/pqc-sys/src/bindings.rs View File

@@ -203,64 +203,75 @@ pub type uint_fast32_t = ::std::os::raw::c_ulong;
pub type uint_fast64_t = ::std::os::raw::c_ulong;
pub type intmax_t = __intmax_t;
pub type uintmax_t = __uintmax_t;
pub const DILITHIUM2: ::std::os::raw::c_uint = 0;
pub const DILITHIUM3: ::std::os::raw::c_uint = 1;
pub const DILITHIUM5: ::std::os::raw::c_uint = 2;
pub const FALCON1024: ::std::os::raw::c_uint = 3;
pub const FALCON512: ::std::os::raw::c_uint = 4;
pub const RAINBOWVCLASSIC: ::std::os::raw::c_uint = 5;
pub const RAINBOWICLASSIC: ::std::os::raw::c_uint = 6;
pub const RAINBOWIIICLASSIC: ::std::os::raw::c_uint = 7;
pub const SPHINCSSHA256192FSIMPLE: ::std::os::raw::c_uint = 8;
pub const SPHINCSSHAKE256256FSIMPLE: ::std::os::raw::c_uint = 9;
pub const SPHINCSSHAKE256192FROBUST: ::std::os::raw::c_uint = 10;
pub const SPHINCSSHAKE256128FSIMPLE: ::std::os::raw::c_uint = 11;
pub const SPHINCSSHAKE256256SSIMPLE: ::std::os::raw::c_uint = 12;
pub const SPHINCSSHAKE256128SSIMPLE: ::std::os::raw::c_uint = 13;
pub const SPHINCSSHA256128FROBUST: ::std::os::raw::c_uint = 14;
pub const SPHINCSSHA256192SROBUST: ::std::os::raw::c_uint = 15;
pub const SPHINCSSHAKE256128FROBUST: ::std::os::raw::c_uint = 16;
pub const SPHINCSSHAKE256128SROBUST: ::std::os::raw::c_uint = 17;
pub const SPHINCSSHAKE256256SROBUST: ::std::os::raw::c_uint = 18;
pub const SPHINCSSHA256192SSIMPLE: ::std::os::raw::c_uint = 19;
pub const SPHINCSSHAKE256192SSIMPLE: ::std::os::raw::c_uint = 20;
pub const SPHINCSSHAKE256192SROBUST: ::std::os::raw::c_uint = 21;
pub const SPHINCSSHAKE256192FSIMPLE: ::std::os::raw::c_uint = 22;
pub const SPHINCSSHA256256SSIMPLE: ::std::os::raw::c_uint = 23;
pub const SPHINCSSHA256128SSIMPLE: ::std::os::raw::c_uint = 24;
pub const SPHINCSSHAKE256256FROBUST: ::std::os::raw::c_uint = 25;
pub const SPHINCSSHA256256FROBUST: ::std::os::raw::c_uint = 26;
pub const SPHINCSSHA256256FSIMPLE: ::std::os::raw::c_uint = 27;
pub const SPHINCSSHA256256SROBUST: ::std::os::raw::c_uint = 28;
pub const SPHINCSSHA256128SROBUST: ::std::os::raw::c_uint = 29;
pub const SPHINCSSHA256128FSIMPLE: ::std::os::raw::c_uint = 30;
pub const SPHINCSSHA256192FROBUST: ::std::os::raw::c_uint = 31;
pub const PQC_ALG_SIG_DILITHIUM2: ::std::os::raw::c_uint = 0;
pub const PQC_ALG_SIG_DILITHIUM3: ::std::os::raw::c_uint = 1;
pub const PQC_ALG_SIG_DILITHIUM5: ::std::os::raw::c_uint = 2;
pub const PQC_ALG_SIG_FALCON512: ::std::os::raw::c_uint = 3;
pub const PQC_ALG_SIG_FALCON1024: ::std::os::raw::c_uint = 4;
pub const PQC_ALG_SIG_RAINBOWICLASSIC: ::std::os::raw::c_uint = 5;
pub const PQC_ALG_SIG_RAINBOWIIICLASSIC: ::std::os::raw::c_uint = 6;
pub const PQC_ALG_SIG_RAINBOWVCLASSIC: ::std::os::raw::c_uint = 7;
pub const PQC_ALG_SIG_SPHINCSSHAKE256128FSIMPLE: ::std::os::raw::c_uint = 8;
pub const PQC_ALG_SIG_SPHINCSSHAKE256128SSIMPLE: ::std::os::raw::c_uint = 9;
pub const PQC_ALG_SIG_SPHINCSSHAKE256128FROBUST: ::std::os::raw::c_uint = 10;
pub const PQC_ALG_SIG_SPHINCSSHAKE256128SROBUST: ::std::os::raw::c_uint = 11;
pub const PQC_ALG_SIG_SPHINCSSHAKE256192FSIMPLE: ::std::os::raw::c_uint = 12;
pub const PQC_ALG_SIG_SPHINCSSHAKE256192SSIMPLE: ::std::os::raw::c_uint = 13;
pub const PQC_ALG_SIG_SPHINCSSHAKE256192FROBUST: ::std::os::raw::c_uint = 14;
pub const PQC_ALG_SIG_SPHINCSSHAKE256192SROBUST: ::std::os::raw::c_uint = 15;
pub const PQC_ALG_SIG_SPHINCSSHAKE256256FSIMPLE: ::std::os::raw::c_uint = 16;
pub const PQC_ALG_SIG_SPHINCSSHAKE256256SSIMPLE: ::std::os::raw::c_uint = 17;
pub const PQC_ALG_SIG_SPHINCSSHAKE256256FROBUST: ::std::os::raw::c_uint = 18;
pub const PQC_ALG_SIG_SPHINCSSHAKE256256SROBUST: ::std::os::raw::c_uint = 19;
pub const PQC_ALG_SIG_SPHINCSSHA256128FSIMPLE: ::std::os::raw::c_uint = 20;
pub const PQC_ALG_SIG_SPHINCSSHA256128SSIMPLE: ::std::os::raw::c_uint = 21;
pub const PQC_ALG_SIG_SPHINCSSHA256128FROBUST: ::std::os::raw::c_uint = 22;
pub const PQC_ALG_SIG_SPHINCSSHA256128SROBUST: ::std::os::raw::c_uint = 23;
pub const PQC_ALG_SIG_SPHINCSSHA256192FSIMPLE: ::std::os::raw::c_uint = 24;
pub const PQC_ALG_SIG_SPHINCSSHA256192SSIMPLE: ::std::os::raw::c_uint = 25;
pub const PQC_ALG_SIG_SPHINCSSHA256192FROBUST: ::std::os::raw::c_uint = 26;
pub const PQC_ALG_SIG_SPHINCSSHA256192SROBUST: ::std::os::raw::c_uint = 27;
pub const PQC_ALG_SIG_SPHINCSSHA256256FSIMPLE: ::std::os::raw::c_uint = 28;
pub const PQC_ALG_SIG_SPHINCSSHA256256SSIMPLE: ::std::os::raw::c_uint = 29;
pub const PQC_ALG_SIG_SPHINCSSHA256256FROBUST: ::std::os::raw::c_uint = 30;
pub const PQC_ALG_SIG_SPHINCSSHA256256SROBUST: ::std::os::raw::c_uint = 31;
pub const PQC_ALG_SIG_MAX: ::std::os::raw::c_uint = 32;
pub type _bindgen_ty_1 = ::std::os::raw::c_uint;
pub const FRODOKEM976SHAKE: ::std::os::raw::c_uint = 0;
pub const FRODOKEM1344SHAKE: ::std::os::raw::c_uint = 1;
pub const FRODOKEM640SHAKE: ::std::os::raw::c_uint = 2;
pub const KYBER768: ::std::os::raw::c_uint = 3;
pub const KYBER1024: ::std::os::raw::c_uint = 4;
pub const KYBER512: ::std::os::raw::c_uint = 5;
pub const NTRUHPS4096821: ::std::os::raw::c_uint = 6;
pub const NTRUHPS2048509: ::std::os::raw::c_uint = 7;
pub const NTRUHRSS701: ::std::os::raw::c_uint = 8;
pub const NTRUHPS2048677: ::std::os::raw::c_uint = 9;
pub const NTRULPR761: ::std::os::raw::c_uint = 10;
pub const NTRULPR653: ::std::os::raw::c_uint = 11;
pub const NTRULPR857: ::std::os::raw::c_uint = 12;
pub const LIGHTSABER: ::std::os::raw::c_uint = 13;
pub const FIRESABER: ::std::os::raw::c_uint = 14;
pub const SABER: ::std::os::raw::c_uint = 15;
pub const HQCRMRS128: ::std::os::raw::c_uint = 16;
pub const HQCRMRS192: ::std::os::raw::c_uint = 17;
pub const HQCRMRS256: ::std::os::raw::c_uint = 18;
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 19;
pub const PQC_ALG_KEM_FRODOKEM640SHAKE: ::std::os::raw::c_uint = 0;
pub const PQC_ALG_KEM_FRODOKEM976SHAKE: ::std::os::raw::c_uint = 1;
pub const PQC_ALG_KEM_FRODOKEM1344SHAKE: ::std::os::raw::c_uint = 2;
pub const PQC_ALG_KEM_KYBER512: ::std::os::raw::c_uint = 3;
pub const PQC_ALG_KEM_KYBER768: ::std::os::raw::c_uint = 4;
pub const PQC_ALG_KEM_KYBER1024: ::std::os::raw::c_uint = 5;
pub const PQC_ALG_KEM_NTRUHPS2048509: ::std::os::raw::c_uint = 6;
pub const PQC_ALG_KEM_NTRUHPS4096821: ::std::os::raw::c_uint = 7;
pub const PQC_ALG_KEM_NTRUHRSS701: ::std::os::raw::c_uint = 8;
pub const PQC_ALG_KEM_NTRUHPS2048677: ::std::os::raw::c_uint = 9;
pub const PQC_ALG_KEM_NTRULPR761: ::std::os::raw::c_uint = 10;
pub const PQC_ALG_KEM_NTRULPR653: ::std::os::raw::c_uint = 11;
pub const PQC_ALG_KEM_NTRULPR857: ::std::os::raw::c_uint = 12;
pub const PQC_ALG_KEM_LIGHTSABER: ::std::os::raw::c_uint = 13;
pub const PQC_ALG_KEM_SABER: ::std::os::raw::c_uint = 14;
pub const PQC_ALG_KEM_FIRESABER: ::std::os::raw::c_uint = 15;
pub const PQC_ALG_KEM_HQCRMRS128: ::std::os::raw::c_uint = 16;
pub const PQC_ALG_KEM_HQCRMRS192: ::std::os::raw::c_uint = 17;
pub const PQC_ALG_KEM_HQCRMRS256: ::std::os::raw::c_uint = 18;
pub const PQC_ALG_KEM_SIKE434: ::std::os::raw::c_uint = 19;
pub const PQC_ALG_KEM_MCELIECE348864: ::std::os::raw::c_uint = 20;
pub const PQC_ALG_KEM_MCELIECE460896: ::std::os::raw::c_uint = 21;
pub const PQC_ALG_KEM_MCELIECE6688128: ::std::os::raw::c_uint = 22;
pub const PQC_ALG_KEM_MCELIECE6960119: ::std::os::raw::c_uint = 23;
pub const PQC_ALG_KEM_MCELIECE8192128: ::std::os::raw::c_uint = 24;
pub const PQC_ALG_KEM_MCELIECE348864F: ::std::os::raw::c_uint = 25;
pub const PQC_ALG_KEM_MCELIECE460896F: ::std::os::raw::c_uint = 26;
pub const PQC_ALG_KEM_MCELIECE6688128F: ::std::os::raw::c_uint = 27;
pub const PQC_ALG_KEM_MCELIECE6960119F: ::std::os::raw::c_uint = 28;
pub const PQC_ALG_KEM_MCELIECE8192128F: ::std::os::raw::c_uint = 29;
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 30;
pub type _bindgen_ty_2 = ::std::os::raw::c_uint;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct params_t {
pub struct pqc_ctx_t {
pub alg_id: u8,
pub alg_name: *const ::std::os::raw::c_char,
pub prv_key_bsz: u32,
@@ -271,87 +282,87 @@ pub struct params_t {
>,
}
#[test]
fn bindgen_test_layout_params_t() {
fn bindgen_test_layout_pqc_ctx_t() {
assert_eq!(
::std::mem::size_of::<params_t>(),
::std::mem::size_of::<pqc_ctx_t>(),
40usize,
concat!("Size of: ", stringify!(params_t))
concat!("Size of: ", stringify!(pqc_ctx_t))
);
assert_eq!(
::std::mem::align_of::<params_t>(),
::std::mem::align_of::<pqc_ctx_t>(),
8usize,
concat!("Alignment of ", stringify!(params_t))
concat!("Alignment of ", stringify!(pqc_ctx_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<params_t>())).alg_id as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).alg_id as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(params_t),
stringify!(pqc_ctx_t),
"::",
stringify!(alg_id)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<params_t>())).alg_name as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).alg_name as *const _ as usize },
8usize,
concat!(
"Offset of field: ",
stringify!(params_t),
stringify!(pqc_ctx_t),
"::",
stringify!(alg_name)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<params_t>())).prv_key_bsz as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).prv_key_bsz as *const _ as usize },
16usize,
concat!(
"Offset of field: ",
stringify!(params_t),
stringify!(pqc_ctx_t),
"::",
stringify!(prv_key_bsz)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<params_t>())).pub_key_bsz as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).pub_key_bsz as *const _ as usize },
20usize,
concat!(
"Offset of field: ",
stringify!(params_t),
stringify!(pqc_ctx_t),
"::",
stringify!(pub_key_bsz)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<params_t>())).is_kem as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).is_kem as *const _ as usize },
24usize,
concat!(
"Offset of field: ",
stringify!(params_t),
stringify!(pqc_ctx_t),
"::",
stringify!(is_kem)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<params_t>())).keygen as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_ctx_t>())).keygen as *const _ as usize },
32usize,
concat!(
"Offset of field: ",
stringify!(params_t),
stringify!(pqc_ctx_t),
"::",
stringify!(keygen)
)
);
}
impl Default for params_t {
impl Default for pqc_ctx_t {
fn default() -> Self {
unsafe { ::std::mem::zeroed() }
}
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct kem_params_t {
pub p: params_t,
pub struct pqc_kem_ctx_t {
pub p: pqc_ctx_t,
pub ciphertext_bsz: u32,
pub secret_bsz: u32,
pub encapsulate: ::std::option::Option<
@@ -362,77 +373,77 @@ pub struct kem_params_t {
>,
}
#[test]
fn bindgen_test_layout_kem_params_t() {
fn bindgen_test_layout_pqc_kem_ctx_t() {
assert_eq!(
::std::mem::size_of::<kem_params_t>(),
::std::mem::size_of::<pqc_kem_ctx_t>(),
64usize,
concat!("Size of: ", stringify!(kem_params_t))
concat!("Size of: ", stringify!(pqc_kem_ctx_t))
);
assert_eq!(
::std::mem::align_of::<kem_params_t>(),
::std::mem::align_of::<pqc_kem_ctx_t>(),
8usize,
concat!("Alignment of ", stringify!(kem_params_t))
concat!("Alignment of ", stringify!(pqc_kem_ctx_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<kem_params_t>())).p as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).p as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(kem_params_t),
stringify!(pqc_kem_ctx_t),
"::",
stringify!(p)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<kem_params_t>())).ciphertext_bsz as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).ciphertext_bsz as *const _ as usize },
40usize,
concat!(
"Offset of field: ",
stringify!(kem_params_t),
stringify!(pqc_kem_ctx_t),
"::",
stringify!(ciphertext_bsz)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<kem_params_t>())).secret_bsz as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).secret_bsz as *const _ as usize },
44usize,
concat!(
"Offset of field: ",
stringify!(kem_params_t),
stringify!(pqc_kem_ctx_t),
"::",
stringify!(secret_bsz)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<kem_params_t>())).encapsulate as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).encapsulate as *const _ as usize },
48usize,
concat!(
"Offset of field: ",
stringify!(kem_params_t),
stringify!(pqc_kem_ctx_t),
"::",
stringify!(encapsulate)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<kem_params_t>())).decapsulate as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_kem_ctx_t>())).decapsulate as *const _ as usize },
56usize,
concat!(
"Offset of field: ",
stringify!(kem_params_t),
stringify!(pqc_kem_ctx_t),
"::",
stringify!(decapsulate)
)
);
}
impl Default for kem_params_t {
impl Default for pqc_kem_ctx_t {
fn default() -> Self {
unsafe { ::std::mem::zeroed() }
}
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct sig_params_t {
pub p: params_t,
pub struct pqc_sig_ctx_t {
pub p: pqc_ctx_t,
pub sign_bsz: u32,
pub sign: ::std::option::Option<
unsafe extern "C" fn(
@@ -454,73 +465,77 @@ pub struct sig_params_t {
>,
}
#[test]
fn bindgen_test_layout_sig_params_t() {
fn bindgen_test_layout_pqc_sig_ctx_t() {
assert_eq!(
::std::mem::size_of::<sig_params_t>(),
::std::mem::size_of::<pqc_sig_ctx_t>(),
64usize,
concat!("Size of: ", stringify!(sig_params_t))
concat!("Size of: ", stringify!(pqc_sig_ctx_t))
);
assert_eq!(
::std::mem::align_of::<sig_params_t>(),
::std::mem::align_of::<pqc_sig_ctx_t>(),
8usize,
concat!("Alignment of ", stringify!(sig_params_t))
concat!("Alignment of ", stringify!(pqc_sig_ctx_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<sig_params_t>())).p as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).p as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(sig_params_t),
stringify!(pqc_sig_ctx_t),
"::",
stringify!(p)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<sig_params_t>())).sign_bsz as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).sign_bsz as *const _ as usize },
40usize,
concat!(
"Offset of field: ",
stringify!(sig_params_t),
stringify!(pqc_sig_ctx_t),
"::",
stringify!(sign_bsz)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<sig_params_t>())).sign as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).sign as *const _ as usize },
48usize,
concat!(
"Offset of field: ",
stringify!(sig_params_t),
stringify!(pqc_sig_ctx_t),
"::",
stringify!(sign)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<sig_params_t>())).verify as *const _ as usize },
unsafe { &(*(::std::ptr::null::<pqc_sig_ctx_t>())).verify as *const _ as usize },
56usize,
concat!(
"Offset of field: ",
stringify!(sig_params_t),
stringify!(pqc_sig_ctx_t),
"::",
stringify!(verify)
)
);
}
impl Default for sig_params_t {
impl Default for pqc_sig_ctx_t {
fn default() -> Self {
unsafe { ::std::mem::zeroed() }
}
}
extern "C" {
pub fn pqc_keygen(p: *const params_t, pk: *mut u8, sk: *mut u8) -> bool;
pub fn pqc_keygen(p: *const pqc_ctx_t, pk: *mut u8, sk: *mut u8) -> bool;
}
extern "C" {
pub fn pqc_kem_encapsulate(p: *const params_t, ct: *mut u8, ss: *mut u8, pk: *const u8)
-> bool;
pub fn pqc_kem_encapsulate(
p: *const pqc_ctx_t,
ct: *mut u8,
ss: *mut u8,
pk: *const u8,
) -> bool;
}
extern "C" {
pub fn pqc_kem_decapsulate(
p: *const params_t,
p: *const pqc_ctx_t,
ss: *mut u8,
ct: *const u8,
sk: *const u8,
@@ -528,7 +543,7 @@ extern "C" {
}
extern "C" {
pub fn pqc_sig_create(
p: *const params_t,
p: *const pqc_ctx_t,
sig: *mut u8,
siglen: *mut u64,
m: *const u8,
@@ -538,7 +553,7 @@ extern "C" {
}
extern "C" {
pub fn pqc_sig_verify(
p: *const params_t,
p: *const pqc_ctx_t,
sig: *const u8,
siglen: u64,
m: *const u8,
@@ -547,8 +562,23 @@ extern "C" {
) -> bool;
}
extern "C" {
pub fn pqc_kem_alg_by_id(id: u8) -> *const params_t;
pub fn pqc_kem_alg_by_id(id: u8) -> *const pqc_ctx_t;
}
extern "C" {
pub fn pqc_sig_alg_by_id(id: u8) -> *const pqc_ctx_t;
}
extern "C" {
pub fn pqc_ciphertext_bsz(p: *const pqc_ctx_t) -> u32;
}
extern "C" {
pub fn pqc_shared_secret_bsz(p: *const pqc_ctx_t) -> u32;
}
extern "C" {
pub fn pqc_signature_bsz(p: *const pqc_ctx_t) -> u32;
}
extern "C" {
pub fn pqc_public_key_bsz(p: *const pqc_ctx_t) -> u32;
}
extern "C" {
pub fn pqc_sig_alg_by_id(id: u8) -> *const params_t;
pub fn pqc_private_key_bsz(p: *const pqc_ctx_t) -> u32;
}

+ 4
- 2
src/rustapi/pqc-sys/src/build.rs View File

@@ -4,12 +4,14 @@ extern crate bindgen;

fn main() {
let dst = Config::new("../../../")
.profile("Release")
.profile("Debug")
.very_verbose(true)
.build();
.build();

println!("cargo:rustc-link-search=native={}/lib", dst.display());
println!("cargo:rustc-link-lib=static=pqc_s");
// For some reason GetX86Info symbol is undefined in the pqc_s. Hence this line
println!("cargo:rustc-link-lib=static=cpu_features");
println!("cargo:rerun-if-changed=../../../capi/*,../../../kem/*,../../../sign/*,../../../../public/pqc/pqc.h");

// The bindgen::Builder is the main entry point


+ 17
- 0
src/sign/falcon/CMakeLists.txt View File

@@ -0,0 +1,17 @@
# Source files of the portable ("clean") Falcon implementation. The file
# names are given relative to this directory, which is passed to
# define_sig_alg() below as the last argument.
set(
SRC_CLEAN_FALCON
api.c
codec.c
common.c
falcon.c
fft.c
fpr.c
keygen.c
rng.c
sign.c
vrfy.c
)

# Register the sources above through the project-level define_sig_alg()
# helper (defined outside this file).
# NOTE(review): the target is named falcon1024_clean although api.c exposes
# both the Falcon-512 and the Falcon-1024 entry points -- confirm that the
# name is intentional.
define_sig_alg(
falcon1024_clean
PQCLEAN_FALCON_CLEAN "${SRC_CLEAN_FALCON}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 77
- 0
src/sign/falcon/api.c View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include <common/utils.h>
#include "inner.h"
#include "api.h"

// Forward declarations of the signature API shared by all parameter sets.
// The definitions are not part of this file. Zf() is a macro provided by
// inner.h; presumably it decorates the name with a project prefix -- confirm
// against inner.h.
//
// Every entry point takes explicit buffer sizes plus 'logn'; the wrappers
// below pass logn = 9 for Falcon-512 and logn = 10 for Falcon-1024.
int Zf(keypair)(uint8_t *pk, size_t pk_sz, uint8_t *sk, size_t sk_sz, size_t logn);
int Zf(sign)(uint8_t *sm, size_t *smsz, const uint8_t *m, size_t msz,
const uint8_t *sk, size_t sk_sz, size_t logn);
// Note the argument order: message first, then signature, then public key;
// 'sig_sz' receives the CRYPTO_BYTES constant of the parameter set.
int Zf(verify)(const uint8_t *m, size_t msz, const uint8_t *sm, size_t smsz,
const uint8_t *pk, size_t pk_sz, size_t logn, size_t sig_sz);

// Integration wrappers

// Falcon 512
/*
 * Falcon-512 key generation: forwards to the shared keypair routine with
 * the Falcon-512 key sizes and logn = 9. The result of that routine is
 * returned unchanged.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) {
    const size_t pk_len = PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES;
    const size_t sk_len = PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES;
    const size_t degree_log2 = 9;

    return Zf(keypair)(pk, pk_len, sk, sk_len, degree_log2);
}

/*
 * Falcon-512 signing: forwards to the shared sign routine with the
 * Falcon-512 secret key size and logn = 9. The signature goes to sig[] and
 * its length to *siglen; the result of the shared routine is returned
 * unchanged.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk) {
    const size_t sk_len = PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES;
    const size_t degree_log2 = 9;

    return Zf(sign)(sig, siglen, m, mlen, sk, sk_len, degree_log2);
}

/*
 * Falcon-512 verification: forwards to the shared verify routine (which
 * takes the message before the signature) with the Falcon-512 public key
 * size, logn = 9 and the Falcon-512 CRYPTO_BYTES constant. The result of
 * the shared routine is returned unchanged.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk) {
    const size_t pk_len = PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES;
    const size_t sig_bytes = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES;
    const size_t degree_log2 = 9;

    return Zf(verify)(m, mlen, sig, siglen, pk, pk_len, degree_log2, sig_bytes);
}

// Falcon 1024
/*
 * Falcon-1024 key generation: forwards to the shared keypair routine with
 * the Falcon-1024 key sizes and logn = 10. The result of that routine is
 * returned unchanged.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) {
    const size_t pk_len = PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES;
    const size_t sk_len = PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES;
    const size_t degree_log2 = 10;

    return Zf(keypair)(pk, pk_len, sk, sk_len, degree_log2);
}

/*
 * Falcon-1024 signing: forwards to the shared sign routine with the
 * Falcon-1024 secret key size and logn = 10. The signature goes to sig[]
 * and its length to *siglen; the result of the shared routine is returned
 * unchanged.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk) {
    const size_t sk_len = PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES;
    const size_t degree_log2 = 10;

    return Zf(sign)(sig, siglen, m, mlen, sk, sk_len, degree_log2);
}

/*
 * Falcon-1024 verification: forwards to the shared verify routine (which
 * takes the message before the signature) with the Falcon-1024 public key
 * size, logn = 10 and the Falcon-1024 CRYPTO_BYTES constant. The result of
 * the shared routine is returned unchanged.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk) {
    const size_t pk_len = PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES;
    const size_t sig_bytes = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES;
    const size_t degree_log2 = 10;

    return Zf(verify)(m, mlen, sig, siglen, pk, pk_len, degree_log2, sig_bytes);
}

+ 37
- 0
src/sign/falcon/api.h View File

@@ -0,0 +1,37 @@
/*
 * Public entry points of the portable Falcon implementation, for the
 * Falcon-512 and Falcon-1024 parameter sets.
 */
#ifndef PQCLEAN_FALCON_CLEAN_API_H
#define PQCLEAN_FALCON_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/* Falcon-512 sizes, in bytes, and algorithm name. */
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES 897
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES 1281
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES 690
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_ALGNAME "Falcon512"

/* Falcon-1024 sizes, in bytes, and algorithm name. */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES 1793
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES 2305
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES 1330
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME "Falcon1024"

/*
 * Generate a Falcon-512 key pair into pk[] and sk[].
 * NOTE(review): the return convention is that of the underlying keypair
 * routine, which is not visible from this header -- presumably 0 on
 * success; confirm.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk);

/*
 * Sign the message (m, mlen) with the Falcon-512 secret key sk[]; the
 * signature is written to sig[] and its length to *siglen.
 * NOTE(review): sig[] presumably needs room for
 * PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES bytes -- confirm.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature(
uint8_t *sig, size_t *siglen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Verify the signature (sig, siglen) on the message (m, mlen) with the
 * Falcon-512 public key pk[].
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify(
const uint8_t *sig, size_t siglen,
const uint8_t *m, size_t mlen, const uint8_t *pk);

/* Falcon-1024 counterpart of the Falcon-512 key generation above. */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk);

/* Falcon-1024 counterpart of the Falcon-512 signing function above. */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
uint8_t *sig, size_t *siglen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/* Falcon-1024 counterpart of the Falcon-512 verification function above. */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
const uint8_t *sig, size_t siglen,
const uint8_t *m, size_t mlen, const uint8_t *pk);

#endif

+ 570
- 0
src/sign/falcon/codec.c View File

@@ -0,0 +1,570 @@
/*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/*
* Encoding/decoding of keys and signatures.
*/
#include "inner.h"

/* see inner.h */
size_t
Zf(modq_encode)(
void *out, size_t max_out_len,
const uint16_t *x, unsigned logn)
{
size_t n, out_len, u;
uint8_t *buf;
uint32_t acc;
int acc_len;

n = (size_t)1 << logn;
for (u = 0; u < n; u ++) {
if (x[u] >= 12289) {
return 0;
}
}
out_len = ((n * 14) + 7) >> 3;
if (out == NULL) {
return out_len;
}
if (out_len > max_out_len) {
return 0;
}
buf = out;
acc = 0;
acc_len = 0;
for (u = 0; u < n; u ++) {
acc = (acc << 14) | x[u];
acc_len += 14;
while (acc_len >= 8) {
acc_len -= 8;
*buf ++ = (uint8_t)(acc >> acc_len);
}
}
if (acc_len > 0) {
*buf = (uint8_t)(acc << (8 - acc_len));
}
return out_len;
}

/* see inner.h */
size_t
Zf(modq_decode)(
uint16_t *x, unsigned logn,
const void *in, size_t max_in_len)
{
size_t n, in_len, u;
const uint8_t *buf;
uint32_t acc;
int acc_len;

n = (size_t)1 << logn;
in_len = ((n * 14) + 7) >> 3;
if (in_len > max_in_len) {
return 0;
}
buf = in;
acc = 0;
acc_len = 0;
u = 0;
while (u < n) {
acc = (acc << 8) | (*buf ++);
acc_len += 8;
if (acc_len >= 14) {
unsigned w;

acc_len -= 14;
w = (acc >> acc_len) & 0x3FFF;
if (w >= 12289) {
return 0;
}
x[u ++] = (uint16_t)w;
}
}
if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
return 0;
}
return in_len;
}

/* see inner.h */
size_t
Zf(trim_i16_encode)(
void *out, size_t max_out_len,
const int16_t *x, unsigned logn, unsigned bits)
{
size_t n, u, out_len;
int minv, maxv;
uint8_t *buf;
uint32_t acc, mask;
unsigned acc_len;

n = (size_t)1 << logn;
maxv = (1 << (bits - 1)) - 1;
minv = -maxv;
for (u = 0; u < n; u ++) {
if (x[u] < minv || x[u] > maxv) {
return 0;
}
}
out_len = ((n * bits) + 7) >> 3;
if (out == NULL) {
return out_len;
}
if (out_len > max_out_len) {
return 0;
}
buf = out;
acc = 0;
acc_len = 0;
mask = ((uint32_t)1 << bits) - 1;
for (u = 0; u < n; u ++) {
acc = (acc << bits) | ((uint16_t)x[u] & mask);
acc_len += bits;
while (acc_len >= 8) {
acc_len -= 8;
*buf ++ = (uint8_t)(acc >> acc_len);
}
}
if (acc_len > 0) {
*buf ++ = (uint8_t)(acc << (8 - acc_len));
}
return out_len;
}

/*
 * Unpack 2^logn signed values of 'bits' bits each from in[] into x[]
 * (see inner.h).
 *
 * Returns the number of bytes consumed, or 0 when the input is shorter
 * than required, when a field holds the forbidden value -2^(bits-1), or
 * when the unused trailing bits of the last byte are not all zero.
 */
size_t
Zf(trim_i16_decode)(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len)
{
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;  /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);  /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            /*
             * Sign-extend the field to 32 bits. This is done once
             * only, as in trim_i8_decode; repeating it has no effect.
             */
            w |= -(w & mask2);
            if (w == -mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            x[u ++] = (int16_t)*(int32_t *)&w;
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}

/* see inner.h */
size_t
Zf(trim_i8_encode)(
void *out, size_t max_out_len,
const int8_t *x, unsigned logn, unsigned bits)
{
size_t n, u, out_len;
int minv, maxv;
uint8_t *buf;
uint32_t acc, mask;
unsigned acc_len;

n = (size_t)1 << logn;
maxv = (1 << (bits - 1)) - 1;
minv = -maxv;
for (u = 0; u < n; u ++) {
if (x[u] < minv || x[u] > maxv) {
return 0;
}
}
out_len = ((n * bits) + 7) >> 3;
if (out == NULL) {
return out_len;
}
if (out_len > max_out_len) {
return 0;
}
buf = out;
acc = 0;
acc_len = 0;
mask = ((uint32_t)1 << bits) - 1;
for (u = 0; u < n; u ++) {
acc = (acc << bits) | ((uint8_t)x[u] & mask);
acc_len += bits;
while (acc_len >= 8) {
acc_len -= 8;
*buf ++ = (uint8_t)(acc >> acc_len);
}
}
if (acc_len > 0) {
*buf ++ = (uint8_t)(acc << (8 - acc_len));
}
return out_len;
}

/* see inner.h */
size_t
Zf(trim_i8_decode)(
int8_t *x, unsigned logn, unsigned bits,
const void *in, size_t max_in_len)
{
size_t n, in_len;
const uint8_t *buf;
size_t u;
uint32_t acc, mask1, mask2;
unsigned acc_len;

n = (size_t)1 << logn;
in_len = ((n * bits) + 7) >> 3;
if (in_len > max_in_len) {
return 0;
}
buf = in;
u = 0;
acc = 0;
acc_len = 0;
mask1 = ((uint32_t)1 << bits) - 1;
mask2 = (uint32_t)1 << (bits - 1);
while (u < n) {
acc = (acc << 8) | *buf ++;
acc_len += 8;
while (acc_len >= bits && u < n) {
uint32_t w;

acc_len -= bits;
w = (acc >> acc_len) & mask1;
w |= -(w & mask2);
if (w == -mask2) {
/*
* The -2^(bits-1) value is forbidden.
*/
return 0;
}
x[u ++] = (int8_t)*(int32_t *)&w;
}
}
if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
/*
* Extra bits in the last byte must be zero.
*/
return 0;
}
return in_len;
}

/*
 * Compressed encoding of the 2^logn values of x[] (see inner.h).
 *
 * Each value is written as: one sign bit, the low 7 bits of the absolute
 * value, then the remaining high bits of the absolute value in unary (that
 * many zero bits followed by a one bit).
 *
 * Every value must lie in -2047..+2047, otherwise 0 is returned. When out
 * is NULL nothing is written and the encoded length is returned. When the
 * output does not fit in max_out_len bytes, 0 is returned. Otherwise the
 * number of bytes written is returned.
 */
size_t
Zf(comp_encode)(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn)
{
    const size_t count = (size_t)1 << logn;
    uint8_t *dst = out;
    size_t idx;
    size_t written = 0;
    uint32_t bitbuf = 0;
    unsigned pending = 0;

    /* Reject any value outside of the encodable range. */
    for (idx = 0; idx < count; idx ++) {
        if (x[idx] < -2047 || x[idx] > +2047) {
            return 0;
        }
    }

    for (idx = 0; idx < count; idx ++) {
        int value = x[idx];
        unsigned negative = (value < 0);
        unsigned magnitude = (unsigned)(negative ? -value : value);
        unsigned high = magnitude >> 7;

        /* Sign bit, then the low 7 bits of the absolute value. */
        bitbuf = (bitbuf << 1) | negative;
        bitbuf = (bitbuf << 7) | (magnitude & 127u);

        /*
         * Unary part: 'high' zero bits followed by a one. Since the
         * absolute value is at most 2047, 'high' is at most 15, so at
         * most 16 bits are added here; with the 8 bits above and up to
         * 7 bits left from the previous iteration this stays within the
         * 32-bit accumulator.
         */
        bitbuf = (bitbuf << (high + 1)) | 1;
        pending += 8 + high + 1;

        /* Emit every complete byte. */
        for (; pending >= 8; written ++) {
            pending -= 8;
            if (dst == NULL) {
                continue;
            }
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(bitbuf >> pending);
        }
    }

    /* Flush the leftover bits, if any, left-aligned in a final byte. */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(bitbuf << (8 - pending));
        }
        written ++;
    }

    return written;
}

/* see inner.h */
size_t
Zf(comp_decode)(
int16_t *x, unsigned logn,
const void *in, size_t max_in_len)
{
const uint8_t *buf;
size_t n, u, v;
uint32_t acc;
unsigned acc_len;

n = (size_t)1 << logn;
buf = in;
acc = 0;
acc_len = 0;
v = 0;
for (u = 0; u < n; u ++) {
unsigned b, s, m;

/*
* Get next eight bits: sign and low seven bits of the
* absolute value.
*/
if (v >= max_in_len) {
return 0;
}
acc = (acc << 8) | (uint32_t)buf[v ++];
b = acc >> acc_len;
s = b & 128;
m = b & 127;

/*
* Get next bits until a 1 is reached.
*/
for (;;) {
if (acc_len == 0) {
if (v >= max_in_len) {
return 0;
}
acc = (acc << 8) | (uint32_t)buf[v ++];
acc_len = 8;
}
acc_len --;
if (((acc >> acc_len) & 1) != 0) {
break;
}
m += 128;
if (m > 2047) {
return 0;
}
}

/*
* "-0" is forbidden.
*/
if (s && m == 0) {
return 0;
}

x[u] = (int16_t)(s ? -(int)m : (int)m);
}

/*
* Unused bits in the last byte must be zero.
*/
if ((acc & ((1u << acc_len) - 1u)) != 0) {
return 0;
}

return v;
}

/*
* Key elements and signatures are polynomials with small integer
* coefficients. Here are some statistics gathered over many
* generated key pairs (10000 or more for each degree):
*
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G)
* 1 2 129 56.31 143 60.02
* 2 4 123 40.93 160 46.52
* 3 8 97 28.97 159 38.01
* 4 16 100 21.48 154 32.50
* 5 32 71 15.41 151 29.36
* 6 64 59 11.07 138 27.77
* 7 128 39 7.91 144 27.00
* 8 256 32 5.63 148 26.61
* 9 512 22 4.00 137 26.46
* 10 1024 15 2.84 146 26.41
*
* We want a compact storage format for private key, and, as part of
* key generation, we are allowed to reject some keys which would
* otherwise be fine (this does not induce any noticeable vulnerability
* as long as we reject only a small proportion of possible keys).
* Hence, we enforce at key generation time maximum values for the
* elements of f, g, F and G, so that their encoding can be expressed
* in fixed-width values. Limits have been chosen so that generated
* keys are almost always within bounds, thus impacting neither
* security nor performance.
*
* IMPORTANT: the code assumes that all coefficients of f, g, F and G
* ultimately fit in the -127..+127 range. Thus, none of the elements
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8.
*/

/*
 * Per-degree bit widths for f and g. Presumably indexed by logn (1 to 10),
 * like the statistics table above; entry 0 is unused.
 */
const uint8_t Zf(max_fg_bits)[] = {
    0,              /* unused */
    8, 8, 8, 8, 8,  /* entries 1..5 */
    7, 7,           /* entries 6..7 */
    6, 6,           /* entries 8..9 */
    5               /* entry 10 */
};

/*
 * Per-degree bit widths for F and G. Presumably indexed by logn (1 to 10),
 * like the statistics table above; entry 0 is unused and every other entry
 * is 8.
 */
const uint8_t Zf(max_FG_bits)[] = {
    0,              /* unused */
    8, 8, 8, 8, 8,  /* entries 1..5 */
    8, 8, 8, 8, 8   /* entries 6..10 */
};

/*
* When generating a new key pair, we can always reject keys which
* feature an abnormally large coefficient. This can also be done for
* signatures, albeit with some care: in case the signature process is
* used in a derandomized setup (explicitly seeded with the message and
* private key), we have to follow the specification faithfully, and the
* specification only enforces a limit on the L2 norm of the signature
* vector. The limit on the L2 norm implies that the absolute value of
* a coefficient of the signature cannot be more than the following:
*
* log(n) n max sig coeff (theoretical)
* 1 2 412
* 2 4 583
* 3 8 824
* 4 16 1166
* 5 32 1649
* 6 64 2332
* 7 128 3299
* 8 256 4665
* 9 512 6598
* 10 1024 9331
*
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit
* in -2047..2047, i.e. 12 bits.
*/

/*
 * Per-degree bit widths for signature coefficients. Presumably indexed by
 * logn (1 to 10), like the table above; entry 0 is unused.
 */
const uint8_t Zf(max_sig_bits)[] = {
    0,                          /* unused */
    10,                         /* entry 1 */
    11, 11,                     /* entries 2..3 */
    12, 12, 12, 12, 12, 12, 12  /* entries 4..10 */
};

+ 298
- 0
src/sign/falcon/common.c View File

@@ -0,0 +1,298 @@
/*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "inner.h"

/*
 * Fill x[] with 2^logn values in 0..12288 derived from the SHAKE256
 * context sc (see inner.h).
 *
 * This is the straightforward rejection-sampling variant. It is not
 * constant-time: the number of squeezed samples depends on the hashed
 * data, so it might reveal information on the plaintext to an attacker
 * who knows the nonce but has neither the signature nor the public key.
 */
void
Zf(hash_to_point_vartime)(
    shake256incctx *sc,
    uint16_t *x, unsigned logn)
{
    size_t remaining = (size_t)1 << logn;

    while (remaining > 0) {
        uint8_t pair[2];
        uint32_t sample;

        /* Read the next 16-bit big-endian sample. */
        shake256_inc_squeeze((void *)pair, sizeof pair, sc);
        sample = ((unsigned)pair[0] << 8) | (unsigned)pair[1];
        if (sample >= 61445) {
            /* Outside of 0..61444 (61445 = 5*12289): rejected. */
            continue;
        }
        sample %= 12289;
        *x ++ = (uint16_t)sample;
        remaining --;
    }
}

/*
 * Variant of hash_to_point that oversamples a fixed number of 16-bit
 * values and then compacts the valid ones with masked swaps instead of
 * data-dependent branches (see inner.h).
 *
 * sc    SHAKE256 context from which the samples are squeezed.
 * x     receives the first 2^logn generated values; after compaction it
 *       holds the result.
 * logn  base-2 logarithm of the degree; indexes overtab[] (1 to 10).
 * tmp   scratch area, used as an array of uint16_t for values n..2*n-1.
 *       NOTE(review): required size and alignment are documented in
 *       inner.h, not visible here -- confirm.
 */
void
Zf(hash_to_point_ct)(
shake256incctx *sc,
uint16_t *x, unsigned logn, uint8_t *tmp)
{
/*
* Each 16-bit sample is a value in 0..65535. The value is
* kept if it falls in 0..61444 (because 61445 = 5*12289)
* and rejected otherwise; thus, each sample has probability
* about 0.93758 of being selected.
*
* We want to oversample enough to be sure that we will
* have enough values with probability at least 1 - 2^(-256).
* Depending on degree N, this leads to the following
* required oversampling:
*
* logn n oversampling
* 1 2 65
* 2 4 67
* 3 8 71
* 4 16 77
* 5 32 86
* 6 64 100
* 7 128 122
* 8 256 154
* 9 512 205
* 10 1024 287
*
* If logn >= 7, then the provided temporary buffer is large
* enough. Otherwise, we use a stack buffer of 63 entries
* (i.e. 126 bytes) for the values that do not fit in tmp[].
*/

/* Number of extra samples to generate, indexed by logn. */
static const uint16_t overtab[] = {
0, /* unused */
65,
67,
71,
77,
86,
100,
122,
154,
205,
287
};

unsigned n, n2, u, m, p, over;
uint16_t *tt1, tt2[63];

/*
* We first generate m 16-bit values. Values 0..n-1 go to x[].
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
* We also reduce modulo q the values; rejected values are set
* to 0xFFFF.
*/
n = 1U << logn;
n2 = n << 1;
over = overtab[logn];
m = n + over;
tt1 = (uint16_t *)tmp;
for (u = 0; u < m; u ++) {
uint8_t buf[2];
uint32_t w, wr;

shake256_inc_squeeze(buf, sizeof buf, sc);
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
/*
* Branchless reduction: each line subtracts the constant only
* when the current value is at least that constant. The mask
* ((v - c) >> 31) - 1 is all-ones when v >= c, and zero when
* v < c (the subtraction then wraps and sets bit 31).
*/
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1));
/*
* Samples with w >= 61445 are rejected: all bits are set, which
* yields 0xFFFF once truncated to 16 bits.
*/
wr |= ((w - 61445) >> 31) - 1;
if (u < n) {
x[u] = (uint16_t)wr;
} else if (u < n2) {
tt1[u - n] = (uint16_t)wr;
} else {
tt2[u - n2] = (uint16_t)wr;
}
}

/*
* Now we must "squeeze out" the invalid values. We do this in
* a logarithmic sequence of passes; each pass computes where a
* value should go, and moves it down by 'p' slots if necessary,
* where 'p' uses an increasing powers-of-two scale. It can be
* shown that in all cases where the loop decides that a value
* has to be moved down by p slots, the destination slot is
* "free" (i.e. contains an invalid value).
*/
for (p = 1; p <= over; p <<= 1) {
unsigned v;

/*
* In the loop below:
*
* - v contains the index of the final destination of
* the value; it is recomputed dynamically based on
* whether values are valid or not.
*
* - u is the index of the value we consider ("source");
* its address is s.
*
* - The loop may swap the value with the one at index
* u-p. The address of the swap destination is d.
*/
v = 0;
for (u = 0; u < m; u ++) {
uint16_t *s, *d;
unsigned j, sv, dv, mk;

/* Locate source slot u in x[], tt1[] or tt2[]. */
if (u < n) {
s = &x[u];
} else if (u < n2) {
s = &tt1[u - n];
} else {
s = &tt2[u - n2];
}
sv = *s;

/*
* The value in sv should ultimately go to
* address v, i.e. jump back by u-v slots.
*/
j = u - v;

/*
* We increment v for the next iteration, but
* only if the source value is valid. The mask
* 'mk' is -1 if the value is valid, 0 otherwise,
* so we _subtract_ mk.
*/
mk = (sv >> 15) - 1U;
v -= mk;

/*
* In this loop we consider jumps by p slots; if
* u < p then there is nothing more to do.
*/
if (u < p) {
continue;
}

/*
* Destination for the swap: value at address u-p.
*/
if ((u - p) < n) {
d = &x[u - p];
} else if ((u - p) < n2) {
d = &tt1[(u - p) - n];
} else {
d = &tt2[(u - p) - n2];
}
dv = *d;

/*
* The swap should be performed only if the source
* is valid AND the jump j has its 'p' bit set.
* (j & p) is either 0 or p; adding 0x1FF and shifting
* right by 9 turns that into 0 or 1, since p is at most 256.
*/
mk &= -(((j & p) + 0x1FF) >> 9);

/* Masked swap: exchanges *s and *d when mk is all-ones. */
*s = (uint16_t)(sv ^ (mk & (sv ^ dv)));
*d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
}
}
}

/*
* Acceptance bound for the (squared) l2-norm of the signature depends
* on the degree. This array is indexed by logn (1 to 10). These bounds
* are _inclusive_ (they are equal to floor(beta^2)).
*/
static const uint32_t l2bound[] = {
    0,          /* logn  0: unused */
    101498,     /* logn  1 */
    208714,     /* logn  2 */
    428865,     /* logn  3 */
    892039,     /* logn  4 */
    1852696,    /* logn  5 */
    3842630,    /* logn  6 */
    7959734,    /* logn  7 */
    16468416,   /* logn  8 */
    34034726,   /* logn  9 */
    70265242    /* logn 10 */
};

/* see inner.h */
int
Zf(is_short)(
const int16_t *s1, const int16_t *s2, unsigned logn)
{
/*
* We use the l2-norm. Code below uses only 32-bit operations to
* compute the square of the norm with saturation to 2^32-1 if
* the value exceeds 2^31-1.
*/
size_t n, u;
uint32_t s, ng;

n = (size_t)1 << logn;
s = 0;
ng = 0;
for (u = 0; u < n; u ++) {
int32_t z;

z = s1[u];
s += (uint32_t)(z * z);
ng |= s;
z = s2[u];
s += (uint32_t)(z * z);
ng |= s;
}
s |= -(ng >> 31);

return s <= l2bound[logn];
}

/*
 * Same check as is_short(), but the squared norm of the first half is
 * supplied by the caller in sqn and only the 2^logn coefficients of s2[]
 * are accumulated here (see inner.h). Returns 1 when the total is within
 * the inclusive bound l2bound[logn], 0 otherwise.
 *
 * The total is saturated to 2^32-1 if sqn or any partial sum has bit 31
 * set.
 */
int
Zf(is_short_half)(
    uint32_t sqn, const int16_t *s2, unsigned logn)
{
    const size_t count = (size_t)1 << logn;
    uint32_t seen = -(sqn >> 31);  /* all-ones if sqn is already too large */
    size_t idx;

    for (idx = 0; idx < count; idx ++) {
        int32_t coeff = s2[idx];

        sqn += (uint32_t)(coeff * coeff);
        seen |= sqn;
    }
    sqn |= -(seen >> 31);

    return sqn <= l2bound[logn];
}

+ 0
- 15
src/sign/falcon/falcon-1024/avx2/CMakeLists.txt View File

@@ -1,15 +0,0 @@
# Source files of the AVX2 build of Falcon-1024, relative to this directory.
set(
SRC_AVX2_FALCON1024
codec.c
common.c
fft.c
fpr.c
keygen.c
pqclean.c
rng.c
sign.c
vrfy.c)

# Register the sources above through the project-level define_sig_alg()
# helper (defined outside this file).
define_sig_alg(
falcon1024_avx2
PQCLEAN_FALCON1024_AVX2 "${SRC_AVX2_FALCON1024}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 80
src/sign/falcon/falcon-1024/avx2/api.h View File

@@ -1,80 +0,0 @@
#ifndef PQCLEAN_FALCON1024_AVX2_API_H
#define PQCLEAN_FALCON1024_AVX2_API_H

#include <stddef.h>
#include <stdint.h>

#define PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES 2305
#define PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES 1793
#define PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES 1330

#define PQCLEAN_FALCON1024_AVX2_CRYPTO_ALGNAME "Falcon-1024"

/*
* Generate a new key pair. Public key goes into pk[], private key in sk[].
* Key sizes are exact (in bytes):
* public (pk): PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES
* private (sk): PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_AVX2_crypto_sign_keypair(
uint8_t *pk, uint8_t *sk);

/*
* Compute a signature on a provided message (m, mlen), with a given
* private key (sk). Signature is written in sig[], with length written
* into *siglen. Signature length is variable; maximum signature length
* (in bytes) is PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES.
*
* sig[], m[] and sk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_AVX2_crypto_sign_signature(
uint8_t *sig, size_t *siglen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
* Verify a signature (sig, siglen) on a message (m, mlen) with a given
* public key (pk).
*
* sig[], m[] and pk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_AVX2_crypto_sign_verify(
const uint8_t *sig, size_t siglen,
const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
* Compute a signature on a message and pack the signature and message
* into a single object, written into sm[]. The length of that output is
* written in *smlen; that length may be larger than the message length
* (mlen) by up to PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES.
*
* sm[] and m[] may overlap each other arbitrarily; however, sm[] shall
* not overlap with sk[].
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_AVX2_crypto_sign(
uint8_t *sm, size_t *smlen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
* Open a signed message object (sm, smlen) and verify the signature;
* on success, the message itself is written into m[] and its length
* into *mlen. The message is shorter than the signed message object,
* but the size difference depends on the signature value; the difference
* may range up to PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES.
*
* m[], sm[] and pk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_AVX2_crypto_sign_open(
uint8_t *m, size_t *mlen,
const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif

+ 0
- 555
src/sign/falcon/falcon-1024/avx2/codec.c View File

@@ -1,555 +0,0 @@
#include "inner.h"

/*
* Encoding/decoding of keys and signatures.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/*
 * Pack 2^logn values, each in 0..12288, as consecutive 14-bit big-endian
 * fields. Returns the encoded length in bytes, or 0 if a value is out of
 * range or the output buffer is too small. With out == NULL only the
 * required length is computed (the range check still applies).
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * 14) + 7) >> 3;
    uint8_t *dst;
    uint32_t window = 0;
    int pending = 0;
    size_t i;

    /* Every coefficient must already be reduced modulo 12289. */
    for (i = 0; i < count; i ++) {
        if (x[i] >= 12289) {
            return 0;
        }
    }
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    for (i = 0; i < count; i ++) {
        window = (window << 14) | x[i];
        for (pending += 14; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }

    /* Left-align whatever is left in a final, zero-padded byte. */
    if (pending > 0) {
        *dst = (uint8_t)(window << (8 - pending));
    }
    return need;
}

/*
 * Unpack 2^logn 14-bit big-endian fields into x[]. Returns the number of
 * bytes consumed, or 0 if the input is too short, a field is >= 12289,
 * or the unused low bits of the last byte are not all zero.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0;
    int pending = 0;
    size_t filled = 0;

    if (need > max_in_len) {
        return 0;
    }
    while (filled < count) {
        unsigned coeff;

        window = (window << 8) | (*src ++);
        pending += 8;
        if (pending < 14) {
            continue;
        }
        pending -= 14;
        coeff = (window >> pending) & 0x3FFF;
        if (coeff >= 12289) {
            return 0;
        }
        x[filled ++] = (uint16_t)coeff;
    }

    /* Padding bits after the last field must be zero. */
    if ((window & (((uint32_t)1 << pending) - 1)) != 0) {
        return 0;
    }
    return need;
}

/*
 * Pack 2^logn signed 16-bit values as consecutive 'bits'-wide
 * two's-complement fields (big-endian bit order). Every value must lie in
 * -(2^(bits-1)-1) .. +(2^(bits-1)-1). Returns the encoded length in bytes,
 * or 0 on a range error or if the output buffer is too small. With
 * out == NULL only the required length is computed.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    size_t i, need;
    uint8_t *dst;
    uint32_t window, keep;
    unsigned pending;

    for (i = 0; i < count; i ++) {
        if (x[i] > hi || x[i] < lo) {
            return 0;
        }
    }
    need = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    keep = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint16_t)x[i] & keep);
        pending += bits;
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return need;
}

/*
 * Unpack 2^logn signed values stored as consecutive 'bits'-wide
 * two's-complement fields (big-endian bit order) into x[]. Returns the
 * number of bytes consumed, or 0 if the input is too short, a field holds
 * the forbidden value -2^(bits-1), or the padding bits of the last byte
 * are not all zero.
 *
 * The sign extension is done with plain, branch-free integer arithmetic
 * (field - 2^bits when the sign bit is set) rather than by setting the
 * upper bits of a uint32_t and reading it back through an int32_t
 * pointer; the results are identical for bits <= 16.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;   /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);   /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * Sign bit alone: this is -2^(bits-1), which is
                 * forbidden.
                 */
                return 0;
            }
            /*
             * Subtract 2^bits when the sign bit is set; no branch on
             * the decoded value.
             */
            x[u ++] = (int16_t)((int32_t)w - (int32_t)((w & mask2) << 1));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}

/*
 * Pack 2^logn signed 8-bit values as consecutive 'bits'-wide
 * two's-complement fields (big-endian bit order). Every value must lie in
 * -(2^(bits-1)-1) .. +(2^(bits-1)-1). Returns the encoded length in bytes,
 * or 0 on a range error or if the output buffer is too small. With
 * out == NULL only the required length is computed.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    size_t i, need;
    uint8_t *dst;
    uint32_t window, keep;
    unsigned pending;

    for (i = 0; i < count; i ++) {
        if (x[i] > hi || x[i] < lo) {
            return 0;
        }
    }
    need = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    keep = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint8_t)x[i] & keep);
        pending += bits;
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return need;
}

/*
 * Unpack 2^logn signed values stored as consecutive 'bits'-wide
 * two's-complement fields (big-endian bit order) into x[]. Returns the
 * number of bytes consumed, or 0 if the input is too short, a field holds
 * the forbidden value -2^(bits-1), or the padding bits of the last byte
 * are not all zero.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * bits) + 7) >> 3;
    const uint32_t field_mask = ((uint32_t)1 << bits) - 1;
    const uint32_t sign_bit = (uint32_t)1 << (bits - 1);
    const uint8_t *src = in;
    uint32_t window = 0;
    unsigned pending = 0;
    size_t filled = 0;

    if (need > max_in_len) {
        return 0;
    }
    while (filled < count) {
        window = (window << 8) | *src ++;
        pending += 8;
        while (pending >= bits && filled < count) {
            uint32_t field;

            pending -= bits;
            field = (window >> pending) & field_mask;

            /* Extend the field's sign bit through the upper bits. */
            field |= -(field & sign_bit);
            if (field == -sign_bit) {
                /* -2^(bits-1) is not a valid value. */
                return 0;
            }
            x[filled ++] = (int8_t) * (int32_t *)&field;
        }
    }

    /* Padding bits after the last field must be zero. */
    if ((window & (((uint32_t)1 << pending) - 1)) != 0) {
        return 0;
    }
    return need;
}

/*
 * Compressed encoding of 2^logn signed values, each in -2047..+2047.
 * Per value: one sign bit, the low 7 bits of the absolute value, then
 * the remaining high part in unary (that many zeros followed by a one).
 * Returns the number of bytes produced, or 0 if a value is out of range
 * or the output does not fit in max_out_len bytes. With out == NULL the
 * length is computed without writing and without the size check.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint8_t *dst = out;
    uint32_t window = 0;
    unsigned pending = 0;
    size_t produced = 0;
    size_t i;

    /* Reject anything outside -2047..+2047 before emitting a byte. */
    for (i = 0; i < count; i ++) {
        if (x[i] > +2047 || x[i] < -2047) {
            return 0;
        }
    }

    for (i = 0; i < count; i ++) {
        int value = x[i];
        unsigned negative = (value < 0);
        unsigned magnitude = (unsigned)(negative ? -value : value);
        unsigned high = magnitude >> 7;

        /* Sign bit followed by the low 7 bits: exactly 8 bits. */
        window = (window << 8) | (negative << 7) | (magnitude & 127u);

        /*
         * Unary tail: 'high' zeros then a one. high <= 15, so at most
         * 16 bits are appended; with the 8 bits above and up to 7 bits
         * left over from earlier values this stays within 31 bits.
         */
        window = (window << (high + 1)) | 1u;
        pending += 8 + high + 1;

        /* Emit every complete byte. */
        for (; pending >= 8; produced ++) {
            pending -= 8;
            if (dst == NULL) {
                continue;
            }
            if (produced >= max_out_len) {
                return 0;
            }
            dst[produced] = (uint8_t)(window >> pending);
        }
    }

    /* Flush leftover bits, left-aligned and zero-padded. */
    if (pending > 0) {
        if (dst != NULL) {
            if (produced >= max_out_len) {
                return 0;
            }
            dst[produced] = (uint8_t)(window << (8 - pending));
        }
        produced ++;
    }

    return produced;
}

/*
 * Decode 2^logn values from the compressed format written by
 * comp_encode(): per value one sign bit, the low 7 bits of the absolute
 * value, then the high part in unary (zeros terminated by a one).
 *
 * Returns the number of bytes consumed, or 0 on error. Errors are:
 *  - input exhausted before all values were read;
 *  - an absolute value above 2047;
 *  - a "negative zero" (sign bit set, magnitude zero);
 *  - non-zero padding bits in the last byte.
 * The last two make the encoding of any given vector unique: the encoder
 * never produces them, and accepting them would let a third party turn
 * one valid encoding into a different byte string that still decodes to
 * the same values.
 *
 * see inner.h
 */
size_t
PQCLEAN_FALCON1024_AVX2_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero has a single valid encoding.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}

/*
* Key elements and signatures are polynomials with small integer
* coefficients. Here are some statistics gathered over many
* generated key pairs (10000 or more for each degree):
*
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G)
* 1 2 129 56.31 143 60.02
* 2 4 123 40.93 160 46.52
* 3 8 97 28.97 159 38.01
* 4 16 100 21.48 154 32.50
* 5 32 71 15.41 151 29.36
* 6 64 59 11.07 138 27.77
* 7 128 39 7.91 144 27.00
* 8 256 32 5.63 148 26.61
* 9 512 22 4.00 137 26.46
* 10 1024 15 2.84 146 26.41
*
* We want a compact storage format for private key, and, as part of
* key generation, we are allowed to reject some keys which would
* otherwise be fine (this does not induce any noticeable vulnerability
* as long as we reject only a small proportion of possible keys).
* Hence, we enforce at key generation time maximum values for the
* elements of f, g, F and G, so that their encoding can be expressed
* in fixed-width values. Limits have been chosen so that generated
* keys are almost always within bounds, thus impacting neither
* security nor performance.
*
* IMPORTANT: the code assumes that all coefficients of f, g, F and G
* ultimately fit in the -127..+127 range. Thus, none of the elements
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8.
*/

/*
 * Field width, in bits, for the coefficients of f and g. Entry k is
 * presumably the width used for logn = k (entry 0 is unused) -- confirm
 * against the callers.
 */
const uint8_t PQCLEAN_FALCON1024_AVX2_max_fg_bits[] = {
0, /* unused */
8,
8,
8,
8,
8,
7,
7,
6,
6,
5
};

/*
 * Field width, in bits, for the coefficients of F and G. Entry k is
 * presumably the width used for logn = k (entry 0 is unused) -- confirm
 * against the callers.
 */
const uint8_t PQCLEAN_FALCON1024_AVX2_max_FG_bits[] = {
0, /* unused */
8,
8,
8,
8,
8,
8,
8,
8,
8,
8
};

/*
* When generating a new key pair, we can always reject keys which
* feature an abnormally large coefficient. This can also be done for
* signatures, albeit with some care: in case the signature process is
* used in a derandomized setup (explicitly seeded with the message and
* private key), we have to follow the specification faithfully, and the
* specification only enforces a limit on the L2 norm of the signature
* vector. The limit on the L2 norm implies that the absolute value of
* a coefficient of the signature cannot be more than the following:
*
* log(n) n max sig coeff (theoretical)
* 1 2 412
* 2 4 583
* 3 8 824
* 4 16 1166
* 5 32 1649
* 6 64 2332
* 7 128 3299
* 8 256 4665
* 9 512 6598
* 10 1024 9331
*
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit
* in -2047..2047, i.e. 12 bits.
*/

/*
 * Field width, in bits, for signature coefficients. Entry k is presumably
 * the width used for logn = k (entry 0 is unused) -- confirm against the
 * callers.
 */
const uint8_t PQCLEAN_FALCON1024_AVX2_max_sig_bits[] = {
0, /* unused */
10,
11,
11,
12,
12,
12,
12,
12,
12,
12
};

+ 0
- 294
src/sign/falcon/falcon-1024/avx2/common.c View File

@@ -1,294 +0,0 @@
#include "inner.h"

/*
* Support functions for signatures (hash-to-point, norm).
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/*
 * Fill x[] with 2^logn values in 0..12288 drawn from the SHAKE256
 * context by rejection sampling: 16-bit big-endian samples of 61445 or
 * more are discarded, the others are reduced modulo 12289.
 *
 * This is the direct, per-the-spec method. Its running time depends on
 * the hashed data, so it may leak information on the plaintext (enough to
 * test it against a list of candidates) to an attacker who knows the
 * nonce but has neither the signature value nor the public key.
 *
 * see inner.h
 */
void
PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(
    inner_shake256_context *sc,
    uint16_t *x, unsigned logn) {
    size_t remaining = (size_t)1 << logn;

    while (remaining > 0) {
        uint8_t pair[2];
        uint32_t sample;

        inner_shake256_extract(sc, (void *)pair, sizeof pair);
        sample = ((unsigned)pair[0] << 8) | (unsigned)pair[1];
        if (sample >= 61445) {
            /* Rejected: draw again. */
            continue;
        }
        while (sample >= 12289) {
            sample -= 12289;
        }
        *x ++ = (uint16_t)sample;
        remaining --;
    }
}

/* see inner.h */
/*
 * Hash-to-point variant that avoids data-dependent branches in the
 * sampling and compaction steps: it always draws n + overtab[logn]
 * 16-bit samples, reduces each one modulo 12289 with masks (rejected
 * samples become 0xFFFF), then moves the valid values towards the front
 * with masked swaps.
 *
 *   sc    SHAKE256 context the samples are extracted from
 *   x     receives the first n = 2^logn slots
 *   logn  base-2 logarithm of the degree
 *   tmp   scratch area, accessed as uint16_t storage for slots n..2n-1
 */
void
PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(
inner_shake256_context *sc,
uint16_t *x, unsigned logn, uint8_t *tmp) {
/*
* Each 16-bit sample is a value in 0..65535. The value is
* kept if it falls in 0..61444 (because 61445 = 5*12289)
* and rejected otherwise; thus, each sample has probability
* about 0.93758 of being selected.
*
* We want to oversample enough to be sure that we will
* have enough values with probability at least 1 - 2^(-256).
* Depending on degree N, this leads to the following
* required oversampling:
*
* logn n oversampling
* 1 2 65
* 2 4 67
* 3 8 71
* 4 16 77
* 5 32 86
* 6 64 100
* 7 128 122
* 8 256 154
* 9 512 205
* 10 1024 287
*
* If logn >= 7, then the provided temporary buffer is large
* enough. Otherwise, we use a stack buffer of 63 entries
* (i.e. 126 bytes) for the values that do not fit in tmp[].
*/

static const uint16_t overtab[] = {
0, /* unused */
65,
67,
71,
77,
86,
100,
122,
154,
205,
287
};

unsigned n, n2, u, m, p, over;
uint16_t *tt1, tt2[63];

/*
* We first generate m 16-bit value. Values 0..n-1 go to x[].
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
* We also reduce modulo q the values; rejected values are set
* to 0xFFFF.
*/
n = 1U << logn;
n2 = n << 1;
over = overtab[logn];
m = n + over;
tt1 = (uint16_t *)tmp;
for (u = 0; u < m; u ++) {
uint8_t buf[2];
uint32_t w, wr;

inner_shake256_extract(sc, buf, sizeof buf);
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
/*
* Branch-free reduction: subtract 2*12289 (twice) and then 12289,
* each time only if the value is at least that large. The mask
* ((v - c) >> 31) - 1 is all-ones when v >= c and zero otherwise.
*/
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1));
/* Samples of 61445 or more are rejected: set every bit. */
wr |= ((w - 61445) >> 31) - 1;
if (u < n) {
x[u] = (uint16_t)wr;
} else if (u < n2) {
tt1[u - n] = (uint16_t)wr;
} else {
tt2[u - n2] = (uint16_t)wr;
}
}

/*
* Now we must "squeeze out" the invalid values. We do this in
* a logarithmic sequence of passes; each pass computes where a
* value should go, and moves it down by 'p' slots if necessary,
* where 'p' uses an increasing powers-of-two scale. It can be
* shown that in all cases where the loop decides that a value
* has to be moved down by p slots, the destination slot is
* "free" (i.e. contains an invalid value).
*/
for (p = 1; p <= over; p <<= 1) {
unsigned v;

/*
* In the loop below:
*
* - v contains the index of the final destination of
* the value; it is recomputed dynamically based on
* whether values are valid or not.
*
* - u is the index of the value we consider ("source");
* its address is s.
*
* - The loop may swap the value with the one at index
* u-p. The address of the swap destination is d.
*/
v = 0;
for (u = 0; u < m; u ++) {
uint16_t *s, *d;
unsigned j, sv, dv, mk;

if (u < n) {
s = &x[u];
} else if (u < n2) {
s = &tt1[u - n];
} else {
s = &tt2[u - n2];
}
sv = *s;

/*
* The value in sv should ultimately go to
* address v, i.e. jump back by u-v slots.
*/
j = u - v;

/*
* We increment v for the next iteration, but
* only if the source value is valid. The mask
* 'mk' is -1 if the value is valid, 0 otherwise,
* so we _subtract_ mk.
*/
mk = (sv >> 15) - 1U;
v -= mk;

/*
* In this loop we consider jumps by p slots; if
* u < p then there is nothing more to do.
*/
if (u < p) {
continue;
}

/*
* Destination for the swap: value at address u-p.
*/
if ((u - p) < n) {
d = &x[u - p];
} else if ((u - p) < n2) {
d = &tt1[(u - p) - n];
} else {
d = &tt2[(u - p) - n2];
}
dv = *d;

/*
* The swap should be performed only if the source
* is valid AND the jump j has its 'p' bit set.
*/
mk &= -(((j & p) + 0x1FF) >> 9);

/* Masked swap: exchanges *s and *d when mk is all-ones, no-op when 0. */
*s = (uint16_t)(sv ^ (mk & (sv ^ dv)));
*d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
}
}
}

/*
 * Returns 1 if the squared l2-norm of (s1, s2), over 2^logn coefficients
 * each, is strictly below the acceptance bound, 0 otherwise.
 *
 * see inner.h
 */
int
PQCLEAN_FALCON1024_AVX2_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * Only 32-bit operations are used. 'seen' ORs together every
     * partial sum, so its top bit tells whether the running total ever
     * reached 2^31; in that case the total is forced to 2^32-1
     * (saturation) before the comparison.
     */
    const size_t count = (size_t)1 << logn;
    uint32_t sqnorm = 0;
    uint32_t seen = 0;
    uint32_t bound;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t a = s1[i];
        int32_t b = s2[i];

        sqnorm += (uint32_t)(a * a);
        seen |= sqnorm;
        sqnorm += (uint32_t)(b * b);
        seen |= sqnorm;
    }
    sqnorm |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqnorm < bound;
}

/*
 * Same test as is_short(), but the squared norm of the first half is
 * supplied in sqn and only s2 (2^logn coefficients) is added to it.
 * A value of sqn with its top bit set counts as saturated from the start.
 *
 * see inner.h
 */
int
PQCLEAN_FALCON1024_AVX2_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint32_t seen = -(sqn >> 31);
    uint32_t bound;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c = s2[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqn < bound;
}

+ 0
- 1109
src/sign/falcon/falcon-1024/avx2/fft.c
File diff suppressed because it is too large
View File


+ 0
- 1078
src/sign/falcon/falcon-1024/avx2/fpr.c
File diff suppressed because it is too large
View File


+ 0
- 349
src/sign/falcon/falcon-1024/avx2/fpr.h View File

@@ -1,349 +0,0 @@
#ifndef PQCLEAN_FALCON1024_AVX2_FPR_H
#define PQCLEAN_FALCON1024_AVX2_FPR_H

/*
* Floating-point operations.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/* ====================================================================== */

#include <immintrin.h>
#include <math.h>

/*
 * a*b + c and a*b - c on 256-bit vectors of doubles. As written these
 * expand to a separate multiply followed by an add/subtract intrinsic,
 * not to a fused multiply-add intrinsic.
 */
#define FMADD(a, b, c) _mm256_add_pd(_mm256_mul_pd(a, b), c)
#define FMSUB(a, b, c) _mm256_sub_pd(_mm256_mul_pd(a, b), c)

/*
* We wrap the native 'double' type into a structure so that the C compiler
* complains if we inadvertently use raw arithmetic operators on the 'fpr'
* type instead of using the inline functions below. This should have no
* extra runtime cost, since all the functions below are 'inline'.
*/
typedef struct {
    double v;
} fpr;

/* Wrap a raw double into an fpr. */
static inline fpr
FPR(double v) {
    fpr wrapped = { v };

    return wrapped;
}

/* Convert a signed 64-bit integer to fpr. */
static inline fpr
fpr_of(int64_t i) {
    fpr converted;

    converted.v = (double)i;
    return converted;
}

/*
 * Floating-point constants, wrapped in the fpr type. fpr_q is 12289.0
 * (the modulus q used elsewhere in this code) and fpr_inverse_of_q its
 * reciprocal; fpr_ptwo31 is 2^31 and fpr_ptwo31m1 / fpr_mtwo31m1 are
 * +/-(2^31 - 1).
 */
static const fpr fpr_q = { 12289.0 };
static const fpr fpr_inverse_of_q = { 1.0 / 12289.0 };
static const fpr fpr_inv_2sqrsigma0 = { .150865048875372721532312163019 };
static const fpr fpr_inv_sigma = { .005819826392951607426919370871 };
static const fpr fpr_sigma_min_9 = { 1.291500756233514568549480827642 };
static const fpr fpr_sigma_min_10 = { 1.311734375905083682667395805765 };
static const fpr fpr_log2 = { 0.69314718055994530941723212146 };
static const fpr fpr_inv_log2 = { 1.4426950408889634073599246810 };
static const fpr fpr_bnorm_max = { 16822.4121 };
static const fpr fpr_zero = { 0.0 };
static const fpr fpr_one = { 1.0 };
static const fpr fpr_two = { 2.0 };
static const fpr fpr_onehalf = { 0.5 };
static const fpr fpr_invsqrt2 = { 0.707106781186547524400844362105 };
static const fpr fpr_invsqrt8 = { 0.353553390593273762200422181052 };
static const fpr fpr_ptwo31 = { 2147483648.0 };
static const fpr fpr_ptwo31m1 = { 2147483647.0 };
static const fpr fpr_mtwo31m1 = { -2147483647.0 };
static const fpr fpr_ptwo63m1 = { 9223372036854775807.0 };
static const fpr fpr_mtwo63m1 = { -9223372036854775807.0 };
static const fpr fpr_ptwo63 = { 9223372036854775808.0 };

/*
 * Round x to the nearest integer (ties to even, per the explanation
 * below) and return it as int64_t, without data-dependent branches.
 */
static inline int64_t
fpr_rint(fpr x) {
/*
* We do not want to use llrint() since it might be not
* constant-time.
*
* Suppose that x >= 0. If x >= 2^52, then it is already an
* integer. Otherwise, if x < 2^52, then computing x+2^52 will
* yield a value that will be rounded to the nearest integer
* with exactly the right rules (round-to-nearest-even).
*
* In order to have constant-time processing, we must do the
* computation for both x >= 0 and x < 0 cases, and use a
* cast to an integer to access the sign and select the proper
* value. Such casts also allow us to find out if |x| < 2^52.
*/
int64_t sx, tx, rp, rn, m;
uint32_t ub;

sx = (int64_t)(x.v - 1.0);
tx = (int64_t)x.v;
rp = (int64_t)(x.v + 4503599627370496.0) - 4503599627370496;
rn = (int64_t)(x.v - 4503599627370496.0) + 4503599627370496;

/*
* If tx >= 2^52 or tx < -2^52, then result is tx.
* Otherwise, if sx >= 0, then result is rp.
* Otherwise, result is rn. We use the fact that when x is
* close to 0 (|x| <= 0.25) then both rp and rn are correct;
* and if x is not close to 0, then trunc(x-1.0) yields the
* appropriate sign.
*/

/*
* Clamp rp to zero if sx < 0.
* Clamp rn to zero if sx >= 0.
* (m is all-ones when sx is negative, zero otherwise.)
*/
m = sx >> 63;
rn &= m;
rp &= ~m;

/*
* Get the 12 upper bits of tx; if they are not all zeros or
* all ones, then tx >= 2^52 or tx < -2^52, and we clamp both
* rp and rn to zero. Otherwise, we clamp tx to zero.
*/
ub = (uint32_t)((uint64_t)tx >> 52);
m = -(int64_t)((((ub + 1) & 0xFFF) - 2) >> 31);
rp &= m;
rn &= m;
tx &= ~m;

/*
* Only one of tx, rn or rp (at most) can be non-zero at this
* point.
*/
return tx | rn | rp;
}

/*
 * Largest integer not greater than x, as an int64_t.
 */
static inline int64_t
fpr_floor(fpr x) {
    /*
     * Converting to an integer truncates toward zero, which is one too
     * high for negative non-integer inputs. The comparison below yields
     * 0 or 1 and is subtracted to correct that. This is constant-time
     * provided the compiler materialises the comparison result as an
     * integer without a conditional branch, which should be the case on
     * modern architectures with an FPU.
     */
    int64_t truncated = (int64_t)x.v;
    int64_t overshoot = (x.v < (double)truncated);

    return truncated - overshoot;
}

/* Integer part of x (rounding toward zero). */
static inline int64_t
fpr_trunc(fpr x) {
    int64_t whole = (int64_t)x.v;

    return whole;
}

/* x + y */
static inline fpr
fpr_add(fpr x, fpr y) {
    fpr sum;

    sum.v = x.v + y.v;
    return sum;
}

/* x - y */
static inline fpr
fpr_sub(fpr x, fpr y) {
    fpr diff;

    diff.v = x.v - y.v;
    return diff;
}

/* -x */
static inline fpr
fpr_neg(fpr x) {
    fpr opposite;

    opposite.v = -x.v;
    return opposite;
}

/* x / 2 */
static inline fpr
fpr_half(fpr x) {
    fpr halved;

    halved.v = x.v * 0.5;
    return halved;
}

/* 2 * x */
static inline fpr
fpr_double(fpr x) {
    fpr doubled;

    doubled.v = x.v + x.v;
    return doubled;
}

/* x * y */
static inline fpr
fpr_mul(fpr x, fpr y) {
    fpr product;

    product.v = x.v * y.v;
    return product;
}

/* x * x */
static inline fpr
fpr_sqr(fpr x) {
    fpr square;

    square.v = x.v * x.v;
    return square;
}

/* 1 / x */
static inline fpr
fpr_inv(fpr x) {
    fpr reciprocal;

    reciprocal.v = 1.0 / x.v;
    return reciprocal;
}

/* x / y */
static inline fpr
fpr_div(fpr x, fpr y) {
    fpr quotient;

    quotient.v = x.v / y.v;
    return quotient;
}

/*
 * Replace *t with its square root, computed with the SSE2 packed
 * square-root intrinsic: the value is broadcast to both lanes and the
 * low lane of the result is stored back.
 */
static inline void
fpr_sqrt_avx2(double *t) {
    const __m128d both_lanes = _mm_load1_pd(t);
    const __m128d roots = _mm_sqrt_pd(both_lanes);

    _mm_storel_pd(t, roots);
}

/*
 * Square root of x.
 */
static inline fpr
fpr_sqrt(fpr x) {
/*
* We prefer not to have a dependency on libm when it can be
* avoided. On x86, calling the sqrt() libm function inlines
* the relevant opcode (fsqrt or sqrtsd, depending on whether
* the 387 FPU or SSE2 is used for floating-point operations)
* but then makes an optional call to the library function
* for proper error handling, in case the operand is negative.
*
* To avoid this dependency, this AVX2 variant always goes
* through fpr_sqrt_avx2(), which uses SSE2 intrinsics; there is
* no other code path (and no call to the library sqrt()) in
* this function.
*/

fpr_sqrt_avx2(&x.v);
return x;
}

/* Returns 1 if x < y, 0 otherwise. */
static inline int
fpr_lt(fpr x, fpr y) {
    int is_less = (x.v < y.v);

    return is_less;
}

/*
 * Compute ccs * exp(-x) * 2^63, truncated to an integer. exp(-x) is
 * approximated by 1 plus a degree-12 polynomial in -x. Per the comment
 * at the end of the function, the caller is expected to pass x >= 0 and
 * ccs < 1 so that the result fits.
 */
static inline uint64_t
fpr_expm_p63(fpr x, fpr ccs) {
/*
* Polynomial approximation of exp(-x) is taken from FACCT:
* https://eprint.iacr.org/2018/1234
* Specifically, values are extracted from the implementation
* referenced from the FACCT article, and available at:
* https://github.com/raykzhao/gaussian
* Tests over more than 24 billions of random inputs in the
* 0..log(2) range have never shown a deviation larger than
* 2^(-50) from the true mathematical value.
*/


/*
* AVX2 implementation uses more operations than Horner's method,
* but with a lower expression tree depth. This helps because
* additions and multiplications have a latency of 4 cycles on
* a Skylake, but the CPU can issue two of them per cycle.
*/

/* c.d[k] is the coefficient applied to (-x)^(k+1). */
static const union {
double d[12];
__m256d v[3];
} c = {
{
0.999999999999994892974086724280,
0.500000000000019206858326015208,
0.166666666666984014666397229121,
0.041666666666110491190622155955,
0.008333333327800835146903501993,
0.001388888894063186997887560103,
0.000198412739277311890541063977,
0.000024801566833585381209939524,
0.000002755586350219122514855659,
0.000000275607356160477811864927,
0.000000025299506379442070029551,
0.000000002073772366009083061987
}
};

double d1, d2, d4, d8, y;
__m256d d14, d58, d9c;

/*
* d14, d58 and d9c hold powers 1..4, 5..8 and 9..12 of -x (lowest
* power in lane 0). Each vector is multiplied by its four
* coefficients and the three products are accumulated into d9c.
*/
d1 = -x.v;
d2 = d1 * d1;
d4 = d2 * d2;
d8 = d4 * d4;
d14 = _mm256_set_pd(d4, d2 * d1, d2, d1);
d58 = _mm256_mul_pd(d14, _mm256_set1_pd(d4));
d9c = _mm256_mul_pd(d14, _mm256_set1_pd(d8));
d14 = _mm256_mul_pd(d14, _mm256_loadu_pd(&c.d[0]));
d58 = FMADD(d58, _mm256_loadu_pd(&c.d[4]), d14);
d9c = FMADD(d9c, _mm256_loadu_pd(&c.d[8]), d58);
/* Horizontal sum of the four lanes, plus the constant term 1.0. */
d9c = _mm256_hadd_pd(d9c, d9c);
y = 1.0 + _mm_cvtsd_f64(_mm256_castpd256_pd128(d9c)) // _mm256_cvtsd_f64(d9c)
+ _mm_cvtsd_f64(_mm256_extractf128_pd(d9c, 1));
y *= ccs.v;

/*
* Final conversion goes through int64_t first, because that's what
* the underlying opcode (vcvttsd2si) will do, and we know that the
* result will fit, since x >= 0 and ccs < 1. If we did the
* conversion directly to uint64_t, then the compiler would add some
* extra code to cover the case of a source value of 2^63 or more,
* and though the alternate path would never be exercised, the
* extra comparison would cost us some cycles.
*/
return (uint64_t)(int64_t)(y * fpr_ptwo63.v);

}

#define fpr_gm_tab PQCLEAN_FALCON1024_AVX2_fpr_gm_tab
extern const fpr fpr_gm_tab[];

#define fpr_p2_tab PQCLEAN_FALCON1024_AVX2_fpr_p2_tab
extern const fpr fpr_p2_tab[];

/* ====================================================================== */
#endif

+ 0
- 826
src/sign/falcon/falcon-1024/avx2/inner.h View File

@@ -1,826 +0,0 @@
#ifndef PQCLEAN_FALCON1024_AVX2_INNER_H
#define PQCLEAN_FALCON1024_AVX2_INNER_H


/*
* Internal functions for Falcon. This is not the API intended to be
* used by applications; instead, this internal API provides all the
* primitives on which wrappers build to provide external APIs.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/

/*
* IMPORTANT API RULES
* -------------------
*
* This API has some non-trivial usage rules:
*
*
* - All public functions (i.e. the non-static ones) must be referenced
* with the PQCLEAN_FALCON1024_AVX2_ macro (e.g. PQCLEAN_FALCON1024_AVX2_verify_raw for the verify_raw()
* function). That macro adds a prefix to the name, which is
* configurable with the FALCON_PREFIX macro. This allows compiling
* the code into a specific "namespace" and potentially including
* several versions of this code into a single application (e.g. to
* have an AVX2 and a non-AVX2 variants and select the one to use at
* runtime based on availability of AVX2 opcodes).
*
* - Functions that need temporary buffers expect them as a final
* tmp[] array of type uint8_t*, with a size which is documented for
* each function. However, most have some alignment requirements,
* because they will use the array to store 16-bit, 32-bit or 64-bit
* values (e.g. uint64_t or double). The caller must ensure proper
* alignment. What happens on unaligned access depends on the
* underlying architecture, ranging from a slight time penalty
* to immediate termination of the process.
*
* - Some functions rely on specific rounding rules and precision for
* floating-point numbers. On some systems (in particular 32-bit x86
* with the 387 FPU), this requires setting a hardware control
* word. The caller MUST use set_fpu_cw() to ensure proper precision:
*
* oldcw = set_fpu_cw(2);
* PQCLEAN_FALCON1024_AVX2_sign_dyn(...);
* set_fpu_cw(oldcw);
*
* On systems where the native floating-point precision is already
* proper, or integer-based emulation is used, the set_fpu_cw()
* function does nothing, so it can be called systematically.
*/
#include "fips202.h"
#include "fpr.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/*
* Some computations with floating-point elements, in particular
* rounding to the nearest integer, rely on operations using _exactly_
* the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit
* x86, the 387 FPU may be used (depending on the target OS) and, in
* that case, may use more precision bits (i.e. 64 bits, for an 80-bit
* total type length); to prevent miscomputations, we define an explicit
* function that modifies the precision in the FPU control word.
*
* set_fpu_cw() sets the precision to the provided value, and returns
* the previously set precision; callers are supposed to restore the
* previous precision on exit. The correct (52-bit) precision is
* configured with the value "2". On unsupported compilers, or on
* targets other than 32-bit x86, or when the native 'double' type is
* not used, the set_fpu_cw() function does nothing at all.
*/
static inline unsigned
set_fpu_cw(unsigned x) {
    /*
     * This variant does not touch any FPU control word: the requested
     * precision is simply handed back, so the usual
     *     oldcw = set_fpu_cw(2); ...; set_fpu_cw(oldcw);
     * call pattern remains valid and has no effect.
     */
    const unsigned requested = x;

    return requested;
}




/* ==================================================================== */
/*
* SHAKE256 implementation (shake.c).
*
* API is defined to be easily replaced with the fips202.h API defined
* as part of PQClean.
*/



/*
* Map the inner_shake256_* names used throughout the Falcon code onto
* the incremental SHAKE256 API (shake256inc* / shake256_inc_*),
* presumably the one declared by the "fips202.h" header included above.
*
* Beware of the argument order: inner_shake256_extract() takes the
* context FIRST, whereas shake256_inc_squeeze() receives it LAST.
*/
#define inner_shake256_context shake256incctx
#define inner_shake256_init(sc) shake256_inc_init(sc)
#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len)
#define inner_shake256_flip(sc) shake256_inc_finalize(sc)
#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc)
#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc)


/* ==================================================================== */
/*
* Encoding/decoding functions (codec.c).
*
* Encoding functions take as parameters an output buffer (out) with
* a given maximum length (max_out_len); returned value is the actual
* number of bytes which have been written. If the output buffer is
* not large enough, then 0 is returned (some bytes may have been
* written to the buffer). If 'out' is NULL, then 'max_out_len' is
* ignored; instead, the function computes and returns the actual
* required output length (in bytes).
*
* Decoding functions take as parameters an input buffer (in) with
* its maximum length (max_in_len); returned value is the actual number
* of bytes that have been read from the buffer. If the provided length
* is too short, then 0 is returned.
*
* Values to encode or decode are vectors of integers, with N = 2^logn
* elements.
*
* Three encoding formats are defined:
*
* - modq: sequence of values modulo 12289, each encoded over exactly
* 14 bits. The encoder and decoder verify that integers are within
* the valid range (0..12288). Values are arrays of uint16.
*
* - trim: sequence of signed integers, a specified number of bits
* each. The number of bits is provided as parameter and includes
* the sign bit. Each integer x must be such that |x| < 2^(bits-1)
* (which means that the -2^(bits-1) value is forbidden); encode and
* decode functions check that property. Values are arrays of
* int16_t or int8_t, corresponding to names 'trim_i16' and
* 'trim_i8', respectively.
*
* - comp: variable-length encoding for signed integers; each integer
* uses a minimum of 9 bits, possibly more. This is normally used
* only for signatures.
*
*/

size_t PQCLEAN_FALCON1024_AVX2_modq_encode(void *out, size_t max_out_len,
const uint16_t *x, unsigned logn);
size_t PQCLEAN_FALCON1024_AVX2_trim_i16_encode(void *out, size_t max_out_len,
const int16_t *x, unsigned logn, unsigned bits);
size_t PQCLEAN_FALCON1024_AVX2_trim_i8_encode(void *out, size_t max_out_len,
const int8_t *x, unsigned logn, unsigned bits);
size_t PQCLEAN_FALCON1024_AVX2_comp_encode(void *out, size_t max_out_len,
const int16_t *x, unsigned logn);

size_t PQCLEAN_FALCON1024_AVX2_modq_decode(uint16_t *x, unsigned logn,
const void *in, size_t max_in_len);
size_t PQCLEAN_FALCON1024_AVX2_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits,
const void *in, size_t max_in_len);
size_t PQCLEAN_FALCON1024_AVX2_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits,
const void *in, size_t max_in_len);
size_t PQCLEAN_FALCON1024_AVX2_comp_decode(int16_t *x, unsigned logn,
const void *in, size_t max_in_len);

/*
* Number of bits for key elements, indexed by logn (1 to 10). This
* is at most 8 bits for all degrees, but some degrees may have shorter
* elements.
*/
extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_fg_bits[];
extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_FG_bits[];

/*
* Maximum size, in bits, of elements in a signature, indexed by logn
* (1 to 10). The size includes the sign bit.
*/
extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_sig_bits[];

/* ==================================================================== */
/*
* Support functions used for both signature generation and signature
* verification (common.c).
*/

/*
* From a SHAKE256 context (must be already flipped), produce a new
* point. This is the non-constant-time version, which may leak enough
* information to serve as a stop condition on a brute force attack on
* the hashed message (provided that the nonce value is known).
*/
void PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(inner_shake256_context *sc,
uint16_t *x, unsigned logn);

/*
* From a SHAKE256 context (must be already flipped), produce a new
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes.
* This function is constant-time but is typically more expensive than
* PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime().
*
* tmp[] must have 16-bit alignment.
*/
void PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(inner_shake256_context *sc,
uint16_t *x, unsigned logn, uint8_t *tmp);

/*
* Tell whether a given vector (2N coordinates, in two halves) is
* acceptable as a signature. This compares the appropriate norm of the
* vector with the acceptance bound. Returned value is 1 on success
* (vector is short enough to be acceptable), 0 otherwise.
*/
int PQCLEAN_FALCON1024_AVX2_is_short(const int16_t *s1, const int16_t *s2, unsigned logn);

/*
* Tell whether a given vector (2N coordinates, in two halves) is
* acceptable as a signature. Instead of the first half s1, this
* function receives the "saturated squared norm" of s1, i.e. the
* sum of the squares of the coordinates of s1 (saturated at 2^32-1
* if the sum exceeds 2^31-1).
*
* Returned value is 1 on success (vector is short enough to be
* acceptable), 0 otherwise.
*/
int PQCLEAN_FALCON1024_AVX2_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn);

/* ==================================================================== */
/*
* Signature verification functions (vrfy.c).
*/

/*
* Convert a public key to NTT + Montgomery format. Conversion is done
* in place.
*/
void PQCLEAN_FALCON1024_AVX2_to_ntt_monty(uint16_t *h, unsigned logn);

/*
* Internal signature verification code:
* c0[] contains the hashed nonce+message
* s2[] is the decoded signature
* h[] contains the public key, in NTT + Montgomery format
* logn is the degree log
* tmp[] temporary, must have at least 2*2^logn bytes
* Returned value is 1 on success, 0 on error.
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2,
const uint16_t *h, unsigned logn, uint8_t *tmp);

/*
* Compute the public key h[], given the private key elements f[] and
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial
* modulus. This function returns 1 on success, 0 on error (an error is
* reported if f is not invertible mod phi mod q).
*
* The tmp[] array must have room for at least 2*2^logn elements.
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_AVX2_compute_public(uint16_t *h,
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp);

/*
* Recompute the fourth private key element. Private key consists in
* four polynomials with small coefficients f, g, F and G, which are
* such that fG - gF = q mod phi; furthermore, f is invertible modulo
* phi and modulo q. This function recomputes G from f, g and F.
*
* The tmp[] array must have room for at least 4*2^logn bytes.
*
* Returned value is 1 on success, 0 on error (f not invertible).
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_AVX2_complete_private(int8_t *G,
const int8_t *f, const int8_t *g, const int8_t *F,
unsigned logn, uint8_t *tmp);

/*
* Test whether a given polynomial is invertible modulo phi and q.
* Polynomial coefficients are small integers.
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_AVX2_is_invertible(
const int16_t *s2, unsigned logn, uint8_t *tmp);

/*
* Count the number of elements of value zero in the NTT representation
* of the given polynomial: this is the number of primitive 2n-th roots
* of unity (modulo q = 12289) that are roots of the provided polynomial
* (taken modulo q).
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp);

/*
* Internal signature verification with public key recovery:
* h[] receives the public key (NOT in NTT/Montgomery format)
* c0[] contains the hashed nonce+message
* s1[] is the first signature half
* s2[] is the second signature half
* logn is the degree log
* tmp[] temporary, must have at least 2*2^logn bytes
* Returned value is 1 on success, 0 on error. Success is returned if
* the signature is a short enough vector; in that case, the public
* key has been written to h[]. However, the caller must still
* verify that h[] is the correct value (e.g. with regards to a known
* hash of the public key).
*
* h[] may not overlap with any of the other arrays.
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_AVX2_verify_recover(uint16_t *h,
const uint16_t *c0, const int16_t *s1, const int16_t *s2,
unsigned logn, uint8_t *tmp);

/* ==================================================================== */
/*
* Implementation of floating-point real numbers (fpr.h, fpr.c).
*/

/*
* Real numbers are implemented by an extra header file, included below.
* This is meant to support pluggable implementations. The default
* implementation relies on the C type 'double'.
*
* The included file must define the following types, functions and
* constants:
*
* fpr
* type for a real number
*
* fpr fpr_of(int64_t i)
* cast an integer into a real number; source must be in the
* -(2^63-1)..+(2^63-1) range
*
* fpr fpr_scaled(int64_t i, int sc)
* compute i*2^sc as a real number; source 'i' must be in the
* -(2^63-1)..+(2^63-1) range
*
* fpr fpr_ldexp(fpr x, int e)
* compute x*2^e
*
* int64_t fpr_rint(fpr x)
* round x to the nearest integer; x must be in the -(2^63-1)
* to +(2^63-1) range
*
* int64_t fpr_trunc(fpr x)
* round to an integer; this rounds towards zero; value must
* be in the -(2^63-1) to +(2^63-1) range
*
* fpr fpr_add(fpr x, fpr y)
* compute x + y
*
* fpr fpr_sub(fpr x, fpr y)
* compute x - y
*
* fpr fpr_neg(fpr x)
* compute -x
*
* fpr fpr_half(fpr x)
* compute x/2
*
* fpr fpr_double(fpr x)
* compute x*2
*
* fpr fpr_mul(fpr x, fpr y)
* compute x * y
*
* fpr fpr_sqr(fpr x)
* compute x * x
*
* fpr fpr_inv(fpr x)
* compute 1/x
*
* fpr fpr_div(fpr x, fpr y)
* compute x/y
*
* fpr fpr_sqrt(fpr x)
* compute the square root of x
*
* int fpr_lt(fpr x, fpr y)
* return 1 if x < y, 0 otherwise
*
* uint64_t fpr_expm_p63(fpr x)
* return exp(-x), assuming that 0 <= x < log(2). Returned value
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x),
* rounded to the nearest integer). Computation should have a
* precision of at least 45 bits.
*
* const fpr fpr_gm_tab[]
* array of constants for FFT / iFFT
*
* const fpr fpr_p2_tab[]
* precomputed powers of 2 (by index, 0 to 10)
*
* Constants of type 'fpr':
*
* fpr fpr_q 12289
* fpr fpr_inverse_of_q 1/12289
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2))
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289))
* fpr fpr_sigma_min_9 1.291500756233514568549480827642
* fpr fpr_sigma_min_10 1.311734375905083682667395805765
* fpr fpr_log2 log(2)
* fpr fpr_inv_log2 1/log(2)
* fpr fpr_bnorm_max 16822.4121
* fpr fpr_zero 0
* fpr fpr_one 1
* fpr fpr_two 2
* fpr fpr_onehalf 0.5
* fpr fpr_ptwo31 2^31
* fpr fpr_ptwo31m1 2^31-1
* fpr fpr_mtwo31m1 -(2^31-1)
* fpr fpr_ptwo63m1 2^63-1
* fpr fpr_mtwo63m1 -(2^63-1)
* fpr fpr_ptwo63 2^63
*/

/* ==================================================================== */
/*
* RNG (rng.c).
*
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256
* context (flipped) and is used for bulk pseudorandom generation.
* A system-dependent seed generator is also provided.
*/

/*
* Obtain a random seed from the system RNG.
*
* Returned value is 1 on success, 0 on error.
*/
int PQCLEAN_FALCON1024_AVX2_get_seed(void *seed, size_t seed_len);

/*
* Structure for a PRNG. This includes a large buffer so that values
* get generated in advance. The 'state' is used to keep the current
* PRNG algorithm state (contents depend on the selected algorithm).
*
* The unions with 'dummy_u64' are there to ensure proper alignment for
* 64-bit direct access.
*/
typedef struct {
/*
* Pool of already-generated output bytes, consumed by prng_get_u64()
* and prng_get_u8(). The uint64_t member is never read; it only
* forces 64-bit alignment of the union.
*/
union {
uint8_t d[512]; /* MUST be 512, exactly */
uint64_t dummy_u64;
} buf;
/* Index in buf.d[] of the next byte to hand out. */
size_t ptr;
/*
* Internal state of the underlying PRNG algorithm (contents depend
* on the selected algorithm); again aligned for 64-bit access.
*/
union {
uint8_t d[256];
uint64_t dummy_u64;
} state;
/*
* NOTE(review): presumably selects the PRNG algorithm; its values are
* not visible in this header -- confirm against rng.c.
*/
int type;
} prng;

/*
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256
* context (in "flipped" state) to obtain its initial state.
*/
void PQCLEAN_FALCON1024_AVX2_prng_init(prng *p, inner_shake256_context *src);

/*
* Refill the PRNG buffer. This is normally invoked automatically, and
* is declared here only so that prng_get_u64() may be inlined.
*/
void PQCLEAN_FALCON1024_AVX2_prng_refill(prng *p);

/*
* Get some bytes from a PRNG.
*/
void PQCLEAN_FALCON1024_AVX2_prng_get_bytes(prng *p, void *dst, size_t len);

/*
* Get a 64-bit random value from a PRNG.
*/
static inline uint64_t
prng_get_u64(prng *p) {
    const uint8_t *src;
    uint64_t acc;
    size_t off;
    int i;

    /*
     * Refill as soon as 9 or fewer unread bytes remain. A few trailing
     * bytes of the old buffer may thus be thrown away, but the
     * extraction below never has to straddle a refill, and the buffer
     * is never left empty afterwards.
     */
    off = p->ptr;
    if (off >= (sizeof p->buf.d) - 9) {
        PQCLEAN_FALCON1024_AVX2_prng_refill(p);
        off = 0;
    }
    p->ptr = off + 8;

    /*
     * Assemble the value in little-endian order: the byte at the lowest
     * offset ends up in the least significant position.
     */
    src = p->buf.d + off;
    acc = 0;
    for (i = 7; i >= 0; i--) {
        acc = (acc << 8) | (uint64_t)src[i];
    }
    return acc;
}

/*
* Get an 8-bit random value from a PRNG.
*/
static inline unsigned
prng_get_u8(prng *p) {
    size_t pos;
    unsigned byte;

    /* Take the next unread byte and advance the read index. */
    pos = p->ptr;
    byte = p->buf.d[pos];
    p->ptr = pos + 1;

    /*
     * If that was the last byte, refill right away so that the buffer
     * is not left exhausted for the next caller.
     */
    if (p->ptr == sizeof p->buf.d) {
        PQCLEAN_FALCON1024_AVX2_prng_refill(p);
    }
    return byte;
}

/* ==================================================================== */
/*
* FFT (falcon-fft.c).
*
* A real polynomial is represented as an array of N 'fpr' elements.
* The FFT representation of a real polynomial contains N/2 complex
* elements; each is stored as two real numbers, for the real and
* imaginary parts, respectively. See falcon-fft.c for details on the
* internal representation.
*/

/*
* Compute FFT in-place: the source array should contain a real
* polynomial (N coefficients); its storage area is reused to store
* the FFT representation of that polynomial (N/2 complex numbers).
*
* 'logn' MUST lie between 1 and 10 (inclusive).
*/
void PQCLEAN_FALCON1024_AVX2_FFT(fpr *f, unsigned logn);

/*
* Compute the inverse FFT in-place: the source array should contain the
* FFT representation of a real polynomial (N/2 elements); the resulting
* real polynomial (N coefficients of type 'fpr') is written over the
* array.
*
* 'logn' MUST lie between 1 and 10 (inclusive).
*/
void PQCLEAN_FALCON1024_AVX2_iFFT(fpr *f, unsigned logn);

/*
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This
* function works in both normal and FFT representations.
*/
void PQCLEAN_FALCON1024_AVX2_poly_add(fpr *a, const fpr *b, unsigned logn);

/*
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This
* function works in both normal and FFT representations.
*/
void PQCLEAN_FALCON1024_AVX2_poly_sub(fpr *a, const fpr *b, unsigned logn);

/*
* Negate polynomial a. This function works in both normal and FFT
* representations.
*/
void PQCLEAN_FALCON1024_AVX2_poly_neg(fpr *a, unsigned logn);

/*
* Compute adjoint of polynomial a. This function works only in FFT
* representation.
*/
void PQCLEAN_FALCON1024_AVX2_poly_adj_fft(fpr *a, unsigned logn);

/*
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap.
* This function works only in FFT representation.
*/
void PQCLEAN_FALCON1024_AVX2_poly_mul_fft(fpr *a, const fpr *b, unsigned logn);

/*
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT
* overlap. This function works only in FFT representation.
*/
void PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn);

/*
* Multiply polynomial with its own adjoint. This function works only in FFT
* representation.
*/
void PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(fpr *a, unsigned logn);

/*
* Multiply polynomial with a real constant. This function works in both
* normal and FFT representations.
*/
void PQCLEAN_FALCON1024_AVX2_poly_mulconst(fpr *a, fpr x, unsigned logn);

/*
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation).
* a and b MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_AVX2_poly_div_fft(fpr *a, const fpr *b, unsigned logn);

/*
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g))
* (also in FFT representation). Since the result is auto-adjoint, all its
* coordinates in FFT representation are real; as such, only the first N/2
* values of d[] are filled (the imaginary parts are skipped).
*
* Array d MUST NOT overlap with either a or b.
*/
void PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(fpr *d,
const fpr *a, const fpr *b, unsigned logn);

/*
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g)
* (also in FFT representation). Destination d MUST NOT overlap with
* any of the source arrays.
*/
void PQCLEAN_FALCON1024_AVX2_poly_add_muladj_fft(fpr *d,
const fpr *F, const fpr *G,
const fpr *f, const fpr *g, unsigned logn);

/*
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both
* a and b are in FFT representation. Since b is autoadjoint, all its
* FFT coefficients are real, and the array b contains only N/2 elements.
* a and b MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(fpr *a,
const fpr *b, unsigned logn);

/*
* Divide polynomial a by polynomial b, where b is autoadjoint. Both
* a and b are in FFT representation. Since b is autoadjoint, all its
* FFT coefficients are real, and the array b contains only N/2 elements.
* a and b MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_AVX2_poly_div_autoadj_fft(fpr *a,
const fpr *b, unsigned logn);

/*
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT
* representation. On input, g00, g01 and g11 are provided (where the
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10
* and d11 values are written in g00, g01 and g11, respectively
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]).
* (In fact, d00 = g00, so the g00 operand is left unmodified.)
*/
void PQCLEAN_FALCON1024_AVX2_poly_LDL_fft(const fpr *g00,
fpr *g01, fpr *g11, unsigned logn);

/*
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT
* representation. This is identical to poly_LDL_fft() except that
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written
* in two other separate buffers provided as extra parameters.
*/
void PQCLEAN_FALCON1024_AVX2_poly_LDLmv_fft(fpr *d11, fpr *l10,
const fpr *g00, const fpr *g01,
const fpr *g11, unsigned logn);

/*
* Apply "split" operation on a polynomial in FFT representation:
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_AVX2_poly_split_fft(fpr *f0, fpr *f1,
const fpr *f, unsigned logn);

/*
* Apply "merge" operation on two polynomials in FFT representation:
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1.
* f MUST NOT overlap with either f0 or f1.
*/
void PQCLEAN_FALCON1024_AVX2_poly_merge_fft(fpr *f,
const fpr *f0, const fpr *f1, unsigned logn);

/* ==================================================================== */
/*
* Key pair generation.
*/

/*
* Required sizes of the temporary buffer (in bytes).
*
* This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1
* or 2) where it is slightly greater.
*/
#define FALCON_KEYGEN_TEMP_1 136
#define FALCON_KEYGEN_TEMP_2 272
#define FALCON_KEYGEN_TEMP_3 224
#define FALCON_KEYGEN_TEMP_4 448
#define FALCON_KEYGEN_TEMP_5 896
#define FALCON_KEYGEN_TEMP_6 1792
#define FALCON_KEYGEN_TEMP_7 3584
#define FALCON_KEYGEN_TEMP_8 7168
#define FALCON_KEYGEN_TEMP_9 14336
#define FALCON_KEYGEN_TEMP_10 28672

/*
* Generate a new key pair. Randomness is extracted from the provided
* SHAKE256 context, which must have already been seeded and flipped.
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_*
* macros) and be aligned for the uint32_t, uint64_t and fpr types.
*
* The private key elements are written in f, g, F and G, and the
* public key is written in h. Either or both of G and h may be NULL,
* in which case the corresponding element is not returned (they can
* be recomputed from f, g and F).
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_AVX2_keygen(inner_shake256_context *rng,
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h,
unsigned logn, uint8_t *tmp);

/* ==================================================================== */
/*
* Signature generation.
*/

/*
* Expand a private key into the B0 matrix in FFT representation and
* the LDL tree. All the values are written in 'expanded_key', for
* a total of (8*logn+40)*2^logn bytes.
*
* The tmp[] array must have room for at least 48*2^logn bytes.
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_AVX2_expand_privkey(fpr *expanded_key,
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G,
unsigned logn, uint8_t *tmp);

/*
* Compute a signature over the provided hashed message (hm); the
* signature value is one short vector. This function uses an
* expanded key (as generated by PQCLEAN_FALCON1024_AVX2_expand_privkey()).
*
* The sig[] and hm[] buffers may overlap.
*
* On successful output, the start of the tmp[] buffer contains the s1
* vector (as int16_t elements).
*
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes.
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_AVX2_sign_tree(int16_t *sig, inner_shake256_context *rng,
const fpr *expanded_key,
const uint16_t *hm, unsigned logn, uint8_t *tmp);

/*
* Compute a signature over the provided hashed message (hm); the
* signature value is one short vector. This function uses a raw
* key and dynamically recomputes the B0 matrix and LDL tree; this
* saves RAM since there is no need for an expanded key, but
* increases the signature cost.
*
* The sig[] and hm[] buffers may overlap.
*
* On successful output, the start of the tmp[] buffer contains the s1
* vector (as int16_t elements).
*
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes.
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_AVX2_sign_dyn(int16_t *sig, inner_shake256_context *rng,
const int8_t *f, const int8_t *g,
const int8_t *F, const int8_t *G,
const uint16_t *hm, unsigned logn, uint8_t *tmp);

/*
* Internal sampler engine. Exported for tests.
*
* sampler_context wraps around a source of random numbers (PRNG) and
* the sigma_min value (nominally dependent on the degree).
*
* sampler() takes as parameters:
* ctx pointer to the sampler_context structure
* mu center for the distribution
* isigma inverse of the distribution standard deviation
* It returns an integer sampled along the Gaussian distribution centered
* on mu and of standard deviation sigma = 1/isigma.
*
* gaussian0_sampler() takes as parameter a pointer to a PRNG, and
* returns an integer sampled along a half-Gaussian with standard
* deviation sigma0 = 1.8205 (center is 0, returned value is
* nonnegative).
*/

/*
* State handed to the sampler: a PRNG used as the source of random
* numbers, plus the sigma_min value (nominally dependent on the degree).
*/
typedef struct {
prng p;
fpr sigma_min;
} sampler_context;

int PQCLEAN_FALCON1024_AVX2_sampler(void *ctx, fpr mu, fpr isigma);

int PQCLEAN_FALCON1024_AVX2_gaussian0_sampler(prng *p);

/* ==================================================================== */

#endif

+ 0
- 4231
src/sign/falcon/falcon-1024/avx2/keygen.c
File diff suppressed because it is too large
View File


+ 0
- 386
src/sign/falcon/falcon-1024/avx2/pqclean.c View File

@@ -1,386 +0,0 @@
#include "api.h"
#include "inner.h"
#include "randombytes.h"
#include <stddef.h>
#include <string.h>
/*
* Wrapper for implementing the PQClean API.
*/



/* Length, in bytes, of the random nonce that accompanies each signature. */
#define NONCELEN 40
/* Length, in bytes, of the random seed injected into SHAKE256 (keygen and signing). */
#define SEEDLEN 48

/*
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024)
*
* private key:
* header byte: 0101nnnn
* private f (6 or 5 bits by element, depending on degree)
* private g (6 or 5 bits by element, depending on degree)
* private F (8 bits by element)
*
* public key:
* header byte: 0000nnnn
* public h (14 bits by element)
*
* signature:
* header byte: 0011nnnn
* nonce 40 bytes
* value (12 bits by element)
*
* message + signature:
* signature length (2 bytes, big-endian)
* nonce 40 bytes
* message
* header byte: 0010nnnn
* value (12 bits by element)
* (signature length is 1+len(value), not counting the nonce)
*/

/* see api.h */
int
PQCLEAN_FALCON1024_AVX2_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) {
    /*
     * Scratch space for keygen (28*2^10 bytes); the dummy members only
     * force the alignment required for uint64_t and fpr accesses.
     */
    union {
        uint8_t b[28 * 1024];
        uint64_t dummy_u64;
        fpr dummy_fpr;
    } tmp;
    int8_t f[1024], g[1024], F[1024], G[1024];
    uint16_t h[1024];
    unsigned char seed[SEEDLEN];
    inner_shake256_context rng;
    const int8_t *polys[3];
    unsigned widths[3];
    size_t off, len, i;

    /*
     * Seed a SHAKE256 instance from the system RNG and derive the key
     * pair (f, g, F, G, h) from it, for degree 2^10.
     */
    randombytes(seed, sizeof seed);
    inner_shake256_init(&rng);
    inner_shake256_inject(&rng, seed, sizeof seed);
    inner_shake256_flip(&rng);
    PQCLEAN_FALCON1024_AVX2_keygen(&rng, f, g, F, G, h, 10, tmp.b);
    inner_shake256_ctx_release(&rng);

    /*
     * Private key layout: header byte 0101nnnn, then f, g and F, each
     * trimmed to its own bit width. G is not stored.
     */
    polys[0] = f;
    widths[0] = PQCLEAN_FALCON1024_AVX2_max_fg_bits[10];
    polys[1] = g;
    widths[1] = PQCLEAN_FALCON1024_AVX2_max_fg_bits[10];
    polys[2] = F;
    widths[2] = PQCLEAN_FALCON1024_AVX2_max_FG_bits[10];

    sk[0] = 0x50 + 10;
    off = 1;
    for (i = 0; i < 3; i++) {
        len = PQCLEAN_FALCON1024_AVX2_trim_i8_encode(
                  sk + off, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - off,
                  polys[i], 10, widths[i]);
        if (len == 0) {
            return -1;
        }
        off += len;
    }
    /* The three polynomials must fill the key buffer exactly. */
    if (off != PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES) {
        return -1;
    }

    /*
     * Public key layout: header byte 0000nnnn, then h encoded modulo q.
     */
    pk[0] = 0x00 + 10;
    len = PQCLEAN_FALCON1024_AVX2_modq_encode(
              pk + 1, PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1,
              h, 10);
    return (len == PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) ? 0 : -1;
}

/*
* Compute the signature. nonce[] receives the nonce and must have length
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce
* or header byte), with *sigbuflen providing the maximum value length and
* receiving the actual value length.
*
* If a signature could be computed but not encoded because it would
* exceed the output buffer size, then a new signature is computed. If
* the provided buffer size is too low, this could loop indefinitely, so
* the caller must provide a size that can accommodate signatures with a
* large enough probability.
*
* Return value: 0 on success, -1 on error.
*/
static int
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen,
        const uint8_t *m, size_t mlen, const uint8_t *sk) {
    /*
     * Scratch space for complete_private() and sign_dyn() (72*2^10
     * bytes); the dummy members only force the 64-bit alignment those
     * functions require.
     */
    union {
        uint8_t b[72 * 1024];
        uint64_t dummy_u64;
        fpr dummy_fpr;
    } tmp;
    int8_t f[1024], g[1024], F[1024], G[1024];
    /*
     * sig[] and hm[] deliberately live in SEPARATE storage. If they
     * shared memory, each call to sign_dyn() would overwrite the hashed
     * message with the signature, and a retry of the loop below (taken
     * when the signature does not fit in sigbuf[]) would then sign the
     * clobbered data instead of the real hashed message, yielding a
     * signature that cannot verify.
     */
    struct {
        int16_t sig[1024];
        uint16_t hm[1024];
    } r;
    unsigned char seed[SEEDLEN];
    inner_shake256_context sc;
    size_t u, v;

    /*
     * Decode the private key: header byte 0101nnnn, then f, g and F.
     * The three polynomials must consume the key bytes exactly.
     */
    if (sk[0] != 0x50 + 10) {
        return -1;
    }
    u = 1;
    v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode(
            f, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10],
            sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u);
    if (v == 0) {
        return -1;
    }
    u += v;
    v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode(
            g, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10],
            sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u);
    if (v == 0) {
        return -1;
    }
    u += v;
    v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode(
            F, 10, PQCLEAN_FALCON1024_AVX2_max_FG_bits[10],
            sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u);
    if (v == 0) {
        return -1;
    }
    u += v;
    if (u != PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES) {
        return -1;
    }
    /* G is not stored in the key; recompute it from f, g and F. */
    if (!PQCLEAN_FALCON1024_AVX2_complete_private(G, f, g, F, 10, tmp.b)) {
        return -1;
    }

    /*
     * Create a random nonce (NONCELEN bytes).
     */
    randombytes(nonce, NONCELEN);

    /*
     * Hash nonce + message into a vector.
     */
    inner_shake256_init(&sc);
    inner_shake256_inject(&sc, nonce, NONCELEN);
    inner_shake256_inject(&sc, m, mlen);
    inner_shake256_flip(&sc);
    PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(&sc, r.hm, 10);
    inner_shake256_ctx_release(&sc);

    /*
     * Initialize a RNG: a fresh SHAKE256 context seeded from the
     * system RNG, used by sign_dyn() as its randomness source.
     */
    randombytes(seed, sizeof seed);
    inner_shake256_init(&sc);
    inner_shake256_inject(&sc, seed, sizeof seed);
    inner_shake256_flip(&sc);

    /*
     * Compute and return the signature. This loops until a signature
     * value is found that fits in the provided buffer; r.hm is left
     * untouched by sign_dyn(), so every attempt signs the same hashed
     * message.
     */
    for (;;) {
        PQCLEAN_FALCON1024_AVX2_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b);
        v = PQCLEAN_FALCON1024_AVX2_comp_encode(sigbuf, *sigbuflen, r.sig, 10);
        if (v != 0) {
            inner_shake256_ctx_release(&sc);
            *sigbuflen = v;
            return 0;
        }
    }
}

/*
 * Verify a signature. nonce[] holds NONCELEN bytes; sigbuf[] (sigbuflen
 * bytes) holds the compressed signature value, without the header byte
 * and without the nonce. pk is the encoded public key.
 *
 * Returns 0 when the signature is accepted, -1 in every other case
 * (bad public key, empty or malformed signature, verification failure).
 */
static int
do_verify(
    const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen,
    const uint8_t *m, size_t mlen, const uint8_t *pk) {
    union {
        uint8_t b[2 * 1024];
        uint64_t dummy_u64;
        fpr dummy_fpr;
    } tmp;
    uint16_t h[1024], hm[1024];
    int16_t sig[1024];
    inner_shake256_context sc;

    /* Public key: header byte 0x00 + 10, then h; must decode exactly. */
    if (pk[0] != 0x00 + 10) {
        return -1;
    }
    if (PQCLEAN_FALCON1024_AVX2_modq_decode(h, 10,
            pk + 1, PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1)
            != PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) {
        return -1;
    }
    PQCLEAN_FALCON1024_AVX2_to_ntt_monty(h, 10);

    /*
     * Signature value: reject an empty buffer, and require the decoder
     * to consume every byte that was supplied.
     */
    if (sigbuflen == 0
            || PQCLEAN_FALCON1024_AVX2_comp_decode(sig, 10, sigbuf, sigbuflen)
            != sigbuflen) {
        return -1;
    }

    /* Hash nonce || message into the point hm. */
    inner_shake256_init(&sc);
    inner_shake256_inject(&sc, nonce, NONCELEN);
    inner_shake256_inject(&sc, m, mlen);
    inner_shake256_flip(&sc);
    PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(&sc, hm, 10, tmp.b);
    inner_shake256_ctx_release(&sc);

    /* verify_raw() returns non-zero for a valid signature. */
    return PQCLEAN_FALCON1024_AVX2_verify_raw(hm, sig, h, 10, tmp.b) ? 0 : -1;
}

/* see api.h */
int
PQCLEAN_FALCON1024_AVX2_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk) {
    /*
     * Output layout: header byte | nonce (NONCELEN bytes) | value.
     *
     * PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES is sized for the signed-message
     * object built by PQCLEAN_FALCON1024_AVX2_crypto_sign(), which also
     * carries a two-byte length field. The value capacity is therefore
     * capped two bytes below the maximum, so that this function and
     * PQCLEAN_FALCON1024_AVX2_crypto_sign() yield the exact same signature
     * value for the same message, private key and randombytes() output
     * (needed for reproducible tests).
     */
    uint8_t *nonce = sig + 1;
    uint8_t *value = sig + 1 + NONCELEN;
    size_t value_len = PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES - NONCELEN - 3;

    if (do_sign(nonce, value, &value_len, m, mlen, sk) < 0) {
        return -1;
    }

    /* The header byte is only written once signing has succeeded. */
    sig[0] = 0x30 + 10;
    *siglen = 1 + NONCELEN + value_len;
    return 0;
}

/* see api.h */
int
PQCLEAN_FALCON1024_AVX2_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk) {
    const uint8_t *nonce;
    const uint8_t *value;

    /*
     * The signature must at least hold the header byte and the nonce,
     * and the header byte must be 0x30 + 10. The length is tested first
     * so sig[0] is not read when the length check already fails.
     */
    if (siglen < 1 + NONCELEN || sig[0] != 0x30 + 10) {
        return -1;
    }

    nonce = sig + 1;
    value = sig + 1 + NONCELEN;
    return do_verify(nonce, value, siglen - 1 - NONCELEN, m, mlen, pk);
}

/* see api.h */
int
PQCLEAN_FALCON1024_AVX2_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk) {
    /*
     * Signed-message layout produced here:
     *   sm[0..1]             big-endian length of (header byte + value)
     *   sm + 2               nonce, NONCELEN bytes
     *   sm + 2 + NONCELEN    message copy, mlen bytes
     *   then                 header byte 0x20 + 10
     *   then                 compressed signature value
     */
    uint8_t *nonce = sm + 2;
    uint8_t *msg = sm + 2 + NONCELEN;
    uint8_t *hdr = msg + mlen;
    size_t value_len = PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES - NONCELEN - 3;

    /* memmove() because m and sm are allowed to overlap. */
    memmove(msg, m, mlen);

    /* The message is signed from its final location inside sm. */
    if (do_sign(nonce, hdr + 1, &value_len, msg, mlen, sk) < 0) {
        return -1;
    }

    *hdr = 0x20 + 10;
    value_len ++;                       /* account for the header byte */
    sm[0] = (uint8_t)(value_len >> 8);
    sm[1] = (uint8_t)value_len;
    *smlen = mlen + 2 + NONCELEN + value_len;
    return 0;
}

/* see api.h */
int
PQCLEAN_FALCON1024_AVX2_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk) {
    const uint8_t *nonce, *msg, *value;
    size_t field, value_len, msg_len;

    /* Smallest possible object: length field, nonce, header byte. */
    if (smlen < 3 + NONCELEN) {
        return -1;
    }

    /*
     * The two-byte big-endian field counts the header byte plus the
     * signature value; it must be at least 2 and must fit in what
     * remains after the field itself and the nonce.
     */
    field = ((size_t)sm[0] << 8) | (size_t)sm[1];
    if (field < 2 || field > (smlen - NONCELEN - 2)) {
        return -1;
    }
    value_len = field - 1;
    msg_len = smlen - NONCELEN - 3 - value_len;

    nonce = sm + 2;
    msg = sm + 2 + NONCELEN;

    /* The byte right after the message is the signature header. */
    if (msg[msg_len] != 0x20 + 10) {
        return -1;
    }
    value = msg + msg_len + 1;

    if (do_verify(nonce, value, value_len, msg, msg_len, pk) < 0) {
        return -1;
    }

    /*
     * Only a verified message is released to the caller; memmove()
     * copes with m overlapping sm.
     */
    memmove(m, msg, msg_len);
    *mlen = msg_len;
    return 0;
}

+ 0
- 195
src/sign/falcon/falcon-1024/avx2/rng.c View File

@@ -1,195 +0,0 @@
#include "inner.h"
#include <assert.h>
/*
* PRNG and interface to the system RNG.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/



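/*
* Include relevant system header files. For Win32, this will also need
* linking with advapi32.dll, which we trigger with an appropriate #pragma.
*
* NOTE: no system header is actually included at this point in this
* file, and the get_seed() implementation below does not call any
* system RNG.
*/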

/* see inner.h */
int
PQCLEAN_FALCON1024_AVX2_get_seed(void *seed, size_t len) {
    /*
     * No entropy source is used here: seed[] is never written. Only a
     * zero-length request is reported as satisfied (1); any other
     * length yields 0.
     */
    (void)seed;
    return (len == 0) ? 1 : 0;
}

/* see inner.h */
/*
* Initialise the PRNG p from the SHAKE256 context src: 56 bytes are
* extracted from src into p->state.d (the state consumed by
* prng_refill()), then prng_refill() is called once so that the output
* buffer is populated and the read position is reset.
*/
void
PQCLEAN_FALCON1024_AVX2_prng_init(prng *p, inner_shake256_context *src) {
inner_shake256_extract(src, p->state.d, 56);
PQCLEAN_FALCON1024_AVX2_prng_refill(p);
}

/*
* PRNG based on ChaCha20.
*
* State consists in key (32 bytes) then IV (16 bytes) and block counter
* (8 bytes). Normally, we should not care about local endianness (this
* is for a PRNG), but for the NIST competition we need reproducible KAT
* vectors that work across architectures, so we enforce little-endian
* interpretation where applicable. Moreover, output words are "spread
* out" over the output buffer with the interleaving pattern that is
* naturally obtained from the AVX2 implementation that runs eight
* ChaCha20 instances in parallel.
*
* Each call processes eight consecutive counter values (cc .. cc+7, one
* per 32-bit lane), advances the stored counter by 8, writes 16 vectors
* of 32 bytes (512 bytes) to p->buf.d and resets p->ptr to 0.
*
* The two 32-bit halves of each counter value are XORed with state words
* sw[10] and sw[11] (bytes 40..47 of p->state.d).
* NOTE(review): the original comment said the counter is XORed into the
* "first 8 bytes of the IV"; with the key-then-IV layout described above,
* bytes 40..47 would be the last 8 IV bytes -- confirm which is intended.
*
* NOTE(review): p->state.d is accessed through uint32_t and uint64_t
* pointer casts, i.e. in host byte order and assuming suitable alignment;
* confirm against the definition of prng.
*/
void
PQCLEAN_FALCON1024_AVX2_prng_refill(prng *p) {

/* ChaCha constants: ASCII "expand 32-byte k" as four 32-bit words. */
static const uint32_t CW[] = {
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
};

uint64_t cc;
size_t u;
int i;
uint32_t *sw;
union {
uint32_t w[16];
__m256i y[2]; /* for alignment */
} t;
__m256i state[16], init[16];

/* View the first 48 state bytes as twelve 32-bit words. */
sw = (uint32_t *)p->state.d;

/*
* XOR next counter values into state.
*
* t.w[0..7] receive the low 32 bits of cc+0 .. cc+7, t.w[8..15] the
* corresponding high 32 bits; the stored counter then moves on by 8.
*/
cc = *(uint64_t *)(p->state.d + 48);
for (u = 0; u < 8; u ++) {
t.w[u] = (uint32_t)(cc + u);
t.w[u + 8] = (uint32_t)((cc + u) >> 32);
}
*(uint64_t *)(p->state.d + 48) = cc + 8;

/*
* Load state.
*
* Words 0..3 are the constants, words 4..13 are sw[0..9]; each is
* broadcast to all eight lanes. Words 14 and 15 are sw[10] and sw[11]
* broadcast and XORed with the per-lane counter halves. init[] keeps a
* copy for the final addition.
*/
for (u = 0; u < 4; u ++) {
state[u] = init[u] =
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)CW[u]));
}
for (u = 0; u < 10; u ++) {
state[u + 4] = init[u + 4] =
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[u]));
}
state[14] = init[14] = _mm256_xor_si256(
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[10])),
_mm256_loadu_si256((__m256i *)&t.w[0]));
state[15] = init[15] = _mm256_xor_si256(
_mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[11])),
_mm256_loadu_si256((__m256i *)&t.w[8]));

/*
* Do all rounds.
*
* Ten iterations, each applying QROUND to the four columns and then to
* the four diagonals of the 4x4 word matrix.
*/
for (i = 0; i < 10; i ++) {

/*
* Quarter-round on state words a, b, c, d: add / xor / rotate-left by
* 16, 12, 8 and 7 bits; each rotation is built from two shifts and an OR.
*/
#define QROUND(a, b, c, d) do { \
state[a] = _mm256_add_epi32(state[a], state[b]); \
state[d] = _mm256_xor_si256(state[d], state[a]); \
state[d] = _mm256_or_si256( \
_mm256_slli_epi32(state[d], 16), \
_mm256_srli_epi32(state[d], 16)); \
state[c] = _mm256_add_epi32(state[c], state[d]); \
state[b] = _mm256_xor_si256(state[b], state[c]); \
state[b] = _mm256_or_si256( \
_mm256_slli_epi32(state[b], 12), \
_mm256_srli_epi32(state[b], 20)); \
state[a] = _mm256_add_epi32(state[a], state[b]); \
state[d] = _mm256_xor_si256(state[d], state[a]); \
state[d] = _mm256_or_si256( \
_mm256_slli_epi32(state[d], 8), \
_mm256_srli_epi32(state[d], 24)); \
state[c] = _mm256_add_epi32(state[c], state[d]); \
state[b] = _mm256_xor_si256(state[b], state[c]); \
state[b] = _mm256_or_si256( \
_mm256_slli_epi32(state[b], 7), \
_mm256_srli_epi32(state[b], 25)); \
} while (0)

QROUND( 0, 4, 8, 12);
QROUND( 1, 5, 9, 13);
QROUND( 2, 6, 10, 14);
QROUND( 3, 7, 11, 15);
QROUND( 0, 5, 10, 15);
QROUND( 1, 6, 11, 12);
QROUND( 2, 7, 8, 13);
QROUND( 3, 4, 9, 14);

#undef QROUND

}

/*
* Add initial state back and encode the result in the destination
* buffer. We can dump the AVX2 values "as is" because the non-AVX2
* code uses a compatible order of values.
*
* Vector u is stored at byte offset 32*u of p->buf.d.
*/
for (u = 0; u < 16; u ++) {
_mm256_storeu_si256((__m256i *)&p->buf.d[u << 5],
_mm256_add_epi32(state[u], init[u]));
}


/* The freshly filled buffer is read from its start. */
p->ptr = 0;
}

/* see inner.h */
/*
 * Copy len bytes of PRNG output into dst.
 *
 * Bytes are taken from p->buf.d starting at the current read position
 * p->ptr; whenever the buffer is exhausted it is regenerated with
 * prng_refill() (which also resets p->ptr to 0) and copying continues.
 * A request with len == 0 does nothing.
 *
 * The copy must start at p->buf.d + p->ptr: reading from the start of
 * the buffer regardless of p->ptr would hand out bytes that earlier
 * reads already consumed whenever p->ptr is non-zero. Output is
 * unchanged for calls made with p->ptr == 0.
 */
void
PQCLEAN_FALCON1024_AVX2_prng_get_bytes(prng *p, void *dst, size_t len) {
    uint8_t *buf;

    buf = dst;
    while (len > 0) {
        size_t clen;

        /* Number of unread bytes left in the buffer, capped to len. */
        clen = (sizeof p->buf.d) - p->ptr;
        if (clen > len) {
            clen = len;
        }
        memcpy(buf, p->buf.d + p->ptr, clen);
        buf += clen;
        len -= clen;
        p->ptr += clen;
        if (p->ptr == sizeof p->buf.d) {
            PQCLEAN_FALCON1024_AVX2_prng_refill(p);
        }
    }
}

+ 0
- 1312
src/sign/falcon/falcon-1024/avx2/sign.c
File diff suppressed because it is too large
View File


+ 0
- 853
src/sign/falcon/falcon-1024/avx2/vrfy.c View File

@@ -1,853 +0,0 @@
#include "inner.h"

/*
* Falcon signature verification.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/* ===================================================================== */
/*
* Constants for NTT.
*
* n = 2^logn (2 <= n <= 1024)
* phi = X^n + 1
* q = 12289
* q0i = -1/q mod 2^16
* R = 2^16 mod q
* R2 = 2^32 mod q
*/

#define Q 12289
#define Q0I 12287
#define R 4091
#define R2 10952

/*
* Table for NTT, binary case:
* GMb[x] = R*(g^rev(x)) mod q
* where g = 7 (it is a 2048-th primitive root of 1 modulo q)
* and rev() is the bit-reversal function over 10 bits.
*/
static const uint16_t GMb[] = {
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759,
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710,
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442,
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180,
12210, 6240, 997, 117, 4783, 4407, 1549, 7072,
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042,
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863,
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872,
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340,
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045,
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180,
9277, 6130, 3323, 883, 10469, 489, 1502, 2851,
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195,
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274,
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446,
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276,
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525,
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477,
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680,
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003,
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763,
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821,
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159,
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188,
737, 3698, 4699, 5753, 9046, 3687, 16, 914,
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381,
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357,
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278,
932, 10229, 8927, 7642, 351, 9298, 237, 5858,
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204,
4602, 1748, 11300, 340, 3711, 4614, 300, 10993,
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654,
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209,
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808,
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433,
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416,
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436,
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434,
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328,
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393,
2523, 4339, 6115, 619, 937, 2834, 7775, 3279,
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113,
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884,
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520,
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399,
11192, 315, 4511, 1158, 6061, 6751, 11865, 357,
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253,
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652,
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188,
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928,
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650,
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344,
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561,
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114,
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323,
10438, 9471, 1271, 408, 6911, 3079, 360, 8276,
11535, 9156, 9049, 11539, 850, 8617, 784, 7919,
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600,
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424,
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333,
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395,
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216,
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230,
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688,
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868,
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525,
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227,
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815,
11736, 6813, 6979, 819, 8903, 6271, 10843, 348,
7514, 8339, 6439, 694, 852, 5659, 2781, 3716,
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885,
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526,
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224,
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159,
10923, 4918, 128, 7312, 725, 9157, 5006, 6393,
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668,
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365,
5110, 45, 2400, 1921, 4377, 2720, 1695, 51,
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833,
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198,
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519,
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339,
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604,
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154,
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185,
862, 3158, 477, 7279, 5678, 7914, 4254, 302,
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824,
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449,
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398,
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800,
1397, 10678, 103, 7420, 7976, 936, 764, 632,
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946,
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139,
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850,
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217,
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711,
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729,
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211,
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875,
8192, 986, 7527, 1401, 870, 3615, 8465, 2756,
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763,
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038,
2567, 708, 893, 6465, 4962, 10024, 2090, 5718,
10743, 780, 4733, 4623, 2134, 2087, 4802, 884,
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664,
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791,
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032,
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062,
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348,
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499,
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326,
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830,
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582,
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762,
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394,
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674,
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193,
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509,
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105,
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776,
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277,
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333,
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390,
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331,
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187,
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067,
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165,
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949
};

/*
* Table for inverse NTT, binary case:
* iGMb[x] = R*((1/g)^rev(x)) mod q
* Since g = 7, 1/g = 8778 mod 12289.
*/
static const uint16_t iGMb[] = {
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329,
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698,
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875,
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155,
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108,
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100,
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460,
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79,
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676,
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110,
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559,
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228,
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012,
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973,
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736,
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720,
6635, 6543, 1582, 4868, 42, 673, 2240, 7219,
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687,
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597,
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357,
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880,
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556,
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103,
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552,
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822,
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609,
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388,
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344,
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101,
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424,
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888,
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709,
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639,
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993,
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051,
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488,
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473,
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510,
6689, 386, 4462, 105, 2076, 10443, 119, 3955,
4370, 11505, 3672, 11439, 750, 3240, 3133, 754,
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851,
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327,
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255,
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787,
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325,
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707,
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489,
101, 1911, 9483, 3608, 11997, 10536, 812, 8915,
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922,
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922,
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097,
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016,
7769, 136, 617, 3157, 5889, 9219, 6855, 120,
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562,
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926,
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766,
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732,
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900,
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847,
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871,
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852,
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879,
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064,
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454,
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028,
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795,
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779,
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127,
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444,
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281,
9956, 2702, 6656, 735, 2243, 11656, 833, 3107,
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278,
3513, 9769, 3025, 779, 9433, 3392, 7437, 668,
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711,
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918,
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931,
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697,
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793,
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556,
707, 1088, 4936, 678, 10245, 18, 5684, 960,
4459, 7957, 226, 2451, 6, 8874, 320, 6298,
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876,
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679,
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378,
5227, 952, 4319, 9810, 4356, 3088, 11118, 840,
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489,
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037,
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917,
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546,
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722,
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484,
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519,
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097,
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286,
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292,
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022,
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377,
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653,
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601,
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765,
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293,
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892,
11489, 8833, 2393, 15, 10830, 5003, 17, 565,
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130,
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020,
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396,
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427,
104, 6348, 9643, 6757, 12110, 5617, 10935, 541,
135, 3041, 7200, 6526, 5085, 12136, 842, 4129,
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101,
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927,
1770, 273, 8377, 2271, 5225, 10283, 116, 11807,
91, 11699, 757, 1304, 7524, 6451, 8032, 8154,
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481,
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179,
3924, 3188, 367, 2077, 336, 5384, 5631, 8596,
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795,
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366,
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418,
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173,
9763, 12191, 459, 2966, 3166, 405, 5000, 9311,
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700,
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775,
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553,
9474, 2586, 1431, 2741, 473, 11383, 4745, 836,
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351,
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152,
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230
};

/*
 * Map a small signed integer to its representative modulo q in the
 * 0..q-1 range. The input MUST lie between -q/2 and +q/2.
 */
static inline uint32_t
mq_conv_small(int x) {
    uint32_t v = (uint32_t)x;

    /*
     * For negative x the conversion sets bit 31, so the mask below is
     * all-ones and q is added; for non-negative x nothing is added.
     */
    uint32_t neg_mask = -(v >> 31);

    return v + (Q & neg_mask);
}

/*
 * Addition modulo q. Both operands must be in the 0..q-1 range.
 */
static inline uint32_t
mq_add(uint32_t x, uint32_t y) {
    /*
     * Start from x + y - q. If that went "negative" (bit 31 set after
     * wrap-around), borrow is 1 and -borrow is an all-ones mask, so q is
     * added back; otherwise the mask is zero and the value is kept.
     */
    uint32_t t = x + y - Q;
    uint32_t borrow = t >> 31;

    return t + (Q & -borrow);
}

/*
 * Subtraction modulo q. Both operands must be in the 0..q-1 range.
 */
static inline uint32_t
mq_sub(uint32_t x, uint32_t y) {
    /*
     * Same masking trick as in mq_add(): when x - y wraps below zero,
     * bit 31 is set and q is added back to land in 0..q-1.
     */
    uint32_t t = x - y;
    uint32_t borrow = t >> 31;

    return t + (Q & -borrow);
}

/*
 * Halving modulo q. The operand must be in the 0..q-1 range.
 */
static inline uint32_t
mq_rshift1(uint32_t x) {
    /* An odd x is first made even by adding q; then shift. */
    uint32_t odd = x & 1;

    return (x + (Q & -odd)) >> 1;
}

/*
 * Montgomery multiplication modulo q: with R = 2^16 mod q, returns
 * x * y / R mod q. Both operands must be in the 0..q-1 range.
 */
static inline uint32_t
mq_montymul(uint32_t x, uint32_t y) {
    uint32_t prod, k, r;

    /*
     * Pick k (16 bits) so that prod + k*q has its low 16 bits equal to
     * zero; the sum can then be divided by 2^16 with a plain shift.
     * Since x and y are below q, the sum is at most
     * (2^15 - 1) * q + (q - 1)^2 and fits on 29 bits.
     */
    prod = x * y;
    k = (prod * Q0I) & 0xFFFF;
    r = (prod + k * Q) >> 16;

    /*
     * The shifted value is below 2q: subtract q, then add it back
     * through a mask if the subtraction wrapped (bit 31 set).
     */
    r -= Q;
    return r + (Q & -(r >> 31));
}

/*
 * Montgomery squaring: returns (x^2)/R mod q, computed as the
 * Montgomery product of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    uint32_t sq = mq_montymul(x, x);

    return sq;
}

/*
* Divide x by y modulo q = 12289.
*/
static inline uint32_t
mq_div_12289(uint32_t x, uint32_t y) {
/*
* We invert y by computing y^(q-2) mod q.
*
* We use the following addition chain for exponent e = 12287:
*
* e0 = 1
* e1 = 2 * e0 = 2
* e2 = e1 + e0 = 3
* e3 = e2 + e1 = 5
* e4 = 2 * e3 = 10
* e5 = 2 * e4 = 20
* e6 = 2 * e5 = 40
* e7 = 2 * e6 = 80
* e8 = 2 * e7 = 160
* e9 = e8 + e2 = 163
* e10 = e9 + e8 = 323
* e11 = 2 * e10 = 646
* e12 = 2 * e11 = 1292
* e13 = e12 + e9 = 1455
* e14 = 2 * e13 = 2910
* e15 = 2 * e14 = 5820
* e16 = e15 + e10 = 6143
* e17 = 2 * e16 = 12286
* e18 = e17 + e0 = 12287
*
* Additions on exponents are converted to Montgomery
* multiplications. We define all intermediate results as so
* many local variables, and let the C compiler work out which
* must be kept around.
*/
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18;

y0 = mq_montymul(y, R2);
y1 = mq_montysqr(y0);
y2 = mq_montymul(y1, y0);
y3 = mq_montymul(y2, y1);
y4 = mq_montysqr(y3);
y5 = mq_montysqr(y4);
y6 = mq_montysqr(y5);
y7 = mq_montysqr(y6);
y8 = mq_montysqr(y7);
y9 = mq_montymul(y8, y2);
y10 = mq_montymul(y9, y8);
y11 = mq_montysqr(y10);
y12 = mq_montysqr(y11);
y13 = mq_montymul(y12, y9);
y14 = mq_montysqr(y13);
y15 = mq_montysqr(y14);
y16 = mq_montymul(y15, y10);
y17 = mq_montysqr(y16);
y18 = mq_montymul(y17, y0);

/*
* Final multiplication with x, which is not in Montgomery
* representation, computes the correct division result.
*/
return mq_montymul(y18, x);
}

/*
 * In-place forward NTT of a ring element a[] holding 2^logn
 * coefficients in the 0..q-1 range.
 */
static void
mq_NTT(uint16_t *a, unsigned logn) {
    size_t n = (size_t)1 << logn;
    size_t span = n;        /* length of each group at the current level */
    size_t groups;

    /*
     * Each level doubles the number of groups and halves their length.
     * Within a group, the lower and upper halves are combined with a
     * butterfly using the twiddle factor GMb[groups + g].
     */
    for (groups = 1; groups < n; groups <<= 1) {
        size_t half = span >> 1;
        size_t g;

        for (g = 0; g < groups; g ++) {
            uint32_t twiddle = GMb[groups + g];
            uint16_t *lo = a + g * span;
            uint16_t *hi = lo + half;
            size_t k;

            for (k = 0; k < half; k ++) {
                uint32_t u = lo[k];
                uint32_t v = mq_montymul(hi[k], twiddle);

                lo[k] = (uint16_t)mq_add(u, v);
                hi[k] = (uint16_t)mq_sub(u, v);
            }
        }
        span = half;
    }
}

/*
 * In-place inverse NTT of a ring element a[] holding 2^logn values
 * (binary case).
 */
static void
mq_iNTT(uint16_t *a, unsigned logn) {
    size_t n = (size_t)1 << logn;
    size_t half = 1;        /* distance between butterfly partners */
    size_t groups, idx;
    unsigned b;
    uint32_t ninv;

    /*
     * Levels are processed with the partner distance doubling and the
     * number of groups halving each time; the twiddle factor for group g
     * at a level with `pairs` groups is iGMb[pairs + g].
     */
    for (groups = n; groups > 1; groups >>= 1) {
        size_t pairs = groups >> 1;
        size_t stride = half << 1;
        size_t g;

        for (g = 0; g < pairs; g ++) {
            uint32_t twiddle = iGMb[pairs + g];
            uint16_t *lo = a + g * stride;
            uint16_t *hi = lo + half;
            size_t k;

            for (k = 0; k < half; k ++) {
                uint32_t u = lo[k];
                uint32_t v = hi[k];

                lo[k] = (uint16_t)mq_add(u, v);
                hi[k] = (uint16_t)mq_montymul(mq_sub(u, v), twiddle);
            }
        }
        half = stride;
    }

    /*
     * Every value must still be divided by n. The factor 1/n is built in
     * Montgomery representation by starting from R and halving it
     * (mod q) logn times; a Montgomery product with that factor then
     * performs the division. The cost of this loop is negligible.
     */
    ninv = R;
    for (b = 0; b < logn; b ++) {
        ninv = mq_rshift1(ninv);
    }
    for (idx = 0; idx < n; idx ++) {
        a[idx] = (uint16_t)mq_montymul(a[idx], ninv);
    }
}

/*
 * Convert the 2^logn coefficients of f[] (mod q) to Montgomery
 * representation, in place, by Montgomery-multiplying each one by R2.
 */
static void
mq_poly_tomonty(uint16_t *f, unsigned logn) {
    uint16_t *end = f + ((size_t)1 << logn);

    while (f < end) {
        *f = (uint16_t)mq_montymul(*f, R2);
        f ++;
    }
}

/*
 * Coefficient-wise Montgomery product of two polynomials given in NTT
 * representation (2^logn values each). The product f*g overwrites f.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    uint16_t *end = f + ((size_t)1 << logn);

    while (f < end) {
        *f = (uint16_t)mq_montymul(*f, *g);
        f ++;
        g ++;
    }
}

/*
 * Coefficient-wise subtraction modulo q: f <- f - g over 2^logn
 * coefficients. g is only read.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t count, k;

    count = (size_t)1 << logn;
    k = 0;
    while (k < count) {
        f[k] = (uint16_t)mq_sub(f[k], g[k]);
        k ++;
    }
}

/* ===================================================================== */

/* see inner.h */
/*
 * Convert h[] (2^logn coefficients) in place: first to NTT
 * representation, then to Montgomery representation.
 */
void
PQCLEAN_FALCON1024_AVX2_to_ntt_monty(uint16_t *h, unsigned logn) {
mq_NTT(h, logn);
mq_poly_tomonty(h, logn);
}

/* see inner.h */
/*
 * Recompute -s1 = s2*h - c0 (mod phi, mod q) into tmp[] and report
 * whether the aggregate vector (-s1, s2) passes the is_short() test.
 * h is multiplied with mq_poly_montymul_ntt(), i.e. it is used as an
 * NTT-domain operand of a Montgomery multiplication. tmp[] is used as
 * an array of 2^logn 16-bit values.
 */
int
PQCLEAN_FALCON1024_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2,
                                   const uint16_t *h, unsigned logn, uint8_t *tmp) {
    uint16_t *work;
    size_t count, k;

    count = (size_t)1 << logn;
    work = (uint16_t *)tmp;

    /*
     * Bring every s2 coefficient into 0..q-1: negative values (top
     * bit set after the cast) get q added.
     */
    for (k = 0; k < count; k ++) {
        uint32_t v;

        v = (uint32_t)s2[k];
        v += Q & -(v >> 31);
        work[k] = (uint16_t)v;
    }

    /*
     * work <- s2*h - c0, which is -s1.
     */
    mq_NTT(work, logn);
    mq_poly_montymul_ntt(work, h, logn);
    mq_iNTT(work, logn);
    mq_poly_sub(work, c0, logn);

    /*
     * Recentre each coefficient around zero: values above q/2 get q
     * subtracted. The result is stored as signed 16-bit in place.
     */
    for (k = 0; k < count; k ++) {
        int32_t v;

        v = (int32_t)work[k];
        v -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)v) >> 31));
        ((int16_t *)work)[k] = (int16_t)v;
    }

    /*
     * The verdict is entirely the shortness of (-s1, s2).
     */
    return PQCLEAN_FALCON1024_AVX2_is_short((int16_t *)work, s2, logn);
}

/* see inner.h */
/*
 * Compute h = g/f modulo q, writing the 2^logn result coefficients
 * into h[]. tmp[] holds the NTT of f. Returns 1 on success, or 0 as
 * soon as a zero NTT coefficient of f is met (f not invertible); in
 * that case h[] holds partial data.
 */
int
PQCLEAN_FALCON1024_AVX2_compute_public(uint16_t *h,
                                       const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    size_t count, k;
    uint16_t *fq;

    count = (size_t)1 << logn;
    fq = (uint16_t *)tmp;
    for (k = 0; k < count; k ++) {
        fq[k] = (uint16_t)mq_conv_small(f[k]);
        h[k] = (uint16_t)mq_conv_small(g[k]);
    }
    mq_NTT(h, logn);
    mq_NTT(fq, logn);
    for (k = 0; k < count; k ++) {
        uint16_t den;

        den = fq[k];
        if (den == 0) {
            return 0;
        }
        h[k] = (uint16_t)mq_div_12289(h[k], den);
    }
    mq_iNTT(h, logn);
    return 1;
}

/* see inner.h */
/*
 * Rebuild G from f, g and F by computing g*F/f modulo q. tmp[] is used
 * as two consecutive arrays of 2^logn 16-bit values. Returns 1 on
 * success; returns 0 if f has a zero NTT coefficient or if a rebuilt
 * coefficient falls outside -127..+127 (G[] is then partially written).
 */
int
PQCLEAN_FALCON1024_AVX2_complete_private(int8_t *G,
        const int8_t *f, const int8_t *g, const int8_t *F,
        unsigned logn, uint8_t *tmp) {
    size_t count, k;
    uint16_t *num, *aux;

    count = (size_t)1 << logn;
    num = (uint16_t *)tmp;
    aux = num + count;

    /* num <- g*F in NTT representation. */
    for (k = 0; k < count; k ++) {
        num[k] = (uint16_t)mq_conv_small(g[k]);
        aux[k] = (uint16_t)mq_conv_small(F[k]);
    }
    mq_NTT(num, logn);
    mq_NTT(aux, logn);
    mq_poly_tomonty(num, logn);
    mq_poly_montymul_ntt(num, aux, logn);

    /* aux <- NTT(f), then divide coefficient by coefficient. */
    for (k = 0; k < count; k ++) {
        aux[k] = (uint16_t)mq_conv_small(f[k]);
    }
    mq_NTT(aux, logn);
    for (k = 0; k < count; k ++) {
        uint16_t den;

        den = aux[k];
        if (den == 0) {
            return 0;
        }
        num[k] = (uint16_t)mq_div_12289(num[k], den);
    }
    mq_iNTT(num, logn);

    /* Recentre around zero and check that each value fits in G[]. */
    for (k = 0; k < count; k ++) {
        uint32_t v;
        int32_t coeff;

        v = num[k];
        v -= (Q & ~ -((v - (Q >> 1)) >> 31));
        coeff = *(int32_t *)&v;
        if (!(coeff >= -127 && coeff <= +127)) {
            return 0;
        }
        G[k] = (int8_t)coeff;
    }
    return 1;
}

/* see inner.h */
/*
 * Return 1 if no NTT coefficient of s2 (reduced modulo q) is zero,
 * 0 otherwise. tmp[] is used as an array of 2^logn 16-bit values.
 * All coefficients are always inspected; there is no early exit.
 */
int
PQCLEAN_FALCON1024_AVX2_is_invertible(
    const int16_t *s2, unsigned logn, uint8_t *tmp) {
    size_t count, k;
    uint16_t *work;
    uint32_t flags;

    count = (size_t)1 << logn;
    work = (uint16_t *)tmp;
    for (k = 0; k < count; k ++) {
        uint32_t v;

        v = (uint32_t)s2[k];
        v += Q & -(v >> 31);
        work[k] = (uint16_t)v;
    }
    mq_NTT(work, logn);

    /*
     * (value - 1) has its top bit set only when value is zero, so the
     * top bit of the OR-accumulator records "some coefficient is 0".
     */
    flags = 0;
    for (k = 0; k < count; k ++) {
        flags |= (uint32_t)(work[k] - 1);
    }
    return (int)(1u - (flags >> 31));
}

/* see inner.h */
/*
 * Rebuild the public key h = (c0 - s1) / s2 (mod phi, mod q) from a
 * signature (s1, s2) and hashed message c0. Returns 1 if (s1, s2) is
 * short enough and s2 is invertible, 0 otherwise; h[] is written in
 * both cases. The caller must still check that the rebuilt key matches
 * the expected one (e.g. through a hash). tmp[] is used as an array of
 * 2^logn 16-bit values.
 */
int
PQCLEAN_FALCON1024_AVX2_verify_recover(uint16_t *h,
                                       const uint16_t *c0, const int16_t *s1, const int16_t *s2,
                                       unsigned logn, uint8_t *tmp) {
    size_t count, k;
    uint16_t *s2q;
    uint32_t flags;
    int short_ok;

    count = (size_t)1 << logn;
    s2q = (uint16_t *)tmp;

    /*
     * s2q <- s2 mod q, h <- c0 - (s1 mod q).
     */
    for (k = 0; k < count; k ++) {
        uint32_t v;

        v = (uint32_t)s2[k];
        v += Q & -(v >> 31);
        s2q[k] = (uint16_t)v;

        v = (uint32_t)s1[k];
        v += Q & -(v >> 31);
        h[k] = (uint16_t)mq_sub(c0[k], v);
    }

    /*
     * Divide in the NTT domain. A zero coefficient in NTT(s2) makes
     * the division meaningless; rather than branching on it, the fact
     * is recorded in the top bit of 'flags' (set when at least one
     * coefficient is zero) so that the processing stays uniform.
     */
    mq_NTT(s2q, logn);
    mq_NTT(h, logn);
    flags = 0;
    for (k = 0; k < count; k ++) {
        flags |= (uint32_t)(s2q[k] - 1);
        h[k] = (uint16_t)mq_div_12289(h[k], s2q[k]);
    }
    mq_iNTT(h, logn);

    /*
     * Accept only if s2 was invertible (top bit of flags clear) and
     * the vector is short (is_short() returned 1, which negates to an
     * all-ones mask).
     */
    short_ok = PQCLEAN_FALCON1024_AVX2_is_short(s1, s2, logn);
    flags = ~flags & (uint32_t)(-short_ok);
    return (int)(flags >> 31);
}

/* see inner.h */
/*
 * Count how many NTT coefficients of sig (reduced modulo q) are equal
 * to zero. tmp[] is used as an array of 2^logn 16-bit values.
 */
int
PQCLEAN_FALCON1024_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) {
    uint16_t *work;
    size_t count, k;
    uint32_t zeros;

    count = (size_t)1 << logn;
    work = (uint16_t *)tmp;
    for (k = 0; k < count; k ++) {
        uint32_t v;

        v = (uint32_t)sig[k];
        v += Q & -(v >> 31);
        work[k] = (uint16_t)v;
    }
    mq_NTT(work, logn);

    /* (value - 1) wraps to a top-bit-set word exactly when value is 0. */
    zeros = 0;
    for (k = 0; k < count; k ++) {
        zeros += (((uint32_t)work[k] - 1u) >> 31);
    }
    return (int)zeros;
}

+ 0
- 15
src/sign/falcon/falcon-1024/clean/CMakeLists.txt View File

@@ -1,15 +0,0 @@
# Source files of the Falcon-1024 "clean" implementation, relative to
# this directory.
set(
SRC_CLEAN_FALCON1024
codec.c
common.c
fft.c
fpr.c
keygen.c
pqclean.c
rng.c
sign.c
vrfy.c)

# Hand the source list to define_sig_alg() (defined elsewhere in the
# build system; presumably it creates the falcon1024_clean target using
# the PQCLEAN_FALCON1024_CLEAN symbol prefix -- confirm against its
# definition).
define_sig_alg(
falcon1024_clean
PQCLEAN_FALCON1024_CLEAN "${SRC_CLEAN_FALCON1024}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 80
src/sign/falcon/falcon-1024/clean/api.h View File

@@ -1,80 +0,0 @@
#ifndef PQCLEAN_FALCON1024_CLEAN_API_H
#define PQCLEAN_FALCON1024_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/* Exact size, in bytes, of an encoded private key (sk). */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES 2305
/* Exact size, in bytes, of an encoded public key (pk). */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES 1793
/* Maximum size, in bytes, of a signature (signature length is variable). */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES 1330

/* Human-readable algorithm name. */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME "Falcon-1024"

/*
* Generate a new key pair. Public key goes into pk[], private key in sk[].
* Key sizes are exact (in bytes):
* public (pk): PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES
* private (sk): PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(
uint8_t *pk, uint8_t *sk);

/*
* Compute a signature on a provided message (m, mlen), with a given
* private key (sk). Signature is written in sig[], with length written
* into *siglen. Signature length is variable; maximum signature length
* (in bytes) is PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
*
* sig[], m[] and sk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
uint8_t *sig, size_t *siglen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
* Verify a signature (sig, siglen) on a message (m, mlen) with a given
* public key (pk).
*
* sig[], m[] and pk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
const uint8_t *sig, size_t siglen,
const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
* Compute a signature on a message and pack the signature and message
* into a single object, written into sm[]. The length of that output is
* written in *smlen; that length may be larger than the message length
* (mlen) by up to PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
*
* sm[] and m[] may overlap each other arbitrarily; however, sm[] shall
* not overlap with sk[].
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_CLEAN_crypto_sign(
uint8_t *sm, size_t *smlen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
* Open a signed message object (sm, smlen) and verify the signature;
* on success, the message itself is written into m[] and its length
* into *mlen. The message is shorter than the signed message object,
* but the size difference depends on the signature value; the difference
* may range up to PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
*
* m[], sm[] and pk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_open(
uint8_t *m, size_t *mlen,
const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif

+ 0
- 555
src/sign/falcon/falcon-1024/clean/codec.c View File

@@ -1,555 +0,0 @@
#include "inner.h"

/*
* Encoding/decoding of keys and signatures.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/* see inner.h */
/*
 * Pack 2^logn values modulo q (each below 12289) as consecutive 14-bit
 * big-endian fields. Returns the encoded length in bytes, or 0 if a
 * value is out of range or the output buffer is too small. When out is
 * NULL, only the required length is returned (values are still
 * range-checked). Padding bits in the last byte are zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    size_t count, need, k;
    uint8_t *dst;
    uint32_t bits;
    int nbits;

    count = (size_t)1 << logn;
    k = 0;
    while (k < count) {
        if (x[k] >= 12289) {
            return 0;
        }
        k ++;
    }

    need = ((count * 14) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    bits = 0;
    nbits = 0;
    for (k = 0; k < count; k ++) {
        bits = (bits << 14) | x[k];
        /* Emit every complete byte currently held in the accumulator. */
        for (nbits += 14; nbits >= 8;) {
            nbits -= 8;
            *dst ++ = (uint8_t)(bits >> nbits);
        }
    }
    if (nbits > 0) {
        /* Left-align the leftover bits in a final byte. */
        *dst = (uint8_t)(bits << (8 - nbits));
    }
    return need;
}

/* see inner.h */
/*
 * Unpack 2^logn values modulo q from consecutive 14-bit big-endian
 * fields. Returns the number of bytes consumed, or 0 if the input is
 * too short, a field is 12289 or more, or the padding bits of the last
 * byte are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    size_t count, need, k;
    const uint8_t *src;
    uint32_t bits;
    int nbits;

    count = (size_t)1 << logn;
    need = ((count * 14) + 7) >> 3;
    if (need > max_in_len) {
        return 0;
    }

    src = in;
    bits = 0;
    nbits = 0;
    for (k = 0; k < count;) {
        unsigned v;

        bits = (bits << 8) | (*src ++);
        nbits += 8;
        if (nbits < 14) {
            /* Not enough bits for a full field yet. */
            continue;
        }
        nbits -= 14;
        v = (bits >> nbits) & 0x3FFF;
        if (v >= 12289) {
            return 0;
        }
        x[k ++] = (uint16_t)v;
    }

    /* Whatever is left below the last field is padding: must be 0. */
    if ((bits & (((uint32_t)1 << nbits) - 1)) != 0) {
        return 0;
    }
    return need;
}

/* see inner.h */
/*
 * Pack 2^logn signed 16-bit values as consecutive two's-complement
 * fields of 'bits' bits each (big-endian bit order). Every value must
 * lie in -(2^(bits-1)-1)..+(2^(bits-1)-1). Returns the encoded length
 * in bytes, or 0 if a value is out of range or the buffer is too
 * small. When out is NULL, only the required length is returned
 * (values are still range-checked).
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    size_t count, k, need;
    int lo, hi;
    uint8_t *dst;
    uint32_t acc, field_mask;
    unsigned acc_bits;

    count = (size_t)1 << logn;
    hi = (1 << (bits - 1)) - 1;
    lo = -hi;
    k = 0;
    while (k < count) {
        if (x[k] < lo || x[k] > hi) {
            return 0;
        }
        k ++;
    }

    need = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    acc = 0;
    acc_bits = 0;
    field_mask = ((uint32_t)1 << bits) - 1;
    for (k = 0; k < count; k ++) {
        acc = (acc << bits) | ((uint16_t)x[k] & field_mask);
        /* Emit every complete byte currently held in the accumulator. */
        for (acc_bits += bits; acc_bits >= 8;) {
            acc_bits -= 8;
            *dst ++ = (uint8_t)(acc >> acc_bits);
        }
    }
    if (acc_bits > 0) {
        /* Left-align the leftover bits in a final byte. */
        *dst ++ = (uint8_t)(acc << (8 - acc_bits));
    }
    return need;
}

/* see inner.h */
/*
 * Unpack 2^logn signed values from consecutive two's-complement fields
 * of 'bits' bits each (big-endian bit order) into x[].
 *
 * Returns the number of bytes consumed, or 0 if:
 *  - the input holds fewer than ceil(2^logn * bits / 8) bytes;
 *  - a field encodes the forbidden value -2^(bits-1);
 *  - the padding bits of the last byte are not all zero.
 * On failure, x[] may have been partially written.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;   /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);   /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            /*
             * Sign-extend the field to 32 bits. This is done once;
             * repeating it would not change w.
             */
            w |= -(w & mask2);
            if (w == -mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            x[u ++] = (int16_t) * (int32_t *)&w;
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}

/* see inner.h */
/*
 * Pack 2^logn signed 8-bit values as consecutive two's-complement
 * fields of 'bits' bits each (big-endian bit order). Every value must
 * lie in -(2^(bits-1)-1)..+(2^(bits-1)-1). Returns the encoded length
 * in bytes, or 0 if a value is out of range or the buffer is too
 * small. When out is NULL, only the required length is returned
 * (values are still range-checked).
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    size_t count, k, need;
    int lo, hi;
    uint8_t *dst;
    uint32_t acc, field_mask;
    unsigned acc_bits;

    count = (size_t)1 << logn;
    hi = (1 << (bits - 1)) - 1;
    lo = -hi;
    k = 0;
    while (k < count) {
        if (x[k] < lo || x[k] > hi) {
            return 0;
        }
        k ++;
    }

    need = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    acc = 0;
    acc_bits = 0;
    field_mask = ((uint32_t)1 << bits) - 1;
    for (k = 0; k < count; k ++) {
        acc = (acc << bits) | ((uint8_t)x[k] & field_mask);
        /* Emit every complete byte currently held in the accumulator. */
        for (acc_bits += bits; acc_bits >= 8;) {
            acc_bits -= 8;
            *dst ++ = (uint8_t)(acc >> acc_bits);
        }
    }
    if (acc_bits > 0) {
        /* Left-align the leftover bits in a final byte. */
        *dst ++ = (uint8_t)(acc << (8 - acc_bits));
    }
    return need;
}

/* see inner.h */
/*
 * Unpack 2^logn signed values from consecutive two's-complement fields
 * of 'bits' bits each (big-endian bit order) into x[]. Returns the
 * number of bytes consumed, or 0 if the input is too short, a field
 * encodes the forbidden value -2^(bits-1), or the padding bits of the
 * last byte are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t count, need, k;
    const uint8_t *src;
    uint32_t acc, field_mask, sign_bit;
    unsigned acc_bits;

    count = (size_t)1 << logn;
    need = ((count * bits) + 7) >> 3;
    if (need > max_in_len) {
        return 0;
    }

    src = in;
    acc = 0;
    acc_bits = 0;
    field_mask = ((uint32_t)1 << bits) - 1;
    sign_bit = (uint32_t)1 << (bits - 1);
    for (k = 0; k < count;) {
        acc = (acc << 8) | *src ++;
        acc_bits += 8;
        /* Extract every complete field now available. */
        for (; acc_bits >= bits && k < count; k ++) {
            uint32_t v;

            acc_bits -= bits;
            v = (acc >> acc_bits) & field_mask;
            v |= -(v & sign_bit);
            if (v == -sign_bit) {
                /* -2^(bits-1) is not an allowed value. */
                return 0;
            }
            x[k] = (int8_t) * (int32_t *)&v;
        }
    }

    /* Whatever is left below the last field is padding: must be 0. */
    if ((acc & (((uint32_t)1 << acc_bits) - 1)) != 0) {
        return 0;
    }
    return need;
}

/* see inner.h */
/*
 * Compressed encoding of 2^logn signed values, each of which must lie
 * in -2047..+2047. A value is written as: one sign bit, the low seven
 * bits of its absolute value, then the remaining high part of the
 * absolute value in unary (that many 0 bits followed by a single 1).
 *
 * Returns the number of bytes produced, or 0 if a value is out of
 * range or the output does not fit in max_out_len bytes. When out is
 * NULL nothing is written and only the length is computed.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    uint8_t *dst;
    size_t count, k, written;
    uint32_t acc;
    unsigned acc_bits;

    count = (size_t)1 << logn;
    dst = out;

    /*
     * Range check first, so that nothing is emitted for bad input.
     */
    for (k = 0; k < count; k ++) {
        if (x[k] < -2047 || x[k] > +2047) {
            return 0;
        }
    }

    acc = 0;
    acc_bits = 0;
    written = 0;
    for (k = 0; k < count; k ++) {
        int val;
        unsigned mag, high;

        /*
         * Sign bit, then split the absolute value into its low seven
         * bits and the remaining high part.
         */
        val = x[k];
        acc <<= 1;
        if (val < 0) {
            val = -val;
            acc |= 1;
        }
        mag = (unsigned)val;
        acc = (acc << 7) | (mag & 127u);
        high = mag >> 7;
        acc_bits += 8;

        /*
         * Unary part: 'high' zeros and a terminating one. Since the
         * absolute value is at most 2047, high <= 15, so at most 16
         * bits are added here; with the 8 bits above and at most 7
         * pending bits the 32-bit accumulator cannot overflow.
         */
        acc = (acc << (high + 1)) | 1;
        acc_bits += high + 1;

        /*
         * Flush every complete byte.
         */
        for (; acc_bits >= 8; written ++) {
            acc_bits -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(acc >> acc_bits);
            }
        }
    }

    /*
     * A final partial byte is left-aligned and zero-padded.
     */
    if (acc_bits > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(acc << (8 - acc_bits));
        }
        written ++;
    }

    return written;
}

/* see inner.h */
/*
 * Decode 2^logn signed values from the compressed format: for each
 * value, one sign bit, the low seven bits of the absolute value, then
 * the high part of the absolute value in unary (zeros terminated by a
 * one).
 *
 * Returns the number of bytes consumed, or 0 on error. Errors are:
 *  - the input ends before all values have been decoded;
 *  - an absolute value exceeds 2047;
 *  - a value is encoded as "-0" (sign bit set, magnitude zero);
 *  - the unused low bits of the last consumed byte are not all zero.
 *
 * The last two conditions reject non-canonical encodings, so that a
 * given sequence of values has exactly one accepted byte string. On
 * error, x[] may have been partially written.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero must be encoded with a clear sign
         * bit, otherwise two distinct encodings would decode to the
         * same value.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t) m;
        if (s) {
            x[u] = (int16_t) - x[u];
        }
    }

    /*
     * Unused bits in the last byte must be zero (acc_len is at most 7
     * here, and counts the not-yet-consumed low bits of that byte).
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}

/*
* Key elements and signatures are polynomials with small integer
* coefficients. Here are some statistics gathered over many
* generated key pairs (10000 or more for each degree):
*
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G)
* 1 2 129 56.31 143 60.02
* 2 4 123 40.93 160 46.52
* 3 8 97 28.97 159 38.01
* 4 16 100 21.48 154 32.50
* 5 32 71 15.41 151 29.36
* 6 64 59 11.07 138 27.77
* 7 128 39 7.91 144 27.00
* 8 256 32 5.63 148 26.61
* 9 512 22 4.00 137 26.46
* 10 1024 15 2.84 146 26.41
*
* We want a compact storage format for private key, and, as part of
* key generation, we are allowed to reject some keys which would
* otherwise be fine (this does not induce any noticeable vulnerability
* as long as we reject only a small proportion of possible keys).
* Hence, we enforce at key generation time maximum values for the
* elements of f, g, F and G, so that their encoding can be expressed
* in fixed-width values. Limits have been chosen so that generated
* keys are almost always within bounds, thus impacting neither
* security nor performance.
*
* IMPORTANT: the code assumes that all coefficients of f, g, F and G
* ultimately fit in the -127..+127 range. Thus, none of the elements
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8.
*/

/*
 * Per-degree field width, in bits, for the coefficients of f and g.
 * Slot 0 is unused; the remaining ten slots presumably correspond to
 * logn = 1..10 -- confirm against the callers that index this table.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[] = {
0, /* unused */
8,
8,
8,
8,
8,
7,
7,
6,
6,
5
};

/*
 * Per-degree field width, in bits, for the coefficients of F and G.
 * Slot 0 is unused; the remaining ten slots presumably correspond to
 * logn = 1..10 -- confirm against the callers that index this table.
 * Every used entry is 8.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[] = {
0, /* unused */
8,
8,
8,
8,
8,
8,
8,
8,
8,
8
};

/*
* When generating a new key pair, we can always reject keys which
* feature an abnormally large coefficient. This can also be done for
* signatures, albeit with some care: in case the signature process is
* used in a derandomized setup (explicitly seeded with the message and
* private key), we have to follow the specification faithfully, and the
* specification only enforces a limit on the L2 norm of the signature
* vector. The limit on the L2 norm implies that the absolute value of
* a coefficient of the signature cannot be more than the following:
*
* log(n) n max sig coeff (theoretical)
* 1 2 412
* 2 4 583
* 3 8 824
* 4 16 1166
* 5 32 1649
* 6 64 2332
* 7 128 3299
* 8 256 4665
* 9 512 6598
* 10 1024 9331
*
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit
* in -2047..2047, i.e. 12 bits.
*/

/*
 * Per-degree field width, in bits, for signature coefficients.
 * Slot 0 is unused; the remaining ten slots presumably correspond to
 * logn = 1..10 -- confirm against the callers that index this table.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[] = {
0, /* unused */
10,
11,
11,
12,
12,
12,
12,
12,
12,
12
};

+ 0
- 294
src/sign/falcon/falcon-1024/clean/common.c View File

@@ -1,294 +0,0 @@
#include "inner.h"

/*
* Support functions for signatures (hash-to-point, norm).
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/* see inner.h */
/*
 * Fill x[] with 2^logn values modulo 12289 drawn from the SHAKE256
 * context by rejection sampling: 16-bit big-endian samples of 61445 or
 * more are discarded, the others are reduced modulo 12289.
 *
 * This is the straightforward, per-the-spec method and it is NOT
 * constant-time. It might therefore leak information on the plaintext
 * (at least enough to test it against a list of candidate plaintexts)
 * to an attacker who knows the nonce but has access to neither the
 * signature value nor the public key. Without the nonce, the hashed
 * output cannot be matched against candidate plaintexts.
 */
void
PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(
    inner_shake256_context *sc,
    uint16_t *x, unsigned logn) {
    size_t remaining;
    uint16_t *dst;

    dst = x;
    for (remaining = (size_t)1 << logn; remaining > 0;) {
        uint8_t pair[2];
        uint32_t sample;

        inner_shake256_extract(sc, (void *)pair, sizeof pair);
        sample = ((unsigned)pair[0] << 8) | (unsigned)pair[1];
        if (sample >= 61445) {
            /* Rejected: draw again. */
            continue;
        }
        while (sample >= 12289) {
            sample -= 12289;
        }
        *dst ++ = (uint16_t)sample;
        remaining --;
    }
}

/* see inner.h */
/*
 * Fill x[] with 2^logn values modulo 12289 drawn from the SHAKE256
 * context, without data-dependent branches on the sampled values.
 * A fixed number of 16-bit samples (n plus an oversampling margin) is
 * always drawn; rejected samples are then squeezed out with masked
 * swaps. tmp[] is used as scratch space for samples n..2n-1.
 */
void
PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(
inner_shake256_context *sc,
uint16_t *x, unsigned logn, uint8_t *tmp) {
/*
* Each 16-bit sample is a value in 0..65535. The value is
* kept if it falls in 0..61444 (because 61445 = 5*12289)
* and rejected otherwise; thus, each sample has probability
* about 0.93758 of being selected.
*
* We want to oversample enough to be sure that we will
* have enough values with probability at least 1 - 2^(-256).
* Depending on degree N, this leads to the following
* required oversampling:
*
* logn n oversampling
* 1 2 65
* 2 4 67
* 3 8 71
* 4 16 77
* 5 32 86
* 6 64 100
* 7 128 122
* 8 256 154
* 9 512 205
* 10 1024 287
*
* If logn >= 7, then the provided temporary buffer is large
* enough. Otherwise, we use a stack buffer of 63 entries
* (i.e. 126 bytes) for the values that do not fit in tmp[].
*/

/* Oversampling margin, indexed by logn (see the table above). */
static const uint16_t overtab[] = {
0, /* unused */
65,
67,
71,
77,
86,
100,
122,
154,
205,
287
};

unsigned n, n2, u, m, p, over;
uint16_t *tt1, tt2[63];

/*
* We first generate m 16-bit values. Values 0..n-1 go to x[].
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
* We also reduce the values modulo q; rejected values are set
* to 0xFFFF.
*/
n = 1U << logn;
n2 = n << 1;
over = overtab[logn];
m = n + over;
tt1 = (uint16_t *)tmp;
for (u = 0; u < m; u ++) {
uint8_t buf[2];
uint32_t w, wr;

inner_shake256_extract(sc, buf, sizeof buf);
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
/*
* Branchless reduction: each line subtracts its constant only
* when the value is at least that constant (the shifted
* difference is 1 on wrap-around, so "- 1" yields an all-zero
* mask in that case and an all-ones mask otherwise).
*/
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1));
/* Samples >= 61445 are rejected: force all bits to one. */
wr |= ((w - 61445) >> 31) - 1;
if (u < n) {
x[u] = (uint16_t)wr;
} else if (u < n2) {
tt1[u - n] = (uint16_t)wr;
} else {
tt2[u - n2] = (uint16_t)wr;
}
}

/*
* Now we must "squeeze out" the invalid values. We do this in
* a logarithmic sequence of passes; each pass computes where a
* value should go, and moves it down by 'p' slots if necessary,
* where 'p' uses an increasing powers-of-two scale. It can be
* shown that in all cases where the loop decides that a value
* has to be moved down by p slots, the destination slot is
* "free" (i.e. contains an invalid value).
*/
for (p = 1; p <= over; p <<= 1) {
unsigned v;

/*
* In the loop below:
*
* - v contains the index of the final destination of
* the value; it is recomputed dynamically based on
* whether values are valid or not.
*
* - u is the index of the value we consider ("source");
* its address is s.
*
* - The loop may swap the value with the one at index
* u-p. The address of the swap destination is d.
*/
v = 0;
for (u = 0; u < m; u ++) {
uint16_t *s, *d;
unsigned j, sv, dv, mk;

if (u < n) {
s = &x[u];
} else if (u < n2) {
s = &tt1[u - n];
} else {
s = &tt2[u - n2];
}
sv = *s;

/*
* The value in sv should ultimately go to
* address v, i.e. jump back by u-v slots.
*/
j = u - v;

/*
* We increment v for the next iteration, but
* only if the source value is valid. The mask
* 'mk' is -1 if the value is valid, 0 otherwise,
* so we _subtract_ mk.
*/
mk = (sv >> 15) - 1U;
v -= mk;

/*
* In this loop we consider jumps by p slots; if
* u < p then there is nothing more to do.
*/
if (u < p) {
continue;
}

/*
* Destination for the swap: value at address u-p.
*/
if ((u - p) < n) {
d = &x[u - p];
} else if ((u - p) < n2) {
d = &tt1[(u - p) - n];
} else {
d = &tt2[(u - p) - n2];
}
dv = *d;

/*
* The swap should be performed only if the source
* is valid AND the jump j has its 'p' bit set.
* (j & p) is either 0 or p; adding 0x1FF and shifting
* right by 9 maps any non-zero p up to 0x200 to 1.
*/
mk &= -(((j & p) + 0x1FF) >> 9);

/* Masked swap of *s and *d (no-op when mk is 0). */
*s = (uint16_t)(sv ^ (mk & (sv ^ dv)));
*d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
}
}
}

/* see inner.h */
/*
 * Tell whether the vector (s1, s2), 2^logn coefficients each, is short
 * enough. Returns 1 if its squared l2-norm is strictly below the
 * acceptance bound, 0 otherwise.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    size_t count, k;
    uint32_t sum, sticky;

    /*
     * The squared norm is accumulated with 32-bit operations only.
     * 'sticky' ORs together every partial sum; if its top bit ever
     * gets set (a partial sum went above 2^31-1), the final sum is
     * saturated to 2^32-1.
     */
    count = (size_t)1 << logn;
    sum = 0;
    sticky = 0;
    for (k = 0; k < count; k ++) {
        int32_t c1, c2;

        c1 = s1[k];
        sum += (uint32_t)(c1 * c1);
        sticky |= sum;
        c2 = s2[k];
        sum += (uint32_t)(c2 * c2);
        sticky |= sum;
    }
    sum |= -(sticky >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return sum < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}

/* see inner.h */
/*
 * Same test as is_short(), but with the contribution of the first half
 * of the vector supplied as an already-computed squared norm (sqn). A
 * sqn with its top bit set is treated as saturated. Returns 1 if the
 * total squared norm is strictly below the acceptance bound, 0
 * otherwise.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    size_t count, k;
    uint32_t sticky;

    count = (size_t)1 << logn;
    sticky = -(sqn >> 31);
    k = 0;
    while (k < count) {
        int32_t c;

        c = s2[k];
        sqn += (uint32_t)(c * c);
        sticky |= sqn;
        k ++;
    }
    /* Saturate to 2^32-1 if any partial sum reached 2^31 or more. */
    sqn |= -(sticky >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}

+ 0
- 700
src/sign/falcon/falcon-1024/clean/fft.c View File

@@ -1,700 +0,0 @@
#include "inner.h"

/*
* FFT code.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/*
* Rules for complex number macros:
* --------------------------------
*
* Operand order is: destination, source1, source2...
*
* Each operand is a real and an imaginary part.
*
* All overlaps are allowed.
*/

/*
* Addition of two complex numbers (d = a + b).
*
* Both sums are computed into temporaries before either destination is
* written, so the destination operands may overlap the sources.
*/
#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \
fpr fpct_re, fpct_im; \
fpct_re = fpr_add(a_re, b_re); \
fpct_im = fpr_add(a_im, b_im); \
(d_re) = fpct_re; \
(d_im) = fpct_im; \
} while (0)

/*
* Subtraction of two complex numbers (d = a - b).
*
* Both differences are computed into temporaries before either
* destination is written, so the destination operands may overlap the
* sources.
*/
#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \
fpr fpct_re, fpct_im; \
fpct_re = fpr_sub(a_re, b_re); \
fpct_im = fpr_sub(a_im, b_im); \
(d_re) = fpct_re; \
(d_im) = fpct_im; \
} while (0)

/*
* Multiplication of two complex numbers (d = a * b):
*   d_re = a_re*b_re - a_im*b_im
*   d_im = a_re*b_im + a_im*b_re
*
* The four source operands are first copied into locals, so each
* argument expression is evaluated once and the destinations may
* overlap the sources.
*/
#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \
fpr fpct_a_re, fpct_a_im; \
fpr fpct_b_re, fpct_b_im; \
fpr fpct_d_re, fpct_d_im; \
fpct_a_re = (a_re); \
fpct_a_im = (a_im); \
fpct_b_re = (b_re); \
fpct_b_im = (b_im); \
fpct_d_re = fpr_sub( \
fpr_mul(fpct_a_re, fpct_b_re), \
fpr_mul(fpct_a_im, fpct_b_im)); \
fpct_d_im = fpr_add( \
fpr_mul(fpct_a_re, fpct_b_im), \
fpr_mul(fpct_a_im, fpct_b_re)); \
(d_re) = fpct_d_re; \
(d_im) = fpct_d_im; \
} while (0)

/*
* Squaring of a complex number (d = a * a):
*   d_re = a_re^2 - a_im^2
*   d_im = 2 * a_re * a_im
*
* The source operands are copied into locals first, so the
* destinations may overlap the source.
*/
#define FPC_SQR(d_re, d_im, a_re, a_im) do { \
fpr fpct_a_re, fpct_a_im; \
fpr fpct_d_re, fpct_d_im; \
fpct_a_re = (a_re); \
fpct_a_im = (a_im); \
fpct_d_re = fpr_sub(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \
fpct_d_im = fpr_double(fpr_mul(fpct_a_re, fpct_a_im)); \
(d_re) = fpct_d_re; \
(d_im) = fpct_d_im; \
} while (0)

/*
* Inversion of a complex number (d = 1 / a), computed as
* conj(a) / |a|^2:
*   m    = 1 / (a_re^2 + a_im^2)
*   d_re = a_re * m
*   d_im = -a_im * m
*
* No check is made here for a == 0. The source operands are copied
* into locals first, so the destinations may overlap the source.
*/
#define FPC_INV(d_re, d_im, a_re, a_im) do { \
fpr fpct_a_re, fpct_a_im; \
fpr fpct_d_re, fpct_d_im; \
fpr fpct_m; \
fpct_a_re = (a_re); \
fpct_a_im = (a_im); \
fpct_m = fpr_add(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \
fpct_m = fpr_inv(fpct_m); \
fpct_d_re = fpr_mul(fpct_a_re, fpct_m); \
fpct_d_im = fpr_mul(fpr_neg(fpct_a_im), fpct_m); \
(d_re) = fpct_d_re; \
(d_im) = fpct_d_im; \
} while (0)

/*
* Division of complex numbers (d = a / b).
*
* b is first replaced by its inverse conj(b) / |b|^2 (in the local
* copies only), then a is multiplied by that inverse. No check is made
* here for b == 0. The source operands are copied into locals first,
* so the destinations may overlap the sources.
*/
#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im) do { \
fpr fpct_a_re, fpct_a_im; \
fpr fpct_b_re, fpct_b_im; \
fpr fpct_d_re, fpct_d_im; \
fpr fpct_m; \
fpct_a_re = (a_re); \
fpct_a_im = (a_im); \
fpct_b_re = (b_re); \
fpct_b_im = (b_im); \
fpct_m = fpr_add(fpr_sqr(fpct_b_re), fpr_sqr(fpct_b_im)); \
fpct_m = fpr_inv(fpct_m); \
fpct_b_re = fpr_mul(fpct_b_re, fpct_m); \
fpct_b_im = fpr_mul(fpr_neg(fpct_b_im), fpct_m); \
fpct_d_re = fpr_sub( \
fpr_mul(fpct_a_re, fpct_b_re), \
fpr_mul(fpct_a_im, fpct_b_im)); \
fpct_d_im = fpr_add( \
fpr_mul(fpct_a_re, fpct_b_im), \
fpr_mul(fpct_a_im, fpct_b_re)); \
(d_re) = fpct_d_re; \
(d_im) = fpct_d_im; \
} while (0)

/*
* Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the
* values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots
* of X^N+1 in the field of complex numbers. A crucial property is that
* w_{N-1-j} = conj(w_j) = 1/w_j for all j.
*
* FFT representation of a polynomial f (taken modulo X^N+1) is the
* set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)),
* thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values,
* for j = 0 to N/2-1; the other half can be recomputed easily when (if)
* needed. A consequence is that FFT representation has the same size
* as normal representation: N/2 complex numbers use N real numbers (each
* complex number is the combination of a real and an imaginary part).
*
* We use a specific ordering which makes computations easier. Let rev()
* be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we
* store the real and imaginary parts of f(w_j) in slots:
*
* Re(f(w_j)) -> slot rev(j)/2
* Im(f(w_j)) -> slot rev(j)/2+N/2
*
* (Note that rev(j) is even for j < N/2.)
*/

/* see inner.h */
/*
 * In-place FFT of a real polynomial of 2^logn coefficients.
 *
 * The textbook bit-reversed iterative FFT is:
 *
 *   t = N
 *   for m = 1; m < N; m *= 2:
 *       ht = t/2
 *       for i1 = 0; i1 < m; i1 ++:
 *           j1 = i1 * t
 *           s = GM[m + i1]
 *           for j = j1; j < (j1 + ht); j ++:
 *               x = f[j]
 *               y = s * f[j + ht]
 *               f[j] = x + y
 *               f[j + ht] = x - y
 *       t = ht
 *
 * where GM[k] = w^rev(k) for the primitive root w = exp(i*pi/N), and
 * f[] holds complex numbers. Here, complex number k is stored with its
 * real part in slot k and its imaginary part in slot k+N/2, and only
 * the first half of the complex values is kept: after the first pass
 * the two halves evolve independently, so the second one is ignored.
 */
void
PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn) {
    size_t full, half, span, groups;
    unsigned level;

    /*
     * The first pass (m = 1) would compute f[j] + i*f[j+N/2] for all
     * j < N/2, since GM[1] = i. With the storage layout used here that
     * is a no-op, so processing starts directly at m = 2, restricted
     * to the first half of the values.
     */
    full = (size_t)1 << logn;
    half = full >> 1;
    span = half;
    groups = 2;
    for (level = 1; level < logn; level ++) {
        size_t hspan, hgroups, g, base;

        hspan = span >> 1;
        hgroups = groups >> 1;
        base = 0;
        for (g = 0; g < hgroups; g ++) {
            fpr w_re, w_im;
            size_t k, stop;

            stop = base + hspan;
            w_re = fpr_gm_tab[((groups + g) << 1) + 0];
            w_im = fpr_gm_tab[((groups + g) << 1) + 1];
            for (k = base; k < stop; k ++) {
                fpr p_re, p_im, q_re, q_im;

                p_re = f[k];
                p_im = f[k + half];
                q_re = f[k + hspan];
                q_im = f[k + hspan + half];
                FPC_MUL(q_re, q_im, q_re, q_im, w_re, w_im);
                FPC_ADD(f[k], f[k + half],
                        p_re, p_im, q_re, q_im);
                FPC_SUB(f[k + hspan], f[k + hspan + half],
                        p_re, p_im, q_re, q_im);
            }
            base += span;
        }
        span = hspan;
        groups <<= 1;
    }
}

/* see inner.h */
void
PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn) {
    /*
     * In-place inverse FFT from bit-reversed order.
     *
     * Textbook form:
     *
     *   t = 1
     *   for m = N; m > 1; m /= 2:
     *       hm = m/2, dt = 2*t
     *       for each group i1 in 0..hm-1, starting at j1 = i1*dt:
     *           s = iGM[hm + i1]
     *           for j in j1..j1+t-1:
     *               (f[j], f[j+t]) = (f[j] + f[j+t], s*(f[j] - f[j+t]))
     *       t = dt
     *   divide every f[] by N
     *
     * iGM[k] = 1/GM[k] = conj(GM[k]), which is why the imaginary part
     * read from fpr_gm_tab is negated below.
     *
     * Only the first half of the complex values is stored (real part
     * in slot k, imaginary part in slot k + N/2), and all levels but
     * the last keep the two halves separate. The last level (s = i)
     * would merge the stored half with its implicit conjugate half,
     * producing 2*Re(x) and 2*Im(x) in the very slots that already
     * hold Re(x) and Im(x): a plain doubling. That level is therefore
     * dropped and compensated by scaling with 1/(N/2) instead of 1/N.
     */
    const size_t n = (size_t)1 << logn;
    const size_t half = n >> 1;
    size_t width = 1;
    size_t groups = n;
    size_t level, idx;

    for (level = logn; level > 1; level --) {
        const size_t tw_base = groups >> 1;
        const size_t stride = width << 1;
        size_t g, base;

        for (g = 0, base = 0; base < half; g ++, base += stride) {
            const size_t tw = (tw_base + g) << 1;
            const fpr w_re = fpr_gm_tab[tw + 0];
            const fpr w_im = fpr_neg(fpr_gm_tab[tw + 1]);
            const size_t stop = base + width;
            size_t k;

            for (k = base; k < stop; k ++) {
                const size_t hi = k + width;
                fpr u_re, u_im, v_re, v_im;

                u_re = f[k];
                u_im = f[k + half];
                v_re = f[hi];
                v_im = f[hi + half];

                /* (u, v) <- (u + v, conj(GM) * (u - v)) */
                FPC_ADD(f[k], f[k + half],
                        u_re, u_im, v_re, v_im);
                FPC_SUB(u_re, u_im, u_re, u_im, v_re, v_im);
                FPC_MUL(f[hi], f[hi + half],
                        u_re, u_im, w_re, w_im);
            }
        }
        width = stride;
        groups = tw_base;
    }

    /*
     * Final scaling by the table entry for this degree. For logn = 0
     * there is nothing to scale, hence the guard.
     */
    if (logn > 0) {
        const fpr scale = fpr_p2_tab[logn];

        for (idx = 0; idx < n; idx ++) {
            f[idx] = fpr_mul(f[idx], scale);
        }
    }
}

/* see inner.h */
/*
 * a <- a + b, slot by slot, over all 2^logn slots.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_add(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t count = (size_t)1 << logn;

    for (size_t i = 0; i < count; i ++) {
        const fpr sum = fpr_add(a[i], b[i]);

        a[i] = sum;
    }
}

/* see inner.h */
/*
 * a <- a - b, slot by slot, over all 2^logn slots.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_sub(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t count = (size_t)1 << logn;

    for (size_t i = 0; i < count; i ++) {
        const fpr diff = fpr_sub(a[i], b[i]);

        a[i] = diff;
    }
}

/* see inner.h */
/*
 * a <- -a: flips the sign of each of the 2^logn slots.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn) {
    const size_t count = (size_t)1 << logn;

    for (size_t i = 0; i < count; i ++) {
        a[i] = fpr_neg(a[i]);
    }
}

/* see inner.h */
/*
 * Conjugates every stored complex value: the upper half of the array
 * (slots N/2..N-1, which hold the imaginary parts) is negated, the
 * lower half is left untouched.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn) {
    const size_t count = (size_t)1 << logn;

    for (size_t i = count >> 1; i < count; i ++) {
        a[i] = fpr_neg(a[i]);
    }
}

/* see inner.h */
/*
 * a <- a * b, as a pointwise product of complex values. Value k has
 * its real part in slot k and its imaginary part in slot k + N/2.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        /* Load both operands completely before overwriting a[]. */
        const fpr lhs_re = a[k];
        const fpr lhs_im = a[k + half];
        const fpr rhs_re = b[k];
        const fpr rhs_im = b[k + half];

        FPC_MUL(a[k], a[k + half], lhs_re, lhs_im, rhs_re, rhs_im);
    }
}

/* see inner.h */
/*
 * a <- a * conj(b), as a pointwise product of complex values. The
 * conjugate is obtained by negating the imaginary part of b on load.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        const fpr lhs_re = a[k];
        const fpr lhs_im = a[k + half];
        const fpr rhs_re = b[k];
        const fpr rhs_im = fpr_neg(b[k + half]);

        FPC_MUL(a[k], a[k + half], lhs_re, lhs_im, rhs_re, rhs_im);
    }
}

/* see inner.h */
/*
 * a <- a * conj(a), pointwise. Each product is the squared modulus
 * re^2 + im^2, a real number, so the imaginary slot is set to zero.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        const fpr re = a[k];
        const fpr im = a[k + half];
        const fpr norm = fpr_add(fpr_sqr(re), fpr_sqr(im));

        a[k] = norm;
        a[k + half] = fpr_zero;
    }
}

/* see inner.h */
/*
 * a <- x * a: every one of the 2^logn slots is multiplied by the same
 * value x.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) {
    const size_t count = (size_t)1 << logn;

    for (size_t i = 0; i < count; i ++) {
        a[i] = fpr_mul(a[i], x);
    }
}

/* see inner.h */
/*
 * a <- a / b, as a pointwise quotient of complex values (real part in
 * slot k, imaginary part in slot k + N/2).
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_div_fft(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        const fpr num_re = a[k];
        const fpr num_im = a[k + half];
        const fpr den_re = b[k];
        const fpr den_im = b[k + half];

        FPC_DIV(a[k], a[k + half], num_re, num_im, den_re, den_im);
    }
}

/* see inner.h */
/*
 * d[k] <- 1 / (|a_k|^2 + |b_k|^2) for each of the N/2 stored complex
 * values. Only the lower half of d[] (slots 0..N/2-1) is written.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d,
        const fpr *a, const fpr *b, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        const fpr a_re = a[k];
        const fpr a_im = a[k + half];
        const fpr b_re = b[k];
        const fpr b_im = b[k + half];
        const fpr norm_a = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im));
        const fpr norm_b = fpr_add(fpr_sqr(b_re), fpr_sqr(b_im));

        d[k] = fpr_inv(fpr_add(norm_a, norm_b));
    }
}

/* see inner.h */
/*
 * d <- F * conj(f) + G * conj(g), computed pointwise on complex
 * values (real part in slot k, imaginary part in slot k + N/2).
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d,
        const fpr *F, const fpr *G,
        const fpr *f, const fpr *g, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        fpr Ff_re, Ff_im, Gg_re, Gg_im;

        /* Load all eight inputs before anything is written to d[]. */
        const fpr bigF_re = F[k];
        const fpr bigF_im = F[k + half];
        const fpr bigG_re = G[k];
        const fpr bigG_im = G[k + half];
        const fpr smallf_re = f[k];
        const fpr smallf_im = f[k + half];
        const fpr smallg_re = g[k];
        const fpr smallg_im = g[k + half];

        FPC_MUL(Ff_re, Ff_im, bigF_re, bigF_im,
                smallf_re, fpr_neg(smallf_im));
        FPC_MUL(Gg_re, Gg_im, bigG_re, bigG_im,
                smallg_re, fpr_neg(smallg_im));
        d[k] = fpr_add(Ff_re, Gg_re);
        d[k + half] = fpr_add(Ff_im, Gg_im);
    }
}

/* see inner.h */
/*
 * a <- a * b where only the real slots of b (0..N/2-1) are used: both
 * the real and the imaginary part of a_k are scaled by b[k].
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;
    fpr *const re = a;
    fpr *const im = a + half;

    for (size_t k = 0; k < half; k ++) {
        /* b[k] is deliberately re-read for each of the two products. */
        re[k] = fpr_mul(re[k], b[k]);
        im[k] = fpr_mul(im[k], b[k]);
    }
}

/* see inner.h */
/*
 * a <- a / b where only the real slots of b (0..N/2-1) are used: the
 * inverse of b[k] is computed once and applied to both the real and
 * the imaginary part of a_k.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(
    fpr *a, const fpr *b, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        const fpr recip = fpr_inv(b[k]);

        a[k] = fpr_mul(a[k], recip);
        a[k + half] = fpr_mul(a[k + half], recip);
    }
}

/* see inner.h */
/*
 * In-place pointwise LDL step. For each stored complex value:
 *
 *   mu  = g01 / g00
 *   g11 <- g11 - mu * conj(g01)
 *   g01 <- conj(mu)
 *
 * g00 is only read.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(
    const fpr *g00,
    fpr *g01, fpr *g11, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        fpr ratio_re, ratio_im;
        fpr prod_re, prod_im;

        /* Snapshot all inputs: g01 and g11 are overwritten below. */
        const fpr d_re = g00[k];
        const fpr d_im = g00[k + half];
        const fpr off_re = g01[k];
        const fpr off_im = g01[k + half];
        const fpr low_re = g11[k];
        const fpr low_im = g11[k + half];

        FPC_DIV(ratio_re, ratio_im, off_re, off_im, d_re, d_im);
        FPC_MUL(prod_re, prod_im, ratio_re, ratio_im,
                off_re, fpr_neg(off_im));
        FPC_SUB(g11[k], g11[k + half], low_re, low_im, prod_re, prod_im);
        g01[k] = ratio_re;
        g01[k + half] = fpr_neg(ratio_im);
    }
}

/* see inner.h */
/*
 * Pointwise LDL step writing to separate outputs. For each stored
 * complex value:
 *
 *   mu  = g01 / g00
 *   d11 <- g11 - mu * conj(g01)
 *   l10 <- conj(mu)
 *
 * g00, g01 and g11 are only read.
 */
void
PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(
    fpr *d11, fpr *l10,
    const fpr *g00, const fpr *g01,
    const fpr *g11, unsigned logn) {
    const size_t half = ((size_t)1 << logn) >> 1;

    for (size_t k = 0; k < half; k ++) {
        fpr ratio_re, ratio_im;
        fpr prod_re, prod_im;

        /* Load every input before the first store to d11[] / l10[]. */
        const fpr d_re = g00[k];
        const fpr d_im = g00[k + half];
        const fpr off_re = g01[k];
        const fpr off_im = g01[k + half];
        const fpr low_re = g11[k];
        const fpr low_im = g11[k + half];

        FPC_DIV(ratio_re, ratio_im, off_re, off_im, d_re, d_im);
        FPC_MUL(prod_re, prod_im, ratio_re, ratio_im,
                off_re, fpr_neg(off_im));
        FPC_SUB(d11[k], d11[k + half], low_re, low_im, prod_re, prod_im);
        l10[k] = ratio_re;
        l10[k + half] = fpr_neg(ratio_im);
    }
}

/* see inner.h */
void
PQCLEAN_FALCON1024_CLEAN_poly_split_fft(
    fpr *f0, fpr *f1,
    const fpr *f, unsigned logn) {
    /*
     * The FFT representation we use is in bit-reversed order
     * (element i contains f(w^(rev(i))), where rev() is the
     * bit-reversal function over the ring degree). This changes
     * indexes with regards to the Falcon specification.
     *
     * Complex values of f are consumed two at a time (a = value 2k,
     * b = value 2k+1) and yield one complex value in each output:
     *
     *   f0_k = (a + b) / 2
     *   f1_k = (a - b) * conj(GM[k + N/2]) / 2
     *
     * Outputs have half the degree: real part in slot k, imaginary
     * part in slot k + N/4.
     */
    const size_t half = ((size_t)1 << logn) >> 1;
    const size_t quarter = half >> 1;

    /*
     * For logn = 1 there is a single stored complex value (its
     * partner is the implicit conjugate) and the loop below does not
     * run; these two stores cover that case.
     */
    f0[0] = f[0];
    f1[0] = f[half];

    for (size_t k = 0; k < quarter; k ++) {
        const size_t even = (k << 1) + 0;
        const size_t odd = (k << 1) + 1;
        const size_t tw = (k + half) << 1;
        fpr acc_re, acc_im;

        const fpr a_re = f[even];
        const fpr a_im = f[even + half];
        const fpr b_re = f[odd];
        const fpr b_im = f[odd + half];

        FPC_ADD(acc_re, acc_im, a_re, a_im, b_re, b_im);
        f0[k] = fpr_half(acc_re);
        f0[k + quarter] = fpr_half(acc_im);

        FPC_SUB(acc_re, acc_im, a_re, a_im, b_re, b_im);
        FPC_MUL(acc_re, acc_im, acc_re, acc_im,
                fpr_gm_tab[tw + 0],
                fpr_neg(fpr_gm_tab[tw + 1]));
        f1[k] = fpr_half(acc_re);
        f1[k + quarter] = fpr_half(acc_im);
    }
}

/* see inner.h */
void
PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(
    fpr *f,
    const fpr *f0, const fpr *f1, unsigned logn) {
    /*
     * Each pair of half-degree complex values (a from f0, c from f1)
     * yields two consecutive complex values of f:
     *
     *   f_{2k}   = a + GM[k + N/2] * c
     *   f_{2k+1} = a - GM[k + N/2] * c
     *
     * Inputs keep their imaginary parts N/4 slots after the real
     * parts; the output keeps them N/2 slots after.
     */
    const size_t half = ((size_t)1 << logn) >> 1;
    const size_t quarter = half >> 1;

    /*
     * Extra copy for logn = 1, where the loop below does not run.
     */
    f[0] = f0[0];
    f[half] = f1[0];

    for (size_t k = 0; k < quarter; k ++) {
        const size_t even = (k << 1) + 0;
        const size_t odd = (k << 1) + 1;
        const size_t tw = (k + half) << 1;
        fpr rot_re, rot_im;
        fpr out_re, out_im;

        const fpr a_re = f0[k];
        const fpr a_im = f0[k + quarter];

        FPC_MUL(rot_re, rot_im, f1[k], f1[k + quarter],
                fpr_gm_tab[tw + 0],
                fpr_gm_tab[tw + 1]);

        FPC_ADD(out_re, out_im, a_re, a_im, rot_re, rot_im);
        f[even] = out_re;
        f[even + half] = out_im;

        FPC_SUB(out_re, out_im, a_re, a_im, rot_re, rot_im);
        f[odd] = out_re;
        f[odd + half] = out_im;
    }
}

+ 0
- 1634
src/sign/falcon/falcon-1024/clean/fpr.c
File diff suppressed because it is too large
View File


+ 0
- 473
src/sign/falcon/falcon-1024/clean/fpr.h View File

@@ -1,473 +0,0 @@
#ifndef PQCLEAN_FALCON1024_CLEAN_FPR_H
#define PQCLEAN_FALCON1024_CLEAN_FPR_H

/*
* Floating-point operations.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/* ====================================================================== */
/*
* Custom floating-point implementation with integer arithmetics. We
* use IEEE-754 "binary64" format, with some simplifications:
*
* - Top bit is s = 1 for negative, 0 for positive.
*
* - Exponent e uses the next 11 bits (bits 52 to 62, inclusive).
*
* - Mantissa m uses the 52 low bits.
*
* Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52))
* i.e. the mantissa really is a 53-bit number (less than 2.0, but not
* less than 1.0), but the top bit (equal to 1 by definition) is omitted
* in the encoding.
*
* In IEEE-754, there are some special values:
*
* - If e = 2047, then the value is either an infinite (m = 0) or
* a NaN (m != 0).
*
* - If e = 0, then the value is either a zero (m = 0) or a subnormal,
* aka "denormalized number" (m != 0).
*
* Of these, we only need the zeros. The caller is responsible for not
* providing operands that would lead to infinites, NaNs or subnormals.
* If inputs are such that values go out of range, then indeterminate
* values are returned (it would still be deterministic, but no specific
* value may be relied upon).
*
* At the C level, the three parts are stored in a 64-bit unsigned
* word.
*
* One may note that a property of the IEEE-754 format is that order
* is preserved for positive values: if two positive floating-point
* values x and y are such that x < y, then their respective encodings
* as _signed_ 64-bit integers i64(x) and i64(y) will be such that
* i64(x) < i64(y). For negative values, order is reversed: if x < 0,
* y < 0, and x < y, then i64(x) > i64(y).
*
* IMPORTANT ASSUMPTIONS:
* ======================
*
* For proper computations, and constant-time behaviour, we assume the
* following:
*
* - 32x32->64 multiplication (unsigned) has an execution time that
* is independent of its operands. This is true of most modern
* x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+
* and M3 (in the M0 and M0+, this is done in software, so it depends
* on that routine), and the PowerPC cores from the G3/G4 lines.
* For more info, see: https://www.bearssl.org/ctmul.html
*
* - Left-shifts and right-shifts of 32-bit values have an execution
* time which does not depend on the shifted value nor on the
* shift count. An historical exception is the Pentium IV, but most
* modern CPU have barrel shifters. Some small microcontrollers
* might have varying-time shifts (not the ARM Cortex M*, though).
*
* - Right-shift of a signed negative value performs a sign extension.
* As per the C standard, this operation returns an
* implementation-defined result (this is NOT an "undefined
* behaviour"). On most/all systems, an arithmetic shift is
* performed, because this is what makes most sense.
*/

/*
* Normally we should declare the 'fpr' type to be a struct or union
* around the internal 64-bit value; however, we want to use the
* direct 64-bit integer type to enable a lighter call convention on
* ARM platforms. This means that direct (invalid) use of operators
* such as '*' or '+' will not be caught by the compiler. We rely on
* the "normal" (non-emulated) code to detect such instances.
*/
typedef uint64_t fpr;

/*
* For computations, we split values into an integral mantissa in the
* 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is
* "sticky" (it is set to 1 if any of the bits below it is 1); when
* re-encoding, the low two bits are dropped, but may induce an
* increment in the value for proper rounding.
*/

/*
* Right-shift a 64-bit unsigned value by a possibly secret shift count.
* We assumed that the underlying architecture had a barrel shifter for
* 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will
* typically invoke a software routine that is not necessarily
* constant-time; hence the function below.
*
* Shift count n MUST be in the 0..63 range.
*/
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    /*
     * Two branchless stages: an optional shift by 32 (selected with
     * an all-ones/all-zeros mask derived from bit 5 of n), followed
     * by a shift by the low five bits of n.
     */
    const uint64_t by32 = x >> 32;
    const uint64_t pick = -(uint64_t)(n >> 5);
    const uint64_t staged = x ^ ((x ^ by32) & pick);

    return staged >> (n & 31);
}

/*
* Right-shift a 64-bit signed value by a possibly secret shift count
* (see fpr_ursh() for the rationale).
*
* Shift count n MUST be in the 0..63 range.
*/
static inline int64_t
fpr_irsh(int64_t x, int n) {
    /*
     * Same two-stage scheme as the unsigned variant, on signed
     * values: optional shift by 32 selected by a 0 / -1 mask, then a
     * shift by the low five bits of n. The shifts are applied to a
     * signed operand, so the sign is expected to be extended.
     */
    const int64_t by32 = x >> 32;
    const int64_t pick = -(int64_t)(n >> 5);
    const int64_t staged = x ^ ((x ^ by32) & pick);

    return staged >> (n & 31);
}

/*
* Left-shift a 64-bit unsigned value by a possibly secret shift count
* (see fpr_ursh() for the rationale).
*
* Shift count n MUST be in the 0..63 range.
*/
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    /*
     * Branchless two-stage left shift: optional shift by 32 selected
     * by a mask built from bit 5 of n, then a shift by the low five
     * bits of n.
     */
    const uint64_t by32 = x << 32;
    const uint64_t pick = -(uint64_t)(n >> 5);
    const uint64_t staged = x ^ ((x ^ by32) & pick);

    return staged << (n & 31);
}

/*
* Expectations:
* s = 0 or 1
* exponent e is "arbitrary" and unbiased
* 2^54 <= m < 2^55
* Numerical value is (-1)^s * m * 2^e
*
* Exponents which are too low lead to value zero. If the exponent is
* too large, the returned value is indeterminate.
*
* If m = 0, then a zero is returned (using the provided sign).
* If e < -1076, then a zero is returned (regardless of the value of m).
* If e >= -1076 and m != 0, m must be within the expected range
* (2^54 to 2^55-1).
*/
static inline fpr
FPR(int s, int e, uint64_t m) {
    /*
     * Shift the exponent so that a negative result means "below the
     * normal range"; in that case the mantissa is cleared, which
     * turns the output into a zero (subnormals are not supported).
     */
    int biased = e + 1076;
    const uint32_t too_small = (uint32_t)biased >> 31;
    const uint64_t mant = m & ((uint64_t)too_small - 1);

    /*
     * Bit 54 of the mantissa is set for every nonzero value. When the
     * mantissa is zero, the exponent is cleared as well so that the
     * result is a zero which still carries the requested sign.
     */
    const uint32_t nonzero = (uint32_t)(mant >> 54);
    biased &= -(int)nonzero;

    /*
     * Drop the two low (rounding) bits of the mantissa. Its top bit
     * is not masked out: it lands on the lowest exponent bit and the
     * addition makes it increment the exponent by one, except for a
     * zero mantissa, where nothing is added.
     */
    fpr bits = (((uint64_t)s << 63) | (mant >> 2))
               + ((uint64_t)(uint32_t)biased << 52);

    /*
     * Round to nearest, ties to even: increment when the three low
     * mantissa bits are 011, 110 or 111. 0xC8 is a lookup table with
     * exactly bits 3, 6 and 7 set. A carry out of the mantissa field
     * propagates into the exponent field.
     */
    const unsigned low3 = (unsigned)mant & 7U;
    bits += (0xC8U >> low3) & 1;
    return bits;
}

/*
* fpr_scaled() is defined outside this header; the macro above its
* prototype gives it a prefixed external name.
* NOTE(review): presumably it returns i * 2^sc as an fpr -- confirm
* against its definition.
*/
#define fpr_scaled PQCLEAN_FALCON1024_CLEAN_fpr_scaled
fpr fpr_scaled(int64_t i, int sc);

/*
* Convert a 64-bit signed integer to fpr: fpr_scaled() with a scale
* argument of 0.
*/
static inline fpr
fpr_of(int64_t i) {
return fpr_scaled(i, 0);
}

/*
* Constants, given as raw 64-bit encodings in the format described at
* the top of this file (sign bit, 11-bit exponent, 52-bit mantissa).
* For instance fpr_zero is the all-zero pattern and fpr_one is
* 0x3FF0000000000000 (exponent field 1023, mantissa 0), i.e. 1.0.
*
* The two "m..." constants are the corresponding "p..." encodings with
* the sign bit (bit 63) set; they need the U suffix because they do not
* fit in a signed 64-bit integer.
*
* fpr_ptwo63m1 and fpr_ptwo63 have the same encoding: the 53-bit
* significand cannot represent 2^63-1 exactly, so both names map to
* the encoding of 2^63.
*/
static const fpr fpr_q = 4667981563525332992;
static const fpr fpr_inverse_of_q = 4545632735260551042;
static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306;
static const fpr fpr_inv_sigma = 4573359825155195350;
static const fpr fpr_sigma_min_9 = 4608495221497168882;
static const fpr fpr_sigma_min_10 = 4608586345619182117;
static const fpr fpr_log2 = 4604418534313441775;
static const fpr fpr_inv_log2 = 4609176140021203710;
static const fpr fpr_bnorm_max = 4670353323383631276;
static const fpr fpr_zero = 0;
static const fpr fpr_one = 4607182418800017408;
static const fpr fpr_two = 4611686018427387904;
static const fpr fpr_onehalf = 4602678819172646912;
static const fpr fpr_invsqrt2 = 4604544271217802189;
static const fpr fpr_invsqrt8 = 4600040671590431693;
static const fpr fpr_ptwo31 = 4746794007248502784;
static const fpr fpr_ptwo31m1 = 4746794007244308480;
static const fpr fpr_mtwo31m1 = 13970166044099084288U;
static const fpr fpr_ptwo63m1 = 4890909195324358656;
static const fpr fpr_mtwo63m1 = 14114281232179134464U;
static const fpr fpr_ptwo63 = 4890909195324358656;

/*
* Round x to the nearest 64-bit signed integer; ties go to the even
* integer. The body uses only shifts, masks and additions: there is no
* branch on the value of x.
*/
static inline int64_t
fpr_rint(fpr x) {
uint64_t m, d;
int e;
uint32_t s, dd, f;

/*
* We assume that the value fits in -(2^63-1)..+(2^63-1). We can
* thus extract the mantissa as a 63-bit integer, then right-shift
* it as needed.
*/
m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1);
e = 1085 - ((int)(x >> 52) & 0x7FF);

/*
* If a shift of more than 63 bits is needed, then simply set m
* to zero. This also covers the case of an input operand equal
* to zero.
*/
m &= -(uint64_t)((uint32_t)(e - 64) >> 31);
e &= 63;

/*
* Right-shift m as needed. Shift count is e. Proper rounding
* mandates that:
* - If the highest dropped bit is zero, then round low.
* - If the highest dropped bit is one, and at least one of the
* other dropped bits is one, then round up.
* - If the highest dropped bit is one, and all other dropped
* bits are zero, then round up if the lowest kept bit is 1,
* or low otherwise (i.e. ties are broken by "rounding to even").
*
* We thus first extract a word consisting of all the dropped bits
* AND the lowest kept bit; then we shrink it down to three bits,
* the lowest being "sticky".
*
* In f, bit 2 is the lowest kept bit, bit 1 is the highest dropped
* bit, and bit 0 is set if any other dropped bit is set ((dd | -dd)
* has its top bit set exactly when dd is nonzero). 0xC8 has bits 3,
* 6 and 7 set, so the increment is applied for f = 011, 110 or 111.
*/
d = fpr_ulsh(m, 63 - e);
dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF);
f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31);
m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U);

/*
* Apply the sign bit (conditional two's-complement negation:
* XOR with 0 or -1, then add 0 or 1).
*/
s = (uint32_t)(x >> 63);
return ((int64_t)m ^ -(int64_t)s) + (int64_t)s;
}

static inline int64_t
fpr_floor(fpr x) {
    /*
     * Round toward minus infinity. The value is assumed to fit in
     * the -(2^63-1)..+(2^63-1) range.
     */
    const uint64_t sign = x >> 63;
    const int64_t sign_mask = -(int64_t)sign;
    const int shift = 1085 - ((int)(x >> 52) & 0x7FF);
    int64_t v;

    /*
     * Absolute value with the implicit top bit added, left-aligned
     * into the 2^62..2^63-1 range, then negated if the input is
     * negative (XOR with 0 / -1, plus 0 / 1).
     */
    v = (int64_t)(((x << 10) | ((uint64_t)1 << 62))
                  & (((uint64_t)1 << 63) - 1));
    v = (v ^ sign_mask) + (int64_t)sign;

    /*
     * An arithmetic right shift of the signed value gives floor()
     * semantics for both signs.
     */
    v = fpr_irsh(v, shift & 63);

    /*
     * If the true shift count was 64 or more, the result is replaced
     * with 0 (nonnegative input) or -1 (negative input). This also
     * repairs the zero input, for which the implicit top bit added
     * above was wrong. Edge case: -0 yields -1.
     */
    const int64_t clamp = -(int64_t)((uint32_t)(63 - shift) >> 31);
    v ^= (v ^ sign_mask) & clamp;
    return v;
}

static inline int64_t
fpr_trunc(fpr x) {
    /*
     * Round toward zero. The value is assumed to fit in the
     * -(2^63-1)..+(2^63-1) range.
     */
    const int shift = 1085 - ((int)(x >> 52) & 0x7FF);
    const uint64_t sign = x >> 63;
    uint64_t mag;

    /*
     * Absolute value with the implicit top bit added, left-aligned
     * into the 2^62..2^63-1 range, then shifted back down.
     */
    mag = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1);
    mag = fpr_ursh(mag, shift & 63);

    /*
     * A shift count above 63 means the exponent was too low: the
     * result is forced to 0. This also covers a zero input.
     */
    mag &= -(uint64_t)((uint32_t)(shift - 64) >> 31);

    /*
     * Negate if the source was negative (XOR with 0 / all-ones, plus
     * 0 / 1), then reinterpret the bits as a signed value.
     */
    mag = (mag ^ -sign) + sign;
    return *(int64_t *)&mag;
}

#define fpr_add PQCLEAN_FALCON1024_CLEAN_fpr_add
fpr fpr_add(fpr x, fpr y);

static inline fpr
fpr_sub(fpr x, fpr y) {
    /* x - y is computed as x + (-y); negation flips the sign bit. */
    const fpr minus_y = y ^ ((uint64_t)1 << 63);

    return fpr_add(x, minus_y);
}

static inline fpr
fpr_neg(fpr x) {
    /* Negation only toggles the sign bit (bit 63). */
    const uint64_t sign_bit = (uint64_t)1 << 63;

    return x ^ sign_bit;
}

static inline fpr
fpr_half(fpr x) {
    /*
     * Halving subtracts 1 from the exponent field. If that field was
     * 0 (input is a zero), the subtraction borrows and the field
     * reads 0x7FF; adding 1 then overflows into bit 11, which is
     * used to build a mask that forces the result to 0.
     */
    const fpr lowered = x - ((uint64_t)1 << 52);
    const uint32_t wrapped = (((uint32_t)(lowered >> 52) & 0x7FF) + 1) >> 11;

    return lowered & ((uint64_t)wrapped - 1);
}

static inline fpr
fpr_double(fpr x) {
    /*
     * Doubling adds 1 to the exponent field, except for a zero input
     * (exponent field 0), which must stay unchanged. Adding 0x7FF to
     * the field carries into bit 11 exactly when the field is
     * nonzero; that carry is the increment. Infinities and NaNs are
     * not considered.
     */
    const unsigned exp_field = (unsigned)(x >> 52) & 0x7FFU;
    const unsigned bump = (exp_field + 0x7FFU) >> 11;

    return x + ((uint64_t)bump << 52);
}

#define fpr_mul PQCLEAN_FALCON1024_CLEAN_fpr_mul
fpr fpr_mul(fpr x, fpr y);

/*
* Square of x, computed as fpr_mul(x, x).
*/
static inline fpr
fpr_sqr(fpr x) {
return fpr_mul(x, x);
}

#define fpr_div PQCLEAN_FALCON1024_CLEAN_fpr_div
fpr fpr_div(fpr x, fpr y);

/*
 * Inverse of x, computed as fpr_div(1.0, x).
 */
static inline fpr
fpr_inv(fpr x) {
    /*
     * fpr_one is the named constant for the encoding of 1.0
     * (4607182418800017408, i.e. 0x3FF0000000000000); it replaces a
     * duplicated magic literal with the same value.
     */
    return fpr_div(fpr_one, x);
}

#define fpr_sqrt PQCLEAN_FALCON1024_CLEAN_fpr_sqrt
fpr fpr_sqrt(fpr x);

static inline int
fpr_lt(fpr x, fpr y) {
    /*
     * Returns 1 if x < y, 0 otherwise, without branching.
     *
     * The encodings are compared as signed 64-bit integers:
     *  - both nonnegative: integer order matches the numerical
     *    order, so [x < y] is the sign of ix - iy;
     *  - signs differ: iy is replaced with 0, and the sign of ix
     *    alone gives the answer;
     *  - both negative: integer order is reversed, so the answer is
     *    the sign of iy - ix. It is computed on its own rather than
     *    by inverting the first result, so that x = y still yields 0.
     *
     * Neither subtraction overflows when the signs are the same.
     */
    int64_t ix = *(int64_t *)&x;
    int64_t iy = *(int64_t *)&y;
    int lt_fwd, lt_rev, both_neg;

    /* iy <- 0 when the signs differ. */
    iy &= ~((ix ^ iy) >> 63);

    lt_fwd = (int)((ix - iy) >> 63) & 1;
    lt_rev = (int)((iy - ix) >> 63) & 1;
    both_neg = (int)((x & y) >> 63);

    /* Select lt_rev when both inputs are negative, lt_fwd otherwise. */
    return lt_fwd ^ ((lt_fwd ^ lt_rev) & both_neg);
}

/*
* Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50
* bits or so.
*/
#define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63
uint64_t fpr_expm_p63(fpr x, fpr ccs);

/*
* Table of complex constants stored as interleaved pairs: the FFT code
* reads entry k as fpr_gm_tab[2*k] (real part) and fpr_gm_tab[2*k + 1]
* (imaginary part), and describes entry k as w^rev(k) for the primitive
* root w = exp(i*pi/N).
*/
#define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab
extern const fpr fpr_gm_tab[];

/*
* Table indexed by logn. The inverse FFT multiplies every coefficient
* by fpr_p2_tab[logn] as its final scaling step, which it describes as
* a division by N/2 (N = 2^logn).
* NOTE(review): the table contents are not visible from this header;
* confirm the exact values against the definition.
*/
#define fpr_p2_tab PQCLEAN_FALCON1024_CLEAN_fpr_p2_tab
extern const fpr fpr_p2_tab[];

/* ====================================================================== */
#endif

+ 0
- 834
src/sign/falcon/falcon-1024/clean/inner.h View File

@@ -1,834 +0,0 @@
#ifndef PQCLEAN_FALCON1024_CLEAN_INNER_H
#define PQCLEAN_FALCON1024_CLEAN_INNER_H


/*
* Internal functions for Falcon. This is not the API intended to be
* used by applications; instead, this internal API provides all the
* primitives on which wrappers build to provide external APIs.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/

/*
* IMPORTANT API RULES
* -------------------
*
* This API has some non-trivial usage rules:
*
*
* - All public functions (i.e. the non-static ones) must be referenced
* with the PQCLEAN_FALCON1024_CLEAN_ macro (e.g. PQCLEAN_FALCON1024_CLEAN_verify_raw for the verify_raw()
* function). That macro adds a prefix to the name, which is
* configurable with the FALCON_PREFIX macro. This allows compiling
* the code into a specific "namespace" and potentially including
* several versions of this code into a single application (e.g. to
* have an AVX2 and a non-AVX2 variants and select the one to use at
* runtime based on availability of AVX2 opcodes).
*
* - Functions that need temporary buffers expect them as a final
* tmp[] array of type uint8_t*, with a size which is documented for
* each function. However, most have some alignment requirements,
* because they will use the array to store 16-bit, 32-bit or 64-bit
* values (e.g. uint64_t or double). The caller must ensure proper
* alignment. What happens on unaligned access depends on the
* underlying architecture, ranging from a slight time penalty
* to immediate termination of the process.
*
* - Some functions rely on specific rounding rules and precision for
* floating-point numbers. On some systems (in particular 32-bit x86
* with the 387 FPU), this requires setting a hardware control
* word. The caller MUST use set_fpu_cw() to ensure proper precision:
*
* oldcw = set_fpu_cw(2);
* PQCLEAN_FALCON1024_CLEAN_sign_dyn(...);
* set_fpu_cw(oldcw);
*
* On systems where the native floating-point precision is already
* proper, or integer-based emulation is used, the set_fpu_cw()
* function does nothing, so it can be called systematically.
*/
#include "fips202.h"
#include "fpr.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>





/*
* Some computations with floating-point elements, in particular
* rounding to the nearest integer, rely on operations using _exactly_
* the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit
* x86, the 387 FPU may be used (depending on the target OS) and, in
* that case, may use more precision bits (i.e. 64 bits, for an 80-bit
* total type length); to prevent miscomputations, we define an explicit
* function that modifies the precision in the FPU control word.
*
* set_fpu_cw() sets the precision to the provided value, and returns
* the previously set precision; callers are supposed to restore the
* previous precision on exit. The correct (52-bit) precision is
* configured with the value "2". On unsupported compilers, or on
* targets other than 32-bit x86, or when the native 'double' type is
* not used, the set_fpu_cw() function does nothing at all.
*/
static inline unsigned
set_fpu_cw(unsigned x) {
    /*
     * No FPU control word is touched in this build: the requested
     * precision is simply handed back as the "previous" one, so the
     * save/restore pattern used by callers keeps working.
     */
    const unsigned previous = x;

    return previous;
}




/* ==================================================================== */
/*
* SHAKE256 implementation (shake.c).
*
* API is defined to be easily replaced with the fips202.h API defined
* as part of PQClean.
*/



/*
* Aliases mapping the inner_shake256_* names onto the incremental
* SHAKE256 names shake256incctx / shake256_inc_*. Arguments are passed
* through in the same order, except for inner_shake256_extract(), which
* takes (sc, out, len) and forwards them as (out, len, sc).
*/
#define inner_shake256_context shake256incctx
#define inner_shake256_init(sc) shake256_inc_init(sc)
#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len)
#define inner_shake256_flip(sc) shake256_inc_finalize(sc)
#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc)
#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc)


/* ==================================================================== */
/*
* Encoding/decoding functions (codec.c).
*
* Encoding functions take as parameters an output buffer (out) with
* a given maximum length (max_out_len); returned value is the actual
* number of bytes which have been written. If the output buffer is
* not large enough, then 0 is returned (some bytes may have been
* written to the buffer). If 'out' is NULL, then 'max_out_len' is
* ignored; instead, the function computes and returns the actual
* required output length (in bytes).
*
* Decoding functions take as parameters an input buffer (in) with
* its maximum length (max_in_len); returned value is the actual number
* of bytes that have been read from the buffer. If the provided length
* is too short, then 0 is returned.
*
* Values to encode or decode are vectors of integers, with N = 2^logn
* elements.
*
* Three encoding formats are defined:
*
* - modq: sequence of values modulo 12289, each encoded over exactly
* 14 bits. The encoder and decoder verify that integers are within
* the valid range (0..12288). Values are arrays of uint16.
*
* - trim: sequence of signed integers, a specified number of bits
* each. The number of bits is provided as parameter and includes
* the sign bit. Each integer x must be such that |x| < 2^(bits-1)
* (which means that the -2^(bits-1) value is forbidden); encode and
* decode functions check that property. Values are arrays of
* int16_t or int8_t, corresponding to names 'trim_i16' and
* 'trim_i8', respectively.
*
* - comp: variable-length encoding for signed integers; each integer
* uses a minimum of 9 bits, possibly more. This is normally used
* only for signatures.
*
*/

size_t PQCLEAN_FALCON1024_CLEAN_modq_encode(void *out, size_t max_out_len,
const uint16_t *x, unsigned logn);
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(void *out, size_t max_out_len,
const int16_t *x, unsigned logn, unsigned bits);
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(void *out, size_t max_out_len,
const int8_t *x, unsigned logn, unsigned bits);
size_t PQCLEAN_FALCON1024_CLEAN_comp_encode(void *out, size_t max_out_len,
const int16_t *x, unsigned logn);

size_t PQCLEAN_FALCON1024_CLEAN_modq_decode(uint16_t *x, unsigned logn,
const void *in, size_t max_in_len);
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits,
const void *in, size_t max_in_len);
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits,
const void *in, size_t max_in_len);
size_t PQCLEAN_FALCON1024_CLEAN_comp_decode(int16_t *x, unsigned logn,
const void *in, size_t max_in_len);

/*
* Number of bits for key elements, indexed by logn (1 to 10). This
* is at most 8 bits for all degrees, but some degrees may have shorter
* elements.
*/
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[];
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[];

/*
* Maximum size, in bits, of elements in a signature, indexed by logn
* (1 to 10). The size includes the sign bit.
*/
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[];

/* ==================================================================== */
/*
* Support functions used for both signature generation and signature
* verification (common.c).
*/

/*
* From a SHAKE256 context (must be already flipped), produce a new
* point. This is the non-constant-time version, which may leak enough
* information to serve as a stop condition on a brute force attack on
* the hashed message (provided that the nonce value is known).
*/
void PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(inner_shake256_context *sc,
uint16_t *x, unsigned logn);

/*
* From a SHAKE256 context (must be already flipped), produce a new
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes.
* This function is constant-time but is typically more expensive than
* PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime().
*
* tmp[] must have 16-bit alignment.
*/
void PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(inner_shake256_context *sc,
uint16_t *x, unsigned logn, uint8_t *tmp);

/*
* Tell whether a given vector (2N coordinates, in two halves) is
* acceptable as a signature. This compares the appropriate norm of the
* vector with the acceptance bound. Returned value is 1 on success
* (vector is short enough to be acceptable), 0 otherwise.
*/
int PQCLEAN_FALCON1024_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn);

/*
* Tell whether a given vector (2N coordinates, in two halves) is
* acceptable as a signature. Instead of the first half s1, this
* function receives the "saturated squared norm" of s1, i.e. the
* sum of the squares of the coordinates of s1 (saturated at 2^32-1
* if the sum exceeds 2^31-1).
*
* Returned value is 1 on success (vector is short enough to be
* acceptable), 0 otherwise.
*/
int PQCLEAN_FALCON1024_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn);

/* ==================================================================== */
/*
* Signature verification functions (vrfy.c).
*/

/*
* Convert a public key to NTT + Montgomery format. Conversion is done
* in place.
*/
void PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn);

/*
* Internal signature verification code:
* c0[] contains the hashed nonce+message
* s2[] is the decoded signature
* h[] contains the public key, in NTT + Montgomery format
* logn is the degree log
* tmp[] temporary, must have at least 2*2^logn bytes
* Returned value is 1 on success, 0 on error.
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
const uint16_t *h, unsigned logn, uint8_t *tmp);

/*
* Compute the public key h[], given the private key elements f[] and
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial
* modulus. This function returns 1 on success, 0 on error (an error is
* reported if f is not invertible mod phi mod q).
*
* The tmp[] array must have room for at least 2*2^logn elements.
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp);

/*
* Recompute the fourth private key element. Private key consists in
* four polynomials with small coefficients f, g, F and G, which are
* such that fG - gF = q mod phi; furthermore, f is invertible modulo
* phi and modulo q. This function recomputes G from f, g and F.
*
* The tmp[] array must have room for at least 4*2^logn bytes.
*
* Returned value is 1 on success, 0 on error (f not invertible).
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G,
const int8_t *f, const int8_t *g, const int8_t *F,
unsigned logn, uint8_t *tmp);

/*
* Test whether a given polynomial is invertible modulo phi and q.
* Polynomial coefficients are small integers.
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_CLEAN_is_invertible(
const int16_t *s2, unsigned logn, uint8_t *tmp);

/*
* Count the number of elements of value zero in the NTT representation
* of the given polynomial: this is the number of primitive 2n-th roots
* of unity (modulo q = 12289) that are roots of the provided polynomial
* (taken modulo q).
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp);

/*
* Internal signature verification with public key recovery:
* h[] receives the public key (NOT in NTT/Montgomery format)
* c0[] contains the hashed nonce+message
* s1[] is the first signature half
* s2[] is the second signature half
* logn is the degree log
* tmp[] temporary, must have at least 2*2^logn bytes
* Returned value is 1 on success, 0 on error. Success is returned if
* the signature is a short enough vector; in that case, the public
* key has been written to h[]. However, the caller must still
* verify that h[] is the correct value (e.g. with regards to a known
* hash of the public key).
*
* h[] may not overlap with any of the other arrays.
*
* tmp[] must have 16-bit alignment.
*/
int PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h,
const uint16_t *c0, const int16_t *s1, const int16_t *s2,
unsigned logn, uint8_t *tmp);

/* ==================================================================== */
/*
* Implementation of floating-point real numbers (fpr.h, fpr.c).
*/

/*
* Real numbers are implemented by an extra header file, included below.
* This is meant to support pluggable implementations. The default
* implementation relies on the C type 'double'.
*
* The included file must define the following types, functions and
* constants:
*
* fpr
* type for a real number
*
* fpr fpr_of(int64_t i)
* cast an integer into a real number; source must be in the
* -(2^63-1)..+(2^63-1) range
*
* fpr fpr_scaled(int64_t i, int sc)
* compute i*2^sc as a real number; source 'i' must be in the
* -(2^63-1)..+(2^63-1) range
*
* fpr fpr_ldexp(fpr x, int e)
* compute x*2^e
*
* int64_t fpr_rint(fpr x)
* round x to the nearest integer; x must be in the -(2^63-1)
* to +(2^63-1) range
*
* int64_t fpr_trunc(fpr x)
* round to an integer; this rounds towards zero; value must
* be in the -(2^63-1) to +(2^63-1) range
*
* fpr fpr_add(fpr x, fpr y)
* compute x + y
*
* fpr fpr_sub(fpr x, fpr y)
* compute x - y
*
* fpr fpr_neg(fpr x)
* compute -x
*
* fpr fpr_half(fpr x)
* compute x/2
*
* fpr fpr_double(fpr x)
* compute x*2
*
* fpr fpr_mul(fpr x, fpr y)
* compute x * y
*
* fpr fpr_sqr(fpr x)
* compute x * x
*
* fpr fpr_inv(fpr x)
* compute 1/x
*
* fpr fpr_div(fpr x, fpr y)
* compute x/y
*
* fpr fpr_sqrt(fpr x)
* compute the square root of x
*
* int fpr_lt(fpr x, fpr y)
* return 1 if x < y, 0 otherwise
*
* uint64_t fpr_expm_p63(fpr x)
* return exp(-x), assuming that 0 <= x < log(2). Returned value
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x),
* rounded to the nearest integer). Computation should have a
* precision of at least 45 bits.
*
* const fpr fpr_gm_tab[]
* array of constants for FFT / iFFT
*
* const fpr fpr_p2_tab[]
* precomputed powers of 2 (by index, 0 to 10)
*
* Constants of type 'fpr':
*
* fpr fpr_q 12289
* fpr fpr_inverse_of_q 1/12289
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2))
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289))
* fpr fpr_sigma_min_9 1.291500756233514568549480827642
* fpr fpr_sigma_min_10 1.311734375905083682667395805765
* fpr fpr_log2 log(2)
* fpr fpr_inv_log2 1/log(2)
* fpr fpr_bnorm_max 16822.4121
* fpr fpr_zero 0
* fpr fpr_one 1
* fpr fpr_two 2
* fpr fpr_onehalf 0.5
* fpr fpr_ptwo31 2^31
* fpr fpr_ptwo31m1 2^31-1
* fpr fpr_mtwo31m1 -(2^31-1)
* fpr fpr_ptwo63m1 2^63-1
* fpr fpr_mtwo63m1 -(2^63-1)
* fpr fpr_ptwo63 2^63
*/

/* ==================================================================== */
/*
* RNG (rng.c).
*
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256
* context (flipped) and is used for bulk pseudorandom generation.
* A system-dependent seed generator is also provided.
*/

/*
* Obtain a random seed from the system RNG.
*
* Returned value is 1 on success, 0 on error.
*/
int PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t seed_len);

/*
* Structure for a PRNG. This includes a large buffer so that values
* get generated in advance. The 'state' is used to keep the current
* PRNG algorithm state (contents depend on the selected algorithm).
*
* The unions with 'dummy_u64' are there to ensure proper alignment for
* 64-bit direct access.
*/
typedef struct {
/* Pre-generated output bytes; consumed through 'ptr'. */
union {
uint8_t d[512]; /* MUST be 512, exactly */
uint64_t dummy_u64; /* present for 64-bit alignment only */
} buf;
/* Index in buf.d[] of the next byte to hand out (see prng_get_u8()). */
size_t ptr;
/* Current state of the PRNG algorithm; layout depends on the algorithm. */
union {
uint8_t d[256];
uint64_t dummy_u64; /* present for 64-bit alignment only */
} state;
/* NOTE(review): presumably identifies the selected PRNG algorithm; not used in the code visible here -- confirm. */
int type;
} prng;

/*
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256
* context (in "flipped" state) to obtain its initial state.
*/
void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src);

/*
* Refill the PRNG buffer. This is normally invoked automatically, and
* is declared here only so that prng_get_u64() may be inlined.
*/
void PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p);

/*
* Get some bytes from a PRNG.
*/
void PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len);

/*
 * Get a 64-bit random value from a PRNG.
 *
 * Eight consecutive buffer bytes are consumed and assembled in
 * little-endian order (the first byte is the least significant one).
 */
static inline uint64_t
prng_get_u64(prng *p) {
    size_t pos;
    uint64_t acc;
    int k;

    /*
     * Refill when 9 bytes or fewer remain. A few trailing bytes may
     * thus be discarded, but the 8-byte read below never runs past
     * the end of the buffer, and the buffer is never left empty.
     */
    pos = p->ptr;
    if (pos >= (sizeof p->buf.d) - 9) {
        PQCLEAN_FALCON1024_CLEAN_prng_refill(p);
        pos = 0;
    }
    p->ptr = pos + 8;

    /*
     * Portable byte-by-byte decoding (no reliance on host endianness
     * or unaligned access), walking from the most significant byte
     * down to the least significant one.
     */
    acc = 0;
    for (k = 7; k >= 0; k --) {
        acc = (acc << 8) | (uint64_t)p->buf.d[pos + (size_t)k];
    }
    return acc;
}

/*
 * Get an 8-bit random value from a PRNG.
 *
 * The next unread buffer byte is returned; the buffer is refilled
 * right away if that byte was the last one.
 */
static inline unsigned
prng_get_u8(prng *p) {
    size_t pos;
    unsigned byte;

    pos = p->ptr;
    byte = p->buf.d[pos];
    p->ptr = pos + 1;

    /* Refill eagerly so that the buffer is never left exhausted. */
    if (p->ptr == sizeof p->buf.d) {
        PQCLEAN_FALCON1024_CLEAN_prng_refill(p);
    }
    return byte;
}

/* ==================================================================== */
/*
* FFT (falcon-fft.c).
*
* A real polynomial is represented as an array of N 'fpr' elements.
* The FFT representation of a real polynomial contains N/2 complex
* elements; each is stored as two real numbers, for the real and
* imaginary parts, respectively. See falcon-fft.c for details on the
* internal representation.
*/

/*
* Compute FFT in-place: the source array should contain a real
* polynomial (N coefficients); its storage area is reused to store
* the FFT representation of that polynomial (N/2 complex numbers).
*
* 'logn' MUST lie between 1 and 10 (inclusive).
*/
void PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn);

/*
* Compute the inverse FFT in-place: the source array should contain the
* FFT representation of a real polynomial (N/2 elements); the resulting
* real polynomial (N coefficients of type 'fpr') is written over the
* array.
*
* 'logn' MUST lie between 1 and 10 (inclusive).
*/
void PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn);

/*
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This
* function works in both normal and FFT representations.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_add(fpr *a, const fpr *b, unsigned logn);

/*
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This
* function works in both normal and FFT representations.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_sub(fpr *a, const fpr *b, unsigned logn);

/*
* Negate polynomial a. This function works in both normal and FFT
* representations.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn);

/*
* Compute adjoint of polynomial a. This function works only in FFT
* representation.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn);

/*
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap.
* This function works only in FFT representation.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(fpr *a, const fpr *b, unsigned logn);

/*
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT
* overlap. This function works only in FFT representation.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn);

/*
* Multiply polynomial with its own adjoint. This function works only in FFT
* representation.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn);

/*
* Multiply polynomial with a real constant. This function works in both
* normal and FFT representations.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn);

/*
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation).
* a and b MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_div_fft(fpr *a, const fpr *b, unsigned logn);

/*
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g))
* (also in FFT representation). Since the result is auto-adjoint, all its
* coordinates in FFT representation are real; as such, only the first N/2
* values of d[] are filled (the imaginary parts are skipped).
*
* Array d MUST NOT overlap with either a or b.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d,
const fpr *a, const fpr *b, unsigned logn);

/*
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g)
* (also in FFT representation). Destination d MUST NOT overlap with
* any of the source arrays.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d,
const fpr *F, const fpr *G,
const fpr *f, const fpr *g, unsigned logn);

/*
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both
* a and b are in FFT representation. Since b is autoadjoint, all its
* FFT coefficients are real, and the array b contains only N/2 elements.
* a and b MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(fpr *a,
const fpr *b, unsigned logn);

/*
* Divide polynomial a by polynomial b, where b is autoadjoint. Both
* a and b are in FFT representation. Since b is autoadjoint, all its
* FFT coefficients are real, and the array b contains only N/2 elements.
* a and b MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(fpr *a,
const fpr *b, unsigned logn);

/*
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT
* representation. On input, g00, g01 and g11 are provided (where the
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10
* and d11 values are written in g00, g01 and g11, respectively
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]).
* (In fact, d00 = g00, so the g00 operand is left unmodified.)
*/
void PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(const fpr *g00,
fpr *g01, fpr *g11, unsigned logn);

/*
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT
* representation. This is identical to poly_LDL_fft() except that
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written
* in two other separate buffers provided as extra parameters.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(fpr *d11, fpr *l10,
const fpr *g00, const fpr *g01,
const fpr *g11, unsigned logn);

/*
* Apply "split" operation on a polynomial in FFT representation:
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_split_fft(fpr *f0, fpr *f1,
const fpr *f, unsigned logn);

/*
* Apply "merge" operation on two polynomials in FFT representation:
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1.
* f MUST NOT overlap with either f0 or f1.
*/
void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *f,
const fpr *f0, const fpr *f1, unsigned logn);

/* ==================================================================== */
/*
* Key pair generation.
*/

/*
* Required sizes of the temporary buffer (in bytes).
*
* This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1
* or 2) where it is slightly greater.
*/
#define FALCON_KEYGEN_TEMP_1 136
#define FALCON_KEYGEN_TEMP_2 272
#define FALCON_KEYGEN_TEMP_3 224
#define FALCON_KEYGEN_TEMP_4 448
#define FALCON_KEYGEN_TEMP_5 896
#define FALCON_KEYGEN_TEMP_6 1792
#define FALCON_KEYGEN_TEMP_7 3584
#define FALCON_KEYGEN_TEMP_8 7168
#define FALCON_KEYGEN_TEMP_9 14336
#define FALCON_KEYGEN_TEMP_10 28672

/*
* Generate a new key pair. Randomness is extracted from the provided
* SHAKE256 context, which must have already been seeded and flipped.
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_*
* macros) and be aligned for the uint32_t, uint64_t and fpr types.
*
* The private key elements are written in f, g, F and G, and the
* public key is written in h. Either or both of G and h may be NULL,
* in which case the corresponding element is not returned (they can
* be recomputed from f, g and F).
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_CLEAN_keygen(inner_shake256_context *rng,
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h,
unsigned logn, uint8_t *tmp);

/* ==================================================================== */
/*
* Signature generation.
*/

/*
* Expand a private key into the B0 matrix in FFT representation and
* the LDL tree. All the values are written in 'expanded_key', for
* a total of (8*logn+40)*2^logn bytes.
*
* The tmp[] array must have room for at least 48*2^logn bytes.
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key,
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G,
unsigned logn, uint8_t *tmp);

/*
* Compute a signature over the provided hashed message (hm); the
* signature value is one short vector. This function uses an
* expanded key (as generated by PQCLEAN_FALCON1024_CLEAN_expand_privkey()).
*
* The sig[] and hm[] buffers may overlap.
*
* On successful output, the start of the tmp[] buffer contains the s1
* vector (as int16_t elements).
*
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes.
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
const fpr *expanded_key,
const uint16_t *hm, unsigned logn, uint8_t *tmp);

/*
* Compute a signature over the provided hashed message (hm); the
* signature value is one short vector. This function uses a raw
* key and dynamically recomputes the B0 matrix and LDL tree; this
* saves RAM since there is no need for an expanded key, but
* increases the signature cost.
*
* The sig[] and hm[] buffers may overlap.
*
* On successful output, the start of the tmp[] buffer contains the s1
* vector (as int16_t elements).
*
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes.
*
* tmp[] must have 64-bit alignment.
* This function uses floating-point rounding (see set_fpu_cw()).
*/
void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
const int8_t *f, const int8_t *g,
const int8_t *F, const int8_t *G,
const uint16_t *hm, unsigned logn, uint8_t *tmp);

/*
* Internal sampler engine. Exported for tests.
*
* sampler_context wraps around a source of random numbers (PRNG) and
* the sigma_min value (nominally dependent on the degree).
*
* sampler() takes as parameters:
* ctx pointer to the sampler_context structure
* mu center for the distribution
* isigma inverse of the distribution standard deviation
* It returns an integer sampled along the Gaussian distribution centered
* on mu and of standard deviation sigma = 1/isigma.
*
* gaussian0_sampler() takes as parameter a pointer to a PRNG, and
* returns an integer sampled along a half-Gaussian with standard
* deviation sigma0 = 1.8205 (center is 0, returned value is
* nonnegative).
*/

typedef struct {
/* Source of random numbers used by the sampler. */
prng p;
/* The sigma_min value (nominally dependent on the degree). */
fpr sigma_min;
} sampler_context;

int PQCLEAN_FALCON1024_CLEAN_sampler(void *ctx, fpr mu, fpr isigma);

int PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(prng *p);

/* ==================================================================== */

#endif

+ 0
- 4231
src/sign/falcon/falcon-1024/clean/keygen.c
File diff suppressed because it is too large
View File


+ 0
- 386
src/sign/falcon/falcon-1024/clean/pqclean.c View File

@@ -1,386 +0,0 @@
#include "api.h"
#include "inner.h"
#include "randombytes.h"
#include <stddef.h>
#include <string.h>
/*
* Wrapper for implementing the PQClean API.
*/



#define NONCELEN 40
#define SEEDLEN 48

/*
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024)
*
* private key:
* header byte: 0101nnnn
* private f (6 or 5 bits by element, depending on degree)
* private g (6 or 5 bits by element, depending on degree)
* private F (8 bits by element)
*
* public key:
* header byte: 0000nnnn
* public h (14 bits by element)
*
* signature:
* header byte: 0011nnnn
* nonce 40 bytes
* value (12 bits by element)
*
* message + signature:
* signature length (2 bytes, big-endian)
* nonce 40 bytes
* message
* header byte: 0010nnnn
* value (12 bits by element)
* (signature length is 1+len(value), not counting the nonce)
*/

/* see api.h */
int
PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) {
    /*
     * Scratch area handed to key generation; the extra union members
     * are only there to give the byte array a suitable alignment.
     */
    union {
        uint8_t b[28 * 1024];
        uint64_t dummy_u64;
        fpr dummy_fpr;
    } scratch;
    int8_t f[1024], g[1024], F[1024], G[1024];
    uint16_t h[1024];
    unsigned char seed[SEEDLEN];
    inner_shake256_context rng;
    /* Private key components, in encoding order, with their bit widths. */
    const int8_t *parts[3] = { f, g, F };
    unsigned widths[3] = {
        PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10],
        PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10],
        PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10]
    };
    size_t off, n, i;

    /*
     * Seed a SHAKE256 context with fresh random bytes, flip it, and
     * run key generation from it.
     */
    randombytes(seed, sizeof seed);
    inner_shake256_init(&rng);
    inner_shake256_inject(&rng, seed, sizeof seed);
    inner_shake256_flip(&rng);
    PQCLEAN_FALCON1024_CLEAN_keygen(&rng, f, g, F, G, h, 10, scratch.b);
    inner_shake256_ctx_release(&rng);

    /*
     * Private key: header byte, then f, g and F back to back. The
     * encoding must fill the secret key buffer exactly.
     */
    sk[0] = 0x50 + 10;
    off = 1;
    for (i = 0; i < 3; i ++) {
        n = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(
                sk + off, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - off,
                parts[i], 10, widths[i]);
        if (n == 0) {
            return -1;
        }
        off += n;
    }
    if (off != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) {
        return -1;
    }

    /*
     * Public key: header byte, then h; again the encoding must fill
     * the buffer exactly.
     */
    pk[0] = 0x00 + 10;
    n = PQCLEAN_FALCON1024_CLEAN_modq_encode(
            pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1,
            h, 10);
    return (n == PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) ? 0 : -1;
}

/*
 * Compute the signature. nonce[] receives the nonce and must have length
 * NONCELEN bytes. sigbuf[] receives the signature value (without nonce
 * or header byte), with *sigbuflen providing the maximum value length and
 * receiving the actual value length.
 *
 * If a signature could be computed but not encoded because it would
 * exceed the output buffer size, then a new signature is computed over
 * the same hashed point. If the provided buffer size is too low, this
 * could loop indefinitely, so the caller must provide a size that can
 * accommodate signatures with a large enough probability.
 *
 * Return value: 0 on success, -1 on error (bad private key header,
 * undecodable or wrongly sized private key, or f not invertible).
 */
static int
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen,
        const uint8_t *m, size_t mlen, const uint8_t *sk) {
    union {
        uint8_t b[72 * 1024];
        uint64_t dummy_u64;
        fpr dummy_fpr;
    } tmp;
    int8_t f[1024], g[1024], F[1024], G[1024];
    /*
     * hm[] and sig[] are deliberately two distinct buffers. sign_dyn()
     * writes its output into sig[]; if both shared storage, a retry
     * after an encoding failure would sign the previous signature
     * values instead of the hashed message, and the returned signature
     * would not verify.
     */
    uint16_t hm[1024];
    int16_t sig[1024];
    unsigned char seed[SEEDLEN];
    inner_shake256_context sc;
    size_t u, v;

    /*
     * Decode the private key: header byte, then f, g and F; the
     * encoding must span the secret key exactly. G is recomputed.
     */
    if (sk[0] != 0x50 + 10) {
        return -1;
    }
    u = 1;
    v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
            f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10],
            sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u);
    if (v == 0) {
        return -1;
    }
    u += v;
    v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
            g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10],
            sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u);
    if (v == 0) {
        return -1;
    }
    u += v;
    v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
            F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10],
            sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u);
    if (v == 0) {
        return -1;
    }
    u += v;
    if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) {
        return -1;
    }
    if (!PQCLEAN_FALCON1024_CLEAN_complete_private(G, f, g, F, 10, tmp.b)) {
        return -1;
    }

    /*
     * Create a random nonce (40 bytes).
     */
    randombytes(nonce, NONCELEN);

    /*
     * Hash nonce + message into a vector.
     */
    inner_shake256_init(&sc);
    inner_shake256_inject(&sc, nonce, NONCELEN);
    inner_shake256_inject(&sc, m, mlen);
    inner_shake256_flip(&sc);
    PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(&sc, hm, 10);
    inner_shake256_ctx_release(&sc);

    /*
     * Initialize a RNG.
     */
    randombytes(seed, sizeof seed);
    inner_shake256_init(&sc);
    inner_shake256_inject(&sc, seed, sizeof seed);
    inner_shake256_flip(&sc);

    /*
     * Compute and return the signature. This loops until a signature
     * value is found that fits in the provided buffer. hm[] is left
     * intact by each attempt, so every retry signs the same point.
     */
    for (;;) {
        PQCLEAN_FALCON1024_CLEAN_sign_dyn(sig, &sc, f, g, F, G, hm, 10, tmp.b);
        v = PQCLEAN_FALCON1024_CLEAN_comp_encode(sigbuf, *sigbuflen, sig, 10);
        if (v != 0) {
            inner_shake256_ctx_release(&sc);
            *sigbuflen = v;
            return 0;
        }
    }
}

/*
 * Verify a signature. The nonce has size NONCELEN bytes. sigbuf[]
 * (of size sigbuflen) contains the signature value, not including the
 * header byte or nonce. Return value is 0 on success, -1 on error.
 */
static int
do_verify(
    const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen,
    const uint8_t *m, size_t mlen, const uint8_t *pk) {
    /* Scratch area; the extra union members only enforce alignment. */
    union {
        uint8_t b[2 * 1024];
        uint64_t dummy_u64;
        fpr dummy_fpr;
    } scratch;
    uint16_t h[1024], hm[1024];
    int16_t sig[1024];
    inner_shake256_context sc;
    size_t n;

    /*
     * Public key: check the header byte, decode h (the encoding must
     * span the whole key), then convert to NTT + Montgomery format.
     */
    if (pk[0] != 0x00 + 10) {
        return -1;
    }
    n = PQCLEAN_FALCON1024_CLEAN_modq_decode(h, 10,
            pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1);
    if (n != PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) {
        return -1;
    }
    PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(h, 10);

    /*
     * Signature value: must be non-empty and must be consumed
     * entirely by the decoder.
     */
    if (sigbuflen == 0) {
        return -1;
    }
    n = PQCLEAN_FALCON1024_CLEAN_comp_decode(sig, 10, sigbuf, sigbuflen);
    if (n != sigbuflen) {
        return -1;
    }

    /*
     * Hash nonce + message into a vector.
     */
    inner_shake256_init(&sc);
    inner_shake256_inject(&sc, nonce, NONCELEN);
    inner_shake256_inject(&sc, m, mlen);
    inner_shake256_flip(&sc);
    PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(&sc, hm, 10, scratch.b);
    inner_shake256_ctx_release(&sc);

    /*
     * Run the actual verification and map its 1/0 result to 0/-1.
     */
    return PQCLEAN_FALCON1024_CLEAN_verify_raw(hm, sig, h, 10, scratch.b) ? 0 : -1;
}

/* see api.h */
int
PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk) {
    /* Output layout: header byte, nonce (NONCELEN bytes), value. */
    uint8_t *nonce = sig + 1;
    uint8_t *value = nonce + NONCELEN;
    /*
     * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES is sized for the signed
     * message object produced by PQCLEAN_FALCON1024_CLEAN_crypto_sign(),
     * which carries an extra two-byte length field. The value is capped
     * to the same limit here so that both entry points yield the exact
     * same signature value for the same message, private key and
     * randombytes() output (this is for reproducibility of tests).
     */
    size_t value_len = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3;

    if (do_sign(nonce, value, &value_len, m, mlen, sk) < 0) {
        return -1;
    }
    sig[0] = 0x30 + 10;
    *siglen = 1 + NONCELEN + value_len;
    return 0;
}

/* see api.h */
int
PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk) {
    const uint8_t *nonce, *value;

    /*
     * The signature must at least hold the header byte and the nonce,
     * and the header byte must be the expected one. The length is
     * tested first so that sig[0] is only read when it exists.
     */
    if (siglen < 1 + NONCELEN || sig[0] != 0x30 + 10) {
        return -1;
    }
    nonce = sig + 1;
    value = nonce + NONCELEN;
    return do_verify(nonce, value, siglen - 1 - NONCELEN, m, mlen, pk);
}

/* see api.h */
int
PQCLEAN_FALCON1024_CLEAN_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk) {
    /*
     * Output layout: 2-byte length, nonce, message, header byte,
     * signature value.
     */
    uint8_t *nonce = sm + 2;
    uint8_t *body = nonce + NONCELEN;
    uint8_t *header = body + mlen;
    size_t value_len = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3;
    size_t declared;

    /*
     * Put the message at its final place first; memmove() is used
     * because m and sm may overlap.
     */
    memmove(body, m, mlen);

    if (do_sign(nonce, header + 1, &value_len, body, mlen, sk) < 0) {
        return -1;
    }
    *header = 0x20 + 10;

    /*
     * The stored length (big-endian) counts the header byte plus the
     * value, but not the nonce.
     */
    declared = value_len + 1;
    sm[0] = (uint8_t)(declared >> 8);
    sm[1] = (uint8_t)declared;
    *smlen = mlen + 2 + NONCELEN + declared;
    return 0;
}

/* see api.h */
int
PQCLEAN_FALCON1024_CLEAN_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk) {
    const uint8_t *nonce, *msg, *value;
    size_t declared, value_len, msg_len;

    /*
     * Smallest possible object: 2-byte length, nonce, header byte.
     */
    if (smlen < 3 + NONCELEN) {
        return -1;
    }

    /*
     * The big-endian length field covers the header byte and the
     * value; it must be at least 2 and must fit in what follows the
     * length field and the nonce.
     */
    declared = ((size_t)sm[0] << 8) | (size_t)sm[1];
    if (declared < 2 || declared > (smlen - NONCELEN - 2)) {
        return -1;
    }
    value_len = declared - 1;
    msg_len = smlen - NONCELEN - 3 - value_len;

    /*
     * Nonce is at sm+2, followed by the message, the one-byte
     * signature header and the signature value.
     */
    nonce = sm + 2;
    msg = nonce + NONCELEN;
    if (msg[msg_len] != 0x20 + 10) {
        return -1;
    }
    value = msg + msg_len + 1;

    if (do_verify(nonce, value, value_len, msg, msg_len, pk) < 0) {
        return -1;
    }

    /*
     * Signature is correct: deliver the message. memmove() is used
     * because m and sm may overlap.
     */
    memmove(m, msg, msg_len);
    *mlen = msg_len;
    return 0;
}

+ 0
- 201
src/sign/falcon/falcon-1024/clean/rng.c View File

@@ -1,201 +0,0 @@
#include "inner.h"
#include <assert.h>
/*
* PRNG and interface to the system RNG.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/



/*
* Include relevant system header files. For Win32, this will also need
* linking with advapi32.dll, which we trigger with an appropriate #pragma.
*/

/*
 * Seed source stub (declared in inner.h). The seed buffer is never
 * written: the function returns 1 only for a zero-length request and
 * 0 for every non-zero len.
 */
int
PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t len) {
    (void)seed;
    return (len == 0) ? 1 : 0;
}

/*
 * Initialise a PRNG from a SHAKE256 context (declared in inner.h).
 *
 * 56 bytes are extracted from src and stored into p->state.d as
 * fourteen 32-bit words decoded in little-endian order, so that a given
 * seed yields the same state on every architecture. The last two words
 * (offsets 48 and 52) are then merged into a single native 64-bit value
 * at offset 48, and the output buffer is filled for the first time.
 */
void
PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src) {
    uint8_t seed[56];
    uint64_t lo, hi;
    size_t off;

    inner_shake256_extract(src, seed, 56);

    for (off = 0; off < 56; off += 4) {
        uint32_t word;

        word = (uint32_t)seed[off + 0]
               | ((uint32_t)seed[off + 1] << 8)
               | ((uint32_t)seed[off + 2] << 16)
               | ((uint32_t)seed[off + 3] << 24);
        *(uint32_t *)(p->state.d + off) = word;
    }

    /* Fuse words 12 and 13 into the 64-bit value kept at offset 48. */
    lo = *(uint32_t *)(p->state.d + 48);
    hi = *(uint32_t *)(p->state.d + 52);
    *(uint64_t *)(p->state.d + 48) = lo + (hi << 32);

    PQCLEAN_FALCON1024_CLEAN_prng_refill(p);
}

/*
 * PRNG based on ChaCha20: refill the output buffer of p.
 *
 * State consists in key (32 bytes) then IV (16 bytes) and block counter
 * (8 bytes, at offset 48 of p->state.d). Normally, we should not care
 * about local endianness (this is for a PRNG), but for the NIST
 * competition we need reproducible KAT vectors that work across
 * architectures, so we enforce little-endian interpretation where
 * applicable. Moreover, output words are "spread out" over the output
 * buffer with the interleaving pattern that is naturally obtained from
 * the AVX2 implementation that runs eight ChaCha20 instances in parallel.
 *
 * The block counter is XORed into state words 14 and 15. With the
 * layout above (key in words 4..11, IV in words 12..15) these are the
 * LAST 8 bytes of the IV.
 *
 * Each call produces eight 64-byte blocks (bytes 0..511 of p->buf.d),
 * advances the stored counter by 8 and resets p->ptr to 0.
 */
void
PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p) {

/* The four ChaCha constant words ("expand 32-byte k", little-endian). */
static const uint32_t CW[] = {
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
};

uint64_t cc;
size_t u;

/*
 * State uses local endianness. Only the output bytes must be
 * converted to little endian (if used on a big-endian machine).
 */
cc = *(uint64_t *)(p->state.d + 48);
/* One iteration per 64-byte block; u is also the interleaving lane. */
for (u = 0; u < 8; u ++) {
uint32_t state[16];
size_t v;
int i;

/* Words 0..3: constants; words 4..15: the 48 bytes of key + IV. */
memcpy(&state[0], CW, sizeof CW);
memcpy(&state[4], p->state.d, 48);
/* Mix the 64-bit block counter into the last two words. */
state[14] ^= (uint32_t)cc;
state[15] ^= (uint32_t)(cc >> 32);
/* 10 iterations of (4 column + 4 diagonal quarter-rounds). */
for (i = 0; i < 10; i ++) {

#define QROUND(a, b, c, d) do { \
state[a] += state[b]; \
state[d] ^= state[a]; \
state[d] = (state[d] << 16) | (state[d] >> 16); \
state[c] += state[d]; \
state[b] ^= state[c]; \
state[b] = (state[b] << 12) | (state[b] >> 20); \
state[a] += state[b]; \
state[d] ^= state[a]; \
state[d] = (state[d] << 8) | (state[d] >> 24); \
state[c] += state[d]; \
state[b] ^= state[c]; \
state[b] = (state[b] << 7) | (state[b] >> 25); \
} while (0)

QROUND( 0, 4, 8, 12);
QROUND( 1, 5, 9, 13);
QROUND( 2, 6, 10, 14);
QROUND( 3, 7, 11, 15);
QROUND( 0, 5, 10, 15);
QROUND( 1, 6, 11, 12);
QROUND( 2, 7, 8, 13);
QROUND( 3, 4, 9, 14);

#undef QROUND

}

/*
 * Feed-forward: add the block's input words (constants, key/IV,
 * and the counter-masked last two words) to the permuted state.
 */
for (v = 0; v < 4; v ++) {
state[v] += CW[v];
}
for (v = 4; v < 14; v ++) {
state[v] += ((uint32_t *)p->state.d)[v - 4];
}
state[14] += ((uint32_t *)p->state.d)[10]
^ (uint32_t)cc;
state[15] += ((uint32_t *)p->state.d)[11]
^ (uint32_t)(cc >> 32);
cc ++;

/*
 * We mimic the interleaving that is used in the AVX2
 * implementation: word v of block u is stored, least
 * significant byte first, at byte offset 32*v + 4*u.
 */
for (v = 0; v < 16; v ++) {
p->buf.d[(u << 2) + (v << 5) + 0] =
(uint8_t)state[v];
p->buf.d[(u << 2) + (v << 5) + 1] =
(uint8_t)(state[v] >> 8);
p->buf.d[(u << 2) + (v << 5) + 2] =
(uint8_t)(state[v] >> 16);
p->buf.d[(u << 2) + (v << 5) + 3] =
(uint8_t)(state[v] >> 24);
}
}
/* Persist the advanced block counter for the next refill. */
*(uint64_t *)(p->state.d + 48) = cc;


/* The freshly generated buffer is unread. */
p->ptr = 0;
}

/*
 * Copy len pseudorandom bytes from the PRNG into dst (see inner.h).
 *
 * Bytes are taken from p->buf.d starting at the current read position
 * p->ptr; whenever the buffer is exhausted it is regenerated with
 * PQCLEAN_FALCON1024_CLEAN_prng_refill(), which resets p->ptr to 0.
 * A len of 0 is a no-op.
 */
void
PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) {
    uint8_t *out = dst;

    while (len > 0) {
        size_t chunk;

        /* Unread bytes left in the buffer, capped to what is requested. */
        chunk = (sizeof p->buf.d) - p->ptr;
        if (chunk > len) {
            chunk = len;
        }

        /*
         * Read from the current position, not from the start of the
         * buffer: copying from p->buf.d alone would hand out bytes
         * that were already consumed whenever p->ptr != 0.
         */
        memcpy(out, p->buf.d + p->ptr, chunk);
        out += chunk;
        len -= chunk;
        p->ptr += chunk;
        if (p->ptr == sizeof p->buf.d) {
            PQCLEAN_FALCON1024_CLEAN_prng_refill(p);
        }
    }
}

+ 0
- 1254
src/sign/falcon/falcon-1024/clean/sign.c
File diff suppressed because it is too large
View File


+ 0
- 853
src/sign/falcon/falcon-1024/clean/vrfy.c View File

@@ -1,853 +0,0 @@
#include "inner.h"

/*
* Falcon signature verification.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/* ===================================================================== */
/*
* Constants for NTT.
*
* n = 2^logn (2 <= n <= 1024)
* phi = X^n + 1
* q = 12289
* q0i = -1/q mod 2^16
* R = 2^16 mod q
* R2 = 2^32 mod q
*/

#define Q 12289
#define Q0I 12287
#define R 4091
#define R2 10952

/*
* Table for NTT, binary case:
* GMb[x] = R*(g^rev(x)) mod q
* where g = 7 (it is a 2048-th primitive root of 1 modulo q)
* and rev() is the bit-reversal function over 10 bits.
*/
static const uint16_t GMb[] = {
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759,
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710,
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442,
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180,
12210, 6240, 997, 117, 4783, 4407, 1549, 7072,
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042,
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863,
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872,
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340,
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045,
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180,
9277, 6130, 3323, 883, 10469, 489, 1502, 2851,
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195,
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274,
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446,
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276,
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525,
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477,
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680,
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003,
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763,
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821,
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159,
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188,
737, 3698, 4699, 5753, 9046, 3687, 16, 914,
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381,
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357,
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278,
932, 10229, 8927, 7642, 351, 9298, 237, 5858,
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204,
4602, 1748, 11300, 340, 3711, 4614, 300, 10993,
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654,
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209,
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808,
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433,
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416,
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436,
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434,
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328,
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393,
2523, 4339, 6115, 619, 937, 2834, 7775, 3279,
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113,
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884,
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520,
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399,
11192, 315, 4511, 1158, 6061, 6751, 11865, 357,
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253,
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652,
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188,
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928,
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650,
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344,
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561,
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114,
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323,
10438, 9471, 1271, 408, 6911, 3079, 360, 8276,
11535, 9156, 9049, 11539, 850, 8617, 784, 7919,
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600,
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424,
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333,
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395,
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216,
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230,
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688,
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868,
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525,
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227,
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815,
11736, 6813, 6979, 819, 8903, 6271, 10843, 348,
7514, 8339, 6439, 694, 852, 5659, 2781, 3716,
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885,
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526,
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224,
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159,
10923, 4918, 128, 7312, 725, 9157, 5006, 6393,
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668,
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365,
5110, 45, 2400, 1921, 4377, 2720, 1695, 51,
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833,
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198,
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519,
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339,
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604,
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154,
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185,
862, 3158, 477, 7279, 5678, 7914, 4254, 302,
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824,
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449,
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398,
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800,
1397, 10678, 103, 7420, 7976, 936, 764, 632,
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946,
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139,
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850,
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217,
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711,
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729,
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211,
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875,
8192, 986, 7527, 1401, 870, 3615, 8465, 2756,
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763,
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038,
2567, 708, 893, 6465, 4962, 10024, 2090, 5718,
10743, 780, 4733, 4623, 2134, 2087, 4802, 884,
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664,
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791,
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032,
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062,
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348,
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499,
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326,
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830,
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582,
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762,
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394,
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674,
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193,
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509,
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105,
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776,
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277,
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333,
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390,
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331,
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187,
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067,
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165,
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949
};

/*
* Table for inverse NTT, binary case:
* iGMb[x] = R*((1/g)^rev(x)) mod q
* Since g = 7, 1/g = 8778 mod 12289.
*/
static const uint16_t iGMb[] = {
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329,
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698,
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875,
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155,
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108,
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100,
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460,
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79,
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676,
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110,
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559,
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228,
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012,
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973,
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736,
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720,
6635, 6543, 1582, 4868, 42, 673, 2240, 7219,
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687,
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597,
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357,
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880,
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556,
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103,
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552,
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822,
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609,
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388,
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344,
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101,
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424,
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888,
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709,
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639,
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993,
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051,
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488,
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473,
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510,
6689, 386, 4462, 105, 2076, 10443, 119, 3955,
4370, 11505, 3672, 11439, 750, 3240, 3133, 754,
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851,
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327,
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255,
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787,
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325,
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707,
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489,
101, 1911, 9483, 3608, 11997, 10536, 812, 8915,
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922,
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922,
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097,
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016,
7769, 136, 617, 3157, 5889, 9219, 6855, 120,
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562,
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926,
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766,
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732,
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900,
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847,
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871,
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852,
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879,
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064,
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454,
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028,
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795,
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779,
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127,
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444,
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281,
9956, 2702, 6656, 735, 2243, 11656, 833, 3107,
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278,
3513, 9769, 3025, 779, 9433, 3392, 7437, 668,
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711,
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918,
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931,
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697,
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793,
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556,
707, 1088, 4936, 678, 10245, 18, 5684, 960,
4459, 7957, 226, 2451, 6, 8874, 320, 6298,
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876,
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679,
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378,
5227, 952, 4319, 9810, 4356, 3088, 11118, 840,
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489,
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037,
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917,
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546,
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722,
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484,
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519,
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097,
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286,
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292,
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022,
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377,
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653,
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601,
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765,
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293,
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892,
11489, 8833, 2393, 15, 10830, 5003, 17, 565,
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130,
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020,
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396,
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427,
104, 6348, 9643, 6757, 12110, 5617, 10935, 541,
135, 3041, 7200, 6526, 5085, 12136, 842, 4129,
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101,
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927,
1770, 273, 8377, 2271, 5225, 10283, 116, 11807,
91, 11699, 757, 1304, 7524, 6451, 8032, 8154,
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481,
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179,
3924, 3188, 367, 2077, 336, 5384, 5631, 8596,
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795,
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366,
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418,
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173,
9763, 12191, 459, 2966, 3166, 405, 5000, 9311,
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700,
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775,
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553,
9474, 2586, 1431, 2741, 473, 11383, 4745, 836,
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351,
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152,
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230
};

/*
 * Map a small signed integer to its representative modulo q.
 * Precondition: x lies between -q/2 and +q/2.
 */
static inline uint32_t
mq_conv_small(int x) {
    uint32_t ux = (uint32_t)x;
    /* Top bit of ux is set exactly when x < 0: all-ones mask in that case. */
    uint32_t neg_mask = -(ux >> 31);

    return ux + (Q & neg_mask);
}

/*
 * Modular addition: returns x + y mod q for x, y in 0..q-1.
 */
static inline uint32_t
mq_add(uint32_t x, uint32_t y) {
    /*
     * Start from x + y - q. A negative result shows up as a set top
     * bit, which is stretched into an all-ones mask to add q back;
     * otherwise the mask is zero and nothing is added. No branch is
     * taken on the operand values.
     */
    uint32_t sum = x + y - Q;

    return sum + (Q & -(sum >> 31));
}

/*
 * Modular subtraction: returns x - y mod q for x, y in 0..q-1.
 */
static inline uint32_t
mq_sub(uint32_t x, uint32_t y) {
    /* Same masked "add q if negative" correction as in mq_add(). */
    uint32_t diff = x - y;

    return diff + (Q & -(diff >> 31));
}

/*
 * Halving modulo q: returns x / 2 mod q for x in 0..q-1.
 * An odd x is first made even by adding q (selected with a mask on the
 * low bit), then the sum is shifted right by one.
 */
static inline uint32_t
mq_rshift1(uint32_t x) {
    uint32_t odd_mask = -(x & 1);

    return (x + (Q & odd_mask)) >> 1;
}

/*
 * Montgomery product modulo q: with R = 2^16 mod q, returns
 * x * y / R mod q. Both operands must be in 0..q-1.
 */
static inline uint32_t
mq_montymul(uint32_t x, uint32_t y) {
    uint32_t prod, corr, red;

    /*
     * Pick k = (x*y * Q0I) mod 2^16 so that x*y + k*q has its low
     * 16 bits clear; the sum can then be divided by 2^16 exactly
     * with a shift. The sum is at most (2^16 - 1)*q + (q - 1)^2 and
     * therefore fits in 32 bits.
     */
    prod = x * y;
    corr = ((prod * Q0I) & 0xFFFF) * Q;
    red = (prod + corr) >> 16;

    /*
     * The shifted value is below 2q: subtract q, then add it back
     * under a sign mask to land in 0..q-1 without branching.
     */
    red -= Q;
    red += Q & -(red >> 31);
    return red;
}

/*
 * Montgomery square: returns (x * x) / R mod q, i.e. mq_montymul()
 * applied to x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    const uint32_t sq = mq_montymul(x, x);

    return sq;
}

/*
* Divide x by y modulo q = 12289.
*/
static inline uint32_t
mq_div_12289(uint32_t x, uint32_t y) {
/*
* We invert y by computing y^(q-2) mod q.
*
* We use the following addition chain for exponent e = 12287:
*
* e0 = 1
* e1 = 2 * e0 = 2
* e2 = e1 + e0 = 3
* e3 = e2 + e1 = 5
* e4 = 2 * e3 = 10
* e5 = 2 * e4 = 20
* e6 = 2 * e5 = 40
* e7 = 2 * e6 = 80
* e8 = 2 * e7 = 160
* e9 = e8 + e2 = 163
* e10 = e9 + e8 = 323
* e11 = 2 * e10 = 646
* e12 = 2 * e11 = 1292
* e13 = e12 + e9 = 1455
* e14 = 2 * e13 = 2910
* e15 = 2 * e14 = 5820
* e16 = e15 + e10 = 6143
* e17 = 2 * e16 = 12286
* e18 = e17 + e0 = 12287
*
* Additions on exponents are converted to Montgomery
* multiplications. We define all intermediate results as so
* many local variables, and let the C compiler work out which
* must be kept around.
*/
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18;

y0 = mq_montymul(y, R2);
y1 = mq_montysqr(y0);
y2 = mq_montymul(y1, y0);
y3 = mq_montymul(y2, y1);
y4 = mq_montysqr(y3);
y5 = mq_montysqr(y4);
y6 = mq_montysqr(y5);
y7 = mq_montysqr(y6);
y8 = mq_montysqr(y7);
y9 = mq_montymul(y8, y2);
y10 = mq_montymul(y9, y8);
y11 = mq_montysqr(y10);
y12 = mq_montysqr(y11);
y13 = mq_montymul(y12, y9);
y14 = mq_montysqr(y13);
y15 = mq_montysqr(y14);
y16 = mq_montymul(y15, y10);
y17 = mq_montysqr(y16);
y18 = mq_montymul(y17, y0);

/*
* Final multiplication with x, which is not in Montgomery
* representation, computes the correct division result.
*/
return mq_montymul(y18, x);
}

/*
 * In-place forward NTT of the 2^logn coefficients in a[], using the
 * GMb[] twiddle table. Each level halves the butterfly span and
 * doubles the number of groups.
 */
static void
mq_NTT(uint16_t *a, unsigned logn) {
    const size_t n = (size_t)1 << logn;
    size_t span = n;
    size_t groups;

    for (groups = 1; groups < n; groups <<= 1) {
        const size_t half = span >> 1;
        size_t g;

        for (g = 0; g < groups; g ++) {
            const uint32_t twiddle = GMb[groups + g];
            uint16_t *lo = a + g * span;
            uint16_t *hi = lo + half;
            size_t k;

            for (k = 0; k < half; k ++) {
                uint32_t top = lo[k];
                uint32_t bot = mq_montymul(hi[k], twiddle);

                lo[k] = (uint16_t)mq_add(top, bot);
                hi[k] = (uint16_t)mq_sub(top, bot);
            }
        }
        span = half;
    }
}

/*
 * In-place inverse NTT (binary case) of the 2^logn values in a[],
 * using the iGMb[] twiddle table, followed by the division by n.
 */
static void
mq_iNTT(uint16_t *a, unsigned logn) {
    const size_t n = (size_t)1 << logn;
    size_t half = 1;
    size_t groups, k;
    uint32_t scale;
    unsigned lvl;

    /* Butterfly levels: the span doubles while the group count halves. */
    for (groups = n >> 1; groups >= 1; groups >>= 1) {
        const size_t span = half << 1;
        size_t g;

        for (g = 0; g < groups; g ++) {
            const uint32_t twiddle = iGMb[groups + g];
            uint16_t *lo = a + g * span;
            uint16_t *hi = lo + half;

            for (k = 0; k < half; k ++) {
                uint32_t top = lo[k];
                uint32_t bot = hi[k];

                lo[k] = (uint16_t)mq_add(top, bot);
                hi[k] = (uint16_t)mq_montymul(mq_sub(top, bot), twiddle);
            }
        }
        half = span;
    }

    /*
     * Final scaling by 1/n. The factor is built in Montgomery form:
     * start from R and halve it logn times modulo q, so that one
     * Montgomery multiplication per element applies the division.
     */
    scale = R;
    for (lvl = 0; lvl < logn; lvl ++) {
        scale = mq_rshift1(scale);
    }
    for (k = 0; k < n; k ++) {
        a[k] = (uint16_t)mq_montymul(a[k], scale);
    }
}

/*
 * Put every coefficient of f (2^logn values mod q) into Montgomery
 * representation by Montgomery-multiplying it with R2.
 */
static void
mq_poly_tomonty(uint16_t *f, unsigned logn) {
    uint16_t *const end = f + ((size_t)1 << logn);

    while (f != end) {
        *f = (uint16_t)mq_montymul(*f, R2);
        f ++;
    }
}

/*
 * Coefficient-wise Montgomery product of two polynomials given in NTT
 * representation; f is overwritten with f*g, g is only read.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t left = (size_t)1 << logn;

    for (; left > 0; left --, f ++, g ++) {
        *f = (uint16_t)mq_montymul(*f, *g);
    }
}

/*
 * Coefficient-wise f <- f - g modulo q over 2^logn coefficients.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t left = (size_t)1 << logn;

    for (; left > 0; left --, f ++, g ++) {
        *f = (uint16_t)mq_sub(*f, *g);
    }
}

/* ===================================================================== */

/*
 * Convert h (2^logn coefficients mod q) in place to NTT representation
 * and then to Montgomery representation. Declared in inner.h.
 */
void
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) {
    mq_NTT(h, logn);            /* coefficient form -> NTT form */
    mq_poly_tomonty(h, logn);   /* plain values -> Montgomery values */
}

/*
 * Core signature check (declared in inner.h).
 *
 * c0   hashed-message polynomial, coefficients mod q
 * s2   signature polynomial, small signed coefficients
 * h    public key; it is multiplied with mq_poly_montymul_ntt(), so it
 *      is expected in NTT + Montgomery form
 * tmp  scratch space holding at least 2^logn 16-bit values
 *
 * Returns the verdict of PQCLEAN_FALCON1024_CLEAN_is_short() on the
 * pair (-s1, s2), where -s1 = s2*h - c0 mod phi mod q.
 */
int
PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
                                    const uint16_t *h, unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *work = (uint16_t *)tmp;
    int16_t *neg_s1 = (int16_t *)tmp;
    size_t k;

    /* s2 reduced to the 0..q-1 range. */
    for (k = 0; k < n; k ++) {
        work[k] = (uint16_t)mq_conv_small(s2[k]);
    }

    /* work <- s2*h - c0 mod phi mod q, i.e. -s1. */
    mq_NTT(work, logn);
    mq_poly_montymul_ntt(work, h, logn);
    mq_iNTT(work, logn);
    mq_poly_sub(work, c0, logn);

    /* Recentre each coefficient: values above q/2 have q subtracted. */
    for (k = 0; k < n; k ++) {
        int32_t c = (int32_t)work[k];
        uint32_t high_mask = -(((Q >> 1) - (uint32_t)c) >> 31);

        c -= (int32_t)(Q & high_mask);
        neg_s1[k] = (int16_t)c;
    }

    /* Valid if and only if the aggregate (-s1, s2) vector is short enough. */
    return PQCLEAN_FALCON1024_CLEAN_is_short(neg_s1, s2, logn);
}

/*
 * Compute the public polynomial h = g / f mod phi mod q (declared in
 * inner.h).
 *
 * tmp is scratch space for 2^logn 16-bit values. Returns 1 on success,
 * or 0 as soon as a coefficient of f in NTT representation is zero
 * (f is then not invertible); h holds partial data in that case.
 */
int
PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
                                        const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *f_ntt = (uint16_t *)tmp;
    size_t k;

    /* Lift the small coefficients of f and g into 0..q-1. */
    for (k = 0; k < n; k ++) {
        f_ntt[k] = (uint16_t)mq_conv_small(f[k]);
        h[k] = (uint16_t)mq_conv_small(g[k]);
    }

    mq_NTT(h, logn);
    mq_NTT(f_ntt, logn);

    /* Pointwise division in the NTT domain. */
    k = 0;
    while (k < n) {
        if (f_ntt[k] == 0) {
            return 0;
        }
        h[k] = (uint16_t)mq_div_12289(h[k], f_ntt[k]);
        k ++;
    }

    mq_iNTT(h, logn);
    return 1;
}

/*
 * Rebuild G = g * F / f mod phi mod q (declared in inner.h).
 *
 * tmp is scratch space for 2 * 2^logn 16-bit values. Returns 1 on
 * success. Returns 0 if f has a zero coefficient in NTT representation
 * or if a coefficient of the result, once recentred around zero, does
 * not fit in -127..+127; G may be partially written in the latter case.
 */
int
PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G,
        const int8_t *f, const int8_t *g, const int8_t *F,
        unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *num = (uint16_t *)tmp;
    uint16_t *aux = num + n;
    size_t k;

    /* num <- g * F in NTT representation. */
    for (k = 0; k < n; k ++) {
        num[k] = (uint16_t)mq_conv_small(g[k]);
        aux[k] = (uint16_t)mq_conv_small(F[k]);
    }
    mq_NTT(num, logn);
    mq_NTT(aux, logn);
    mq_poly_tomonty(num, logn);
    mq_poly_montymul_ntt(num, aux, logn);

    /* aux <- f in NTT representation, then divide pointwise. */
    for (k = 0; k < n; k ++) {
        aux[k] = (uint16_t)mq_conv_small(f[k]);
    }
    mq_NTT(aux, logn);
    for (k = 0; k < n; k ++) {
        if (aux[k] == 0) {
            return 0;
        }
        num[k] = (uint16_t)mq_div_12289(num[k], aux[k]);
    }
    mq_iNTT(num, logn);

    /* Recentre each coefficient and check that it fits in an int8_t. */
    for (k = 0; k < n; k ++) {
        uint32_t c;
        int32_t centred;

        c = num[k];
        /* q is subtracted unless c is below q/2 (mask built from the sign bit). */
        c -= (Q & ~ -((c - (Q >> 1)) >> 31));
        centred = *(int32_t *)&c;
        if (!(centred >= -127 && centred <= +127)) {
            return 0;
        }
        G[k] = (int8_t)centred;
    }
    return 1;
}

/*
 * Test whether s2 is invertible mod phi mod q (declared in inner.h):
 * returns 1 when no coefficient of s2 in NTT representation is zero,
 * 0 otherwise. tmp is scratch space for 2^logn 16-bit values. All
 * coefficients are always inspected; there is no early exit.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_invertible(
    const int16_t *s2, unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *ntt = (uint16_t *)tmp;
    uint32_t acc = 0;
    size_t k;

    for (k = 0; k < n; k ++) {
        ntt[k] = (uint16_t)mq_conv_small(s2[k]);
    }
    mq_NTT(ntt, logn);

    /* coefficient - 1 is negative only for 0, which sets the top bit of acc. */
    for (k = 0; k < n; k ++) {
        acc |= (uint32_t)(ntt[k] - 1);
    }
    return (int)(1u - (acc >> 31));
}

/*
 * Recover a public key from a signature (declared in inner.h).
 *
 * Computes h = (c0 - s1) / s2 mod phi mod q into h[]. tmp is scratch
 * space for 2^logn 16-bit values. Returns 1 if (s1, s2) is short
 * enough and s2 is invertible, 0 otherwise. The caller must still check
 * that the rebuilt key matches the expected one (e.g. through a hash).
 */
int
PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h,
                                        const uint16_t *c0, const int16_t *s1, const int16_t *s2,
                                        unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *s2_ntt = (uint16_t *)tmp;
    uint32_t bad, short_mask;
    size_t k;

    /* s2_ntt <- s2 mod q; h <- c0 - s1 mod q. */
    for (k = 0; k < n; k ++) {
        s2_ntt[k] = (uint16_t)mq_conv_small(s2[k]);
        h[k] = (uint16_t)mq_sub(c0[k], mq_conv_small(s1[k]));
    }

    mq_NTT(s2_ntt, logn);
    mq_NTT(h, logn);

    /*
     * Pointwise division. A zero divisor makes the result meaningless,
     * but instead of stopping we only record it in the top bit of
     * 'bad' so that processing stays constant-time.
     */
    bad = 0;
    for (k = 0; k < n; k ++) {
        bad |= (uint32_t)(s2_ntt[k] - 1);
        h[k] = (uint16_t)mq_div_12289(h[k], s2_ntt[k]);
    }
    mq_iNTT(h, logn);

    /* Accept only when no zero divisor was seen AND the vector is short. */
    short_mask = (uint32_t)(-PQCLEAN_FALCON1024_CLEAN_is_short(s1, s2, logn));
    bad = ~bad & short_mask;
    return (int)(bad >> 31);
}

/*
 * Count how many coefficients of sig are zero in NTT representation
 * (declared in inner.h). tmp is scratch space for 2^logn 16-bit values.
 */
int
PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *ntt = (uint16_t *)tmp;
    uint32_t zeros = 0;
    size_t k;

    for (k = 0; k < n; k ++) {
        ntt[k] = (uint16_t)mq_conv_small(sig[k]);
    }
    mq_NTT(ntt, logn);

    /* value - 1 wraps to a set top bit only when value == 0. */
    for (k = 0; k < n; k ++) {
        zeros += ((uint32_t)ntt[k] - 1u) >> 31;
    }
    return (int)zeros;
}

+ 0
- 15
src/sign/falcon/falcon-512/avx2/CMakeLists.txt View File

@@ -1,15 +0,0 @@
# Source files of the AVX2 build of Falcon-512, relative to this directory.
set(SRC_AVX2_FALCON512
    codec.c common.c fft.c fpr.c keygen.c
    pqclean.c rng.c sign.c vrfy.c)

# Pass the list to the project-defined define_sig_alg() helper together
# with the algorithm name, the PQClean symbol prefix and this directory.
# NOTE(review): argument meanings are inferred from the values; confirm
# against the definition of define_sig_alg().
define_sig_alg(
    falcon512_avx2
    PQCLEAN_FALCON512_AVX2
    "${SRC_AVX2_FALCON512}"
    "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 80
src/sign/falcon/falcon-512/avx2/api.h View File

@@ -1,80 +0,0 @@
#ifndef PQCLEAN_FALCON512_AVX2_API_H
#define PQCLEAN_FALCON512_AVX2_API_H

#include <stddef.h>
#include <stdint.h>

#define PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES 1281
#define PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES 897
#define PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES 690

#define PQCLEAN_FALCON512_AVX2_CRYPTO_ALGNAME "Falcon-512"

/*
 * Generate a new key pair.
 *
 * Parameters:
 *   pk  output; receives the public key, exactly
 *       PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES bytes.
 *   sk  output; receives the private key, exactly
 *       PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES bytes.
 *
 * Return value: 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign_keypair(
uint8_t *pk, uint8_t *sk);

/*
 * Compute a detached signature on a message.
 *
 * Parameters:
 *   sig     output; receives the signature. Its length is variable, at
 *           most PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES bytes.
 *   siglen  output; receives the actual signature length in bytes.
 *   m,mlen  message to sign and its length in bytes.
 *   sk      private key.
 *
 * sig[], m[] and sk[] may overlap each other arbitrarily.
 *
 * Return value: 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_AVX2_crypto_sign_signature(
uint8_t *sig, size_t *siglen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
* Verify a signature (sig, siglen) on a message (m, mlen) with a given
* public key (pk).
*
* sig[], m[] and pk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON512_AVX2_crypto_sign_verify(
const uint8_t *sig, size_t siglen,
const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
* Compute a signature on a message and pack the signature and message
* into a single object, written into sm[]. The length of that output is
* written in *smlen; that length may be larger than the message length
* (mlen) by up to PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES.
*
* sm[] and m[] may overlap each other arbitrarily; however, sm[] shall
* not overlap with sk[].
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON512_AVX2_crypto_sign(
uint8_t *sm, size_t *smlen,
const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
* Open a signed message object (sm, smlen) and verify the signature;
* on success, the message itself is written into m[] and its length
* into *mlen. The message is shorter than the signed message object,
* but the size difference depends on the signature value; the difference
* may range up to PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES.
*
* m[], sm[] and pk[] may overlap each other arbitrarily.
*
* Return value: 0 on success, -1 on error.
*/
int PQCLEAN_FALCON512_AVX2_crypto_sign_open(
uint8_t *m, size_t *mlen,
const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif

+ 0
- 555
src/sign/falcon/falcon-512/avx2/codec.c View File

@@ -1,555 +0,0 @@
#include "inner.h"

/*
* Encoding/decoding of keys and signatures.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/*
 * Pack 2^logn values modulo q = 12289 into out[], 14 bits per value,
 * most significant bit first (see inner.h).
 *
 * Returns the encoded length in bytes, or 0 if a value is not below
 * 12289 or if the output does not fit in max_out_len bytes. When out is
 * NULL nothing is written and only the required length is returned.
 */
size_t
PQCLEAN_FALCON512_AVX2_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    uint8_t *dst = out;
    uint32_t window = 0;
    int pending = 0;
    size_t i, pos = 0;

    /* Every coefficient must already be reduced modulo q. */
    for (i = 0; i < count; i ++) {
        if (x[i] >= 12289) {
            return 0;
        }
    }
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    for (i = 0; i < count; i ++) {
        window = (window << 14) | x[i];
        /* Emit every complete byte currently held in the window. */
        for (pending += 14; pending >= 8; ) {
            pending -= 8;
            dst[pos ++] = (uint8_t)(window >> pending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        dst[pos] = (uint8_t)(window << (8 - pending));
    }
    return needed;
}

/*
 * Unpack 2^logn values of 14 bits each from in[] into x[] (see inner.h).
 *
 * Returns the number of bytes consumed, or 0 if fewer than the required
 * bytes are available, if a decoded value is not below 12289, or if the
 * padding bits of the last byte are not all zero. On failure x[] may
 * have been partially written.
 */
size_t
PQCLEAN_FALCON512_AVX2_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0;
    int avail = 0;
    size_t pos = 0, done = 0;

    if (needed > max_in_len) {
        return 0;
    }

    while (done < count) {
        unsigned coeff;

        window = (window << 8) | src[pos ++];
        avail += 8;
        if (avail < 14) {
            /* Not enough bits yet for a full coefficient. */
            continue;
        }
        avail -= 14;
        coeff = (window >> avail) & 0x3FFF;
        if (coeff >= 12289) {
            return 0;
        }
        x[done ++] = (uint16_t)coeff;
    }

    /* Bits left over after the last coefficient must be zero. */
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        return 0;
    }
    return needed;
}

/*
 * Pack 2^logn signed 16-bit values into out[], using exactly 'bits'
 * bits per value (low bits of the two's complement form, most
 * significant bit first) (see inner.h).
 *
 * Each value must lie in -(2^(bits-1)-1) .. +(2^(bits-1)-1). Returns
 * the encoded length in bytes, or 0 if a value is out of range or the
 * output does not fit in max_out_len. When out is NULL nothing is
 * written and only the required length is returned.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    const size_t needed = ((count * bits) + 7) >> 3;
    uint8_t *dst = out;
    uint32_t window = 0, field_mask;
    unsigned pending = 0;
    size_t i, pos = 0;

    for (i = 0; i < count; i ++) {
        if (x[i] < lo || x[i] > hi) {
            return 0;
        }
    }
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    field_mask = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint16_t)x[i] & field_mask);
        /* Emit every complete byte currently held in the window. */
        for (pending += bits; pending >= 8; ) {
            pending -= 8;
            dst[pos ++] = (uint8_t)(window >> pending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        dst[pos] = (uint8_t)(window << (8 - pending));
    }
    return needed;
}

/*
 * Unpack 2^logn signed values, each stored on exactly 'bits' bits
 * (two's complement, most significant bit first), from in[] into x[]
 * (see inner.h).
 *
 * Returns the number of bytes consumed, or 0 if fewer than the required
 * bytes are available, if a field holds the forbidden value
 * -2^(bits-1), or if the padding bits of the last byte are not all
 * zero. On failure x[] may have been partially written.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len, u;
    const uint8_t *buf;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;  /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);  /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * Only the sign bit is set: this is the
                 * forbidden -2^(bits-1) value.
                 */
                return 0;
            }
            /*
             * Two's complement value of the field: the magnitude
             * bits minus the weight of the sign bit. This is done
             * once, with plain integer arithmetic, instead of
             * sign-extending twice and reading the result through
             * a pointer cast.
             */
            x[u ++] = (int16_t)((int32_t)(w & (mask2 - 1))
                                - (int32_t)(w & mask2));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}

/*
 * Pack 2^logn signed 8-bit values into out[], using exactly 'bits'
 * bits per value (low bits of the two's complement form, most
 * significant bit first) (see inner.h).
 *
 * Each value must lie in -(2^(bits-1)-1) .. +(2^(bits-1)-1). Returns
 * the encoded length in bytes, or 0 if a value is out of range or the
 * output does not fit in max_out_len. When out is NULL nothing is
 * written and only the required length is returned.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    const size_t needed = ((count * bits) + 7) >> 3;
    uint8_t *dst = out;
    uint32_t window = 0, field_mask;
    unsigned pending = 0;
    size_t i, pos = 0;

    for (i = 0; i < count; i ++) {
        if (x[i] < lo || x[i] > hi) {
            return 0;
        }
    }
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    field_mask = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint8_t)x[i] & field_mask);
        /* Emit every complete byte currently held in the window. */
        for (pending += bits; pending >= 8; ) {
            pending -= 8;
            dst[pos ++] = (uint8_t)(window >> pending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        dst[pos] = (uint8_t)(window << (8 - pending));
    }
    return needed;
}

/*
 * Unpack 2^logn signed values, each stored on exactly 'bits' bits
 * (two's complement, most significant bit first), from in[] into x[]
 * (see inner.h).
 *
 * Returns the number of bytes consumed, or 0 if fewer than the required
 * bytes are available, if a field holds the forbidden value
 * -2^(bits-1), or if the padding bits of the last byte are not all
 * zero. On failure x[] may have been partially written.
 */
size_t
PQCLEAN_FALCON512_AVX2_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * bits) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0, field_mask, sign_bit;
    unsigned avail = 0;
    size_t pos = 0, done = 0;

    if (needed > max_in_len) {
        return 0;
    }
    field_mask = ((uint32_t)1 << bits) - 1;
    sign_bit = (uint32_t)1 << (bits - 1);

    while (done < count) {
        window = (window << 8) | src[pos ++];
        /* Extract as many complete fields as the window now holds. */
        for (avail += 8; avail >= bits && done < count; done ++) {
            uint32_t w;

            avail -= bits;
            w = (window >> avail) & field_mask;
            /* Sign-extend the field to the full 32 bits. */
            w |= -(w & sign_bit);
            if (w == -sign_bit) {
                /* The -2^(bits-1) value is forbidden. */
                return 0;
            }
            x[done] = (int8_t) * (int32_t *)&w;
        }
    }

    /* Extra bits in the last byte must be zero. */
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        return 0;
    }
    return needed;
}

/*
 * Compress 2^logn signed values into out[] with a variable-length code
 * (see inner.h). Each value is written as: one sign bit, the low seven
 * bits of the absolute value, then (absolute value >> 7) zero bits
 * followed by a single one bit.
 *
 * Every value must lie in -2047..+2047. Returns the number of bytes
 * produced, or 0 if a value is out of range or the output would exceed
 * max_out_len. When out is NULL nothing is written and the function
 * only computes the encoded length.
 */
size_t
PQCLEAN_FALCON512_AVX2_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint8_t *dst = out;
    uint32_t window = 0;
    unsigned pending = 0;
    size_t i, written = 0;

    /* Reject any coefficient outside of -2047..+2047. */
    for (i = 0; i < count; i ++) {
        if (x[i] < -2047 || x[i] > +2047) {
            return 0;
        }
    }

    for (i = 0; i < count; i ++) {
        int value = x[i];
        uint32_t sign = (value < 0) ? 1u : 0u;
        unsigned mag = (unsigned)(sign ? -value : value);
        unsigned high = mag >> 7;

        /* Eight bits: the sign, then the low 7 bits of |value|. */
        window = (window << 8) | (sign << 7) | (mag & 127u);

        /*
         * 'high' zeros and a terminating one. Since |value| <= 2047,
         * high <= 15, so at most 16 bits are added here; with the 8
         * bits above and up to 7 leftover bits, the 32-bit window
         * never overflows.
         */
        window = (window << (high + 1)) | 1u;
        pending += 8 + high + 1;

        /* Flush every complete byte. */
        while (pending >= 8) {
            pending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(window >> pending);
            }
            written ++;
        }
    }

    /* Final partial byte, padded with zeros on the right. */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(window << (8 - pending));
        }
        written ++;
    }

    return written;
}

/*
 * Decompress 2^logn signed values from in[] into x[] (see inner.h).
 * Each value is read as: one sign bit, the low seven bits of the
 * absolute value, then a run of zero bits (each worth 128) ended by a
 * one bit.
 *
 * Returns the number of bytes consumed, or 0 on error. Errors are:
 *  - the input ends before all values have been read;
 *  - an absolute value exceeds 2047;
 *  - a "minus zero" (sign bit set, absolute value 0), which the encoder
 *    never produces;
 *  - non-zero padding bits after the last value in the final byte.
 * The last two checks make the accepted encoding unique, so a valid
 * encoding cannot be altered into a different byte string that decodes
 * to the same values. On failure x[] may have been partially written.
 */
size_t
PQCLEAN_FALCON512_AVX2_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero has exactly one valid encoding.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}

/*
* Key elements and signatures are polynomials with small integer
* coefficients. Here are some statistics gathered over many
* generated key pairs (10000 or more for each degree):
*
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G)
* 1 2 129 56.31 143 60.02
* 2 4 123 40.93 160 46.52
* 3 8 97 28.97 159 38.01
* 4 16 100 21.48 154 32.50
* 5 32 71 15.41 151 29.36
* 6 64 59 11.07 138 27.77
* 7 128 39 7.91 144 27.00
* 8 256 32 5.63 148 26.61
* 9 512 22 4.00 137 26.46
* 10 1024 15 2.84 146 26.41
*
* We want a compact storage format for private key, and, as part of
* key generation, we are allowed to reject some keys which would
* otherwise be fine (this does not induce any noticeable vulnerability
* as long as we reject only a small proportion of possible keys).
* Hence, we enforce at key generation time maximum values for the
* elements of f, g, F and G, so that their encoding can be expressed
* in fixed-width values. Limits have been chosen so that generated
* keys are almost always within bounds, thus impacting neither
* security nor performance.
*
* IMPORTANT: the code assumes that all coefficients of f, g, F and G
* ultimately fit in the -127..+127 range. Thus, none of the elements
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8.
*/

/*
 * Per-degree limit table for f and g, indexed by logn (1..10); none of
 * the entries exceeds 8.
 */
const uint8_t PQCLEAN_FALCON512_AVX2_max_fg_bits[] = {
    0,              /* unused */
    8, 8, 8, 8, 8,  /* logn = 1..5 */
    7, 7,           /* logn = 6..7 */
    6, 6,           /* logn = 8..9 */
    5               /* logn = 10 */
};

/*
 * Per-degree limit table for F and G, indexed by logn (1..10); every
 * used entry is 8.
 */
const uint8_t PQCLEAN_FALCON512_AVX2_max_FG_bits[] = {
    0,              /* unused */
    8, 8, 8, 8, 8,  /* logn = 1..5 */
    8, 8, 8, 8, 8   /* logn = 6..10 */
};

/*
* When generating a new key pair, we can always reject keys which
* feature an abnormally large coefficient. This can also be done for
* signatures, albeit with some care: in case the signature process is
* used in a derandomized setup (explicitly seeded with the message and
* private key), we have to follow the specification faithfully, and the
* specification only enforces a limit on the L2 norm of the signature
* vector. The limit on the L2 norm implies that the absolute value of
* a coefficient of the signature cannot be more than the following:
*
* log(n) n max sig coeff (theoretical)
* 1 2 412
* 2 4 583
* 3 8 824
* 4 16 1166
* 5 32 1649
* 6 64 2332
* 7 128 3299
* 8 256 4665
* 9 512 6598
* 10 1024 9331
*
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit
* in -2047..2047, i.e. 12 bits.
*/

/*
 * Per-degree limit table for signature coefficients, indexed by logn
 * (1..10).
 */
const uint8_t PQCLEAN_FALCON512_AVX2_max_sig_bits[] = {
    0,                          /* unused */
    10,                         /* logn = 1 */
    11, 11,                     /* logn = 2..3 */
    12, 12, 12, 12, 12, 12, 12  /* logn = 4..10 */
};

+ 0
- 294
src/sign/falcon/falcon-512/avx2/common.c View File

@@ -1,294 +0,0 @@
#include "inner.h"

/*
* Support functions for signatures (hash-to-point, norm).
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2017-2019 Falcon Project
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
*/


/*
 * Fill x[] with 2^logn values in 0..12288 derived from the SHAKE256
 * context sc (see inner.h).
 *
 * This is the direct, per-the-spec rejection sampling: 16-bit samples
 * are extracted from sc, samples of 61445 or more are discarded, and
 * the others are reduced modulo 12289. It is NOT constant-time, so it
 * may leak information on the hashed data to an attacker who knows the
 * nonce but sees neither the signature nor the public key.
 */
void
PQCLEAN_FALCON512_AVX2_hash_to_point_vartime(
    inner_shake256_context *sc,
    uint16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    size_t filled = 0;

    while (filled < count) {
        uint8_t pair[2];
        uint32_t sample;

        inner_shake256_extract(sc, (void *)pair, sizeof pair);
        sample = ((unsigned)pair[0] << 8) | (unsigned)pair[1];
        if (sample >= 61445) {
            /* Rejected: 61445 = 5 * 12289 keeps the result unbiased. */
            continue;
        }
        x[filled ++] = (uint16_t)(sample % 12289);
    }
}

/*
* see inner.h
*
* Fill x[] with 2^logn values modulo 12289 extracted from the SHAKE256
* context sc. Unlike the vartime variant, a fixed number of samples is
* always extracted and the rejected ones are removed with mask
* arithmetic instead of branches that depend on the sample values.
*
* sc    SHAKE256 context from which 16-bit samples are extracted
* x     output array, receives 2^logn values
* logn  base-2 logarithm of the degree (indexes overtab[], so 1..10)
* tmp   scratch area, used here as an array of up to 2^logn uint16_t
*/
void
PQCLEAN_FALCON512_AVX2_hash_to_point_ct(
inner_shake256_context *sc,
uint16_t *x, unsigned logn, uint8_t *tmp) {
/*
* Each 16-bit sample is a value in 0..65535. The value is
* kept if it falls in 0..61444 (because 61445 = 5*12289)
* and rejected otherwise; thus, each sample has probability
* about 0.93758 of being selected.
*
* We want to oversample enough to be sure that we will
* have enough values with probability at least 1 - 2^(-256).
* Depending on degree N, this leads to the following
* required oversampling:
*
* logn n oversampling
* 1 2 65
* 2 4 67
* 3 8 71
* 4 16 77
* 5 32 86
* 6 64 100
* 7 128 122
* 8 256 154
* 9 512 205
* 10 1024 287
*
* If logn >= 7, then the provided temporary buffer is large
* enough. Otherwise, we use a stack buffer of 63 entries
* (i.e. 126 bytes) for the values that do not fit in tmp[].
*/

/* Number of extra samples to extract, indexed by logn. */
static const uint16_t overtab[] = {
0, /* unused */
65,
67,
71,
77,
86,
100,
122,
154,
205,
287
};

unsigned n, n2, u, m, p, over;
uint16_t *tt1, tt2[63];

/*
* We first generate m 16-bit values. Values 0..n-1 go to x[].
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
* We also reduce modulo q the values; rejected values are set
* to 0xFFFF.
*/
n = 1U << logn;
n2 = n << 1;
over = overtab[logn];
m = n + over;
tt1 = (uint16_t *)tmp;
for (u = 0; u < m; u ++) {
uint8_t buf[2];
uint32_t w, wr;

inner_shake256_extract(sc, buf, sizeof buf);
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
/*
* Branchless reduction modulo 12289: (v - k) >> 31 is 1 when
* v < k (the subtraction wraps), so the mask is 0 in that case
* and all-ones otherwise. Subtracting 24578 twice and 12289
* once brings any kept sample (below 61445) under 12289.
*/
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1));
/* Samples >= 61445 are rejected: force all bits to 1 (0xFFFF once stored). */
wr |= ((w - 61445) >> 31) - 1;
if (u < n) {
x[u] = (uint16_t)wr;
} else if (u < n2) {
tt1[u - n] = (uint16_t)wr;
} else {
tt2[u - n2] = (uint16_t)wr;
}
}

/*
* Now we must "squeeze out" the invalid values. We do this in
* a logarithmic sequence of passes; each pass computes where a
* value should go, and moves it down by 'p' slots if necessary,
* where 'p' uses an increasing power-of-two scale. It can be
* shown that in all cases where the loop decides that a value
* has to be moved down by p slots, the destination slot is
* "free" (i.e. contains an invalid value).
*/
for (p = 1; p <= over; p <<= 1) {
unsigned v;

/*
* In the loop below:
*
* - v contains the index of the final destination of
* the value; it is recomputed dynamically based on
* whether values are valid or not.
*
* - u is the index of the value we consider ("source");
* its address is s.
*
* - The loop may swap the value with the one at index
* u-p. The address of the swap destination is d.
*/
v = 0;
for (u = 0; u < m; u ++) {
uint16_t *s, *d;
unsigned j, sv, dv, mk;

/* Locate slot u in the logical array x[] | tt1[] | tt2[]. */
if (u < n) {
s = &x[u];
} else if (u < n2) {
s = &tt1[u - n];
} else {
s = &tt2[u - n2];
}
sv = *s;

/*
* The value in sv should ultimately go to
* address v, i.e. jump back by u-v slots.
*/
j = u - v;

/*
* We increment v for the next iteration, but
* only if the source value is valid. The mask
* 'mk' is -1 if the value is valid, 0 otherwise,
* so we _subtract_ mk.
*/
mk = (sv >> 15) - 1U;
v -= mk;

/*
* In this loop we consider jumps by p slots; if
* u < p then there is nothing more to do.
*/
if (u < p) {
continue;
}

/*
* Destination for the swap: value at address u-p.
*/
if ((u - p) < n) {
d = &x[u - p];
} else if ((u - p) < n2) {
d = &tt1[(u - p) - n];
} else {
d = &tt2[(u - p) - n2];
}
dv = *d;

/*
* The swap should be performed only if the source
* is valid AND the jump j has its 'p' bit set.
* (j & p) is either 0 or p, with 1 <= p <= 256 here, so
* adding 0x1FF and shifting right by 9 yields 1 exactly
* when the bit is set; negation turns it into a mask.
*/
mk &= -(((j & p) + 0x1FF) >> 9);

/* Masked swap: exchanges *s and *d when mk is all-ones, no-op when 0. */
*s = (uint16_t)(sv ^ (mk & (sv ^ dv)));
*d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
}
}
}

/*
 * Tell whether the vector (s1, s2) is short enough (see inner.h).
 *
 * The squared l2-norm of the 2*2^logn coefficients is computed with
 * 32-bit operations only; if any running sum reaches 2^31 or more the
 * result saturates to 2^32-1. Returns 1 if the squared norm is below
 * the acceptance bound, 0 otherwise.
 */
int
PQCLEAN_FALCON512_AVX2_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    const uint32_t bound =
        ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    const size_t count = (size_t)1 << logn;
    uint32_t sqnorm = 0;
    uint32_t seen = 0;   /* OR of all running sums: tracks the top bit */
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t a = s1[i];
        int32_t b = s2[i];

        sqnorm += (uint32_t)(a * a);
        seen |= sqnorm;
        sqnorm += (uint32_t)(b * b);
        seen |= sqnorm;
    }

    /* Saturate to all-ones if the top bit was ever set. */
    sqnorm |= -(seen >> 31);

    return sqnorm < bound;
}

/*
 * Tell whether a vector is short enough, given the already computed
 * squared norm sqn of its first half and the second half s2 (see
 * inner.h).
 *
 * The squares of the 2^logn coefficients of s2 are added to sqn with
 * 32-bit operations; if sqn or any running sum reaches 2^31 or more
 * the result saturates to 2^32-1. Returns 1 if the total is below the
 * acceptance bound, 0 otherwise.
 */
int
PQCLEAN_FALCON512_AVX2_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    const uint32_t bound =
        ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    const size_t count = (size_t)1 << logn;
    /* Starts as all-ones when the incoming sqn already has its top bit set. */
    uint32_t seen = -(sqn >> 31);
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c = s2[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }

    /* Saturate to all-ones if the top bit was ever set. */
    sqn |= -(seen >> 31);

    return sqn < bound;
}

+ 0
- 1109
src/sign/falcon/falcon-512/avx2/fft.c
File diff suppressed because it is too large
View File


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save