@@ -1,14 +0,0 @@ | |||
#--unpad-paren | |||
# disable backup files |
@@ -1,6 +0,0 @@ | |||
* text=auto | |||
*.[ch] text whitespacestrict | |||
*.yaml text whitespacestrict | |||
Makefile text whitespace="tabwidth=4,-tab-in-indent,indent-with-non-tab" | |||
[attr]whitespacestrict whitespace="trailing-space,tab-in-indent,space-before-tab,tabwidth=4" |
@@ -7,7 +7,4 @@ bin/ | |||
# Object and library files on Windows | |||
*.lib | |||
*.obj | |||
__pycache__ | |||
testcases/ | |||
*.obj |
@@ -7,3 +7,6 @@ | |||
[submodule "3rd/gbench"] | |||
path = 3rd/gbench | |||
url = https://github.com/henrydcase/benchmark.git | |||
[submodule "3rd/cpu_features"] | |||
path = 3rd/cpu_features | |||
url = https://github.com/henrydcase/cpu_features.git |
@@ -0,0 +1 @@ | |||
Subproject commit 2b07c2ab7df71d0b6c19afb93f68a808b412a7ff |
@@ -1,10 +1,13 @@ | |||
cmake_minimum_required(VERSION 3.13) | |||
project(cryptocore NONE) | |||
project(cryptocore VERSION 0.0.1 LANGUAGES C) | |||
enable_language(C) | |||
enable_language(CXX) | |||
enable_language(ASM) | |||
add_subdirectory(3rd/gtest) | |||
add_subdirectory(3rd/cpu_features) | |||
set(CMAKE_VERBOSE_MAKEFILE ON) | |||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "~/.cmake/Modules") | |||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "3rd/cmake-modules") | |||
@@ -33,7 +36,19 @@ else() | |||
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR}) | |||
endif() | |||
add_subdirectory(3rd/gtest) | |||
# Configure and add the Google Benchmark submodule (3rd/gbench) for every
# build type except "debug".
# NOTE(review): CMAKE_BUILD_TYPE_LOWER is not a built-in CMake variable; it is
# presumably derived from CMAKE_BUILD_TYPE earlier in this file -- confirm.
if(NOT CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
  # settings below are required by benchmark library
  # NOTE(review): this FORCE overrides any non-debug build type chosen by the
  # user (e.g. RelWithDebInfo) with Release.
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)

  # Target for benchmark - it also builds gtest library
  set(BENCHMARK_ENABLE_GTEST_TESTS ON CACHE BOOL "Enable testing of the benchmark library." FORCE)
  set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark tests" FORCE)

  # PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps these paths valid
  # when this project is itself pulled in through add_subdirectory().
  set(GOOGLETEST_PATH "${PROJECT_SOURCE_DIR}/3rd/gtest" CACHE PATH "Path to the gtest sources" FORCE)
  set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)

  add_subdirectory("${PROJECT_SOURCE_DIR}/3rd/gbench")
endif()
# Arch settings | |||
@@ -82,6 +97,7 @@ include_directories( | |||
public | |||
src/common/ | |||
src | |||
3rd/cpu_features/include | |||
) | |||
set_property(GLOBAL PROPERTY obj_libs "") | |||
@@ -128,6 +144,7 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/clean) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-128s-robust/clean) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-128f-simple/clean) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-robust/clean) | |||
add_subdirectory(src/sign/picnic/picnic3l1/clean) | |||
add_subdirectory(src/kem/kyber/kyber512/clean) | |||
add_subdirectory(src/kem/kyber/kyber768/clean) | |||
@@ -148,12 +165,13 @@ add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean) | |||
add_subdirectory(src/kem/sike) | |||
# Hardware optimized targets | |||
if(${ARCH} STREQUAL "ARCH_x86_64") | |||
set(CMAKE_C_FLAGS | |||
"${CMAKE_C_FLAGS} -march=native -mtune=native") | |||
"${CMAKE_C_FLAGS} -march=haswell") | |||
set(SRC_COMMON_AVX2 | |||
src/common/keccak4x/KeccakP-1600-times4-SIMD256.c | |||
) | |||
@@ -241,12 +259,16 @@ get_property(OBJ_LIBS GLOBAL PROPERTY obj_libs) | |||
target_link_libraries( | |||
pqc | |||
common | |||
${OBJ_LIBS} | |||
cpu_features | |||
common | |||
) | |||
target_link_libraries( | |||
pqc_s | |||
cpu_features | |||
common | |||
${OBJ_LIBS} | |||
) | |||
@@ -268,6 +290,9 @@ target_include_directories( | |||
${CMAKE_SOURCE_DIR}) | |||
# Add the benchmark targets from test/bench for every build type except
# "debug".
if(NOT CMAKE_BUILD_TYPE_LOWER STREQUAL "debug") | |||
add_subdirectory(test/bench) | |||
endif() | |||
install(TARGETS pqc pqc_s | |||
PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE | |||
@@ -18,6 +18,7 @@ Users shouldn't expect any level of security provided by this code. The library | |||
| Falcon | 2 | | | |||
| Rainbow | 3 | | | |||
| SPHINCS+ SHA256/SHAKE256 | 3 | x | | |||
| SIKE/p434 | 3 | x | | |||
## Building | |||
@@ -38,13 +39,13 @@ Library provides simple API, wrapping PQClean. For example to use KEM, one shoul | |||
```cpp | |||
#include <pqc/pqc.h> | |||
const params_t *p = pqc_kem_alg_by_id(KYBER512); | |||
std::vector<uint8_t> ct(ciphertext_bsz(p)); | |||
std::vector<uint8_t> ss1(shared_secret_bsz(p)); | |||
std::vector<uint8_t> ss2(shared_secret_bsz(p)); | |||
std::vector<uint8_t> sk(private_key_bsz(p)); | |||
std::vector<uint8_t> pk(public_key_bsz(p)); | |||
const params_t *p = pqc_kem_alg_by_id(KYBER512); | |||
pqc_keygen(p, pk.data(), sk.data()); | |||
pqc_kem_encapsulate(p, ct.data(), ss1.data(), pk.data()); | |||
pqc_kem_decapsulate(p, ss2.data(), ct.data(), sk.data()); | |||
@@ -0,0 +1,9 @@ | |||
# Security Policy | |||
## Supported Versions | |||
No security guarantees are provided. | |||
## Reporting a Vulnerability | |||
Any comments welcome: contact (at) amongbytes.com |
@@ -0,0 +1,14 @@ | |||
# Drone CI pipeline with a single "build" step: fetch the git submodules,
# configure with CMake in a fresh build/ directory, build with make and run
# the resulting ./test binary.
kind: pipeline | |||
type: exec | |||
name: default | |||
steps: | |||
- name: build | |||
commands: | |||
- git submodule init | |||
# NOTE(review): --remote checks out the tip of each submodule's tracked
# branch instead of the commit recorded in this repository -- confirm that
# building against unpinned submodule revisions is intended.
- git submodule update --recursive --remote | |||
# Out-of-source build; "mkdir build" fails if the directory already exists.
- mkdir build | |||
- cd build | |||
- cmake .. | |||
- make | |||
- ./test |
@@ -41,7 +41,8 @@ extern "C" { | |||
_(SPHINCSSHA256256SROBUST) \ | |||
_(SPHINCSSHA256128SROBUST) \ | |||
_(SPHINCSSHA256128FSIMPLE) \ | |||
_(SPHINCSSHA256192FROBUST) | |||
_(SPHINCSSHA256192FROBUST) \ | |||
_(PICNIC3L1) | |||
// defines supported kem algorithm list | |||
#define PQC_SUPPORTED_KEMS(_)\ | |||
@@ -63,7 +64,8 @@ extern "C" { | |||
_(SABER) \ | |||
_(HQCRMRS128) \ | |||
_(HQCRMRS192) \ | |||
_(HQCRMRS256) | |||
_(HQCRMRS256) \ | |||
_(SIKE434) | |||
// Defines IDs for each algorithm. The | |||
// PQC_ALG_SIG/KEM_MAX indicates number | |||
@@ -1,124 +1,9 @@ | |||
#include <stdint.h> | |||
#include <stdbool.h> | |||
#include <pqc/pqc.h> | |||
#include <cpuinfo_x86.h> | |||
// PQClean include | |||
#include "sign/rainbow/rainbowV-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowI-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowIII-classic/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h" | |||
#include "sign/falcon/falcon-1024/clean/api.h" | |||
#include "sign/falcon/falcon-1024/avx2/api.h" | |||
#include "sign/falcon/falcon-512/clean/api.h" | |||
#include "sign/falcon/falcon-512/avx2/api.h" | |||
#include "sign/dilithium/dilithium2/clean/api.h" | |||
#include "sign/dilithium/dilithium2/avx2/api.h" | |||
#include "sign/dilithium/dilithium3/clean/api.h" | |||
#include "sign/dilithium/dilithium3/avx2/api.h" | |||
#include "sign/dilithium/dilithium5/clean/api.h" | |||
#include "sign/dilithium/dilithium5/avx2/api.h" | |||
#include "kem/ntru/ntruhps4096821/clean/api.h" | |||
#include "kem/ntru/ntruhps4096821/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048509/clean/api.h" | |||
#include "kem/ntru/ntruhps2048509/avx2/api.h" | |||
#include "kem/ntru/ntruhrss701/clean/api.h" | |||
#include "kem/ntru/ntruhrss701/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048677/clean/api.h" | |||
#include "kem/ntru/ntruhps2048677/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/avx2/api.h" | |||
#include "kem/kyber/kyber768/clean/api.h" | |||
#include "kem/kyber/kyber768/avx2/api.h" | |||
#include "kem/kyber/kyber1024/clean/api.h" | |||
#include "kem/kyber/kyber1024/avx2/api.h" | |||
#include "kem/kyber/kyber512/clean/api.h" | |||
#include "kem/kyber/kyber512/avx2/api.h" | |||
#include "kem/mceliece/mceliece460896f/avx/api.h" | |||
#include "kem/mceliece/mceliece460896f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128f/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128f/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119f/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119f/clean/api.h" | |||
#include "kem/mceliece/mceliece460896/avx/api.h" | |||
#include "kem/mceliece/mceliece460896/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128/clean/api.h" | |||
#include "kem/mceliece/mceliece348864f/avx/api.h" | |||
#include "kem/mceliece/mceliece348864f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119/clean/api.h" | |||
#include "kem/mceliece/mceliece348864/avx/api.h" | |||
#include "kem/mceliece/mceliece348864/clean/api.h" | |||
#include "kem/frodo/frodokem976shake/clean/api.h" | |||
#include "kem/frodo/frodokem1344shake/clean/api.h" | |||
#include "kem/frodo/frodokem640shake/clean/api.h" | |||
#include "kem/saber/lightsaber/clean/api.h" | |||
#include "kem/saber/lightsaber/avx2/api.h" | |||
#include "kem/saber/firesaber/clean/api.h" | |||
#include "kem/saber/firesaber/avx2/api.h" | |||
#include "kem/saber/saber/clean/api.h" | |||
#include "kem/saber/saber/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/avx2/api.h" | |||
#include "schemes.h" | |||
// not proud of this thingy | |||
#define OPT_VERSION _CLEAN_ | |||
@@ -192,6 +77,13 @@ const sig_params_t sigs[] = { | |||
PQC_SUPPORTED_SIGS(REG_SIG) | |||
}; | |||
// Contains capabilities on x86 CPU on which implementation is running | |||
// (assigned by static_initialization() in this file).
X86Features CPU_CAPS; | |||
// Returns the address of the CPU_CAPS global. Both the pointer and the
// pointee are const-qualified, so callers get read-only access.
const X86Features * const get_cpu_caps(void) { | |||
return &CPU_CAPS; | |||
} | |||
const params_t *pqc_kem_alg_by_id(uint8_t id) { | |||
int i; | |||
for(i=0; i<PQC_ALG_KEM_MAX; i++) { | |||
@@ -242,3 +134,8 @@ bool pqc_sig_verify(const params_t *p, | |||
const uint8_t *pk) { | |||
return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk); | |||
} | |||
// Declared with the GCC/Clang "constructor" attribute, so it is invoked
// automatically before main() (or when the shared library is loaded).
void static_initialization(void) __attribute__((constructor)); | |||
// Stores the `features` member of the structure returned by GetX86Info()
// in the CPU_CAPS global.
void static_initialization(void) { | |||
CPU_CAPS = GetX86Info().features; | |||
} | |||
@@ -0,0 +1,124 @@ | |||
#ifndef PQC_SCHEMES_ | |||
#define PQC_SCHEMES_ | |||
// PQClean include | |||
#include "sign/rainbow/rainbowV-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowI-classic/clean/api.h" | |||
#include "sign/rainbow/rainbowIII-classic/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h" | |||
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h" | |||
#include "sign/falcon/falcon-1024/clean/api.h" | |||
#include "sign/falcon/falcon-1024/avx2/api.h" | |||
#include "sign/falcon/falcon-512/clean/api.h" | |||
#include "sign/falcon/falcon-512/avx2/api.h" | |||
#include "sign/dilithium/dilithium2/clean/api.h" | |||
#include "sign/dilithium/dilithium2/avx2/api.h" | |||
#include "sign/dilithium/dilithium3/clean/api.h" | |||
#include "sign/dilithium/dilithium3/avx2/api.h" | |||
#include "sign/dilithium/dilithium5/clean/api.h" | |||
#include "sign/dilithium/dilithium5/avx2/api.h" | |||
#include "sign/picnic/picnic3l1/clean/api.h" | |||
#include "kem/ntru/ntruhps4096821/clean/api.h" | |||
#include "kem/ntru/ntruhps4096821/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048509/clean/api.h" | |||
#include "kem/ntru/ntruhps2048509/avx2/api.h" | |||
#include "kem/ntru/ntruhrss701/clean/api.h" | |||
#include "kem/ntru/ntruhrss701/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048677/clean/api.h" | |||
#include "kem/ntru/ntruhps2048677/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/avx2/api.h" | |||
#include "kem/kyber/kyber768/clean/api.h" | |||
#include "kem/kyber/kyber768/avx2/api.h" | |||
#include "kem/kyber/kyber1024/clean/api.h" | |||
#include "kem/kyber/kyber1024/avx2/api.h" | |||
#include "kem/kyber/kyber512/clean/api.h" | |||
#include "kem/kyber/kyber512/avx2/api.h" | |||
#include "kem/mceliece/mceliece460896f/avx/api.h" | |||
#include "kem/mceliece/mceliece460896f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128f/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128f/clean/api.h" | |||
#include "kem/mceliece/mceliece8192128f/avx/api.h" | |||
#include "kem/mceliece/mceliece8192128f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119f/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119f/clean/api.h" | |||
#include "kem/mceliece/mceliece460896/avx/api.h" | |||
#include "kem/mceliece/mceliece460896/clean/api.h" | |||
#include "kem/mceliece/mceliece6688128/avx/api.h" | |||
#include "kem/mceliece/mceliece6688128/clean/api.h" | |||
#include "kem/mceliece/mceliece348864f/avx/api.h" | |||
#include "kem/mceliece/mceliece348864f/clean/api.h" | |||
#include "kem/mceliece/mceliece6960119/avx/api.h" | |||
#include "kem/mceliece/mceliece6960119/clean/api.h" | |||
#include "kem/mceliece/mceliece348864/avx/api.h" | |||
#include "kem/mceliece/mceliece348864/clean/api.h" | |||
#include "kem/frodo/frodokem976shake/clean/api.h" | |||
#include "kem/frodo/frodokem1344shake/clean/api.h" | |||
#include "kem/frodo/frodokem640shake/clean/api.h" | |||
#include "kem/saber/lightsaber/clean/api.h" | |||
#include "kem/saber/lightsaber/avx2/api.h" | |||
#include "kem/saber/firesaber/clean/api.h" | |||
#include "kem/saber/firesaber/avx2/api.h" | |||
#include "kem/saber/saber/clean/api.h" | |||
#include "kem/saber/saber/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/avx2/api.h" | |||
#include "kem/sike/includes/sike/sike.h" | |||
#endif |
@@ -0,0 +1,8 @@ | |||
#ifndef PQC_COMMON_UTILS_ | |||
#define PQC_COMMON_UTILS_ | |||
#include <cpuinfo_x86.h> | |||
// Returns a read-only pointer to the X86Features structure declared in
// cpuinfo_x86.h.
// NOTE(review): presumably the CPU capabilities detected at start-up --
// confirm against the definition.
const X86Features * const get_cpu_caps(void); | |||
#endif |
@@ -0,0 +1,20 @@ | |||
# Portable C sources of the SIKE/p434 implementation.
set(
  SRC_CLEAN_SIKE_P434
  p434/fpx.c
  p434/fp_generic.c
  p434/isogeny.c
  p434/params.c
  p434/sike.c)

# On x86-64 additionally build the assembly source and define PQC_ASM=1.
# ARCH is quoted so that an unset or empty value is compared as a plain string
# instead of turning the condition into a syntax error.
if("${ARCH}" STREQUAL "ARCH_x86_64")
  add_definitions(-DPQC_ASM=1)
  list(APPEND SRC_CLEAN_SIKE_P434 p434/fp-x86_64.S)
endif()

# define_kem_alg() is a project helper defined outside this file; it receives
# the target name, a symbol prefix, the source list and this directory.
# NOTE(review): argument meanings are inferred from the call -- confirm against
# the helper's definition.
define_kem_alg(
  sike_p434_clean
  PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")
@@ -0,0 +1,81 @@ | |||
#ifndef SIKE_H_ | |||
#define SIKE_H_ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "randombytes.h" | |||
/* SIKE | |||
* | |||
* SIKE is an isogeny-based post-quantum key encapsulation mechanism. A description of the | |||
* algorithm is provided in [SIKE]. This implementation uses 434-bit field size. The code | |||
* is based on "Additional_Implementations" from PQC NIST submission package which can | |||
* be found here: | |||
* https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip | |||
* | |||
* [SIKE] https://sike.org/files/SIDH-spec.pdf | |||
*/ | |||
// SIKE_PUB_BYTESZ is the number of bytes in a public key. | |||
#define SIKE_PUB_BYTESZ 330 | |||
// SIKE_PRV_BYTESZ is the number of bytes in a private key. | |||
#define SIKE_PRV_BYTESZ 28 | |||
// SIKE_SS_BYTESZ is the number of bytes in a shared key. | |||
#define SIKE_SS_BYTESZ 16 | |||
// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated | |||
// with the public key (see 1.4 of SIKE). | |||
#define SIKE_MSG_BYTESZ 16 | |||
// SIKE_CT_BYTESZ is the number of bytes in a ciphertext. | |||
#define SIKE_CT_BYTESZ (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ) | |||
// SIKE_keypair outputs a public and secret key. In case of success | |||
// function returns 1, otherwise 0. | |||
int SIKE_keypair( | |||
uint8_t out_priv[SIKE_PRV_BYTESZ], | |||
uint8_t out_pub[SIKE_PUB_BYTESZ]); | |||
// SIKE_encaps generates and encrypts a random session key, writing those values to | |||
// |out_shared_key| and |out_ciphertext|, respectively. | |||
void SIKE_encaps( | |||
uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
uint8_t out_ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ]); | |||
// SIKE_decaps outputs a random session key, writing it to |out_shared_key|. | |||
void SIKE_decaps( | |||
uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
const uint8_t ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ], | |||
const uint8_t priv_key[SIKE_PRV_BYTESZ]); | |||
// boilerplate needed for integration
//
// Sizes and algorithm name exposed under PQClean-style macro names. The
// secret-key size covers SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ bytes followed by
// a copy of the public key (SIKE_PUB_BYTESZ bytes). The sum is parenthesized
// so the macro keeps its value inside larger expressions such as
// 2 * ..._SECRETKEYBYTES.
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME "SIKE/p434"

// The AVX2 names carry the same values as the CLEAN ones.
#define PQCLEAN_SIKE434_AVX2_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_AVX2_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_ALGNAME "SIKE/p434"
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
SIKE_keypair(sk, pk); | |||
// KATs require the public key to be concatenated after private key | |||
memcpy(&sk[SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ], pk, SIKE_PUB_BYTESZ); | |||
return 0; | |||
} | |||
// PQClean-style encapsulation wrapper: forwards to SIKE_encaps, which takes
// the shared secret before the ciphertext (the reverse of this function's
// parameter order). Always returns 0 because SIKE_encaps returns void.
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { | |||
SIKE_encaps(ss,ct,pk); | |||
return 0; | |||
} | |||
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { | |||
SIKE_decaps(ss, ct, &sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], sk); | |||
return 0; | |||
} | |||
#endif |
@@ -0,0 +1,926 @@ | |||
.text | |||
/* Constant table of six 64-bit words. The add/sub routines in this file  */
/* use the word at offset 8 for two consecutive limbs, so the table       */
/* describes a 7-limb value (presumably 2*p434 -- confirm).               */
.Lp434x2: | |||
.quad 0xFFFFFFFFFFFFFFFE | |||
.quad 0xFFFFFFFFFFFFFFFF | |||
.quad 0xFB82ECF5C5FFFFFF | |||
.quad 0xF78CB8F062B15D47 | |||
.quad 0xD9F8BFAD038A40AC | |||
.quad 0x0004683E4E2EE688 | |||
/* Constant table of four 64-bit words, read by sike_fprdc_asm as mulx    */
/* operands (presumably the non-zero limbs of p434+1 -- confirm).         */
.Lp434p1: | |||
.quad 0xFDC1767AE3000000 | |||
.quad 0x7BC65C783158AEA3 | |||
.quad 0x6CFC5FD681C52056 | |||
.quad 0x0002341F27177344 | |||
/* sike_fpadd_asm: adds the 7-limb values at (%rdi) and (%rsi), subtracts  */
/* the .Lp434x2 constant, adds the constant back when that subtraction     */
/* borrowed, and stores the 7-limb result at (%rdx).                       */
/* Presumably modular addition with operands/results below 2*p434 --      */
/* confirm against the C caller.                                           */
.globl sike_fpadd_asm | |||
.hidden sike_fpadd_asm | |||
.type sike_fpadd_asm,@function | |||
sike_fpadd_asm: | |||
.cfi_startproc | |||
/* Save r12-r14; they are restored before returning. */
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
/* rax = 0; it becomes the borrow mask further down. */
xorq %rax,%rax | |||
/* r8..r14 = a + b, one add/adc carry chain over seven limbs. */
movq 0(%rdi),%r8 | |||
addq 0(%rsi),%r8 | |||
movq 8(%rdi),%r9 | |||
adcq 8(%rsi),%r9 | |||
movq 16(%rdi),%r10 | |||
adcq 16(%rsi),%r10 | |||
movq 24(%rdi),%r11 | |||
adcq 24(%rsi),%r11 | |||
movq 32(%rdi),%r12 | |||
adcq 32(%rsi),%r12 | |||
movq 40(%rdi),%r13 | |||
adcq 40(%rsi),%r13 | |||
movq 48(%rdi),%r14 | |||
adcq 48(%rsi),%r14 | |||
/* r8..r14 -= constant. The word at offset 8 is applied to both r9 and r10. */
movq .Lp434x2(%rip),%rcx | |||
subq %rcx,%r8 | |||
movq 8+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r9 | |||
sbbq %rcx,%r10 | |||
movq 16+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r11 | |||
movq 24+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r12 | |||
movq 32+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r13 | |||
movq 40+.Lp434x2(%rip),%rcx | |||
sbbq %rcx,%r14 | |||
/* rax = 0 - borrow: all ones if the subtraction borrowed, otherwise 0. */
sbbq $0,%rax | |||
/* Masked add-back of the constant, limbs 0..3. */
movq .Lp434x2(%rip),%rdi | |||
andq %rax,%rdi | |||
movq 8+.Lp434x2(%rip),%rsi | |||
andq %rax,%rsi | |||
movq 16+.Lp434x2(%rip),%rcx | |||
andq %rax,%rcx | |||
addq %rdi,%r8 | |||
movq %r8,0(%rdx) | |||
adcq %rsi,%r9 | |||
movq %r9,8(%rdx) | |||
adcq %rsi,%r10 | |||
movq %r10,16(%rdx) | |||
adcq %rcx,%r11 | |||
movq %r11,24(%rdx) | |||
/* Park the carry in cl: the andq instructions below overwrite the flags. */
setc %cl | |||
movq 24+.Lp434x2(%rip),%r8 | |||
andq %rax,%r8 | |||
movq 32+.Lp434x2(%rip),%r9 | |||
andq %rax,%r9 | |||
movq 40+.Lp434x2(%rip),%r10 | |||
andq %rax,%r10 | |||
/* Reload CF from bit 0 of rcx and finish the add-back for limbs 4..6. */
btq $0,%rcx | |||
adcq %r8,%r12 | |||
movq %r12,32(%rdx) | |||
adcq %r9,%r13 | |||
movq %r13,40(%rdx) | |||
adcq %r10,%r14 | |||
movq %r14,48(%rdx) | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/* sike_fpsub_asm: computes the 7-limb difference (%rdi) - (%rsi), adds    */
/* the .Lp434x2 constant when the subtraction borrowed, and stores the     */
/* 7-limb result at (%rdx).                                                */
/* Presumably modular subtraction with operands/results below 2*p434 --   */
/* confirm against the C caller.                                           */
.globl sike_fpsub_asm | |||
.hidden sike_fpsub_asm | |||
.type sike_fpsub_asm,@function | |||
sike_fpsub_asm: | |||
.cfi_startproc | |||
/* Save r12-r14; they are restored before returning. */
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
/* rax = 0; it becomes the borrow mask further down. */
xorq %rax,%rax | |||
/* r8..r14 = a - b, one sub/sbb borrow chain over seven limbs. */
movq 0(%rdi),%r8 | |||
subq 0(%rsi),%r8 | |||
movq 8(%rdi),%r9 | |||
sbbq 8(%rsi),%r9 | |||
movq 16(%rdi),%r10 | |||
sbbq 16(%rsi),%r10 | |||
movq 24(%rdi),%r11 | |||
sbbq 24(%rsi),%r11 | |||
movq 32(%rdi),%r12 | |||
sbbq 32(%rsi),%r12 | |||
movq 40(%rdi),%r13 | |||
sbbq 40(%rsi),%r13 | |||
movq 48(%rdi),%r14 | |||
sbbq 48(%rsi),%r14 | |||
/* rax = 0 - borrow: all ones if the subtraction borrowed, otherwise 0. */
sbbq $0x0,%rax | |||
/* Masked addition of the constant, limbs 0..3. The word at offset 8 is   */
/* applied to both r9 and r10.                                             */
movq .Lp434x2(%rip),%rdi | |||
andq %rax,%rdi | |||
movq 8+.Lp434x2(%rip),%rsi | |||
andq %rax,%rsi | |||
movq 16+.Lp434x2(%rip),%rcx | |||
andq %rax,%rcx | |||
addq %rdi,%r8 | |||
movq %r8,0(%rdx) | |||
adcq %rsi,%r9 | |||
movq %r9,8(%rdx) | |||
adcq %rsi,%r10 | |||
movq %r10,16(%rdx) | |||
adcq %rcx,%r11 | |||
movq %r11,24(%rdx) | |||
/* Park the carry in cl: the andq instructions below overwrite the flags. */
setc %cl | |||
movq 24+.Lp434x2(%rip),%r8 | |||
andq %rax,%r8 | |||
movq 32+.Lp434x2(%rip),%r9 | |||
andq %rax,%r9 | |||
movq 40+.Lp434x2(%rip),%r10 | |||
andq %rax,%r10 | |||
/* Reload CF from bit 0 of rcx and finish the addition for limbs 4..6. */
btq $0x0,%rcx | |||
adcq %r8,%r12 | |||
adcq %r9,%r13 | |||
adcq %r10,%r14 | |||
movq %r12,32(%rdx) | |||
movq %r13,40(%rdx) | |||
movq %r14,48(%rdx) | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/* sike_mpadd_asm: plain 7-limb addition, (%rdx) = (%rdi) + (%rsi).        */
/* No reduction is applied and the carry out of the top limb is not        */
/* stored anywhere.                                                        */
.globl sike_mpadd_asm | |||
.hidden sike_mpadd_asm | |||
.type sike_mpadd_asm,@function | |||
sike_mpadd_asm: | |||
.cfi_startproc | |||
/* Limbs 0..4: load a, add b with carry, store. */
movq 0(%rdi),%r8; | |||
movq 8(%rdi),%r9 | |||
movq 16(%rdi),%r10 | |||
movq 24(%rdi),%r11 | |||
movq 32(%rdi),%rcx | |||
addq 0(%rsi),%r8 | |||
adcq 8(%rsi),%r9 | |||
adcq 16(%rsi),%r10 | |||
adcq 24(%rsi),%r11 | |||
adcq 32(%rsi),%rcx | |||
movq %r8,0(%rdx) | |||
movq %r9,8(%rdx) | |||
movq %r10,16(%rdx) | |||
movq %r11,24(%rdx) | |||
movq %rcx,32(%rdx) | |||
/* Limbs 5..6: the movq instructions above leave CF untouched, so the     */
/* carry chain continues here.                                             */
movq 40(%rdi),%r8 | |||
movq 48(%rdi),%r9 | |||
adcq 40(%rsi),%r8 | |||
adcq 48(%rsi),%r9 | |||
movq %r8,40(%rdx) | |||
movq %r9,48(%rdx) | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/* sike_mpsubx2_asm: 14-limb subtraction, (%rdx) = (%rdi) - (%rsi), done   */
/* as one borrow chain in three groups (limbs 0-4, 5-9, 10-13).            */
/* On return %rax is all ones if the subtraction borrowed out of the top   */
/* limb and 0 otherwise.                                                   */
.globl sike_mpsubx2_asm | |||
.hidden sike_mpsubx2_asm | |||
.type sike_mpsubx2_asm,@function | |||
sike_mpsubx2_asm: | |||
.cfi_startproc | |||
/* rax = 0; it receives the final borrow as a mask. */
xorq %rax,%rax | |||
/* Limbs 0..4. */
movq 0(%rdi),%r8 | |||
movq 8(%rdi),%r9 | |||
movq 16(%rdi),%r10 | |||
movq 24(%rdi),%r11 | |||
movq 32(%rdi),%rcx | |||
subq 0(%rsi),%r8 | |||
sbbq 8(%rsi),%r9 | |||
sbbq 16(%rsi),%r10 | |||
sbbq 24(%rsi),%r11 | |||
sbbq 32(%rsi),%rcx | |||
movq %r8,0(%rdx) | |||
movq %r9,8(%rdx) | |||
movq %r10,16(%rdx) | |||
movq %r11,24(%rdx) | |||
movq %rcx,32(%rdx) | |||
/* Limbs 5..9; movq does not modify CF, so the borrow carries over. */
movq 40(%rdi),%r8 | |||
movq 48(%rdi),%r9 | |||
movq 56(%rdi),%r10 | |||
movq 64(%rdi),%r11 | |||
movq 72(%rdi),%rcx | |||
sbbq 40(%rsi),%r8 | |||
sbbq 48(%rsi),%r9 | |||
sbbq 56(%rsi),%r10 | |||
sbbq 64(%rsi),%r11 | |||
sbbq 72(%rsi),%rcx | |||
movq %r8,40(%rdx) | |||
movq %r9,48(%rdx) | |||
movq %r10,56(%rdx) | |||
movq %r11,64(%rdx) | |||
movq %rcx,72(%rdx) | |||
/* Limbs 10..13. */
movq 80(%rdi),%r8 | |||
movq 88(%rdi),%r9 | |||
movq 96(%rdi),%r10 | |||
movq 104(%rdi),%r11 | |||
sbbq 80(%rsi),%r8 | |||
sbbq 88(%rsi),%r9 | |||
sbbq 96(%rsi),%r10 | |||
sbbq 104(%rsi),%r11 | |||
/* rax = 0 - borrow. */
sbbq $0x0,%rax | |||
movq %r8,80(%rdx) | |||
movq %r9,88(%rdx) | |||
movq %r10,96(%rdx) | |||
movq %r11,104(%rdx) | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/* sike_mpdblsubx2_asm: in-place 14-limb double subtraction,               */
/* (%rdx) = (%rdx) - (%rdi) - (%rsi), processed as a low half (limbs 0-6)  */
/* and a high half (limbs 7-13).                                           */
.globl sike_mpdblsubx2_asm | |||
.hidden sike_mpdblsubx2_asm | |||
.type sike_mpdblsubx2_asm,@function | |||
sike_mpdblsubx2_asm: | |||
.cfi_startproc | |||
/* Save r12-r13; they are restored before returning. */
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
/* rax = 0; it accumulates the borrows out of the low half. */
xorq %rax,%rax | |||
/* Low half: load limbs 0..6 of the destination operand. */
movq 0(%rdx),%r8 | |||
movq 8(%rdx),%r9 | |||
movq 16(%rdx),%r10 | |||
movq 24(%rdx),%r11 | |||
movq 32(%rdx),%r12 | |||
movq 40(%rdx),%r13 | |||
movq 48(%rdx),%rcx | |||
/* Subtract the first operand, then record the borrow in rax. */
subq 0(%rdi),%r8 | |||
sbbq 8(%rdi),%r9 | |||
sbbq 16(%rdi),%r10 | |||
sbbq 24(%rdi),%r11 | |||
sbbq 32(%rdi),%r12 | |||
sbbq 40(%rdi),%r13 | |||
sbbq 48(%rdi),%rcx | |||
adcq $0x0,%rax | |||
/* Subtract the second operand; rax ends up as 0, 1 or 2. */
subq 0(%rsi),%r8 | |||
sbbq 8(%rsi),%r9 | |||
sbbq 16(%rsi),%r10 | |||
sbbq 24(%rsi),%r11 | |||
sbbq 32(%rsi),%r12 | |||
sbbq 40(%rsi),%r13 | |||
sbbq 48(%rsi),%rcx | |||
adcq $0x0,%rax | |||
movq %r8,0(%rdx) | |||
movq %r9,8(%rdx) | |||
movq %r10,16(%rdx) | |||
movq %r11,24(%rdx) | |||
movq %r12,32(%rdx) | |||
movq %r13,40(%rdx) | |||
movq %rcx,48(%rdx) | |||
/* High half: load limbs 7..13 of the destination operand. */
movq 56(%rdx),%r8 | |||
movq 64(%rdx),%r9 | |||
movq 72(%rdx),%r10 | |||
movq 80(%rdx),%r11 | |||
movq 88(%rdx),%r12 | |||
movq 96(%rdx),%r13 | |||
movq 104(%rdx),%rcx | |||
/* Apply the accumulated low-half borrows, then subtract the first operand. */
/* NOTE(review): the borrow produced by "subq %rax,%r8" is consumed by the */
/* following sbbq on the same register %r8 rather than by the next limb    */
/* (%r9) -- confirm against upstream that this is intended.                */
subq %rax,%r8 | |||
sbbq 56(%rdi),%r8 | |||
sbbq 64(%rdi),%r9 | |||
sbbq 72(%rdi),%r10 | |||
sbbq 80(%rdi),%r11 | |||
sbbq 88(%rdi),%r12 | |||
sbbq 96(%rdi),%r13 | |||
sbbq 104(%rdi),%rcx | |||
/* Subtract the second operand (a new borrow chain starts with subq). */
subq 56(%rsi),%r8 | |||
sbbq 64(%rsi),%r9 | |||
sbbq 72(%rsi),%r10 | |||
sbbq 80(%rsi),%r11 | |||
sbbq 88(%rsi),%r12 | |||
sbbq 96(%rsi),%r13 | |||
sbbq 104(%rsi),%rcx | |||
movq %r8,56(%rdx) | |||
movq %r9,64(%rdx) | |||
movq %r10,72(%rdx) | |||
movq %r11,80(%rdx) | |||
movq %r12,88(%rdx) | |||
movq %r13,96(%rdx) | |||
movq %rcx,104(%rdx) | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_fprdc_asm: reduction of the 14-word value at (%rdi) into the 7-word
 * result at (%rsi), using the four 64-bit words stored at .Lp434p1.
 * The input is consumed one or two words at a time: each step multiplies
 * those words by .Lp434p1 and adds the product into higher words of the
 * input.
 * The routine uses mulxq (BMI2) and adcxq/adoxq (ADX); the C caller in this
 * change only dispatches here when both CPU features are reported.
 * NOTE: intermediate sums are written back into the buffer at (%rdi), so
 * the input is clobbered even though the C prototype declares it const.
 * Assuming arguments arrive in %rdi, %rsi (System V AMD64 order), these are
 * ma and mc of sike_fprdc_asm(ma, mc) -- confirm against the build target.
 */
.globl sike_fprdc_asm | |||
.hidden sike_fprdc_asm | |||
.type sike_fprdc_asm,@function | |||
sike_fprdc_asm: | |||
.cfi_startproc | |||
/*
 * Save callee-saved registers. %r14 and %r15 are saved and restored
 * although no instruction in this routine uses them.
 */
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
pushq %r15 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r15, -40 | |||
/*
 * Step 1: %r8..%r13 = (words 0 and 1 of the input) * .Lp434p1.
 * xorq zeroes %rax and clears CF/OF so the adcx/adox chains start clean.
 */
xorq %rax,%rax | |||
movq 0+0(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
xorq %rax,%rax | |||
movq 0+8(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r13,%rcx | |||
adcxq %r13,%r9 | |||
adcxq %rcx,%r10 | |||
mulxq 8+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r11 | |||
adoxq %rcx,%r10 | |||
mulxq 16+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r12 | |||
adoxq %rcx,%r11 | |||
mulxq 24+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %rax,%r13 | |||
adoxq %rcx,%r12 | |||
adoxq %rax,%r13 | |||
/* Add the product into input words 3-9 (offsets 24-72) and write back. */
xorq %rcx,%rcx | |||
addq 24(%rdi),%r8 | |||
adcq 32(%rdi),%r9 | |||
adcq 40(%rdi),%r10 | |||
adcq 48(%rdi),%r11 | |||
adcq 56(%rdi),%r12 | |||
adcq 64(%rdi),%r13 | |||
adcq 72(%rdi),%rcx | |||
movq %r8,24(%rdi) | |||
movq %r9,32(%rdi) | |||
movq %r10,40(%rdi) | |||
movq %r11,48(%rdi) | |||
movq %r12,56(%rdi) | |||
movq %r13,64(%rdi) | |||
movq %rcx,72(%rdi) | |||
/* Propagate the carry through input words 10-13 (offsets 80-104). */
movq 80(%rdi),%r8 | |||
movq 88(%rdi),%r9 | |||
movq 96(%rdi),%r10 | |||
movq 104(%rdi),%r11 | |||
adcq $0x0,%r8 | |||
adcq $0x0,%r9 | |||
adcq $0x0,%r10 | |||
adcq $0x0,%r11 | |||
movq %r8,80(%rdi) | |||
movq %r9,88(%rdi) | |||
movq %r10,96(%rdi) | |||
movq %r11,104(%rdi) | |||
/* Step 2: same multiplication for input words 2 and 3 (offsets 16, 24). */
xorq %rax,%rax | |||
movq 16+0(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
xorq %rax,%rax | |||
movq 16+8(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r13,%rcx | |||
adcxq %r13,%r9 | |||
adcxq %rcx,%r10 | |||
mulxq 8+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r11 | |||
adoxq %rcx,%r10 | |||
mulxq 16+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r12 | |||
adoxq %rcx,%r11 | |||
mulxq 24+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %rax,%r13 | |||
adoxq %rcx,%r12 | |||
adoxq %rax,%r13 | |||
/* Add the product into input words 5-11 (offsets 40-88) and write back. */
xorq %rcx,%rcx | |||
addq 40(%rdi),%r8 | |||
adcq 48(%rdi),%r9 | |||
adcq 56(%rdi),%r10 | |||
adcq 64(%rdi),%r11 | |||
adcq 72(%rdi),%r12 | |||
adcq 80(%rdi),%r13 | |||
adcq 88(%rdi),%rcx | |||
movq %r8,40(%rdi) | |||
movq %r9,48(%rdi) | |||
movq %r10,56(%rdi) | |||
movq %r11,64(%rdi) | |||
movq %r12,72(%rdi) | |||
movq %r13,80(%rdi) | |||
movq %rcx,88(%rdi) | |||
/* Propagate the carry through input words 12-13 (offsets 96, 104). */
movq 96(%rdi),%r8 | |||
movq 104(%rdi),%r9 | |||
adcq $0x0,%r8 | |||
adcq $0x0,%r9 | |||
movq %r8,96(%rdi) | |||
movq %r9,104(%rdi) | |||
/* Step 3: same multiplication for input words 4 and 5 (offsets 32, 40). */
xorq %rax,%rax | |||
movq 32+0(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
xorq %rax,%rax | |||
movq 32+8(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r13,%rcx | |||
adcxq %r13,%r9 | |||
adcxq %rcx,%r10 | |||
mulxq 8+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r11 | |||
adoxq %rcx,%r10 | |||
mulxq 16+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %r13,%r12 | |||
adoxq %rcx,%r11 | |||
mulxq 24+.Lp434p1(%rip),%rcx,%r13 | |||
adcxq %rax,%r13 | |||
adoxq %rcx,%r12 | |||
adoxq %rax,%r13 | |||
/*
 * Add the product to input words 7-13 (offsets 56-104). The two lowest
 * sums are result words 0 and 1; the remaining five are written back to
 * input words 9-13 for the last step.
 */
xorq %rcx,%rcx | |||
addq 56(%rdi),%r8 | |||
adcq 64(%rdi),%r9 | |||
adcq 72(%rdi),%r10 | |||
adcq 80(%rdi),%r11 | |||
adcq 88(%rdi),%r12 | |||
adcq 96(%rdi),%r13 | |||
adcq 104(%rdi),%rcx | |||
movq %r8,0(%rsi) | |||
movq %r9,8(%rsi) | |||
movq %r10,72(%rdi) | |||
movq %r11,80(%rdi) | |||
movq %r12,88(%rdi) | |||
movq %r13,96(%rdi) | |||
movq %rcx,104(%rdi) | |||
/* Step 4: %r8..%r12 = (input word 6, offset 48) * .Lp434p1. */
xorq %rax,%rax | |||
movq 48(%rdi),%rdx | |||
mulxq 0+.Lp434p1(%rip),%r8,%r9 | |||
mulxq 8+.Lp434p1(%rip),%r12,%r10 | |||
mulxq 16+.Lp434p1(%rip),%r13,%r11 | |||
adoxq %r12,%r9 | |||
adoxq %r13,%r10 | |||
mulxq 24+.Lp434p1(%rip),%r13,%r12 | |||
adoxq %r13,%r11 | |||
adoxq %rax,%r12 | |||
/* Add input words 9-13 and store the sums as result words 2-6. */
addq 72(%rdi),%r8 | |||
adcq 80(%rdi),%r9 | |||
adcq 88(%rdi),%r10 | |||
adcq 96(%rdi),%r11 | |||
adcq 104(%rdi),%r12 | |||
movq %r8,16(%rsi) | |||
movq %r9,24(%rsi) | |||
movq %r10,32(%rsi) | |||
movq %r11,40(%rsi) | |||
movq %r12,48(%rsi) | |||
/* Restore callee-saved registers in reverse order. */
popq %r15 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
/*
 * sike_mpmul_asm: product of the two 7-word values at (%rdi) and (%rsi),
 * written as 14 words to the buffer passed in %rdx.
 * Each operand is split into a low part (words 0-3) and a high part
 * (words 4-6). Three partial products are formed -- (low+high sums),
 * low*low and high*high -- and combined at the end.
 * The routine uses mulxq (BMI2) and adcxq/adoxq (ADX); the C caller in this
 * change only dispatches here when both CPU features are reported.
 * Stack frame (96 bytes): 0-31 sum of the first operand's halves,
 * 32-63 sum of the second operand's halves, 64-95 carry correction term.
 * Assuming arguments arrive in %rdi, %rsi, %rdx (System V AMD64 order),
 * these are a, b and c of sike_mpmul_asm(a, b, c) -- confirm against the
 * build target.
 */
.globl sike_mpmul_asm | |||
.hidden sike_mpmul_asm | |||
.type sike_mpmul_asm,@function | |||
sike_mpmul_asm: | |||
.cfi_startproc | |||
pushq %r12 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r12, -16 | |||
pushq %r13 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r13, -24 | |||
pushq %r14 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r14, -32 | |||
pushq %r15 | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset r15, -40 | |||
/* Keep the output pointer in %rcx: mulxq uses %rdx as implicit operand. */
movq %rdx,%rcx | |||
xorq %rax,%rax | |||
/* %r8..%r11 = words 0-3 of the first operand. */
movq 0(%rdi),%r8 | |||
movq 8(%rdi),%r9 | |||
movq 16(%rdi),%r10 | |||
movq 24(%rdi),%r11 | |||
pushq %rbx | |||
.cfi_adjust_cfa_offset 8 | |||
.cfi_offset rbx, -48 | |||
pushq %rbp | |||
.cfi_offset rbp, -56 | |||
.cfi_adjust_cfa_offset 8 | |||
/* Reserve the 96-byte scratch frame. */
subq $96,%rsp | |||
.cfi_adjust_cfa_offset 96 | |||
/*
 * Add words 4-6 of the first operand; %rax becomes all ones if the 4-word
 * sum carried out, else 0. The sum is kept at 0-24(%rsp).
 */
addq 32(%rdi),%r8 | |||
adcq 40(%rdi),%r9 | |||
adcq 48(%rdi),%r10 | |||
adcq $0x0,%r11 | |||
sbbq $0x0,%rax | |||
movq %r8,0(%rsp) | |||
movq %r9,8(%rsp) | |||
movq %r10,16(%rsp) | |||
movq %r11,24(%rsp) | |||
/*
 * Same for the second operand: sum at 32-56(%rsp), carry mask in %rbx.
 */
xorq %rbx,%rbx | |||
movq 0(%rsi),%r12 | |||
movq 8(%rsi),%r13 | |||
movq 16(%rsi),%r14 | |||
movq 24(%rsi),%r15 | |||
addq 32(%rsi),%r12 | |||
adcq 40(%rsi),%r13 | |||
adcq 48(%rsi),%r14 | |||
adcq $0x0,%r15 | |||
sbbq $0x0,%rbx | |||
movq %r12,32(%rsp) | |||
movq %r13,40(%rsp) | |||
movq %r14,48(%rsp) | |||
movq %r15,56(%rsp) | |||
/*
 * Correction term for the carries dropped from the two sums:
 * (second sum AND first carry mask) + (first sum AND second carry mask),
 * stored at 64-88(%rsp).
 */
andq %rax,%r12 | |||
andq %rax,%r13 | |||
andq %rax,%r14 | |||
andq %rax,%r15 | |||
andq %rbx,%r8 | |||
andq %rbx,%r9 | |||
andq %rbx,%r10 | |||
andq %rbx,%r11 | |||
addq %r12,%r8 | |||
adcq %r13,%r9 | |||
adcq %r14,%r10 | |||
adcq %r15,%r11 | |||
movq %r8,64(%rsp) | |||
movq %r9,72(%rsp) | |||
movq %r10,80(%rsp) | |||
movq %r11,88(%rsp) | |||
/*
 * Product 1: (first sum at 0(%rsp)) * (second sum at 32(%rsp)), 4x4 words.
 * The 8-word result overwrites 0-56(%rsp); the words of the second sum are
 * only overwritten after the last mulxq has read them.
 */
movq 0+0(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r9,%r8 | |||
movq %r9,0+0(%rsp) | |||
mulxq 32+8(%rsp),%r10,%r9 | |||
xorq %rax,%rax | |||
adoxq %r10,%r8 | |||
mulxq 32+16(%rsp),%r11,%r10 | |||
adoxq %r11,%r9 | |||
mulxq 32+24(%rsp),%r12,%r11 | |||
adoxq %r12,%r10 | |||
movq 0+8(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r12,%r13 | |||
adoxq %rax,%r11 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsp),%r15,%r14 | |||
adoxq %r8,%r12 | |||
movq %r12,0+8(%rsp) | |||
adcxq %r15,%r13 | |||
mulxq 32+16(%rsp),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r9,%r13 | |||
mulxq 32+24(%rsp),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r10,%r14 | |||
movq 0+16(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r8,%r9 | |||
adoxq %r11,%r15 | |||
adoxq %rax,%rbx | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsp),%r11,%r10 | |||
adoxq %r13,%r8 | |||
movq %r8,0+16(%rsp) | |||
adcxq %r11,%r9 | |||
mulxq 32+16(%rsp),%r12,%r11 | |||
adcxq %r12,%r10 | |||
adoxq %r14,%r9 | |||
mulxq 32+24(%rsp),%rbp,%r12 | |||
adcxq %rbp,%r11 | |||
adcxq %rax,%r12 | |||
adoxq %r15,%r10 | |||
adoxq %rbx,%r11 | |||
adoxq %rax,%r12 | |||
movq 0+24(%rsp),%rdx | |||
mulxq 32+0(%rsp),%r8,%r13 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsp),%r15,%r14 | |||
adcxq %r15,%r13 | |||
adoxq %r8,%r9 | |||
mulxq 32+16(%rsp),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r13,%r10 | |||
mulxq 32+24(%rsp),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r14,%r11 | |||
adoxq %r15,%r12 | |||
adoxq %rax,%rbx | |||
movq %r9,0+24(%rsp) | |||
movq %r10,0+32(%rsp) | |||
movq %r11,0+40(%rsp) | |||
movq %r12,0+48(%rsp) | |||
movq %rbx,0+56(%rsp) | |||
/*
 * Product 2: (words 0-3 of first operand) * (words 0-3 of second operand),
 * 8 words stored to output words 0-7 (offsets 0-56 from %rcx).
 */
movq 0+0(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r9,%r8 | |||
movq %r9,0+0(%rcx) | |||
mulxq 0+8(%rsi),%r10,%r9 | |||
xorq %rax,%rax | |||
adoxq %r10,%r8 | |||
mulxq 0+16(%rsi),%r11,%r10 | |||
adoxq %r11,%r9 | |||
mulxq 0+24(%rsi),%r12,%r11 | |||
adoxq %r12,%r10 | |||
movq 0+8(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r12,%r13 | |||
adoxq %rax,%r11 | |||
xorq %rax,%rax | |||
mulxq 0+8(%rsi),%r15,%r14 | |||
adoxq %r8,%r12 | |||
movq %r12,0+8(%rcx) | |||
adcxq %r15,%r13 | |||
mulxq 0+16(%rsi),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r9,%r13 | |||
mulxq 0+24(%rsi),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r10,%r14 | |||
movq 0+16(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r8,%r9 | |||
adoxq %r11,%r15 | |||
adoxq %rax,%rbx | |||
xorq %rax,%rax | |||
mulxq 0+8(%rsi),%r11,%r10 | |||
adoxq %r13,%r8 | |||
movq %r8,0+16(%rcx) | |||
adcxq %r11,%r9 | |||
mulxq 0+16(%rsi),%r12,%r11 | |||
adcxq %r12,%r10 | |||
adoxq %r14,%r9 | |||
mulxq 0+24(%rsi),%rbp,%r12 | |||
adcxq %rbp,%r11 | |||
adcxq %rax,%r12 | |||
adoxq %r15,%r10 | |||
adoxq %rbx,%r11 | |||
adoxq %rax,%r12 | |||
movq 0+24(%rdi),%rdx | |||
mulxq 0+0(%rsi),%r8,%r13 | |||
xorq %rax,%rax | |||
mulxq 0+8(%rsi),%r15,%r14 | |||
adcxq %r15,%r13 | |||
adoxq %r8,%r9 | |||
mulxq 0+16(%rsi),%rbx,%r15 | |||
adcxq %rbx,%r14 | |||
adoxq %r13,%r10 | |||
mulxq 0+24(%rsi),%rbp,%rbx | |||
adcxq %rbp,%r15 | |||
adcxq %rax,%rbx | |||
adoxq %r14,%r11 | |||
adoxq %r15,%r12 | |||
adoxq %rax,%rbx | |||
movq %r9,0+24(%rcx) | |||
movq %r10,0+32(%rcx) | |||
movq %r11,0+40(%rcx) | |||
movq %r12,0+48(%rcx) | |||
movq %rbx,0+56(%rcx) | |||
/*
 * Product 3: (words 4-6 of first operand) * (words 4-6 of second operand),
 * 3x3 words, 6 words stored to output words 8-13 (offsets 64-104).
 */
movq 32+0(%rdi),%rdx | |||
mulxq 32+0(%rsi),%r9,%r8 | |||
movq %r9,64+0(%rcx) | |||
mulxq 32+8(%rsi),%r10,%r9 | |||
xorq %rax,%rax | |||
adoxq %r10,%r8 | |||
mulxq 32+16(%rsi),%r11,%r10 | |||
adoxq %r11,%r9 | |||
movq 32+8(%rdi),%rdx | |||
mulxq 32+0(%rsi),%r12,%r11 | |||
adoxq %rax,%r10 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsi),%r14,%r13 | |||
adoxq %r8,%r12 | |||
movq %r12,64+8(%rcx) | |||
adcxq %r14,%r11 | |||
mulxq 32+16(%rsi),%r8,%r14 | |||
adoxq %r9,%r11 | |||
adcxq %r8,%r13 | |||
adcxq %rax,%r14 | |||
adoxq %r10,%r13 | |||
movq 32+16(%rdi),%rdx | |||
mulxq 32+0(%rsi),%r8,%r9 | |||
adoxq %rax,%r14 | |||
xorq %rax,%rax | |||
mulxq 32+8(%rsi),%r10,%r12 | |||
adoxq %r11,%r8 | |||
movq %r8,64+16(%rcx) | |||
adcxq %r13,%r9 | |||
mulxq 32+16(%rsi),%r11,%r8 | |||
adcxq %r14,%r12 | |||
adcxq %rax,%r8 | |||
adoxq %r10,%r9 | |||
adoxq %r12,%r11 | |||
adoxq %rax,%r8 | |||
movq %r9,64+24(%rcx) | |||
movq %r11,64+32(%rcx) | |||
movq %r8,64+40(%rcx) | |||
/*
 * Combine. %r8..%r11 = correction term + upper four words of product 1
 * (now at 32-56(%rsp)); %r12..%r15 = lower four words of product 1.
 */
movq 64(%rsp),%r8 | |||
movq 72(%rsp),%r9 | |||
movq 80(%rsp),%r10 | |||
movq 88(%rsp),%r11 | |||
movq 32(%rsp),%rax | |||
addq %rax,%r8 | |||
movq 40(%rsp),%rax | |||
adcq %rax,%r9 | |||
movq 48(%rsp),%rax | |||
adcq %rax,%r10 | |||
movq 56(%rsp),%rax | |||
adcq %rax,%r11 | |||
movq 0(%rsp),%r12 | |||
movq 8(%rsp),%r13 | |||
movq 16(%rsp),%r14 | |||
movq 24(%rsp),%r15 | |||
/* Subtract product 2 (output words 0-7) from the 8-word middle value. */
subq 0(%rcx),%r12 | |||
sbbq 8(%rcx),%r13 | |||
sbbq 16(%rcx),%r14 | |||
sbbq 24(%rcx),%r15 | |||
sbbq 32(%rcx),%r8 | |||
sbbq 40(%rcx),%r9 | |||
sbbq 48(%rcx),%r10 | |||
sbbq 56(%rcx),%r11 | |||
/* Subtract product 3 (output words 8-13) and propagate the borrow. */
subq 64(%rcx),%r12 | |||
sbbq 72(%rcx),%r13 | |||
sbbq 80(%rcx),%r14 | |||
sbbq 88(%rcx),%r15 | |||
sbbq 96(%rcx),%r8 | |||
sbbq 104(%rcx),%r9 | |||
sbbq $0x0,%r10 | |||
sbbq $0x0,%r11 | |||
/*
 * Add the middle value into output words 4-11 (offsets 32-88) and
 * propagate the carry through output words 12 and 13.
 */
addq 32(%rcx),%r12 | |||
movq %r12,32(%rcx) | |||
adcq 40(%rcx),%r13 | |||
movq %r13,40(%rcx) | |||
adcq 48(%rcx),%r14 | |||
movq %r14,48(%rcx) | |||
adcq 56(%rcx),%r15 | |||
movq %r15,56(%rcx) | |||
adcq 64(%rcx),%r8 | |||
movq %r8,64(%rcx) | |||
adcq 72(%rcx),%r9 | |||
movq %r9,72(%rcx) | |||
adcq 80(%rcx),%r10 | |||
movq %r10,80(%rcx) | |||
adcq 88(%rcx),%r11 | |||
movq %r11,88(%rcx) | |||
movq 96(%rcx),%r12 | |||
adcq $0x0,%r12 | |||
movq %r12,96(%rcx) | |||
movq 104(%rcx),%r13 | |||
adcq $0x0,%r13 | |||
movq %r13,104(%rcx) | |||
/* Release the scratch frame and restore callee-saved registers. */
addq $96,%rsp | |||
.cfi_adjust_cfa_offset -96 | |||
popq %rbp | |||
.cfi_adjust_cfa_offset -8 | |||
.cfi_same_value rbp | |||
popq %rbx | |||
.cfi_adjust_cfa_offset -8 | |||
.cfi_same_value rbx | |||
popq %r15 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r14 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r13 | |||
.cfi_adjust_cfa_offset -8 | |||
popq %r12 | |||
.cfi_adjust_cfa_offset -8 | |||
/* 0xf3,0xc3 is the encoding of "rep ret". */
.byte 0xf3,0xc3 | |||
.cfi_endproc | |||
@@ -0,0 +1,207 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: portable modular arithmetic for P434 | |||
*********************************************************************************************/ | |||
#include "common/utils.h" | |||
#include "utils.h" | |||
#include "fpx.h" | |||
// Prototypes of the assembly routines used by this file.
// Every call site below is compiled under PQC_ASM, so the declarations are
// guarded by the same macro; with the previous PQC_NOASM guard a build that
// defined both macros would call these functions without a prototype.
#ifdef PQC_ASM
// NOTE: the x86-64 implementation of sike_fprdc_asm reads 2*NWORDS_FIELD
// words from ma and stores intermediate sums back into that buffer, despite
// the const qualifier.
void sike_fprdc_asm(const felm_t ma, felm_t mc);
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
#endif
// Global constants | |||
extern const struct params_t params; | |||
// Full product of two words: c[0] receives the low word and c[1] the high
// word of a*b. The product is assembled from the four half-word partial
// products, so no double-width integer type is needed.
static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
{
    const unsigned int half = sizeof(crypto_word_t) * 4;        // bits in half a word
    const crypto_word_t lo_mask = (crypto_word_t)(-1) >> half;
    const crypto_word_t hi_mask = (crypto_word_t)(-1) << half;

    // Split both operands into half-words.
    const crypto_word_t a_lo = a & lo_mask;
    const crypto_word_t a_hi = a >> half;
    const crypto_word_t b_lo = b & lo_mask;
    const crypto_word_t b_hi = b >> half;

    // The four partial products.
    const crypto_word_t ll = a_lo * b_lo;
    const crypto_word_t lh = a_lo * b_hi;
    const crypto_word_t hl = a_hi * b_lo;
    const crypto_word_t hh = a_hi * b_hi;

    // Second half-word column: high half of ll plus the low halves of the
    // cross terms. Its overflow (mid >> half) carries into the next column.
    const crypto_word_t mid = (ll >> half) + (hl & lo_mask) + (lh & lo_mask);
    c[0] = (ll & lo_mask) ^ (mid << half);                      // C00 | C01

    // Third column: high halves of the cross terms, low half of hh and the
    // carry from the second column.
    const crypto_word_t top = (hl >> half) + (lh >> half) + (hh & lo_mask) + (mid >> half);
    c[1] = (top & lo_mask) ^ ((hh & hi_mask) + (top & hi_mask)); // C10 | C11
}
// Modular addition, c = a+b mod p434.
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
// The portable path adds, subtracts params.prime_x2, and then adds
// params.prime_x2 back under a mask derived from the borrow, so no branch
// depends on the operands.
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpadd_asm(a,b,c);
#else
    unsigned int k, cy;
    crypto_word_t select;

    // c = a + b (the final carry is discarded)
    for (k = 0, cy = 0; k < NWORDS_FIELD; k++) {
        ADDC(cy, a[k], b[k], cy, c[k]);
    }

    // c = c - prime_x2; cy ends up holding the borrow
    for (k = 0, cy = 0; k < NWORDS_FIELD; k++) {
        SUBC(cy, c[k], params.prime_x2[k], cy, c[k]);
    }

    // select is all ones when the subtraction borrowed, zero otherwise
    select = 0 - (crypto_word_t)cy;

    // c = c + (prime_x2 & select)
    for (k = 0, cy = 0; k < NWORDS_FIELD; k++) {
        ADDC(cy, c[k], params.prime_x2[k] & select, cy, c[k]);
    }
#endif
}
// Modular subtraction, c = a-b mod p434.
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
// The portable path subtracts and then adds params.prime_x2 back under a
// mask derived from the borrow, so no branch depends on the operands.
void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpsub_asm(a,b,c);
#else
    unsigned int k, bw;
    crypto_word_t select;

    // c = a - b; bw ends up holding the borrow
    for (k = 0, bw = 0; k < NWORDS_FIELD; k++) {
        SUBC(bw, a[k], b[k], bw, c[k]);
    }

    // select is all ones when the subtraction borrowed, zero otherwise
    select = 0 - (crypto_word_t)bw;

    // c = c + (prime_x2 & select)
    for (k = 0, bw = 0; k < NWORDS_FIELD; k++) {
        ADDC(bw, c[k], params.prime_x2[k] & select, bw, c[k]);
    }
#endif
}
// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
// The result occupies 2*NWORDS_FIELD words. Output columns are produced one
// at a time; a three-word accumulator (acc_hi:acc_mid:acc_lo) collects all
// partial products that belong to the current column.
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
{
#ifdef PQC_ASM
    // The assembly routine is only taken when the CPU reports both BMI2 and ADX.
    if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
        sike_mpmul_asm(a,b,c);
        return;
    }
#endif

    crypto_word_t acc_hi = 0, acc_mid = 0, acc_lo = 0, prod[2];
    unsigned int cy = 0;
    unsigned int col, row;

    for (col = 0; col < 2*NWORDS_FIELD-1; col++) {
        // Rows contributing to this column: 0..col for the lower columns,
        // col-NWORDS_FIELD+1..NWORDS_FIELD-1 for the upper ones.
        const unsigned int first = (col < NWORDS_FIELD) ? 0 : col-NWORDS_FIELD+1;
        const unsigned int last  = (col < NWORDS_FIELD) ? col : NWORDS_FIELD-1;

        for (row = first; row <= last; row++) {
            MUL(a[row], b[col-row], prod+1, prod[0]);
            ADDC(0, prod[0], acc_lo, cy, acc_lo);
            ADDC(cy, prod[1], acc_mid, cy, acc_mid);
            acc_hi += cy;
        }

        // Emit the finished column and shift the accumulator down one word.
        c[col]  = acc_lo;
        acc_lo  = acc_mid;
        acc_mid = acc_hi;
        acc_hi  = 0;
    }
    c[2*NWORDS_FIELD-1] = acc_lo;
}
// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434. | |||
// mc = ma*R^-1 mod p434x2, where R = 2^448. | |||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. | |||
// ma is assumed to be in Montgomery representation. | |||
void sike_fprdc(const felm_t ma, felm_t mc) | |||
{ | |||
#ifdef PQC_ASM | |||
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) { | |||
sike_fprdc_asm(ma, mc); | |||
return; | |||
} | |||
#endif | |||
unsigned int i, j, carry, count = ZERO_WORDS; | |||
crypto_word_t UV[2], t = 0, u = 0, v = 0; | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
mc[i] = 0; | |||
} | |||
for (i = 0; i < NWORDS_FIELD; i++) { | |||
for (j = 0; j < i; j++) { | |||
if (j < (i-ZERO_WORDS+1)) { | |||
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]); | |||
ADDC(0, UV[0], v, carry, v); | |||
ADDC(carry, UV[1], u, carry, u); | |||
t += carry; | |||
} | |||
} | |||
ADDC(0, v, ma[i], carry, v); | |||
ADDC(carry, u, 0, carry, u); | |||
t += carry; | |||
mc[i] = v; | |||
v = u; | |||
u = t; | |||
t = 0; | |||
} | |||
for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { | |||
if (count > 0) { | |||
count -= 1; | |||
} | |||
for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { | |||
if (j < (NWORDS_FIELD-count)) { | |||
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]); | |||
ADDC(0, UV[0], v, carry, v); | |||
ADDC(carry, UV[1], u, carry, u); | |||
t += carry; | |||
} | |||
} | |||
ADDC(0, v, ma[i], carry, v); | |||
ADDC(carry, u, 0, carry, u); | |||
t += carry; | |||
mc[i-NWORDS_FIELD] = v; | |||
v = u; | |||
u = t; | |||
t = 0; | |||
} | |||
ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); | |||
mc[NWORDS_FIELD-1] = v; | |||
} |
@@ -0,0 +1,282 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: core functions over GF(p) and GF(p^2) | |||
*********************************************************************************************/ | |||
#include <stddef.h> | |||
#include "utils.h" | |||
#include "fpx.h" | |||
extern const struct params_t params; | |||
// Montgomery squaring: the double-length product ma*ma is formed with
// sike_mpmul and reduced into mc with sike_fprdc.
static void fpsqr_mont(const felm_t ma, felm_t mc)
{
    dfelm_t wide = {0};   // double-length intermediate product

    sike_mpmul(ma, ma, wide);
    sike_fprdc(wide, mc);
}
// Chain to compute a^(p-3)/4 using Montgomery arithmetic.
// The fixed addition chain is stored as a table of steps. Each step squares
// the accumulator nsqr times and then multiplies it by one precomputed power
// of a (or by a itself).
static void fpinv_chain_mont(felm_t a)
{
    // idx selects the entry of the precomputed table; a negative idx selects
    // the input a itself.
    static const struct { unsigned char nsqr; signed char idx; } steps[] = {
        { 7,  5}, {10, 14}, { 6,  3}, { 6, 23}, { 6, 13}, { 6, 24}, { 6,  7},
        { 8, 12}, { 8, 30}, { 6,  1}, { 6, 30}, { 7, 21}, { 9,  2}, { 9, 19},
        { 9,  1}, { 7, 24}, { 6, 26}, { 6, 16}, { 7, 10}, { 7,  6}, { 7,  0},
        { 9, 20}, { 8,  9}, { 6, 25}, { 9, 30}, { 6, 26}, { 6, -1}, { 7, 28},
        { 6,  6}, { 6, 10}, { 9, 22}
    };
    felm_t table[31], acc;
    size_t s;
    unsigned int r;

    // Precomputed table: table[0] = a^3, table[k+1] = table[k] * a^2.
    fpsqr_mont(a, acc);
    sike_fpmul_mont(a, acc, table[0]);
    for (s = 0; s < 30; s++) {
        sike_fpmul_mont(table[s], acc, table[s+1]);
    }

    // Run the tabulated part of the chain, starting from a.
    sike_fpcopy(a, acc);
    for (s = 0; s < sizeof(steps)/sizeof(steps[0]); s++) {
        for (r = 0; r < steps[s].nsqr; r++) {
            fpsqr_mont(acc, acc);
        }
        sike_fpmul_mont(steps[s].idx < 0 ? a : table[steps[s].idx], acc, acc);
    }

    // Tail of the chain: 35 repetitions of six squarings and a
    // multiplication by table[30].
    for (s = 0; s < 35; s++) {
        for (r = 0; r < 6; r++) {
            fpsqr_mont(acc, acc);
        }
        sike_fpmul_mont(table[30], acc, acc);
    }

    sike_fpcopy(acc, a);
}
// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p.
// Runs the fixed exponentiation chain on a copy of a, squares the result
// twice and multiplies it by the original a.
static void fpinv_mont(felm_t a)
{
    felm_t pw = {0};
    unsigned int r;

    sike_fpcopy(a, pw);
    fpinv_chain_mont(pw);
    for (r = 0; r < 2; r++) {
        fpsqr_mont(pw, pw);
    }
    sike_fpmul_mont(a, pw, a);
}
// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. | |||
#ifndef PQC_ASM | |||
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    // Word-by-word addition with carry propagation; the carry out of the
    // last word is returned.
    uint8_t cy = 0;
    size_t k = 0;

    while (k < nwords) {
        ADDC(cy, a[k], b[k], cy, c[k]);
        k++;
    }
    return cy;
}
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords.
// Subtracts word by word with borrow propagation and returns the borrow out
// of the last word.
inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint32_t bw = 0;
    size_t k = 0;

    while (k < nwords) {
        SUBC(bw, a[k], b[k], bw, c[k]);
        k++;
    }
    return bw;
}
#endif | |||
// Multiprecision addition of two NWORDS_FIELD-word values, c = a+b.
// No modular reduction is applied and the carry out is not reported.
// Dispatches to the assembly routine when PQC_ASM is defined, otherwise to
// the portable mp_add.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#ifndef PQC_ASM
    mp_add(a, b, c, NWORDS_FIELD);
#else
    sike_mpadd_asm(a, b, c);
#endif
}
// Multiprecision subtraction of two 2*NWORDS_FIELD-word values, c = a-b.
// Returns a mask: 0xFF..F if the subtraction borrowed (c < 0), otherwise
// 0x00..0.
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    // Turn the 0/1 borrow into an all-zeros/all-ones mask.
    const crypto_word_t bw = (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD);
    return (0 - bw);
#else
    return sike_mpsubx2_asm(a, b, c);
#endif
}
// In-place double subtraction of 2*NWORDS_FIELD-word values, c = c-a-b.
// Inputs should be s.t. c > a and c > b; any borrow is ignored.
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    // Portable path: two plain subtractions, first a, then b.
    mp_sub(c, a, c, 2*NWORDS_FIELD);
    mp_sub(c, b, c, 2*NWORDS_FIELD);
#else
    sike_mpdblsubx2_asm(a, b, c);
#endif
}
// Copies the NWORDS_FIELD words of field element a into c.
void sike_fpcopy(const felm_t a, felm_t c) {
    size_t k = 0;

    while (k < NWORDS_FIELD) {
        c[k] = a[k];
        k++;
    }
}
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod prime,
// where R is the Montgomery radix implemented by sike_fprdc.
// The double-length product is formed with sike_mpmul and then reduced.
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{
    dfelm_t wide = {0};   // double-length intermediate product

    sike_mpmul(ma, mb, wide);
    sike_fprdc(wide, mc);
}
// Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
// Implemented as a Montgomery multiplication by the integer 1 followed by a
// final correction of the result.
void sike_from_mont(const felm_t ma, felm_t c)
{
    felm_t unit = {0};   // the integer 1: only the lowest word is set

    unit[0] = 1;
    sike_fpmul_mont(ma, unit, c);
    sike_fpcorrection(c);
}
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Uses c0 = (a0+a1)(a0-a1) and c1 = 2*a0*a1. All temporaries are computed
// before c is written, and c1 is written last.
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
    felm_t sum, diff, twice;

    mp_addfast(a->c0, a->c1, sum);            // sum   = a0+a1
    sike_fpsub(a->c0, a->c1, diff);           // diff  = a0-a1
    mp_addfast(a->c0, a->c0, twice);          // twice = 2a0

    sike_fpmul_mont(sum, diff, c->c0);        // c0 = (a0+a1)(a0-a1)
    sike_fpmul_mont(twice, a->c1, c->c1);     // c1 = 2a0*a1
}
// Modular negation, a = -a mod p.
// Input/output: a in [0, 2*p-1]
// Computed in place as params.prime_x2 - a; the final borrow is discarded.
void sike_fpneg(felm_t a) {
    uint32_t bw = 0;
    size_t k = 0;

    while (k < NWORDS_FIELD) {
        SUBC(bw, params.prime_x2[k], a[k], bw, a[k]);
        k++;
    }
}
// Modular division by two, c = a/2 mod p.
// Input : a in [0, 2*p-1]
// Output: c in [0, 2*p-1]
// If a is odd, params.prime is added first (selected through a mask, not a
// branch); the sum is then shifted right by one bit.
void sike_fpdiv2(const felm_t a, felm_t c) {
    // All ones when the lowest bit of a is set, zero otherwise.
    const crypto_word_t odd = 0 - (crypto_word_t)(a[0] & 1);
    uint32_t cy = 0;
    size_t k;

    // c = a + (prime & odd)
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(cy, a[k], params.prime[k] & odd, cy, c[k]);
    }

    // Multiprecision right shift by one: each word takes its new top bit
    // from the word above it.
    for (k = 0; k < NWORDS_FIELD-1; k++) {
        c[k] = (c[k] >> 1) ^ (c[k+1] << (RADIX - 1));
    }
    c[NWORDS_FIELD-1] >>= 1;
}
// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
// Subtracts params.prime in place and adds it back under a mask when the
// subtraction borrowed, so no branch depends on a.
void sike_fpcorrection(felm_t a) {
    uint32_t bw = 0;
    crypto_word_t select;
    size_t k;

    // a = a - prime; bw ends up holding the borrow
    for (k = 0; k < NWORDS_FIELD; k++) {
        SUBC(bw, a[k], params.prime[k], bw, a[k]);
    }

    // select is all ones when the subtraction borrowed, zero otherwise
    select = 0 - (crypto_word_t)bw;

    // a = a + (prime & select)
    bw = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(bw, a[k], params.prime[k] & select, bw, a[k]);
    }
}
// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2).
// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Three multiprecision products are formed (a0*b0, a1*b1 and
// (a0+a1)*(b0+b1)); both output components are derived from them and then
// reduced with sike_fprdc.
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
    felm_t sum_a, sum_b;
    dfelm_t prod0, prod1, cross;
    crypto_word_t neg;
    size_t k;

    mp_addfast(a->c0, a->c1, sum_a);                       // sum_a = a0+a1
    mp_addfast(b->c0, b->c1, sum_b);                       // sum_b = b0+b1
    sike_mpmul(a->c0, b->c0, prod0);                       // prod0 = a0*b0
    sike_mpmul(a->c1, b->c1, prod1);                       // prod1 = a1*b1
    sike_mpmul(sum_a, sum_b, cross);                       // cross = (a0+a1)*(b0+b1)
    mp_dblsubfast(prod0, prod1, cross);                    // cross = (a0+a1)*(b0+b1) - a0*b0 - a1*b1

    // prod0 = a0*b0 - a1*b1; neg is 0xFF..F if that difference is negative, else 0x00..0
    neg = mp_subfast(prod0, prod1, prod0);

    // sum_a is reused to hold the prime, or zero, depending on neg.
    for (k = 0; k < NWORDS_FIELD; k++) {
        sum_a[k] = params.prime[k] & neg;
    }

    sike_fprdc(cross, c->c1);                              // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1

    // Add the masked prime to the upper half of prod0 before reducing.
    mp_addfast(&prod0[NWORDS_FIELD], sum_a, &prod0[NWORDS_FIELD]);
    sike_fprdc(prod0, c->c0);                              // c[0] = a0*b0 - a1*b1
}
// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2).
// The input is overwritten: the conjugate is multiplied by the inverse of the
// norm a0^2+a1^2, which is computed in the base field.
void sike_fp2inv_mont(f2elm_t a) {
    f2elm_t norm;   // c0 holds the norm and then its inverse; c1 is scratch

    fpsqr_mont(a->c0, norm->c0);                      // norm0 = a0^2
    fpsqr_mont(a->c1, norm->c1);                      // norm1 = a1^2
    sike_fpadd(norm->c0, norm->c1, norm->c0);         // norm0 = a0^2+a1^2
    fpinv_mont(norm->c0);                             // norm0 = (a0^2+a1^2)^-1

    sike_fpneg(a->c1);                                // a = a0-i*a1
    sike_fpmul_mont(a->c0, norm->c0, a->c0);
    sike_fpmul_mont(a->c1, norm->c0, a->c1);          // a = (a0-i*a1)*(a0^2+a1^2)^-1
}
@@ -0,0 +1,110 @@ | |||
#ifndef FPX_H_ | |||
#define FPX_H_ | |||
#include "utils.h" | |||
#if defined(__cplusplus) | |||
extern "C" { | |||
#endif | |||
// Prototypes of the GF(p) / GF(p^2) arithmetic primitives. Only the | |||
// declarations are visible in this header; the descriptions state the | |||
// intended contract of each routine. | |||
// Modular addition, c = a+b mod p. | |||
void sike_fpadd(const felm_t a, const felm_t b, felm_t c); | |||
// Modular subtraction, c = a-b mod p. | |||
void sike_fpsub(const felm_t a, const felm_t b, felm_t c); | |||
// Modular division by two, c = a/2 mod p. | |||
void sike_fpdiv2(const felm_t a, felm_t c); | |||
// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1]. | |||
void sike_fpcorrection(felm_t a); | |||
// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. | |||
// The product is double-width (dfelm_t). | |||
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c); | |||
// 434-bit Montgomery reduction of a double-width value, c = a mod p | |||
void sike_fprdc(const dfelm_t a, felm_t c); | |||
// Double 2x434-bit multiprecision subtraction, c = c-a-b | |||
// NOTE(review): the operands are declared felm_t although the description is | |||
// double-width; confirm against the assembly implementation. | |||
void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c); | |||
// Multiprecision subtraction on double-width operands, c = a-b | |||
// NOTE(review): the crypto_word_t result is presumably the final borrow; confirm. | |||
crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c); | |||
// 434-bit multiprecision addition, c = a+b | |||
void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c); | |||
// Modular negation, a = -a mod p. | |||
void sike_fpneg(felm_t a); | |||
// Copy of a field element, c = a | |||
void sike_fpcopy(const felm_t a, felm_t c); | |||
// Zero a field element, a = 0 (going by the name and the single in/out argument). | |||
void sike_fpzero(felm_t a); | |||
// Conversion from Montgomery representation to standard representation, | |||
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. | |||
void sike_from_mont(const felm_t ma, felm_t c); | |||
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434. | |||
// NOTE(review): this comment used to say R=2^768; for seven 64-bit limbs R would be 2^448 - confirm against NWORDS_FIELD. | |||
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc); | |||
// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2) | |||
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); | |||
// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) | |||
void sike_fp2inv_mont(f2elm_t a); | |||
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). | |||
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c); | |||
// Modular correction, a = a in GF(p^2). | |||
// Note: a function-like macro with the same name is defined further down in | |||
// this header, so call sites that include the header expand the macro instead. | |||
void sike_fp2correction(f2elm_t a); | |||
#if defined(__cplusplus) | |||
} // extern C | |||
#endif | |||
// GF(p^2) addition, c = a+b in GF(p^2).
// Applies sike_fpadd to the c0 and c1 components independently.
// Each argument is expanded twice, so pass side-effect-free expressions.
// Arguments are parenthesized so that any pointer-valued expression (not just
// a plain identifier) binds correctly before `->`.
#define sike_fp2add(a, b, c)                       \
    do {                                           \
        sike_fpadd((a)->c0, (b)->c0, (c)->c0);     \
        sike_fpadd((a)->c1, (b)->c1, (c)->c1);     \
    } while(0)
// GF(p^2) subtraction, c = a-b in GF(p^2).
// Applies sike_fpsub to the c0 and c1 components independently.
// Each argument is expanded twice, so pass side-effect-free expressions.
// Arguments are parenthesized so that any pointer-valued expression binds
// correctly before `->`.
#define sike_fp2sub(a, b, c)                       \
    do {                                           \
        sike_fpsub((a)->c0, (b)->c0, (c)->c0);     \
        sike_fpsub((a)->c1, (b)->c1, (c)->c1);     \
    } while(0)
// Copy a GF(p^2) element, c = a.
// Copies the c0 and c1 components with sike_fpcopy.
// Each argument is expanded twice, so pass side-effect-free expressions.
// Arguments are parenthesized so that any pointer-valued expression binds
// correctly before `->`.
#define sike_fp2copy(a, c)                 \
    do {                                   \
        sike_fpcopy((a)->c0, (c)->c0);     \
        sike_fpcopy((a)->c1, (c)->c1);     \
    } while(0)
// GF(p^2) negation, a = -a in GF(p^2).
// Negates the c0 and c1 components in place with sike_fpneg.
// The argument is expanded twice, so pass a side-effect-free expression.
// It is parenthesized so that any pointer-valued expression binds correctly
// before `->`.
#define sike_fp2neg(a)             \
    do {                           \
        sike_fpneg((a)->c0);       \
        sike_fpneg((a)->c1);       \
    } while(0)
// GF(p^2) division by two, c = a/2 in GF(p^2).
// Halves the c0 and c1 components independently with sike_fpdiv2.
// Each argument is expanded twice, so pass side-effect-free expressions.
// Arguments are parenthesized so that any pointer-valued expression binds
// correctly before `->`.
#define sike_fp2div2(a, c)                 \
    do {                                   \
        sike_fpdiv2((a)->c0, (c)->c0);     \
        sike_fpdiv2((a)->c1, (c)->c1);     \
    } while(0)
// Modular correction, a = a in GF(p^2).
// Runs sike_fpcorrection on the c0 and c1 components in place.
// This macro shares its name with a function prototype declared earlier in
// this header; after this point the macro is what call sites expand.
// The argument is expanded twice, so pass a side-effect-free expression.
// It is parenthesized so that any pointer-valued expression binds correctly
// before `->`.
#define sike_fp2correction(a)              \
    do {                                   \
        sike_fpcorrection((a)->c0);        \
        sike_fpcorrection((a)->c1);        \
    } while(0)
// Conversion of a GF(p^2) element to Montgomery representation,
// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
// Multiplies each component by params.mont_R2 with sike_fpmul_mont, so a
// `params` object must be visible where the macro is expanded.
// Each argument is expanded twice, so pass side-effect-free expressions.
// Arguments are parenthesized so that any pointer-valued expression binds
// correctly before `->`.
#define sike_to_fp2mont(a, mc)                                 \
    do {                                                       \
        sike_fpmul_mont((a)->c0, params.mont_R2, (mc)->c0);    \
        sike_fpmul_mont((a)->c1, params.mont_R2, (mc)->c1);    \
    } while(0)
// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
// c_i = ma_i*R^(-1) = a_i in GF(p^2).
// Converts the c0 and c1 components independently with sike_from_mont.
// Each argument is expanded twice, so pass side-effect-free expressions.
// Arguments are parenthesized so that any pointer-valued expression binds
// correctly before `->`.
#define sike_from_fp2mont(ma, c)                   \
    do {                                           \
        sike_from_mont((ma)->c0, (c)->c0);         \
        sike_from_mont((ma)->c1, (c)->c1);         \
    } while(0)
#endif // FPX_H_ |
@@ -0,0 +1,262 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: elliptic curve and isogeny functions | |||
*********************************************************************************************/ | |||
#include <stddef.h> | |||
#include <string.h> | |||
#include "utils.h" | |||
#include "isogeny.h" | |||
#include "fpx.h" | |||
// xDBL: a single x-only doubling, Q = [2]P, using two temporaries. | |||
// P is only read by the first two operations (t0, t1), before anything is | |||
// written to Q, so calling it with Q == P (as xDBLe does) is safe. | |||
// Cost as written: 2 squarings and 4 multiplications in GF(p^2). | |||
static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24) | |||
{ // Doubling of a Montgomery point in projective coordinates (X:Z). | |||
// Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. | |||
// Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). | |||
f2elm_t t0, t1; | |||
sike_fp2sub(P->X, P->Z, t0); // t0 = X1-Z1 | |||
sike_fp2add(P->X, P->Z, t1); // t1 = X1+Z1 | |||
sike_fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 | |||
sike_fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 | |||
sike_fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 | |||
sike_fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 | |||
sike_fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 | |||
sike_fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] | |||
sike_fp2add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 | |||
sike_fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] | |||
} | |||
// Repeated doubling on a Montgomery curve: Q <- [2^e]P.
//   P        projective x-only input point (XP:ZP), xP = XP/ZP
//   Q        receives the result; it may be the same object as P, because the
//            initial copy uses memmove and the doubling step is done in place
//   A24plus  curve constant A+2C
//   C24      curve constant 4C
//   e        number of doublings; with e == 0 Q is simply a copy of P
void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
{
    // Seed the accumulator with P, then double it in place e times.
    memmove(Q, P, sizeof(*P));
    for (size_t remaining = e; remaining > 0; remaining--) {
        xDBL(Q, Q, A24plus, C24);
    }
}
// get_4_isog: derive the codomain curve constants and the evaluation | |||
// coefficients of the 4-isogeny whose kernel is generated by P. | |||
// Writes A24plus, C24 and coeff[0..2]; coeff must therefore have room for at | |||
// least three GF(p^2) elements. P itself is not modified. | |||
void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff) | |||
{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. | |||
// Input: projective point of order four P = (X4:Z4). | |||
// Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients | |||
// that are used to evaluate the isogeny at a point in eval_4_isog(). | |||
sike_fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4 | |||
sike_fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4 | |||
sike_fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2 | |||
sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2 | |||
sike_fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4 | |||
sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2 | |||
sike_fp2sqr_mont(P->X, A24plus); // A24plus = X4^2 | |||
sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 | |||
sike_fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 | |||
} | |||
// eval_4_isog: push the point P through a 4-isogeny, in place. | |||
// Reads coeff[0], coeff[1] and coeff[2] (as produced by get_4_isog) and | |||
// overwrites both coordinates of P with the image point. | |||
void eval_4_isog(point_proj_t P, f2elm_t* coeff) | |||
{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined | |||
// by the 3 coefficients in coeff (computed in the function get_4_isog()). | |||
// Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). | |||
// Output: the projective point P = phi(P) = (X:Z) in the codomain. | |||
f2elm_t t0, t1; | |||
sike_fp2add(P->X, P->Z, t0); // t0 = X+Z | |||
sike_fp2sub(P->X, P->Z, t1); // t1 = X-Z | |||
sike_fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1] | |||
sike_fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2] | |||
sike_fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) | |||
sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z) | |||
sike_fp2add(P->X, P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] | |||
sike_fp2sub(P->X, P->Z, P->Z); // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2] (the sign does not matter: Z is squared below) | |||
sike_fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 | |||
sike_fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 | |||
sike_fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 | |||
sike_fp2sub(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) | |||
sike_fp2mul_mont(P->X, t1, P->X); // Xfinal | |||
sike_fp2mul_mont(P->Z, t0, P->Z); // Zfinal | |||
} | |||
// xTPL: a single x-only tripling, Q = [3]P. | |||
// P is read only while forming t0 and t1, and Q is written only by the two | |||
// final multiplications, so calling it with Q == P (as xTPLe does) is safe. | |||
void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus) | |||
{ // Tripling of a Montgomery point in projective coordinates (X:Z). | |||
// Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. | |||
// Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). | |||
f2elm_t t0, t1, t2, t3, t4, t5, t6; | |||
sike_fp2sub(P->X, P->Z, t0); // t0 = X-Z | |||
sike_fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 | |||
sike_fp2add(P->X, P->Z, t1); // t1 = X+Z | |||
sike_fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 | |||
sike_fp2add(t0, t1, t4); // t4 = 2*X | |||
sike_fp2sub(t1, t0, t0); // t0 = 2*Z | |||
sike_fp2sqr_mont(t4, t1); // t1 = 4*X^2 | |||
sike_fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 | |||
sike_fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 | |||
sike_fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2 | |||
sike_fp2mul_mont(t3, t5, t3); // t3 = (X+Z)^2 * t5 = A24plus*(X+Z)^4 | |||
sike_fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 | |||
sike_fp2mul_mont(t2, t6, t2); // t2 = (X-Z)^2 * t6 = A24minus*(X-Z)^4 | |||
sike_fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 | |||
sike_fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 | |||
sike_fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] | |||
sike_fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^4 - A24plus*(X+Z)^4 | |||
sike_fp2sqr_mont(t2, t2); // t2 = t2^2 | |||
sike_fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2 | |||
sike_fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] | |||
sike_fp2sqr_mont(t1, t1); // t1 = t1^2 | |||
sike_fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1 | |||
} | |||
// Repeated tripling on a Montgomery curve: Q <- [3^e]P.
//   P         projective x-only input point (XP:ZP), xP = XP/ZP
//   Q         receives the result; it may be the same object as P, because the
//             initial copy uses memmove and the tripling step is done in place
//   A24minus  curve constant A-2C
//   A24plus   curve constant A+2C
//   e         number of triplings; with e == 0 Q is simply a copy of P
void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
{
    // Seed the accumulator with P, then triple it in place e times.
    memmove(Q, P, sizeof(*P));
    for (size_t remaining = e; remaining > 0; remaining--) {
        xTPL(Q, Q, A24minus, A24plus);
    }
}
// get_3_isog: derive the codomain curve constants and the evaluation | |||
// coefficients of the 3-isogeny whose kernel is generated by P. | |||
// Writes A24minus, A24plus and coeff[0..1]; P itself is not modified. | |||
void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff) | |||
{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. | |||
// Input: projective point of order three P = (X3:Z3). | |||
// Output: the 3-isogenous Montgomery curve with projective coefficient A/C, | |||
// returned as A24minus and A24plus, plus the two coefficients used by eval_3_isog(). | |||
f2elm_t t0, t1, t2, t3, t4; | |||
sike_fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z | |||
sike_fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2 | |||
sike_fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z | |||
sike_fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2 | |||
sike_fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2 | |||
sike_fp2add(coeff[0], coeff[1], t3); // t3 = 2*X | |||
sike_fp2sqr_mont(t3, t3); // t3 = 4*X^2 | |||
sike_fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2 | |||
sike_fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2 | |||
sike_fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2 | |||
sike_fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 | |||
sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) | |||
sike_fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 | |||
sike_fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] | |||
sike_fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 | |||
sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) | |||
sike_fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 | |||
sike_fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] | |||
sike_fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] | |||
sike_fp2add(A24minus, t0, A24plus); // A24plus = A24minus + t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] | |||
} | |||
// eval_3_isog: push the point Q through a 3-isogeny, in place. | |||
// Reads coeff[0] and coeff[1] (as produced by get_3_isog) and overwrites both | |||
// coordinates of Q with the image point. | |||
void eval_3_isog(point_proj_t Q, f2elm_t* coeff) | |||
{ // Computes the 3-isogeny image phi(X:Z) of Q. The kernel point (X3:Z3) of order 3 is not | |||
// passed in; it is represented only by the 2 coefficients in coeff (computed in the function get_3_isog()). | |||
// Inputs: the projective point Q = (X:Z) and the coefficients coeff[0], coeff[1]. | |||
// Output: the projective point Q <- phi(Q), written back over the input coordinates. | |||
f2elm_t t0, t1, t2; | |||
sike_fp2add(Q->X, Q->Z, t0); // t0 = X+Z | |||
sike_fp2sub(Q->X, Q->Z, t1); // t1 = X-Z | |||
sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z) | |||
sike_fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z) | |||
sike_fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z) | |||
sike_fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z) | |||
sike_fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2 | |||
sike_fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2 | |||
sike_fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2 | |||
sike_fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2 | |||
} | |||
// inv_3_way: invert three GF(p^2) elements with a single field inversion | |||
// (one call to sike_fp2inv_mont plus six multiplications). | |||
// 1/z1 is held in t3 until the end because z1 is still needed to form 1/z2. | |||
void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3) | |||
{ // 3-way simultaneous inversion | |||
// Input: z1,z2,z3 | |||
// Output: 1/z1,1/z2,1/z3 (the inputs are overwritten in place). | |||
f2elm_t t0, t1, t2, t3; | |||
sike_fp2mul_mont(z1, z2, t0); // t0 = z1*z2 | |||
sike_fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 | |||
sike_fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) | |||
sike_fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) | |||
sike_fp2mul_mont(t2, z2, t3); // t3 = 1/z1 | |||
sike_fp2mul_mont(t2, z1, z2); // z2 = 1/z2 | |||
sike_fp2mul_mont(t0, t1, z3); // z3 = 1/z3 | |||
sike_fp2copy(t3, z1); // z1 = 1/z1 | |||
} | |||
// get_A: recover the Montgomery coefficient A from three x-coordinates. | |||
// As computed below: | |||
//   A = (xP*xQ + xR*(xP+xQ) - 1)^2 / (4*xP*xQ*xR) - (xP + xQ + xR) | |||
// A is first written while xR is still needed, so A must not alias xR. | |||
void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A) | |||
{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. | |||
// Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. | |||
// Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. | |||
f2elm_t t0, t1, one = F2ELM_INIT; | |||
extern const struct params_t params; | |||
// one = params.mont_one in c0; c1 keeps the value given by F2ELM_INIT. | |||
sike_fpcopy(params.mont_one, one->c0); | |||
sike_fp2add(xP, xQ, t1); // t1 = xP+xQ | |||
sike_fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ | |||
sike_fp2mul_mont(xR, t1, A); // A = xR*t1 | |||
sike_fp2add(t0, A, A); // A = A+t0 | |||
sike_fp2mul_mont(t0, xR, t0); // t0 = t0*xR | |||
sike_fp2sub(A, one, A); // A = A-1 | |||
sike_fp2add(t0, t0, t0); // t0 = t0+t0 | |||
sike_fp2add(t1, xR, t1); // t1 = t1+xR | |||
sike_fp2add(t0, t0, t0); // t0 = t0+t0 | |||
sike_fp2sqr_mont(A, A); // A = A^2 | |||
sike_fp2inv_mont(t0); // t0 = 1/t0 | |||
sike_fp2mul_mont(A, t0, A); // A = A*t0 | |||
sike_fp2sub(A, t1, A); // Afinal = A-t1 | |||
} | |||
// j_inv: j-invariant of a Montgomery curve given projectively by (A : C). | |||
// jinv is written before C is read, so jinv must not alias C. | |||
// One field inversion is used, on C^4*(A^2-4*C^2). | |||
void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv) | |||
{ // Computes the j-invariant of a Montgomery curve with projective constant. | |||
// Input: A,C in GF(p^2). | |||
// Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. | |||
f2elm_t t0, t1; | |||
sike_fp2sqr_mont(A, jinv); // jinv = A^2 | |||
sike_fp2sqr_mont(C, t1); // t1 = C^2 | |||
sike_fp2add(t1, t1, t0); // t0 = t1+t1 = 2*C^2 | |||
sike_fp2sub(jinv, t0, t0); // t0 = jinv-t0 = A^2-2*C^2 | |||
sike_fp2sub(t0, t1, t0); // t0 = t0-t1 = A^2-3*C^2 | |||
sike_fp2sub(t0, t1, jinv); // jinv = t0-t1 = A^2-4*C^2 | |||
sike_fp2sqr_mont(t1, t1); // t1 = t1^2 = C^4 | |||
sike_fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 = C^4*(A^2-4*C^2) | |||
sike_fp2add(t0, t0, t0); // t0 = t0+t0 | |||
sike_fp2add(t0, t0, t0); // t0 = t0+t0 = 4*(A^2-3*C^2) | |||
sike_fp2sqr_mont(t0, t1); // t1 = t0^2 | |||
sike_fp2mul_mont(t0, t1, t0); // t0 = t0*t1 = 64*(A^2-3*C^2)^3 | |||
sike_fp2add(t0, t0, t0); // t0 = t0+t0 | |||
sike_fp2add(t0, t0, t0); // t0 = t0+t0 = 256*(A^2-3*C^2)^3 | |||
sike_fp2inv_mont(jinv); // jinv = 1/jinv | |||
sike_fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv | |||
} | |||
// xDBLADD: one combined ladder step. Both points are updated in place: | |||
// P becomes 2*P and Q becomes P+Q, using the affine x-coordinate xPQ of the | |||
// difference and the affine curve constant A24. | |||
void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24) | |||
{ // Simultaneous doubling and differential addition. | |||
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. | |||
// Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. | |||
f2elm_t t0, t1, t2; | |||
sike_fp2add(P->X, P->Z, t0); // t0 = XP+ZP | |||
sike_fp2sub(P->X, P->Z, t1); // t1 = XP-ZP | |||
sike_fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 | |||
sike_fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ | |||
// Apply sike_fpcorrection to both components of t2 before it is used as a | |||
// multiplicand. NOTE(review): why only this operand needs the extra | |||
// correction is not evident from this file - confirm before removing. | |||
sike_fp2correction(t2); | |||
sike_fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ | |||
sike_fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) | |||
sike_fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 | |||
sike_fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) | |||
sike_fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 | |||
sike_fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 | |||
sike_fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] | |||
sike_fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) | |||
sike_fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 | |||
sike_fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) | |||
sike_fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] | |||
sike_fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 | |||
sike_fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 | |||
sike_fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 | |||
} |
@@ -0,0 +1,49 @@ | |||
#ifndef ISOGENY_H_ | |||
#define ISOGENY_H_ | |||
// Computes [2^e](X:Z) on Montgomery curve with projective | |||
// constant via e repeated doublings. | |||
void xDBLe( | |||
const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, | |||
const f2elm_t C24, size_t e); | |||
// Simultaneous doubling and differential addition. | |||
void xDBLADD( | |||
point_proj_t P, point_proj_t Q, const f2elm_t xPQ, | |||
const f2elm_t A24); | |||
// Tripling of a Montgomery point in projective coordinates (X:Z). | |||
void xTPL( | |||
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, | |||
const f2elm_t A24plus); | |||
// Computes [3^e](X:Z) on Montgomery curve with projective constant | |||
// via e repeated triplings. | |||
void xTPLe( | |||
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, | |||
const f2elm_t A24plus, size_t e); | |||
// Given the x-coordinates of P, Q, and R, returns the value A | |||
// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. | |||
void get_A( | |||
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A); | |||
// Computes the j-invariant of a Montgomery curve with projective constant. | |||
void j_inv( | |||
const f2elm_t A, const f2elm_t C, f2elm_t jinv); | |||
// Computes the corresponding 4-isogeny of a projective Montgomery | |||
// point (X4:Z4) of order 4. | |||
void get_4_isog( | |||
const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff); | |||
// Computes the corresponding 3-isogeny of a projective Montgomery | |||
// point (X3:Z3) of order 3. | |||
void get_3_isog( | |||
const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, | |||
f2elm_t* coeff); | |||
// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) | |||
// of order 3 on a Montgomery curve and a point P with coefficients given in coeff. | |||
void eval_3_isog( | |||
point_proj_t Q, f2elm_t* coeff); | |||
// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. | |||
void eval_4_isog( | |||
point_proj_t P, f2elm_t* coeff); | |||
// 3-way simultaneous inversion | |||
void inv_3_way( | |||
f2elm_t z1, f2elm_t z2, f2elm_t z3); | |||
#endif // ISOGENY_H_ |
@@ -0,0 +1,128 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: supersingular isogeny parameters and generation of functions for P434 | |||
*********************************************************************************************/ | |||
#include "utils.h" | |||
// Parameters for isogeny system "SIKE" | |||
// Multi-limb values are listed least-significant limb first (the low limbs | |||
// of .prime are all-ones and the top limb is the short one). | |||
const struct params_t params = { | |||
// The prime p: seven 64-bit limbs, 434 significant bits. | |||
.prime = { | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF), | |||
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056), | |||
U64_TO_WORDS(0x0002341F27177344) | |||
}, | |||
// p + 1 (the three low limbs are zero). | |||
.prime_p1 = { | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000), | |||
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056), | |||
U64_TO_WORDS(0x0002341F27177344) | |||
}, | |||
// 2 * p. | |||
.prime_x2 = { | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), | |||
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF), | |||
U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC), | |||
U64_TO_WORDS(0x0004683E4E2EE688) | |||
}, | |||
// Alice's generators: six field elements in the order XP.c0, XP.c1, | |||
// XQ.c0, XQ.c1, XR.c0, XR.c1 (the layout sike_init_basis reads). | |||
.A_gen = { | |||
U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B), | |||
U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1), | |||
U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F), | |||
U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0 | |||
U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B), | |||
U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA), | |||
U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C), | |||
U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1 | |||
U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6), | |||
U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03), | |||
U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215), | |||
U64_TO_WORDS(0x0001C4CB77542876), // XQA0 | |||
U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050), | |||
U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374), | |||
U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508), | |||
U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1 | |||
U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611), | |||
U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD), | |||
U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3), | |||
U64_TO_WORDS(0x00022A81D8D55643), // XRA0 | |||
U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED), | |||
U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4), | |||
U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2), | |||
U64_TO_WORDS(0x0000B87FC716C0C6) // XRA1 | |||
}, | |||
// Bob's generators, same layout as A_gen. The c1 parts of XP and XQ are zero. | |||
.B_gen = { | |||
U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05), | |||
U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F), | |||
U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9), | |||
U64_TO_WORDS(0x0001BED4772E551F), // XPB0 | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), // XPB1 | |||
U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C), | |||
U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62), | |||
U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F), | |||
U64_TO_WORDS(0x000034080181D8AE), // XQB0 | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), // XQB1 | |||
U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647), | |||
U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D), | |||
U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504), | |||
U64_TO_WORDS(0x00007E8A50F02E37), // XRB0 | |||
U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230), | |||
U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53), | |||
U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B), | |||
U64_TO_WORDS(0x000173FA910377D3) // XRB1 | |||
}, | |||
// Multiplier used by sike_to_fp2mont to enter Montgomery form | |||
// (R^2 mod p, going by the name and that usage). | |||
.mont_R2 = { | |||
U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2), | |||
U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B), | |||
U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A), | |||
U64_TO_WORDS(0x000025A89BCDD12A) | |||
}, | |||
// The value 1 in Montgomery form; callers use it for Z = 1 and the constant one. | |||
.mont_one = { | |||
U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000), | |||
U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C), | |||
U64_TO_WORDS(0x0000ECEEA7BD2EDA) | |||
}, | |||
// Presumably the value 6 in Montgomery form (by name) - confirm at the use site. | |||
.mont_six = { | |||
U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000), | |||
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000), | |||
U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0), | |||
U64_TO_WORDS(0x00012559A0403298) | |||
}, | |||
// Traversal strategy for the 2-power isogeny tree (107 entries). Each | |||
// entry is the step count m consumed by gen_iso_A's inner loop. | |||
.A_strat = { | |||
0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, | |||
0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, | |||
0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, | |||
0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, | |||
0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, | |||
0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01, | |||
0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03, | |||
0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, | |||
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01 | |||
}, | |||
// Traversal strategy for the 3-power isogeny tree (136 entries). | |||
.B_strat = { | |||
0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01, | |||
0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01, | |||
0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, | |||
0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10, | |||
0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, | |||
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, | |||
0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01, | |||
0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, | |||
0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, | |||
0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01, | |||
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, | |||
0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, | |||
0x02, 0x01, 0x01, 0x02, 0x01, 0x01 | |||
} | |||
}; |
@@ -0,0 +1,505 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol | |||
*********************************************************************************************/ | |||
#include <assert.h> | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include <randombytes.h> | |||
#include <common/fips202.h> | |||
#include "utils.h" | |||
#include "isogeny.h" | |||
#include "fpx.h" | |||
extern const struct params_t params; | |||
// SIDH_JINV_BYTESZ is a number of bytes used for encoding j-invariant. | |||
#define SIDH_JINV_BYTESZ 110U | |||
// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny) | |||
#define SIDH_PRV_A_BITSZ 216U | |||
// SIDH_PRV_B_BITSZ is a number of bits of SIDH private key (3-isogeny) | |||
#define SIDH_PRV_B_BITSZ 217U | |||
// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation | |||
#define MAX_INT_POINTS_ALICE 7U | |||
// MAX_INT_POINTS_BOB is a number of points used in 3-isogeny tree computation | |||
#define MAX_INT_POINTS_BOB 8U | |||
// Conditional swap of two projective points, done with masked XORs.
// `option` is expected to be either 0 (P and Q keep their values) or
// all-ones 0xFF...FF (P and Q are exchanged). The same loads, XORs and stores
// are executed for every limb in both cases; no branch depends on `option`.
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
    for (size_t w = 0; w < NWORDS_FIELD; w++) {
        crypto_word_t diff;

        // X coordinate, c0 then c1.
        diff = option & (P->X->c0[w] ^ Q->X->c0[w]);
        P->X->c0[w] ^= diff;
        Q->X->c0[w] ^= diff;
        diff = option & (P->X->c1[w] ^ Q->X->c1[w]);
        P->X->c1[w] ^= diff;
        Q->X->c1[w] ^= diff;

        // Z coordinate, c0 then c1.
        diff = option & (P->Z->c0[w] ^ Q->Z->c0[w]);
        P->Z->c0[w] ^= diff;
        Q->Z->c0[w] ^= diff;
        diff = option & (P->Z->c1[w] ^ Q->Z->c1[w]);
        P->Z->c1[w] ^= diff;
        Q->Z->c1[w] ^= diff;
    }
}
// Three-point ladder driven by the bits of m (read least-significant bit
// first, byte by byte).
//   xP, xQ, xPQ  affine x-coordinates of the three input points
//   m            scalar bytes; SIDH_PRV_A_BITSZ bits are consumed when is_A is
//                non-zero, SIDH_PRV_B_BITSZ bits otherwise
//   R            output point
//   A            affine Montgomery curve coefficient
// Each iteration performs the same cswap / xDBLADD / multiply sequence; the
// scalar bit only selects the cswap mask.
// NOTE(review): in the SIDH reference this ladder yields x(P + [m]Q) with
// xPQ = x(Q-P); that is not verifiable from this file alone.
static void ladder3Pt(
    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
    int is_A, point_proj_t R, const f2elm_t A) {
    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
    f2elm_t A24 = F2ELM_INIT;
    int last_bit = 0;
    size_t nbits = SIDH_PRV_B_BITSZ;
    if (is_A) {
        nbits = SIDH_PRV_A_BITSZ;
    }

    // A24 = (A + 2) / 4, built as ((1 + 1) + A) halved twice.
    sike_fpcopy(params.mont_one, A24->c0);
    sike_fp2add(A24, A24, A24);
    sike_fp2add(A, A24, A24);
    sike_fp2div2(A24, A24);
    sike_fp2div2(A24, A24);

    // R0 = (xQ : 1), R2 = (xPQ : 1), R = (xP : 1).
    // R0 and R2 rely on POINT_PROJ_INIT for Z.c1; R is caller-provided, so its
    // Z.c1 is cleared explicitly.
    sike_fp2copy(xQ, R0->X);
    sike_fpcopy(params.mont_one, R0->Z->c0);
    sike_fp2copy(xPQ, R2->X);
    sike_fpcopy(params.mont_one, R2->Z->c0);
    sike_fp2copy(xP, R->X);
    sike_fpcopy(params.mont_one, R->Z->c0);
    memset(R->Z->c1, 0, sizeof(R->Z->c1));

    for (size_t i = 0; i < nbits; i++) {
        const int cur_bit = (m[i >> 3] >> (i & 7)) & 1;
        // Swap only when the bit differs from the previous one, so the points
        // do not have to be swapped back after every step.
        const crypto_word_t mask = 0 - (crypto_word_t)(cur_bit ^ last_bit);
        last_bit = cur_bit;

        sike_fp2cswap(R, R2, mask);
        xDBLADD(R0, R2, R->X, A24);
        sike_fp2mul_mont(R2->X, R->Z, R2->X);
    }

    // Undo the swap left pending by the last processed bit.
    sike_fp2cswap(R, R2, 0 - (crypto_word_t)last_bit);
}
// Load the three basis x-coordinates from a flat generator table.
// `gen` holds six consecutive field elements of NWORDS_FIELD limbs each, in
// the order XP.c0, XP.c1, XQ.c0, XQ.c1, XR.c0, XR.c1.
static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
    const crypto_word_t *limbs = gen;

    sike_fpcopy(limbs, XP->c0);
    limbs += NWORDS_FIELD;
    sike_fpcopy(limbs, XP->c1);
    limbs += NWORDS_FIELD;

    sike_fpcopy(limbs, XQ->c0);
    limbs += NWORDS_FIELD;
    sike_fpcopy(limbs, XQ->c1);
    limbs += NWORDS_FIELD;

    sike_fpcopy(limbs, XR->c0);
    limbs += NWORDS_FIELD;
    sike_fpcopy(limbs, XR->c1);
}
// Conversion of GF(p^2) element from Montgomery to standard representation. | |||
static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) { | |||
f2elm_t t; | |||
sike_from_fp2mont(x, t); | |||
// convert to bytes in little endian form | |||
for (size_t i=0; i<FIELD_BYTESZ; i++) { | |||
enc[i+ 0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF; | |||
enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF; | |||
} | |||
} | |||
// Deserialize a GF(p^2) element and convert it to Montgomery form.
// enc holds 2*FIELD_BYTESZ bytes: the c0 component followed by the c1
// component, each FIELD_BYTESZ octets in little-endian order (the least
// significant octet is at the lowest address). The result is left in t.
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
    const uint8_t *in_c0 = enc;
    const uint8_t *in_c1 = enc + FIELD_BYTESZ;

    // Limbs are assembled by OR-ing bytes in, so they must start out zero.
    memset(t->c0, 0, sizeof(t->c0));
    memset(t->c1, 0, sizeof(t->c1));

    for (size_t k = 0; k < FIELD_BYTESZ; k++) {
        // Byte k belongs to limb k/LSZ, at bit offset 8*(k%LSZ).
        t->c0[k/LSZ] |= ((crypto_word_t)in_c0[k]) << (8*(k%LSZ));
        t->c1[k/LSZ] |= ((crypto_word_t)in_c1[k]) << (8*(k%LSZ));
    }

    sike_to_fp2mont(t, t);
}
// Alice's ephemeral public key generation | |||
// Input:  a private key skA; ladder3Pt consumes its low SIDH_PRV_A_BITSZ (216) bits. | |||
// Output: the public key pkA consisting of 3 GF(p^2) elements written at offsets 0, | |||
//         SIDH_JINV_BYTESZ and 2*SIDH_JINV_BYTESZ, i.e. 3*110 = 330 bytes in total | |||
//         (assuming each encoded element, 2*FIELD_BYTESZ bytes, fills its 110-byte slot). | |||
// Steps: derive the kernel point R from skA, walk the 4-isogeny tree following | |||
// params.A_strat while pushing Bob's basis points through every isogeny, | |||
// then normalize the three images to affine x and serialize them. | |||
static void gen_iso_A(const uint8_t* skA, uint8_t* pkA) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_ALICE]; | |||
point_proj_t phiP = POINT_PROJ_INIT; | |||
point_proj_t phiQ = POINT_PROJ_INIT; | |||
point_proj_t phiR = POINT_PROJ_INIT; | |||
f2elm_t XPA, XQA, XRA, coeff[3]; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t C24 = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; | |||
// Initialize basis points | |||
// Alice's own basis feeds the ladder; Bob's basis (with Z = 1) is what gets | |||
// carried through the isogeny and published. | |||
sike_init_basis(params.A_gen, XPA, XQA, XRA); | |||
sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X); | |||
sike_fpcopy(params.mont_one, (phiP->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiQ->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiR->Z)->c0); | |||
// Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1 | |||
// Built by repeated addition: 1 -> 2 -> C24 = 4 -> A = 6 -> A24plus = 8. | |||
sike_fpcopy(params.mont_one, A24plus->c0); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
sike_fp2add(A24plus, A24plus, C24); | |||
sike_fp2add(A24plus, C24, A); | |||
sike_fp2add(C24, C24, A24plus); | |||
// Retrieve kernel point | |||
ladder3Pt(XPA, XQA, XRA, skA, 1, R, A); | |||
// Traverse tree | |||
// pts/pts_index form a stack of intermediate points and their depths; | |||
// each strategy entry m means "save R, then apply 2*m doublings". | |||
index = 0; | |||
for (size_t row = 1; row < A_max; row++) { | |||
while (index < A_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.A_strat[ii++]; | |||
xDBLe(R, R, A24plus, C24, (2*m)); | |||
index += m; | |||
} | |||
// R now generates the kernel of the next 4-isogeny: compute it and push | |||
// every saved point and the three basis images through it. | |||
get_4_isog(R, A24plus, C24, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_4_isog(pts[i], coeff); | |||
} | |||
eval_4_isog(phiP, coeff); | |||
eval_4_isog(phiQ, coeff); | |||
eval_4_isog(phiR, coeff); | |||
// Pop the most recently saved point and continue from its depth. | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
// Final 4-isogeny, applied to the basis images only. | |||
get_4_isog(R, A24plus, C24, coeff); | |||
eval_4_isog(phiP, coeff); | |||
eval_4_isog(phiQ, coeff); | |||
eval_4_isog(phiR, coeff); | |||
// Convert the three images to affine x = X/Z with one shared inversion. | |||
inv_3_way(phiP->Z, phiQ->Z, phiR->Z); | |||
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); | |||
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); | |||
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); | |||
// Format public key | |||
sike_fp2_encode(phiP->X, pkA); | |||
sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ); | |||
sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ); | |||
} | |||
// Bob's ephemeral key-pair generation | |||
// It produces a private key skB and computes the public key pkB. | |||
// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. | |||
// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. | |||
static void gen_iso_B(const uint8_t* skB, uint8_t* pkB) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_BOB]; | |||
point_proj_t phiP = POINT_PROJ_INIT; | |||
point_proj_t phiQ = POINT_PROJ_INIT; | |||
point_proj_t phiR = POINT_PROJ_INIT; | |||
f2elm_t XPB, XQB, XRB, coeff[3]; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t A24minus = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; | |||
// Initialize basis points | |||
sike_init_basis(params.B_gen, XPB, XQB, XRB); | |||
sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X); | |||
sike_fpcopy(params.mont_one, (phiP->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiQ->Z)->c0); | |||
sike_fpcopy(params.mont_one, (phiR->Z)->c0); | |||
// Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1 | |||
sike_fpcopy(params.mont_one, A24plus->c0); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
sike_fp2add(A24plus, A24plus, A24minus); | |||
sike_fp2add(A24plus, A24minus, A); | |||
sike_fp2add(A24minus, A24minus, A24plus); | |||
// Retrieve kernel point | |||
ladder3Pt(XPB, XQB, XRB, skB, 0, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < B_max; row++) { | |||
while (index < B_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.B_strat[ii++]; | |||
xTPLe(R, R, A24minus, A24plus, m); | |||
index += m; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_3_isog(pts[i], coeff); | |||
} | |||
eval_3_isog(phiP, coeff); | |||
eval_3_isog(phiQ, coeff); | |||
eval_3_isog(phiR, coeff); | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
eval_3_isog(phiP, coeff); | |||
eval_3_isog(phiQ, coeff); | |||
eval_3_isog(phiR, coeff); | |||
inv_3_way(phiP->Z, phiQ->Z, phiR->Z); | |||
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); | |||
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); | |||
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); | |||
// Format public key | |||
sike_fp2_encode(phiP->X, pkB); | |||
sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ); | |||
sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ); | |||
} | |||
// Alice's ephemeral shared secret computation | |||
// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB | |||
// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. | |||
// Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes. | |||
// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes. | |||
static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_ALICE]; | |||
f2elm_t coeff[3], PKB[3], jinv; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t C24 = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; | |||
// Initialize images of Bob's basis | |||
fp2_decode(pkB, PKB[0]); | |||
fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]); | |||
fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]); | |||
// Initialize constants | |||
get_A(PKB[0], PKB[1], PKB[2], A); | |||
sike_fpadd(params.mont_one, params.mont_one, C24->c0); | |||
sike_fp2add(A, C24, A24plus); | |||
sike_fpadd(C24->c0, C24->c0, C24->c0); | |||
// Retrieve kernel point | |||
ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < A_max; row++) { | |||
while (index < A_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.A_strat[ii++]; | |||
xDBLe(R, R, A24plus, C24, (2*m)); | |||
index += m; | |||
} | |||
get_4_isog(R, A24plus, C24, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_4_isog(pts[i], coeff); | |||
} | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_4_isog(R, A24plus, C24, coeff); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
sike_fp2sub(A24plus, C24, A24plus); | |||
sike_fp2add(A24plus, A24plus, A24plus); | |||
j_inv(A24plus, C24, jinv); | |||
sike_fp2_encode(jinv, ssA); | |||
} | |||
// Bob's ephemeral shared secret computation | |||
// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA | |||
// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. | |||
// Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes. | |||
// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes. | |||
static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB) | |||
{ | |||
point_proj_t R, pts[MAX_INT_POINTS_BOB]; | |||
f2elm_t coeff[3], PKB[3], jinv; | |||
f2elm_t A24plus = F2ELM_INIT; | |||
f2elm_t A24minus = F2ELM_INIT; | |||
f2elm_t A = F2ELM_INIT; | |||
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; | |||
// Initialize images of Alice's basis | |||
fp2_decode(pkA, PKB[0]); | |||
fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]); | |||
fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]); | |||
// Initialize constants | |||
get_A(PKB[0], PKB[1], PKB[2], A); | |||
sike_fpadd(params.mont_one, params.mont_one, A24minus->c0); | |||
sike_fp2add(A, A24minus, A24plus); | |||
sike_fp2sub(A, A24minus, A24minus); | |||
// Retrieve kernel point | |||
ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A); | |||
// Traverse tree | |||
index = 0; | |||
for (size_t row = 1; row < B_max; row++) { | |||
while (index < B_max-row) { | |||
sike_fp2copy(R->X, pts[npts]->X); | |||
sike_fp2copy(R->Z, pts[npts]->Z); | |||
pts_index[npts++] = index; | |||
m = params.B_strat[ii++]; | |||
xTPLe(R, R, A24minus, A24plus, m); | |||
index += m; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
for (size_t i = 0; i < npts; i++) { | |||
eval_3_isog(pts[i], coeff); | |||
} | |||
sike_fp2copy(pts[npts-1]->X, R->X); | |||
sike_fp2copy(pts[npts-1]->Z, R->Z); | |||
index = pts_index[npts-1]; | |||
npts -= 1; | |||
} | |||
get_3_isog(R, A24minus, A24plus, coeff); | |||
sike_fp2add(A24plus, A24minus, A); | |||
sike_fp2add(A, A, A); | |||
sike_fp2sub(A24plus, A24minus, A24plus); | |||
j_inv(A, A24plus, jinv); | |||
sike_fp2_encode(jinv, ssB); | |||
} | |||
int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ], | |||
uint8_t out_pub[SIKE_PUB_BYTESZ]) { | |||
// Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and < | |||
// 253 bits | |||
randombytes(out_priv, SIKE_MSG_BYTESZ); | |||
randombytes(&out_priv[SIKE_MSG_BYTESZ], SIKE_PRV_BYTESZ); | |||
out_priv[SIKE_MSG_BYTESZ+28-1] = (out_priv[SIKE_MSG_BYTESZ+28-1] & 0x01); | |||
gen_iso_B(&out_priv[SIKE_MSG_BYTESZ], out_pub); | |||
return 1; | |||
} | |||
void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
uint8_t out_ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ]) { | |||
// Secret buffer is reused by the function to store some ephemeral | |||
// secret data. It's size must be maximum of 64, | |||
// SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes. | |||
uint8_t secret[32]; // OZAPTF, why? | |||
uint8_t j[SIDH_JINV_BYTESZ]; | |||
uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ]; | |||
shake256incctx ctx; | |||
// Generate secret key for A | |||
// secret key A = SHAKE256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ | |||
randombytes(temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Generate public key for A - first part of the ciphertext | |||
gen_iso_A(secret, out_ciphertext); | |||
// Generate c1: | |||
// h = SHAKE256(j-invariant) | |||
// c1 = h ^ m | |||
ex_iso_A(secret, pub_key, j); | |||
shake256(secret, sizeof secret, j, sizeof j); | |||
// c1 = h ^ m | |||
uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ]; | |||
for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { | |||
c1[i] = temp[i] ^ secret[i]; | |||
} | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, out_ciphertext, SIKE_CT_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Generate shared secret out_shared_key = SHAKE256(m||out_ciphertext) | |||
memcpy(out_shared_key, secret, SIKE_SS_BYTESZ); | |||
} | |||
void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ], | |||
const uint8_t ciphertext[SIKE_CT_BYTESZ], | |||
const uint8_t pub_key[SIKE_PUB_BYTESZ], | |||
const uint8_t priv_key[SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ]) { | |||
// Secret buffer is reused by the function to store some ephemeral | |||
// secret data. It's size must be maximum of 64, | |||
// SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes. | |||
uint8_t secret[32]; | |||
uint8_t j[SIDH_JINV_BYTESZ]; | |||
uint8_t c0[SIKE_PUB_BYTESZ]; | |||
uint8_t temp[SIKE_MSG_BYTESZ]; | |||
shake256incctx ctx; | |||
// Recover m | |||
// Let ciphertext = c0 || c1 - both have fixed sizes | |||
// m = F(j-invariant(c0, priv_key)) ^ c1 | |||
ex_iso_B(&priv_key[SIKE_MSG_BYTESZ], ciphertext, j); | |||
shake256(secret, sizeof secret, j, sizeof j); | |||
const uint8_t *c1 = &ciphertext[sizeof(c0)]; | |||
for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { | |||
temp[i] = c1[i] ^ secret[i]; | |||
} | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Recover c0 = public key A | |||
gen_iso_A(secret, c0); | |||
crypto_word_t ok = ct_uint_eq( | |||
ct_mem_eq(c0, ciphertext, SIKE_PUB_BYTESZ), 1); | |||
for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) { | |||
temp[i] = ct_select_8(ok, temp[i], priv_key[i]); | |||
} | |||
shake256_inc_init(&ctx); | |||
shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ); | |||
shake256_inc_absorb(&ctx, ciphertext, SIKE_CT_BYTESZ); | |||
shake256_inc_finalize(&ctx); | |||
shake256_inc_squeeze(secret, 32, &ctx); | |||
shake256_inc_ctx_release(&ctx); | |||
// Generate shared secret out_shared_key = SHAKE256(m||ciphertext) | |||
memcpy(out_shared_key, secret, SIKE_SS_BYTESZ); | |||
} |
@@ -0,0 +1,214 @@ | |||
/******************************************************************************************** | |||
* SIDH: an efficient supersingular isogeny cryptography library | |||
* | |||
* Abstract: internal header file for P434 | |||
*********************************************************************************************/ | |||
#ifndef UTILS_H_ | |||
#define UTILS_H_ | |||
#include <stddef.h> | |||
#include <kem/sike/includes/sike/sike.h> | |||
// Conversion macro from number of bits to number of bytes | |||
#define BITS_TO_BYTES(nbits) (((nbits)+7)/8) | |||
// Bit size of the field | |||
#define BITS_FIELD 434 | |||
// Byte size of the field | |||
#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD) | |||
// Number of 64-bit words of a 224-bit element | |||
#define NBITS_ORDER 224 | |||
#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) | |||
// Number of elements in Alice's strategy | |||
#define A_max 108 | |||
// Number of elements in Bob's strategy | |||
#define B_max 137 | |||
// Word size in bits. The expansion is parenthesized so that the macro can be
// used safely inside larger expressions such as `n % RADIX` or `n / RADIX`.
#define RADIX           (sizeof(crypto_word_t)*8)
// Byte size of a limb
#define LSZ             sizeof(crypto_word_t)
#if defined(CPU_64_BIT) | |||
typedef uint64_t crypto_word_t; | |||
// Number of words of a 434-bit field element | |||
#define NWORDS_FIELD 7 | |||
// Number of "0" digits in the least significant part of p434 + 1 | |||
#define ZERO_WORDS 3 | |||
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal. | |||
#define U64_TO_WORDS(x) UINT64_C(x) | |||
#else | |||
typedef uint32_t crypto_word_t; | |||
// Number of words of a 434-bit field element | |||
#define NWORDS_FIELD 14 | |||
// Number of "0" digits in the least significant part of p434 + 1 | |||
#define ZERO_WORDS 6 | |||
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal. | |||
#define U64_TO_WORDS(x) \ | |||
(uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32) | |||
#endif | |||
// Extended datatype support | |||
#if !defined(HAS_UINT128) | |||
typedef uint64_t uint128_t[2]; | |||
#endif | |||
// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise | |||
// Digit multiplication | |||
#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo)); | |||
// If mask |x|==0xff.ff set |x| to 1, otherwise 0 | |||
#define M2B(x) ((x)>>(RADIX-1)) | |||
// Digit addition with carry | |||
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ | |||
do { \ | |||
crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \ | |||
(sumOut) = (addend2) + tempReg; \ | |||
(carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) | \ | |||
ct_uint_lt((sumOut), tempReg)); \ | |||
} while(0) | |||
// Digit subtraction with borrow | |||
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ | |||
do { \ | |||
crypto_word_t tempReg = (minuend) - (subtrahend); \ | |||
crypto_word_t borrowReg = M2B(ct_uint_lt((minuend), (subtrahend))); \ | |||
borrowReg |= ((borrowIn) & ct_uint_eq(tempReg, 0)); \ | |||
(differenceOut) = tempReg - (crypto_word_t)(borrowIn); \ | |||
(borrowOut) = borrowReg; \ | |||
} while(0) | |||
/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly, | |||
which violates C11 as described in 6.7.9, 21 (similarily C99, 6.7.8). | |||
Defines below are used to work around the bug, and provide a way | |||
to initialize f2elem_t and point_proj_t structs. | |||
Bug has been fixed in GCC6 (debian stretch). | |||
*/ | |||
#define F2ELM_INIT {{ {0}, {0} }} | |||
#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }} | |||
// Datatype for representing 434-bit field elements (448-bit max.)
// Elements over GF(p434) are encoded in 55 octets (FIELD_BYTESZ) in little endian format
// (i.e., the least significant octet is located in the lowest memory address).
typedef crypto_word_t felm_t[NWORDS_FIELD]; | |||
// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e. | |||
// Fp2 element = c0 + c1*i in F_{p^2} | |||
// Datatype for representing double-precision 2x434-bit field elements (448-bit max.) | |||
// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are | |||
// encoded as {a, b}, with a in the lowest memory portion. | |||
typedef struct { | |||
felm_t c0; | |||
felm_t c1; | |||
} fp2; | |||
// Our F_{p^2} element type is a pointer to the struct. | |||
typedef fp2 f2elm_t[1]; | |||
// Datatype for representing double-precision 2x434-bit | |||
// field elements in contiguous memory. | |||
typedef crypto_word_t dfelm_t[2*NWORDS_FIELD]; | |||
// Constants used during SIKE computation. | |||
struct params_t { | |||
// Stores a prime | |||
const crypto_word_t prime[NWORDS_FIELD]; | |||
// Stores prime + 1 | |||
const crypto_word_t prime_p1[NWORDS_FIELD]; | |||
// Stores prime * 2 | |||
const crypto_word_t prime_x2[NWORDS_FIELD]; | |||
// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i} | |||
// in GF(prime^2), expressed in Montgomery representation | |||
const crypto_word_t A_gen[6*NWORDS_FIELD]; | |||
// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i} | |||
// in GF(prime^2), expressed in Montgomery representation | |||
const crypto_word_t B_gen[6*NWORDS_FIELD]; | |||
// Montgomery constant mont_R2 = (2^448)^2 mod prime | |||
const crypto_word_t mont_R2[NWORDS_FIELD]; | |||
// Value 'one' in Montgomery representation | |||
const crypto_word_t mont_one[NWORDS_FIELD]; | |||
// Value '6' in Montgomery representation | |||
const crypto_word_t mont_six[NWORDS_FIELD]; | |||
// Fixed parameters for isogeny tree computation | |||
const unsigned int A_strat[A_max-1]; | |||
const unsigned int B_strat[B_max-1]; | |||
}; | |||
// Point representation in projective XZ Montgomery coordinates. | |||
typedef struct { | |||
f2elm_t X; | |||
f2elm_t Z; | |||
} point_proj; | |||
typedef point_proj point_proj_t[1]; | |||
// Compares two words without branching.
// Returns 1 if |x| == |y|, otherwise 0.
static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
{
    // diff is zero exactly when x == y.
    const crypto_word_t diff = x ^ y;
    // For diff == 0 this is 0. For diff != 0, (diff >> 1) is smaller than
    // diff, so the subtraction wraps around and the most significant bit ends
    // up set.
    const crypto_word_t folded = (diff >> 1) - diff;
    // Invert and move the most significant bit down to bit 0.
    return (~folded) >> (RADIX-1);
}
// Constant time select.
// if flag == 1 (out = in1)
// if flag == 0 (out = in2)
// Only the least significant bit of |flag| is inspected.
//
// The mask is built with unsigned arithmetic only: 0 - 1 wraps to all ones and
// 0 - 0 stays zero. This avoids converting an out-of-range value to int8_t and
// right-shifting a negative number, both of which are implementation-defined.
static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
    const uint8_t mask = (uint8_t)(0u - (unsigned int)(flag & 1u));
    return (uint8_t)((in1 & mask) | (in2 & (uint8_t)~mask));
}
// Constant time memcmp. Returns 1 if the |n| bytes at |p| and |q| are equal,
// otherwise 0. For n == 0 the result is 1.
// Every byte pair is always visited; differences are OR-accumulated so the
// loop has no data-dependent exit.
static inline int ct_mem_eq(const void *p, const void *q, size_t n)
{
    // Keep the const qualifier of the inputs when narrowing to byte pointers.
    const uint8_t *pp = (const uint8_t*)p, *qq = (const uint8_t*)q;
    uint8_t a = 0;

    while (n--) a |= *pp++ ^ *qq++;
    return (int)ct_uint_eq(a, 0);
}
// Returns a word with all bits set if the most significant bit of |a| is set,
// and 0 otherwise.
static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
    // Isolate the top bit as 0 or 1, then negate: 0 - 1 wraps to all ones.
    const crypto_word_t top_bit = a >> (sizeof(a) * 8 - 1);
    return 0u - top_bit;
}
// ct_uint_lt returns 0xff..f if x < y and 0 otherwise.
static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
{
    // The result is the most significant bit of x^((x^y)|((x-y)^x)), spread
    // over the whole word. There are two cases to consider:
    //
    //   msb(x) == msb(y): x < y iff the MSB of x - y is set.
    //     msb(x^y) == 0, so the OR contributes only msb((x-y)^x), and
    //     msb(x^((x-y)^x)) == msb(x^x^(x-y)) == msb(x-y).
    //
    //   msb(x) != msb(y): x < y iff the MSB of y is set.
    //     msb(x^y) == 1, so the MSB of the OR is 1, and
    //     msb(x^1...) == !msb(x) == msb(y).
    //
    // SMT-LIB verification of the formula (shown for 32-bit words):
    //
    // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
    //   (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
    // )
    //
    // (declare-fun a () (_ BitVec 32))
    // (declare-fun b () (_ BitVec 32))
    //
    // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
    // (check-sat)
    // (get-model)
    const crypto_word_t differing = x ^ y;
    const crypto_word_t borrowed = (x - y) ^ x;
    return constant_time_msb_w(x ^ (differing | borrowed));
}
#endif // UTILS_H_ |
@@ -235,7 +235,8 @@ pub const SPHINCSSHA256256SROBUST: ::std::os::raw::c_uint = 28; | |||
pub const SPHINCSSHA256128SROBUST: ::std::os::raw::c_uint = 29; | |||
pub const SPHINCSSHA256128FSIMPLE: ::std::os::raw::c_uint = 30; | |||
pub const SPHINCSSHA256192FROBUST: ::std::os::raw::c_uint = 31; | |||
pub const PQC_ALG_SIG_MAX: ::std::os::raw::c_uint = 32; | |||
pub const PICNIC3L1: ::std::os::raw::c_uint = 32; | |||
pub const PQC_ALG_SIG_MAX: ::std::os::raw::c_uint = 33; | |||
pub type _bindgen_ty_1 = ::std::os::raw::c_uint; | |||
pub const FRODOKEM976SHAKE: ::std::os::raw::c_uint = 0; | |||
pub const FRODOKEM1344SHAKE: ::std::os::raw::c_uint = 1; | |||
@@ -256,7 +257,8 @@ pub const SABER: ::std::os::raw::c_uint = 15; | |||
pub const HQCRMRS128: ::std::os::raw::c_uint = 16; | |||
pub const HQCRMRS192: ::std::os::raw::c_uint = 17; | |||
pub const HQCRMRS256: ::std::os::raw::c_uint = 18; | |||
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 19; | |||
pub const SIKE434: ::std::os::raw::c_uint = 19; | |||
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 20; | |||
pub type _bindgen_ty_2 = ::std::os::raw::c_uint; | |||
#[repr(C)] | |||
#[derive(Debug, Copy, Clone)] | |||
@@ -4,12 +4,14 @@ extern crate bindgen; | |||
fn main() { | |||
let dst = Config::new("../../../") | |||
.profile("Release") | |||
.profile("Debug") | |||
.very_verbose(true) | |||
.build(); | |||
.build(); | |||
println!("cargo:rustc-link-search=native={}/lib", dst.display()); | |||
println!("cargo:rustc-link-lib=static=pqc_s"); | |||
// For some reason GetX86Info symbol is undefined in the pqc_s. Hence this line | |||
println!("cargo:rustc-link-lib=static=cpu_features"); | |||
println!("cargo:rerun-if-changed=../../../capi/*,../../../kem/*,../../../sign/*,../../../../public/pqc/pqc.h"); | |||
// The bindgen::Builder is the main entry point | |||
@@ -0,0 +1,10 @@ | |||
Greg Zaverucha | |||
Sebastian Ramacher | |||
Daniel Kales | |||
Steven Goldfeder | |||
This reference implementation is derived from the earlier Picnic implementation | |||
at https://github.com/Microsoft/Picnic by Steven Goldfeder and Greg Zaverucha. | |||
The SHA-3 implementation redistributed here is from the Keccak Code Package, | |||
see https://github.com/gvanas/KeccakCodePackage for authorship. |
@@ -0,0 +1,21 @@ | |||
MIT License | |||
Copyright (c) Steven Goldfeder and Microsoft Corporation. All rights reserved. | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE.
@@ -0,0 +1,277 @@ | |||
// | |||
// PQCgenKAT_sign.c | |||
// | |||
// Created by Bassham, Lawrence E (Fed) on 8/29/17. | |||
// Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. | |||
// | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <ctype.h> | |||
#include "rng.h" | |||
#include "api.h" | |||
#define MAX_MARKER_LEN 50 | |||
#define KAT_SUCCESS 0 | |||
#define KAT_FILE_OPEN_ERROR -1 | |||
#define KAT_DATA_ERROR -3 | |||
#define KAT_CRYPTO_FAILURE -4 | |||
int FindMarker(FILE *infile, const char *marker); | |||
int ReadHex(FILE *infile, unsigned char *A, int Length, char *str); | |||
void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L); | |||
char AlgName[] = CRYPTO_ALGNAME; | |||
static const char* l1 = "L1"; | |||
static const char* l3 = "L3"; | |||
static const char* l5 = "L5"; | |||
static const char* unknown = "UNKNOWN_PARAM_SET"; | |||
int | |||
main() | |||
{ | |||
char fn_req[33], fn_rsp[33]; | |||
FILE *fp_req, *fp_rsp; | |||
unsigned char seed[48]; | |||
unsigned char msg[3300]; | |||
unsigned char entropy_input[48]; | |||
unsigned char *m, *sm, *m1; | |||
unsigned long long mlen, smlen, mlen1; | |||
int count; | |||
int done; | |||
unsigned char pk[CRYPTO_PUBLICKEYBYTES], sk[CRYPTO_SECRETKEYBYTES]; | |||
int ret_val; | |||
const char* suffix; | |||
switch (CRYPTO_PUBLICKEYBYTES) { | |||
case 1 + 2 * 16: | |||
case 1 + 2 * 17: | |||
suffix = l1; | |||
break; | |||
case 1 + 2 * 24: | |||
suffix = l3; | |||
break; | |||
case 1 + 2 * 32: | |||
suffix = l5; | |||
break; | |||
default: | |||
suffix = unknown; | |||
break; | |||
} | |||
// Create the REQUEST file | |||
sprintf(fn_req, "PQCsignKAT_%s.req", suffix); | |||
if ( (fp_req = fopen(fn_req, "w")) == NULL ) { | |||
printf("Couldn't open <%s> for write\n", fn_req); | |||
return KAT_FILE_OPEN_ERROR; | |||
} | |||
sprintf(fn_rsp, "PQCsignKAT_%s.rsp", suffix); | |||
if ( (fp_rsp = fopen(fn_rsp, "w")) == NULL ) { | |||
printf("Couldn't open <%s> for write\n", fn_rsp); | |||
return KAT_FILE_OPEN_ERROR; | |||
} | |||
for (int i=0; i<48; i++) | |||
entropy_input[i] = i; | |||
randombytes_init(entropy_input, NULL, 256); | |||
for (int i=0; i<100; i++) { | |||
fprintf(fp_req, "count = %d\n", i); | |||
randombytes(seed, 48); | |||
fprintBstr(fp_req, "seed = ", seed, 48); | |||
mlen = 33*(i+1); | |||
fprintf(fp_req, "mlen = %llu\n", mlen); | |||
randombytes(msg, mlen); | |||
fprintBstr(fp_req, "msg = ", msg, mlen); | |||
fprintf(fp_req, "pk =\n"); | |||
fprintf(fp_req, "sk =\n"); | |||
fprintf(fp_req, "smlen =\n"); | |||
fprintf(fp_req, "sm =\n\n"); | |||
} | |||
fclose(fp_req); | |||
//Create the RESPONSE file based on what's in the REQUEST file | |||
if ( (fp_req = fopen(fn_req, "r")) == NULL ) { | |||
printf("Couldn't open <%s> for read\n", fn_req); | |||
return KAT_FILE_OPEN_ERROR; | |||
} | |||
fprintf(fp_rsp, "# %s\n\n", CRYPTO_ALGNAME); | |||
done = 0; | |||
do { | |||
if ( FindMarker(fp_req, "count = ") ) | |||
fscanf(fp_req, "%d", &count); | |||
else { | |||
done = 1; | |||
break; | |||
} | |||
fprintf(fp_rsp, "count = %d\n", count); | |||
if ( !ReadHex(fp_req, seed, 48, "seed = ") ) { | |||
printf("ERROR: unable to read 'seed' from <%s>\n", fn_req); | |||
return KAT_DATA_ERROR; | |||
} | |||
fprintBstr(fp_rsp, "seed = ", seed, 48); | |||
randombytes_init(seed, NULL, 256); | |||
if ( FindMarker(fp_req, "mlen = ") ) | |||
fscanf(fp_req, "%llu", &mlen); | |||
else { | |||
printf("ERROR: unable to read 'mlen' from <%s>\n", fn_req); | |||
return KAT_DATA_ERROR; | |||
} | |||
fprintf(fp_rsp, "mlen = %llu\n", mlen); | |||
m = (unsigned char *)calloc(mlen, sizeof(unsigned char)); | |||
m1 = (unsigned char *)calloc(mlen, sizeof(unsigned char)); | |||
sm = (unsigned char *)calloc(mlen+CRYPTO_BYTES, sizeof(unsigned char)); | |||
if ( !ReadHex(fp_req, m, (int)mlen, "msg = ") ) { | |||
printf("ERROR: unable to read 'msg' from <%s>\n", fn_req); | |||
return KAT_DATA_ERROR; | |||
} | |||
fprintBstr(fp_rsp, "msg = ", m, mlen); | |||
// Generate the public/private keypair | |||
if ( (ret_val = crypto_sign_keypair(pk, sk)) != 0) { | |||
printf("crypto_sign_keypair returned <%d>\n", ret_val); | |||
return KAT_CRYPTO_FAILURE; | |||
} | |||
fprintBstr(fp_rsp, "pk = ", pk, CRYPTO_PUBLICKEYBYTES); | |||
fprintBstr(fp_rsp, "sk = ", sk, CRYPTO_SECRETKEYBYTES); | |||
if ( (ret_val = crypto_sign(sm, &smlen, m, mlen, sk)) != 0) { | |||
printf("crypto_sign returned <%d>\n", ret_val); | |||
return KAT_CRYPTO_FAILURE; | |||
} | |||
fprintf(fp_rsp, "smlen = %llu\n", smlen); | |||
fprintBstr(fp_rsp, "sm = ", sm, smlen); | |||
fprintf(fp_rsp, "\n"); | |||
if ( (ret_val = crypto_sign_open(m1, &mlen1, sm, smlen, pk)) != 0) { | |||
printf("crypto_sign_open returned <%d>\n", ret_val); | |||
return KAT_CRYPTO_FAILURE; | |||
} | |||
if ( mlen != mlen1 ) { | |||
printf("crypto_sign_open returned bad 'mlen': Got <%llu>, expected <%llu>\n", mlen1, mlen); | |||
return KAT_CRYPTO_FAILURE; | |||
} | |||
if ( memcmp(m, m1, mlen) ) { | |||
printf("crypto_sign_open returned bad 'm' value\n"); | |||
return KAT_CRYPTO_FAILURE; | |||
} | |||
free(m); | |||
free(m1); | |||
free(sm); | |||
} while ( !done ); | |||
fclose(fp_req); | |||
fclose(fp_rsp); | |||
return KAT_SUCCESS; | |||
} | |||
// | |||
// ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.) | |||
// | |||
int | |||
FindMarker(FILE *infile, const char *marker) | |||
{ | |||
char line[MAX_MARKER_LEN]; | |||
int i, len; | |||
len = (int)strlen(marker); | |||
if ( len > MAX_MARKER_LEN-1 ) | |||
len = MAX_MARKER_LEN-1; | |||
for ( i=0; i<len; i++ ) | |||
if ( (line[i] = fgetc(infile)) == EOF ) | |||
return 0; | |||
line[len] = '\0'; | |||
while ( 1 ) { | |||
if ( !strncmp(line, marker, len) ) | |||
return 1; | |||
for ( i=0; i<len-1; i++ ) | |||
line[i] = line[i+1]; | |||
if ( (line[len-1] = fgetc(infile)) == EOF ) | |||
return 0; | |||
line[len] = '\0'; | |||
} | |||
// shouldn't get here | |||
return 0; | |||
} | |||
//
// Reads the hexadecimal value that follows the marker |str| in |infile| into
// the |Length|-byte buffer |A|.
// Digits are shifted in from the right, so a value with fewer than 2*Length
// digits ends up right-aligned with leading zero bytes. Characters between the
// marker and the first digit are skipped, except that a newline ends the
// field (leaving A all zero). Returns 1 on success and 0 if the marker is not
// found. For Length == 0 a single zero byte is stored in A[0].
//
int
ReadHex(FILE *infile, unsigned char *A, int Length, char *str)
{
    int             pos, c, seen_digit = 0;
    unsigned char   nibble;

    if ( Length == 0 ) {
        A[0] = 0x00;
        return 1;
    }
    memset(A, 0x00, Length);

    if ( !FindMarker(infile, str) )
        return 0;

    for ( c = fgetc(infile); c != EOF; c = fgetc(infile) ) {
        if ( !isxdigit(c) ) {
            // A non-digit ends the value once digits have been seen; before
            // that only a newline (empty field) does.
            if ( seen_digit || c == '\n' )
                break;
            continue;
        }
        seen_digit = 1;

        if ( (c >= '0') && (c <= '9') )
            nibble = c - '0';
        else if ( (c >= 'A') && (c <= 'F') )
            nibble = c - 'A' + 10;
        else if ( (c >= 'a') && (c <= 'f') )
            nibble = c - 'a' + 10;
        else // shouldn't ever get here
            nibble = 0;

        // Shift the whole buffer left by one nibble and append the new digit.
        for ( pos=0; pos<Length-1; pos++ )
            A[pos] = (A[pos] << 4) | (A[pos+1] >> 4);
        A[Length-1] = (A[Length-1] << 4) | nibble;
    }

    return 1;
}
// Writes the label |S| followed by the |L| bytes of |A| as upper-case
// hexadecimal and a newline to |fp|. An empty buffer (L == 0) is printed
// as "00".
void
fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L)
{
    const unsigned char *cur = A;
    unsigned long long  left = L;

    fprintf(fp, "%s", S);

    if ( left == 0 )
        fprintf(fp, "00");
    while ( left-- > 0 )
        fprintf(fp, "%02X", *cur++);

    fprintf(fp, "\n");
}
@@ -0,0 +1,222 @@ | |||
// | |||
// rng.c | |||
// | |||
// Created by Bassham, Lawrence E (Fed) on 8/29/17. | |||
// Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. | |||
// | |||
#include <stdlib.h>
#include <string.h>
#include "rng.h"
#include <openssl/conf.h>
#include <openssl/evp.h>
#include <openssl/err.h>
AES256_CTR_DRBG_struct DRBG_ctx; | |||
void AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer); | |||
/* | |||
seedexpander_init() | |||
ctx - stores the current state of an instance of the seed expander | |||
seed - a 32 byte random value | |||
diversifier - an 8 byte diversifier | |||
maxlen - maximum number of bytes (less than 2**32) generated under this seed and diversifier | |||
*/ | |||
int | |||
seedexpander_init(AES_XOF_struct *ctx, | |||
unsigned char *seed, | |||
unsigned char *diversifier, | |||
unsigned long maxlen) | |||
{ | |||
if ( maxlen >= 0x100000000 ) | |||
return RNG_BAD_MAXLEN; | |||
ctx->length_remaining = maxlen; | |||
memcpy(ctx->key, seed, 32); | |||
memcpy(ctx->ctr, diversifier, 8); | |||
ctx->ctr[11] = maxlen % 256; | |||
maxlen >>= 8; | |||
ctx->ctr[10] = maxlen % 256; | |||
maxlen >>= 8; | |||
ctx->ctr[9] = maxlen % 256; | |||
maxlen >>= 8; | |||
ctx->ctr[8] = maxlen % 256; | |||
memset(ctx->ctr+12, 0x00, 4); | |||
ctx->buffer_pos = 16; | |||
memset(ctx->buffer, 0x00, 16); | |||
return RNG_SUCCESS; | |||
} | |||
/* | |||
seedexpander() | |||
ctx - stores the current state of an instance of the seed expander | |||
x - returns the XOF data | |||
xlen - number of bytes to return | |||
*/ | |||
int | |||
seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen) | |||
{ | |||
unsigned long offset; | |||
if ( x == NULL ) | |||
return RNG_BAD_OUTBUF; | |||
if ( xlen >= ctx->length_remaining ) | |||
return RNG_BAD_REQ_LEN; | |||
ctx->length_remaining -= xlen; | |||
offset = 0; | |||
while ( xlen > 0 ) { | |||
if ( xlen <= (16-ctx->buffer_pos) ) { // buffer has what we need | |||
memcpy(x+offset, ctx->buffer+ctx->buffer_pos, xlen); | |||
ctx->buffer_pos += xlen; | |||
return RNG_SUCCESS; | |||
} | |||
// take what's in the buffer | |||
memcpy(x+offset, ctx->buffer+ctx->buffer_pos, 16-ctx->buffer_pos); | |||
xlen -= 16-ctx->buffer_pos; | |||
offset += 16-ctx->buffer_pos; | |||
AES256_ECB(ctx->key, ctx->ctr, ctx->buffer); | |||
ctx->buffer_pos = 0; | |||
//increment the counter | |||
for (int i=15; i>=12; i--) { | |||
if ( ctx->ctr[i] == 0xff ) | |||
ctx->ctr[i] = 0x00; | |||
else { | |||
ctx->ctr[i]++; | |||
break; | |||
} | |||
} | |||
} | |||
return RNG_SUCCESS; | |||
} | |||
/*
 * Fatal-error path for the OpenSSL calls in this file: dump the OpenSSL error
 * queue to stderr, then terminate the process with abort(). Never returns.
 *
 * abort() is declared in <stdlib.h>, which this file includes explicitly.
 */
void handleErrors(void)
{
    ERR_print_errors_fp(stderr);
    abort();
}
// Use whatever AES implementation you have. This uses AES from openSSL library | |||
// key - 256-bit AES key | |||
// ctr - a 128-bit plaintext value | |||
// buffer - a 128-bit ciphertext value | |||
void | |||
AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer) | |||
{ | |||
EVP_CIPHER_CTX *ctx; | |||
int len; | |||
int ciphertext_len; | |||
/* Create and initialise the context */ | |||
if(!(ctx = EVP_CIPHER_CTX_new())) handleErrors(); | |||
if(1 != EVP_EncryptInit_ex(ctx, EVP_aes_256_ecb(), NULL, key, NULL)) | |||
handleErrors(); | |||
if(1 != EVP_EncryptUpdate(ctx, buffer, &len, ctr, 16)) | |||
handleErrors(); | |||
ciphertext_len = len; | |||
/* Clean up */ | |||
EVP_CIPHER_CTX_free(ctx); | |||
} | |||
void | |||
randombytes_init(unsigned char *entropy_input, | |||
unsigned char *personalization_string, | |||
int security_strength) | |||
{ | |||
unsigned char seed_material[48]; | |||
memcpy(seed_material, entropy_input, 48); | |||
if (personalization_string) | |||
for (int i=0; i<48; i++) | |||
seed_material[i] ^= personalization_string[i]; | |||
memset(DRBG_ctx.Key, 0x00, 32); | |||
memset(DRBG_ctx.V, 0x00, 16); | |||
AES256_CTR_DRBG_Update(seed_material, DRBG_ctx.Key, DRBG_ctx.V); | |||
DRBG_ctx.reseed_counter = 1; | |||
} | |||
int | |||
randombytes(unsigned char *x, unsigned long long xlen) | |||
{ | |||
unsigned char block[16]; | |||
int i = 0; | |||
while ( xlen > 0 ) { | |||
//increment V | |||
for (int j=15; j>=0; j--) { | |||
if ( DRBG_ctx.V[j] == 0xff ) | |||
DRBG_ctx.V[j] = 0x00; | |||
else { | |||
DRBG_ctx.V[j]++; | |||
break; | |||
} | |||
} | |||
AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, block); | |||
if ( xlen > 15 ) { | |||
memcpy(x+i, block, 16); | |||
i += 16; | |||
xlen -= 16; | |||
} | |||
else { | |||
memcpy(x+i, block, xlen); | |||
xlen = 0; | |||
} | |||
} | |||
AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V); | |||
DRBG_ctx.reseed_counter++; | |||
return RNG_SUCCESS; | |||
} | |||
/*
 * Derive a fresh Key (32 bytes) and V (16 bytes) in place.
 *
 * Three consecutive counter blocks are encrypted under the current Key to
 * form 48 bytes; if provided_data is non-NULL its 48 bytes are XORed in.
 * The first 32 bytes become the new Key, the last 16 the new V.
 */
void
AES256_CTR_DRBG_Update(unsigned char *provided_data,
                       unsigned char *Key,
                       unsigned char *V)
{
    unsigned char   temp[48];
    unsigned char   *out = temp;

    for (int blk=0; blk<3; blk++, out+=16) {
        /* increment V as a 128-bit big-endian counter */
        for (int pos=15; pos>=0; pos--) {
            if ( ++V[pos] != 0x00 )
                break;
        }

        AES256_ECB(Key, V, out);
    }

    if ( provided_data != NULL ) {
        for (int k=0; k<48; k++)
            temp[k] ^= provided_data[k];
    }

    memcpy(Key, temp, 32);
    memcpy(V, temp+32, 16);
}
@@ -0,0 +1,55 @@ | |||
// | |||
// rng.h | |||
// | |||
// Created by Bassham, Lawrence E (Fed) on 8/29/17. | |||
// Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. | |||
// | |||
#ifndef rng_h | |||
#define rng_h | |||
#include <stdio.h> | |||
#define RNG_SUCCESS 0 | |||
#define RNG_BAD_MAXLEN -1 | |||
#define RNG_BAD_OUTBUF -2 | |||
#define RNG_BAD_REQ_LEN -3 | |||
/* State of one seed-expander instance (see seedexpander_init / seedexpander). */
typedef struct {
    unsigned char   buffer[16];         /* most recently generated AES output block */
    int             buffer_pos;         /* index of the next unread byte in buffer; 16 = empty */
    unsigned long   length_remaining;   /* bytes that may still be requested */
    unsigned char   key[32];            /* AES-256 key, copied from the seed */
    unsigned char   ctr[16];            /* diversifier (0-7), maxlen big-endian (8-11), block counter (12-15) */
} AES_XOF_struct;
/* State of the AES-256 CTR DRBG used by randombytes_init / randombytes. */
typedef struct {
    unsigned char   Key[32];            /* current AES-256 key */
    unsigned char   V[16];              /* current counter block */
    int             reseed_counter;     /* set to 1 on init, incremented per randombytes call */
} AES256_CTR_DRBG_struct;
void | |||
AES256_CTR_DRBG_Update(unsigned char *provided_data, | |||
unsigned char *Key, | |||
unsigned char *V); | |||
int | |||
seedexpander_init(AES_XOF_struct *ctx, | |||
unsigned char *seed, | |||
unsigned char *diversifier, | |||
unsigned long maxlen); | |||
int | |||
seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen); | |||
void | |||
randombytes_init(unsigned char *entropy_input, | |||
unsigned char *personalization_string, | |||
int security_strength); | |||
int | |||
randombytes(unsigned char *x, unsigned long long xlen); | |||
#endif /* rng_h */ |
@@ -0,0 +1,73 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/* define HAVE_* for more known good configurations */ | |||
#if !defined(HAVE_POSIX_MEMALIGN) && \ | |||
((defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || defined(__APPLE__)) | |||
/* defined in POSIX and available on OS X */ | |||
#define HAVE_POSIX_MEMALIGN | |||
#endif | |||
#if !defined(HAVE_MEMALIGN) && defined(__linux__) | |||
/* always available on Linux */ | |||
#define HAVE_MEMALIGN | |||
#endif | |||
#include "compat.h" | |||
#if !defined(HAVE_ALIGNED_ALLOC) | |||
#include <errno.h> | |||
#include <stdlib.h> | |||
/* <malloc.h> declares memalign, __mingw_aligned_malloc and _aligned_malloc.
 * The MinGW compilers predefine __MINGW32__ / __MINGW64__, which are also the
 * names tested further down in this file. */
#if !defined(HAVE_POSIX_MEMALIGN) || defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)
#include <malloc.h>
#endif
/*
 * Fallback for C11 aligned_alloc.
 *
 * Rejects (NULL, errno = EINVAL) any request where alignment is not a power
 * of two or size is not a multiple of alignment; otherwise forwards to the
 * first platform allocator selected by the preprocessor. If none is
 * available, returns NULL and sets errno = ENOMEM for non-zero sizes.
 *
 * Memory obtained here must be released with aligned_free().
 */
void* aligned_alloc(size_t alignment, size_t size) {
  const size_t low_mask = alignment - 1;

  /* power-of-two alignment, and size a multiple of it */
  if ((alignment & low_mask) != 0 || (size & low_mask) != 0) {
    errno = EINVAL;
    return NULL;
  }

#if defined(HAVE_POSIX_MEMALIGN)
  /* posix_memalign needs alignment >= sizeof(void*), so round small values up */
  const size_t effective = (alignment < sizeof(void*)) ? sizeof(void*) : alignment;

  void* mem    = NULL;
  const int rc = posix_memalign(&mem, effective, size);
  if (rc != 0) {
    errno = rc;
  }
  return mem;
#elif defined(HAVE_MEMALIGN)
  return memalign(alignment, size);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  return __mingw_aligned_malloc(size, alignment);
#elif defined(_MSC_VER)
  return _aligned_malloc(size, alignment);
#else
  /* no aligned allocator available on this platform */
  if (size > 0) {
    errno = ENOMEM;
  }
  return NULL;
#endif
}
/*
 * Release memory obtained from the aligned_alloc fallback in this file.
 * The branch order mirrors aligned_alloc so that each allocator is paired
 * with its matching free routine.
 */
void aligned_free(void* ptr) {
#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
  free(ptr);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  __mingw_aligned_free(ptr);
#elif defined(_MSC_VER)
  _aligned_free(ptr);
#else
  /* aligned_alloc never succeeds in this configuration; nothing to release */
  (void)ptr;
#endif
}
#endif |
@@ -0,0 +1,16 @@ | |||
#ifndef PICNIC3_L1_FS_API_H | |||
#define PICNIC3_L1_FS_API_H | |||
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 17 + 17) | |||
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 17) | |||
#define CRYPTO_BYTES (4 + 14608) | |||
#define CRYPTO_ALGNAME "picnic3l1" | |||
#define CRYPTO_DETERMINISTIC 1 | |||
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk); | |||
int crypto_sign(unsigned char* sm, unsigned long long* smlen, const unsigned char* m, | |||
unsigned long long mlen, const unsigned char* sk); | |||
int crypto_sign_open(unsigned char* m, unsigned long long* mlen, const unsigned char* sm, | |||
unsigned long long smlen, const unsigned char* pk); | |||
#endif |
@@ -0,0 +1,188 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "bitstream.h" | |||
#include "macros.h" | |||
/*
 * Read num_bits (1..64) bits from the stream, most significant bit first,
 * advance the stream position and return them right-aligned.
 */
uint64_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 64);

  const uint8_t* src       = &bs->buffer.r[bs->position / 8];
  const unsigned int used  = bs->position % 8;
  const unsigned int avail = 8 - used; /* unread bits left in the first byte */

  bs->position += num_bits;

  /* keep only the unread low bits of the first byte */
  uint64_t acc = (*src & ((1 << avail) - 1));
  ++src;
  if (num_bits <= avail) {
    return acc >> (avail - num_bits);
  }

  unsigned int remaining = num_bits - avail;

  /* whole bytes */
  while (remaining >= 8) {
    acc = acc << 8 | *src;
    ++src;
    remaining -= 8;
  }

  /* leading bits of the final byte */
  if (remaining > 0) {
    acc = acc << remaining | ((*src >> (8 - remaining)) & ((1 << remaining) - 1));
  }

  return acc;
}
/*
 * Read num_bits (1..8) bits from the stream, most significant bit first,
 * advance the stream position and return them right-aligned. The value spans
 * at most two bytes of the buffer.
 */
uint8_t bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 8);

  const uint8_t* src       = &bs->buffer.r[bs->position / 8];
  const unsigned int used  = bs->position % 8;
  const unsigned int avail = 8 - used; /* unread bits left in the first byte */

  bs->position += num_bits;

  uint8_t acc = (*src & ((1 << avail) - 1));
  ++src;
  if (num_bits <= avail) {
    return acc >> (avail - num_bits);
  }

  /* the rest comes from the top of the following byte */
  const unsigned int remaining = num_bits - avail;
  if (remaining > 0) {
    acc = acc << remaining | ((*src >> (8 - remaining)) & ((1 << remaining) - 1));
  }

  return acc;
}
/*
 * Read num_bits (1..32) bits from the stream, most significant bit first,
 * advance the stream position and return them right-aligned.
 */
uint32_t bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 32);

  const uint8_t* src       = &bs->buffer.r[bs->position / 8];
  const unsigned int used  = bs->position % 8;
  const unsigned int avail = 8 - used; /* unread bits left in the first byte */

  bs->position += num_bits;

  uint32_t acc = (*src & ((1 << avail) - 1));
  ++src;
  if (num_bits <= avail) {
    return acc >> (avail - num_bits);
  }

  unsigned int remaining = num_bits - avail;

  /* whole bytes */
  while (remaining >= 8) {
    acc = acc << 8 | *src;
    ++src;
    remaining -= 8;
  }

  /* leading bits of the final byte */
  if (remaining > 0) {
    acc = acc << remaining | ((*src >> (8 - remaining)) & ((1 << remaining) - 1));
  }

  return acc;
}
/*
 * Append the low num_bits (1..64) bits of value to the stream, most
 * significant bit first, and advance the stream position. Target bits in
 * partially written bytes are cleared before being set, so the buffer does
 * not need to be zeroed beforehand.
 */
void bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 64);

  const unsigned int used = bs->position % 8;
  uint8_t* dst            = &bs->buffer.w[bs->position / 8];

  bs->position += num_bits;

  if (used != 0) {
    /* the top `used` bits of the current byte are already occupied */
    const unsigned int room = 8 - used;
    const unsigned int take = (room <= num_bits) ? room : num_bits;

    /* clear the destination bits, then insert the top `take` bits of value */
    *dst &= (0xFF << room) | (0xFF >> (used + take));
    *dst |= (value >> (num_bits - take)) << (room - take);
    ++dst;
    num_bits -= take;
  }

  /* whole bytes */
  while (num_bits >= 8) {
    *dst = value >> (num_bits - 8);
    ++dst;
    num_bits -= 8;
  }

  /* leftover bits go to the top of the next byte */
  if (num_bits > 0) {
    *dst &= (0xFF >> num_bits);
    *dst |= (value & ((1 << num_bits) - 1)) << (8 - num_bits);
  }
}
/*
 * Append the low num_bits (1..8) bits of value to the stream, most
 * significant bit first, and advance the stream position.
 *
 * Bits are ORed into a partially written byte without clearing it first,
 * and a freshly started byte is overwritten completely.
 */
void bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 8);

  const unsigned int used = bs->position % 8;
  uint8_t* dst            = &bs->buffer.w[bs->position / 8];

  bs->position += num_bits;

  if (used != 0) {
    /* the top `used` bits of the current byte are already occupied */
    const unsigned int room = 8 - used;
    const unsigned int take = (room <= num_bits) ? room : num_bits;

    *dst |= (value >> (num_bits - take)) << (8 - used - take);
    ++dst;
    num_bits -= take;
  }

  /* leftover bits start a new byte */
  if (num_bits > 0) {
    *dst = (value & ((1 << num_bits) - 1)) << (8 - num_bits);
  }
}
/*
 * Append the low num_bits (1..32) bits of value to the stream, most
 * significant bit first, and advance the stream position.
 *
 * Bits are ORed into a partially written byte without clearing it first,
 * and every byte started afterwards is overwritten completely.
 */
void bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 32);

  const unsigned int used = bs->position % 8;
  uint8_t* dst            = &bs->buffer.w[bs->position / 8];

  bs->position += num_bits;

  if (used != 0) {
    /* the top `used` bits of the current byte are already occupied */
    const unsigned int room = 8 - used;
    const unsigned int take = (room <= num_bits) ? room : num_bits;

    *dst |= (value >> (num_bits - take)) << (8 - used - take);
    ++dst;
    num_bits -= take;
  }

  /* whole bytes */
  while (num_bits >= 8) {
    *dst = value >> (num_bits - 8);
    ++dst;
    num_bits -= 8;
  }

  /* leftover bits start a new byte */
  if (num_bits > 0) {
    *dst = (value & ((1 << num_bits) - 1)) << (8 - num_bits);
  }
}
/*
 * Write the top `size` bits of v to the bitstream, starting with 64-bit word
 * width - 1 and moving towards word 0. A final partial word contributes only
 * its most significant bits.
 */
void mzd_to_bitstream(bitstream_t* bs, const mzd_local_t* v, const size_t width,
                      const size_t size) {
  const size_t word_bits = sizeof(uint64_t) * 8;

  const uint64_t* word = &CONST_BLOCK(v, 0)->w64[width - 1];
  size_t remaining     = size;

  /* full 64-bit words, highest index first */
  while (remaining >= word_bits) {
    bitstream_put_bits(bs, *word, word_bits);
    --word;
    remaining -= word_bits;
  }

  /* upper bits of the last, partially used word */
  if (remaining) {
    bitstream_put_bits(bs, *word >> (word_bits - remaining), remaining);
  }
}
/*
 * Read `size` bits from the bitstream into v, filling 64-bit words from index
 * width - 1 downwards. A final partial word is stored left-aligned, and all
 * lower words are set to zero.
 */
void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t width, const size_t size) {
  const size_t word_bits = sizeof(uint64_t) * 8;

  uint64_t* word        = &BLOCK(v, 0)->w64[width - 1];
  uint64_t* const first = BLOCK(v, 0)->w64;
  size_t remaining      = size;

  /* full 64-bit words, highest index first */
  while (remaining >= word_bits) {
    *word = bitstream_get_bits(bs, word_bits);
    --word;
    remaining -= word_bits;
  }

  /* last, partially used word: shift the bits up to the top */
  if (remaining) {
    *word = bitstream_get_bits(bs, remaining) << (word_bits - remaining);
    --word;
  }

  /* clear every word not covered by the stream */
  while (word >= first) {
    *word = 0;
    --word;
  }
}
@@ -0,0 +1,35 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef BITSTREAM_H | |||
#define BITSTREAM_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include "mzd_additional.h" | |||
/* Cursor over a byte buffer that is read or written bit by bit. */
typedef struct {
  /* one underlying buffer, viewed as writable (put functions) or read-only (get functions) */
  union {
    uint8_t* w;
    const uint8_t* r;
  } buffer;
  /* current offset in bits: byte index is position / 8, bit within the byte is position % 8 */
  size_t position;
} bitstream_t;
uint64_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits); | |||
uint8_t bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits); | |||
uint32_t bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits); | |||
void bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits); | |||
void bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits); | |||
void bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits); | |||
void mzd_to_bitstream(bitstream_t* bs, const mzd_local_t* v, const size_t width, const size_t size); | |||
void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t width, const size_t size); | |||
#endif |
@@ -0,0 +1,104 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC_COMPAT_H | |||
#define PICNIC_COMPAT_H | |||
/* in case cmake checks were not run, define HAVE_* for known good configurations */ | |||
#include "macros.h" | |||
#if defined(__OpenBSD__) | |||
#include <sys/param.h> | |||
#endif /* __OpenBSD__ */ | |||
#if !defined(HAVE_ALIGNED_ALLOC) && !defined(__APPLE__) && !defined(__MINGW32__) && \ | |||
!defined(__MINGW64__) && \ | |||
(defined(_ISOC11_SOURCE) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)) | |||
/* aligned_alloc was introduced in ISO C 2011 */ | |||
#define HAVE_ALIGNED_ALLOC | |||
#endif /* HAVE_ALIGNED_ALLOC */ | |||
#if !defined(HAVE_EXPLICIT_BZERO) && \ | |||
(GLIBC_CHECK(2, 25) || (defined(__OpenBSD__) && OpenBSD >= 201405) || FREEBSD_CHECK(11, 0) || \ | |||
NETBSD_CHECK(8, 0)) | |||
/* explicit_bzero was introduced in glibc 2.25, OpenBSD 5.5, FreeBSD 11.0 and NetBSD 8.0 */
#define HAVE_EXPLICIT_BZERO | |||
#endif /* HAVE_EXPLICIT_BZERO */ | |||
#if !defined(HAVE_CONSTTIME_MEMEQUAL) && NETBSD_CHECK(7, 0) | |||
/* consttime_memequal was introduced in NetBSD 7.0 */ | |||
#define HAVE_CONSTTIME_MEMEQUAL | |||
#endif /* HAVE_CONSTTIME_MEMEQUAL */ | |||
#if !defined(HAVE_TIMINGSAFE_BCMP) && ((defined(__OpenBSD__) && OpenBSD >= 201105) || \ | |||
FREEBSD_CHECK(12, 0) || MACOSX_CHECK(10, 12, 1)) | |||
/* timingsafe_bcmp was introduced in OpenBSD 4.9, FreeBSD 12.0, and MacOS X 10.12 */ | |||
#define HAVE_TIMINGSAFE_BCMP | |||
#endif /* HAVE_TIMINGSAFE_BCMP */ | |||
#if defined(HAVE_ALIGNED_ALLOC) | |||
#include <stdlib.h> | |||
#define aligned_free(ptr) free((ptr)) | |||
#else | |||
#include <stddef.h> | |||
/** | |||
* Compatibility implementation of aligned_alloc from ISO C 2011. | |||
*/ | |||
void* aligned_alloc(size_t alignment, size_t size); | |||
/** | |||
 * Some aligned_alloc compatibility implementations require custom free
* functions, so we provide one too. | |||
*/ | |||
void aligned_free(void* ptr); | |||
#endif /* HAVE_ALIGNED_ALLOC */ | |||
#include "endian_compat.h" | |||
#if !defined(HAVE_TIMINGSAFE_BCMP) | |||
/**
 * Compatibility implementation of timingsafe_bcmp from OpenBSD 4.9 and FreeBSD 12.0.
 *
 * Compares len bytes of a and b without exiting early on a mismatch.
 * Returns 0 if the buffers are equal and a non-zero value otherwise.
 */
static inline int timingsafe_bcmp(const void* a, const void* b, size_t len) {
#if defined(HAVE_CONSTTIME_MEMEQUAL)
  return !consttime_memequal(a, b, len);
#else
  const unsigned char* lhs = a;
  const unsigned char* rhs = b;
  unsigned int diff        = 0;

  /* accumulate the XOR of every byte pair; diff stays 0 only if all bytes match */
  for (size_t i = 0; i < len; ++i) {
    diff |= lhs[i] ^ rhs[i];
  }
  return diff;
#endif
}
#endif /* HAVE_TIMINGSAFE_BCMP */ | |||
#if !defined(HAVE_EXPLICIT_BZERO) | |||
#if defined(_WIN32) | |||
#include <windows.h> | |||
#endif | |||
/**
 * Compatibility implementation of explicit_bzero.
 *
 * Sets len bytes starting at a to zero. On Windows this forwards to
 * SecureZeroMemory; elsewhere the bytes are written through a volatile
 * pointer so that the stores are not optimized away.
 */
static inline void explicit_bzero(void* a, size_t len) {
#if defined(_WIN32)
  SecureZeroMemory(a, len);
#else
  volatile char* p = a;
  /* advance p, the pointer actually written through, so every byte is cleared */
  for (; len; ++p, --len) {
    *p = 0;
  }
#endif
}
#endif /* HAVE_EXPLICIT_BZERO */ | |||
#endif |
@@ -0,0 +1,122 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/* If cmake checks were not run, define some known values. */ | |||
#if !defined(HAVE_SYS_AUXV_H) && defined(__linux__) | |||
#define HAVE_SYS_AUXV_H | |||
#endif | |||
#if !defined(HAVE_ASM_HWCAP_H) && defined(__linux__) && defined(__arm__) | |||
#define HAVE_ASM_HWCAP_H | |||
#endif | |||
#include "cpu.h" | |||
#if !defined(BUILTIN_CPU_SUPPORTED) || defined(BUILTIN_CPU_SUPPORTED_BROKEN_BMI2) | |||
#if defined(__arm__) && defined(HAVE_SYS_AUXV_H) && defined(HAVE_ASM_HWCAP_H) | |||
#include <asm/hwcap.h> | |||
#include <sys/auxv.h> | |||
static unsigned int init_caps(void) { | |||
unsigned int caps = 0; | |||
if (getauxval(AT_HWCAP) & HWCAP_NEON) { | |||
caps |= CPU_CAP_NEON; | |||
} | |||
return caps; | |||
} | |||
#elif (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)) && (defined(__GNUC__) || defined(_MSC_VER)) | |||
#ifdef _MSC_VER | |||
#include <intrin.h> | |||
static unsigned init_caps(void) { | |||
unsigned int caps = 0; | |||
union { | |||
struct { | |||
unsigned int eax, ebx, ecx, edx; | |||
}; | |||
int data[4]; | |||
} regs = {0}; | |||
__cpuid(regs.data, 0); | |||
unsigned int max = regs.eax; | |||
if (max >= 1) { | |||
__cpuid(regs.data, 0); | |||
if (regs.edx & (1 << 26)) { | |||
caps |= CPU_CAP_SSE2; | |||
} | |||
if (regs.ecx & (1 << 23)) { | |||
caps |= CPU_CAP_POPCNT; | |||
} | |||
} | |||
if (max >= 7) { | |||
__cpuidex(regs.data, 7, 0); | |||
if (regs.ebx & (1 << 5)) { | |||
caps |= CPU_CAP_AVX2; | |||
} | |||
if (regs.ebx & (1 << 8)) { | |||
caps |= CPU_CAP_BMI2; | |||
} | |||
} | |||
return caps; | |||
} | |||
#else | |||
#include <cpuid.h> | |||
static unsigned init_caps(void) { | |||
unsigned int caps = 0; | |||
unsigned int eax, ebx, ecx, edx; | |||
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { | |||
if (edx & (1 << 26)) { | |||
caps |= CPU_CAP_SSE2; | |||
} | |||
if (ecx & (1 << 23)) { | |||
caps |= CPU_CAP_POPCNT; | |||
} | |||
} | |||
if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) { | |||
if (ebx & (1 << 5)) { | |||
caps |= CPU_CAP_AVX2; | |||
} | |||
if (ebx & (1 << 8)) { | |||
caps |= CPU_CAP_BMI2; | |||
} | |||
} | |||
return caps; | |||
} | |||
#endif | |||
#else | |||
/* Fallback for platforms without a detection method: report no capabilities. */
static unsigned init_caps(void) {
  const unsigned int no_caps = 0;

  return no_caps;
}
#endif | |||
#include <limits.h> | |||
/* Cached capability mask; UINT_MAX means it has not been computed yet. */
static unsigned int cpu_caps = UINT_MAX;

/*
 * Return true if every CPU_CAP_* bit set in caps is supported by the CPU.
 * The capability mask is computed by init_caps() on the first call and
 * reused afterwards.
 */
bool cpu_supports(unsigned int caps) {
  if (cpu_caps == UINT_MAX) {
    cpu_caps = init_caps();
  }

  /* requested bits that the CPU does not provide */
  const unsigned int missing = caps & ~cpu_caps;
  return missing == 0;
}
#endif |
@@ -0,0 +1,45 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef CPU_H | |||
#define CPU_H | |||
#include "macros.h" | |||
#if defined(__GNUC__) && !(defined(__APPLE__) && (__clang_major__ <= 8)) && \ | |||
!defined(__MINGW32__) && !defined(__MINGW64__) | |||
#define BUILTIN_CPU_SUPPORTED | |||
#endif | |||
#if defined(BUILTIN_CPU_SUPPORTED) && GNUC_CHECK(4, 9) && !GNUC_CHECK(5, 0) | |||
/* gcc 4.9's __builtin_cpu_supports does not support "bmi2" */
#define BUILTIN_CPU_SUPPORTED_BROKEN_BMI2 | |||
#endif | |||
#if !defined(BUILTIN_CPU_SUPPORTED) || defined(BUILTIN_CPU_SUPPORTED_BROKEN_BMI2) | |||
#include <stdbool.h> | |||
/* CPU supports SSE2 */ | |||
#define CPU_CAP_SSE2 0x00000001 | |||
/* CPU supports popcnt */ | |||
#define CPU_CAP_POPCNT 0x00000002 | |||
/* CPU supports AVX2 */ | |||
#define CPU_CAP_AVX2 0x00000004 | |||
/* CPU supports BMI2 */ | |||
#define CPU_CAP_BMI2 0x00000010 | |||
/* CPU supports NEON */ | |||
#define CPU_CAP_NEON 0x00000008 | |||
/** | |||
* Helper function in case __builtin_cpu_supports is not available. | |||
*/ | |||
bool cpu_supports(unsigned int caps); | |||
#endif | |||
#endif |
@@ -0,0 +1,6 @@ | |||
#ifndef CRYPTO_SIGN_H | |||
#define CRYPTO_SIGN_H | |||
#include "api.h" | |||
#endif |
@@ -0,0 +1,173 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC_COMPAT_ENDIAN_H | |||
#define PICNIC_COMPAT_ENDIAN_H | |||
#include <stdint.h> | |||
#include "macros.h" | |||
#if defined(__GNUC__) || defined(__clang__) | |||
#define bswap16(x) __builtin_bswap16(x) | |||
#define bswap32(x) __builtin_bswap32(x) | |||
#define bswap64(x) __builtin_bswap64(x) | |||
#elif defined(_MSC_VER) | |||
#include <stdlib.h> | |||
#define bswap16(x) _byteswap_ushort(x) | |||
#define bswap32(x) _byteswap_ulong(x) | |||
#define bswap64(x) _byteswap_uint64(x) | |||
#else | |||
/* Portable fallback: swap the two bytes of a 16-bit value. */
static inline uint16_t ATTR_CONST bswap16(uint16_t x) {
  /* the conversion to uint16_t drops the bits shifted above bit 15 */
  return (uint16_t)((x >> 8) | (x << 8));
}
/* Portable fallback: reverse the byte order of a 32-bit value. */
static inline uint32_t ATTR_CONST bswap32(uint32_t x) {
  const uint32_t byte3_to_0 = x >> 24;
  const uint32_t byte2_to_1 = (x >> 8) & 0x0000ff00;
  const uint32_t byte1_to_2 = (x << 8) & 0x00ff0000;
  const uint32_t byte0_to_3 = x << 24;

  return byte3_to_0 | byte2_to_1 | byte1_to_2 | byte0_to_3;
}
/* Portable fallback: reverse the byte order of a 64-bit value. */
static inline uint64_t ATTR_CONST bswap64(uint64_t x) {
  uint64_t swapped = 0;

  /* peel bytes off the low end of x and push them onto the low end of swapped */
  for (unsigned int i = 0; i < sizeof(x); ++i) {
    swapped = (swapped << 8) | (x & UINT64_C(0xff));
    x >>= 8;
  }
  return swapped;
}
#endif | |||
/* Linux / GLIBC */ | |||
#if defined(__linux__) || defined(__GLIBC__) | |||
#include <endian.h> | |||
/* endian.h only provides conversion functions if built with one of these defines */
#if defined(_DEFAULT_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) | |||
#define HAVE_HOSTSWAP | |||
#endif | |||
#endif | |||
/* Windows */ | |||
#if defined(_WIN16) || defined(_WIN32) || defined(_WIN64) | |||
#if defined(__MINGW32__) || defined(__MINGW64__) | |||
#include <sys/param.h> | |||
#else | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#endif | |||
/* OS X */ | |||
#if defined(__APPLE__) | |||
#include <machine/endian.h> | |||
#endif | |||
/* OpenBSD */ | |||
#if defined(__OpenBSD__) | |||
#include <machine/endian.h> | |||
#define HAVE_HOSTSWAP | |||
#endif | |||
/* other BSDs */ | |||
#if defined(__FreeBSD__) || defined(__NETBSD__) || defined(__NetBSD__) | |||
#include <sys/endian.h> | |||
#define HAVE_HOSTSWAP | |||
#endif | |||
#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN) | |||
#if defined(BIG_ENDIAN) && defined(LITTLE_ENDIAN) | |||
#if defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#elif defined(BIG_ENDIAN) | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(LITTLE_ENDIAN) | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#endif | |||
#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN) | |||
#if defined(_BIG_ENDIAN) && defined(_LITTLE_ENDIAN) | |||
#if defined(_BYTE_ORDER) && _BYTE_ORDER == _BIG_ENDIAN | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(_BYTE_ORDER) && _BYTE_ORDER == _LITTLE_ENDIAN | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#elif defined(_BIG_ENDIAN) | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(_LITTLE_ENDIAN) | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#endif | |||
#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN) | |||
#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) | |||
#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#elif defined(__BIG_ENDIAN) | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(__LITTLE_ENDIAN) | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#endif | |||
#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN) | |||
#if defined(__BIG_ENDIAN__) && defined(__LITTLE_ENDIAN__) | |||
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __BIG_ENDIAN__ | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#elif defined(__BIG_ENDIAN__) | |||
#define PICNIC_IS_BIG_ENDIAN | |||
#elif defined(__LITTLE_ENDIAN__) | |||
#define PICNIC_IS_LITTLE_ENDIAN | |||
#endif | |||
#endif | |||
#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN) | |||
#error "Unknown platform!" | |||
#endif | |||
#if !defined(HAVE_HOSTSWAP) | |||
#if defined(PICNIC_IS_LITTLE_ENDIAN) | |||
#define htobe16(x) bswap16((x)) | |||
#define htole16(x) ((uint16_t)(x)) | |||
#define be16toh(x) bswap16((x)) | |||
#define le16toh(x) ((uint16_t)(x)) | |||
#define htobe32(x) bswap32((x)) | |||
#define htole32(x) ((uint32_t)(x)) | |||
#define be32toh(x) bswap32((x)) | |||
#define le32toh(x) ((uint32_t)(x)) | |||
#define htobe64(x) bswap64((x)) | |||
#define htole64(x) ((uint64_t)(x)) | |||
#define be64toh(x) bswap64((x)) | |||
#define le64toh(x) ((uint64_t)(x)) | |||
#elif defined(PICNIC_IS_BIG_ENDIAN) | |||
#define htobe16(x) ((uint16_t)(x)) | |||
#define htole16(x) bswap16((x)) | |||
#define be16toh(x) ((uint16_t)(x)) | |||
#define le16toh(x) bswap16((x)) | |||
#define htobe32(x) ((uint32_t)(x)) | |||
#define htole32(x) bswap32((x)) | |||
#define be32toh(x) ((uint32_t)(x)) | |||
#define le32toh(x) bswap32((x)) | |||
#define htobe64(x) ((uint64_t)(x)) | |||
#define htole64(x) bswap64((x)) | |||
#define be64toh(x) ((uint64_t)(x)) | |||
#define le64toh(x) bswap64((x)) | |||
#endif | |||
#endif | |||
#endif |
@@ -0,0 +1,43 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "io.h" | |||
#include <string.h> | |||
#include "compat.h" | |||
void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t len) { | |||
const size_t word_count = (len + 7) / sizeof(uint64_t); | |||
const block_t* block = CONST_BLOCK(data, 0); | |||
for (size_t i = word_count; i; --i, dst += sizeof(uint64_t), len -= sizeof(uint64_t)) { | |||
const uint64_t tmp = htobe64(block->w64[i - 1]); | |||
memcpy(dst, &tmp, MIN(sizeof(tmp), len)); | |||
} | |||
} | |||
/*
 * Deserialize len bytes from data into result. Each 8-byte chunk is read as a
 * big-endian 64-bit word and stored from the highest used index down to word
 * 0; a short final chunk is zero-padded in its trailing bytes.
 */
void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len) {
  block_t* dst = BLOCK(result, 0);
  size_t idx   = (len + 7) / sizeof(uint64_t);

  while (idx > 0) {
    --idx;

    uint64_t raw = 0;
    memcpy(&raw, data, MIN(sizeof(raw), len));
    dst->w64[idx] = be64toh(raw);

    data += sizeof(uint64_t);
    /* may wrap after the final, partial chunk; len is not read again in that case */
    len -= sizeof(uint64_t);
  }
}
#if defined(PICNIC_STATIC) || !defined(NDEBUG) | |||
/* Write len bytes of data to out as upper-case hex, two digits per byte, with
 * no separators and no trailing newline. */
void print_hex(FILE* out, const uint8_t* data, size_t len) {
  const uint8_t* const end = data + len;

  for (const uint8_t* cur = data; cur != end; ++cur) {
    fprintf(out, "%02X", *cur);
  }
}
#endif |
@@ -0,0 +1,40 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef IO_H | |||
#define IO_H | |||
#include <stdint.h> | |||
#include <stdio.h> | |||
#include "mzd_additional.h" | |||
// Serialize `data` into the byte buffer `dst`; `numbytes` is the number of bytes written. | |||
void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t numbytes); | |||
// Load `len` bytes from `data` into `result` (the opposite direction of mzd_to_char_array). | |||
void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len); | |||
/* Read a single bit from a byte array. Bit 0 is the most significant bit of array[0];
 * the result is always 0 or 1. */
static inline uint8_t getBit(const uint8_t* array, size_t bitNumber) {
  const size_t byte_idx    = bitNumber >> 3;
  const unsigned int shift = 7u - (unsigned int)(bitNumber & 7u);
  return (uint8_t)((array[byte_idx] >> shift) & 0x01);
}
/* Overwrite one bit of a byte array. Bit 0 is the most significant bit of bytes[0].
 * `val` is shifted into place without masking, so callers should pass 0 or 1. */
static inline void setBit(uint8_t* bytes, size_t bitNumber, uint8_t val) {
  const size_t byte_idx = bitNumber / 8;
  const int shift       = 7 - (int)(bitNumber % 8);
  const int cleared     = bytes[byte_idx] & ~(1 << shift);
  bytes[byte_idx]       = (uint8_t)(cleared | (val << shift));
}
/* Return the bits of `byte` that lie below bit position `diff` (i.e. `byte` with the
 * mask 0xff << diff removed). The result is 0 exactly when those low bits are all clear. */
static inline int check_padding_bits(const uint8_t byte, const unsigned int diff) {
  const int low_bits = byte & ~(UINT8_C(0xff) << diff);
  return low_bits;
}
// Hex dump helper; only declared when PICNIC_STATIC is defined or NDEBUG is not defined. | |||
#if defined(PICNIC_STATIC) || !defined(NDEBUG) | |||
void print_hex(FILE* out, const uint8_t* data, size_t len); | |||
#endif | |||
#endif |
@@ -0,0 +1,159 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef KDF_SHAKE_H | |||
#define KDF_SHAKE_H | |||
#include <stdint.h> | |||
#include "macros.h" | |||
#include "endian_compat.h" | |||
#if defined(WITH_SHAKE_S390_CPACF) | |||
/* use the KIMD/KLMD instructions from CPACF for SHAKE support on S390 */ | |||
#include "sha3/s390_cpacf.h" | |||
#else | |||
#if !defined(SUPERCOP) | |||
/* use SHAKE implementation in sha3/ */ | |||
#include "sha3/KeccakHash.h" | |||
#else | |||
/* use SUPERCOP implementation */ | |||
#include <libkeccak.a.headers/KeccakHash.h> | |||
#endif | |||
/* use the Keccakx4 implementation */ | |||
#include "KeccakHashtimes4.h" | |||
typedef Keccak_HashInstance hash_context ATTR_ALIGNED(32); | |||
/** | |||
* Initialize hash context based on the digest size used by Picnic. If the size is 32 bytes, | |||
* SHAKE128 is used, otherwise SHAKE256 is used. | |||
*/ | |||
static inline void hash_init(hash_context* ctx, size_t digest_size) { | |||
if (digest_size == 32) { | |||
Keccak_HashInitialize_SHAKE128(ctx); | |||
} else { | |||
Keccak_HashInitialize_SHAKE256(ctx); | |||
} | |||
} | |||
/* Absorb `size` bytes of `data`; the Keccak call is given the length in bits. */
static inline void hash_update(hash_context* ctx, const uint8_t* data, size_t size) {
  const size_t bit_count = size * 8;
  Keccak_HashUpdate(ctx, data, bit_count);
}
// Finalize `ctx` via Keccak_HashFinal; NULL is passed for the output pointer, so output is fetched separately with hash_squeeze. | |||
static inline void hash_final(hash_context* ctx) { | |||
Keccak_HashFinal(ctx, NULL); | |||
} | |||
/* Squeeze `buflen` bytes of output into `buffer`; the Keccak call is given the length
 * in bits. */
static inline void hash_squeeze(hash_context* ctx, uint8_t* buffer, size_t buflen) {
  const size_t bit_count = buflen * 8;
  Keccak_HashSqueeze(ctx, buffer, bit_count);
}
#endif | |||
/* Absorb a 16-bit value in little-endian byte order. */
static inline void hash_update_uint16_le(hash_context* ctx, uint16_t data) {
  const uint16_t le_value      = htole16(data);
  const uint8_t* const le_bytes = (const uint8_t*)&le_value;
  hash_update(ctx, le_bytes, sizeof(le_value));
}
/* Initialize `ctx` for `digest_size` and immediately absorb the one-byte `prefix`. */
static inline void hash_init_prefix(hash_context* ctx, size_t digest_size,
                                    const uint8_t prefix) {
  const uint8_t* const prefix_ptr = &prefix;

  hash_init(ctx, digest_size);
  hash_update(ctx, prefix_ptr, sizeof(uint8_t));
}
// kdf_shake_* is an alias layer: kdf_shake_t is hash_context and every macro forwards to the matching hash_* helper. | |||
typedef hash_context kdf_shake_t; | |||
#define kdf_shake_init(ctx, digest_size) hash_init((ctx), (digest_size)) | |||
#define kdf_shake_init_prefix(ctx, digest_size, prefix) hash_init_prefix((ctx), (digest_size), (prefix)) | |||
#define kdf_shake_update_key(ctx, key, keylen) hash_update((ctx), (key), (keylen)) | |||
#define kdf_shake_update_key_uint16_le(ctx, key) hash_update_uint16_le((ctx), (key)) | |||
#define kdf_shake_finalize_key(ctx) hash_final((ctx)) | |||
#define kdf_shake_get_randomness(ctx, dst, count) hash_squeeze((ctx), (dst), (count)) | |||
// kdf_shake_clear expands to nothing. | |||
#define kdf_shake_clear(ctx) | |||
/* Instances that work with 4 states in parallel. */ | |||
typedef Keccak_HashInstancetimes4 hash_context_x4 ATTR_ALIGNED(32); | |||
/* Four-state counterpart of hash_init: a 32-byte digest selects SHAKE128, every other
 * size selects SHAKE256. */
static inline void hash_init_x4(hash_context_x4* ctx, size_t digest_size) {
  switch (digest_size) {
  case 32:
    Keccak_HashInitializetimes4_SHAKE128(ctx);
    break;
  default:
    Keccak_HashInitializetimes4_SHAKE256(ctx);
    break;
  }
}
/* Absorb `size` bytes from each of the input pointers in `data`; the Keccak call is
 * given the length in bits. */
static inline void hash_update_x4(hash_context_x4* ctx, const uint8_t** data, size_t size) {
  const size_t bit_count = size * 8;
  Keccak_HashUpdatetimes4(ctx, data, bit_count);
}
/* Convenience form of hash_update_x4 taking the four inputs as separate pointers. */
static inline void hash_update_x4_4(hash_context_x4* ctx, const uint8_t* data0,
                                    const uint8_t* data1, const uint8_t* data2,
                                    const uint8_t* data3, size_t size) {
  const uint8_t* lanes[4];

  lanes[0] = data0;
  lanes[1] = data1;
  lanes[2] = data2;
  lanes[3] = data3;
  hash_update_x4(ctx, lanes, size);
}
/* Absorb the same `size` bytes of `data` into all four states. */
static inline void hash_update_x4_1(hash_context_x4* ctx, const uint8_t* data, size_t size) {
  const uint8_t* lanes[4];

  for (unsigned int lane = 0; lane < 4; ++lane) {
    lanes[lane] = data;
  }
  hash_update_x4(ctx, lanes, size);
}
/* Initialize the four-state context for `digest_size` and absorb the one-byte `prefix`
 * into every state. */
static inline void hash_init_prefix_x4(hash_context_x4* ctx, size_t digest_size,
                                       const uint8_t prefix) {
  const uint8_t* const prefix_ptr = &prefix;

  hash_init_x4(ctx, digest_size);
  hash_update_x4_1(ctx, prefix_ptr, sizeof(uint8_t));
}
// Finalize the four-state context via Keccak_HashFinaltimes4; NULL is passed for the output pointer, output is fetched with hash_squeeze_x4. | |||
static inline void hash_final_x4(hash_context_x4* ctx) { | |||
Keccak_HashFinaltimes4(ctx, NULL); | |||
} | |||
/* Squeeze `buflen` bytes into each of the output pointers in `buffer`; the Keccak call
 * is given the length in bits. */
static inline void hash_squeeze_x4(hash_context_x4* ctx, uint8_t** buffer, size_t buflen) {
  const size_t bit_count = buflen * 8;
  Keccak_HashSqueezetimes4(ctx, buffer, bit_count);
}
/* Convenience form of hash_squeeze_x4 taking the four outputs as separate pointers. */
static inline void hash_squeeze_x4_4(hash_context_x4* ctx, uint8_t* buffer0, uint8_t* buffer1,
                                     uint8_t* buffer2, uint8_t* buffer3, size_t buflen) {
  uint8_t* lanes[4];

  lanes[0] = buffer0;
  lanes[1] = buffer1;
  lanes[2] = buffer2;
  lanes[3] = buffer3;
  hash_squeeze_x4(ctx, lanes, buflen);
}
/* Absorb the same 16-bit value, in little-endian byte order, into all four states. */
static inline void hash_update_x4_uint16_le(hash_context_x4* ctx, uint16_t data) {
  const uint16_t le_value       = htole16(data);
  const uint8_t* const le_bytes = (const uint8_t*)&le_value;
  hash_update_x4_1(ctx, le_bytes, sizeof(le_value));
}
/* Absorb four 16-bit values, one per state, each in little-endian byte order. */
static inline void hash_update_x4_uint16s_le(hash_context_x4* ctx, const uint16_t data[4]) {
  uint16_t le_values[4];

  for (unsigned int lane = 0; lane < 4; ++lane) {
    le_values[lane] = htole16(data[lane]);
  }
  hash_update_x4_4(ctx, (const uint8_t*)&le_values[0], (const uint8_t*)&le_values[1],
                   (const uint8_t*)&le_values[2], (const uint8_t*)&le_values[3],
                   sizeof(uint16_t));
}
// kdf_shake_x4_* is an alias layer: kdf_shake_x4_t is hash_context_x4 and every macro forwards to the matching hash_*_x4 helper. | |||
typedef hash_context_x4 kdf_shake_x4_t; | |||
#define kdf_shake_x4_init(ctx, digest_size) hash_init_x4((ctx), (digest_size)) | |||
#define kdf_shake_x4_init_prefix(ctx, digest_size, prefix) \ | |||
hash_init_prefix_x4((ctx), (digest_size), (prefix)) | |||
#define kdf_shake_x4_update_key(ctx, key, keylen) hash_update_x4((ctx), (key), (keylen)) | |||
#define kdf_shake_x4_update_key_4(ctx, key0, key1, key2, key3, keylen) \ | |||
hash_update_x4_4((ctx), (key0), (key1), (key2), (key3), (keylen)) | |||
#define kdf_shake_x4_update_key_1(ctx, key, keylen) hash_update_x4_1((ctx), (key), (keylen)) | |||
#define kdf_shake_x4_update_key_uint16_le(ctx, key) hash_update_x4_uint16_le((ctx), (key)) | |||
#define kdf_shake_x4_update_key_uint16s_le(ctx, keys) hash_update_x4_uint16s_le((ctx), (keys)) | |||
#define kdf_shake_x4_finalize_key(ctx) hash_final_x4((ctx)) | |||
#define kdf_shake_x4_get_randomness(ctx, dst, count) hash_squeeze_x4((ctx), (dst), (count)) | |||
#define kdf_shake_x4_get_randomness_4(ctx, dst0, dst1, dst2, dst3, count) \ | |||
hash_squeeze_x4_4((ctx), (dst0), (dst1), (dst2), (dst3), (count)) | |||
// kdf_shake_x4_clear expands to nothing. | |||
#define kdf_shake_x4_clear(ctx) | |||
#endif |
@@ -0,0 +1,511 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "io.h" | |||
#include "lowmc.h" | |||
#include "mzd_additional.h" | |||
#include "bitstream.h" | |||
#include "picnic3_impl.h" | |||
#include "picnic3_types.h" | |||
#include "simd.h" | |||
#if !defined(_MSC_VER) | |||
#include <stdalign.h> | |||
#endif | |||
#include <string.h> | |||
#include <assert.h> | |||
#include "lowmc_129_129_4.h" | |||
#if !defined(NO_UINT64_FALLBACK) | |||
/** | |||
* S-box for m = 43 | |||
*/ | |||
static void sbox_uint64_lowmc_129_129_4(mzd_local_t* in) { | |||
mzd_local_t x0m[1], x1m[1], x2m[1]; | |||
// a | |||
mzd_and_uint64_192(x0m, mask_129_129_43_a, in); | |||
// b | |||
mzd_and_uint64_192(x1m, mask_129_129_43_b, in); | |||
// c | |||
mzd_and_uint64_192(x2m, mask_129_129_43_c, in); | |||
mzd_shift_left_uint64_192(x0m, x0m, 2); | |||
mzd_shift_left_uint64_192(x1m, x1m, 1); | |||
mzd_local_t t0[1], t1[1], t2[1]; | |||
// b & c | |||
mzd_and_uint64_192(t0, x1m, x2m); | |||
// c & a | |||
mzd_and_uint64_192(t1, x0m, x2m); | |||
// a & b | |||
mzd_and_uint64_192(t2, x0m, x1m); | |||
// (b & c) ^ a | |||
mzd_xor_uint64_192(t0, t0, x0m); | |||
// (c & a) ^ a ^ b | |||
mzd_xor_uint64_192(t1, t1, x0m); | |||
mzd_xor_uint64_192(t1, t1, x1m); | |||
// (a & b) ^ a ^ b ^c | |||
mzd_xor_uint64_192(t2, t2, x0m); | |||
mzd_xor_uint64_192(t2, t2, x1m); | |||
mzd_xor_uint64_192(t2, t2, x2m); | |||
mzd_shift_right_uint64_192(t0, t0, 2); | |||
mzd_shift_right_uint64_192(t1, t1, 1); | |||
mzd_xor_uint64_192(t2, t2, t1); | |||
mzd_xor_uint64_192(in, t2, t0); | |||
} | |||
#endif /* !NO_UINT64_FALLBACK */ | |||
ATTR_TARGET_S128 | |||
static inline void sbox_s128_full(mzd_local_t* in, const word128* mask_a, const word128* mask_b, | |||
const word128* mask_c) { | |||
word128 x0m[2] ATTR_ALIGNED(alignof(word128)), x1m[2] ATTR_ALIGNED(alignof(word128)), | |||
x2m[2] ATTR_ALIGNED(alignof(word128)); | |||
mm128_and_256(x0m, CONST_BLOCK(in, 0)->w128, mask_a); | |||
mm128_and_256(x1m, CONST_BLOCK(in, 0)->w128, mask_b); | |||
mm128_and_256(x2m, CONST_BLOCK(in, 0)->w128, mask_c); | |||
mm128_shift_left_256(x0m, x0m, 2); | |||
mm128_shift_left_256(x1m, x1m, 1); | |||
word128 t0[2] ATTR_ALIGNED(alignof(word128)), t1[2] ATTR_ALIGNED(alignof(word128)), | |||
t2[2] ATTR_ALIGNED(alignof(word128)); | |||
mm128_and_256(t0, x1m, x2m); | |||
mm128_and_256(t1, x0m, x2m); | |||
mm128_and_256(t2, x0m, x1m); | |||
mm128_xor_256(t0, t0, x0m); | |||
mm128_xor_256(x0m, x0m, x1m); | |||
mm128_xor_256(t1, t1, x0m); | |||
mm128_xor_256(t2, t2, x0m); | |||
mm128_xor_256(t2, t2, x2m); | |||
mm128_shift_right_256(t0, t0, 2); | |||
mm128_shift_right_256(t1, t1, 1); | |||
mm128_xor_256(t0, t0, t1); | |||
mm128_xor_256(in->w128, t0, t2); | |||
} | |||
ATTR_TARGET_S128 | |||
static inline void sbox_s128_lowmc_129_129_4(mzd_local_t* in) { | |||
sbox_s128_full(in, mask_129_129_43_a->w128, mask_129_129_43_b->w128, mask_129_129_43_c->w128); | |||
} | |||
ATTR_TARGET_AVX2 | |||
static inline word256 sbox_s256_lowmc_full(const word256 min, const word256 mask_a, | |||
const word256 mask_b, const word256 mask_c) { | |||
word256 x0m ATTR_ALIGNED(alignof(word256)) = mm256_and(min, mask_a); | |||
word256 x1m ATTR_ALIGNED(alignof(word256)) = mm256_and(min, mask_b); | |||
word256 x2m ATTR_ALIGNED(alignof(word256)) = mm256_and(min, mask_c); | |||
x0m = mm256_rotate_left(x0m, 2); | |||
x1m = mm256_rotate_left(x1m, 1); | |||
word256 t0 ATTR_ALIGNED(alignof(word256)) = mm256_and(x1m, x2m); | |||
word256 t1 ATTR_ALIGNED(alignof(word256)) = mm256_and(x0m, x2m); | |||
word256 t2 ATTR_ALIGNED(alignof(word256)) = mm256_and(x0m, x1m); | |||
t0 = mm256_xor(t0, x0m); | |||
x0m = mm256_xor(x0m, x1m); | |||
t1 = mm256_xor(t1, x0m); | |||
t2 = mm256_xor(t2, x0m); | |||
t2 = mm256_xor(t2, x2m); | |||
t0 = mm256_rotate_right(t0, 2); | |||
t1 = mm256_rotate_right(t1, 1); | |||
return mm256_xor(mm256_xor(t0, t1), t2); | |||
} | |||
ATTR_TARGET_AVX2 | |||
static inline void sbox_s256_lowmc_129_129_4(mzd_local_t* in) { | |||
BLOCK(in, 0)->w256 = sbox_s256_lowmc_full( | |||
BLOCK(in, 0)->w256, CONST_BLOCK(mask_129_129_43_a, 0)->w256, | |||
CONST_BLOCK(mask_129_129_43_b, 0)->w256, CONST_BLOCK(mask_129_129_43_c, 0)->w256); | |||
} | |||
#if !defined(NO_UINT64_FALLBACK) | |||
// Bitsliced S-box step for the Picnic3 aux computation, generic over the mzd_local_t XOR/AND/SHL/SHR helpers. | |||
// The expansion site must provide `statein`, `stateout` (mzd_local_t*) and `tapes` (randomTape_t*); they are not macro parameters. | |||
// It builds `aux` from the masked lanes of statein/stateout, XORs in LOWMC_N bits taken from tapes->parity_tapes and | |||
// tapes->tape[15] at tapes->pos (mzd_from_bitstream), then stores aux to tapes->tape[15] at tapes->pos and to | |||
// tapes->aux_bits at tapes->aux_pos (mzd_to_bitstream), and finally advances tapes->aux_pos by LOWMC_N. | |||
#define picnic3_aux_sbox_bitsliced(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, bitmask_c) \ | |||
do { \ | |||
mzd_local_t a[1], b[1], c[1]; \ | |||
/* a */ \ | |||
AND(a, bitmask_a, statein); \ | |||
/* b */ \ | |||
AND(b, bitmask_b, statein); \ | |||
/* c */ \ | |||
AND(c, bitmask_c, statein); \ | |||
\ | |||
SHL(a, a, 2); \ | |||
SHL(b, b, 1); \ | |||
mzd_local_t d[1], e[1], f[1]; \ | |||
/* a */ \ | |||
AND(d, bitmask_a, stateout); \ | |||
/* b */ \ | |||
AND(e, bitmask_b, stateout); \ | |||
/* c */ \ | |||
AND(f, bitmask_c, stateout); \ | |||
\ | |||
SHL(d, d, 2); \ | |||
SHL(e, e, 1); \ | |||
\ | |||
mzd_local_t fresh_output_ab[1], fresh_output_bc[1], fresh_output_ca[1]; \ | |||
XOR(fresh_output_ab, a, b); \ | |||
XOR(fresh_output_ca, e, fresh_output_ab); \ | |||
XOR(fresh_output_bc, d, a); \ | |||
XOR(fresh_output_ab, fresh_output_ab, c); \ | |||
XOR(fresh_output_ab, fresh_output_ab, f); \ | |||
\ | |||
mzd_local_t t0[1], t1[1], t2[1], aux[1]; \ | |||
SHR(t2, fresh_output_ca, 2); \ | |||
SHR(t1, fresh_output_bc, 1); \ | |||
XOR(t2, t2, t1); \ | |||
XOR(aux, t2, fresh_output_ab); \ | |||
/* a & b */ \ | |||
AND(t0, a, b); \ | |||
/* b & c */ \ | |||
AND(t1, b, c); \ | |||
/* c & a */ \ | |||
AND(t2, c, a); \ | |||
SHR(t2, t2, 2); \ | |||
SHR(t1, t1, 1); \ | |||
XOR(t2, t2, t1); \ | |||
XOR(t2, t2, t0); \ | |||
XOR(aux, aux, t2); \ | |||
\ | |||
bitstream_t parity_tape = {{tapes->parity_tapes}, tapes->pos}; \ | |||
bitstream_t last_party_tape = {{tapes->tape[15]}, tapes->pos}; \ | |||
\ | |||
/* calculate aux_bits to fix and_helper */ \ | |||
mzd_from_bitstream(&parity_tape, t0, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
XOR(aux, aux, t0); \ | |||
mzd_from_bitstream(&last_party_tape, t1, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
XOR(aux, aux, t1); \ | |||
\ | |||
last_party_tape.position = tapes->pos; \ | |||
mzd_to_bitstream(&last_party_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
bitstream_t aux_tape = {{tapes->aux_bits}, tapes->aux_pos}; \ | |||
mzd_to_bitstream(&aux_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
\ | |||
tapes->aux_pos += LOWMC_N; \ | |||
} while (0) | |||
// Aux S-box for LowMC-129-129-4, uint64_t path: expands picnic3_aux_sbox_bitsliced with the mzd_*_uint64_192 helpers. | |||
// The parameter names statein/stateout/tapes are referenced by name inside the macro and must not be renamed. | |||
static void sbox_aux_uint64_lowmc_129_129_4(mzd_local_t* statein, mzd_local_t* stateout, | |||
randomTape_t* tapes) { | |||
picnic3_aux_sbox_bitsliced(LOWMC_129_129_4_N, mzd_xor_uint64_192, mzd_and_uint64_192, | |||
mzd_shift_left_uint64_192, mzd_shift_right_uint64_192, | |||
mask_129_129_43_a, mask_129_129_43_b, mask_129_129_43_c); | |||
} | |||
#endif /* !NO_UINT64_FALLBACK */ | |||
// Template instantiation for the uint64 path: each *_fns_uint64.h header sets up the operation macros and | |||
// lowmc.c.i is then included once per header. lowmc.c.i only emits code when LOWMC_INSTANCE is defined. | |||
#if !defined(NO_UINT64_FALLBACK) | |||
// uint64 based implementation | |||
#define IMPL uint64 | |||
#include "lowmc_129_129_4_fns_uint64.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_192_192_4_fns_uint64.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_255_255_4_fns_uint64.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_128_128_20_fns_uint64.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_192_192_30_fns_uint64.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_256_256_38_fns_uint64.h" | |||
#include "lowmc.c.i" | |||
#endif | |||
#define FN_ATTR ATTR_TARGET_S128 | |||
#undef IMPL | |||
#define IMPL s128 | |||
// 128-bit SIMD variant of the bitsliced aux S-box: temporaries are word128[2] arrays and state/masks are accessed via ->w128. | |||
// The expansion site must provide `statein`, `stateout` (mzd_local_t*) and `tapes` (randomTape_t*); they are not macro parameters. | |||
// After computing `aux`, LOWMC_N bits from tapes->parity_tapes and tapes->tape[15] at tapes->pos are XORed in via `tmp`, | |||
// aux is stored to tapes->tape[15] at tapes->pos and to tapes->aux_bits at tapes->aux_pos, and tapes->aux_pos advances by LOWMC_N. | |||
#define picnic3_aux_sbox_bitsliced_mm128(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, \ | |||
bitmask_c) \ | |||
do { \ | |||
word128 a[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 b[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 c[2] ATTR_ALIGNED(alignof(word128)); \ | |||
/* a */ \ | |||
AND(a, bitmask_a->w128, statein->w128); \ | |||
/* b */ \ | |||
AND(b, bitmask_b->w128, statein->w128); \ | |||
/* c */ \ | |||
AND(c, bitmask_c->w128, statein->w128); \ | |||
\ | |||
SHL(a, a, 2); \ | |||
SHL(b, b, 1); \ | |||
word128 d[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 e[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 f[2] ATTR_ALIGNED(alignof(word128)); \ | |||
/* a */ \ | |||
AND(d, bitmask_a->w128, stateout->w128); \ | |||
/* b */ \ | |||
AND(e, bitmask_b->w128, stateout->w128); \ | |||
/* c */ \ | |||
AND(f, bitmask_c->w128, stateout->w128); \ | |||
\ | |||
SHL(d, d, 2); \ | |||
SHL(e, e, 1); \ | |||
\ | |||
word128 fresh_output_ab[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 fresh_output_bc[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 fresh_output_ca[2] ATTR_ALIGNED(alignof(word128)); \ | |||
XOR(fresh_output_ab, a, b); \ | |||
XOR(fresh_output_ca, e, fresh_output_ab); \ | |||
XOR(fresh_output_bc, d, a); \ | |||
XOR(fresh_output_ab, fresh_output_ab, c); \ | |||
XOR(fresh_output_ab, fresh_output_ab, f); \ | |||
\ | |||
word128 t0[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 t1[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 t2[2] ATTR_ALIGNED(alignof(word128)); \ | |||
mzd_local_t tmp[1], aux[1]; \ | |||
SHR(t2, fresh_output_ca, 2); \ | |||
SHR(t1, fresh_output_bc, 1); \ | |||
XOR(t2, t2, t1); \ | |||
XOR(aux->w128, t2, fresh_output_ab); \ | |||
\ | |||
/* a & b */ \ | |||
AND(t0, a, b); \ | |||
/* b & c */ \ | |||
AND(t1, b, c); \ | |||
/* c & a */ \ | |||
AND(t2, c, a); \ | |||
SHR(t2, t2, 2); \ | |||
SHR(t1, t1, 1); \ | |||
XOR(t2, t2, t1); \ | |||
XOR(t2, t2, t0); \ | |||
XOR(aux->w128, aux->w128, t2); \ | |||
\ | |||
bitstream_t parity_tape = {{tapes->parity_tapes}, tapes->pos}; \ | |||
bitstream_t last_party_tape = {{tapes->tape[15]}, tapes->pos}; \ | |||
\ | |||
/* calculate aux_bits to fix and_helper */ \ | |||
mzd_from_bitstream(&parity_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
XOR(aux->w128, aux->w128, tmp->w128); \ | |||
mzd_from_bitstream(&last_party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
XOR(aux->w128, aux->w128, tmp->w128); \ | |||
\ | |||
last_party_tape.position = tapes->pos; \ | |||
mzd_to_bitstream(&last_party_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
bitstream_t aux_tape = {{tapes->aux_bits}, tapes->aux_pos}; \ | |||
mzd_to_bitstream(&aux_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
\ | |||
tapes->aux_pos += LOWMC_N; \ | |||
} while (0) | |||
// Aux S-box for LowMC-129-129-4, 128-bit SIMD path: expands picnic3_aux_sbox_bitsliced_mm128 with the mm128_*_256 helpers. | |||
// The parameter names statein/stateout/tapes are referenced by name inside the macro and must not be renamed. | |||
ATTR_TARGET_S128 | |||
static void sbox_aux_s128_lowmc_129_129_4(mzd_local_t* statein, mzd_local_t* stateout, | |||
randomTape_t* tapes) { | |||
picnic3_aux_sbox_bitsliced_mm128(LOWMC_129_129_4_N, mm128_xor_256, mm128_and_256, | |||
mm128_shift_left_256, mm128_shift_right_256, mask_129_129_43_a, | |||
mask_129_129_43_b, mask_129_129_43_c); | |||
} | |||
// Template instantiation for the s128 path (IMPL is s128, FN_ATTR is ATTR_TARGET_S128 at this point): | |||
// one lowmc.c.i inclusion per *_fns_s128.h header. | |||
#include "lowmc_129_129_4_fns_s128.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_192_192_4_fns_s128.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_255_255_4_fns_s128.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_128_128_20_fns_s128.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_192_192_30_fns_s128.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_256_256_38_fns_s128.h" | |||
#include "lowmc.c.i" | |||
// Switch the template parameters over to the AVX2 (s256) path. | |||
#undef FN_ATTR | |||
#define FN_ATTR ATTR_TARGET_AVX2 | |||
#undef IMPL | |||
#define IMPL s256 | |||
// 256-bit SIMD variant of the bitsliced aux S-box: temporaries are single word256 values and the helpers return their result. | |||
// The expansion site must provide `statein`, `stateout` (mzd_local_t*) and `tapes` (randomTape_t*); they are not macro parameters. | |||
// After computing `aux`, LOWMC_N bits from tapes->parity_tapes and tapes->tape[15] at tapes->pos are XORed in via `tmp`, | |||
// aux is stored to tapes->tape[15] at tapes->pos and to tapes->aux_bits at tapes->aux_pos, and tapes->aux_pos advances by LOWMC_N. | |||
#define picnic3_aux_sbox_bitsliced_mm256(LOWMC_N, XOR, AND, ROL, ROR, bitmask_a, bitmask_b, \ | |||
bitmask_c) \ | |||
do { \ | |||
word256 a ATTR_ALIGNED(alignof(word256)); \ | |||
word256 b ATTR_ALIGNED(alignof(word256)); \ | |||
word256 c ATTR_ALIGNED(alignof(word256)); \ | |||
/* a */ \ | |||
a = AND(bitmask_a->w256, statein->w256); \ | |||
/* b */ \ | |||
b = AND(bitmask_b->w256, statein->w256); \ | |||
/* c */ \ | |||
c = AND(bitmask_c->w256, statein->w256); \ | |||
\ | |||
a = ROL(a, 2); \ | |||
b = ROL(b, 1); \ | |||
word256 d ATTR_ALIGNED(alignof(word256)); \ | |||
word256 e ATTR_ALIGNED(alignof(word256)); \ | |||
word256 f ATTR_ALIGNED(alignof(word256)); \ | |||
/* d */ \ | |||
d = AND(bitmask_a->w256, stateout->w256); \ | |||
/* e */ \ | |||
e = AND(bitmask_b->w256, stateout->w256); \ | |||
/* f */ \ | |||
f = AND(bitmask_c->w256, stateout->w256); \ | |||
\ | |||
d = ROL(d, 2); \ | |||
e = ROL(e, 1); \ | |||
\ | |||
word256 fresh_output_ab ATTR_ALIGNED(alignof(word256)); \ | |||
word256 fresh_output_bc ATTR_ALIGNED(alignof(word256)); \ | |||
word256 fresh_output_ca ATTR_ALIGNED(alignof(word256)); \ | |||
fresh_output_ab = XOR(a, b); \ | |||
fresh_output_ca = XOR(e, fresh_output_ab); \ | |||
fresh_output_bc = XOR(d, a); \ | |||
fresh_output_ab = XOR(fresh_output_ab, c); \ | |||
fresh_output_ab = XOR(fresh_output_ab, f); \ | |||
\ | |||
word256 t0 ATTR_ALIGNED(alignof(word256)); \ | |||
word256 t1 ATTR_ALIGNED(alignof(word256)); \ | |||
word256 t2 ATTR_ALIGNED(alignof(word256)); \ | |||
mzd_local_t tmp[1], aux[1]; \ | |||
t2 = ROR(fresh_output_ca, 2); \ | |||
t1 = ROR(fresh_output_bc, 1); \ | |||
t2 = XOR(t2, t1); \ | |||
aux->w256 = XOR(t2, fresh_output_ab); \ | |||
\ | |||
/* a & b */ \ | |||
t0 = AND(a, b); \ | |||
/* b & c */ \ | |||
t1 = AND(b, c); \ | |||
/* c & a */ \ | |||
t2 = AND(c, a); \ | |||
t2 = ROR(t2, 2); \ | |||
t1 = ROR(t1, 1); \ | |||
t2 = XOR(t2, t1); \ | |||
t2 = XOR(t2, t0); \ | |||
aux->w256 = XOR(aux->w256, t2); \ | |||
\ | |||
bitstream_t parity_tape = {{tapes->parity_tapes}, tapes->pos}; \ | |||
bitstream_t last_party_tape = {{tapes->tape[15]}, tapes->pos}; \ | |||
\ | |||
/* calculate aux_bits to fix and_helper */ \ | |||
mzd_from_bitstream(&parity_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
aux->w256 = XOR(aux->w256, tmp->w256); \ | |||
mzd_from_bitstream(&last_party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
aux->w256 = XOR(aux->w256, tmp->w256); \ | |||
\ | |||
last_party_tape.position = tapes->pos; \ | |||
mzd_to_bitstream(&last_party_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
bitstream_t aux_tape = {{tapes->aux_bits}, tapes->aux_pos}; \ | |||
mzd_to_bitstream(&aux_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
\ | |||
tapes->aux_pos += LOWMC_N; \ | |||
} while (0) | |||
// Aux S-box for LowMC-129-129-4, AVX2 path: expands picnic3_aux_sbox_bitsliced_mm256 with the mm256_* helpers. | |||
// The parameter names statein/stateout/tapes are referenced by name inside the macro and must not be renamed. | |||
// NOTE(review): the macro names these slots ROL/ROR and sbox_s256_lowmc_full uses mm256_rotate_left/right, | |||
// while mm256_shift_left/right are passed here - confirm against simd.h that this is intended. | |||
ATTR_TARGET_AVX2 | |||
static void sbox_aux_s256_lowmc_129_129_4(mzd_local_t* statein, mzd_local_t* stateout, | |||
randomTape_t* tapes) { | |||
picnic3_aux_sbox_bitsliced_mm256(LOWMC_129_129_4_N, mm256_xor, mm256_and, mm256_shift_left, | |||
mm256_shift_right, mask_129_129_43_a, mask_129_129_43_b, | |||
mask_129_129_43_c); | |||
} | |||
// Template instantiation for the s256 path (IMPL is s256, FN_ATTR is ATTR_TARGET_AVX2 at this point): | |||
// one lowmc.c.i inclusion per *_fns_s256.h header. | |||
#include "lowmc_129_129_4_fns_s256.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_192_192_4_fns_s256.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_255_255_4_fns_s256.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_128_128_20_fns_s256.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_192_192_30_fns_s256.h" | |||
#include "lowmc.c.i" | |||
#include "lowmc_256_256_38_fns_s256.h" | |||
#include "lowmc.c.i" | |||
lowmc_implementation_f lowmc_get_implementation(const lowmc_parameters_t* lowmc) { | |||
assert((lowmc->m == 43 && lowmc->n == 129) || (lowmc->m == 64 && lowmc->n == 192) || | |||
(lowmc->m == 85 && lowmc->n == 255) || | |||
(lowmc->m == 10 && (lowmc->n == 128 || lowmc->n == 192 || lowmc->n == 256))); | |||
/* AVX2 enabled instances */ | |||
if (CPU_SUPPORTS_AVX2) { | |||
/* Instances with full Sbox layer */ | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_s256_lowmc_129_129_4; | |||
} | |||
/* SSE2/NEON enabled instances */ | |||
if (CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) { | |||
/* Instances with full Sbox layer */ | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_s128_lowmc_129_129_4; | |||
} | |||
#if !defined(NO_UINT64_FALLBACK) | |||
/* uint64_t implementations */ | |||
/* Instances with full Sbox layer */ | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_uint64_lowmc_129_129_4; | |||
#endif | |||
return NULL; | |||
} | |||
lowmc_compute_aux_implementation_f | |||
lowmc_compute_aux_get_implementation(const lowmc_parameters_t* lowmc) { | |||
assert((lowmc->m == 43 && lowmc->n == 129) || (lowmc->m == 64 && lowmc->n == 192) || | |||
(lowmc->m == 85 && lowmc->n == 255)); | |||
if (CPU_SUPPORTS_AVX2) { | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_compute_aux_s256_lowmc_129_129_4; | |||
} | |||
if (CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) { | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_compute_aux_s128_lowmc_129_129_4; | |||
} | |||
#if !defined(NO_UINT64_FALLBACK) | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_compute_aux_uint64_lowmc_129_129_4; | |||
#endif | |||
return NULL; | |||
} |
@@ -0,0 +1,38 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
// Template glue: does nothing unless the including file has defined LOWMC_INSTANCE (and IMPL). | |||
#if defined(LOWMC_INSTANCE) | |||
// Function and S-box names are built from IMPL and LOWMC_INSTANCE via CONCAT. | |||
#define N_LOWMC CONCAT(lowmc, CONCAT(IMPL, LOWMC_INSTANCE)) | |||
#define SBOX_FUNC CONCAT(sbox, CONCAT(IMPL, LOWMC_INSTANCE)) | |||
#if defined(LOWMC_PARTIAL) | |||
// Partial S-box layer: sbox_layer_10_uint64 is applied to a single 64-bit word of block 0. | |||
#define SBOX(x) sbox_layer_10_uint64(&BLOCK(x, 0)->w64[(LOWMC_N / (sizeof(word) * 8)) - 1]) | |||
#include "lowmc_impl_partial.c.i" | |||
#else | |||
// Full S-box layer: SBOX_FUNC is applied to block 0. | |||
#define SBOX(x) SBOX_FUNC(BLOCK(x, 0)) | |||
#include "lowmc_impl.c.i" | |||
#endif | |||
// Second pass (full S-box instances only): redefine the names for the aux computation and include its template. | |||
#if !defined(LOWMC_PARTIAL) | |||
#undef N_LOWMC | |||
#undef RECORD_STATE | |||
#undef SBOX | |||
#undef SBOX_FUNC | |||
#define SBOX_FUNC CONCAT(sbox_aux, CONCAT(IMPL, LOWMC_INSTANCE)) | |||
#define SBOX(x, y, tapes) SBOX_FUNC(BLOCK(x, 0), BLOCK(y, 0), tapes) | |||
#define N_LOWMC CONCAT(lowmc_compute_aux, CONCAT(IMPL, LOWMC_INSTANCE)) | |||
#include "lowmc_impl_aux.c.i" | |||
#endif | |||
// Clean up so the next inclusion starts from a blank slate. | |||
#undef N_LOWMC | |||
#undef RECORD_STATE | |||
#undef SBOX | |||
#undef SBOX_FUNC | |||
#endif | |||
// vim: ft=c |
@@ -0,0 +1,31 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef LOWMC_H | |||
#define LOWMC_H | |||
#include "lowmc_pars.h" | |||
// Recorded state buffer: holds ceil(MAX_LOWMC_BLOCK_SIZE / 256) mzd_local_t entries. | |||
typedef struct { | |||
mzd_local_t state[(MAX_LOWMC_BLOCK_SIZE + 255) / 256]; | |||
} recorded_state_t; | |||
// forward declaration of the type from picnic3_types.h, since we would get cyclic dependencies otherwise | |||
typedef struct randomTape_t randomTape_t; | |||
// Function pointer types for the selectable implementations. | |||
// NOTE(review): the unnamed parameters are presumably (key, input, output) - confirm against lowmc_impl.c.i. | |||
typedef void (*lowmc_implementation_f)(lowmc_key_t const*, mzd_local_t const*, mzd_local_t*); | |||
typedef void (*lowmc_store_implementation_f)(lowmc_key_t const*, mzd_local_t const*, | |||
recorded_state_t* state); | |||
typedef void (*lowmc_compute_aux_implementation_f)(lowmc_key_t*, randomTape_t* tapes); | |||
// Getters that pick an implementation for the given LowMC parameters. | |||
lowmc_implementation_f lowmc_get_implementation(const lowmc_parameters_t* lowmc); | |||
lowmc_store_implementation_f lowmc_store_get_implementation(const lowmc_parameters_t* lowmc); | |||
lowmc_compute_aux_implementation_f lowmc_compute_aux_get_implementation(const lowmc_parameters_t* lowmc); | |||
#endif |
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
// Binds the generic operation names to the s128 helpers for 128-bit state (MUL_Z uses the uint64 parity helper). | |||
// No LOWMC_INSTANCE is defined in this header. | |||
#define ADDMUL mzd_addmul_v_s128_128 | |||
#define MUL mzd_mul_v_s128_128 | |||
#define SHUFFLE mzd_shuffle_128_30 | |||
#define XOR mzd_xor_s128_128 | |||
#define COPY mzd_copy_s128_128 | |||
#define MUL_MC mzd_mul_v_s128_128_640 | |||
#define ADDMUL_R mzd_addmul_v_s128_30_128 | |||
#define MUL_Z mzd_mul_v_parity_uint64_128_30 | |||
#define XOR_MC mzd_xor_s128_640 | |||
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
// Binds the generic operation names to the s256 helpers for 128-bit state (SHUFFLE uses the pext variant, | |||
// MUL_Z the uint64 parity helper). No LOWMC_INSTANCE is defined in this header. | |||
#define ADDMUL mzd_addmul_v_s256_128 | |||
#define MUL mzd_mul_v_s256_128 | |||
#define SHUFFLE mzd_shuffle_pext_128_30 | |||
#define XOR mzd_xor_s256_128 | |||
#define COPY mzd_copy_s256_128 | |||
#define MUL_MC mzd_mul_v_s256_128_768 | |||
#define ADDMUL_R mzd_addmul_v_s256_30_128 | |||
#define MUL_Z mzd_mul_v_parity_uint64_128_30 | |||
#define XOR_MC mzd_xor_s256_768 | |||
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
// Binds the generic operation names to the uint64 helpers for 128-bit state. | |||
// No LOWMC_INSTANCE is defined in this header. | |||
#define ADDMUL mzd_addmul_v_uint64_128 | |||
#define MUL mzd_mul_v_uint64_128 | |||
#define XOR mzd_xor_uint64_128 | |||
#define SHUFFLE mzd_shuffle_128_30 | |||
#define COPY mzd_copy_uint64_128 | |||
#define MUL_MC mzd_mul_v_uint64_128_640 | |||
#define ADDMUL_R mzd_addmul_v_uint64_30_128 | |||
#define MUL_Z mzd_mul_v_parity_uint64_128_30 | |||
#define XOR_MC mzd_xor_uint64_640 | |||
@@ -0,0 +1,22 @@ | |||
// Parameters and S-box lane masks of the LowMC-129-129-4 instance. | |||
#ifndef LOWMC_129_129_4_H | |||
#define LOWMC_129_129_4_H | |||
#include "lowmc_pars.h" | |||
// N = 129, M = 43, K = N, R = 4. | |||
#define LOWMC_129_129_4_N 129 | |||
#define LOWMC_129_129_4_M 43 | |||
#define LOWMC_129_129_4_K LOWMC_129_129_4_N | |||
#define LOWMC_129_129_4_R 4 | |||
extern const lowmc_t lowmc_129_129_4; | |||
// Same numbers as above in the order {43, 129, 4, 129}, usable as an initializer. | |||
#define lowmc_parameters_129_129_4 {43, 129, 4, 129} | |||
// The three masks are pairwise disjoint and each selects every third bit; together they cover | |||
// the top bit of word 0 plus all of words 1 and 2 (129 bits). Word 3 is unused. | |||
static const mzd_local_t mask_129_129_43_a[1] = { | |||
{{UINT64_C(0x8000000000000000), UINT64_C(0x4924924924924924), UINT64_C(0x2492492492492492), | |||
UINT64_C(0x0)}}}; | |||
static const mzd_local_t mask_129_129_43_b[1] = { | |||
{{UINT64_C(0x0), UINT64_C(0x9249249249249249), UINT64_C(0x4924924924924924), UINT64_C(0x0)}}}; | |||
static const mzd_local_t mask_129_129_43_c[1] = { | |||
{{UINT64_C(0x0), UINT64_C(0x2492492492492492), UINT64_C(0x9249249249249249), UINT64_C(0x0)}}}; | |||
#endif |
@@ -0,0 +1,21 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
// Binds the generic operation names to the s128 helpers and selects the lowmc_129_129_4 instance | |||
// (LOWMC_INSTANCE plus its N/R/M constants). | |||
#define ADDMUL mzd_addmul_v_s128_129 | |||
#define MUL mzd_mul_v_s128_129 | |||
#define XOR mzd_xor_s128_256 | |||
#define COPY mzd_copy_s128_256 | |||
#define MPC_MUL mpc_matrix_mul_s128_129 | |||
#define LOWMC_INSTANCE lowmc_129_129_4 | |||
#define LOWMC_N LOWMC_129_129_4_N | |||
#define LOWMC_R LOWMC_129_129_4_R | |||
#define LOWMC_M LOWMC_129_129_4_M |
@@ -0,0 +1,21 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
// Binds the generic operation names to the s256 helpers and selects the lowmc_129_129_4 instance | |||
// (LOWMC_INSTANCE plus its N/R/M constants). | |||
#define ADDMUL mzd_addmul_v_s256_129 | |||
#define MUL mzd_mul_v_s256_129 | |||
#define XOR mzd_xor_s256_256 | |||
#define COPY mzd_copy_s256_256 | |||
#define MPC_MUL mpc_matrix_mul_s256_129 | |||
#define LOWMC_INSTANCE lowmc_129_129_4 | |||
#define LOWMC_N LOWMC_129_129_4_N | |||
#define LOWMC_R LOWMC_129_129_4_R | |||
#define LOWMC_M LOWMC_129_129_4_M |
@@ -0,0 +1,21 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
// Binds the generic operation names to the uint64 helpers and selects the lowmc_129_129_4 instance | |||
// (LOWMC_INSTANCE plus its N/R/M constants). | |||
#define ADDMUL mzd_addmul_v_uint64_129 | |||
#define MUL mzd_mul_v_uint64_129 | |||
#define XOR mzd_xor_uint64_192 | |||
#define COPY mzd_copy_uint64_192 | |||
#define MPC_MUL mpc_matrix_mul_uint64_129 | |||
#define LOWMC_INSTANCE lowmc_129_129_4 | |||
#define LOWMC_N LOWMC_129_129_4_N | |||
#define LOWMC_R LOWMC_129_129_4_R | |||
#define LOWMC_M LOWMC_129_129_4_M |
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 192-bit state, `_s128_` flavoured routines, including the
 * `_30` / `_1024` helpers consumed by the partial-round template. */
/* state-sized routines */
#define COPY     mzd_copy_s128_256
#define XOR      mzd_xor_s128_256
#define MUL      mzd_mul_v_s128_192
#define ADDMUL   mzd_addmul_v_s128_192
/* routines used only by the partial-round code path */
#define SHUFFLE  mzd_shuffle_192_30
#define MUL_Z    mzd_mul_v_parity_uint64_192_30
#define ADDMUL_R mzd_addmul_v_s128_30_192
#define MUL_MC   mzd_mul_v_s128_192_1024
#define XOR_MC   mzd_xor_s128_1024
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 192-bit state, `_s256_` flavoured routines, including the
 * `_30` / `_1024` helpers consumed by the partial-round template. */
/* state-sized routines */
#define COPY     mzd_copy_s256_256
#define XOR      mzd_xor_s256_256
#define MUL      mzd_mul_v_s256_192
#define ADDMUL   mzd_addmul_v_s256_192
/* routines used only by the partial-round code path */
#define SHUFFLE  mzd_shuffle_pext_192_30
#define MUL_Z    mzd_mul_v_parity_uint64_192_30
#define ADDMUL_R mzd_addmul_v_s256_30_192
#define MUL_MC   mzd_mul_v_s256_192_1024
#define XOR_MC   mzd_xor_s256_1024
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 192-bit state, `_uint64_` flavoured routines, including
 * the `_30` / `_960` helpers consumed by the partial-round template. */
/* state-sized routines */
#define COPY     mzd_copy_uint64_192
#define XOR      mzd_xor_uint64_192
#define MUL      mzd_mul_v_uint64_192
#define ADDMUL   mzd_addmul_v_uint64_192
/* routines used only by the partial-round code path */
#define SHUFFLE  mzd_shuffle_192_30
#define MUL_Z    mzd_mul_v_parity_uint64_192_30
#define ADDMUL_R mzd_addmul_v_uint64_30_192
#define MUL_MC   mzd_mul_v_uint64_192_960
#define XOR_MC   mzd_xor_uint64_960
@@ -0,0 +1,17 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 192-bit state, `_s128_` flavoured routines. */
#define COPY    mzd_copy_s128_256
#define XOR     mzd_xor_s128_256
#define MUL     mzd_mul_v_s128_192
#define ADDMUL  mzd_addmul_v_s128_192
#define MPC_MUL mpc_matrix_mul_s128_192
@@ -0,0 +1,17 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 192-bit state, `_s256_` flavoured routines. */
#define COPY    mzd_copy_s256_256
#define XOR     mzd_xor_s256_256
#define MUL     mzd_mul_v_s256_192
#define ADDMUL  mzd_addmul_v_s256_192
#define MPC_MUL mpc_matrix_mul_s256_192
@@ -0,0 +1,17 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 192-bit state, `_uint64_` flavoured routines. */
#define COPY    mzd_copy_uint64_192
#define XOR     mzd_xor_uint64_192
#define MUL     mzd_mul_v_uint64_192
#define ADDMUL  mzd_addmul_v_uint64_192
#define MPC_MUL mpc_matrix_mul_uint64_192
@@ -0,0 +1,17 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 256-bit state, `_s128_` flavoured routines. */
#define COPY    mzd_copy_s128_256
#define XOR     mzd_xor_s128_256
#define MUL     mzd_mul_v_s128_256
#define ADDMUL  mzd_addmul_v_s128_256
#define MPC_MUL mpc_matrix_mul_s128_256
@@ -0,0 +1,17 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 256-bit state, `_s256_` flavoured routines. */
#define COPY    mzd_copy_s256_256
#define XOR     mzd_xor_s256_256
#define MUL     mzd_mul_v_s256_256
#define ADDMUL  mzd_addmul_v_s256_256
#define MPC_MUL mpc_matrix_mul_s256_256
@@ -0,0 +1,17 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 256-bit state, `_uint64_` flavoured routines. */
#define COPY    mzd_copy_uint64_256
#define XOR     mzd_xor_uint64_256
#define MUL     mzd_mul_v_uint64_256
#define ADDMUL  mzd_addmul_v_uint64_256
#define MPC_MUL mpc_matrix_mul_uint64_256
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 256-bit state, `_s128_` flavoured routines, including the
 * `_30` / `_1280` helpers consumed by the partial-round template. */
/* state-sized routines */
#define COPY     mzd_copy_s128_256
#define XOR      mzd_xor_s128_256
#define MUL      mzd_mul_v_s128_256
#define ADDMUL   mzd_addmul_v_s128_256
/* routines used only by the partial-round code path */
#define SHUFFLE  mzd_shuffle_256_30
#define MUL_Z    mzd_mul_v_parity_uint64_256_30
#define ADDMUL_R mzd_addmul_v_s128_30_256
#define MUL_MC   mzd_mul_v_s128_256_1280
#define XOR_MC   mzd_xor_s128_1280
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 256-bit state, `_s256_` flavoured routines, including the
 * `_30` / `_1280` helpers consumed by the partial-round template. */
/* state-sized routines */
#define COPY     mzd_copy_s256_256
#define XOR      mzd_xor_s256_256
#define MUL      mzd_mul_v_s256_256
#define ADDMUL   mzd_addmul_v_s256_256
/* routines used only by the partial-round code path */
#define SHUFFLE  mzd_shuffle_pext_256_30
#define MUL_Z    mzd_mul_v_parity_uint64_256_30
#define ADDMUL_R mzd_addmul_v_s256_30_256
#define MUL_MC   mzd_mul_v_s256_256_1280
#define XOR_MC   mzd_xor_s256_1280
@@ -0,0 +1,22 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "lowmc_fns_undef.h" | |||
/* Template bindings: 256-bit state, `_uint64_` flavoured routines, including
 * the `_30` / `_1216` helpers consumed by the partial-round template. */
/* state-sized routines */
#define COPY     mzd_copy_uint64_256
#define XOR      mzd_xor_uint64_256
#define MUL      mzd_mul_v_uint64_256
#define ADDMUL   mzd_addmul_v_uint64_256
/* routines used only by the partial-round code path */
#define SHUFFLE  mzd_shuffle_256_30
#define MUL_Z    mzd_mul_v_parity_uint64_256_30
#define ADDMUL_R mzd_addmul_v_uint64_30_256
#define MUL_MC   mzd_mul_v_uint64_256_1216
#define XOR_MC   mzd_xor_uint64_1216
@@ -0,0 +1,24 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/* Drop every template binding so that the next instance header can define its
 * own set without redefinition warnings. */
/* instance description */
#undef LOWMC_INSTANCE
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_M
#undef LOWMC_PARTIAL
/* state-sized routines */
#undef COPY
#undef XOR
#undef MUL
#undef ADDMUL
#undef MPC_MUL
/* partial-round routines */
#undef SHUFFLE
#undef MUL_Z
#undef ADDMUL_R
#undef MUL_MC
#undef XOR_MC
@@ -0,0 +1,44 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/*
 * LowMC evaluation template (full S-box layer).
 *
 * The including file supplies N_LOWMC, LOWMC_INSTANCE, LOWMC_N, LOWMC_R, SBOX
 * and the COPY / XOR / MUL / ADDMUL bindings, plus optionally FN_ATTR.
 *
 * With RECORD_STATE defined, the state entering each S-box layer is stored in
 * state[0 .. LOWMC_R - 1] and the final state in state[LOWMC_R]. Otherwise the
 * final state is copied to c.
 */
#ifdef FN_ATTR
FN_ATTR
#endif
#ifdef RECORD_STATE
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, recorded_state_t* state) {
#else
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, mzd_local_t* c) {
#endif
  mzd_local_t x[((LOWMC_N) + 255) / 256];
  mzd_local_t y[((LOWMC_N) + 255) / 256];

  /* initial step: start from p and apply ADDMUL with the key and k0_matrix */
  COPY(x, p);
  ADDMUL(x, lowmc_key, LOWMC_INSTANCE.k0_matrix);

  for (unsigned r = 0; r < LOWMC_R; ++r) {
    lowmc_round_t const* const rnd = &LOWMC_INSTANCE.rounds[r];

#ifdef RECORD_STATE
    COPY(state[r].state, x);
#endif
    SBOX(x);
    /* linear layer into y, then constant and round key back into x */
    MUL(y, x, rnd->l_matrix);
    XOR(x, y, rnd->constant);
    ADDMUL(x, lowmc_key, rnd->k_matrix);
  }

#ifdef RECORD_STATE
  COPY(state[LOWMC_R].state, x);
#else
  COPY(c, x);
#endif
}
// vim: ft=c |
@@ -0,0 +1,39 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/*
 * LowMC template that walks the rounds from last to first, positions the
 * random tapes for every round and hands the round to SBOX(x, y, tapes).
 *
 * The including file supplies N_LOWMC, LOWMC_INSTANCE, LOWMC_N, LOWMC_R, SBOX
 * and the COPY / MUL / ADDMUL bindings, plus optionally FN_ATTR.
 *
 * lowmc_key is overwritten with MUL(key, ki0_matrix); the original key is kept
 * in a local copy. tapes->pos and tapes->aux_pos are rewritten in every round.
 */
#if defined(FN_ATTR)
FN_ATTR
#endif
static void N_LOWMC(lowmc_key_t* lowmc_key, randomTape_t* tapes) {
  mzd_local_t x[((LOWMC_N) + 255) / 256] = {{{0, 0, 0, 0}}};
  mzd_local_t y[((LOWMC_N) + 255) / 256];
  mzd_local_t key0[((LOWMC_N) + 255) / 256];

  COPY(key0, lowmc_key);
  MUL(lowmc_key, key0, LOWMC_INSTANCE.ki0_matrix);

  for (unsigned r = 0; r < LOWMC_R; ++r) {
    /* Rounds are visited in reverse order. The round is looked up by index on
     * every iteration: decrementing a running pointer would leave it pointing
     * one element before the array after the last iteration, which is
     * undefined behaviour even if the pointer is never dereferenced. */
    const unsigned idx             = LOWMC_R - 1 - r;
    lowmc_round_t const* const round = &LOWMC_INSTANCE.rounds[idx];

    ADDMUL(x, lowmc_key, round->k_matrix);
    MUL(y, x, round->li_matrix);

    // recover input masks from tapes, only in first round we use the key as input
    if (r == LOWMC_R - 1) {
      COPY(x, key0);
    } else {
      bitstream_t bs = {{tapes->parity_tapes}, LOWMC_N * 2 * idx};
      mzd_from_bitstream(&bs, x, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N);
    }
    tapes->pos     = LOWMC_N * 2 * idx + LOWMC_N;
    tapes->aux_pos = LOWMC_N * idx;
    SBOX(x, y, tapes);
  }
}
} | |||
// vim: ft=c |
@@ -0,0 +1,67 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/*
 * LowMC evaluation template for the partial S-box layer variant.
 *
 * The including file supplies N_LOWMC, LOWMC_INSTANCE, LOWMC_N, LOWMC_R, SBOX
 * and the COPY / XOR / MUL / ADDMUL / MUL_MC / XOR_MC / MUL_Z / SHUFFLE /
 * ADDMUL_R bindings, plus optionally FN_ATTR.
 *
 * With RECORD_STATE defined, the state entering each S-box layer is stored in
 * state[0 .. LOWMC_R - 1] and the final state in state[LOWMC_R]. Otherwise the
 * final state is copied to c.
 */
#ifdef FN_ATTR
FN_ATTR
#endif
#ifdef RECORD_STATE
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, recorded_state_t* state) {
#else
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, mzd_local_t* c) {
#endif
  mzd_local_t x[((LOWMC_N) + 255) / 256];
  mzd_local_t y[((LOWMC_N) + 255) / 256];
  mzd_local_t nl_part[(LOWMC_R * 32 + 255) / 256];
  /* index of the 64-bit word of x that takes the per-round 32-bit nl value */
  const unsigned int nl_word = (unsigned int)((LOWMC_N) / (sizeof(word) * 8) - 1);

  XOR(x, p, LOWMC_INSTANCE.precomputed_constant_linear);
  ADDMUL(x, lowmc_key, LOWMC_INSTANCE.k0_matrix);
  /* one 32-bit slot per round is precomputed from the key up front */
  MUL_MC(nl_part, lowmc_key, LOWMC_INSTANCE.precomputed_non_linear_part_matrix);
  XOR_MC(nl_part, nl_part, LOWMC_INSTANCE.precomputed_constant_non_linear);

  /* all rounds but the last use the per-round Z / R matrices */
  for (unsigned i = 0; i < LOWMC_R - 1; ++i) {
    lowmc_partial_round_t const* const rnd = &LOWMC_INSTANCE.rounds[i];

#ifdef RECORD_STATE
    COPY(state[i].state, x);
#endif
    SBOX(x);

    /* slot i lives in word (i & 7) >> 1 of block i >> 3; even slots are
     * shifted up by 32 so that the selected half ends in the upper 32 bits */
    const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
    BLOCK(x, 0)->w64[nl_word] ^= (nl << ((1 - (i & 1)) * 32)) & WORD_C(0xFFFFFFFF00000000);

    MUL_Z(y, x, rnd->z_matrix);
    SHUFFLE(x, rnd->r_mask);
    ADDMUL_R(y, x, rnd->r_matrix);
    BLOCK(x, 0)->w64[nl_word] &= WORD_C(0x00000003FFFFFFFF); // clear nl part
    XOR(x, y, x);
  }

  /* last round: same S-box / nl step, followed by the combined zr_matrix */
#ifdef RECORD_STATE
  COPY(state[LOWMC_R - 1].state, x);
#endif
  SBOX(x);

  const unsigned int last = (LOWMC_R - 1);
  const word nl_last      = CONST_BLOCK(nl_part, last >> 3)->w64[(last & 0x7) >> 1];
  BLOCK(x, 0)->w64[nl_word] ^= (nl_last << ((1 - (last & 1)) * 32)) & WORD_C(0xFFFFFFFF00000000);
  MUL(y, x, LOWMC_INSTANCE.zr_matrix);
  COPY(x, y);

#ifdef RECORD_STATE
  COPY(state[LOWMC_R].state, x);
#else
  COPY(c, x);
#endif
}
// vim: ft=c |
@@ -0,0 +1,84 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef LOWMC_PARS_H | |||
#define LOWMC_PARS_H | |||
#include <stddef.h> | |||
#include "mzd_additional.h" | |||
/* A LowMC key uses the same representation as any other mzd_local_t value. */
typedef mzd_local_t lowmc_key_t;

/* Size limits. The *_BITS values are the plain sizes multiplied by 8. */
#define MAX_LOWMC_BLOCK_SIZE      32
#define MAX_LOWMC_KEY_SIZE        MAX_LOWMC_BLOCK_SIZE
#define MAX_LOWMC_BLOCK_SIZE_BITS (MAX_LOWMC_BLOCK_SIZE * 8)
#define MAX_LOWMC_KEY_SIZE_BITS   (MAX_LOWMC_KEY_SIZE * 8)
#define MAX_LOWMC_ROUNDS          38
/**
 * Bit masks for 10 S-boxes.
 *
 * MASK_X0I, MASK_X1I and MASK_X2I each select every third bit; each is the
 * previous one shifted left by one, and together they cover the upper 30 bits
 * of a 64-bit word. MASK_MASK selects the remaining lower 34 bits.
 */
#define MASK_X0I  UINT64_C(0x2492492400000000)
#define MASK_X1I  UINT64_C(0x4924924800000000)
#define MASK_X2I  UINT64_C(0x9249249000000000)
#define MASK_MASK UINT64_C(0x00000003ffffffff)
/**
 * Numeric parameters of a LowMC instance, held in the fields m, n, r and k.
 */
typedef struct {
  unsigned m;
  unsigned n;
  unsigned r;
  unsigned k;
} lowmc_parameters_t;
/** | |||
* LowMC round with full Sblox layer | |||
*/ | |||
typedef struct { | |||
const mzd_local_t* k_matrix; | |||
const mzd_local_t* l_matrix; | |||
const mzd_local_t* li_matrix; | |||
const mzd_local_t* constant; | |||
} lowmc_round_t; | |||
/** | |||
* LowMC definition with full Sbox layer | |||
*/ | |||
typedef struct { | |||
const mzd_local_t* k0_matrix; // K_0 | |||
const mzd_local_t* ki0_matrix; // inverse of K_0 | |||
const lowmc_round_t* rounds; | |||
} lowmc_t; | |||
/** | |||
* LowMC round with partial Sblox layer | |||
*/ | |||
typedef struct { | |||
const mzd_local_t* z_matrix; | |||
const mzd_local_t* r_matrix; | |||
const word r_mask; | |||
} lowmc_partial_round_t; | |||
/** | |||
* LowMC definition with partial Sbox layer | |||
*/ | |||
typedef struct { | |||
const mzd_local_t* k0_matrix; // K_0 + precomputed | |||
const mzd_local_t* zr_matrix; // combined linear layers | |||
const lowmc_partial_round_t* rounds; | |||
const mzd_local_t* precomputed_non_linear_part_matrix; | |||
const mzd_local_t* precomputed_constant_linear; | |||
const mzd_local_t* precomputed_constant_non_linear; | |||
} lowmc_partial_t; | |||
#endif |
@@ -0,0 +1,312 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC_MACROS_H | |||
#define PICNIC_MACROS_H | |||
/* Newer GCC versions (tested with 5.4) warn about __FUNCTION__ on Linux when
 * -Wpedantic is on, so all sources use __func__ and Windows builds map it
 * back to __FUNCTION__.
 */
#ifdef __WINDOWS__
#define __func__ __FUNCTION__
#endif
/* Compilers without the clang-style feature probes get fallbacks that always
 * answer "no". */
#ifndef __has_attribute
#define __has_attribute(a) 0
#endif
#ifndef __has_builtin
#define __has_builtin(b) 0
#endif
/* GNUC_CHECK(maj, min): non-zero when the compiler reports a GNU C version of
 * at least maj.min, and always 0 when it reports no GNU C version. */
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
#define GNUC_CHECK(maj, min) 0
#else
#define GNUC_CHECK(maj, min)                                                                       \
  (((__GNUC__ << 20) + (__GNUC_MINOR__ << 10)) >= (((maj) << 20) + ((min) << 10)))
#endif
/* C library / OS version probes. Each one evaluates to 0 on other platforms. */

/* glibc: defers to __GLIBC_PREREQ */
#ifdef __GLIBC__
#define GLIBC_CHECK(maj, min) __GLIBC_PREREQ(maj, min)
#else
#define GLIBC_CHECK(maj, min) 0
#endif

/* FreeBSD: only the major version is compared, min is ignored */
#ifdef __FreeBSD__
#define FREEBSD_CHECK(maj, min) (__FreeBSD__ >= (maj))
#else
#define FREEBSD_CHECK(maj, min) 0
#endif

/* NetBSD: compares against __NetBSD_Version__ from <sys/param.h> */
#ifdef __NetBSD__
#include <sys/param.h>
#define NETBSD_CHECK(maj, min) (__NetBSD_Version__ >= ((maj)*1000000000 + (min)*10000000))
#else
#define NETBSD_CHECK(maj, min) 0
#endif
/* MACOSX_CHECK(maj, min, rev): on Apple platforms, non-zero when
 * __MAC_OS_X_VERSION_MIN_REQUIRED is at least maj.min.rev; 0 elsewhere. */
#ifdef __APPLE__
#include <Availability.h>
#define MACOSX_CHECK(maj, min, rev)                                                                \
  (__MAC_OS_X_VERSION_MIN_REQUIRED >= ((maj)*10000 + (min)*100 + (rev)))
#else
#define MACOSX_CHECK(maj, min, rev) 0
#endif
/* Minimum / maximum of two values, unless the platform already provides them.
 * Function-like macros: each argument may be evaluated more than once. */
#if !defined(MIN)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#if !defined(MAX)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* ASSUME(p): tell the optimizer that p holds at this point.
 *
 * The GCC/clang variant is wrapped in do { } while (0) so that it behaves as a
 * single statement. A bare `if` expansion would capture a following `else`
 * when ASSUME is used as the body of an unbraced `if`.
 * Without compiler support p is merely evaluated. */
#if GNUC_CHECK(4, 5) || __has_builtin(__builtin_unreachable)
#define ASSUME(p)                                                                                  \
  do {                                                                                             \
    if (!(p)) {                                                                                    \
      __builtin_unreachable();                                                                     \
    }                                                                                              \
  } while (0)
#elif defined(_MSC_VER)
#define ASSUME(p) __assume(p)
#else
#define ASSUME(p) (void)(p)
#endif
/* Attribute wrappers. Each expands to nothing when the compiler lacks the
 * corresponding attribute. */

/* pointer arguments must not be NULL (all of them, or only argument i) */
#if __has_attribute(nonnull) || GNUC_CHECK(3, 3)
#define ATTR_NONNULL __attribute__((nonnull))
#define ATTR_NONNULL_ARG(i) __attribute__((nonnull(i)))
#else
#define ATTR_NONNULL
#define ATTR_NONNULL_ARG(i)
#endif

/* destructor attribute */
#if __has_attribute(destructor) || GNUC_CHECK(2, 7)
#define ATTR_DTOR __attribute__((destructor))
#else
#define ATTR_DTOR
#endif

/* assume_aligned attribute with alignment i */
#if __has_attribute(assume_aligned) || GNUC_CHECK(4, 9)
#define ATTR_ASSUME_ALIGNED(i) __attribute__((assume_aligned(i)))
#else
#define ATTR_ASSUME_ALIGNED(i)
#endif
/* ATTR_ALIGNED(i): request i-byte alignment for a type or object.
 * HAVE_USEFUL_ATTR_ALIGNED is defined whenever the attribute is available.
 * Note that C11's alignas will only do the job once DR 444 is implemented. */
#if __has_attribute(aligned) || GNUC_CHECK(4, 9)
#define ATTR_ALIGNED(i) __attribute__((aligned((i))))
#define HAVE_USEFUL_ATTR_ALIGNED
/* The MSVC variant is currently disabled:
#elif defined(_MSC_VER)
#define ATTR_ALIGNED(i) __declspec(align((i)))
#define HAVE_USEFUL_ATTR_ALIGNED */
#else
#define ATTR_ALIGNED(i)
#endif
/* ALIGNT(s, t): round the size s up to the next multiple of sizeof(t); the bit
 * trick assumes sizeof(t) is a power of two. ALIGNU64T(s) does so for
 * uint64_t. */
#define ALIGNT(s, t) (((s) + sizeof(t) - 1) & ~(sizeof(t) - 1))
#define ALIGNU64T(s) ALIGNT(s, uint64_t)
/* UNREACHABLE: marks a point control flow never reaches. It stays undefined
 * when the compiler offers no such builtin. */
#if __has_builtin(__builtin_unreachable) || GNUC_CHECK(4, 5)
#define UNREACHABLE __builtin_unreachable()
/* The MSVC variant is currently disabled:
#elif defined(_MSC_VER)
#define UNREACHABLE __assume(0) */
#endif

/* ASSUME_ALIGNED(p, a): yields p, annotated as being a-byte aligned where the
 * compiler allows. The second variant emulates the builtin via UNREACHABLE;
 * the last one is a plain pass-through. */
#if __has_builtin(__builtin_assume_aligned) || GNUC_CHECK(4, 9)
#define ASSUME_ALIGNED(p, a) __builtin_assume_aligned((p), (a))
#elif defined(UNREACHABLE) && defined(HAVE_USEFUL_ATTR_ALIGNED)
#define ASSUME_ALIGNED(p, a) (((((uintptr_t)(p)) % (a)) == 0) ? (p) : (UNREACHABLE, (p)))
#else
#define ASSUME_ALIGNED(p, a) (p)
#endif
/* Function attribute wrappers. Each expands to nothing when unsupported. */

/* force inlining (GCC/clang attribute, or __forceinline on MSVC) */
#if __has_attribute(always_inline) || GNUC_CHECK(4, 0)
#define ATTR_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER)
#define ATTR_ALWAYS_INLINE __forceinline
#else
#define ATTR_ALWAYS_INLINE
#endif

/* pure attribute */
#if __has_attribute(pure) || defined(__GNUC__)
#define ATTR_PURE __attribute__((pure))
#else
#define ATTR_PURE
#endif

/* const attribute */
#if __has_attribute(const) || defined(__GNUC__)
#define ATTR_CONST __attribute__((const))
#else
#define ATTR_CONST
#endif

/* target attribute: compile the function for the instruction sets named in x */
#if __has_attribute(target) || defined(__GNUC__)
#define ATTR_TARGET(x) __attribute__((target((x))))
#else
#define ATTR_TARGET(x)
#endif

/* artificial attribute */
#if __has_attribute(__artificial__) || GNUC_CHECK(4, 7)
#define ATTR_ARTIFICIAL __attribute__((__artificial__))
#else
#define ATTR_ARTIFICIAL
#endif
/* Attribute bundles for the SIMD helper functions, grouped by instruction
 * set. The _PURE / _CONST variants append ATTR_PURE / ATTR_CONST. */

/* AVX2 (with BMI2) */
#define ATTR_TARGET_AVX2 ATTR_TARGET("avx2,bmi2")
#define FN_ATTRIBUTES_AVX2 ATTR_ARTIFICIAL ATTR_ALWAYS_INLINE ATTR_TARGET_AVX2
#define FN_ATTRIBUTES_AVX2_PURE FN_ATTRIBUTES_AVX2 ATTR_PURE
#define FN_ATTRIBUTES_AVX2_CONST FN_ATTRIBUTES_AVX2 ATTR_CONST

/* SSE2 */
#define ATTR_TARGET_SSE2 ATTR_TARGET("sse2")
#define FN_ATTRIBUTES_SSE2 ATTR_ARTIFICIAL ATTR_ALWAYS_INLINE ATTR_TARGET_SSE2
#define FN_ATTRIBUTES_SSE2_PURE FN_ATTRIBUTES_SSE2 ATTR_PURE
#define FN_ATTRIBUTES_SSE2_CONST FN_ATTRIBUTES_SSE2 ATTR_CONST

/* NEON (no target attribute) */
#define FN_ATTRIBUTES_NEON ATTR_ARTIFICIAL ATTR_ALWAYS_INLINE
#define FN_ATTRIBUTES_NEON_PURE FN_ATTRIBUTES_NEON ATTR_PURE
#define FN_ATTRIBUTES_NEON_CONST FN_ATTRIBUTES_NEON ATTR_CONST
/* CONCAT(a, b): paste a and b together with an underscore in between. The
 * extra level (CONCAT2) makes sure macro arguments are expanded first. */
#define CONCAT2(a, b) a##_##b
#define CONCAT(a, b) CONCAT2(a, b)
/* helper macros/functions for checked integer subtraction */ | |||
#if GNUC_CHECK(5, 0) || __has_builtin(__builtin_add_overflow) | |||
#define sub_overflow_size_t(x, y, diff) __builtin_sub_overflow(x, y, diff) | |||
#else | |||
#include <stdbool.h> | |||
#include <stddef.h> | |||
ATTR_ARTIFICIAL | |||
static inline bool sub_overflow_size_t(const size_t x, const size_t y, size_t* diff) { | |||
*diff = x - y; | |||
return x < y; | |||
} | |||
#endif | |||
#include <stdint.h> | |||
/* helper functions for parity computations */ | |||
#if GNUC_CHECK(4, 9) || __has_builtin(__builtin_parity) | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint8_t parity64_uint8(uint8_t in) { | |||
return __builtin_parity(in); | |||
} | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint16_t parity64_uint16(uint16_t in) { | |||
return __builtin_parity(in); | |||
} | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint64_t parity64_uint64(uint64_t in) { | |||
return __builtin_parityll(in); | |||
} | |||
#else | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint8_t parity64_uint8(uint8_t in) { | |||
/* byte parity from: https://graphics.stanford.edu/~seander/bithacks.html#ParityWith64Bits */ | |||
return (((in * UINT64_C(0x0101010101010101)) & UINT64_C(0x8040201008040201)) % 0x1FF) & 1; | |||
} | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint16_t parity64_uint16(uint16_t in) { | |||
in ^= in >> 1; | |||
in ^= in >> 2; | |||
in = (in & 0x1111) * 0x1111; | |||
return (in >> 12) & 1; | |||
} | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint64_t parity64_uint64(uint64_t in) { | |||
in ^= in >> 1; | |||
in ^= in >> 2; | |||
in = (in & 0x1111111111111111) * 0x1111111111111111; | |||
return (in >> 60) & 1; | |||
} | |||
#endif | |||
/* helper functions to compute number of leading zeroes */ | |||
#if GNUC_CHECK(4, 7) || __has_builtin(__builtin_clz) | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint32_t clz(uint32_t x) { | |||
return x ? __builtin_clz(x) : 32; | |||
} | |||
#else | |||
/* Number of leading zeroes of x. | |||
* From the book | |||
* H.S. Warren, *Hacker's Delight*, Pearson Education, 2003. | |||
* http://www.hackersdelight.org/hdcodetxt/nlz.c.txt | |||
*/ | |||
ATTR_CONST ATTR_ARTIFICIAL static inline uint32_t clz(uint32_t x) { | |||
if (!x) { | |||
return 32; | |||
} | |||
uint32_t n = 1; | |||
if (!(x >> 16)) { | |||
n = n + 16; | |||
x = x << 16; | |||
} | |||
if (!(x >> 24)) { | |||
n = n + 8; | |||
x = x << 8; | |||
} | |||
if (!(x >> 28)) { | |||
n = n + 4; | |||
x = x << 4; | |||
} | |||
if (!(x >> 30)) { | |||
n = n + 2; | |||
x = x << 2; | |||
} | |||
n = n - (x >> 31); | |||
return n; | |||
} | |||
#endif | |||
/* ceil_log2(x): smallest e with 2^e >= x, computed as 32 - clz(x - 1).
 * Returns 0 for x == 0 (and for x == 1). */
ATTR_CONST ATTR_ARTIFICIAL static inline uint32_t ceil_log2(uint32_t x) {
  return x ? 32 - clz(x - 1) : 0;
}
/* printf conversion for size_t: "%Iu" when __WIN32__ is defined, "%zu"
 * everywhere else. */
#ifdef __WIN32__
#define SIZET_FMT "%Iu"
#else
#define SIZET_FMT "%zu"
#endif
/* picnic_declassify(x, len): forwards to crypto_declassify when TIMECOP is
 * defined, to VALGRIND_MAKE_MEM_DEFINED when only WITH_VALGRIND is defined,
 * and expands to nothing otherwise. */
#if !defined(TIMECOP) && !defined(WITH_VALGRIND)
#define picnic_declassify(x, len)
#elif defined(TIMECOP)
#include "crypto_declassify.h"
#define picnic_declassify(x, len) crypto_declassify(x, len)
#else
#include <valgrind/memcheck.h>
#define picnic_declassify(x, len) VALGRIND_MAKE_MEM_DEFINED(x, len)
#endif
#endif |
@@ -0,0 +1,912 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#if !defined(_MSC_VER) | |||
#include <stdalign.h> | |||
#endif | |||
#include <assert.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "compat.h" | |||
#include "mzd_additional.h" | |||
#if !defined(_MSC_VER) && !defined(static_assert) | |||
#define static_assert _Static_assert | |||
#endif | |||
static_assert(((sizeof(mzd_local_t) + 0x1f) & ~0x1f) == 32, "sizeof mzd_local_t not supported"); | |||
#include "simd.h" | |||
/* number of words that make up 128 bits */
static const unsigned int align_bound = 128 / (8 * sizeof(word));

/* Number of words to reserve per row for a row that is `width` words wide.
 *
 * Rows wider than 128 bits are padded to a multiple of 32 bytes (the AVX
 * bound); narrower rows are padded to a multiple of 16 bytes, for SSE2 and
 * 128 bit vectors. */
static size_t calculate_rowstride(size_t width) {
  const size_t alignment = (width > align_bound) ? 32 : 16;
  const size_t row_bytes = width * sizeof(word);

  return ((row_bytes + alignment - 1) & ~(alignment - 1)) / sizeof(word);
}
static size_t calculate_width(size_t c) { | |||
return (c + sizeof(word) * 8 - 1) / (sizeof(word) * 8); | |||
} | |||
// Notes on the memory layout: mzd_init allocates multiple memory blocks (one | |||
// for mzd_local_t, one for rows and multiple for the buffers). We use one memory | |||
// block for mzd_local_t, rows and the buffer. This improves memory locality and | |||
// requires less calls to malloc. | |||
// | |||
// In mzd_local_init_multiple we do the same, but store n mzd_local_t instances in one | |||
// memory block. | |||
mzd_local_t* mzd_local_init_ex(unsigned int r, unsigned int c, bool clear) { | |||
const size_t rowstride = calculate_rowstride(calculate_width(c)); | |||
const size_t buffer_size = r * rowstride * sizeof(word); | |||
const size_t alloc_size = (buffer_size + 31) & ~31; | |||
/* We always align mzd_local_ts to 32 bytes. Thus the first row is always | |||
* aligned to 32 bytes as well. For 128 bit and SSE all other rows are then | |||
* aligned to 16 bytes. */ | |||
unsigned char* buffer = aligned_alloc(32, alloc_size); | |||
if (clear) { | |||
memset(buffer, 0, alloc_size); | |||
} | |||
return (mzd_local_t*)buffer; | |||
} | |||
/* Release v by handing it to aligned_free(). */
void mzd_local_free(mzd_local_t* v) {
  aligned_free(v);
}
/* Allocate n matrices of r x c bits each in one contiguous, 32-byte aligned
 * memory block and store their addresses in dst[0 .. n - 1].
 *
 * dst   receives the n pointers; dst[0] is the start of the whole block
 * n     number of matrices
 * r, c  rows and columns (bits per row) of every matrix
 * clear zero the storage when true
 *
 * If the allocation fails, every dst[s] is set to NULL. */
void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, unsigned int r, unsigned int c, bool clear) {
  const size_t rowstride   = calculate_rowstride(calculate_width(c));
  const size_t buffer_size = r * rowstride * sizeof(word);
  /* per-matrix size, rounded up so that every matrix starts 32-byte aligned */
  const size_t size_per_elem = (buffer_size + 31) & ~31;

  unsigned char* full_buffer = aligned_alloc(32, size_per_elem * n);
  if (!full_buffer) {
    /* report the failure through the output array instead of handing out
     * pointers computed from NULL */
    for (size_t s = 0; s < n; ++s) {
      dst[s] = NULL;
    }
    return;
  }

  if (clear) {
    memset(full_buffer, 0, size_per_elem * n);
  }

  for (size_t s = 0; s < n; ++s) {
    dst[s] = (mzd_local_t*)(full_buffer + s * size_per_elem);
  }
}
/* Release the block referenced by vs[0] via aligned_free(). A NULL vs is
 * ignored. */
void mzd_local_free_multiple(mzd_local_t** vs) {
  if (!vs) {
    return;
  }
  aligned_free(vs[0]);
}
/* implementation of copy */ | |||
/* Copy the first two 64-bit words of block 0 from src to dst. */
void mzd_copy_uint64_128(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w64[0] = from->w64[0];
  to->w64[1] = from->w64[1];
}
/* Copy the first three 64-bit words of block 0 from src to dst. */
void mzd_copy_uint64_192(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w64[0] = from->w64[0];
  to->w64[1] = from->w64[1];
  to->w64[2] = from->w64[2];
}
/* Copy all four 64-bit words of block 0 from src to dst. */
void mzd_copy_uint64_256(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w64[0] = from->w64[0];
  to->w64[1] = from->w64[1];
  to->w64[2] = from->w64[2];
  to->w64[3] = from->w64[3];
}
ATTR_TARGET_S128 | |||
void mzd_copy_s128_128(mzd_local_t* dst, mzd_local_t const* src) { | |||
BLOCK(dst, 0)->w128[0] = CONST_BLOCK(src, 0)->w128[0]; | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_copy_s128_256(mzd_local_t* dst, mzd_local_t const* src) { | |||
for (unsigned int i = 0; i < 2; ++i) { | |||
dst->w128[i] = src->w128[i]; | |||
} | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_copy_s256_128(mzd_local_t* dst, mzd_local_t const* src) { | |||
BLOCK(dst, 0)->w128[0] = CONST_BLOCK(src, 0)->w128[0]; | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_copy_s256_256(mzd_local_t* dst, mzd_local_t const* src) { | |||
BLOCK(dst, 0)->w256 = CONST_BLOCK(src, 0)->w256; | |||
} | |||
/* implementation of mzd_xor and variants */ | |||
ATTR_TARGET_S128 | |||
void mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
block_t* rblock = BLOCK(res, 0); | |||
const block_t* fblock = CONST_BLOCK(first, 0); | |||
const block_t* sblock = CONST_BLOCK(second, 0); | |||
rblock->w128[0] = mm128_xor(fblock->w128[0], sblock->w128[0]); | |||
} | |||
ATTR_TARGET_S128 | |||
static void mzd_xor_s128_blocks(block_t* rblock, const block_t* fblock, const block_t* sblock, | |||
unsigned int count) { | |||
for (; count; --count, ++rblock, ++fblock, ++sblock) { | |||
rblock->w128[0] = mm128_xor(fblock->w128[0], sblock->w128[0]); | |||
rblock->w128[1] = mm128_xor(fblock->w128[1], sblock->w128[1]); | |||
} | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_xor_s128_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 1); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
block_t* rblock = BLOCK(res, 0); | |||
const block_t* fblock = CONST_BLOCK(first, 0); | |||
const block_t* sblock = CONST_BLOCK(second, 0); | |||
rblock->w128[0] = mm128_xor(fblock->w128[0], sblock->w128[0]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
static void mzd_xor_s256_blocks(block_t* rblock, const block_t* fblock, const block_t* sblock, | |||
unsigned int count) { | |||
for (; count; --count, ++rblock, ++fblock, ++sblock) { | |||
rblock->w256 = mm256_xor(fblock->w256, sblock->w256); | |||
} | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_xor_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 1); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_xor_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 3); | |||
} | |||
void mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_xor_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 4); | |||
} | |||
void mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_xor_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 5); | |||
} | |||
/**
 * XOR the first len 64-bit words of fblock and sblock into rblock.
 */
static void mzd_xor_uint64_block(block_t* rblock, const block_t* fblock, const block_t* sblock,
                                 const unsigned int len) {
  word* out       = rblock->w64;
  const word* lhs = fblock->w64;
  const word* rhs = sblock->w64;

  for (unsigned int i = 0; i != len; ++i) {
    out[i] = lhs[i] ^ rhs[i];
  }
}
/**
 * res = first XOR second on 128 bits, i.e. two 64-bit words.
 */
void mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_uint64_block(out, lhs, rhs, 2);
}
/**
 * res = first XOR second on 192 bits, i.e. three 64-bit words.
 */
void mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_uint64_block(out, lhs, rhs, 3);
}
/**
 * res = first XOR second on 256 bits, i.e. four 64-bit words.
 */
void mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_uint64_block(out, lhs, rhs, 4);
}
/* implementation of mzd_and_* and variants */ | |||
ATTR_TARGET_S128 | |||
void mzd_and_s128_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
block_t* rblock = BLOCK(res, 0); | |||
const block_t* fblock = CONST_BLOCK(first, 0); | |||
const block_t* sblock = CONST_BLOCK(second, 0); | |||
rblock->w128[0] = mm128_and(fblock->w128[0], sblock->w128[0]); | |||
} | |||
ATTR_TARGET_S128 | |||
static inline void mzd_and_s128_blocks(block_t* rblock, const block_t* fblock, | |||
const block_t* sblock, unsigned int count) { | |||
for (; count; --count, ++rblock, ++fblock, ++sblock) { | |||
rblock->w128[0] = mm128_and(fblock->w128[0], sblock->w128[0]); | |||
rblock->w128[1] = mm128_and(fblock->w128[1], sblock->w128[1]); | |||
} | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_and_s128_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_and_s128_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 1); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_and_s256_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
block_t* rblock = BLOCK(res, 0); | |||
const block_t* fblock = CONST_BLOCK(first, 0); | |||
const block_t* sblock = CONST_BLOCK(second, 0); | |||
rblock->w128[0] = mm128_and(fblock->w128[0], sblock->w128[0]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
static inline void mzd_and_s256_blocks(block_t* rblock, const block_t* fblock, | |||
const block_t* sblock, unsigned int count) { | |||
for (; count; --count, ++rblock, ++fblock, ++sblock) { | |||
rblock->w256 = mm256_and(fblock->w256, sblock->w256); | |||
} | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_and_s256_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { | |||
mzd_and_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 1); | |||
} | |||
/**
 * AND the first len 64-bit words of fblock and sblock into rblock.
 */
static inline void mzd_and_uint64_block(block_t* rblock, const block_t* fblock,
                                        const block_t* sblock, const unsigned int len) {
  word* out       = rblock->w64;
  const word* lhs = fblock->w64;
  const word* rhs = sblock->w64;

  for (unsigned int i = 0; i != len; ++i) {
    out[i] = lhs[i] & rhs[i];
  }
}
/**
 * res = first AND second on 128 bits, i.e. two 64-bit words.
 */
void mzd_and_uint64_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_uint64_block(out, lhs, rhs, 2);
}
/**
 * res = first AND second on 192 bits, i.e. three 64-bit words.
 */
void mzd_and_uint64_192(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_uint64_block(out, lhs, rhs, 3);
}
/**
 * res = first AND second on 256 bits, i.e. four 64-bit words.
 */
void mzd_and_uint64_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_uint64_block(out, lhs, rhs, 4);
}
/* shifts and rotations */ | |||
/**
 * Shift the 128-bit value in val left by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_shift_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The bits carried into the next word are x >> (64 - count). That single shift would be by
   * the full word width for count == 0, which is undefined behavior in C, so it is split into
   * (x >> 1) >> (63 - count). */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Most significant word first: each input word is read before it is overwritten, which keeps
   * the result correct when res and val are the same block. */
  rblock->w64[1] = (block->w64[1] << count) | ((block->w64[0] >> 1) >> carry_shift);
  rblock->w64[0] = block->w64[0] << count;
}
/**
 * Shift the 128-bit value in val right by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_shift_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The bits carried into the previous word are x << (64 - count). That single shift would be
   * by the full word width for count == 0, which is undefined behavior in C, so it is split
   * into (x << 1) << (63 - count). */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Least significant word first: each input word is read before it is overwritten, which keeps
   * the result correct when res and val are the same block. */
  rblock->w64[0] = (block->w64[0] >> count) | ((block->w64[1] << 1) << carry_shift);
  rblock->w64[1] = block->w64[1] >> count;
}
/**
 * Shift the 192-bit value in val left by count bits and store it in res.
 *
 * w64[0] is the least significant word; w64[3] of res is not written. count
 * must be smaller than 64; a count of 0 yields an unchanged copy.
 */
void mzd_shift_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x >> (64 - count) is split into (x >> 1) >> (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Most significant word first, so every input word is read before it is overwritten. */
  rblock->w64[2] = (block->w64[2] << count) | ((block->w64[1] >> 1) >> carry_shift);
  rblock->w64[1] = (block->w64[1] << count) | ((block->w64[0] >> 1) >> carry_shift);
  rblock->w64[0] = block->w64[0] << count;
}
/**
 * Shift the 192-bit value in val right by count bits and store it in res.
 *
 * w64[0] is the least significant word; w64[3] of res is not written. count
 * must be smaller than 64; a count of 0 yields an unchanged copy.
 */
void mzd_shift_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x << (64 - count) is split into (x << 1) << (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Least significant word first, so every input word is read before it is overwritten. */
  rblock->w64[0] = (block->w64[0] >> count) | ((block->w64[1] << 1) << carry_shift);
  rblock->w64[1] = (block->w64[1] >> count) | ((block->w64[2] << 1) << carry_shift);
  rblock->w64[2] = block->w64[2] >> count;
}
/**
 * Shift the 256-bit value in val left by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_shift_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x >> (64 - count) is split into (x >> 1) >> (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Most significant word first, so every input word is read before it is overwritten. */
  rblock->w64[3] = (block->w64[3] << count) | ((block->w64[2] >> 1) >> carry_shift);
  rblock->w64[2] = (block->w64[2] << count) | ((block->w64[1] >> 1) >> carry_shift);
  rblock->w64[1] = (block->w64[1] << count) | ((block->w64[0] >> 1) >> carry_shift);
  rblock->w64[0] = block->w64[0] << count;
}
/**
 * Shift the 256-bit value in val right by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_shift_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x << (64 - count) is split into (x << 1) << (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Least significant word first, so every input word is read before it is overwritten. */
  rblock->w64[0] = (block->w64[0] >> count) | ((block->w64[1] << 1) << carry_shift);
  rblock->w64[1] = (block->w64[1] >> count) | ((block->w64[2] << 1) << carry_shift);
  rblock->w64[2] = (block->w64[2] >> count) | ((block->w64[3] << 1) << carry_shift);
  rblock->w64[3] = block->w64[3] >> count;
}
#if defined(PICNIC_STATIC) | |||
/**
 * Rotate the 128-bit value in val left by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_rotate_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x >> (64 - count) is split into (x >> 1) >> (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Bits leaving the top word wrap around into the bottom word; save them before the top word
   * is overwritten. */
  const word tmp = (block->w64[1] >> 1) >> carry_shift;
  rblock->w64[1] = (block->w64[1] << count) | ((block->w64[0] >> 1) >> carry_shift);
  rblock->w64[0] = (block->w64[0] << count) | tmp;
}
/**
 * Rotate the 128-bit value in val right by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_rotate_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x << (64 - count) is split into (x << 1) << (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Bits leaving the bottom word wrap around into the top word; save them before the bottom
   * word is overwritten. */
  const word tmp = (block->w64[0] << 1) << carry_shift;
  rblock->w64[0] = (block->w64[0] >> count) | ((block->w64[1] << 1) << carry_shift);
  rblock->w64[1] = (block->w64[1] >> count) | tmp;
}
/**
 * Rotate the 192-bit value in val left by count bits and store it in res.
 *
 * w64[0] is the least significant word; w64[3] of res is not written. count
 * must be smaller than 64; a count of 0 yields an unchanged copy.
 */
void mzd_rotate_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x >> (64 - count) is split into (x >> 1) >> (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Bits leaving the top word wrap around into the bottom word; save them before the top word
   * is overwritten. */
  const word tmp = (block->w64[2] >> 1) >> carry_shift;
  rblock->w64[2] = (block->w64[2] << count) | ((block->w64[1] >> 1) >> carry_shift);
  rblock->w64[1] = (block->w64[1] << count) | ((block->w64[0] >> 1) >> carry_shift);
  rblock->w64[0] = (block->w64[0] << count) | tmp;
}
/**
 * Rotate the 192-bit value in val right by count bits and store it in res.
 *
 * w64[0] is the least significant word; w64[3] of res is not written. count
 * must be smaller than 64; a count of 0 yields an unchanged copy.
 */
void mzd_rotate_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x << (64 - count) is split into (x << 1) << (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Bits leaving the bottom word wrap around into the top word; save them before the bottom
   * word is overwritten. */
  const word tmp = (block->w64[0] << 1) << carry_shift;
  rblock->w64[0] = (block->w64[0] >> count) | ((block->w64[1] << 1) << carry_shift);
  rblock->w64[1] = (block->w64[1] >> count) | ((block->w64[2] << 1) << carry_shift);
  rblock->w64[2] = (block->w64[2] >> count) | tmp;
}
/**
 * Rotate the 256-bit value in val left by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_rotate_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x >> (64 - count) is split into (x >> 1) >> (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Bits leaving the top word wrap around into the bottom word; save them before the top word
   * is overwritten. */
  const word tmp = (block->w64[3] >> 1) >> carry_shift;
  rblock->w64[3] = (block->w64[3] << count) | ((block->w64[2] >> 1) >> carry_shift);
  rblock->w64[2] = (block->w64[2] << count) | ((block->w64[1] >> 1) >> carry_shift);
  rblock->w64[1] = (block->w64[1] << count) | ((block->w64[0] >> 1) >> carry_shift);
  rblock->w64[0] = (block->w64[0] << count) | tmp;
}
/**
 * Rotate the 256-bit value in val right by count bits and store it in res.
 *
 * w64[0] is the least significant word. count must be smaller than 64; a
 * count of 0 yields an unchanged copy.
 */
void mzd_rotate_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  /* The carry x << (64 - count) is split into (x << 1) << (63 - count): a single shift by the
   * full word width (count == 0) would be undefined behavior in C. */
  const unsigned int carry_shift = 8 * sizeof(word) - 1 - count;
  const block_t* block           = CONST_BLOCK(val, 0);
  block_t* rblock                = BLOCK(res, 0);

  /* Bits leaving the bottom word wrap around into the top word; save them before the bottom
   * word is overwritten. */
  const word tmp = (block->w64[0] << 1) << carry_shift;
  rblock->w64[0] = (block->w64[0] >> count) | ((block->w64[1] << 1) << carry_shift);
  rblock->w64[1] = (block->w64[1] >> count) | ((block->w64[2] << 1) << carry_shift);
  rblock->w64[2] = (block->w64[2] >> count) | ((block->w64[3] << 1) << carry_shift);
  rblock->w64[3] = (block->w64[3] >> count) | tmp;
}
#endif | |||
/**
 * Build a 128-bit mask from bit number `bit` of idx: the negated bit value
 * (all ones if the bit is set, zero otherwise) is broadcast with
 * mm128_broadcast_u64.
 */
ATTR_TARGET_S128 ATTR_ARTIFICIAL ATTR_CONST static inline word128
mm128_compute_mask(const word idx, const size_t bit) {
  const word selected = (idx >> bit) & 1;
  /* Unsigned negation turns 1 into all ones and leaves 0 as zero. */
  return mm128_broadcast_u64(-selected);
}
ATTR_TARGET_S128 | |||
void mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero}; | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 2) { | |||
cval[0] = mm128_xor_mask(cval[0], Ablock[0].w128[0], mm128_compute_mask(idx, 0)); | |||
cval[1] = mm128_xor_mask(cval[1], Ablock[0].w128[1], mm128_compute_mask(idx, 1)); | |||
cval[0] = mm128_xor_mask(cval[0], Ablock[1].w128[0], mm128_compute_mask(idx, 2)); | |||
cval[1] = mm128_xor_mask(cval[1], Ablock[1].w128[1], mm128_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {cblock->w128[0], mm128_zero, mm128_zero, | |||
mm128_zero}; | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 2) { | |||
cval[0] = mm128_xor_mask(cval[0], Ablock[0].w128[0], mm128_compute_mask(idx, 0)); | |||
cval[1] = mm128_xor_mask(cval[1], Ablock[0].w128[1], mm128_compute_mask(idx, 1)); | |||
cval[0] = mm128_xor_mask(cval[0], Ablock[1].w128[0], mm128_compute_mask(idx, 2)); | |||
cval[1] = mm128_xor_mask(cval[1], Ablock[1].w128[1], mm128_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_mul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero}; | |||
{ | |||
Ablock += 63; | |||
word idx = (*vptr) >> 63; | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
vptr++; | |||
Ablock++; | |||
} | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 2, idx >>= 2, Ablock += 2) { | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
mm128_xor_mask_region(&cval[2], Ablock[1].w128, mm128_compute_mask(idx, 1), 2); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[2]); | |||
cblock->w128[1] = mm128_xor(cval[1], cval[3]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_addmul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {cblock->w128[0], cblock->w128[1], mm128_zero, | |||
mm128_zero}; | |||
{ | |||
Ablock += 63; | |||
word idx = (*vptr) >> 63; | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
vptr++; | |||
Ablock++; | |||
} | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 2, idx >>= 2, Ablock += 2) { | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
mm128_xor_mask_region(&cval[2], Ablock[1].w128, mm128_compute_mask(idx, 1), 2); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[2]); | |||
cblock->w128[1] = mm128_xor(cval[1], cval[3]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero}; | |||
for (unsigned int w = 3; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 2, idx >>= 2, Ablock += 2) { | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
mm128_xor_mask_region(&cval[2], Ablock[1].w128, mm128_compute_mask(idx, 1), 2); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[2]); | |||
cblock->w128[1] = mm128_xor(cval[1], cval[3]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {cblock->w128[0], cblock->w128[1], mm128_zero, | |||
mm128_zero}; | |||
for (unsigned int w = 3; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 2, idx >>= 2, Ablock += 2) { | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
mm128_xor_mask_region(&cval[2], Ablock[1].w128, mm128_compute_mask(idx, 1), 2); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[2]); | |||
cblock->w128[1] = mm128_xor(cval[1], cval[3]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero}; | |||
for (unsigned int w = 4; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 2, idx >>= 2, Ablock += 2) { | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
mm128_xor_mask_region(&cval[2], Ablock[1].w128, mm128_compute_mask(idx, 1), 2); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[2]); | |||
cblock->w128[1] = mm128_xor(cval[1], cval[3]); | |||
} | |||
ATTR_TARGET_S128 | |||
void mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word128 cval[4] ATTR_ALIGNED(alignof(word128)) = {cblock->w128[0], cblock->w128[1], mm128_zero, | |||
mm128_zero}; | |||
for (unsigned int w = 4; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 2, idx >>= 2, Ablock += 2) { | |||
mm128_xor_mask_region(&cval[0], Ablock[0].w128, mm128_compute_mask(idx, 0), 2); | |||
mm128_xor_mask_region(&cval[2], Ablock[1].w128, mm128_compute_mask(idx, 1), 2); | |||
} | |||
} | |||
cblock->w128[0] = mm128_xor(cval[0], cval[2]); | |||
cblock->w128[1] = mm128_xor(cval[1], cval[3]); | |||
} | |||
/**
 * Build a 256-bit mask from bit number `bit` of idx: all four 64-bit lanes
 * are all ones if the bit is set and zero otherwise.
 */
ATTR_TARGET_AVX2 ATTR_ARTIFICIAL ATTR_CONST static inline word256
mm256_compute_mask(const word idx, const size_t bit) {
  const word selected = (idx >> bit) & 1;
  /* Unsigned negation turns 1 into all ones and leaves 0 as zero. */
  return _mm256_set1_epi64x(-selected);
}
/**
 * Build a 256-bit mask from two neighbouring bits of idx: the lower 128 bits
 * are controlled by bit number `bit`, the upper 128 bits by bit `bit + 1`.
 */
ATTR_TARGET_AVX2 ATTR_ARTIFICIAL ATTR_CONST static inline word256
mm256_compute_mask_2(const word idx, const size_t bit) {
  const word pair        = idx >> bit;
  const uint64_t lo_mask = -(pair & 1);
  const uint64_t hi_mask = -((pair >> 1) & 1);
  /* _mm256_set_epi64x takes its arguments from the highest lane down to the lowest. */
  return _mm256_set_epi64x(hi_mask, hi_mask, lo_mask, lo_mask);
}
ATTR_TARGET_AVX2 | |||
void mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {_mm256_setr_m128i(cblock->w128[0], mm128_zero), | |||
mm256_zero}; | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 8, idx >>= 8, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask_2(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask_2(idx, 2)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask_2(idx, 4)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask_2(idx, 6)); | |||
} | |||
} | |||
cval[0] = mm256_xor(cval[0], cval[1]); | |||
cblock->w128[0] = _mm256_extracti128_si256( | |||
mm256_xor(cval[0], _mm256_permute4x64_epi64(cval[0], _MM_SHUFFLE(3, 2, 3, 2))), 0); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero}; | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 8, idx >>= 8, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask_2(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask_2(idx, 2)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask_2(idx, 4)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask_2(idx, 6)); | |||
} | |||
} | |||
cval[0] = mm256_xor(cval[0], cval[1]); | |||
cblock->w128[0] = _mm256_extracti128_si256( | |||
mm256_xor(cval[0], _mm256_permute4x64_epi64(cval[0], _MM_SHUFFLE(3, 2, 3, 2))), 0); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_addmul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {cblock->w256, mm256_zero}; | |||
{ | |||
Ablock += 63; | |||
word idx = (*vptr) >> 63; | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
vptr++; | |||
Ablock++; | |||
} | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask(idx, 1)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask(idx, 2)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w256 = mm256_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_mul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero}; | |||
{ | |||
Ablock += 63; | |||
word idx = (*vptr) >> 63; | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
vptr++; | |||
Ablock++; | |||
} | |||
for (unsigned int w = 2; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask(idx, 1)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask(idx, 2)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w256 = mm256_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {cblock->w256, mm256_zero}; | |||
for (unsigned int w = 3; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask(idx, 1)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask(idx, 2)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w256 = mm256_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero}; | |||
for (unsigned int w = 3; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask(idx, 1)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask(idx, 2)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w256 = mm256_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {cblock->w256, mm256_zero}; | |||
for (unsigned int w = 4; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask(idx, 1)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask(idx, 2)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w256 = mm256_xor(cval[0], cval[1]); | |||
} | |||
ATTR_TARGET_AVX2 | |||
void mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { | |||
block_t* cblock = BLOCK(c, 0); | |||
const word* vptr = CONST_BLOCK(v, 0)->w64; | |||
const block_t* Ablock = CONST_BLOCK(A, 0); | |||
word256 cval[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero}; | |||
for (unsigned int w = 4; w; --w, ++vptr) { | |||
word idx = *vptr; | |||
for (unsigned int i = sizeof(word) * 8; i; i -= 4, idx >>= 4, Ablock += 4) { | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[0].w256, mm256_compute_mask(idx, 0)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[1].w256, mm256_compute_mask(idx, 1)); | |||
cval[0] = mm256_xor_mask(cval[0], Ablock[2].w256, mm256_compute_mask(idx, 2)); | |||
cval[1] = mm256_xor_mask(cval[1], Ablock[3].w256, mm256_compute_mask(idx, 3)); | |||
} | |||
} | |||
cblock->w256 = mm256_xor(cval[0], cval[1]); | |||
} | |||
/**
 * Set the first idx 64-bit words of block to zero.
 */
static void clear_uint64_block(block_t* block, const unsigned int idx) {
  word* words = block->w64;

  for (unsigned int i = 0; i != idx; ++i) {
    words[i] = 0;
  }
}
/**
 * XOR the first idx 64-bit words of fblock, each ANDed with mask, onto rblock.
 */
static void mzd_xor_mask_uint64_block(block_t* rblock, const block_t* fblock, const word mask,
                                      const unsigned int idx) {
  for (unsigned int i = 0; i != idx; ++i) {
    const word masked = fblock->w64[i] & mask;
    rblock->w64[i]    = rblock->w64[i] ^ masked;
  }
}
/**
 * Multiply the 128-bit vector v with the matrix A and XOR the product onto c.
 *
 * Each block of A holds two 128-bit rows: words 0-1 belong to the even bit of
 * v, words 2-3 to the odd bit.
 */
void mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  for (unsigned int w = 0; w < 2; ++w) {
    word bits = vec[w];
    /* Each pass consumes two bits of v, i.e. one block of A. */
    for (unsigned int bit = 0; bit < sizeof(word) * 8; bit += 2) {
      const uint64_t even = -(bits & 1);
      const uint64_t odd  = -((bits >> 1) & 1);

      out->w64[0] ^= (rows->w64[0] & even) ^ (rows->w64[2] & odd);
      out->w64[1] ^= (rows->w64[1] & even) ^ (rows->w64[3] & odd);
      bits >>= 2;
      rows += 1;
    }
  }
}
/**
 * Multiply the 128-bit vector v with the matrix A and store the result in c.
 */
void mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out = BLOCK(c, 0);

  /* Zero the two result words, then accumulate the product onto them. */
  clear_uint64_block(out, 2);
  mzd_addmul_v_uint64_128(c, v, A);
}
/**
 * Multiply the 129-bit vector v with the matrix A and XOR the product onto c.
 *
 * The top bit of the first word of v selects block 63 of A; the following two
 * words of v select blocks 64 onwards, one block per bit. Three 64-bit words
 * of every selected block are accumulated.
 */
void mzd_addmul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0) + 63;

  /* Leading single bit: only bit 63 of the first word is used. */
  const word top           = vec[0] >> 63;
  const uint64_t top_mask = -(top & 1);
  mzd_xor_mask_uint64_block(out, rows, top_mask, 3);
  rows += 1;

  for (unsigned int w = 1; w < 3; ++w) {
    word bits = vec[w];
    for (unsigned int bit = 0; bit < sizeof(word) * 8; ++bit) {
      const uint64_t mask = -(bits & 1);
      mzd_xor_mask_uint64_block(out, rows, mask, 3);
      bits >>= 1;
      rows += 1;
    }
  }
}
/**
 * Multiply the 129-bit vector v with the matrix A and store the result in c.
 */
void mzd_mul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out = BLOCK(c, 0);

  /* Zero the three result words, then accumulate the product onto them. */
  clear_uint64_block(out, 3);
  mzd_addmul_v_uint64_129(c, v, A);
}
/**
 * Multiply the 192-bit vector v with the matrix A and XOR the product onto c.
 *
 * Every set bit of v selects one block of A, of which three 64-bit words are
 * XORed onto c.
 */
void mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  for (unsigned int w = 0; w < 3; ++w) {
    word bits = vec[w];
    for (unsigned int bit = 0; bit < sizeof(word) * 8; ++bit) {
      const uint64_t mask = -(bits & 1);
      mzd_xor_mask_uint64_block(out, rows, mask, 3);
      bits >>= 1;
      rows += 1;
    }
  }
}
/**
 * Multiply the 192-bit vector v with the matrix A and store the result in c.
 */
void mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out = BLOCK(c, 0);

  /* Zero the three result words, then accumulate the product onto them. */
  clear_uint64_block(out, 3);
  mzd_addmul_v_uint64_192(c, v, A);
}
/**
 * Multiply the 256-bit vector v with the matrix A and XOR the product onto c.
 *
 * Every set bit of v selects one block of A, of which all four 64-bit words
 * are XORed onto c.
 */
void mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  for (unsigned int w = 0; w < 4; ++w) {
    word bits = vec[w];
    for (unsigned int bit = 0; bit < sizeof(word) * 8; ++bit) {
      const uint64_t mask = -(bits & 1);
      mzd_xor_mask_uint64_block(out, rows, mask, 4);
      bits >>= 1;
      rows += 1;
    }
  }
}
/**
 * Multiply the 256-bit vector v with the matrix A and store the result in c.
 */
void mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out = BLOCK(c, 0);

  /* Zero the four result words, then accumulate the product onto them. */
  clear_uint64_block(out, 4);
  mzd_addmul_v_uint64_256(c, v, A);
}
// no SIMD | |||
@@ -0,0 +1,247 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
/* Inspired by m4ri's mzd implementation, but completely re-written for our use-case. */ | |||
#ifndef MZD_ADDITIONAL_H | |||
#define MZD_ADDITIONAL_H | |||
#include "macros.h" | |||
#include <stdbool.h> | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
typedef uint64_t word; | |||
#define WORD_C(v) UINT64_C(v) | |||
#include "simd.h" | |||
/**
 * One block of storage with three overlapping views: four 64-bit words (w64),
 * two word128 values (w128), or one word256 value (w256). The type is declared
 * with 32-byte alignment. word128 and word256 come from simd.h.
 */
typedef union { | |||
word w64[4]; | |||
word128 w128[2]; | |||
word256 w256; | |||
} block_t ATTR_ALIGNED(32); | |||
/** | |||
* Representation of matrices and vectors | |||
* | |||
* The basic memory unit is a block of 256 bits. Each row is stored in (possibly multiple) blocks | |||
* depending on the number of columns. Matrices with up to 128 columns are the only exception. In | |||
* this case a block actually contains two rows. The row with even index is contained in w64[0] and | |||
* w64[1], the row with odd index is contained in w64[2] and w64[3]. | |||
*/ | |||
typedef block_t mzd_local_t; | |||
/**
 * Allocation interface for mzd_local_t objects.
 *
 * mzd_local_init_ex is declared to return a pointer aligned to 32 bytes.
 * NOTE(review): r and c are presumably the row and column counts, and clear
 * presumably requests zeroed storage - the implementation is not visible here,
 * confirm against it.
 */
mzd_local_t* mzd_local_init_ex(unsigned int r, unsigned int c, bool clear) ATTR_ASSUME_ALIGNED(32); | |||
/* Shorthand for mzd_local_init_ex with clear set to true. */
#define mzd_local_init(r, c) mzd_local_init_ex(r, c, true) | |||
void mzd_local_free(mzd_local_t* v); | |||
/* Counterpart of mzd_local_init_ex that fills the array dst; dst must not be NULL.
 * NOTE(review): n is presumably the number of entries of dst to initialize - confirm. */
void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, unsigned int r, unsigned int c, bool clear) | |||
ATTR_NONNULL_ARG(1); | |||
/* Shorthand for mzd_local_init_multiple_ex with clear set to true. */
#define mzd_local_init_multiple(dst, n, r, c) mzd_local_init_multiple_ex(dst, n, r, c, true) | |||
/** | |||
* mzd_local_free for mzd_local_init_multiple. | |||
*/ | |||
void mzd_local_free_multiple(mzd_local_t** vs); | |||
/**
 * mzd_copy variants
 *
 * Copy src to dst. The name suffix gives the implementation flavour (uint64,
 * s128 or s256) followed by the number of bits.
 */
void mzd_copy_uint64_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
void mzd_copy_uint64_192(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
void mzd_copy_uint64_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
void mzd_copy_s128_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
void mzd_copy_s128_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
void mzd_copy_s256_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
void mzd_copy_s256_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL; | |||
/** | |||
* mzd_xor variants | |||
*/ | |||
void mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_uint64_640(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_uint64_960(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_uint64_1216(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s128_640(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s128_1024(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s128_1280(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
/** | |||
* mzd_and variants | |||
*/ | |||
void mzd_and_uint64_128(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_and_uint64_192(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_and_uint64_256(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_and_s128_128(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_and_s128_256(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_and_s256_128(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
void mzd_and_s256_256(mzd_local_t* res, mzd_local_t const* first, | |||
mzd_local_t const* second) ATTR_NONNULL; | |||
/** | |||
* shifts and rotations | |||
*/ | |||
void mzd_shift_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_shift_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_shift_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_shift_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_shift_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_shift_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
#if defined(PICNIC_STATIC) | |||
/* only needed for tests */ | |||
void mzd_rotate_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_rotate_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_rotate_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_rotate_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_rotate_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
void mzd_rotate_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count); | |||
#endif | |||
/** | |||
* Compute v * A optimized for v being a vector. | |||
*/ | |||
void mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_uint64_128_640(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_uint64_192_960(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s128_128_640(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s128_192_1024(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s128_256_1280(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_s256_128_768(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_s256_192_1024(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* At) ATTR_NONNULL; | |||
void mzd_mul_v_s256_256_1280(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* At) ATTR_NONNULL; | |||
/** | |||
* Compute v * A optimized for v being a vector, for specific sizes depending on instance | |||
* Only work for specific sizes and RLL_NEXT algorithm using uint64 operations | |||
*/ | |||
void mzd_addmul_v_uint64_30_128(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_uint64_30_192(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_uint64_30_256(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
/** | |||
* Use SSE2 or NEON | |||
*/ | |||
void mzd_addmul_v_s128_30_128(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s128_30_192(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s128_30_256(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
/** | |||
* Use AVX2 | |||
*/ | |||
void mzd_addmul_v_s256_30_128(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s256_30_192(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s256_30_256(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
/** | |||
* Compute using parity based algorithm | |||
* */ | |||
void mzd_mul_v_parity_uint64_128_30(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_parity_uint64_192_30(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_mul_v_parity_uint64_256_30(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
/** | |||
* Compute c + v * A optimized for c and v being vectors. | |||
*/ | |||
void mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, | |||
mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
void mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; | |||
/** | |||
* Shuffle vector x according to info in mask. Needed for OLLE optimizations. | |||
*/ | |||
void mzd_shuffle_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL; | |||
void mzd_shuffle_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL; | |||
void mzd_shuffle_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL; | |||
void mzd_shuffle_pext_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL; | |||
void mzd_shuffle_pext_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL; | |||
void mzd_shuffle_pext_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL; | |||
#define BLOCK(v, b) ((block_t*)ASSUME_ALIGNED(&(v)[(b)], 32)) | |||
#define CONST_BLOCK(v, b) ((const block_t*)ASSUME_ALIGNED(&(v)[(b)], 32)) | |||
#endif |
@@ -0,0 +1,390 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "picnic.h" | |||
#include <assert.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "compat.h" | |||
#include "io.h" | |||
#include "lowmc.h" | |||
#include "picnic_instances.h" | |||
#include "picnic3_impl.h" | |||
#include "randomness.h" | |||
// Public and private keys are serialized as follows:
// - public key: instance || C || p
// - secret key: instance || sk || C || p
//
// Each accessor below yields a pointer to the first byte of the named field.
// SK_C, SK_PT and PK_PT expand to expressions that mention `input_size` and/or
// `output_size`, so locals with exactly those names must be in scope wherever
// the macros are used.
//
// Every expansion is wrapped in parentheses so that it behaves as a single
// primary expression; without them a postfix operator applied to the macro
// (for example SK_C(sk)[0]) would bind before the address-of operator.
#define SK_SK(sk) (&(sk)->data[1])
#define SK_C(sk) (&(sk)->data[1 + input_size])
#define SK_PT(sk) (&(sk)->data[1 + input_size + output_size])

// NOTE(review): the public key layout above has no secret-key field; PK_SK
// expands to the same address as PK_C and is not referenced by the functions
// in this file. Kept only to avoid changing the set of defined names.
#define PK_SK(pk) (&(pk)->data[1])
#define PK_C(pk) (&(pk)->data[1])
#define PK_PT(pk) (&(pk)->data[1 + output_size])
// Report the `output_size` of the instance registered for `param`.
// Yields 0 when picnic_instance_get() has no instance for `param`.
size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t param) {
  const picnic_instance_t* inst = picnic_instance_get(param);
  return inst ? inst->output_size : 0;
}
// Report the `max_signature_size` of the instance registered for `param`.
// Yields 0 when picnic_instance_get() has no instance for `param`.
size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t param) {
  const picnic_instance_t* inst = picnic_instance_get(param);
  return inst ? inst->max_signature_size : 0;
}
// Serialized private key size: the public key size plus the instance's
// `input_size` (the private key layout is instance || sk || C || p).
// Yields 0 when picnic_instance_get() has no instance for `param`.
size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param) {
  const picnic_instance_t* inst = picnic_instance_get(param);
  return inst ? picnic_get_public_key_size(param) + inst->input_size : 0;
}
// Serialized public key size: one parameter-set byte followed by two fields
// of `output_size` bytes each (the public key layout is instance || C || p).
// Yields 0 when picnic_instance_get() has no instance for `param`.
size_t PICNIC_CALLING_CONVENTION picnic_get_public_key_size(picnic_params_t param) {
  const picnic_instance_t* inst = picnic_instance_get(param);
  return inst ? 1 + 2 * inst->output_size : 0;
}
// Generate a fresh key pair for `param`.
//
// Writes the parameter set into byte 0 of `sk`, fills the secret-key and
// plaintext fields through rand_bits(), derives `pk` with picnic_sk_to_pk()
// and finally mirrors the resulting ciphertext into `sk`.
//
// Returns 0 on success and -1 if an argument is NULL, the parameter set has
// no instance, or any of the steps above reports failure.
int PICNIC_CALLING_CONVENTION picnic_keygen(picnic_params_t param, picnic_publickey_t* pk,
                                            picnic_privatekey_t* sk) {
  if (pk == NULL || sk == NULL) {
    return -1;
  }

  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  // The SK_ and PK_ accessor macros pick these two locals up by name.
  const size_t input_size  = inst->input_size;
  const size_t output_size = inst->output_size;

  // Tag the private key with its parameter set before filling the fields.
  sk->data[0] = param;

  // Random secret key, random plaintext, then encrypt the plaintext under the
  // secret key. Short-circuit evaluation keeps the original step order and
  // stops at the first failure.
  if (rand_bits(SK_SK(sk), inst->lowmc.k) || rand_bits(SK_PT(sk), inst->lowmc.n) ||
      picnic_sk_to_pk(sk, pk)) {
    return -1;
  }

  // The private key carries a copy of the public ciphertext.
  memcpy(SK_C(sk), PK_C(pk), output_size);
  return 0;
}
// Derive the public key that belongs to `sk`.
//
// The plaintext stored in `sk` is encrypted under the stored secret key using
// the instance's LowMC implementation; `pk` receives the parameter set, the
// resulting ciphertext and a copy of the plaintext.
//
// Returns 0 on success and -1 if an argument is NULL or byte 0 of `sk` does
// not name a parameter set known to picnic_instance_get().
int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk,
                                              picnic_publickey_t* pk) {
  if (!sk || !pk) {
    return -1;
  }

  const picnic_params_t param       = sk->data[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (!instance) {
    return -1;
  }

  // Referenced by name from the SK_ and PK_ accessor macros.
  const size_t input_size  = instance->input_size;
  const size_t output_size = instance->output_size;

  const uint8_t* sk_sk = SK_SK(sk);
  const uint8_t* sk_pt = SK_PT(sk);
  uint8_t* pk_c        = PK_C(pk);
  uint8_t* pk_pt       = PK_PT(pk);

  mzd_local_t plaintext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t privkey[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t ciphertext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];

  mzd_from_char_array(plaintext, sk_pt, output_size);
  mzd_from_char_array(privkey, sk_sk, input_size);

  // compute public key
  instance->impls.lowmc(privkey, plaintext, ciphertext);

  pk->data[0] = param;
  memcpy(pk_pt, sk_pt, output_size);
  mzd_to_char_array(pk_c, ciphertext, output_size);

  // `privkey` is an unpacked copy of the secret key; do not leave it behind
  // on the stack once the public key has been written.
  explicit_bzero(privkey, sizeof(privkey));
  return 0;
}
// Check that `sk` and `pk` form a consistent key pair.
//
// The parameter set, plaintext and ciphertext stored in both keys have to
// agree, and re-encrypting the plaintext under the secret key has to
// reproduce the ciphertext stored in `pk`.
//
// Returns 0 if the pair is valid. Returns -1 if an argument is NULL, the
// parameter set is unknown or the stored fields differ; otherwise returns the
// non-zero memcmp() result of the recomputed against the stored ciphertext.
int PICNIC_CALLING_CONVENTION picnic_validate_keypair(const picnic_privatekey_t* sk,
                                                      const picnic_publickey_t* pk) {
  if (!sk || !pk) {
    return -1;
  }

  const picnic_params_t param       = sk->data[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (!instance) {
    return -1;
  }

  // Referenced by name from the SK_ and PK_ accessor macros.
  const size_t input_size  = instance->input_size;
  const size_t output_size = instance->output_size;

  const uint8_t* sk_sk = SK_SK(sk);
  const uint8_t* sk_pt = SK_PT(sk);
  const uint8_t* sk_c  = SK_C(sk);
  const uint8_t* pk_pt = PK_PT(pk);
  const uint8_t* pk_c  = PK_C(pk);

  // check param, plaintext and ciphertext copies
  if (param != pk->data[0] || memcmp(sk_pt, pk_pt, output_size) != 0 ||
      memcmp(sk_c, pk_c, output_size) != 0) {
    return -1;
  }

  mzd_local_t plaintext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t privkey[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t ciphertext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];

  mzd_from_char_array(plaintext, sk_pt, output_size);
  mzd_from_char_array(privkey, sk_sk, input_size);

  // compute public key
  instance->impls.lowmc(privkey, plaintext, ciphertext);

  uint8_t buffer[MAX_LOWMC_BLOCK_SIZE];
  mzd_to_char_array(buffer, ciphertext, output_size);
  const int mismatch = memcmp(buffer, pk_c, output_size);

  // `privkey` is an unpacked copy of the secret key; wipe it before leaving.
  explicit_bzero(privkey, sizeof(privkey));
  return mismatch;
}
// Sign `message` with the private key `sk`.
//
// Only the Picnic3 parameter sets are dispatched (to impl_sign_picnic3());
// every other parameter set is rejected.
//
// Returns -1 if `sk`, `signature` or `signature_len` is NULL, if byte 0 of
// `sk` names no known instance, or if the parameter set is not a Picnic3 one;
// otherwise returns whatever impl_sign_picnic3() returns.
int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, const uint8_t* message,
                                          size_t message_len, uint8_t* signature,
                                          size_t* signature_len) {
  if (sk == NULL || signature == NULL || signature_len == NULL) {
    return -1;
  }

  const picnic_params_t param   = sk->data[0];
  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  // The SK_ accessor macros pick these two locals up by name.
  const size_t input_size  = inst->input_size;
  const size_t output_size = inst->output_size;

  switch (param) {
  case Picnic3_L1:
  case Picnic3_L3:
  case Picnic3_L5:
    // Argument order expected by the implementation: plaintext, secret key,
    // ciphertext.
    return impl_sign_picnic3(inst, SK_PT(sk), SK_SK(sk), SK_C(sk), message, message_len,
                             signature, signature_len);
  default:
    return -1;
  }
}
// Verify `signature` over `message` against the public key `pk`.
//
// Only the Picnic3 parameter sets are dispatched (to impl_verify_picnic3());
// every other parameter set is rejected.
//
// Returns -1 if `pk` or `signature` is NULL, if `signature_len` is 0, if byte
// 0 of `pk` names no known instance, or if the parameter set is not a Picnic3
// one; otherwise returns whatever impl_verify_picnic3() returns.
int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const uint8_t* message,
                                            size_t message_len, const uint8_t* signature,
                                            size_t signature_len) {
  if (pk == NULL || signature == NULL || signature_len == 0) {
    return -1;
  }

  const picnic_params_t param   = pk->data[0];
  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  // PK_PT picks this local up by name.
  const size_t output_size = inst->output_size;

  switch (param) {
  case Picnic3_L1:
  case Picnic3_L3:
  case Picnic3_L5:
    // Argument order expected by the implementation: plaintext, ciphertext.
    return impl_verify_picnic3(inst, PK_PT(pk), PK_C(pk), message, message_len, signature,
                               signature_len);
  default:
    return -1;
  }
}
// Map a parameter set to its printable name.
//
// Returns a pointer to a string literal; values that do not correspond to a
// named parameter set yield "Unknown parameter set".
const char* PICNIC_CALLING_CONVENTION picnic_get_param_name(picnic_params_t parameters) {
  // Names indexed by parameter-set value. Slots without an initializer
  // (including PARAMETER_SET_INVALID) stay NULL.
  static const char* const names[PARAMETER_SET_MAX_INDEX] = {
      [Picnic_L1_FS]   = "Picnic_L1_FS",
      [Picnic_L1_UR]   = "Picnic_L1_UR",
      [Picnic_L3_FS]   = "Picnic_L3_FS",
      [Picnic_L3_UR]   = "Picnic_L3_UR",
      [Picnic_L5_FS]   = "Picnic_L5_FS",
      [Picnic_L5_UR]   = "Picnic_L5_UR",
      [Picnic3_L1]     = "Picnic3_L1",
      [Picnic3_L3]     = "Picnic3_L3",
      [Picnic3_L5]     = "Picnic3_L5",
      [Picnic_L1_full] = "Picnic_L1_full",
      [Picnic_L3_full] = "Picnic_L3_full",
      [Picnic_L5_full] = "Picnic_L5_full",
  };

  // The unsigned conversion sends negative inputs past the end of the table.
  const unsigned int idx = (unsigned int)parameters;
  if (idx < (unsigned int)PARAMETER_SET_MAX_INDEX && names[idx] != NULL) {
    return names[idx];
  }
  return "Unknown parameter set";
}
// Serialize the public key `key` into `buf`.
//
// Copies the parameter-set byte plus two `output_size` fields.
//
// Returns the number of bytes written, or -1 if an argument is NULL, byte 0
// of `key` names no known instance, or `buflen` is too small.
int PICNIC_CALLING_CONVENTION picnic_write_public_key(const picnic_publickey_t* key, uint8_t* buf,
                                                      size_t buflen) {
  if (key == NULL || buf == NULL) {
    return -1;
  }

  const picnic_params_t param   = key->data[0];
  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  const size_t field_len = inst->output_size;
  const size_t needed    = 1 + 2 * field_len;
  if (needed > buflen) {
    return -1;
  }

  memcpy(buf, key->data, needed);
  return (int)needed;
}
// De-serialize a public key from `buf` into `key`.
//
// For the parameter sets listed in the switch below, the last byte of each of
// the two fields is passed to check_padding_bits() together with the number
// of unused bits; a non-zero result rejects the key.
//
// Returns 0 on success, or -1 if an argument is NULL, `buflen` is 0 or too
// small, byte 0 of `buf` names no known instance, or a padding check fails.
int PICNIC_CALLING_CONVENTION picnic_read_public_key(picnic_publickey_t* key, const uint8_t* buf,
                                                     size_t buflen) {
  if (key == NULL || buf == NULL || buflen < 1) {
    return -1;
  }

  const picnic_params_t param   = buf[0];
  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  const size_t output_size = inst->output_size;
  const size_t needed      = 1 + 2 * output_size;
  if (needed > buflen) {
    return -1;
  }

  switch (param) {
  case Picnic_L1_full:
  case Picnic_L5_full:
  case Picnic3_L1:
  case Picnic3_L5: {
    // Number of bits in the last byte of a field that carry no data.
    const unsigned int diff = output_size * 8 - inst->lowmc.n;
    // buf[output_size] and buf[2 * output_size] are the final bytes of the
    // first and second field respectively.
    if (check_padding_bits(buf[output_size], diff) ||
        check_padding_bits(buf[2 * output_size], diff)) {
      return -1;
    }
    break;
  }
  default:
    break;
  }

  memcpy(key->data, buf, needed);
  return 0;
}
// Serialize the private key `key` into `buf`.
//
// Copies the parameter-set byte, one `input_size` field and two
// `output_size` fields.
//
// Returns the number of bytes written, or -1 if an argument is NULL, byte 0
// of `key` names no known instance, or `buflen` is too small.
int PICNIC_CALLING_CONVENTION picnic_write_private_key(const picnic_privatekey_t* key, uint8_t* buf,
                                                       size_t buflen) {
  if (key == NULL || buf == NULL) {
    return -1;
  }

  const picnic_params_t param   = key->data[0];
  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  const size_t sk_len    = inst->input_size;
  const size_t field_len = inst->output_size;
  const size_t needed    = 1 + sk_len + 2 * field_len;
  if (needed > buflen) {
    return -1;
  }

  memcpy(buf, key->data, needed);
  return (int)needed;
}
// De-serialize a private key from `buf` into `key`.
//
// For the parameter sets listed in the switch below, the last byte of each of
// the three fields is passed to check_padding_bits(); the results are OR-ed
// together without short-circuiting, the combined value is handed to
// picnic_declassify(), and a non-zero value rejects the key.
//
// Returns 0 on success, or -1 if an argument is NULL, `buflen` is 0 or too
// small, byte 0 of `buf` names no known instance, or a padding check fails.
int PICNIC_CALLING_CONVENTION picnic_read_private_key(picnic_privatekey_t* key, const uint8_t* buf,
                                                      size_t buflen) {
  if (key == NULL || buf == NULL || buflen < 1) {
    return -1;
  }

  const picnic_params_t param   = buf[0];
  const picnic_instance_t* inst = picnic_instance_get(param);
  if (inst == NULL) {
    return -1;
  }

  const size_t input_size  = inst->input_size;
  const size_t output_size = inst->output_size;
  const size_t needed      = 1 + input_size + 2 * output_size;
  if (needed > buflen) {
    return -1;
  }

  switch (param) {
  case Picnic_L1_full:
  case Picnic_L5_full:
  case Picnic3_L1:
  case Picnic3_L5: {
    // Number of bits in the last byte of a field that carry no data; the
    // assertion states that key and block fields share the same amount.
    const unsigned int diff = output_size * 8 - inst->lowmc.n;
    assert(diff == input_size * 8 - inst->lowmc.k);

    /* sanity check: padding bits need to be 0 in the final byte of the
     * secret key, the ciphertext and the plaintext field */
    const int check = check_padding_bits(buf[input_size], diff) |
                      check_padding_bits(buf[input_size + output_size], diff) |
                      check_padding_bits(buf[input_size + 2 * output_size], diff);
    picnic_declassify(&check, sizeof(check));
    if (check) {
      return -1;
    }
    break;
  }
  default:
    break;
  }

  memcpy(key->data, buf, needed);
  return 0;
}
// Overwrite the whole private key object with zero bytes via explicit_bzero().
//
// A NULL `key` is accepted and ignored, in line with the NULL checks performed
// by the other entry points in this file; previously it was passed straight to
// explicit_bzero().
void PICNIC_CALLING_CONVENTION picnic_clear_private_key(picnic_privatekey_t* key) {
  if (!key) {
    return;
  }
  explicit_bzero(key, sizeof(picnic_privatekey_t));
}
@@ -0,0 +1,285 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC_H | |||
#define PICNIC_H | |||
#if !defined(PICNIC_EXPORT) | |||
#if !defined(PICNIC_STATIC) && (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) | |||
#define PICNIC_EXPORT __declspec(dllimport) | |||
#else | |||
#define PICNIC_EXPORT | |||
#endif | |||
#endif | |||
#if defined(_WIN16) || defined(_WIN32) || defined(_WIN64) | |||
#define PICNIC_CALLING_CONVENTION __stdcall | |||
#else | |||
#define PICNIC_CALLING_CONVENTION | |||
#endif | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <stdio.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#define PICNIC_CONCAT2(a, b) a##_##b | |||
#define PICNIC_CONCAT(a, b) PICNIC_CONCAT2(a, b) | |||
/* Block sizes of the LowMC ciphers per parameter */ | |||
#define LOWMC_BLOCK_SIZE_Picnic_L1_FS 16 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L1_UR 16 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L3_FS 24 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L3_UR 24 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L5_FS 32 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L5_UR 32 | |||
#define LOWMC_BLOCK_SIZE_Picnic3_L1 17 | |||
#define LOWMC_BLOCK_SIZE_Picnic3_L3 24 | |||
#define LOWMC_BLOCK_SIZE_Picnic3_L5 32 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L1_full 17 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L3_full 24 | |||
#define LOWMC_BLOCK_SIZE_Picnic_L5_full 32 | |||
#define LOWMC_BLOCK_SIZE(p) PICNIC_CONCAT(LOWMC_BLOCK_SIZE, p) | |||
#define PICNIC_PRIVATE_KEY_SIZE(p) (1 + 3 * LOWMC_BLOCK_SIZE(p)) | |||
#define PICNIC_PUBLIC_KEY_SIZE(p) (1 + 2 * LOWMC_BLOCK_SIZE(p)) | |||
/* Max. signature sizes per parameter */ | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_FS 34032 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_UR 53961 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_FS 76772 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_UR 121845 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_FS 132856 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_UR 209506 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic3_L1 14608 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic3_L3 35024 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic3_L5 61024 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_full 32061 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_full 71179 | |||
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_full 126286 | |||
#define PICNIC_SIGNATURE_SIZE(p) PICNIC_CONCAT(PICNIC_SIGNATURE_SIZE, p) | |||
#define PICNIC_MAX_LOWMC_BLOCK_SIZE LOWMC_BLOCK_SIZE(Picnic_L5_UR) | |||
#define PICNIC_MAX_PRIVATEKEY_SIZE PICNIC_PRIVATE_KEY_SIZE(Picnic_L5_UR) | |||
#define PICNIC_MAX_PUBLICKEY_SIZE PICNIC_PUBLIC_KEY_SIZE(Picnic_L5_UR) | |||
#define PICNIC_MAX_SIGNATURE_SIZE PICNIC_SIGNATURE_SIZE(Picnic_L5_UR) | |||
/** Parameter set names */ | |||
/* Identifiers of the supported parameter sets.
 *
 * picnic.c stores the numeric value in byte 0 of every key object and reads
 * it back from byte 0 of a serialized key, so renumbering an enumerator
 * changes the serialized key format.
 *
 * PARAMETER_SET_INVALID (0) and PARAMETER_SET_MAX_INDEX (13) lie just outside
 * the range of named parameter sets (1..12). */
typedef enum { | |||
PARAMETER_SET_INVALID = 0, | |||
/* ZKB++ with LowMC m=10 */ | |||
Picnic_L1_FS = 1, | |||
Picnic_L1_UR = 2, | |||
Picnic_L3_FS = 3, | |||
Picnic_L3_UR = 4, | |||
Picnic_L5_FS = 5, | |||
Picnic_L5_UR = 6, | |||
/* KKW with full LowMC */ | |||
Picnic3_L1 = 7, | |||
Picnic3_L3 = 8, | |||
Picnic3_L5 = 9, | |||
/* ZKB++ with full LowMC */ | |||
Picnic_L1_full = 10, | |||
Picnic_L3_full = 11, | |||
Picnic_L5_full = 12, | |||
PARAMETER_SET_MAX_INDEX = 13 | |||
} picnic_params_t; | |||
/** Public key */ | |||
/* Byte layout used by picnic.c: parameter set (1 byte) || C || p.
 * The buffer is sized by PICNIC_MAX_PUBLICKEY_SIZE, which is derived from the
 * Picnic_L5_UR block size. */
typedef struct { | |||
uint8_t data[PICNIC_MAX_PUBLICKEY_SIZE]; | |||
} picnic_publickey_t; | |||
/** Private key */ | |||
/* Byte layout used by picnic.c: parameter set (1 byte) || sk || C || p.
 * The buffer is sized by PICNIC_MAX_PRIVATEKEY_SIZE, which is derived from the
 * Picnic_L5_UR block size. */
typedef struct { | |||
uint8_t data[PICNIC_MAX_PRIVATEKEY_SIZE]; | |||
} picnic_privatekey_t; | |||
/** | |||
* Get a string representation of the parameter set. | |||
* | |||
* @param parameters A parameter set | |||
* | |||
* @return A null-terminated string describing the parameter set. | |||
*/ | |||
PICNIC_EXPORT const char* PICNIC_CALLING_CONVENTION | |||
picnic_get_param_name(picnic_params_t parameters); | |||
/* Signature API */ | |||
/** | |||
* Key generation function. | |||
* Generates a public and private key pair, for the specified parameter set. | |||
* | |||
* @param[in] parameters The parameter set to use when generating a key. | |||
* @param[out] pk The new public key. | |||
* @param[out] sk The new private key. | |||
* | |||
* @return Returns 0 for success, or a nonzero value indicating an error. | |||
* | |||
* @see picnic_verify(), picnic_sign() | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_keygen(picnic_params_t parameters, | |||
picnic_publickey_t* pk, | |||
picnic_privatekey_t* sk); | |||
/** | |||
* Signature function. | |||
* Signs a message with the given keypair. | |||
* | |||
* @param[in] sk The signer's private key. | |||
* @param[in] message The message to be signed. | |||
* @param[in] message_len The length of the message, in bytes. | |||
* @param[out] signature A buffer to hold the signature. The required size does | |||
* not exceed PICNIC_MAX_SIGNATURE_SIZE bytes. The specific max number of | |||
* bytes required for a parameter set is given by picnic_signature_size(). Note | |||
* that the length of each signature varies slightly, for the parameter sets | |||
* using the FS transform. The parameter sets using the Unruh transform have a | |||
* fixed length. | |||
* @param[in,out] signature_len The length of the provided signature buffer. | |||
* On success, this is set to the number of bytes written to the signature buffer. | |||
* | |||
* @return Returns 0 for success, or a nonzero value indicating an error. | |||
* | |||
* @see picnic_verify(), picnic_keygen(), picnic_signature_size() | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, | |||
const uint8_t* message, size_t message_len, | |||
uint8_t* signature, size_t* signature_len); | |||
/** | |||
* Get the number of bytes required to hold a signature. | |||
* | |||
* @param[in] parameters The parameter set of the signature. | |||
* | |||
* @return The number of bytes required to hold the signature created by | |||
* picnic_sign | |||
* | |||
* @note The size of signatures with parameter sets using the FS transform vary | |||
* slightly based on the random choices made during signing. This function | |||
* will return a sufficient number of bytes to hold a signature, and the | |||
* picnic_sign() function returns the exact number used for a given signature. | |||
* | |||
* @see picnic_sign() | |||
*/ | |||
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t parameters); | |||
/** | |||
* Verification function. | |||
* Verifies a signature is valid with respect to a public key and message. | |||
* | |||
* @param[in] pk The signer's public key. | |||
* @param[in] message The message the signature purportedly signs. | |||
* @param[in] message_len The length of the message, in bytes. | |||
* @param[in] signature The signature to verify. | |||
* @param[in] signature_len The length of the signature. | |||
* | |||
* @return Returns 0 for success, indicating a valid signature, or a nonzero | |||
* value indicating an error or an invalid signature. | |||
* | |||
* @see picnic_sign(), picnic_keygen() | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, | |||
const uint8_t* message, | |||
size_t message_len, | |||
const uint8_t* signature, | |||
size_t signature_len); | |||
/** | |||
* Serialize a public key. | |||
* | |||
* @param[in] key The public key to serialize | |||
* @param[out] buf The buffer to write the key to. | |||
* Must have size at least PICNIC_MAX_PUBLICKEY_SIZE bytes. | |||
* @param[in] buflen The length of buf, in bytes | |||
* | |||
* @return Returns the number of bytes written, or -1 on error. | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_write_public_key(const picnic_publickey_t* key, | |||
uint8_t* buf, size_t buflen); | |||
/** | |||
* De-serialize a public key. | |||
* | |||
* @param[out] key The public key object to be populated. | |||
* @param[in] buf The buffer to read the public key from. | |||
* Must be at least PICNIC_MAX_PUBLICKEY_SIZE bytes. | |||
* @param[in] buflen The length of buf, in bytes | |||
* | |||
* @return Returns 0 on success, or a nonzero value indicating an error. | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_read_public_key(picnic_publickey_t* key, | |||
const uint8_t* buf, | |||
size_t buflen); | |||
/** | |||
* Serialize a private key. | |||
* | |||
* @param[in] key The private key to serialize | |||
* @param[out] buf The buffer to write the key to. | |||
* Must have size at least PICNIC_MAX_PRIVATEKEY_SIZE bytes. | |||
* @param[in] buflen The length of buf, in bytes | |||
* | |||
* @return Returns the number of bytes written, or -1 on error. | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_write_private_key(const picnic_privatekey_t* key, | |||
uint8_t* buf, size_t buflen); | |||
/** | |||
* De-serialize a private key. | |||
* | |||
* @param[out] key The private key object to be populated | |||
* @param[in] buf The buffer to read the key from. | |||
* Must have size at least PICNIC_MAX_PRIVATEKEY_SIZE bytes. | |||
* @param[in] buflen The length of buf, in bytes | |||
* | |||
* @return Returns 0 on success, or a nonzero value indicating an error. | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_read_private_key(picnic_privatekey_t* key, | |||
const uint8_t* buf, | |||
size_t buflen); | |||
/** | |||
* Check that a key pair is valid. | |||
* | |||
* @param[in] privatekey The private key to check | |||
* @param[in] publickey The public key to check | |||
* | |||
* @return Returns 0 if the key pair is valid, or a nonzero value indicating an error | |||
*/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION | |||
picnic_validate_keypair(const picnic_privatekey_t* privatekey, const picnic_publickey_t* publickey); | |||
/** | |||
* Clear data of a private key. | |||
* | |||
* @param[out] key The private key to clear | |||
*/ | |||
PICNIC_EXPORT void PICNIC_CALLING_CONVENTION picnic_clear_private_key(picnic_privatekey_t* key); | |||
/** | |||
* Compute public key from private key. | |||
* | |||
* @param[in] privatekey The private key | |||
* @param[out] publickey The public key to be populated | |||
* @return Returns 0 on success, or a nonzero value indicating an error. | |||
**/ | |||
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* privatekey, | |||
picnic_publickey_t* publickey); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif |
@@ -0,0 +1,971 @@ | |||
/*! @file picnic3_impl.c | |||
* @brief This is the main file of the signature scheme for the Picnic3 | |||
* parameter sets. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include <assert.h> | |||
#include <stdbool.h> | |||
#include <stdint.h> | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "io.h" | |||
#include "kdf_shake.h" | |||
#include "macros.h" | |||
#include "picnic.h" | |||
#include "picnic3_impl.h" | |||
#include "picnic3_tree.h" | |||
#include "picnic3_types.h" | |||
/* Helper functions */ | |||
ATTR_CONST | |||
static uint32_t numBytes(uint32_t numBits) { | |||
return (numBits + 7) >> 3; | |||
} | |||
static void createRandomTapes(randomTape_t* tapes, uint8_t** seeds, uint8_t* salt, size_t t, | |||
const picnic_instance_t* params) { | |||
hash_context_x4 ctx; | |||
size_t tapeSizeBytes = 2 * params->view_size; | |||
allocateRandomTape(tapes, params); | |||
assert(params->num_MPC_parties % 4 == 0); | |||
for (size_t i = 0; i < params->num_MPC_parties; i += 4) { | |||
hash_init_x4(&ctx, params->digest_size); | |||
const uint8_t* seeds_ptr[4] = {seeds[i], seeds[i + 1], seeds[i + 2], seeds[i + 3]}; | |||
hash_update_x4(&ctx, seeds_ptr, params->seed_size); | |||
const uint8_t* salt_ptr[4] = {salt, salt, salt, salt}; | |||
hash_update_x4(&ctx, salt_ptr, SALT_SIZE); | |||
hash_update_x4_uint16_le(&ctx, t); | |||
const uint16_t i_arr[4] = {i + 0, i + 1, i + 2, i + 3}; | |||
hash_update_x4_uint16s_le(&ctx, i_arr); | |||
hash_final_x4(&ctx); | |||
uint8_t* out_ptr[4] = {tapes->tape[i], tapes->tape[i + 1], tapes->tape[i + 2], | |||
tapes->tape[i + 3]}; | |||
hash_squeeze_x4(&ctx, out_ptr, tapeSizeBytes); | |||
} | |||
} | |||
/* Input is the tapes for one parallel repitition; i.e., tapes[t] | |||
* Updates the random tapes of all players with the mask values for the output of | |||
* AND gates, and computes the N-th party's share such that the AND gate invariant | |||
* holds on the mask values. | |||
*/ | |||
static void computeAuxTape(randomTape_t* tapes, uint8_t* input_masks, | |||
const picnic_instance_t* params) { | |||
mzd_local_t lowmc_key[1]; | |||
size_t tapeSizeBytes = 2 * params->view_size; | |||
// combine into key shares and calculate lowmc evaluation in plain | |||
for (size_t i = 0; i < params->num_MPC_parties; i++) { | |||
for (size_t j = 0; j < tapeSizeBytes; j++) { | |||
tapes->parity_tapes[j] ^= tapes->tape[i][j]; | |||
} | |||
} | |||
mzd_from_char_array(lowmc_key, tapes->parity_tapes, params->input_size); | |||
tapes->pos = params->lowmc.n; | |||
tapes->aux_pos = 0; | |||
memset(tapes->aux_bits, 0, params->view_size); | |||
lowmc_compute_aux_implementation_f lowmc_aux_impl = params->impls.lowmc_aux; | |||
// Perform LowMC evaluation and fix AND masks for all AND gates | |||
lowmc_aux_impl(lowmc_key, tapes); | |||
// write the key masks to the input | |||
if (input_masks != NULL) { | |||
mzd_to_char_array(input_masks, lowmc_key, params->input_size); | |||
} | |||
// Reset the random tape counter so that the online execution uses the | |||
// same random bits as when computing the aux shares | |||
tapes->pos = 0; | |||
} | |||
static void commit(uint8_t* digest, const uint8_t* seed, const uint8_t* aux, const uint8_t* salt, | |||
size_t t, size_t j, const picnic_instance_t* params) { | |||
/* Compute C[t][j]; as digest = H(seed||[aux]) aux is optional */ | |||
hash_context ctx; | |||
hash_init(&ctx, params->digest_size); | |||
hash_update(&ctx, seed, params->seed_size); | |||
if (aux != NULL) { | |||
hash_update(&ctx, aux, params->view_size); | |||
} | |||
hash_update(&ctx, salt, SALT_SIZE); | |||
hash_update_uint16_le(&ctx, t); | |||
hash_update_uint16_le(&ctx, j); | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, digest, params->digest_size); | |||
} | |||
static void commit_x4(uint8_t** digest, const uint8_t** seed, const uint8_t* salt, size_t t, | |||
size_t j, const picnic_instance_t* params) { | |||
/* Compute C[t][j]; as digest = H(seed||[aux]) aux is optional */ | |||
hash_context_x4 ctx; | |||
hash_init_x4(&ctx, params->digest_size); | |||
hash_update_x4(&ctx, seed, params->seed_size); | |||
const uint8_t* salt_ptr[4] = {salt, salt, salt, salt}; | |||
hash_update_x4(&ctx, salt_ptr, SALT_SIZE); | |||
hash_update_x4_uint16_le(&ctx, t); | |||
const uint16_t j_arr[4] = {j + 0, j + 1, j + 2, j + 3}; | |||
hash_update_x4_uint16s_le(&ctx, j_arr); | |||
hash_final_x4(&ctx); | |||
hash_squeeze_x4(&ctx, digest, params->digest_size); | |||
} | |||
static void commit_h(uint8_t* digest, const commitments_t* C, const picnic_instance_t* params) { | |||
hash_context ctx; | |||
hash_init(&ctx, params->digest_size); | |||
for (size_t i = 0; i < params->num_MPC_parties; i++) { | |||
hash_update(&ctx, C->hashes[i], params->digest_size); | |||
} | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, digest, params->digest_size); | |||
} | |||
static void commit_h_x4(uint8_t** digest, const commitments_t* C, const picnic_instance_t* params) { | |||
hash_context_x4 ctx; | |||
hash_init_x4(&ctx, params->digest_size); | |||
for (size_t i = 0; i < params->num_MPC_parties; i++) { | |||
const uint8_t* data[4] = { | |||
C[0].hashes[i], | |||
C[1].hashes[i], | |||
C[2].hashes[i], | |||
C[3].hashes[i], | |||
}; | |||
hash_update_x4(&ctx, data, params->digest_size); | |||
} | |||
hash_final_x4(&ctx); | |||
hash_squeeze_x4(&ctx, digest, params->digest_size); | |||
} | |||
// Commit to the views for one parallel rep | |||
static void commit_v(uint8_t* digest, const uint8_t* input, const msgs_t* msgs, | |||
const picnic_instance_t* params) { | |||
hash_context ctx; | |||
hash_init(&ctx, params->digest_size); | |||
hash_update(&ctx, input, params->input_size); | |||
for (size_t i = 0; i < params->num_MPC_parties; i++) { | |||
hash_update(&ctx, msgs->msgs[i], numBytes(msgs->pos)); | |||
} | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, digest, params->digest_size); | |||
} | |||
static void commit_v_x4(uint8_t** digest, const uint8_t** input, const msgs_t* msgs, | |||
const picnic_instance_t* params) { | |||
hash_context_x4 ctx; | |||
hash_init_x4(&ctx, params->digest_size); | |||
hash_update_x4(&ctx, input, params->input_size); | |||
for (size_t i = 0; i < params->num_MPC_parties; i++) { | |||
assert(msgs[0].pos == msgs[1].pos && msgs[2].pos == msgs[3].pos && msgs[0].pos == msgs[2].pos); | |||
const uint8_t* data[4] = { | |||
msgs[0].msgs[i], | |||
msgs[1].msgs[i], | |||
msgs[2].msgs[i], | |||
msgs[3].msgs[i], | |||
}; | |||
hash_update_x4(&ctx, data, numBytes(msgs->pos)); | |||
} | |||
hash_final_x4(&ctx); | |||
hash_squeeze_x4(&ctx, digest, params->digest_size); | |||
} | |||
/* out[k] = in1[k] ^ in2[k] for the first `length` bytes; out may alias an input. */
static void xor_byte_array(uint8_t* out, const uint8_t* in1, const uint8_t* in2, uint32_t length) {
  uint32_t k = 0;
  while (k != length) {
    out[k] = in1[k] ^ in2[k];
    ++k;
  }
}
/* Returns 1 if value occurs among the first len entries of list, 0 otherwise. */
static int contains(const uint16_t* list, size_t len, uint16_t value) {
  int found = 0;
  for (size_t k = 0; k < len && !found; k++) {
    found = (list[k] == value);
  }
  return found;
}
/* Returns the index of the first occurrence of value in list. Callers must
 * only pass values that are present; otherwise the assertion fires and -1 is
 * returned in builds without assertions. */
static int indexOf(const uint16_t* list, size_t len, uint16_t value) {
  size_t pos = 0;
  while (pos < len && list[pos] != value) {
    pos++;
  }
  if (pos < len) {
    return pos;
  }
  assert(!"indexOf called on list where value is not found. (caller bug)");
  return -1;
}
/* Copy the aux bits from input into the last party's tape. For every LowMC
 * round j, n consecutive input bits are written starting at tape bit
 * n + 2 * n * j. */
static void setAuxBits(randomTape_t* tapes, uint8_t* input, const picnic_instance_t* params) {
  const size_t lastParty = params->num_MPC_parties - 1;
  size_t srcBit          = 0;

  for (size_t round = 0; round < params->lowmc.r; round++) {
    const size_t dstBase = params->lowmc.n + params->lowmc.n * 2 * (round);
    for (size_t k = 0; k < params->lowmc.n; k++) {
      setBit(tapes->tape[lastParty], dstBase + k, getBit(input, srcBit));
      srcBit++;
    }
  }
}
/* Split the bits of input into chunks of chunkLenBits bits each; within a
 * chunk, bit j of the result is the j-th bit read. Leftover bits that do not
 * fill a whole chunk are ignored.
 *
 * Returns the number of chunks written to chunks, or 0 if the input holds
 * fewer than chunkLenBits bits.
 */
static size_t bitsToChunks(size_t chunkLenBits, const uint8_t* input, size_t inputLen,
                           uint16_t* chunks) {
  const size_t totalBits = inputLen * 8;
  if (chunkLenBits > totalBits) {
    assert(!"Invalid input to bitsToChunks: not enough input");
    return 0;
  }

  const size_t chunkCount = totalBits / chunkLenBits;
  size_t bitPos           = 0;
  for (size_t c = 0; c < chunkCount; c++) {
    chunks[c] = 0;
    for (size_t j = 0; j < chunkLenBits; j++, bitPos++) {
      chunks[c] += getBit(input, bitPos) << j;
      assert(chunks[c] < (1 << chunkLenBits));
    }
  }
  return chunkCount;
}
/* Append value at list[position] unless it already occurs among the first
 * `position` entries. Returns the new number of entries in the list. */
static size_t appendUnique(uint16_t* list, uint16_t value, size_t position) {
  size_t k = 0;
  while (k < position && list[k] != value) {
    k++;
  }
  if (k < position) {
    /* already present: list is left untouched */
    return position;
  }
  list[position] = value;
  return position + 1;
}
static void expandChallenge(uint16_t* challengeC, uint16_t* challengeP, const uint8_t* sigH, | |||
const picnic_instance_t* params) { | |||
uint8_t h[MAX_DIGEST_SIZE] = {0}; | |||
hash_context ctx; | |||
memcpy(h, sigH, params->digest_size); | |||
// Populate C | |||
uint32_t bitsPerChunkC = ceil_log2(params->num_rounds); | |||
uint32_t bitsPerChunkP = ceil_log2(params->num_MPC_parties); | |||
uint16_t* chunks = | |||
calloc(params->digest_size * 8 / MIN(bitsPerChunkP, bitsPerChunkC), sizeof(uint16_t)); | |||
size_t countC = 0; | |||
while (countC < params->num_opened_rounds) { | |||
size_t numChunks = bitsToChunks(bitsPerChunkC, h, params->digest_size, chunks); | |||
for (size_t i = 0; i < numChunks; i++) { | |||
if (chunks[i] < params->num_rounds) { | |||
countC = appendUnique(challengeC, chunks[i], countC); | |||
} | |||
if (countC == params->num_opened_rounds) { | |||
break; | |||
} | |||
} | |||
hash_init_prefix(&ctx, params->digest_size, HASH_PREFIX_1); | |||
hash_update(&ctx, h, params->digest_size); | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, h, params->digest_size); | |||
} | |||
// Note that we always compute h = H(h) after setting C | |||
size_t countP = 0; | |||
while (countP < params->num_opened_rounds) { | |||
size_t numChunks = bitsToChunks(bitsPerChunkP, h, params->digest_size, chunks); | |||
for (size_t i = 0; i < numChunks; i++) { | |||
if (chunks[i] < params->num_MPC_parties) { | |||
challengeP[countP] = chunks[i]; | |||
countP++; | |||
} | |||
if (countP == params->num_opened_rounds) { | |||
break; | |||
} | |||
} | |||
hash_init_prefix(&ctx, params->digest_size, HASH_PREFIX_1); | |||
hash_update(&ctx, h, params->digest_size); | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, h, params->digest_size); | |||
} | |||
free(chunks); | |||
} | |||
/* Compute the challenge hash
 *   sigH = H(Ch[0] || ... || Ch[num_rounds - 1] || hCv || salt || pubKey || plaintext || message)
 * and expand it into the challenge lists challengeC and challengeP.
 */
static void HCP(uint8_t* sigH, uint16_t* challengeC, uint16_t* challengeP, commitments_t* Ch,
                uint8_t* hCv, uint8_t* salt, const uint8_t* pubKey, const uint8_t* plaintext,
                const uint8_t* message, size_t messageByteLength, const picnic_instance_t* params) {
  assert(params->num_opened_rounds < params->num_rounds);

  hash_context h;
  hash_init(&h, params->digest_size);

  size_t round = 0;
  while (round < params->num_rounds) {
    hash_update(&h, Ch->hashes[round], params->digest_size);
    round++;
  }

  hash_update(&h, hCv, params->digest_size);
  hash_update(&h, salt, SALT_SIZE);
  hash_update(&h, pubKey, params->input_size);
  hash_update(&h, plaintext, params->input_size);
  hash_update(&h, message, messageByteLength);
  hash_final(&h);
  hash_squeeze(&h, sigH, params->digest_size);

  /* parts of this hash will be published as challenge so is public anyway */
  picnic_declassify(sigH, params->digest_size);

  expandChallenge(challengeC, challengeP, sigH, params);
}
static uint16_t* getMissingLeavesList(uint16_t* challengeC, const picnic_instance_t* params) { | |||
size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds; | |||
uint16_t* missingLeaves = calloc(missingLeavesSize, sizeof(uint16_t)); | |||
size_t pos = 0; | |||
for (size_t i = 0; i < params->num_rounds; i++) { | |||
if (!contains(challengeC, params->num_opened_rounds, i)) { | |||
missingLeaves[pos] = i; | |||
pos++; | |||
} | |||
} | |||
return missingLeaves; | |||
} | |||
static int verify_picnic3(signature2_t* sig, const uint8_t* pubKey, const uint8_t* plaintext, | |||
const uint8_t* message, size_t messageByteLength, | |||
const picnic_instance_t* params) { | |||
commitments_t C[4]; | |||
allocateCommitments2(&C[0], params, params->num_MPC_parties); | |||
allocateCommitments2(&C[1], params, params->num_MPC_parties); | |||
allocateCommitments2(&C[2], params, params->num_MPC_parties); | |||
allocateCommitments2(&C[3], params, params->num_MPC_parties); | |||
msgs_t* msgs = allocateMsgsVerify(params); | |||
tree_t* treeCv = createTree(params->num_rounds, params->digest_size); | |||
size_t challengeSizeBytes = params->num_opened_rounds * sizeof(uint16_t); | |||
uint16_t* challengeC = malloc(challengeSizeBytes); | |||
uint16_t* challengeP = malloc(challengeSizeBytes); | |||
uint8_t challenge[MAX_DIGEST_SIZE]; | |||
randomTape_t* tapes = malloc(params->num_rounds * sizeof(randomTape_t)); | |||
tree_t* iSeedsTree = createTree(params->num_rounds, params->seed_size); | |||
int ret = reconstructSeeds(iSeedsTree, sig->challengeC, params->num_opened_rounds, sig->iSeedInfo, | |||
sig->iSeedInfoLen, sig->salt, 0, params); | |||
const size_t last = params->num_MPC_parties - 1; | |||
lowmc_simulate_online_f simulateOnline = params->impls.lowmc_simulate_online; | |||
commitments_t Ch; | |||
allocateCommitments2(&Ch, params, params->num_rounds); | |||
commitments_t Cv; | |||
allocateCommitments2(&Cv, params, params->num_rounds); | |||
mzd_local_t m_plaintext[1]; | |||
mzd_local_t m_maskedKey[1]; | |||
mzd_from_char_array(m_plaintext, plaintext, params->output_size); | |||
if (ret != 0) { | |||
ret = -1; | |||
goto Exit; | |||
} | |||
/* Populate seeds with values from the signature */ | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
tree_t* seed = NULL; | |||
if (!contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
/* Expand iSeed[t] to seeds for each parties, using a seed tree */ | |||
seed = generateSeeds(params->num_MPC_parties, getLeaf(iSeedsTree, t), sig->salt, t, params); | |||
} else { | |||
/* We don't have the initial seed for the round, but instead a seed | |||
* for each unopened party */ | |||
seed = createTree(params->num_MPC_parties, params->seed_size); | |||
size_t P_index = indexOf(sig->challengeC, params->num_opened_rounds, t); | |||
uint16_t hideList[1]; | |||
hideList[0] = sig->challengeP[P_index]; | |||
ret = reconstructSeeds(seed, hideList, 1, sig->proofs[t].seedInfo, sig->proofs[t].seedInfoLen, | |||
sig->salt, t, params); | |||
if (ret != 0) { | |||
#if !defined(NDEBUG) | |||
printf("Failed to reconstruct seeds for round " SIZET_FMT "\n", t); | |||
#endif | |||
freeTree(seed); | |||
ret = -1; | |||
goto Exit; | |||
} | |||
} | |||
/* Commit */ | |||
/* Compute random tapes for all parties. One party for each repitition | |||
* challengeC will have a bogus seed; but we won't use that party's | |||
* random tape. */ | |||
createRandomTapes(&tapes[t], getLeaves(seed), sig->salt, t, params); | |||
if (!contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
/* We're given iSeed, have expanded the seeds, compute aux from scratch so we can comnpte | |||
* Com[t] */ | |||
computeAuxTape(&tapes[t], NULL, params); | |||
for (size_t j = 0; j < params->num_MPC_parties; j += 4) { | |||
const uint8_t* seed_ptr[4] = {getLeaf(seed, j + 0), getLeaf(seed, j + 1), | |||
getLeaf(seed, j + 2), getLeaf(seed, j + 3)}; | |||
commit_x4(C[t % 4].hashes + j, seed_ptr, sig->salt, t, j, params); | |||
} | |||
commit(C[t % 4].hashes[last], getLeaf(seed, last), tapes[t].aux_bits, sig->salt, t, last, | |||
params); | |||
/* after we have checked the tape, we do not need it anymore for this opened iteration */ | |||
} else { | |||
/* We're given all seeds and aux bits, execpt for the unopened | |||
* party, we get their commitment */ | |||
size_t unopened = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)]; | |||
for (size_t j = 0; j < params->num_MPC_parties; j += 4) { | |||
const uint8_t* seed_ptr[4] = {getLeaf(seed, j + 0), getLeaf(seed, j + 1), | |||
getLeaf(seed, j + 2), getLeaf(seed, j + 3)}; | |||
commit_x4(C[t % 4].hashes + j, seed_ptr, sig->salt, t, j, params); | |||
} | |||
if (last != unopened) { | |||
commit(C[t % 4].hashes[last], getLeaf(seed, last), sig->proofs[t].aux, sig->salt, t, last, | |||
params); | |||
} | |||
memcpy(C[t % 4].hashes[unopened], sig->proofs[t].C, params->digest_size); | |||
} | |||
/* hash commitments every four iterations if possible, for the last few do single commitments | |||
*/ | |||
if (t >= params->num_rounds / 4 * 4) { | |||
commit_h(Ch.hashes[t], &C[t % 4], params); | |||
} else if ((t + 1) % 4 == 0) { | |||
size_t t4 = t / 4 * 4; | |||
commit_h_x4(&Ch.hashes[t4], &C[0], params); | |||
} | |||
freeTree(seed); | |||
} | |||
/* Commit to the views */ | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
if (!contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
Cv.hashes[t] = NULL; | |||
} | |||
} | |||
for (size_t i = 0; i < params->num_opened_rounds; i++) { | |||
/* 2. When t is in C, we have everything we need to re-compute the view, as an honest signer | |||
* would. | |||
* We simulate the MPC with one fewer party; the unopned party's values are all set to zero. | |||
*/ | |||
size_t t = sig->challengeC[i]; | |||
int unopened = sig->challengeP[i]; | |||
uint8_t* input = sig->proofs[t].input; | |||
setAuxBits(&tapes[t], sig->proofs[t].aux, params); | |||
memset(tapes[t].tape[unopened], 0, 2 * params->view_size); | |||
memcpy(msgs->msgs[unopened], sig->proofs[t].msgs, params->view_size); | |||
mzd_from_char_array(m_maskedKey, input, params->input_size); | |||
msgs->unopened = unopened; | |||
msgs->pos = 0; | |||
ret = simulateOnline(m_maskedKey, &tapes[t], msgs, m_plaintext, pubKey, params); | |||
if (ret != 0) { | |||
#if !defined(NDEBUG) | |||
printf("MPC simulation failed for round " SIZET_FMT ", signature invalid\n", i); | |||
#endif | |||
ret = -1; | |||
goto Exit; | |||
} | |||
commit_v(Cv.hashes[t], sig->proofs[t].input, msgs, params); | |||
} | |||
size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds; | |||
uint16_t* missingLeaves = getMissingLeavesList(sig->challengeC, params); | |||
ret = addMerkleNodes(treeCv, missingLeaves, missingLeavesSize, sig->cvInfo, sig->cvInfoLen); | |||
free(missingLeaves); | |||
if (ret != 0) { | |||
ret = -1; | |||
goto Exit; | |||
} | |||
ret = verifyMerkleTree(treeCv, Cv.hashes, sig->salt, params); | |||
if (ret != 0) { | |||
ret = -1; | |||
goto Exit; | |||
} | |||
/* Compute the challenge; two lists of integers */ | |||
HCP(challenge, challengeC, challengeP, &Ch, treeCv->nodes[0], sig->salt, pubKey, plaintext, | |||
message, messageByteLength, params); | |||
/* Compare to challenge from signature */ | |||
if (memcmp(sig->challenge, challenge, params->digest_size) != 0) { | |||
#if !defined(NDEBUG) | |||
printf("Challenge does not match, signature invalid\n"); | |||
#endif | |||
ret = -1; | |||
goto Exit; | |||
} | |||
ret = EXIT_SUCCESS; | |||
Exit: | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
freeRandomTape(&tapes[t]); | |||
} | |||
freeCommitments2(&Cv); | |||
freeCommitments2(&Ch); | |||
freeTree(iSeedsTree); | |||
free(tapes); | |||
free(challengeP); | |||
free(challengeC); | |||
freeTree(treeCv); | |||
freeMsgs(msgs); | |||
freeCommitments2(&C[3]); | |||
freeCommitments2(&C[2]); | |||
freeCommitments2(&C[1]); | |||
freeCommitments2(&C[0]); | |||
return ret; | |||
} | |||
static void computeSaltAndRootSeed(uint8_t* saltAndRoot, size_t saltAndRootLength, | |||
const uint8_t* privateKey, const uint8_t* pubKey, | |||
const uint8_t* plaintext, const uint8_t* message, | |||
size_t messageByteLength, const picnic_instance_t* params) { | |||
hash_context ctx; | |||
hash_init(&ctx, params->digest_size); | |||
hash_update(&ctx, privateKey, params->input_size); | |||
hash_update(&ctx, message, messageByteLength); | |||
hash_update(&ctx, pubKey, params->input_size); | |||
hash_update(&ctx, plaintext, params->input_size); | |||
hash_update_uint16_le(&ctx, (uint16_t)params->lowmc.n); | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, saltAndRoot, saltAndRootLength); | |||
} | |||
static int sign_picnic3(const uint8_t* privateKey, const uint8_t* pubKey, const uint8_t* plaintext, | |||
const uint8_t* message, size_t messageByteLength, signature2_t* sig, | |||
const picnic_instance_t* params) { | |||
int ret = 0; | |||
uint8_t* saltAndRoot = malloc(params->seed_size + SALT_SIZE); | |||
computeSaltAndRootSeed(saltAndRoot, params->seed_size + SALT_SIZE, privateKey, pubKey, plaintext, | |||
message, messageByteLength, params); | |||
memcpy(sig->salt, saltAndRoot, SALT_SIZE); | |||
tree_t* iSeedsTree = | |||
generateSeeds(params->num_rounds, saltAndRoot + SALT_SIZE, sig->salt, 0, params); | |||
uint8_t** iSeeds = getLeaves(iSeedsTree); | |||
free(saltAndRoot); | |||
randomTape_t* tapes = malloc(params->num_rounds * sizeof(randomTape_t)); | |||
tree_t** seeds = malloc(params->num_rounds * sizeof(tree_t*)); | |||
commitments_t* C = allocateCommitments(params, 0); | |||
lowmc_simulate_online_f simulateOnline = params->impls.lowmc_simulate_online; | |||
inputs_t inputs = allocateInputs(params); | |||
msgs_t* msgs = allocateMsgs(params); | |||
/* Commitments to the commitments and views */ | |||
commitments_t Ch; | |||
allocateCommitments2(&Ch, params, params->num_rounds); | |||
commitments_t Cv; | |||
allocateCommitments2(&Cv, params, params->num_rounds); | |||
mzd_local_t m_plaintext[1]; | |||
mzd_local_t m_maskedKey[1]; | |||
mzd_from_char_array(m_plaintext, plaintext, params->output_size); | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
seeds[t] = generateSeeds(params->num_MPC_parties, iSeeds[t], sig->salt, t, params); | |||
createRandomTapes(&tapes[t], getLeaves(seeds[t]), sig->salt, t, params); | |||
/* Preprocessing; compute aux tape for the N-th player, for each parallel rep */ | |||
computeAuxTape(&tapes[t], inputs[t], params); | |||
/* Commit to seeds and aux bits */ | |||
assert(params->num_MPC_parties % 4 == 0); | |||
for (size_t j = 0; j < params->num_MPC_parties; j += 4) { | |||
const uint8_t* seed_ptr[4] = {getLeaf(seeds[t], j + 0), getLeaf(seeds[t], j + 1), | |||
getLeaf(seeds[t], j + 2), getLeaf(seeds[t], j + 3)}; | |||
commit_x4(C[t].hashes + j, seed_ptr, sig->salt, t, j, params); | |||
} | |||
const size_t last = params->num_MPC_parties - 1; | |||
commit(C[t].hashes[last], getLeaf(seeds[t], last), tapes[t].aux_bits, sig->salt, t, last, | |||
params); | |||
} | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
/* Simulate the online phase of the MPC */ | |||
uint8_t* maskedKey = inputs[t]; | |||
xor_byte_array(maskedKey, maskedKey, privateKey, | |||
params->input_size); // maskedKey += privateKey | |||
for (size_t i = params->lowmc.n; i < params->input_size * 8; i++) { | |||
setBit(maskedKey, i, 0); | |||
} | |||
mzd_from_char_array(m_maskedKey, maskedKey, params->input_size); | |||
int rv = simulateOnline(m_maskedKey, &tapes[t], &msgs[t], m_plaintext, pubKey, params); | |||
if (rv != 0) { | |||
#if !defined(NDEBUG) | |||
printf("MPC simulation failed in round " SIZET_FMT ", aborting signature\n", t); | |||
#endif | |||
ret = -1; | |||
} | |||
} | |||
/* Commit to the commitments and views */ | |||
{ | |||
size_t t = 0; | |||
for (; t < params->num_rounds / 4 * 4; t += 4) { | |||
commit_h_x4(&Ch.hashes[t], &C[t], params); | |||
commit_v_x4(&Cv.hashes[t], (const uint8_t**)&inputs[t], &msgs[t], params); | |||
} | |||
for (; t < params->num_rounds; t++) { | |||
commit_h(Ch.hashes[t], &C[t], params); | |||
commit_v(Cv.hashes[t], inputs[t], &msgs[t], params); | |||
} | |||
} | |||
/* Create a Merkle tree with Cv as the leaves */ | |||
tree_t* treeCv = createTree(params->num_rounds, params->digest_size); | |||
buildMerkleTree(treeCv, Cv.hashes, sig->salt, params); | |||
/* Compute the challenge; two lists of integers */ | |||
uint16_t* challengeC = sig->challengeC; | |||
uint16_t* challengeP = sig->challengeP; | |||
HCP(sig->challenge, challengeC, challengeP, &Ch, treeCv->nodes[0], sig->salt, pubKey, plaintext, | |||
message, messageByteLength, params); | |||
/* Send information required for checking commitments with Merkle tree. | |||
* The commitments the verifier will be missing are those not in challengeC. */ | |||
size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds; | |||
uint16_t* missingLeaves = getMissingLeavesList(challengeC, params); | |||
size_t cvInfoLen = 0; | |||
uint8_t* cvInfo = openMerkleTree(treeCv, missingLeaves, missingLeavesSize, &cvInfoLen); | |||
sig->cvInfo = cvInfo; | |||
sig->cvInfoLen = cvInfoLen; | |||
free(missingLeaves); | |||
/* Reveal iSeeds for unopned rounds, those in {0..T-1} \ ChallengeC. */ | |||
sig->iSeedInfo = malloc(params->num_rounds * params->seed_size); | |||
sig->iSeedInfoLen = revealSeeds(iSeedsTree, challengeC, params->num_opened_rounds, sig->iSeedInfo, | |||
params->num_rounds * params->seed_size, params); | |||
sig->iSeedInfo = realloc(sig->iSeedInfo, sig->iSeedInfoLen); | |||
/* Assemble the proof */ | |||
proof2_t* proofs = sig->proofs; | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
if (contains(challengeC, params->num_opened_rounds, t)) { | |||
allocateProof2(&proofs[t], params); | |||
size_t P_index = indexOf(challengeC, params->num_opened_rounds, t); | |||
proofs[t].unOpenedIndex = challengeP[P_index]; | |||
uint16_t hideList[1]; | |||
hideList[0] = challengeP[P_index]; | |||
proofs[t].seedInfo = malloc(params->num_MPC_parties * params->seed_size); | |||
proofs[t].seedInfoLen = revealSeeds(seeds[t], hideList, 1, proofs[t].seedInfo, | |||
params->num_MPC_parties * params->seed_size, params); | |||
proofs[t].seedInfo = realloc(proofs[t].seedInfo, proofs[t].seedInfoLen); | |||
size_t last = params->num_MPC_parties - 1; | |||
if (challengeP[P_index] != last) { | |||
memcpy(proofs[t].aux, tapes[t].aux_bits, params->view_size); | |||
} | |||
memcpy(proofs[t].input, inputs[t], params->input_size); | |||
memcpy(proofs[t].msgs, msgs[t].msgs[challengeP[P_index]], params->view_size); | |||
/* recompute commitment of unopened party since we did not store it for memory optimization | |||
*/ | |||
if (proofs[t].unOpenedIndex == params->num_MPC_parties - 1) { | |||
commit(proofs[t].C, getLeaf(seeds[t], proofs[t].unOpenedIndex), tapes[t].aux_bits, | |||
sig->salt, t, proofs[t].unOpenedIndex, params); | |||
} else { | |||
commit(proofs[t].C, getLeaf(seeds[t], proofs[t].unOpenedIndex), NULL, sig->salt, t, | |||
proofs[t].unOpenedIndex, params); | |||
} | |||
} | |||
} | |||
sig->proofs = proofs; | |||
freeTree(treeCv); | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
freeRandomTape(&tapes[t]); | |||
freeTree(seeds[t]); | |||
} | |||
freeCommitments2(&Cv); | |||
freeCommitments2(&Ch); | |||
freeMsgs(msgs); | |||
freeInputs(inputs); | |||
freeCommitments(C); | |||
free(seeds); | |||
free(tapes); | |||
freeTree(iSeedsTree); | |||
return ret; | |||
} | |||
/* Returns 1 when check_padding_bits() reports 0 for the last byte of data and
 * the number of padding bits (byteLength * 8 - bitLength), 0 otherwise. */
static int arePaddingBitsZero(uint8_t* data, size_t byteLength, size_t bitLength) {
  const size_t lastIndex = byteLength - 1;
  if (check_padding_bits(data[lastIndex], byteLength * 8 - bitLength)) {
    return 0;
  }
  return 1;
}
static int deserializeSignature2(signature2_t* sig, const uint8_t* sigBytes, size_t sigBytesLen, | |||
const picnic_instance_t* params) { | |||
/* Read the challenge and salt */ | |||
size_t bytesRequired = params->digest_size + SALT_SIZE; | |||
if (sigBytesLen < bytesRequired) { | |||
return EXIT_FAILURE; | |||
} | |||
memcpy(sig->challenge, sigBytes, params->digest_size); | |||
sigBytes += params->digest_size; | |||
memcpy(sig->salt, sigBytes, SALT_SIZE); | |||
sigBytes += SALT_SIZE; | |||
expandChallenge(sig->challengeC, sig->challengeP, sig->challenge, params); | |||
/* Add size of iSeeds tree data */ | |||
sig->iSeedInfoLen = | |||
revealSeedsSize(params->num_rounds, sig->challengeC, params->num_opened_rounds, params); | |||
bytesRequired += sig->iSeedInfoLen; | |||
/* Add the size of the Cv Merkle tree data */ | |||
size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds; | |||
uint16_t* missingLeaves = getMissingLeavesList(sig->challengeC, params); | |||
sig->cvInfoLen = openMerkleTreeSize(params->num_rounds, missingLeaves, missingLeavesSize, params); | |||
bytesRequired += sig->cvInfoLen; | |||
free(missingLeaves); | |||
/* Compute the number of bytes required for the proofs */ | |||
uint16_t hideList[1] = {0}; | |||
size_t seedInfoLen = revealSeedsSize(params->num_MPC_parties, hideList, 1, params); | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
if (contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
size_t P_t = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)]; | |||
if (P_t != (params->num_MPC_parties - 1)) { | |||
bytesRequired += params->view_size; | |||
} | |||
bytesRequired += params->digest_size; | |||
bytesRequired += params->input_size; | |||
bytesRequired += params->view_size; | |||
bytesRequired += seedInfoLen; | |||
} | |||
} | |||
/* Fail if the signature does not have the exact number of bytes we expect */ | |||
if (sigBytesLen != bytesRequired) { | |||
#if !defined(NDEBUG) | |||
printf("%s: sigBytesLen = " SIZET_FMT ", expected bytesRequired = " SIZET_FMT "\n", __func__, | |||
sigBytesLen, bytesRequired); | |||
#endif | |||
return EXIT_FAILURE; | |||
} | |||
sig->iSeedInfo = malloc(sig->iSeedInfoLen); | |||
memcpy(sig->iSeedInfo, sigBytes, sig->iSeedInfoLen); | |||
sigBytes += sig->iSeedInfoLen; | |||
sig->cvInfo = malloc(sig->cvInfoLen); | |||
memcpy(sig->cvInfo, sigBytes, sig->cvInfoLen); | |||
sigBytes += sig->cvInfoLen; | |||
/* Read the proofs */ | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
if (contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
allocateProof2(&sig->proofs[t], params); | |||
sig->proofs[t].seedInfoLen = seedInfoLen; | |||
sig->proofs[t].seedInfo = malloc(sig->proofs[t].seedInfoLen); | |||
memcpy(sig->proofs[t].seedInfo, sigBytes, sig->proofs[t].seedInfoLen); | |||
sigBytes += sig->proofs[t].seedInfoLen; | |||
size_t P_t = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)]; | |||
if (P_t != (params->num_MPC_parties - 1)) { | |||
memcpy(sig->proofs[t].aux, sigBytes, params->view_size); | |||
sigBytes += params->view_size; | |||
if (!arePaddingBitsZero(sig->proofs[t].aux, params->view_size, | |||
3 * params->lowmc.r * params->lowmc.m)) { | |||
#if !defined(NDEBUG) | |||
printf("%s: failed while deserializing aux bits\n", __func__); | |||
#endif | |||
return -1; | |||
} | |||
} | |||
memcpy(sig->proofs[t].input, sigBytes, params->input_size); | |||
if (!arePaddingBitsZero(sig->proofs[t].input, params->input_size, params->lowmc.n)) { | |||
#if !defined(NDEBUG) | |||
printf("%s: failed while deserializing input bits\n", __func__); | |||
#endif | |||
return -1; | |||
} | |||
sigBytes += params->input_size; | |||
size_t msgsByteLength = params->view_size; | |||
memcpy(sig->proofs[t].msgs, sigBytes, msgsByteLength); | |||
sigBytes += msgsByteLength; | |||
size_t msgsBitLength = 3 * params->lowmc.r * params->lowmc.m; | |||
if (!arePaddingBitsZero(sig->proofs[t].msgs, msgsByteLength, msgsBitLength)) { | |||
#if !defined(NDEBUG) | |||
printf("%s: failed while deserializing msgs bits\n", __func__); | |||
#endif | |||
return -1; | |||
} | |||
memcpy(sig->proofs[t].C, sigBytes, params->digest_size); | |||
sigBytes += params->digest_size; | |||
} | |||
} | |||
return EXIT_SUCCESS; | |||
} | |||
static int serializeSignature2(const signature2_t* sig, uint8_t* sigBytes, size_t sigBytesLen, | |||
const picnic_instance_t* params) { | |||
uint8_t* sigBytesBase = sigBytes; | |||
/* Compute the number of bytes required for the signature */ | |||
size_t bytesRequired = params->digest_size + SALT_SIZE; /* challenge and salt */ | |||
bytesRequired += | |||
sig->iSeedInfoLen; /* Encode only iSeedInfo, the length will be recomputed by deserialize */ | |||
bytesRequired += sig->cvInfoLen; | |||
for (size_t t = 0; t < params->num_rounds; t++) { /* proofs */ | |||
if (contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
size_t P_t = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)]; | |||
bytesRequired += sig->proofs[t].seedInfoLen; | |||
if (P_t != (params->num_MPC_parties - 1)) { | |||
bytesRequired += params->view_size; | |||
} | |||
bytesRequired += params->digest_size; | |||
bytesRequired += params->input_size; | |||
bytesRequired += params->view_size; | |||
} | |||
} | |||
if (sigBytesLen < bytesRequired) { | |||
return -1; | |||
} | |||
memcpy(sigBytes, sig->challenge, params->digest_size); | |||
sigBytes += params->digest_size; | |||
memcpy(sigBytes, sig->salt, SALT_SIZE); | |||
sigBytes += SALT_SIZE; | |||
memcpy(sigBytes, sig->iSeedInfo, sig->iSeedInfoLen); | |||
sigBytes += sig->iSeedInfoLen; | |||
memcpy(sigBytes, sig->cvInfo, sig->cvInfoLen); | |||
sigBytes += sig->cvInfoLen; | |||
/* Write the proofs */ | |||
for (size_t t = 0; t < params->num_rounds; t++) { | |||
if (contains(sig->challengeC, params->num_opened_rounds, t)) { | |||
memcpy(sigBytes, sig->proofs[t].seedInfo, sig->proofs[t].seedInfoLen); | |||
sigBytes += sig->proofs[t].seedInfoLen; | |||
size_t P_t = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)]; | |||
if (P_t != (params->num_MPC_parties - 1)) { | |||
memcpy(sigBytes, sig->proofs[t].aux, params->view_size); | |||
sigBytes += params->view_size; | |||
} | |||
memcpy(sigBytes, sig->proofs[t].input, params->input_size); | |||
sigBytes += params->input_size; | |||
memcpy(sigBytes, sig->proofs[t].msgs, params->view_size); | |||
sigBytes += params->view_size; | |||
memcpy(sigBytes, sig->proofs[t].C, params->digest_size); | |||
sigBytes += params->digest_size; | |||
} | |||
} | |||
return (int)(sigBytes - sigBytesBase); | |||
} | |||
int impl_sign_picnic3(const picnic_instance_t* instance, const uint8_t* plaintext, | |||
const uint8_t* private_key, const uint8_t* public_key, const uint8_t* msg, | |||
size_t msglen, uint8_t* signature, size_t* signature_len) { | |||
signature2_t* sig = (signature2_t*)malloc(sizeof(signature2_t)); | |||
allocateSignature2(sig, instance); | |||
if (sig == NULL) { | |||
return -1; | |||
} | |||
int ret = sign_picnic3(private_key, public_key, plaintext, msg, msglen, sig, instance); | |||
picnic_declassify(&ret, sizeof(ret)); | |||
if (ret != EXIT_SUCCESS) { | |||
#if !defined(NDEBUG) | |||
fprintf(stderr, "Failed to create signature\n"); | |||
fflush(stderr); | |||
#endif | |||
freeSignature2(sig, instance); | |||
free(sig); | |||
return -1; | |||
} | |||
ret = serializeSignature2(sig, signature, *signature_len, instance); | |||
if (ret == -1) { | |||
#if !defined(NDEBUG) | |||
fprintf(stderr, "Failed to serialize signature\n"); | |||
fflush(stderr); | |||
#endif | |||
freeSignature2(sig, instance); | |||
free(sig); | |||
return -1; | |||
} | |||
*signature_len = ret; | |||
freeSignature2(sig, instance); | |||
free(sig); | |||
return 0; | |||
} | |||
int impl_verify_picnic3(const picnic_instance_t* instance, const uint8_t* plaintext, | |||
const uint8_t* public_key, const uint8_t* msg, size_t msglen, | |||
const uint8_t* signature, size_t signature_len) { | |||
int ret; | |||
signature2_t* sig = (signature2_t*)malloc(sizeof(signature2_t)); | |||
allocateSignature2(sig, instance); | |||
if (sig == NULL) { | |||
return -1; | |||
} | |||
ret = deserializeSignature2(sig, signature, signature_len, instance); | |||
if (ret != EXIT_SUCCESS) { | |||
#if !defined(NDEBUG) | |||
fprintf(stderr, "Failed to deserialize signature\n"); | |||
fflush(stderr); | |||
#endif | |||
freeSignature2(sig, instance); | |||
free(sig); | |||
return -1; | |||
} | |||
ret = verify_picnic3(sig, public_key, plaintext, msg, msglen, instance); | |||
if (ret != EXIT_SUCCESS) { | |||
/* Signature is invalid, or verify function failed */ | |||
freeSignature2(sig, instance); | |||
free(sig); | |||
return -1; | |||
} | |||
freeSignature2(sig, instance); | |||
free(sig); | |||
return 0; | |||
} |
@@ -0,0 +1,52 @@ | |||
/*! @file picnic3_impl.h | |||
* @brief This is the main implementation file of the signature scheme for | |||
* the Picnic3 parameter sets. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC3_IMPL_H | |||
#define PICNIC3_IMPL_H | |||
#include <stdint.h> | |||
#include <stddef.h> | |||
#include "picnic_instances.h" | |||
/*
 * Proof data for a single opened online execution (one MPC round that the
 * verifier checks). The buffers are serialized per round in the order
 * seedInfo, aux (optional), input, msgs, C.
 */
typedef struct proof2_t { | |||
uint16_t unOpenedIndex; // P[t], index of the party that is not opened. | |||
uint8_t* seedInfo; // Information required to compute the tree with seeds of all opened parties | |||
size_t seedInfoLen; // Length of seedInfo buffer | |||
uint8_t* aux; // Last party's correction bits; NULL if P[t] == N-1 | |||
uint8_t* C; // Commitment to preprocessing step of unopened party | |||
uint8_t* input; // Masked input used in online execution | |||
uint8_t* msgs; // Broadcast messages of unopened party P[t] | |||
} proof2_t; | |||
/*
 * In-memory form of a Picnic3 signature. The serialized layout is
 * challenge || salt || iSeedInfo || cvInfo || proofs of the opened rounds.
 */
typedef struct signature2_t { | |||
/* Salt, SALT_SIZE bytes; written right after the challenge when serializing. */
uint8_t salt[SALT_SIZE]; | |||
uint8_t* iSeedInfo; // Info required to recompute the tree of all initial seeds | |||
size_t iSeedInfoLen; | |||
uint8_t* cvInfo; // Info required to check commitments to views (reconstruct Merkle tree) | |||
size_t cvInfoLen; | |||
uint8_t* challenge; // output of HCP | |||
/* Round indices that are opened; searched with num_opened_rounds entries. */
uint16_t* challengeC; | |||
/* challengeP[i] is the unopened party for the round listed in challengeC[i]. */
uint16_t* challengeP; | |||
proof2_t* proofs; // One proof for each online execution the verifier checks | |||
} signature2_t; | |||
/*
 * Sign `msg` (msglen bytes) and serialize the signature into `sig`.
 * `*siglen` is the capacity of `sig` on entry and the number of bytes written
 * on success. Returns 0 on success, -1 on failure.
 */
int impl_sign_picnic3(const picnic_instance_t* pp, const uint8_t* plaintext, | |||
const uint8_t* private_key, const uint8_t* public_key, const uint8_t* msg, | |||
size_t msglen, uint8_t* sig, size_t* siglen); | |||
/*
 * Deserialize `signature` (signature_len bytes) and verify it over `msg`.
 * Returns 0 if the signature is well-formed and valid, -1 otherwise.
 */
int impl_verify_picnic3(const picnic_instance_t* instance, const uint8_t* plaintext, | |||
const uint8_t* public_key, const uint8_t* msg, size_t msglen, | |||
const uint8_t* signature, size_t signature_len); | |||
/*
 * Set up / tear down the buffers owned by a caller-allocated signature2_t.
 * NOTE(review): freeSignature2 presumably does not free `sig` itself, since the
 * sign/verify entry points call free(sig) right after it; confirm against the
 * definition.
 */
void allocateSignature2(signature2_t* sig, const picnic_instance_t* params); | |||
void freeSignature2(signature2_t* sig, const picnic_instance_t* params); | |||
#endif /* PICNIC3_IMPL_H */ |
@@ -0,0 +1,516 @@ | |||
/*! @file picnic3_simulate.c | |||
* @brief This is the main file of the signature scheme for the Picnic3 | |||
* parameter sets. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include <assert.h> | |||
#include <stdint.h> | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#if !defined(_MSC_VER) | |||
#include <stdalign.h> | |||
#endif | |||
#include "compat.h" | |||
#include "bitstream.h" | |||
#include "io.h" | |||
#include "picnic3_simulate.h" | |||
#include "picnic3_types.h" | |||
#include "simd.h" | |||
/*
 * picnic3_mpc_sbox_bitsliced: bitsliced MPC S-box layer on mzd_local_t values.
 *
 * Besides its parameters, the macro uses three names that must be in scope at
 * the expansion site: `statein` (the state, updated in place), `tapes` (the
 * parties' random tapes, read) and `msgs` (the broadcast messages: read for the
 * unopened party, written for every other party).
 *
 * Parameters:
 *   LOWMC_N        number of bits moved per bitstream read/write
 *   XOR, AND       three-operand (dst, src1, src2) bitwise operations
 *   SHL, SHR       three-operand (dst, src, count) shifts
 *   bitmask_a/b/c  masks selecting the a, b and c bits of the state
 *
 * For each of the 16 parties the loop either
 *   - (i == msgs->unopened) reads LOWMC_N bits from that party's msgs stream
 *     and folds them into s_ab / s_bc / s_ca, or
 *   - reads two LOWMC_N-bit chunks from the party's tape (input masks, then
 *     AND-helper bits), computes that party's share of the three AND gates,
 *     folds it into s_ab / s_bc / s_ca and appends it to the party's msgs
 *     stream.
 * Afterwards tapes->pos advances by 2 * LOWMC_N, msgs->pos by LOWMC_N, and the
 * new state is assembled from s_ab, s_bc, s_ca and a, b, c.
 */
#define picnic3_mpc_sbox_bitsliced(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, bitmask_c) \ | |||
do { \ | |||
mzd_local_t a[1], b[1], c[1]; \ | |||
/* a */ \ | |||
AND(a, bitmask_a, statein); \ | |||
/* b */ \ | |||
AND(b, bitmask_b, statein); \ | |||
/* c */ \ | |||
AND(c, bitmask_c, statein); \ | |||
\ | |||
SHL(a, a, 2); \ | |||
SHL(b, b, 1); \ | |||
\ | |||
mzd_local_t t0[1], t1[1], t2[1]; \ | |||
\ | |||
mzd_local_t s_ab[1], s_bc[1], s_ca[1]; \ | |||
/* b & c */ \ | |||
AND(s_bc, b, c); \ | |||
/* c & a */ \ | |||
AND(s_ca, c, a); \ | |||
/* a & b */ \ | |||
AND(s_ab, a, b); \ | |||
for (int i = 0; i < 16; i++) { \ | |||
mzd_local_t tmp[1]; \ | |||
bitstream_t party_msgs = {{msgs->msgs[i]}, msgs->pos}; \ | |||
if (i == msgs->unopened) { \ | |||
/* we are in verify, just grab the broadcast s from the msgs array */ \ | |||
mzd_from_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
AND(t0, bitmask_a, tmp); \ | |||
/* b */ \ | |||
AND(t1, bitmask_b, tmp); \ | |||
/* c */ \ | |||
AND(t2, bitmask_c, tmp); \ | |||
SHL(t0, t0, 2); \ | |||
SHL(t1, t1, 1); \ | |||
XOR(s_ab, t2, s_ab); \ | |||
XOR(s_bc, t1, s_bc); \ | |||
XOR(s_ca, t0, s_ca); \ | |||
\ | |||
continue; \ | |||
} \ | |||
bitstream_t party_tape = {{tapes->tape[i]}, tapes->pos}; \ | |||
/* make a mzd_local from tape[i] for input_masks */ \ | |||
mzd_local_t mask_a[1], mask_b[1], mask_c[1]; \ | |||
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
AND(mask_a, bitmask_a, tmp); \ | |||
/* b */ \ | |||
AND(mask_b, bitmask_b, tmp); \ | |||
/* c */ \ | |||
AND(mask_c, bitmask_c, tmp); \ | |||
SHL(mask_a, mask_a, 2); \ | |||
SHL(mask_b, mask_b, 1); \ | |||
\ | |||
/* make a mzd_local from tape[i] for and_helper */ \ | |||
mzd_local_t and_helper_ab[1], and_helper_bc[1], and_helper_ca[1]; \ | |||
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
AND(and_helper_ab, bitmask_c, tmp); \ | |||
/* b */ \ | |||
AND(and_helper_bc, bitmask_b, tmp); \ | |||
/* c */ \ | |||
AND(and_helper_ca, bitmask_a, tmp); \ | |||
SHL(and_helper_ca, and_helper_ca, 2); \ | |||
SHL(and_helper_bc, and_helper_bc, 1); \ | |||
\ | |||
/* s_ab */ \ | |||
AND(t0, a, mask_b); \ | |||
AND(t1, b, mask_a); \ | |||
XOR(t0, t0, t1); \ | |||
XOR(tmp, t0, and_helper_ab); \ | |||
XOR(s_ab, tmp, s_ab); \ | |||
/* s_bc */ \ | |||
AND(t0, b, mask_c); \ | |||
AND(t1, c, mask_b); \ | |||
XOR(t0, t0, t1); \ | |||
XOR(t0, t0, and_helper_bc); \ | |||
XOR(s_bc, t0, s_bc); \ | |||
\ | |||
SHR(t0, t0, 1); \ | |||
XOR(tmp, tmp, t0); \ | |||
/* s_ca */ \ | |||
AND(t0, c, mask_a); \ | |||
AND(t1, a, mask_c); \ | |||
XOR(t0, t0, t1); \ | |||
XOR(t0, t0, and_helper_ca); \ | |||
XOR(s_ca, t0, s_ca); \ | |||
\ | |||
SHR(t0, t0, 2); \ | |||
XOR(tmp, tmp, t0); \ | |||
mzd_to_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
} \ | |||
tapes->pos += LOWMC_N; \ | |||
tapes->pos += LOWMC_N; \ | |||
msgs->pos += LOWMC_N; \ | |||
\ | |||
/* (b & c) ^ a */ \ | |||
XOR(t0, s_bc, a); \ | |||
\ | |||
/* (c & a) ^ a ^ b */ \ | |||
XOR(a, a, b); \ | |||
XOR(t1, s_ca, a); \ | |||
\ | |||
/* (a & b) ^ a ^ b ^c */ \ | |||
XOR(t2, s_ab, a); \ | |||
XOR(t2, t2, c); \ | |||
\ | |||
SHR(t0, t0, 2); \ | |||
SHR(t1, t1, 1); \ | |||
\ | |||
XOR(t2, t2, t1); \ | |||
XOR(statein, t2, t0); \ | |||
} while (0) | |||
#include "lowmc_129_129_4.h" | |||
/* Plain uint64 implementation; compiled out when NO_UINT64_FALLBACK is defined. */
#if !defined(NO_UINT64_FALLBACK) | |||
/*
 * S-box layer for the 129-bit LowMC instance, expanding the generic bitsliced
 * macro with the 192-bit uint64 xor/and/shift helpers and the 129_129_43 masks.
 */
static void picnic3_mpc_sbox_uint64_lowmc_129_129_4(mzd_local_t* statein, randomTape_t* tapes, | |||
msgs_t* msgs) { | |||
picnic3_mpc_sbox_bitsliced(LOWMC_129_129_4_N, mzd_xor_uint64_192, mzd_and_uint64_192, | |||
mzd_shift_left_uint64_192, mzd_shift_right_uint64_192, | |||
mask_129_129_43_a, mask_129_129_43_b, mask_129_129_43_c); | |||
} | |||
/*
 * Instantiate the SIM_ONLINE template (picnic3_simulate.c.i) once per parameter
 * set: each *_fns_uint64.h header is included first, SIM_ONLINE is redefined to
 * the function name to generate, then the template is included.
 * NOTE(review): the *_fns_* headers presumably define LOWMC_INSTANCE, LOWMC_N,
 * LOWMC_R and the MUL/XOR/ADDMUL helpers used by the template; confirm there.
 */
#define IMPL uint64 | |||
/* PICNIC3_L1_FS */ | |||
#include "lowmc_129_129_4_fns_uint64.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_uint64_129_43 | |||
#include "picnic3_simulate.c.i" | |||
/* PICNIC3_L3_FS */ | |||
#include "lowmc_192_192_4_fns_uint64.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_uint64_192_64 | |||
#include "picnic3_simulate.c.i" | |||
/* PICNIC3_L5_FS */ | |||
#include "lowmc_255_255_4_fns_uint64.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_uint64_255_85 | |||
#include "picnic3_simulate.c.i" | |||
#undef IMPL | |||
#endif | |||
/*
 * picnic3_mpc_sbox_bitsliced_mm128: the bitsliced MPC S-box layer using pairs
 * of word128 values (word128[2]) as working registers.
 *
 * Besides its parameters, the macro uses three names that must be in scope at
 * the expansion site: `statein` (state, accessed through ->w128 and updated in
 * place), `tapes` (random tapes, read) and `msgs` (broadcast messages: read for
 * the unopened party, written for every other party).
 *
 * Parameters:
 *   LOWMC_N        number of bits moved per bitstream read/write
 *   XOR, AND       three-operand (dst, src1, src2) operations on word128[2]
 *   SHL, SHR       three-operand (dst, src, count) shifts on word128[2]
 *   bitmask_a/b/c  masks (accessed through ->w128) selecting the a, b, c bits
 *
 * Per party (16 iterations): the unopened party's broadcast bits are read from
 * msgs; every other party reads two LOWMC_N-bit chunks from its tape, computes
 * its AND-gate shares and writes them to msgs. Afterwards tapes->pos advances
 * by 2 * LOWMC_N and msgs->pos by LOWMC_N, and the result is stored to
 * statein->w128.
 */
#define picnic3_mpc_sbox_bitsliced_mm128(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, \ | |||
bitmask_c) \ | |||
do { \ | |||
word128 a[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 b[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 c[2] ATTR_ALIGNED(alignof(word128)); \ | |||
/* a */ \ | |||
AND(a, bitmask_a->w128, statein->w128); \ | |||
/* b */ \ | |||
AND(b, bitmask_b->w128, statein->w128); \ | |||
/* c */ \ | |||
AND(c, bitmask_c->w128, statein->w128); \ | |||
\ | |||
SHL(a, a, 2); \ | |||
SHL(b, b, 1); \ | |||
\ | |||
word128 t0[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 t1[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 t2[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 s_ab[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 s_bc[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 s_ca[2] ATTR_ALIGNED(alignof(word128)); \ | |||
\ | |||
/* b & c */ \ | |||
AND(s_bc, b, c); \ | |||
/* c & a */ \ | |||
AND(s_ca, c, a); \ | |||
/* a & b */ \ | |||
AND(s_ab, a, b); \ | |||
for (int i = 0; i < 16; i++) { \ | |||
mzd_local_t tmp[1]; \ | |||
bitstream_t party_msgs = {{msgs->msgs[i]}, msgs->pos}; \ | |||
if (i == msgs->unopened) { \ | |||
/* we are in verify, just grab the broadcast s from the msgs array */ \ | |||
mzd_from_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
AND(t0, bitmask_a->w128, tmp->w128); \ | |||
/* b */ \ | |||
AND(t1, bitmask_b->w128, tmp->w128); \ | |||
/* c */ \ | |||
AND(t2, bitmask_c->w128, tmp->w128); \ | |||
SHL(t0, t0, 2); \ | |||
SHL(t1, t1, 1); \ | |||
XOR(s_ab, t2, s_ab); \ | |||
XOR(s_bc, t1, s_bc); \ | |||
XOR(s_ca, t0, s_ca); \ | |||
\ | |||
continue; \ | |||
} \ | |||
bitstream_t party_tape = {{tapes->tape[i]}, tapes->pos}; \ | |||
/* make a mzd_local from tape[i] for input_masks */ \ | |||
word128 mask_a[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 mask_b[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 mask_c[2] ATTR_ALIGNED(alignof(word128)); \ | |||
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
AND(mask_a, bitmask_a->w128, tmp->w128); \ | |||
/* b */ \ | |||
AND(mask_b, bitmask_b->w128, tmp->w128); \ | |||
/* c */ \ | |||
AND(mask_c, bitmask_c->w128, tmp->w128); \ | |||
SHL(mask_a, mask_a, 2); \ | |||
SHL(mask_b, mask_b, 1); \ | |||
\ | |||
/* make a mzd_local from tape[i] for and_helper */ \ | |||
word128 and_helper_ab[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 and_helper_bc[2] ATTR_ALIGNED(alignof(word128)); \ | |||
word128 and_helper_ca[2] ATTR_ALIGNED(alignof(word128)); \ | |||
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
AND(and_helper_ab, bitmask_c->w128, tmp->w128); \ | |||
/* b */ \ | |||
AND(and_helper_bc, bitmask_b->w128, tmp->w128); \ | |||
/* c */ \ | |||
AND(and_helper_ca, bitmask_a->w128, tmp->w128); \ | |||
SHL(and_helper_ca, and_helper_ca, 2); \ | |||
SHL(and_helper_bc, and_helper_bc, 1); \ | |||
\ | |||
/* s_ab */ \ | |||
AND(t0, a, mask_b); \ | |||
AND(t1, b, mask_a); \ | |||
XOR(t0, t0, t1); \ | |||
XOR(tmp->w128, t0, and_helper_ab); \ | |||
XOR(s_ab, tmp->w128, s_ab); \ | |||
/* s_bc */ \ | |||
AND(t0, b, mask_c); \ | |||
AND(t1, c, mask_b); \ | |||
XOR(t0, t0, t1); \ | |||
XOR(t0, t0, and_helper_bc); \ | |||
XOR(s_bc, t0, s_bc); \ | |||
\ | |||
SHR(t0, t0, 1); \ | |||
XOR(tmp->w128, tmp->w128, t0); \ | |||
/* s_ca */ \ | |||
AND(t0, c, mask_a); \ | |||
AND(t1, a, mask_c); \ | |||
XOR(t0, t0, t1); \ | |||
XOR(t0, t0, and_helper_ca); \ | |||
XOR(s_ca, t0, s_ca); \ | |||
\ | |||
SHR(t0, t0, 2); \ | |||
XOR(tmp->w128, tmp->w128, t0); \ | |||
mzd_to_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
} \ | |||
tapes->pos += LOWMC_N; \ | |||
tapes->pos += LOWMC_N; \ | |||
msgs->pos += LOWMC_N; \ | |||
\ | |||
/* (b & c) ^ a */ \ | |||
XOR(t0, s_bc, a); \ | |||
\ | |||
/* (c & a) ^ a ^ b */ \ | |||
XOR(a, a, b); \ | |||
XOR(t1, s_ca, a); \ | |||
\ | |||
/* (a & b) ^ a ^ b ^c */ \ | |||
XOR(t2, s_ab, a); \ | |||
XOR(t2, t2, c); \ | |||
\ | |||
SHR(t0, t0, 2); \ | |||
SHR(t1, t1, 1); \ | |||
\ | |||
XOR(t2, t2, t1); \ | |||
XOR(statein->w128, t2, t0); \ | |||
} while (0) | |||
/*
 * S-box layer for the 129-bit LowMC instance, expanding the word128 variant of
 * the bitsliced macro with the mm128 256-bit xor/and/shift helpers and the
 * 129_129_43 masks. Tagged with ATTR_TARGET_S128.
 */
ATTR_TARGET_S128 | |||
static void picnic3_mpc_sbox_s128_lowmc_129_129_4(mzd_local_t* statein, randomTape_t* tapes, | |||
msgs_t* msgs) { | |||
picnic3_mpc_sbox_bitsliced_mm128(LOWMC_129_129_4_N, mm128_xor_256, mm128_and_256, | |||
mm128_shift_left_256, mm128_shift_right_256, mask_129_129_43_a, | |||
mask_129_129_43_b, mask_129_129_43_c); | |||
} | |||
/*
 * Instantiate the SIM_ONLINE template (picnic3_simulate.c.i) for the s128
 * implementation. FN_ATTR is redefined so the generated functions carry
 * ATTR_TARGET_S128; SIM_ONLINE is redefined before each inclusion to name the
 * function being generated.
 */
#define IMPL s128 | |||
#undef FN_ATTR | |||
#define FN_ATTR ATTR_TARGET_S128 | |||
/* PICNIC3_L1_FS */ | |||
#include "lowmc_129_129_4_fns_s128.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_s128_129_43 | |||
#include "picnic3_simulate.c.i" | |||
/* PICNIC3_L3_FS */ | |||
#include "lowmc_192_192_4_fns_s128.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_s128_192_64 | |||
#include "picnic3_simulate.c.i" | |||
/* PICNIC3_L5_FS */ | |||
#include "lowmc_255_255_4_fns_s128.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_s128_255_85 | |||
#include "picnic3_simulate.c.i" | |||
#undef IMPL | |||
/*
 * picnic3_mpc_sbox_bitsliced_mm256: the bitsliced MPC S-box layer using single
 * word256 values as working registers.
 *
 * Unlike the other two variants, the operations here are value-returning:
 * XOR/AND take two operands and ROL/ROR take (value, count), and the result is
 * assigned by the macro body. Rotations are used where the other variants
 * shift.
 *
 * Besides its parameters, the macro uses three names that must be in scope at
 * the expansion site: `statein` (state, accessed through ->w256 and updated in
 * place), `tapes` (random tapes, read) and `msgs` (broadcast messages: read for
 * the unopened party, written for every other party).
 *
 * Per party (16 iterations): the unopened party's broadcast bits are read from
 * msgs; every other party reads two LOWMC_N-bit chunks from its tape, computes
 * its AND-gate shares and writes them to msgs. Afterwards tapes->pos advances
 * by 2 * LOWMC_N and msgs->pos by LOWMC_N, and the result is stored to
 * statein->w256.
 */
#define picnic3_mpc_sbox_bitsliced_mm256(LOWMC_N, XOR, AND, ROL, ROR, bitmask_a, bitmask_b, \ | |||
bitmask_c) \ | |||
do { \ | |||
word256 a ATTR_ALIGNED(alignof(word256)); \ | |||
word256 b ATTR_ALIGNED(alignof(word256)); \ | |||
word256 c ATTR_ALIGNED(alignof(word256)); \ | |||
/* a */ \ | |||
a = AND(bitmask_a->w256, statein->w256); \ | |||
/* b */ \ | |||
b = AND(bitmask_b->w256, statein->w256); \ | |||
/* c */ \ | |||
c = AND(bitmask_c->w256, statein->w256); \ | |||
\ | |||
a = ROL(a, 2); \ | |||
b = ROL(b, 1); \ | |||
\ | |||
word256 t0 ATTR_ALIGNED(alignof(word256)); \ | |||
word256 t1 ATTR_ALIGNED(alignof(word256)); \ | |||
word256 t2 ATTR_ALIGNED(alignof(word256)); \ | |||
word256 s_ab ATTR_ALIGNED(alignof(word256)); \ | |||
word256 s_bc ATTR_ALIGNED(alignof(word256)); \ | |||
word256 s_ca ATTR_ALIGNED(alignof(word256)); \ | |||
\ | |||
/* b & c */ \ | |||
s_bc = AND(b, c); \ | |||
/* c & a */ \ | |||
s_ca = AND(c, a); \ | |||
/* a & b */ \ | |||
s_ab = AND(a, b); \ | |||
for (int i = 0; i < 16; i++) { \ | |||
mzd_local_t tmp[1]; \ | |||
bitstream_t party_msgs = {{msgs->msgs[i]}, msgs->pos}; \ | |||
if (i == msgs->unopened) { \ | |||
/* we are in verify, just grab the broadcast s from the msgs array */ \ | |||
mzd_from_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
t0 = AND(bitmask_a->w256, tmp->w256); \ | |||
/* b */ \ | |||
t1 = AND(bitmask_b->w256, tmp->w256); \ | |||
/* c */ \ | |||
t2 = AND(bitmask_c->w256, tmp->w256); \ | |||
t0 = ROL(t0, 2); \ | |||
t1 = ROL(t1, 1); \ | |||
s_ab = XOR(t2, s_ab); \ | |||
s_bc = XOR(t1, s_bc); \ | |||
s_ca = XOR(t0, s_ca); \ | |||
\ | |||
continue; \ | |||
} \ | |||
bitstream_t party_tape = {{tapes->tape[i]}, tapes->pos}; \ | |||
/* make a mzd_local from tape[i] for input_masks */ \ | |||
word256 mask_a ATTR_ALIGNED(alignof(word256)); \ | |||
word256 mask_b ATTR_ALIGNED(alignof(word256)); \ | |||
word256 mask_c ATTR_ALIGNED(alignof(word256)); \ | |||
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
mask_a = AND(bitmask_a->w256, tmp->w256); \ | |||
/* b */ \ | |||
mask_b = AND(bitmask_b->w256, tmp->w256); \ | |||
/* c */ \ | |||
mask_c = AND(bitmask_c->w256, tmp->w256); \ | |||
mask_a = ROL(mask_a, 2); \ | |||
mask_b = ROL(mask_b, 1); \ | |||
\ | |||
/* make a mzd_local from tape[i] for and_helper */ \ | |||
word256 and_helper_ab ATTR_ALIGNED(alignof(word256)); \ | |||
word256 and_helper_bc ATTR_ALIGNED(alignof(word256)); \ | |||
word256 and_helper_ca ATTR_ALIGNED(alignof(word256)); \ | |||
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
/* a */ \ | |||
and_helper_ab = AND(bitmask_c->w256, tmp->w256); \ | |||
/* b */ \ | |||
and_helper_bc = AND(bitmask_b->w256, tmp->w256); \ | |||
/* c */ \ | |||
and_helper_ca = AND(bitmask_a->w256, tmp->w256); \ | |||
and_helper_ca = ROL(and_helper_ca, 2); \ | |||
and_helper_bc = ROL(and_helper_bc, 1); \ | |||
\ | |||
/* s_ab */ \ | |||
t0 = AND(a, mask_b); \ | |||
t1 = AND(b, mask_a); \ | |||
t0 = XOR(t0, t1); \ | |||
tmp->w256 = XOR(t0, and_helper_ab); \ | |||
s_ab = XOR(tmp->w256, s_ab); \ | |||
/* s_bc */ \ | |||
t0 = AND(b, mask_c); \ | |||
t1 = AND(c, mask_b); \ | |||
t0 = XOR(t0, t1); \ | |||
t0 = XOR(t0, and_helper_bc); \ | |||
s_bc = XOR(t0, s_bc); \ | |||
\ | |||
t0 = ROR(t0, 1); \ | |||
tmp->w256 = XOR(tmp->w256, t0); \ | |||
/* s_ca */ \ | |||
t0 = AND(c, mask_a); \ | |||
t1 = AND(a, mask_c); \ | |||
t0 = XOR(t0, t1); \ | |||
t0 = XOR(t0, and_helper_ca); \ | |||
s_ca = XOR(t0, s_ca); \ | |||
\ | |||
t0 = ROR(t0, 2); \ | |||
tmp->w256 = XOR(tmp->w256, t0); \ | |||
mzd_to_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \ | |||
} \ | |||
tapes->pos += LOWMC_N; \ | |||
tapes->pos += LOWMC_N; \ | |||
msgs->pos += LOWMC_N; \ | |||
\ | |||
/* (b & c) ^ a */ \ | |||
t0 = XOR(s_bc, a); \ | |||
\ | |||
/* (c & a) ^ a ^ b */ \ | |||
a = XOR(a, b); \ | |||
t1 = XOR(s_ca, a); \ | |||
\ | |||
/* (a & b) ^ a ^ b ^c */ \ | |||
t2 = XOR(s_ab, a); \ | |||
t2 = XOR(t2, c); \ | |||
\ | |||
t0 = ROR(t0, 2); \ | |||
t1 = ROR(t1, 1); \ | |||
\ | |||
t2 = XOR(t2, t1); \ | |||
statein->w256 = XOR(t2, t0); \ | |||
} while (0) | |||
/*
 * S-box layer for the 129-bit LowMC instance, expanding the word256 variant of
 * the bitsliced macro with the mm256 xor/and/rotate helpers and the 129_129_43
 * masks. Tagged with ATTR_TARGET_AVX2.
 */
ATTR_TARGET_AVX2 | |||
static void picnic3_mpc_sbox_s256_lowmc_129_129_4(mzd_local_t* statein, randomTape_t* tapes, | |||
msgs_t* msgs) { | |||
picnic3_mpc_sbox_bitsliced_mm256(LOWMC_129_129_4_N, mm256_xor, mm256_and, mm256_rotate_left, | |||
mm256_rotate_right, mask_129_129_43_a, mask_129_129_43_b, | |||
mask_129_129_43_c); | |||
} | |||
/*
 * Instantiate the SIM_ONLINE template (picnic3_simulate.c.i) for the s256
 * implementation. FN_ATTR is redefined so the generated functions carry
 * ATTR_TARGET_AVX2; SIM_ONLINE is redefined before each inclusion to name the
 * function being generated.
 */
#define IMPL s256 | |||
#undef FN_ATTR | |||
#define FN_ATTR ATTR_TARGET_AVX2 | |||
/* PICNIC3_L1_FS */ | |||
#include "lowmc_129_129_4_fns_s256.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_s256_129_43 | |||
#include "picnic3_simulate.c.i" | |||
/* PICNIC3_L3_FS */ | |||
#include "lowmc_192_192_4_fns_s256.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_s256_192_64 | |||
#include "picnic3_simulate.c.i" | |||
/* PICNIC3_L5_FS */ | |||
#include "lowmc_255_255_4_fns_s256.h" | |||
#undef SIM_ONLINE | |||
#define SIM_ONLINE lowmc_simulate_online_s256_255_85 | |||
#include "picnic3_simulate.c.i" | |||
#undef IMPL | |||
lowmc_simulate_online_f lowmc_simulate_online_get_implementation(const lowmc_parameters_t* lowmc) { | |||
assert((lowmc->m == 43 && lowmc->n == 129) || (lowmc->m == 64 && lowmc->n == 192) || | |||
(lowmc->m == 85 && lowmc->n == 255)); | |||
if (CPU_SUPPORTS_AVX2) { | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_simulate_online_s256_129_43; | |||
} | |||
if (CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) { | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_simulate_online_s128_129_43; | |||
} | |||
#if !defined(NO_UINT64_FALLBACK) | |||
if (lowmc->n == 129 && lowmc->m == 43) | |||
return lowmc_simulate_online_uint64_129_43; | |||
#endif | |||
return NULL; | |||
} |
@@ -0,0 +1,57 @@ | |||
/*! @file picnic3_simulate.c.i | |||
* @brief This is the main file of the signature scheme for the Picnic3 | |||
* parameter sets. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#if defined(LOWMC_INSTANCE)
/*
 * Template for the online phase: evaluates LowMC on the masked key and checks
 * the result against the public key. The including file chooses the generated
 * function's name (SIM_ONLINE), the implementation flavour (IMPL) and the
 * LowMC instance (LOWMC_INSTANCE, LOWMC_N, LOWMC_R, MUL, XOR, ADDMUL).
 *
 * Returns 0 when the computed output equals pubKey, -1 otherwise.
 */
#if defined(FN_ATTR)
FN_ATTR
#endif
static int SIM_ONLINE(mzd_local_t* maskedKey, randomTape_t* tapes, msgs_t* msgs,
                      const mzd_local_t* plaintext, const uint8_t* pubKey,
                      const picnic_instance_t* params) {
#define mpc_sbox CONCAT(picnic3_mpc_sbox, CONCAT(IMPL, LOWMC_INSTANCE))
  mzd_local_t state[(LOWMC_N + 255) / 256];
  mzd_local_t temp[(LOWMC_N + 255) / 256];
  uint8_t output[MAX_LOWMC_BLOCK_SIZE];

  /* Initial key addition: state = maskedKey * KMatrix[0] ^ plaintext */
  MUL(temp, maskedKey, LOWMC_INSTANCE.k0_matrix);
  XOR(state, temp, plaintext);

  uint32_t round_idx = 0;
  while (round_idx < LOWMC_R) {
    mpc_sbox(state, tapes, msgs);
    /* Linear layer, round constant, then round key addition. */
    MUL(temp, state, LOWMC_INSTANCE.rounds[round_idx].l_matrix);
    XOR(state, temp, LOWMC_INSTANCE.rounds[round_idx].constant);
    ADDMUL(state, maskedKey, LOWMC_INSTANCE.rounds[round_idx].k_matrix);
    round_idx++;
  }

  /* check that the output is correct */
  mzd_to_char_array(output, state, params->output_size);
  if (timingsafe_bcmp(output, pubKey, params->output_size) == 0) {
    return 0;
  }

#if !defined(NDEBUG)
  printf("%s: output does not match pubKey\n", __func__);
  printf("pubKey: ");
  print_hex(stdout, pubKey, params->output_size);
  printf("\noutput: ");
  print_hex(stdout, output, params->output_size);
  printf("\n");
#endif
  return -1;
}
#endif
@@ -0,0 +1,25 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC3_SIMULATE_H | |||
#define PICNIC3_SIMULATE_H | |||
#include "lowmc_pars.h" | |||
/* Forward declarations; the full definitions are not needed by this header. */
typedef struct randomTape_t randomTape_t; | |||
typedef struct msgs_t msgs_t; | |||
typedef struct picnic_instance_t picnic_instance_t; | |||
/*
 * Signature of an online-simulation routine: runs the simulation for
 * `maskedKey` and `plaintext` using `tapes` and `msgs`, and returns 0 when the
 * resulting output matches `pubKey`, -1 otherwise.
 */
typedef int (*lowmc_simulate_online_f)(mzd_local_t* maskedKey, randomTape_t* tapes, msgs_t* msgs, | |||
const mzd_local_t* plaintext, const uint8_t* pubKey, | |||
const picnic_instance_t* params); | |||
/*
 * Return the simulation routine to use for the given LowMC parameters, or NULL
 * when no implementation is available for them.
 */
lowmc_simulate_online_f lowmc_simulate_online_get_implementation(const lowmc_parameters_t* lowmc); | |||
#endif | |||
@@ -0,0 +1,612 @@ | |||
/*! @file tree.c | |||
* @brief This file has the tree implementation used to generate random seeds | |||
* and commit to multiple values with a Merkle tree. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include <assert.h> | |||
#include <limits.h> | |||
#include <stdlib.h> | |||
#include "endian_compat.h" | |||
#include "kdf_shake.h" | |||
#include "picnic.h" | |||
#include "picnic3_tree.h" | |||
#include "picnic3_types.h" | |||
/* Return 1 if `value` occurs among the first `len` entries of `list`, else 0. */
static int contains(size_t* list, size_t len, size_t value) {
  size_t remaining = len;
  while (remaining > 0) {
    remaining--;
    if (list[remaining] == value) {
      return 1;
    }
  }
  return 0;
}
/*
 * Return 1 if node index `i` is inside the tree and marked as existing,
 * 0 otherwise (including for out-of-range indices).
 */
static int exists(tree_t* tree, size_t i) {
  return (i < tree->numNodes && tree->exists[i]) ? 1 : 0;
}
tree_t* createTree(size_t numLeaves, size_t dataSize) { | |||
tree_t* tree = malloc(sizeof(tree_t)); | |||
tree->depth = ceil_log2(numLeaves) + 1; | |||
tree->numNodes = | |||
((1 << (tree->depth)) - 1) - | |||
((1 << (tree->depth - 1)) - numLeaves); /* Num nodes in complete - number of missing leaves */ | |||
tree->numLeaves = numLeaves; | |||
tree->dataSize = dataSize; | |||
tree->nodes = malloc(tree->numNodes * sizeof(uint8_t*)); | |||
uint8_t* slab = calloc(tree->numNodes, dataSize); | |||
for (size_t i = 0; i < tree->numNodes; i++) { | |||
tree->nodes[i] = slab; | |||
slab += dataSize; | |||
} | |||
tree->haveNode = calloc(tree->numNodes, 1); | |||
/* Depending on the number of leaves, the tree may not be complete */ | |||
tree->exists = calloc(tree->numNodes, 1); | |||
memset(tree->exists + tree->numNodes - tree->numLeaves, 1, tree->numLeaves); /* Set leaves */ | |||
for (int i = tree->numNodes - tree->numLeaves; i > 0; i--) { | |||
if (exists(tree, 2 * i + 1) || exists(tree, 2 * i + 2)) { | |||
tree->exists[i] = 1; | |||
} | |||
} | |||
tree->exists[0] = 1; | |||
return tree; | |||
} | |||
/* Release a tree created by createTree(); passing NULL is a no-op. */
void freeTree(tree_t* tree) {
  if (tree == NULL) {
    return;
  }

  /* nodes[0] is the base of the slab holding every node's data. */
  free(tree->nodes[0]);
  free(tree->nodes);
  free(tree->haveNode);
  free(tree->exists);
  free(tree);
}
/* Return 1 if `node` (which must not be the root, index 0) has an odd index. */
static int isLeftChild(size_t node) {
  assert(node != 0);
  const int odd = (node & 1u) != 0;
  return odd;
}
/*
 * Return 1 if index 2 * node + 2 lies inside the tree and `node` itself is
 * marked as existing, 0 otherwise.
 */
static int hasRightChild(tree_t* tree, size_t node) {
  if (2 * node + 2 >= tree->numNodes) {
    return 0;
  }
  return exists(tree, node) ? 1 : 0;
}
/* Return the index of the parent of `node`; `node` must not be the root (0). */
static size_t getParent(size_t node) {
  assert(node != 0);
  const size_t halved = (node + 1) / 2;
  return halved - 1;
}
/* Return a pointer to the first leaf entry of the node-pointer array. */
uint8_t** getLeaves(tree_t* tree) {
  const size_t firstLeaf = tree->numNodes - tree->numLeaves;
  return tree->nodes + firstLeaf;
}
/* Return the data pointer of leaf number `leafIndex` (0-based, < numLeaves). */
uint8_t* getLeaf(tree_t* tree, size_t leafIndex) {
  assert(leafIndex < tree->numLeaves);
  uint8_t** leaves = tree->nodes + (tree->numNodes - tree->numLeaves);
  return leaves[leafIndex];
}
static void hashSeed(uint8_t* digest, const uint8_t* inputSeed, uint8_t* salt, uint8_t hashPrefix, | |||
size_t repIndex, size_t nodeIndex, const picnic_instance_t* params) { | |||
hash_context ctx; | |||
hash_init_prefix(&ctx, params->digest_size, hashPrefix); | |||
hash_update(&ctx, inputSeed, params->seed_size); | |||
hash_update(&ctx, salt, SALT_SIZE); | |||
hash_update_uint16_le(&ctx, repIndex); | |||
hash_update_uint16_le(&ctx, nodeIndex); | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, digest, 2 * params->seed_size); | |||
} | |||
static void hashSeed_x4(uint8_t** digest, const uint8_t** inputSeed, uint8_t* salt, | |||
uint8_t hashPrefix, size_t repIndex, size_t nodeIndex, | |||
const picnic_instance_t* params) { | |||
hash_context_x4 ctx; | |||
hash_init_prefix_x4(&ctx, params->digest_size, hashPrefix); | |||
hash_update_x4(&ctx, inputSeed, params->seed_size); | |||
const uint8_t* salts[4] = {salt, salt, salt, salt}; | |||
hash_update_x4(&ctx, salts, SALT_SIZE); | |||
hash_update_x4_uint16_le(&ctx, repIndex); | |||
const uint16_t nodes[4] = {nodeIndex, nodeIndex + 1, nodeIndex + 2, nodeIndex + 3}; | |||
hash_update_x4_uint16s_le(&ctx, nodes); | |||
hash_final_x4(&ctx); | |||
hash_squeeze_x4(&ctx, digest, 2 * params->seed_size); | |||
} | |||
static void expandSeeds(tree_t* tree, uint8_t* salt, size_t repIndex, | |||
const picnic_instance_t* params) { | |||
uint8_t tmp[4 * 2 * MAX_SEED_SIZE_BYTES]; | |||
uint8_t* tmp_ptr[4] = {&tmp[0], &tmp[2 * MAX_SEED_SIZE_BYTES], &tmp[2 * 2 * MAX_SEED_SIZE_BYTES], | |||
&tmp[3 * 2 * MAX_SEED_SIZE_BYTES]}; | |||
/* Walk the tree, expanding seeds where possible. Compute children of | |||
* non-leaf nodes. */ | |||
size_t lastNonLeaf = getParent(tree->numNodes - 1); | |||
size_t i = 0; | |||
/* expand the first 4 seeds*/ | |||
for (; i <= MIN(2,lastNonLeaf); i++) { | |||
if (!tree->haveNode[i]) { | |||
continue; | |||
} | |||
hashSeed(tmp, tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params); | |||
if (!tree->haveNode[2 * i + 1]) { | |||
/* left child = H_left(seed_i || salt || t || i) */ | |||
memcpy(tree->nodes[2 * i + 1], tmp, params->seed_size); | |||
tree->haveNode[2 * i + 1] = 1; | |||
} | |||
/* The last non-leaf node will only have a left child when there are an odd number of leaves */ | |||
if (exists(tree, 2 * i + 2) && !tree->haveNode[2 * i + 2]) { | |||
/* right child = H_right(seed_i || salt || t || i) */ | |||
memcpy(tree->nodes[2 * i + 2], tmp + params->seed_size, params->seed_size); | |||
tree->haveNode[2 * i + 2] = 1; | |||
} | |||
} | |||
/* now hash in groups of 4 for faster hashing */ | |||
for (; i <= lastNonLeaf / 4 * 4; i += 4) { | |||
hashSeed_x4(tmp_ptr, (const uint8_t**) &tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params); | |||
for (size_t j = i; j < i + 4; j++) { | |||
if (!tree->haveNode[j]) { | |||
continue; | |||
} | |||
if (!tree->haveNode[2 * j + 1]) { | |||
/* left child = H_left(seed_i || salt || t || j) */ | |||
memcpy(tree->nodes[2 * j + 1], tmp_ptr[j-i], params->seed_size); | |||
tree->haveNode[2 * j + 1] = 1; | |||
} | |||
/* The last non-leaf node will only have a left child when there are an odd number of leaves | |||
*/ | |||
if (exists(tree, 2 * j + 2) && !tree->haveNode[2 * j + 2]) { | |||
/* right child = H_right(seed_i || salt || t || j) */ | |||
memcpy(tree->nodes[2 * j + 2], tmp_ptr[j-i] + params->seed_size, params->seed_size); | |||
tree->haveNode[2 * j + 2] = 1; | |||
} | |||
} | |||
} | |||
/* handle last few, which are not a multiple of 4 */ | |||
for (; i <= lastNonLeaf; i++) { | |||
if (!tree->haveNode[i]) { | |||
continue; | |||
} | |||
hashSeed(tmp, tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params); | |||
if (!tree->haveNode[2 * i + 1]) { | |||
/* left child = H_left(seed_i || salt || t || i) */ | |||
memcpy(tree->nodes[2 * i + 1], tmp, params->seed_size); | |||
tree->haveNode[2 * i + 1] = 1; | |||
} | |||
/* The last non-leaf node will only have a left child when there are an odd number of leaves */ | |||
if (exists(tree, 2 * i + 2) && !tree->haveNode[2 * i + 2]) { | |||
/* right child = H_right(seed_i || salt || t || i) */ | |||
memcpy(tree->nodes[2 * i + 2], tmp + params->seed_size, params->seed_size); | |||
tree->haveNode[2 * i + 2] = 1; | |||
} | |||
} | |||
} | |||
tree_t* generateSeeds(size_t nSeeds, uint8_t* rootSeed, uint8_t* salt, size_t repIndex, | |||
const picnic_instance_t* params) { | |||
tree_t* tree = createTree(nSeeds, params->seed_size); | |||
memcpy(tree->nodes[0], rootSeed, params->seed_size); | |||
tree->haveNode[0] = 1; | |||
expandSeeds(tree, salt, repIndex, params); | |||
return tree; | |||
} | |||
/* A node is a leaf when the index of its left child falls outside the node
 * array. */
static int isLeafNode(tree_t* tree, size_t node) {
  const size_t leftChild = 2 * node + 1;
  return (leftChild < tree->numNodes) ? 0 : 1;
}
/* Returns 1 when `node` exists and is not a left child whose right-hand
 * neighbour (node + 1) is absent; returns 0 otherwise. */
static int hasSibling(tree_t* tree, size_t node) {
  if (!exists(tree, node)) {
    return 0;
  }
  const int loneLeftChild = isLeftChild(node) && !exists(tree, node + 1);
  return loneLeftChild ? 0 : 1;
}
/* Returns the index of the sibling of `node`: node + 1 for a left child,
 * node - 1 for a right child. The node must be a non-root node that has a
 * sibling (see hasSibling). */
static size_t getSibling(tree_t* tree, size_t node) {
  assert(node < tree->numNodes);
  assert(node != 0);
  assert(hasSibling(tree, node));

  if (!isLeftChild(node)) {
    return node - 1;
  }
  if (node + 1 >= tree->numNodes) {
    assert(!"getSibling: request for node with not sibling");
    return 0;
  }
  return node + 1;
}
/* Returns the number of bytes written to output */ | |||
static size_t* getRevealedNodes(tree_t* tree, uint16_t* hideList, size_t hideListSize, | |||
size_t* outputSize) { | |||
/* Compute paths up from hideList to root, store as sets of nodes */ | |||
size_t pathLen = tree->depth - 1; | |||
/* pathSets[i][0...hideListSize] stores the nodes in the path at depth i | |||
* for each of the leaf nodes in hideListSize */ | |||
size_t** pathSets = malloc(pathLen * sizeof(size_t*)); | |||
size_t* slab = malloc(hideListSize * pathLen * sizeof(size_t)); | |||
for (size_t i = 0; i < pathLen; i++) { | |||
pathSets[i] = slab; | |||
slab += hideListSize; | |||
} | |||
/* Compute the paths back to the root */ | |||
for (size_t i = 0; i < hideListSize; i++) { | |||
size_t pos = 0; | |||
size_t node = | |||
hideList[i] + | |||
(tree->numNodes - tree->numLeaves); /* input lists leaf indexes, translate to nodes */ | |||
pathSets[pos][i] = node; | |||
pos++; | |||
while ((node = getParent(node)) != 0) { | |||
pathSets[pos][i] = node; | |||
pos++; | |||
} | |||
} | |||
/* Determine seeds to reveal */ | |||
size_t* revealed = malloc(tree->numLeaves * sizeof(size_t)); | |||
size_t revealedPos = 0; | |||
for (size_t d = 0; d < pathLen; d++) { | |||
for (size_t i = 0; i < hideListSize; i++) { | |||
if (!hasSibling(tree, pathSets[d][i])) { | |||
continue; | |||
} | |||
size_t sibling = getSibling(tree, pathSets[d][i]); | |||
if (!contains(pathSets[d], hideListSize, sibling)) { | |||
// Determine the seed to reveal | |||
while (!hasRightChild(tree, sibling) && !isLeafNode(tree, sibling)) { | |||
sibling = 2 * sibling + 1; // sibling = leftChild(sibling) | |||
} | |||
// Only reveal if we haven't already | |||
if (!contains(revealed, revealedPos, sibling)) { | |||
revealed[revealedPos] = sibling; | |||
revealedPos++; | |||
} | |||
} | |||
} | |||
} | |||
free(pathSets[0]); | |||
free(pathSets); | |||
*outputSize = revealedPos; | |||
return revealed; | |||
} | |||
size_t revealSeedsSize(size_t numNodes, uint16_t* hideList, size_t hideListSize, | |||
const picnic_instance_t* params) { | |||
tree_t* tree = createTree(numNodes, params->seed_size); | |||
size_t numNodesRevealed = 0; | |||
size_t* revealed = getRevealedNodes(tree, hideList, hideListSize, &numNodesRevealed); | |||
freeTree(tree); | |||
free(revealed); | |||
return numNodesRevealed * params->seed_size; | |||
} | |||
/* Writes the seeds of the nodes selected by getRevealedNodes to `output`, one
 * after another, and returns the number of bytes written. Returns (size_t)-1
 * when outputSize exceeds INT_MAX and 0 when the buffer is too small (after
 * asserting in builds with assertions enabled). */
size_t revealSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* output,
                   size_t outputSize, const picnic_instance_t* params) {
  if (outputSize > INT_MAX) {
    return (size_t)-1;
  }

  int outLen          = (int)outputSize;
  size_t written      = 0;
  size_t revealedSize = 0;
  size_t* revealed    = getRevealedNodes(tree, hideList, hideListSize, &revealedSize);

  for (size_t i = 0; i < revealedSize; i++) {
    outLen -= params->seed_size;
    if (outLen < 0) {
      assert(!"Insufficient sized buffer provided to revealSeeds");
      free(revealed);
      return 0;
    }
    memcpy(output + written, tree->nodes[revealed[i]], params->seed_size);
    written += params->seed_size;
  }

  free(revealed);
  return written;
}
int reconstructSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* input, | |||
size_t inputLen, uint8_t* salt, size_t repIndex, | |||
const picnic_instance_t* params) { | |||
int ret = 0; | |||
if (inputLen > INT_MAX) { | |||
return -1; | |||
} | |||
int inLen = (int)inputLen; | |||
size_t revealedSize = 0; | |||
size_t* revealed = getRevealedNodes(tree, hideList, hideListSize, &revealedSize); | |||
for (size_t i = 0; i < revealedSize; i++) { | |||
inLen -= params->seed_size; | |||
if (inLen < 0) { | |||
ret = -1; | |||
goto Exit; | |||
} | |||
memcpy(tree->nodes[revealed[i]], input, params->seed_size); | |||
tree->haveNode[revealed[i]] = 1; | |||
input += params->seed_size; | |||
} | |||
expandSeeds(tree, salt, repIndex, params); | |||
Exit: | |||
free(revealed); | |||
return ret; | |||
} | |||
static void computeParentHash(tree_t* tree, size_t child, uint8_t* salt, | |||
const picnic_instance_t* params) { | |||
if (!exists(tree, child)) { | |||
return; | |||
} | |||
size_t parent = getParent(child); | |||
if (tree->haveNode[parent]) { | |||
return; | |||
} | |||
/* Compute the hash for parent, if we have everything */ | |||
if (!tree->haveNode[2 * parent + 1]) { | |||
return; | |||
} | |||
if (exists(tree, 2 * parent + 2) && !tree->haveNode[2 * parent + 2]) { | |||
return; | |||
} | |||
/* Compute parent data = H(left child data || [right child data] || salt || parent idx) */ | |||
hash_context ctx; | |||
hash_init_prefix(&ctx, params->digest_size, HASH_PREFIX_3); | |||
hash_update(&ctx, tree->nodes[2 * parent + 1], params->digest_size); | |||
if (hasRightChild(tree, parent)) { | |||
/* One node may not have a right child when there's an odd number of leaves */ | |||
hash_update(&ctx, tree->nodes[2 * parent + 2], params->digest_size); | |||
} | |||
hash_update(&ctx, salt, SALT_SIZE); | |||
hash_update_uint16_le(&ctx, parent); | |||
hash_final(&ctx); | |||
hash_squeeze(&ctx, tree->nodes[parent], params->digest_size); | |||
tree->haveNode[parent] = 1; | |||
} | |||
/* Create a Merkle tree by hashing up all nodes. | |||
* leafData must have length tree->numNodes, but some may be NULL. */ | |||
void buildMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt, | |||
const picnic_instance_t* params) { | |||
size_t firstLeaf = tree->numNodes - tree->numLeaves; | |||
/* Copy data to the leaves. The actual data being committed to has already been | |||
* hashed, according to the spec. */ | |||
for (size_t i = 0; i < tree->numLeaves; i++) { | |||
if (leafData[i] != NULL) { | |||
memcpy(tree->nodes[firstLeaf + i], leafData[i], tree->dataSize); | |||
tree->haveNode[firstLeaf + i] = 1; | |||
} | |||
} | |||
/* Starting at the leaves, work up the tree, computing the hashes for intermediate nodes */ | |||
for (int i = (int)tree->numNodes; i > 0; i--) { | |||
computeParentHash(tree, i, salt, params); | |||
} | |||
} | |||
/* Note that we never output the root node */ | |||
static size_t* getRevealedMerkleNodes(tree_t* tree, uint16_t* missingLeaves, | |||
size_t missingLeavesSize, size_t* outputSize) { | |||
size_t firstLeaf = tree->numNodes - tree->numLeaves; | |||
uint8_t* missingNodes = calloc(tree->numNodes, 1); | |||
/* Mark leaves that are missing */ | |||
for (size_t i = 0; i < missingLeavesSize; i++) { | |||
missingNodes[firstLeaf + missingLeaves[i]] = 1; | |||
} | |||
/* For the nonleaf nodes, if both leaves are missing, mark it as missing too */ | |||
int lastNonLeaf = getParent(tree->numNodes - 1); | |||
for (int i = lastNonLeaf; i > 0; i--) { | |||
if (!exists(tree, i)) { | |||
continue; | |||
} | |||
if (exists(tree, 2 * i + 2)) { | |||
if (missingNodes[2 * i + 1] && missingNodes[2 * i + 2]) { | |||
missingNodes[i] = 1; | |||
} | |||
} else { | |||
if (missingNodes[2 * i + 1]) { | |||
missingNodes[i] = 1; | |||
} | |||
} | |||
} | |||
/* For each missing leaf node, add the highest missing node on the path | |||
* back to the root to the set to be revealed */ | |||
size_t* revealed = malloc(tree->numLeaves * sizeof(size_t)); | |||
size_t pos = 0; | |||
for (size_t i = 0; i < missingLeavesSize; i++) { | |||
size_t node = missingLeaves[i] + firstLeaf; /* input is leaf indexes, translate to nodes */ | |||
do { | |||
if (!missingNodes[getParent(node)]) { | |||
if (!contains(revealed, pos, node)) { | |||
revealed[pos] = node; | |||
pos++; | |||
} | |||
break; | |||
} | |||
} while ((node = getParent(node)) != 0); | |||
} | |||
free(missingNodes); | |||
*outputSize = pos; | |||
return revealed; | |||
} | |||
size_t openMerkleTreeSize(size_t numNodes, uint16_t* missingLeaves, size_t missingLeavesSize, | |||
const picnic_instance_t* params) { | |||
tree_t* tree = createTree(numNodes, params->digest_size); | |||
size_t revealedSize = 0; | |||
size_t* revealed = getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &revealedSize); | |||
freeTree(tree); | |||
free(revealed); | |||
return revealedSize * params->digest_size; | |||
} | |||
/* Serialze the missing nodes that the verifier will require to check commitments for non-missing | |||
* leaves */ | |||
uint8_t* openMerkleTree(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, | |||
size_t* outputSizeBytes) { | |||
size_t revealedSize = 0; | |||
size_t* revealed = getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &revealedSize); | |||
/* Serialize output */ | |||
*outputSizeBytes = revealedSize * tree->dataSize; | |||
uint8_t* output = malloc(*outputSizeBytes); | |||
uint8_t* outputBase = output; | |||
for (size_t i = 0; i < revealedSize; i++) { | |||
memcpy(output, tree->nodes[revealed[i]], tree->dataSize); | |||
output += tree->dataSize; | |||
} | |||
free(revealed); | |||
return outputBase; | |||
} | |||
/* addMerkleNodes: deserialize and add the data for nodes provided by the committer */ | |||
int addMerkleNodes(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, uint8_t* input, | |||
size_t inputSize) { | |||
int ret = 0; | |||
assert(missingLeavesSize < tree->numLeaves); | |||
if (inputSize > INT_MAX) { | |||
return -1; | |||
} | |||
int intLen = (int)inputSize; | |||
size_t revealedSize = 0; | |||
size_t* revealed = getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &revealedSize); | |||
assert(!contains(revealed, revealedSize, 0)); | |||
/* Deserialize input */ | |||
for (size_t i = 0; i < revealedSize; i++) { | |||
intLen -= tree->dataSize; | |||
if (intLen < 0) { | |||
ret = -1; | |||
goto Exit; | |||
} | |||
memcpy(tree->nodes[revealed[i]], input, tree->dataSize); | |||
input += tree->dataSize; | |||
tree->haveNode[revealed[i]] = 1; | |||
} | |||
if (intLen != 0) { | |||
ret = -1; | |||
goto Exit; | |||
} | |||
Exit: | |||
free(revealed); | |||
return ret; | |||
} | |||
/* verifyMerkleTree: verify for each leaf that is set */ | |||
int verifyMerkleTree(tree_t* tree, /* uint16_t* missingLeaves, size_t missingLeavesSize, */ | |||
uint8_t** leafData, uint8_t* salt, const picnic_instance_t* params) { | |||
size_t firstLeaf = tree->numNodes - tree->numLeaves; | |||
/* Copy the leaf data, where we have it. The actual data being committed to has already been | |||
* hashed, according to the spec. */ | |||
for (size_t i = 0; i < tree->numLeaves; i++) { | |||
if (leafData[i] != NULL) { | |||
if (tree->haveNode[firstLeaf + i] == 1) { | |||
return -1; /* A leaf was assigned from the prover for a node we've recomputed */ | |||
} | |||
if (leafData[i] != NULL) { | |||
memcpy(tree->nodes[firstLeaf + i], leafData[i], tree->dataSize); | |||
tree->haveNode[firstLeaf + i] = 1; | |||
} | |||
} | |||
} | |||
/* At this point the tree has some of the leaves, and some intermediate nodes | |||
* Work up the tree, computing all nodes we don't have that are missing. */ | |||
for (int i = (int)tree->numNodes; i > 0; i--) { | |||
computeParentHash(tree, i, salt, params); | |||
} | |||
/* Fail if the root was not computed. */ | |||
if (!tree->haveNode[0]) { | |||
return -1; | |||
} | |||
return 0; | |||
} |
@@ -0,0 +1,83 @@ | |||
/*! @file tree.h | |||
* @brief This file has part of the tree implementation used to generate | |||
* random seeds and commit to multiple values with a Merkle tree. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC3_TREE_H | |||
#define PICNIC3_TREE_H | |||
#include "picnic_instances.h" | |||
/* | |||
* Represents a (nearly) complete binary tree, stored in memory as an array. | |||
* The root is at nodes[0], and the left child of node k is 2k + 1, the right | |||
* child is at 2k + 2 | |||
*/ | |||
/* Array-backed tree used both for seed derivation and for Merkle commitments.
 * nodes, haveNode and exists are indexed by node number (root = 0). */
typedef struct tree_t { | |||
size_t depth; /* The depth of the tree */ | |||
uint8_t** nodes; /* The data for each node */ | |||
size_t dataSize; /* The size of the data at each node, in bytes */ | |||
uint8_t* haveNode; /* If we have the data (seed or hash) for node i, haveNode[i] is 1 */ | |||
uint8_t* exists; /* Since the tree is not always complete, nodes marked 0 don't exist */ | |||
size_t numNodes; /* The total number of nodes in the tree */ | |||
size_t numLeaves; /* The total number of leaves in the tree */ | |||
} tree_t; | |||
/* The largest seed size is 256 bits, for the Picnic3-L5-FS parameter set. */ | |||
#define MAX_SEED_SIZE_BYTES (32) | |||
tree_t* createTree(size_t numLeaves, size_t dataSize); | |||
void freeTree(tree_t* tree); | |||
uint8_t** getLeaves(tree_t* tree); | |||
/* Get one leaf, leafIndex must be in [0, tree->numLeaves -1] */ | |||
uint8_t* getLeaf(tree_t* tree, size_t leafIndex); | |||
/* Functions for trees used to derive seeds. | |||
* Signer's usage: generateSeeds -> revealSeeds -> freeTree | |||
* Verifier's usage: createTree -> reconstructSeeds -> freeTree | |||
*/ | |||
/* generateSeeds returns a newly created tree that the caller releases with
 * freeTree. revealSeeds returns the number of bytes written to output; a safe
 * number of bytes for callers to allocate is numLeaves * params->seed_size, or
 * call revealSeedsSize. */
tree_t* generateSeeds(size_t nSeeds, uint8_t* rootSeed, uint8_t* salt, size_t repIndex, | |||
const picnic_instance_t* params); | |||
size_t revealSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* output, | |||
size_t outputLen, const picnic_instance_t* params); | |||
size_t revealSeedsSize(size_t numNodes, uint16_t* hideList, size_t hideListSize, | |||
const picnic_instance_t* params); | |||
int reconstructSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* input, | |||
size_t inputLen, uint8_t* salt, size_t repIndex, | |||
const picnic_instance_t* params); | |||
/* Functions for Merkle hash trees used for commitments. | |||
* | |||
* Signer call sequence: | |||
* 1. createTree | |||
* 2. buildMerkleTree with all commitments as leaf nodes | |||
* 3. openMerkleTree with missingLeaves - list of commitments the verifier won't recompute | |||
* 4. freeTree | |||
* Verifier call sequence | |||
* 1. createTree | |||
* 2. addMerkleNodes with the output of the signer | |||
* 3. verifyMerkleTree Checks that all leaf nodes present are correct commitments | |||
* 4. freeTree | |||
*/ | |||
void buildMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt, | |||
const picnic_instance_t* params); | |||
uint8_t* openMerkleTree(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, | |||
size_t* outputSizeBytes); | |||
size_t openMerkleTreeSize(size_t numNodes, uint16_t* notMissingLeaves, size_t notMissingLeavesSize, | |||
const picnic_instance_t* params); | |||
int addMerkleNodes(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, uint8_t* input, | |||
size_t inputSize); | |||
int verifyMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt, | |||
const picnic_instance_t* params); | |||
#endif |
@@ -0,0 +1,203 @@ | |||
/*! @file picnic_types.c | |||
* @brief Functions to allocate/free data types used in the Picnic signature | |||
* scheme implementation. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include <assert.h> | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "compat.h" | |||
#include "picnic3_types.h" | |||
/* Allocates the buffers of a random tape set for params->num_MPC_parties
 * parties:
 *   - tape[i]:      2 * view_size zeroed bytes per party, carved from one slab
 *   - parity_tapes: 2 * view_size zeroed bytes
 *   - aux_bits:     view_size zeroed bytes
 *   - buffer:       16 uint16_t values, 32-byte aligned
 * Release with freeRandomTape. Allocation failures are not reported. */
void allocateRandomTape(randomTape_t* tape, const picnic_instance_t* params) {
  tape->nTapes       = params->num_MPC_parties;
  tape->tape         = malloc(tape->nTapes * sizeof(uint8_t*));
  tape->aux_bits     = calloc(1, params->view_size);
  tape->buffer       = aligned_alloc(32, 16 * sizeof(uint16_t));
  size_t tapeSizeBytes = 2 * params->view_size;
  tape->parity_tapes = calloc(1, tapeSizeBytes);

  uint8_t* slab = calloc(1, tape->nTapes * tapeSizeBytes);
  /* The index is a size_t like nTapes: a narrower counter would wrap around
   * and never reach the bound once the party count exceeds its range. */
  for (size_t i = 0; i < tape->nTapes; i++) {
    tape->tape[i] = slab;
    slab += tapeSizeBytes;
  }

  tape->pos     = 0;
  tape->aux_pos = 0;
}
/* Releases everything allocated by allocateRandomTape. The randomTape_t object
 * itself is not freed. A NULL tape is ignored. */
void freeRandomTape(randomTape_t* tape) {
  if (tape == NULL) {
    return;
  }
  free(tape->tape[0]); /* the slab shared by all per-party tapes */
  free(tape->tape);
  free(tape->parity_tapes);
  aligned_free(tape->buffer);
  free(tape->aux_bits);
}
/* Resets `proof` and allocates its fixed-size members: C (digest_size bytes),
 * input (input_size bytes), aux and msgs (view_size bytes each). seedInfo is
 * left NULL with length 0; the sign/verify code sets it. */
void allocateProof2(proof2_t* proof, const picnic_instance_t* params) {
  memset(proof, 0, sizeof(proof2_t));

  proof->unOpenedIndex = 0;
  proof->seedInfoLen   = 0;
  proof->seedInfo      = NULL;

  proof->C     = malloc(params->digest_size);
  proof->input = malloc(params->input_size);
  proof->aux   = malloc(params->view_size);
  proof->msgs  = malloc(params->view_size);
}
/* Frees the buffers owned by one proof; the proof2_t object itself is not
 * freed. */
static void freeProof2(proof2_t* proof) {
  void* const owned[] = {proof->seedInfo, proof->C, proof->input, proof->aux, proof->msgs};
  for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
    free(owned[i]);
  }
}
/* Allocates the containers of a signature: the challenge (digest_size bytes),
 * the two challenge lists (num_opened_rounds entries each) and a zeroed array
 * of num_rounds proofs. iSeedInfo and cvInfo start out NULL with length 0; the
 * sign/verify code sets them. */
void allocateSignature2(signature2_t* sig, const picnic_instance_t* params) {
  const size_t challengeListBytes = params->num_opened_rounds * sizeof(uint16_t);

  sig->iSeedInfo    = NULL;
  sig->iSeedInfoLen = 0;
  sig->cvInfo       = NULL;
  sig->cvInfoLen    = 0;

  sig->challenge  = (uint8_t*)malloc(params->digest_size);
  sig->challengeC = (uint16_t*)malloc(challengeListBytes);
  sig->challengeP = (uint16_t*)malloc(challengeListBytes);

  /* Individual proofs are allocated during signature generation, only for
   * rounds when needed. */
  sig->proofs = calloc(params->num_rounds, sizeof(proof2_t));
}
/* Releases everything owned by a signature, including the buffers of each of
 * its params->num_rounds proofs. The signature2_t object itself is not freed. */
void freeSignature2(signature2_t* sig, const picnic_instance_t* params) {
  free(sig->iSeedInfo);
  free(sig->cvInfo);
  free(sig->challenge);
  free(sig->challengeC);
  free(sig->challengeP);

  size_t round = 0;
  while (round < params->num_rounds) {
    freeProof2(sig->proofs + round);
    round++;
  }
  free(sig->proofs);
}
/* Allocate one commitments_t object with capacity for numCommitments values */ | |||
void allocateCommitments2(commitments_t* commitments, const picnic_instance_t* params, | |||
size_t numCommitments) { | |||
commitments->nCommitments = numCommitments; | |||
uint8_t* slab = malloc(numCommitments * params->digest_size + numCommitments * sizeof(uint8_t*)); | |||
commitments->hashes = (uint8_t**)slab; | |||
slab += numCommitments * sizeof(uint8_t*); | |||
for (size_t i = 0; i < numCommitments; i++) { | |||
commitments->hashes[i] = slab; | |||
slab += params->digest_size; | |||
} | |||
} | |||
/* Releases the allocation made by allocateCommitments2; the commitments_t
 * object itself is not freed. A NULL argument is ignored. */
void freeCommitments2(commitments_t* commitments) {
  if (commitments == NULL) {
    return;
  }
  free(commitments->hashes);
}
inputs_t allocateInputs(const picnic_instance_t* params) { | |||
uint8_t* slab = calloc(1, params->num_rounds * (params->input_size + sizeof(uint8_t*))); | |||
inputs_t inputs = (uint8_t**)slab; | |||
slab += params->num_rounds * sizeof(uint8_t*); | |||
for (uint32_t i = 0; i < params->num_rounds; i++) { | |||
inputs[i] = (uint8_t*)slab; | |||
slab += params->input_size; | |||
} | |||
return inputs; | |||
} | |||
/* Releases the single allocation made by allocateInputs. */
void freeInputs(inputs_t inputs) {
  void* allocation = inputs;
  free(allocation);
}
msgs_t* allocateMsgs(const picnic_instance_t* params) { | |||
msgs_t* msgs = malloc(params->num_rounds * sizeof(msgs_t)); | |||
uint8_t* slab = | |||
calloc(1, params->num_rounds * (params->num_MPC_parties * ((params->view_size + 7) / 8 * 8) + | |||
params->num_MPC_parties * sizeof(uint8_t*))); | |||
for (uint32_t i = 0; i < params->num_rounds; i++) { | |||
msgs[i].pos = 0; | |||
msgs[i].unopened = -1; | |||
msgs[i].msgs = (uint8_t**)slab; | |||
slab += params->num_MPC_parties * sizeof(uint8_t*); | |||
for (uint32_t j = 0; j < params->num_MPC_parties; j++) { | |||
msgs[i].msgs[j] = slab; | |||
slab += (params->view_size + 7) / 8 * 8; | |||
} | |||
} | |||
return msgs; | |||
} | |||
msgs_t* allocateMsgsVerify(const picnic_instance_t* params) { | |||
msgs_t* msgs = malloc(sizeof(msgs_t)); | |||
uint8_t* slab = calloc(1, (params->num_MPC_parties * ((params->view_size + 7) / 8 * 8) + | |||
params->num_MPC_parties * sizeof(uint8_t*))); | |||
msgs->pos = 0; | |||
msgs->unopened = -1; | |||
msgs->msgs = (uint8_t**)slab; | |||
slab += params->num_MPC_parties * sizeof(uint8_t*); | |||
for (uint32_t j = 0; j < params->num_MPC_parties; j++) { | |||
msgs->msgs[j] = slab; | |||
slab += (params->view_size + 7) / 8 * 8; | |||
} | |||
return msgs; | |||
} | |||
/* Releases the slab (reached through the first element's pointer table) and
 * then the msgs_t array itself. */
void freeMsgs(msgs_t* msgs) {
  uint8_t** slab = msgs->msgs;
  free(slab);
  free(msgs);
}
commitments_t* allocateCommitments(const picnic_instance_t* params, size_t numCommitments) { | |||
commitments_t* commitments = malloc(params->num_rounds * sizeof(commitments_t)); | |||
commitments->nCommitments = (numCommitments) ? numCommitments : params->num_MPC_parties; | |||
uint8_t* slab = malloc(params->num_rounds * (commitments->nCommitments * params->digest_size + | |||
commitments->nCommitments * sizeof(uint8_t*))); | |||
for (uint32_t i = 0; i < params->num_rounds; i++) { | |||
commitments[i].hashes = (uint8_t**)slab; | |||
slab += commitments->nCommitments * sizeof(uint8_t*); | |||
for (uint32_t j = 0; j < commitments->nCommitments; j++) { | |||
commitments[i].hashes[j] = slab; | |||
slab += params->digest_size; | |||
} | |||
} | |||
return commitments; | |||
} | |||
/* Releases the slab (reached through the first element's pointer table) and
 * then the commitments_t array itself. */
void freeCommitments(commitments_t* commitments) {
  uint8_t** slab = commitments->hashes;
  free(slab);
  free(commitments);
}
@@ -0,0 +1,63 @@ | |||
/*! @file picnic_types.h | |||
* @brief Functions to allocate/free data types used in the Picnic signature | |||
* scheme implementation. | |||
* | |||
* This file is part of the reference implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC_TYPES_H | |||
#define PICNIC_TYPES_H | |||
#include "picnic3_impl.h" | |||
/* Type definitions */ | |||
/* Random tapes of all parties for one round, set up by allocateRandomTape and
 * released by freeRandomTape. */
typedef struct randomTape_t { | |||
/* nTapes pointers, one tape per party; all tapes share one allocation */
uint8_t** tape; | |||
/* view_size bytes, zero-initialized on allocation */
uint8_t* aux_bits; | |||
/* same length as one tape (2 * view_size bytes), zero-initialized */
uint8_t* parity_tapes; | |||
/* set to 0 on allocation; presumably the current read position in the tapes -- TODO confirm */
uint32_t pos; | |||
/* set to 0 on allocation; presumably the current position in aux_bits -- TODO confirm */
uint32_t aux_pos; | |||
/* number of tapes, taken from params->num_MPC_parties */
size_t nTapes; | |||
/* 16 uint16_t values in a 32-byte-aligned allocation */
uint16_t* buffer; | |||
} randomTape_t; | |||
/* A set of commitment hashes; see allocateCommitments / allocateCommitments2. */
typedef struct commitments_t { | |||
/* table of nCommitments pointers, each to a digest_size-byte hash */
uint8_t** hashes; | |||
/* number of entries in hashes */
size_t nCommitments; | |||
} commitments_t; | |||
typedef uint8_t** inputs_t; | |||
/* Message buffers of all parties for one round; see allocateMsgs and
 * allocateMsgsVerify, which set pos to 0 and unopened to -1. */
typedef struct msgs_t { | |||
uint8_t** msgs; // One for each player | |||
/* set to 0 on allocation; presumably the current position in the buffers -- TODO confirm */
size_t pos; | |||
int unopened; // Index of the unopened party, or -1 if all parties opened (when signing) | |||
} msgs_t; | |||
#define UNUSED_PARAMETER(x) (void)(x) | |||
void allocateRandomTape(randomTape_t* tape, const picnic_instance_t* params); | |||
void freeRandomTape(randomTape_t* tape); | |||
void allocateProof2(proof2_t* proof, const picnic_instance_t* params); | |||
commitments_t* allocateCommitments(const picnic_instance_t* params, size_t nCommitments); | |||
void freeCommitments(commitments_t* commitments); | |||
void allocateCommitments2(commitments_t* commitments, const picnic_instance_t* params, | |||
size_t nCommitments); | |||
void freeCommitments2(commitments_t* commitments); | |||
inputs_t allocateInputs(const picnic_instance_t* params); | |||
void freeInputs(inputs_t inputs); | |||
msgs_t* allocateMsgs(const picnic_instance_t* params); | |||
msgs_t* allocateMsgsVerify(const picnic_instance_t* params); | |||
void freeMsgs(msgs_t* msgs); | |||
#endif /* PICNIC_TYPES_H */ |
@@ -0,0 +1,95 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "picnic_instances.h" | |||
// instance handling | |||
// L1, L3, and L5 instances with partial Sbox layer | |||
#define lowmc_parameters_128_128_20 { 0, 0, 0, 0 } | |||
#define lowmc_parameters_192_192_30 { 0, 0, 0, 0 } | |||
#define lowmc_parameters_256_256_38 { 0, 0, 0, 0 } | |||
// L1, L3, and L5 instances with full Sbox layer | |||
#include "lowmc_129_129_4.h" | |||
#define lowmc_parameters_192_192_4 { 0, 0, 0, 0 } | |||
#define lowmc_parameters_255_255_4 { 0, 0, 0, 0 } | |||
#define ENABLE_ZKBPP(x) { 0, 0, 0, 0 } | |||
#define ENABLE_KKW(x) x | |||
#define NULL_FNS \ | |||
{ NULL, NULL, NULL } | |||
/* Table of parameter sets; picnic_instance_get indexes it with the
 * picnic_params_t value. Each row lists, in picnic_instance_t field order:
 *   lowmc, digest_size, seed_size, num_rounds, num_opened_rounds,
 *   num_MPC_parties, input_size, output_size, view_size, view_round_size,
 *   collapsed_challenge_size, unruh_without_input_bytes_size,
 *   unruh_with_input_bytes_size, max_signature_size, params, impls.
 * Rows whose lowmc parameters are all zero are rejected by create_instance:
 * that is every row wrapped in ENABLE_ZKBPP, plus the KKW rows whose
 * lowmc_parameters_* macro is defined as zeros in this file. The first row is
 * the placeholder for PARAMETER_SET_INVALID. */
static picnic_instance_t instances[PARAMETER_SET_MAX_INDEX] = { | |||
{{0, 0, 0, 0}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, PARAMETER_SET_INVALID, NULL_FNS}, | |||
/* ZKB++ with partial LowMC instances */ | |||
{ENABLE_ZKBPP(lowmc_parameters_128_128_20), 32, 16, 219, 219, 3, 16, 16, 75, 30, 55, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L1_FS, Picnic_L1_FS, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_128_128_20), 32, 16, 219, 219, 3, 16, 16, 75, 30, 55, 91, 107, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L1_UR, Picnic_L1_UR, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_192_192_30), 48, 24, 329, 329, 3, 24, 24, 113, 30, 83, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L3_FS, Picnic_L3_FS, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_192_192_30), 48, 24, 329, 329, 3, 24, 24, 113, 30, 83, 137, 161, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L3_UR, Picnic_L3_UR, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_256_256_38), 64, 32, 438, 438, 3, 32, 32, 143, 30, 110, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L5_FS, Picnic_L5_FS, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_256_256_38), 64, 32, 438, 438, 3, 32, 32, 143, 30, 110, 175, 207, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L5_UR, Picnic_L5_UR, NULL_FNS}, | |||
/* KKW with full LowMC instances */ | |||
{ENABLE_KKW(lowmc_parameters_129_129_4), 32, 16, 250, 36, 16, 17, 17, 65, 129, 55, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic3_L1, Picnic3_L1, NULL_FNS}, | |||
{ENABLE_KKW(lowmc_parameters_192_192_4), 48, 24, 419, 52, 16, 24, 24, 96, 192, 83, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic3_L3, Picnic3_L3, NULL_FNS}, | |||
{ENABLE_KKW(lowmc_parameters_255_255_4), 64, 32, 601, 68, 16, 32, 32, 128, 255, 110, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic3_L5, Picnic3_L5, NULL_FNS}, | |||
/* ZKB++ with full LowMC instances */ | |||
{ENABLE_ZKBPP(lowmc_parameters_129_129_4), 32, 16, 219, 219, 3, 17, 17, 65, 129, 55, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L1_full, Picnic_L1_full, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_192_192_4), 48, 24, 329, 329, 3, 24, 24, 96, 192, 83, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L3_full, Picnic_L3_full, NULL_FNS}, | |||
{ENABLE_ZKBPP(lowmc_parameters_255_255_4), 64, 32, 438, 438, 3, 32, 32, 128, 255, 110, 0, 0, | |||
PICNIC_SIGNATURE_SIZE_Picnic_L5_full, Picnic_L5_full, NULL_FNS}, | |||
}; | |||
static bool instance_initialized[PARAMETER_SET_MAX_INDEX]; | |||
/* Fills in the implementation function pointers of one parameter set.
 * Returns false, leaving `pp` untouched, when any of the lowmc m/n/r/k
 * parameters is zero or when the set is one of the *_UR variants. The aux and
 * online-simulation implementations are only looked up for the Picnic3 sets. */
static bool create_instance(picnic_instance_t* pp) {
  const bool lowmc_configured = pp->lowmc.m && pp->lowmc.n && pp->lowmc.r && pp->lowmc.k;
  if (!lowmc_configured) {
    return false;
  }

  switch (pp->params) {
  case Picnic_L1_UR:
  case Picnic_L3_UR:
  case Picnic_L5_UR:
    return false;
  default:
    break;
  }

  pp->impls.lowmc = lowmc_get_implementation(&pp->lowmc);

  const bool is_picnic3 = pp->params >= Picnic3_L1 && pp->params <= Picnic3_L5;
  if (is_picnic3) {
    pp->impls.lowmc_aux             = lowmc_compute_aux_get_implementation(&pp->lowmc);
    pp->impls.lowmc_simulate_online = lowmc_simulate_online_get_implementation(&pp->lowmc);
  }
  return true;
}
/* Returns the instance for `param`, initializing it on first use. Returns NULL
 * when `param` is outside (PARAMETER_SET_INVALID, PARAMETER_SET_MAX_INDEX) or
 * when create_instance rejects the parameter set. */
const picnic_instance_t* picnic_instance_get(picnic_params_t param) {
  const bool in_range = param > PARAMETER_SET_INVALID && param < PARAMETER_SET_MAX_INDEX;
  if (!in_range) {
    return NULL;
  }

  picnic_instance_t* instance = &instances[param];
  if (instance_initialized[param]) {
    return instance;
  }
  if (!create_instance(instance)) {
    return NULL;
  }
  instance_initialized[param] = true;
  return instance;
}
@@ -0,0 +1,62 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef PICNIC_INSTANCES_H | |||
#define PICNIC_INSTANCES_H | |||
#include "lowmc.h" | |||
#include "picnic3_simulate.h" | |||
#include "picnic.h" | |||
#define SALT_SIZE 32 | |||
#define MAX_DIGEST_SIZE 64 | |||
/*
 * Descriptor of a single Picnic parameter set: the LowMC parameters it uses,
 * the size/count constants of the scheme, the parameter-set identifier and
 * the implementation function pointers selected for it.
 * The field order matters: the instances[] table is filled with positional
 * initializers.
 */
typedef struct picnic_instance_t { | |||
/* LowMC parameters; create_instance() rejects the entry if m, n, r or k is 0 */
lowmc_parameters_t lowmc; | |||
uint32_t digest_size; /* bytes */ | |||
uint32_t seed_size; /* bytes */ | |||
uint32_t num_rounds; // T | |||
uint32_t num_opened_rounds; // u | |||
uint32_t num_MPC_parties; // N | |||
uint32_t input_size; /* bytes */ | |||
uint32_t output_size; /* bytes */ | |||
uint32_t view_size; /* bytes */ | |||
uint32_t view_round_size; /* bits (per round) */ | |||
uint32_t collapsed_challenge_size; /* bytes */ | |||
uint32_t unruh_without_input_bytes_size; /* bytes */ | |||
uint32_t unruh_with_input_bytes_size; /* bytes */ | |||
uint32_t max_signature_size; /* bytes */ | |||
/* identifier of the parameter set this descriptor belongs to */
picnic_params_t params; | |||
/* Implementation pointers assigned by create_instance(): lowmc is always
 * assigned; lowmc_aux and lowmc_simulate_online are only assigned for the
 * Picnic3_L1 .. Picnic3_L5 parameter sets. */
struct { | |||
lowmc_implementation_f lowmc; | |||
lowmc_compute_aux_implementation_f lowmc_aux; | |||
lowmc_simulate_online_f lowmc_simulate_online; | |||
} impls; | |||
} picnic_instance_t; | |||
/* Returns the descriptor for param, or NULL when param is out of range or the
 * parameter set cannot be set up. The descriptor is prepared on first use. */
const picnic_instance_t* picnic_instance_get(picnic_params_t param); | |||
/* Exported size queries for a parameter set.
 * NOTE(review): the definitions are not part of this header; presumably the
 * results are in bytes - confirm against the implementation. */
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t param); | |||
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param); | |||
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_public_key_size(picnic_params_t param); | |||
/* Prefix values for domain separation */ | |||
/* Declared static const in a header: every translation unit that includes
 * this file gets its own private copy of these six bytes. */
static const uint8_t HASH_PREFIX_0 = 0; | |||
static const uint8_t HASH_PREFIX_1 = 1; | |||
static const uint8_t HASH_PREFIX_2 = 2; | |||
static const uint8_t HASH_PREFIX_3 = 3; | |||
static const uint8_t HASH_PREFIX_4 = 4; | |||
static const uint8_t HASH_PREFIX_5 = 5; | |||
#endif |
@@ -0,0 +1,35 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#include "randomness.h" | |||
#include "macros.h" | |||
// randombytes from the NIST framework / SUPERCOP | |||
extern void randombytes(unsigned char* x, unsigned long long xlen); | |||
/*
 * Fill dst with len bytes produced by randombytes().
 * randombytes() has no return value, so this wrapper always reports 0.
 */
int rand_bytes(uint8_t* dst, size_t len) {
  const unsigned long long byte_count = len;

  randombytes(dst, byte_count);
  return 0;
}
/*
 * Fill dst with num_bits random bits, rounded up to whole bytes.
 *
 * When num_bits is not a multiple of 8, the surplus low-order bits of the
 * last byte are cleared, so only its (num_bits % 8) most significant bits
 * stay random.
 *
 * Returns 0 on success and -1 when rand_bytes() reports a failure.
 */
int rand_bits(uint8_t* dst, size_t num_bits) {
  const size_t tail_bits  = num_bits % 8;
  const size_t byte_count = (num_bits + 7) / 8;

  if (rand_bytes(dst, byte_count) != 0) {
    return -1;
  }

  if (tail_bits != 0) {
    /* Keep the top tail_bits bits of the final byte, zero the rest. */
    const uint8_t keep_mask = (uint8_t)(UINT8_C(0xff) << (8 - tail_bits));
    dst[byte_count - 1] &= keep_mask;
  }

  return 0;
}
@@ -0,0 +1,19 @@ | |||
/* | |||
* This file is part of the optimized implementation of the Picnic signature scheme. | |||
* See the accompanying documentation for complete details. | |||
* | |||
* The code is provided under the MIT license, see LICENSE for | |||
* more details. | |||
* SPDX-License-Identifier: MIT | |||
*/ | |||
#ifndef RANDOMNESS_H | |||
#define RANDOMNESS_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
/* Fill dst with len random bytes. The implementation in randomness.c
 * forwards to randombytes() and always returns 0. */
int rand_bytes(uint8_t* dst, size_t len); | |||
/* Fill dst with (num_bits + 7) / 8 random bytes. When num_bits is not a
 * multiple of 8, the low (8 - num_bits % 8) bits of the last byte are cleared.
 * Returns 0 on success, -1 if rand_bytes() reports a failure. */
int rand_bits(uint8_t* dst, size_t num_bits); | |||
#endif |
@@ -0,0 +1,81 @@ | |||
/* | |||
The eXtended Keccak Code Package (XKCP) | |||
https://github.com/XKCP/XKCP | |||
Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by the designers, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#include <string.h> | |||
#include "KeccakHash.h" | |||
/* ---------------------------------------------------------------- */ | |||
/* Set up a hash instance: initialize the underlying sponge with the given
 * rate/capacity and record the output length and the delimited suffix.
 * A zero delimitedSuffix is rejected with KECCAK_FAIL; a sponge
 * initialization failure is passed through and leaves the two recorded
 * fields untouched. */
HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
{
    HashReturn status;

    if (delimitedSuffix == 0)
        return KECCAK_FAIL;

    status = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity);
    if (status == KECCAK_SUCCESS) {
        instance->fixedOutputLength = hashbitlen;
        instance->delimitedSuffix = delimitedSuffix;
    }
    return status;
}
/* ---------------------------------------------------------------- */ | |||
/* Absorb databitlen bits of input.
 * All whole bytes are absorbed directly. If a partial byte remains (taken
 * from the least significant bits of the next input byte), its bits are
 * merged with the delimited suffix: when the merged value still fits in one
 * byte it becomes the new suffix, otherwise its low byte is absorbed and its
 * high byte becomes the new suffix. */
HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen)
{
    const BitLength wholeBytes = databitlen / 8;
    const unsigned int tailBits = (unsigned int)(databitlen % 8);
    unsigned short merged;
    unsigned char lowByte[1];
    HashReturn status;

    status = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, wholeBytes);
    if (tailBits == 0 || status != KECCAK_SUCCESS)
        return status;

    /* Trailing input bits in the low positions, suffix bits stacked above them. */
    merged = (unsigned short)((unsigned short)(data[wholeBytes] & ((1 << tailBits) - 1))
                              | ((unsigned short)instance->delimitedSuffix << tailBits));

    if ((merged & 0xFF00) != 0x0000) {
        /* Overflowed one byte: absorb the low byte, keep the rest as suffix. */
        lowByte[0] = merged & 0xFF;
        status = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, lowByte, 1);
        instance->delimitedSuffix = (merged >> 8) & 0xFF;
    }
    else {
        instance->delimitedSuffix = merged & 0xFF;
    }
    return status;
}
/* ---------------------------------------------------------------- */ | |||
/* Finish absorbing by feeding the delimited suffix to the sponge, then
 * squeeze fixedOutputLength/8 bytes into hashval. A failure while absorbing
 * the suffix is returned without squeezing. */
HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
{
    const HashReturn status = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);

    if (status != KECCAK_SUCCESS)
        return status;
    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8);
}
/* ---------------------------------------------------------------- */ | |||
/* Squeeze databitlen bits (databitlen/8 bytes) of output into data.
 * Only whole bytes are supported: any other length yields KECCAK_FAIL. */
HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen)
{
    const int wholeBytesOnly = ((databitlen % 8) == 0);

    if (wholeBytesOnly)
        return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8);
    return KECCAK_FAIL;
}
@@ -0,0 +1,125 @@ | |||
/* | |||
The eXtended Keccak Code Package (XKCP) | |||
https://github.com/XKCP/XKCP | |||
Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by the designers, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#ifndef _KeccakHashInterface_h_ | |||
#define _KeccakHashInterface_h_ | |||
#include "config.h" | |||
#ifdef XKCP_has_KeccakP1600 | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "KeccakSponge.h" | |||
#ifndef _Keccak_BitTypes_ | |||
#define _Keccak_BitTypes_ | |||
typedef uint8_t BitSequence; | |||
typedef size_t BitLength; | |||
#endif | |||
/* Status codes returned by the Keccak_Hash* functions. */
typedef enum { KECCAK_SUCCESS = 0, KECCAK_FAIL = 1, KECCAK_BAD_HASHLEN = 2 } HashReturn; | |||
/* State of one hashing session. */
typedef struct { | |||
/* underlying Keccak-p[1600] sponge state */
KeccakWidth1600_SpongeInstance sponge; | |||
/* output length in bits as passed to Keccak_HashInitialize();
 * Keccak_HashFinal() squeezes fixedOutputLength/8 bytes */
unsigned int fixedOutputLength; | |||
/* suffix bits absorbed by Keccak_HashFinal(); Keccak_HashUpdate() rewrites
 * it when the input ends in a partial byte */
unsigned char delimitedSuffix; | |||
} Keccak_HashInstance; | |||
/** | |||
* Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode. | |||
* @param hashInstance Pointer to the hash instance to be initialized. | |||
* @param rate The value of the rate r. | |||
* @param capacity The value of the capacity c. | |||
* @param hashbitlen The desired number of output bits, | |||
* or 0 for an arbitrarily-long output. | |||
* @param delimitedSuffix Bits that will be automatically appended to the end | |||
* of the input message, as in domain separation. | |||
* This is a byte containing from 0 to 7 bits | |||
* formatted like the @a delimitedData parameter of | |||
* the Keccak_SpongeAbsorbLastFewBits() function. | |||
* @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); | |||
/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F) | |||
/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F) | |||
/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06) | |||
/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06) | |||
/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06) | |||
/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06) | |||
/** | |||
* Function to give input data to be absorbed. | |||
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). | |||
* @param data Pointer to the input data. | |||
* When @a databitlen is not a multiple of 8, the last bits of data must be | |||
* in the least significant bits of the last byte (little-endian convention). | |||
* In this case, the (8 - @a databitlen mod 8) most significant bits | |||
* of the last byte are ignored. | |||
* @param databitlen The number of input bits provided in the input data. | |||
* @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen); | |||
/** | |||
* Function to call after all input blocks have been input and to get | |||
* output bits if the length was specified when calling Keccak_HashInitialize(). | |||
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). | |||
* If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of | |||
* output bits is equal to @a hashbitlen. | |||
* If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits | |||
* must be extracted using the Keccak_HashSqueeze() function. | |||
* @param hashval Pointer to the buffer where to store the output data. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); | |||
/** | |||
* Function to squeeze output data. | |||
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). | |||
* @param data Pointer to the buffer where to store the output data. | |||
* @param databitlen The number of output bits desired (must be a multiple of 8). | |||
* @pre Keccak_HashFinal() must have been already called. | |||
* @pre @a databitlen is a multiple of 8. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen); | |||
#else | |||
#error This requires an implementation of Keccak-p[1600] | |||
#endif | |||
#endif |
@@ -0,0 +1,60 @@ | |||
/* | |||
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, | |||
Michaël Peeters, Gilles Van Assche and Ronny Van Keer, | |||
hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to our website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#include <string.h> | |||
#include "KeccakHashtimes4.h" | |||
/* ---------------------------------------------------------------- */ | |||
/* Set up a 4-way hash instance: initialize the underlying times4 sponge with
 * the given rate/capacity and record the output length and the delimited
 * suffix. A zero delimitedSuffix is rejected with KECCAK_FAIL; a sponge
 * initialization failure is passed through and leaves the two recorded
 * fields untouched. */
HashReturn Keccak_HashInitializetimes4(Keccak_HashInstancetimes4 *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
{
    HashReturn status;

    if (delimitedSuffix == 0)
        return KECCAK_FAIL;

    status = (HashReturn)KeccakWidth1600times4_SpongeInitialize(&instance->sponge, rate, capacity);
    if (status == KECCAK_SUCCESS) {
        instance->fixedOutputLength = hashbitlen;
        instance->delimitedSuffix = delimitedSuffix;
    }
    return status;
}
/* ---------------------------------------------------------------- */ | |||
/* Absorb databitlen bits (databitlen/8 bytes) from the inputs in data.
 * Unlike the single-instance variant, partial bytes are not supported:
 * a length that is not a multiple of 8 yields KECCAK_FAIL. */
HashReturn Keccak_HashUpdatetimes4(Keccak_HashInstancetimes4 *instance, const BitSequence **data, BitLength databitlen)
{
    const int wholeBytesOnly = ((databitlen % 8) == 0);

    if (wholeBytesOnly)
        return (HashReturn)KeccakWidth1600times4_SpongeAbsorb(&instance->sponge, data, databitlen/8);
    return KECCAK_FAIL;
}
/* ---------------------------------------------------------------- */ | |||
/* Finish absorbing by feeding the delimited suffix to the times4 sponge, then
 * squeeze fixedOutputLength/8 bytes into the buffers in hashval. A failure
 * while absorbing the suffix is returned without squeezing. */
HashReturn Keccak_HashFinaltimes4(Keccak_HashInstancetimes4 *instance, BitSequence **hashval)
{
    const HashReturn status = (HashReturn)KeccakWidth1600times4_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);

    if (status != KECCAK_SUCCESS)
        return status;
    return (HashReturn)KeccakWidth1600times4_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8);
}
/* ---------------------------------------------------------------- */ | |||
/* Squeeze databitlen bits (databitlen/8 bytes) of output into the buffers in
 * data. Only whole bytes are supported: any other length yields KECCAK_FAIL. */
HashReturn Keccak_HashSqueezetimes4(Keccak_HashInstancetimes4 *instance, BitSequence **data, BitLength databitlen)
{
    const int wholeBytesOnly = ((databitlen % 8) == 0);

    if (wholeBytesOnly)
        return (HashReturn)KeccakWidth1600times4_SpongeSqueeze(&instance->sponge, data, databitlen/8);
    return KECCAK_FAIL;
}
@@ -0,0 +1,112 @@ | |||
/* | |||
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, | |||
Michaël Peeters, Gilles Van Assche and Ronny Van Keer, | |||
hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to our website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#ifndef _KeccakHashInterfacetimes4_h_ | |||
#define _KeccakHashInterfacetimes4_h_ | |||
#include "config.h" | |||
#ifdef XKCP_has_KeccakP1600times4 | |||
#if !defined(SUPERCOP) | |||
#include "KeccakHash.h" | |||
#else | |||
#include <libkeccak.a.headers/KeccakHash.h> | |||
#endif | |||
#include "KeccakSpongetimes4.h" | |||
/* State of one 4-way hashing session. */
typedef struct { | |||
/* underlying Keccak-p[1600]x4 sponge state */
KeccakWidth1600times4_SpongeInstance sponge; | |||
/* output length in bits as passed to Keccak_HashInitializetimes4();
 * Keccak_HashFinaltimes4() squeezes fixedOutputLength/8 bytes */
unsigned int fixedOutputLength; | |||
/* suffix bits absorbed by Keccak_HashFinaltimes4() */
unsigned char delimitedSuffix; | |||
} Keccak_HashInstancetimes4; | |||
/** | |||
* Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode. | |||
* @param hashInstance Pointer to the hash instance to be initialized. | |||
* @param rate The value of the rate r. | |||
* @param capacity The value of the capacity c. | |||
* @param hashbitlen The desired number of output bits, | |||
* or 0 for an arbitrarily-long output. | |||
* @param delimitedSuffix Bits that will be automatically appended to the end | |||
* of the input message, as in domain separation. | |||
* This is a byte containing from 0 to 7 bits | |||
* formatted like the @a delimitedData parameter of | |||
* the Keccak_SpongeAbsorbLastFewBits() function. | |||
* @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashInitializetimes4(Keccak_HashInstancetimes4 *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); | |||
/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitializetimes4_SHAKE128(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1344, 256, 0, 0x1F) | |||
/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitializetimes4_SHAKE256(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1088, 512, 0, 0x1F) | |||
/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitializetimes4_SHA3_224(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1152, 448, 224, 0x06) | |||
/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitializetimes4_SHA3_256(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1088, 512, 256, 0x06) | |||
/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitializetimes4_SHA3_384(hashInstance) Keccak_HashInitializetimes4(hashInstance, 832, 768, 384, 0x06) | |||
/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard. | |||
*/ | |||
#define Keccak_HashInitializetimes4_SHA3_512(hashInstance) Keccak_HashInitializetimes4(hashInstance, 576, 1024, 512, 0x06) | |||
/** | |||
* Function to give input data to be absorbed. | |||
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitializetimes4(). | |||
* @param data Array of 4 pointers to the input data. | |||
* @param databitlen The number of input bits provided in the input data, must be a multiple of 8. | |||
* @pre @a databitlen is a multiple of 8. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashUpdatetimes4(Keccak_HashInstancetimes4 *hashInstance, const BitSequence **data, BitLength databitlen); | |||
/** | |||
* Function to call after all input blocks have been input and to get | |||
* output bits if the length was specified when calling Keccak_HashInitializetimes4(). | |||
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitializetimes4(). | |||
* If @a hashbitlen was not 0 in the call to Keccak_HashInitializetimes4(), the number of | |||
* output bits is equal to @a hashbitlen. | |||
* If @a hashbitlen was 0 in the call to Keccak_HashInitializetimes4(), the output bits | |||
* must be extracted using the Keccak_HashSqueezetimes4() function. | |||
* @param hashval Array of 4 pointers to the buffers where to store the output data. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashFinaltimes4(Keccak_HashInstancetimes4 *hashInstance, BitSequence **hashval); | |||
/** | |||
* Function to squeeze output data. | |||
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitializetimes4(). | |||
* @param data Array of 4 pointers to the buffers where to store the output data. | |||
* @param databitlen The number of output bits desired (must be a multiple of 8). | |||
* @pre Keccak_HashFinaltimes4() must have been already called. | |||
* @pre @a databitlen is a multiple of 8. | |||
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. | |||
*/ | |||
HashReturn Keccak_HashSqueezetimes4(Keccak_HashInstancetimes4 *hashInstance, BitSequence **data, BitLength databitlen); | |||
#else | |||
#error This requires an implementation of Keccak-p[1600]x4 | |||
#endif | |||
#endif |
@@ -0,0 +1,46 @@ | |||
/* | |||
The eXtended Keccak Code Package (XKCP) | |||
https://github.com/XKCP/XKCP | |||
The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by Ronny Van Keer, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
--- | |||
Please refer to SnP-documentation.h for more details. | |||
*/ | |||
#ifndef _KeccakP_1600_SnP_h_ | |||
#define _KeccakP_1600_SnP_h_ | |||
#include <stddef.h> | |||
#define KeccakP1600_implementation "AVX2 optimized implementation" | |||
#define KeccakP1600_stateSizeInBytes 200 | |||
#define KeccakP1600_stateAlignment 32 | |||
#define KeccakF1600_FastLoop_supported | |||
#define KeccakP1600_12rounds_FastLoop_supported | |||
#define KeccakP1600_StaticInitialize() | |||
void KeccakP1600_Initialize(void *state); | |||
void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); | |||
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); | |||
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); | |||
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); | |||
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); | |||
void KeccakP1600_Permute_12rounds(void *state); | |||
void KeccakP1600_Permute_24rounds(void *state); | |||
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); | |||
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); | |||
size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); | |||
size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); | |||
#endif |
@@ -0,0 +1,55 @@ | |||
/* | |||
The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
--- | |||
Please refer to PlSnP-documentation.h for more details. | |||
*/ | |||
#ifndef _KeccakP_1600_times4_SnP_h_ | |||
#define _KeccakP_1600_times4_SnP_h_ | |||
#include <stdint.h> | |||
#include "SIMD256-config.h" | |||
#define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")" | |||
#define KeccakP1600times4_statesSizeInBytes 800 | |||
#define KeccakP1600times4_statesAlignment 32 | |||
#define KeccakF1600times4_FastLoop_supported | |||
#define KeccakP1600times4_12rounds_FastLoop_supported | |||
#define KeccakF1600times4_FastKravatte_supported | |||
#include <stddef.h> | |||
#define KeccakP1600times4_StaticInitialize() | |||
void KeccakP1600times4_InitializeAll(void *states); | |||
/* XOR one byte into the state of instance instanceIndex at byte position
 * offset. The index is built from three parts: (offset)/8 picks the 8-byte
 * lane and is scaled by 4*8 bytes, (instanceIndex)*8 picks that instance's
 * 8-byte slot, and (offset)%8 picks the byte inside the lane.
 * Note that offset and instanceIndex are each evaluated more than once or
 * combined arithmetically, so arguments should be free of side effects. */
#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \ | |||
((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte) | |||
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); | |||
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); | |||
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); | |||
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); | |||
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount); | |||
void KeccakP1600times4_PermuteAll_4rounds(void *states); | |||
void KeccakP1600times4_PermuteAll_6rounds(void *states); | |||
void KeccakP1600times4_PermuteAll_12rounds(void *states); | |||
void KeccakP1600times4_PermuteAll_24rounds(void *states); | |||
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length); | |||
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset); | |||
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); | |||
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset); | |||
size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); | |||
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); | |||
size_t KeccakP1600times4_KravatteCompress(uint64_t *xAccu, uint64_t *kRoll, const unsigned char *input, size_t inputByteLen); | |||
size_t KeccakP1600times4_KravatteExpand(uint64_t *yAccu, const uint64_t *kRoll, unsigned char *output, size_t outputByteLen); | |||
#endif |
@@ -0,0 +1,305 @@ | |||
/* | |||
The eXtended Keccak Code Package (XKCP) | |||
https://github.com/XKCP/XKCP | |||
The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#if (defined(FullUnrolling)) | |||
#define rounds24 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(10, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(11, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(12, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(13, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(14, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(15, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(16, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(17, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#define rounds12 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(12, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(13, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(14, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(15, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(16, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(17, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#define rounds6 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#define rounds4 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#elif (Unrolling == 12) | |||
#define rounds24 \ | |||
prepareTheta \ | |||
for(i=0; i<24; i+=12) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ | |||
} \ | |||
#define rounds12 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(12, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(13, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(14, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(15, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(16, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(17, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#define rounds6 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#define rounds4 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#elif (Unrolling == 6) | |||
/* Unrolling == 6: six round invocations are written out per loop iteration.
 * Every roundsNN macro in this branch begins with prepareTheta. The looping
 * variants assign the variable `i` without declaring it, so it must already
 * be in scope at the expansion site. Argument order alternates between
 * (A, E) and (E, A) from one round to the next. */
/* rounds24: four loop iterations (i = 0, 6, 12, 18), six rounds each. */
#define rounds24 \ | |||
prepareTheta \ | |||
for(i=0; i<24; i+=6) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ | |||
} \ | |||
/* rounds12: two loop iterations (i = 12 and i = 18), six rounds each. */
#define rounds12 \ | |||
prepareTheta \ | |||
for(i=12; i<24; i+=6) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ | |||
} \ | |||
/* rounds6: round indices 18..23 written out without a loop. The final round
 * (23) uses thetaRhoPiChiIota rather than thetaRhoPiChiIotaPrepareTheta.
 * NOTE(review): presumably because no further round needs a prepared theta --
 * confirm. */
#define rounds6 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
/* rounds4: round indices 20..23 written out without a loop; the final round
 * uses thetaRhoPiChiIota. */
#define rounds4 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#elif (Unrolling == 4) | |||
/* Unrolling == 4: four round invocations are written out per loop iteration.
 * Every roundsNN macro in this branch begins with prepareTheta. The looping
 * variants assign the variable `i` without declaring it, so it must already
 * be in scope at the expansion site. Argument order alternates between
 * (A, E) and (E, A) from one round to the next. */
/* rounds24: six loop iterations starting at i = 0, four rounds each. */
#define rounds24 \ | |||
prepareTheta \ | |||
for(i=0; i<24; i+=4) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ | |||
} \ | |||
/* rounds12: three loop iterations (i = 12, 16, 20), four rounds each. */
#define rounds12 \ | |||
prepareTheta \ | |||
for(i=12; i<24; i+=4) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ | |||
} \ | |||
/* rounds6: six is not a multiple of four, so this variant loops in steps of
 * two rounds (i = 18, 20, 22) instead. */
#define rounds6 \ | |||
prepareTheta \ | |||
for(i=18; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} \ | |||
/* rounds4: round indices 20..23 written out without a loop. The final round
 * (23) uses thetaRhoPiChiIota rather than thetaRhoPiChiIotaPrepareTheta.
 * NOTE(review): presumably because no further round needs a prepared theta --
 * confirm. */
#define rounds4 \ | |||
prepareTheta \ | |||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \ | |||
thetaRhoPiChiIota(23, E, A) \ | |||
#elif (Unrolling == 3) | |||
/* Unrolling == 3: three round invocations are written out per loop iteration.
 * Every roundsNN macro in this branch begins with prepareTheta and assigns
 * the loop variable `i` without declaring it, so `i` must already be in scope
 * at the expansion site.
 * With three rounds per iteration the last round of the body is invoked with
 * (A, E), and copyStateVariables(A, E) follows it.
 * NOTE(review): presumably this moves the result held in E back into A so
 * the next iteration can start from A again -- confirm against the
 * copyStateVariables definition. */
/* rounds24: eight loop iterations starting at i = 0, three rounds each. */
#define rounds24 \ | |||
prepareTheta \ | |||
for(i=0; i<24; i+=3) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
/* rounds12: four loop iterations starting at i = 12, three rounds each. */
#define rounds12 \ | |||
prepareTheta \ | |||
for(i=12; i<24; i+=3) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
/* rounds6: two loop iterations (i = 18 and i = 21), three rounds each. */
#define rounds6 \ | |||
prepareTheta \ | |||
for(i=18; i<24; i+=3) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
/* rounds4: four is not a multiple of three, so this variant loops in steps
 * of two rounds (i = 20 and i = 22) and needs no copyStateVariables call. */
#define rounds4 \ | |||
prepareTheta \ | |||
for(i=20; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} \ | |||
#elif (Unrolling == 2) | |||
/* Unrolling == 2: two round invocations are written out per loop iteration,
 * the first with (A, E) and the second with (E, A).
 * Every roundsNN macro in this branch begins with prepareTheta and assigns
 * the loop variable `i` without declaring it, so `i` must already be in scope
 * at the expansion site. The four macros differ only in the starting index. */
/* rounds24: round indices 0..23, two per iteration. */
#define rounds24 \ | |||
prepareTheta \ | |||
for(i=0; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} \ | |||
/* rounds12: round indices 12..23, two per iteration. */
#define rounds12 \ | |||
prepareTheta \ | |||
for(i=12; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} \ | |||
/* rounds6: round indices 18..23, two per iteration. */
#define rounds6 \ | |||
prepareTheta \ | |||
for(i=18; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} \ | |||
/* rounds4: round indices 20..23, two per iteration. */
#define rounds4 \ | |||
prepareTheta \ | |||
for(i=20; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} \ | |||
#elif (Unrolling == 1) | |||
/* Unrolling == 1: a single round invocation per loop iteration, always with
 * (A, E), followed by copyStateVariables(A, E).
 * NOTE(review): presumably the copy moves the result held in E back into A so
 * the next iteration can start from A again -- confirm against the
 * copyStateVariables definition.
 * Every roundsNN macro in this branch begins with prepareTheta and assigns
 * the loop variable `i` without declaring it, so `i` must already be in scope
 * at the expansion site. The four macros differ only in the starting index. */
/* rounds24: round indices 0..23, one per iteration. */
#define rounds24 \ | |||
prepareTheta \ | |||
for(i=0; i<24; i++) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
/* rounds12: round indices 12..23, one per iteration. */
#define rounds12 \ | |||
prepareTheta \ | |||
for(i=12; i<24; i++) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
/* rounds6: round indices 18..23, one per iteration. */
#define rounds6 \ | |||
prepareTheta \ | |||
for(i=18; i<24; i++) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
/* rounds4: round indices 20..23, one per iteration. */
#define rounds4 \ | |||
prepareTheta \ | |||
for(i=20; i<24; i++) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
copyStateVariables(A, E) \ | |||
} \ | |||
#else | |||
#error "Unrolling is not correctly specified!" | |||
#endif | |||
/* roundsN(__nrounds): runs the last __nrounds round indices, i.e. indices
 * (24 - __nrounds) .. 23, with the round count supplied as an expression.
 * The macro assigns the variable `i` without declaring it, so `i` must
 * already be in scope at the expansion site.
 * If the starting index is odd, one round is run on its own with (A, E),
 * followed by copyStateVariables(A, E) and an increment of `i`, so that the
 * remaining rounds start on an even index. The loop then runs two rounds per
 * iteration, (A, E) followed by (E, A).
 * NOTE(review): copyStateVariables presumably moves the result held in E back
 * into A -- confirm against its definition. */
#define roundsN(__nrounds) \ | |||
prepareTheta \ | |||
i = 24 - (__nrounds); \ | |||
if ((i&1) != 0) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i, A, E) \ | |||
copyStateVariables(A, E) \ | |||
++i; \ | |||
} \ | |||
for( /* empty */; i<24; i+=2) { \ | |||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ | |||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ | |||
} | |||
@@ -0,0 +1,111 @@ | |||
/* | |||
The eXtended Keccak Code Package (XKCP) | |||
https://github.com/XKCP/XKCP | |||
Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by the designers, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#include "KeccakSponge.h" | |||
#ifdef KeccakReference | |||
#include "displayIntermediateValues.h" | |||
#endif | |||
/* Width-200 sponge instantiation, compiled only when XKCP_has_KeccakP200 is
 * defined. The parameter macros (prefix, SnP, SnP_width, SnP_Permute and,
 * when KeccakF200_FastLoop_supported is defined, SnP_FastLoop_Absorb) are set
 * before KeccakSponge.inc is included and are all #undef'd afterwards so the
 * next section can redefine them.
 * NOTE(review): KeccakSponge.inc presumably expands these macros into the
 * KeccakWidth200_Sponge* functions -- confirm against that file. */
#ifdef XKCP_has_KeccakP200 | |||
#include "KeccakP-200-SnP.h" | |||
#define prefix KeccakWidth200 | |||
#define SnP KeccakP200 | |||
#define SnP_width 200 | |||
#define SnP_Permute KeccakP200_Permute_18rounds | |||
#if defined(KeccakF200_FastLoop_supported) | |||
#define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb | |||
#endif | |||
#include "KeccakSponge.inc" | |||
#undef prefix | |||
#undef SnP | |||
#undef SnP_width | |||
#undef SnP_Permute | |||
#undef SnP_FastLoop_Absorb | |||
#endif | |||
/* Width-400 sponge instantiation, compiled only when XKCP_has_KeccakP400 is
 * defined. The parameter macros (prefix, SnP, SnP_width, SnP_Permute and,
 * when KeccakF400_FastLoop_supported is defined, SnP_FastLoop_Absorb) are set
 * before KeccakSponge.inc is included and are all #undef'd afterwards so the
 * next section can redefine them.
 * NOTE(review): KeccakSponge.inc presumably expands these macros into the
 * KeccakWidth400_Sponge* functions -- confirm against that file. */
#ifdef XKCP_has_KeccakP400 | |||
#include "KeccakP-400-SnP.h" | |||
#define prefix KeccakWidth400 | |||
#define SnP KeccakP400 | |||
#define SnP_width 400 | |||
#define SnP_Permute KeccakP400_Permute_20rounds | |||
#if defined(KeccakF400_FastLoop_supported) | |||
#define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb | |||
#endif | |||
#include "KeccakSponge.inc" | |||
#undef prefix | |||
#undef SnP | |||
#undef SnP_width | |||
#undef SnP_Permute | |||
#undef SnP_FastLoop_Absorb | |||
#endif | |||
/* Width-800 sponge instantiation, compiled only when XKCP_has_KeccakP800 is
 * defined. The parameter macros (prefix, SnP, SnP_width, SnP_Permute and,
 * when KeccakF800_FastLoop_supported is defined, SnP_FastLoop_Absorb) are set
 * before KeccakSponge.inc is included and are all #undef'd afterwards so the
 * next section can redefine them.
 * NOTE(review): KeccakSponge.inc presumably expands these macros into the
 * KeccakWidth800_Sponge* functions -- confirm against that file. */
#ifdef XKCP_has_KeccakP800 | |||
#include "KeccakP-800-SnP.h" | |||
#define prefix KeccakWidth800 | |||
#define SnP KeccakP800 | |||
#define SnP_width 800 | |||
#define SnP_Permute KeccakP800_Permute_22rounds | |||
#if defined(KeccakF800_FastLoop_supported) | |||
#define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb | |||
#endif | |||
#include "KeccakSponge.inc" | |||
#undef prefix | |||
#undef SnP | |||
#undef SnP_width | |||
#undef SnP_Permute | |||
#undef SnP_FastLoop_Absorb | |||
#endif | |||
/* Width-1600 sponge instantiation using the 24-round permutation, compiled
 * only when XKCP_has_KeccakP1600 is defined. The parameter macros (prefix,
 * SnP, SnP_width, SnP_Permute and, when KeccakF1600_FastLoop_supported is
 * defined, SnP_FastLoop_Absorb) are set before KeccakSponge.inc is included
 * and are all #undef'd afterwards so the next section can redefine them.
 * NOTE(review): KeccakSponge.inc presumably expands these macros into the
 * KeccakWidth1600_Sponge* functions -- confirm against that file. */
#ifdef XKCP_has_KeccakP1600 | |||
#include "KeccakP-1600-SnP.h" | |||
#define prefix KeccakWidth1600 | |||
#define SnP KeccakP1600 | |||
#define SnP_width 1600 | |||
#define SnP_Permute KeccakP1600_Permute_24rounds | |||
#if defined(KeccakF1600_FastLoop_supported) | |||
#define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb | |||
#endif | |||
#include "KeccakSponge.inc" | |||
#undef prefix | |||
#undef SnP | |||
#undef SnP_width | |||
#undef SnP_Permute | |||
#undef SnP_FastLoop_Absorb | |||
#endif | |||
#ifdef XKCP_has_KeccakP1600 | |||
#include "KeccakP-1600-SnP.h" | |||
#define prefix KeccakWidth1600_12rounds | |||
#define SnP KeccakP1600 | |||
#define SnP_width 1600 | |||
#define SnP_Permute KeccakP1600_Permute_12rounds | |||
#if defined(KeccakP1600_12rounds_FastLoop_supported) | |||
#define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb | |||
#endif | |||
#include "KeccakSponge.inc" | |||
#undef prefix | |||
#undef SnP | |||
#undef SnP_width | |||
#undef SnP_Permute | |||
#undef SnP_FastLoop_Absorb | |||
#endif |
@@ -0,0 +1,76 @@ | |||
/* | |||
The eXtended Keccak Code Package (XKCP) | |||
https://github.com/XKCP/XKCP | |||
Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. | |||
Implementation by the designers, hereby denoted as "the implementer". | |||
For more information, feedback or questions, please refer to the Keccak Team website: | |||
https://keccak.team/ | |||
To the extent possible under law, the implementer has waived all copyright | |||
and related or neighboring rights to the source code in this file. | |||
http://creativecommons.org/publicdomain/zero/1.0/ | |||
*/ | |||
#ifndef _KeccakSponge_h_ | |||
#define _KeccakSponge_h_ | |||
/* For the documentation, please follow the link: */ | |||
/* #include "KeccakSponge-documentation.h" */ | |||
#include <string.h> | |||
#include "align.h" | |||
#include "config.h" | |||
/* XKCP_DeclareSpongeStructure(prefix, size, alignment)
 * Declares struct prefix##_SpongeInstanceStruct and its typedef
 * prefix##_SpongeInstance, with ALIGN(alignment) applied to the declaration.
 *   prefix    - name prefix pasted onto the struct and typedef names
 *   size      - number of bytes in the `state` array
 *   alignment - value handed to the ALIGN() macro
 * Fields: state (unsigned char[size]), rate and byteIOIndex (unsigned int),
 * squeezing (int).
 * NOTE(review): squeezing is presumably a flag marking that the instance has
 * switched from absorbing to squeezing; the unit of rate is not visible
 * here -- confirm both against KeccakSponge.inc. */
#define XKCP_DeclareSpongeStructure(prefix, size, alignment) \ | |||
ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ | |||
unsigned char state[size]; \ | |||
unsigned int rate; \ | |||
unsigned int byteIOIndex; \ | |||
int squeezing; \ | |||
} prefix##_SpongeInstance; | |||
/* XKCP_DeclareSpongeFunctions(prefix)
 * Declares the prototypes of the five sponge functions for one instance type,
 * each returning int:
 *   prefix##_Sponge                   - takes rate, capacity, an input
 *                                       buffer, a suffix byte and an output
 *                                       buffer in a single call
 *   prefix##_SpongeInitialize         - takes an instance, rate and capacity
 *   prefix##_SpongeAbsorb             - takes an instance and a data buffer
 *   prefix##_SpongeAbsorbLastFewBits  - takes an instance and one
 *                                       delimitedData byte
 *   prefix##_SpongeSqueeze            - takes an instance and an output buffer
 * The instance arguments use the prefix##_SpongeInstance type, so that type
 * must be declared before this macro is expanded.
 * NOTE(review): the meaning of the int return value is not visible here --
 * presumably 0 on success; confirm against KeccakSponge.inc. */
#define XKCP_DeclareSpongeFunctions(prefix) \ | |||
int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ | |||
int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ | |||
int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ | |||
int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ | |||
int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); | |||
/* When XKCP_has_KeccakP200 is defined: declare the KeccakWidth200 instance
 * type and function prototypes, and define XKCP_has_Sponge_Keccak_width200
 * so that code including this header can test for them. */
#ifdef XKCP_has_KeccakP200 | |||
#include "KeccakP-200-SnP.h" | |||
XKCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) | |||
XKCP_DeclareSpongeFunctions(KeccakWidth200) | |||
#define XKCP_has_Sponge_Keccak_width200 | |||
#endif | |||
/* When XKCP_has_KeccakP400 is defined: declare the KeccakWidth400 instance
 * type and function prototypes, and define XKCP_has_Sponge_Keccak_width400
 * so that code including this header can test for them. */
#ifdef XKCP_has_KeccakP400 | |||
#include "KeccakP-400-SnP.h" | |||
XKCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) | |||
XKCP_DeclareSpongeFunctions(KeccakWidth400) | |||
#define XKCP_has_Sponge_Keccak_width400 | |||
#endif | |||
/* When XKCP_has_KeccakP800 is defined: declare the KeccakWidth800 instance
 * type and function prototypes, and define XKCP_has_Sponge_Keccak_width800
 * so that code including this header can test for them. */
#ifdef XKCP_has_KeccakP800 | |||
#include "KeccakP-800-SnP.h" | |||
XKCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) | |||
XKCP_DeclareSpongeFunctions(KeccakWidth800) | |||
#define XKCP_has_Sponge_Keccak_width800 | |||
#endif | |||
/* When XKCP_has_KeccakP1600 is defined: declare the KeccakWidth1600 instance
 * type and function prototypes, and define XKCP_has_Sponge_Keccak_width1600
 * so that code including this header can test for them. */
#ifdef XKCP_has_KeccakP1600 | |||
#include "KeccakP-1600-SnP.h" | |||
XKCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) | |||
XKCP_DeclareSpongeFunctions(KeccakWidth1600) | |||
#define XKCP_has_Sponge_Keccak_width1600 | |||
#endif | |||
/* When XKCP_has_KeccakP1600 is defined: also declare the
 * KeccakWidth1600_12rounds instance type and function prototypes, using the
 * same state size and alignment constants as KeccakWidth1600.
 * Unlike the other sections, no XKCP_has_Sponge_* feature macro is defined
 * for this variant. */
#ifdef XKCP_has_KeccakP1600 | |||
#include "KeccakP-1600-SnP.h" | |||
XKCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) | |||
XKCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) | |||
#endif | |||
#endif |