Compare commits

...

18 Commits

Author SHA1 Message Date
  Henry Case aebce5f4be wip 3 years ago
  Henry Case 20cc113042 picnic3L1: needed updates 3 years ago
  Henry Case 67fb9cf574 add picnic 3 years ago
  Henry Case 895d9c0abd bench ntt 3 years ago
  Henry Case 395896dc92 basemul bench 3 years ago
  Kris K 977d449ce3
Update README.md 3 years ago
  Henry Case 832da09aa8 fix build 3 years ago
  Henry Case d7ca0ddad6 fix memory overrun 3 years ago
  Henry Case 744461b0ff add drone.yml 3 years ago
  Henry Case 89a34ac04b SIKE: enable optimized version 3 years ago
  Henry Case 9cb7e5a265 SIKE/p434 3 years ago
  Henry Case 15b97bc74e Change variable name 3 years ago
  Henry Case 128b5406cc Add bench for rejection sampling 3 years ago
  Henry Case 40e3fff409 remove gtest header 3 years ago
  Henry Case 2e14f263b0 kyber512 benchmarks 3 years ago
  Henry Case 6e0b153ed3 kyber matrix generation bench 3 years ago
  Henry Case 56629c53f9 add benchmarking framework 3 years ago
  Henry Case 59df9a3f73
Create SECURITY.md 3 years ago
100 changed files with 16957 additions and 140 deletions
Split View
  1. +0
    -2
      .astylerc
  2. +0
    -6
      .gitattributes
  3. +1
    -4
      .gitignore
  4. +3
    -0
      .gitmodules
  5. +1
    -0
      3rd/cpu_features
  6. +29
    -4
      CMakeLists.txt
  7. +2
    -1
      README.md
  8. +9
    -0
      SECURITY.md
  9. +14
    -0
      buid.dbg/.drone.yml
  10. +4
    -2
      public/pqc/pqc.h
  11. +14
    -117
      src/capi/pqapi.c
  12. +124
    -0
      src/capi/schemes.h
  13. +8
    -0
      src/common/utils.h
  14. +20
    -0
      src/kem/sike/CMakeLists.txt
  15. +81
    -0
      src/kem/sike/includes/sike/sike.h
  16. +926
    -0
      src/kem/sike/p434/fp-x86_64.S
  17. +207
    -0
      src/kem/sike/p434/fp_generic.c
  18. +282
    -0
      src/kem/sike/p434/fpx.c
  19. +110
    -0
      src/kem/sike/p434/fpx.h
  20. +262
    -0
      src/kem/sike/p434/isogeny.c
  21. +49
    -0
      src/kem/sike/p434/isogeny.h
  22. +128
    -0
      src/kem/sike/p434/params.c
  23. +505
    -0
      src/kem/sike/p434/sike.c
  24. +214
    -0
      src/kem/sike/p434/utils.h
  25. +4
    -2
      src/rustapi/pqc-sys/src/bindings.rs
  26. +4
    -2
      src/rustapi/pqc-sys/src/build.rs
  27. +10
    -0
      src/sign/picnic/AUTHORS
  28. +21
    -0
      src/sign/picnic/LICENSE
  29. +277
    -0
      src/sign/picnic/picnic3l1/avx2/NIST-KATs/PQCgenKAT_sign.c
  30. +222
    -0
      src/sign/picnic/picnic3l1/avx2/NIST-KATs/rng.c
  31. +55
    -0
      src/sign/picnic/picnic3l1/avx2/NIST-KATs/rng.h
  32. +73
    -0
      src/sign/picnic/picnic3l1/avx2/aligned_alloc.c
  33. +16
    -0
      src/sign/picnic/picnic3l1/avx2/api.h
  34. +188
    -0
      src/sign/picnic/picnic3l1/avx2/bitstream.c
  35. +35
    -0
      src/sign/picnic/picnic3l1/avx2/bitstream.h
  36. +104
    -0
      src/sign/picnic/picnic3l1/avx2/compat.h
  37. +122
    -0
      src/sign/picnic/picnic3l1/avx2/cpu.c
  38. +45
    -0
      src/sign/picnic/picnic3l1/avx2/cpu.h
  39. +6
    -0
      src/sign/picnic/picnic3l1/avx2/crypto_sign.h
  40. +173
    -0
      src/sign/picnic/picnic3l1/avx2/endian_compat.h
  41. +43
    -0
      src/sign/picnic/picnic3l1/avx2/io.c
  42. +40
    -0
      src/sign/picnic/picnic3l1/avx2/io.h
  43. +159
    -0
      src/sign/picnic/picnic3l1/avx2/kdf_shake.h
  44. +511
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc.c
  45. +38
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc.c.i
  46. +31
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc.h
  47. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_128_128_20_fns_s128.h
  48. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_128_128_20_fns_s256.h
  49. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_128_128_20_fns_uint64.h
  50. +2768
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4.c
  51. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4.h
  52. +21
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4_fns_s128.h
  53. +21
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4_fns_s256.h
  54. +21
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4_fns_uint64.h
  55. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_192_192_30_fns_s128.h
  56. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_192_192_30_fns_s256.h
  57. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_192_192_30_fns_uint64.h
  58. +17
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_192_192_4_fns_s128.h
  59. +17
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_192_192_4_fns_s256.h
  60. +17
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_192_192_4_fns_uint64.h
  61. +17
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_255_255_4_fns_s128.h
  62. +17
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_255_255_4_fns_s256.h
  63. +17
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_255_255_4_fns_uint64.h
  64. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_256_256_38_fns_s128.h
  65. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_256_256_38_fns_s256.h
  66. +22
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_256_256_38_fns_uint64.h
  67. +24
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_fns_undef.h
  68. +44
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_impl.c.i
  69. +39
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_impl_aux.c.i
  70. +67
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_impl_partial.c.i
  71. +84
    -0
      src/sign/picnic/picnic3l1/avx2/lowmc_pars.h
  72. +312
    -0
      src/sign/picnic/picnic3l1/avx2/macros.h
  73. +912
    -0
      src/sign/picnic/picnic3l1/avx2/mzd_additional.c
  74. +247
    -0
      src/sign/picnic/picnic3l1/avx2/mzd_additional.h
  75. +390
    -0
      src/sign/picnic/picnic3l1/avx2/picnic.c
  76. +285
    -0
      src/sign/picnic/picnic3l1/avx2/picnic.h
  77. +971
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_impl.c
  78. +52
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_impl.h
  79. +516
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_simulate.c
  80. +57
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_simulate.c.i
  81. +25
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_simulate.h
  82. +612
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_tree.c
  83. +83
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_tree.h
  84. +203
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_types.c
  85. +63
    -0
      src/sign/picnic/picnic3l1/avx2/picnic3_types.h
  86. +95
    -0
      src/sign/picnic/picnic3l1/avx2/picnic_instances.c
  87. +62
    -0
      src/sign/picnic/picnic3l1/avx2/picnic_instances.h
  88. +35
    -0
      src/sign/picnic/picnic3l1/avx2/randomness.c
  89. +19
    -0
      src/sign/picnic/picnic3l1/avx2/randomness.h
  90. +81
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakHash.c
  91. +125
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakHash.h
  92. +60
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakHashtimes4.c
  93. +112
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakHashtimes4.h
  94. +1149
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-AVX2.s
  95. +46
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-SnP.h
  96. +1317
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-times4-SIMD256.c
  97. +55
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-times4-SnP.h
  98. +305
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-unrolling.macros
  99. +111
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakSponge.c
  100. +76
    -0
      src/sign/picnic/picnic3l1/avx2/sha3/KeccakSponge.h

+ 0
- 2
.astylerc View File

@@ -1,14 +0,0 @@
#--unpad-paren
# disable backup files

+ 0
- 6
.gitattributes View File

@@ -1,6 +0,0 @@
* text=auto
*.[ch] text whitespacestrict
*.yaml text whitespacestrict
Makefile text whitespace="tabwidth=4,-tab-in-indent,indent-with-non-tab"

[attr]whitespacestrict whitespace="trailing-space,tab-in-indent,space-before-tab,tabwidth=4"

+ 1
- 4
.gitignore View File

@@ -7,7 +7,4 @@ bin/

# Object and library files on Windows
*.lib
*.obj

__pycache__
testcases/
*.obj

+ 3
- 0
.gitmodules View File

@@ -7,3 +7,6 @@
[submodule "3rd/gbench"]
path = 3rd/gbench
url = https://github.com/henrydcase/benchmark.git
[submodule "3rd/cpu_features"]
path = 3rd/cpu_features
url = https://github.com/henrydcase/cpu_features.git

+ 1
- 0
3rd/cpu_features

@@ -0,0 +1 @@
Subproject commit 2b07c2ab7df71d0b6c19afb93f68a808b412a7ff

+ 29
- 4
CMakeLists.txt View File

@@ -1,10 +1,13 @@
cmake_minimum_required(VERSION 3.13)
project(cryptocore NONE)
project(cryptocore VERSION 0.0.1 LANGUAGES C)

enable_language(C)
enable_language(CXX)
enable_language(ASM)

add_subdirectory(3rd/gtest)
add_subdirectory(3rd/cpu_features)

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "~/.cmake/Modules")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "3rd/cmake-modules")
@@ -33,7 +36,19 @@ else()
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()

add_subdirectory(3rd/gtest)
if(NOT CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
# settings below are required by benchmark library
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
# Target for benchmark - it also builds gtest library
set(BENCHMARK_ENABLE_GTEST_TESTS ON CACHE BOOL "Enable testing of the benchmark library." FORCE)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark tests" FORCE)
set(GOOGLETEST_PATH "${CMAKE_SOURCE_DIR}/3rd/gtest" CACHE PATH "Path to the gtest sources" FORCE)
#if (NOT MACOSX)
# set(BENCHMARK_ENABLE_LTO ON CACHE BOOL "Enable link time optim" FORCE)
#endif()
set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
add_subdirectory(${CMAKE_SOURCE_DIR}/3rd/gbench)
endif()

# Arch settings

@@ -82,6 +97,7 @@ include_directories(
public
src/common/
src
3rd/cpu_features/include
)

set_property(GLOBAL PROPERTY obj_libs "")
@@ -128,6 +144,7 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/clean)
add_subdirectory(src/sign/sphincs/sphincs-sha256-128s-robust/clean)
add_subdirectory(src/sign/sphincs/sphincs-sha256-128f-simple/clean)
add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-robust/clean)
add_subdirectory(src/sign/picnic/picnic3l1/clean)

add_subdirectory(src/kem/kyber/kyber512/clean)
add_subdirectory(src/kem/kyber/kyber768/clean)
@@ -148,12 +165,13 @@ add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean)
add_subdirectory(src/kem/sike)

# Hardware optimized targets
if(${ARCH} STREQUAL "ARCH_x86_64")

set(CMAKE_C_FLAGS
"${CMAKE_C_FLAGS} -march=native -mtune=native")
"${CMAKE_C_FLAGS} -march=haswell")
set(SRC_COMMON_AVX2
src/common/keccak4x/KeccakP-1600-times4-SIMD256.c
)
@@ -241,12 +259,16 @@ get_property(OBJ_LIBS GLOBAL PROPERTY obj_libs)

target_link_libraries(
pqc
common
${OBJ_LIBS}
cpu_features
common
)

target_link_libraries(
pqc_s

cpu_features
common
${OBJ_LIBS}
)
@@ -268,6 +290,9 @@ target_include_directories(

${CMAKE_SOURCE_DIR})

if(NOT CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
add_subdirectory(test/bench)
endif()

install(TARGETS pqc pqc_s
PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE


+ 2
- 1
README.md View File

@@ -18,6 +18,7 @@ Users shouldn't expect any level of security provided by this code. The library
| Falcon | 2 | |
| Rainbow | 3 | |
| SPHINCS+ SHA256/SHAKE256 | 3 | x |
| SIKE/p434 | 3 | x |

## Building

@@ -38,13 +39,13 @@ Library provides simple API, wrapping PQClean. For example to use KEM, one shoul
```c
#include <pqc/pqc.h>

const params_t *p = pqc_kem_alg_by_id(KYBER512);
std::vector<uint8_t> ct(ciphertext_bsz(p));
std::vector<uint8_t> ss1(shared_secret_bsz(p));
std::vector<uint8_t> ss2(shared_secret_bsz(p));
std::vector<uint8_t> sk(private_key_bsz(p));
std::vector<uint8_t> pk(public_key_bsz(p));

const params_t *p = pqc_kem_alg_by_id(KYBER512);
pqc_keygen(p, pk.data(), sk.data());
pqc_kem_encapsulate(p, ct.data(), ss1.data(), pk.data());
pqc_kem_decapsulate(p, ss2.data(), ct.data(), sk.data());


+ 9
- 0
SECURITY.md View File

@@ -0,0 +1,9 @@
# Security Policy

## Supported Versions

No security is guaranteed.

## Reporting a Vulnerability

Any comments welcome: contact (at) amongbytes.com

+ 14
- 0
buid.dbg/.drone.yml View File

@@ -0,0 +1,14 @@
kind: pipeline
type: exec
name: default

steps:
- name: build
commands:
- git submodule init
- git submodule update --recursive --remote
- mkdir build
- cd build
- cmake ..
- make
- ./test

+ 4
- 2
public/pqc/pqc.h View File

@@ -41,7 +41,8 @@ extern "C" {
_(SPHINCSSHA256256SROBUST) \
_(SPHINCSSHA256128SROBUST) \
_(SPHINCSSHA256128FSIMPLE) \
_(SPHINCSSHA256192FROBUST)
_(SPHINCSSHA256192FROBUST) \
_(PICNIC3L1)

// defines supported kem algorithm list
#define PQC_SUPPORTED_KEMS(_)\
@@ -63,7 +64,8 @@ extern "C" {
_(SABER) \
_(HQCRMRS128) \
_(HQCRMRS192) \
_(HQCRMRS256)
_(HQCRMRS256) \
_(SIKE434)

// Defines IDs for each algorithm. The
// PQC_ALG_SIG/KEM_MAX indicates number


+ 14
- 117
src/capi/pqapi.c View File

@@ -1,124 +1,9 @@
#include <stdint.h>
#include <stdbool.h>
#include <pqc/pqc.h>
#include <cpuinfo_x86.h>

// PQClean include
#include "sign/rainbow/rainbowV-classic/clean/api.h"
#include "sign/rainbow/rainbowI-classic/clean/api.h"
#include "sign/rainbow/rainbowIII-classic/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h"
#include "sign/falcon/falcon-1024/clean/api.h"
#include "sign/falcon/falcon-1024/avx2/api.h"
#include "sign/falcon/falcon-512/clean/api.h"
#include "sign/falcon/falcon-512/avx2/api.h"
#include "sign/dilithium/dilithium2/clean/api.h"
#include "sign/dilithium/dilithium2/avx2/api.h"
#include "sign/dilithium/dilithium3/clean/api.h"
#include "sign/dilithium/dilithium3/avx2/api.h"
#include "sign/dilithium/dilithium5/clean/api.h"
#include "sign/dilithium/dilithium5/avx2/api.h"
#include "kem/ntru/ntruhps4096821/clean/api.h"
#include "kem/ntru/ntruhps4096821/avx2/api.h"
#include "kem/ntru/ntruhps2048509/clean/api.h"
#include "kem/ntru/ntruhps2048509/avx2/api.h"
#include "kem/ntru/ntruhrss701/clean/api.h"
#include "kem/ntru/ntruhrss701/avx2/api.h"
#include "kem/ntru/ntruhps2048677/clean/api.h"
#include "kem/ntru/ntruhps2048677/avx2/api.h"
#include "kem/ntru_prime/ntrulpr761/clean/api.h"
#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
#include "kem/ntru_prime/ntrulpr653/clean/api.h"
#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
#include "kem/ntru_prime/ntrulpr857/clean/api.h"
#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
#include "kem/kyber/kyber768/clean/api.h"
#include "kem/kyber/kyber768/avx2/api.h"
#include "kem/kyber/kyber1024/clean/api.h"
#include "kem/kyber/kyber1024/avx2/api.h"
#include "kem/kyber/kyber512/clean/api.h"
#include "kem/kyber/kyber512/avx2/api.h"
#include "kem/mceliece/mceliece460896f/avx/api.h"
#include "kem/mceliece/mceliece460896f/clean/api.h"
#include "kem/mceliece/mceliece8192128/avx/api.h"
#include "kem/mceliece/mceliece8192128/clean/api.h"
#include "kem/mceliece/mceliece6688128f/avx/api.h"
#include "kem/mceliece/mceliece6688128f/clean/api.h"
#include "kem/mceliece/mceliece8192128f/avx/api.h"
#include "kem/mceliece/mceliece8192128f/clean/api.h"
#include "kem/mceliece/mceliece6960119f/avx/api.h"
#include "kem/mceliece/mceliece6960119f/clean/api.h"
#include "kem/mceliece/mceliece460896/avx/api.h"
#include "kem/mceliece/mceliece460896/clean/api.h"
#include "kem/mceliece/mceliece6688128/avx/api.h"
#include "kem/mceliece/mceliece6688128/clean/api.h"
#include "kem/mceliece/mceliece348864f/avx/api.h"
#include "kem/mceliece/mceliece348864f/clean/api.h"
#include "kem/mceliece/mceliece6960119/avx/api.h"
#include "kem/mceliece/mceliece6960119/clean/api.h"
#include "kem/mceliece/mceliece348864/avx/api.h"
#include "kem/mceliece/mceliece348864/clean/api.h"
#include "kem/frodo/frodokem976shake/clean/api.h"
#include "kem/frodo/frodokem1344shake/clean/api.h"
#include "kem/frodo/frodokem640shake/clean/api.h"
#include "kem/saber/lightsaber/clean/api.h"
#include "kem/saber/lightsaber/avx2/api.h"
#include "kem/saber/firesaber/clean/api.h"
#include "kem/saber/firesaber/avx2/api.h"
#include "kem/saber/saber/clean/api.h"
#include "kem/saber/saber/avx2/api.h"
#include "kem/hqc/hqc-rmrs-128/clean/api.h"
#include "kem/hqc/hqc-rmrs-192/clean/api.h"
#include "kem/hqc/hqc-rmrs-256/clean/api.h"
#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
#include "kem/hqc/hqc-rmrs-256/avx2/api.h"
#include "schemes.h"

// not proud of this thingy
#define OPT_VERSION _CLEAN_
@@ -192,6 +77,13 @@ const sig_params_t sigs[] = {
PQC_SUPPORTED_SIGS(REG_SIG)
};

// Capabilities of the x86 CPU on which the implementation is running.
// Assigned by static_initialization() in this file.
X86Features CPU_CAPS;

// Returns a read-only pointer to the CPU_CAPS object defined above.
// The pointer refers to a file-scope object, so the caller must not free it.
const X86Features * const get_cpu_caps(void) {
return &CPU_CAPS;
}

const params_t *pqc_kem_alg_by_id(uint8_t id) {
int i;
for(i=0; i<PQC_ALG_KEM_MAX; i++) {
@@ -242,3 +134,8 @@ bool pqc_sig_verify(const params_t *p,
const uint8_t *pk) {
return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk);
}

// Library start-up hook. The constructor attribute (GCC/Clang extension) asks
// the toolchain to call this function automatically before main() / when the
// shared object is loaded, so no explicit call site is needed.
void static_initialization(void) __attribute__((constructor));
void static_initialization(void) {
// Query the CPU once and cache only the feature flags in CPU_CAPS.
CPU_CAPS = GetX86Info().features;
}

+ 124
- 0
src/capi/schemes.h View File

@@ -0,0 +1,124 @@
#ifndef PQC_SCHEMES_
#define PQC_SCHEMES_

// PQClean include
#include "sign/rainbow/rainbowV-classic/clean/api.h"
#include "sign/rainbow/rainbowI-classic/clean/api.h"
#include "sign/rainbow/rainbowIII-classic/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h"
#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h"
#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h"
#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h"
#include "sign/falcon/falcon-1024/clean/api.h"
#include "sign/falcon/falcon-1024/avx2/api.h"
#include "sign/falcon/falcon-512/clean/api.h"
#include "sign/falcon/falcon-512/avx2/api.h"
#include "sign/dilithium/dilithium2/clean/api.h"
#include "sign/dilithium/dilithium2/avx2/api.h"
#include "sign/dilithium/dilithium3/clean/api.h"
#include "sign/dilithium/dilithium3/avx2/api.h"
#include "sign/dilithium/dilithium5/clean/api.h"
#include "sign/dilithium/dilithium5/avx2/api.h"
#include "sign/picnic/picnic3l1/clean/api.h"
#include "kem/ntru/ntruhps4096821/clean/api.h"
#include "kem/ntru/ntruhps4096821/avx2/api.h"
#include "kem/ntru/ntruhps2048509/clean/api.h"
#include "kem/ntru/ntruhps2048509/avx2/api.h"
#include "kem/ntru/ntruhrss701/clean/api.h"
#include "kem/ntru/ntruhrss701/avx2/api.h"
#include "kem/ntru/ntruhps2048677/clean/api.h"
#include "kem/ntru/ntruhps2048677/avx2/api.h"
#include "kem/ntru_prime/ntrulpr761/clean/api.h"
#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
#include "kem/ntru_prime/ntrulpr653/clean/api.h"
#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
#include "kem/ntru_prime/ntrulpr857/clean/api.h"
#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
#include "kem/kyber/kyber768/clean/api.h"
#include "kem/kyber/kyber768/avx2/api.h"
#include "kem/kyber/kyber1024/clean/api.h"
#include "kem/kyber/kyber1024/avx2/api.h"
#include "kem/kyber/kyber512/clean/api.h"
#include "kem/kyber/kyber512/avx2/api.h"
#include "kem/mceliece/mceliece460896f/avx/api.h"
#include "kem/mceliece/mceliece460896f/clean/api.h"
#include "kem/mceliece/mceliece8192128/avx/api.h"
#include "kem/mceliece/mceliece8192128/clean/api.h"
#include "kem/mceliece/mceliece6688128f/avx/api.h"
#include "kem/mceliece/mceliece6688128f/clean/api.h"
#include "kem/mceliece/mceliece8192128f/avx/api.h"
#include "kem/mceliece/mceliece8192128f/clean/api.h"
#include "kem/mceliece/mceliece6960119f/avx/api.h"
#include "kem/mceliece/mceliece6960119f/clean/api.h"
#include "kem/mceliece/mceliece460896/avx/api.h"
#include "kem/mceliece/mceliece460896/clean/api.h"
#include "kem/mceliece/mceliece6688128/avx/api.h"
#include "kem/mceliece/mceliece6688128/clean/api.h"
#include "kem/mceliece/mceliece348864f/avx/api.h"
#include "kem/mceliece/mceliece348864f/clean/api.h"
#include "kem/mceliece/mceliece6960119/avx/api.h"
#include "kem/mceliece/mceliece6960119/clean/api.h"
#include "kem/mceliece/mceliece348864/avx/api.h"
#include "kem/mceliece/mceliece348864/clean/api.h"
#include "kem/frodo/frodokem976shake/clean/api.h"
#include "kem/frodo/frodokem1344shake/clean/api.h"
#include "kem/frodo/frodokem640shake/clean/api.h"
#include "kem/saber/lightsaber/clean/api.h"
#include "kem/saber/lightsaber/avx2/api.h"
#include "kem/saber/firesaber/clean/api.h"
#include "kem/saber/firesaber/avx2/api.h"
#include "kem/saber/saber/clean/api.h"
#include "kem/saber/saber/avx2/api.h"
#include "kem/hqc/hqc-rmrs-128/clean/api.h"
#include "kem/hqc/hqc-rmrs-192/clean/api.h"
#include "kem/hqc/hqc-rmrs-256/clean/api.h"
#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
#include "kem/hqc/hqc-rmrs-256/avx2/api.h"
#include "kem/sike/includes/sike/sike.h"

#endif

+ 8
- 0
src/common/utils.h View File

@@ -0,0 +1,8 @@
#ifndef PQC_COMMON_UTILS_
#define PQC_COMMON_UTILS_

#include <cpuinfo_x86.h>

const X86Features * const get_cpu_caps(void);

#endif

+ 20
- 0
src/kem/sike/CMakeLists.txt View File

@@ -0,0 +1,20 @@
set(
SRC_CLEAN_SIKE_P434
p434/fpx.c
p434/fp_generic.c
p434/isogeny.c
p434/params.c
p434/sike.c)

if(${ARCH} STREQUAL "ARCH_x86_64")
add_definitions(-DPQC_ASM=1)
set(
SRC_CLEAN_SIKE_P434
${SRC_CLEAN_SIKE_P434}
p434/fp-x86_64.S
)
endif()

define_kem_alg(
sike_p434_clean
PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 81
- 0
src/kem/sike/includes/sike/sike.h View File

@@ -0,0 +1,81 @@
#ifndef SIKE_H_
#define SIKE_H_

#include <stdint.h>
#include <string.h>
#include "randombytes.h"

/* SIKE
*
* SIKE is an isogeny-based post-quantum key encapsulation mechanism. A description of the
* algorithm is provided in [SIKE]. This implementation uses 434-bit field size. The code
* is based on "Additional_Implementations" from PQC NIST submission package which can
* be found here:
* https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip
*
* [SIKE] https://sike.org/files/SIDH-spec.pdf
*/

// SIKE_PUB_BYTESZ is the number of bytes in a public key.
#define SIKE_PUB_BYTESZ 330
// SIKE_PRV_BYTESZ is the number of bytes in a private key.
#define SIKE_PRV_BYTESZ 28
// SIKE_SS_BYTESZ is the number of bytes in a shared key.
#define SIKE_SS_BYTESZ 16
// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated
// with the public key (see 1.4 of SIKE).
#define SIKE_MSG_BYTESZ 16
// SIKE_CT_BYTESZ is the number of bytes in a ciphertext.
#define SIKE_CT_BYTESZ (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ)

// SIKE_keypair outputs a public and secret key. In case of success
// function returns 1, otherwise 0.
int SIKE_keypair(
uint8_t out_priv[SIKE_PRV_BYTESZ],
uint8_t out_pub[SIKE_PUB_BYTESZ]);

// SIKE_encaps generates and encrypts a random session key, writing those values to
// |out_shared_key| and |out_ciphertext|, respectively.
void SIKE_encaps(
uint8_t out_shared_key[SIKE_SS_BYTESZ],
uint8_t out_ciphertext[SIKE_CT_BYTESZ],
const uint8_t pub_key[SIKE_PUB_BYTESZ]);

// SIKE_decaps outputs a random session key, writing it to |out_shared_key|.
void SIKE_decaps(
uint8_t out_shared_key[SIKE_SS_BYTESZ],
const uint8_t ciphertext[SIKE_CT_BYTESZ],
const uint8_t pub_key[SIKE_PUB_BYTESZ],
const uint8_t priv_key[SIKE_PRV_BYTESZ]);

// boilerplate needed for integration
// PQClean-style size/name constants for the "clean" SIKE/p434 variant.
// The secret-key size is a sum of three parts; it is parenthesized so the
// macro expands correctly inside larger expressions (e.g. 2 * ...BYTES).
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME "SIKE/p434"

// Same constants under the AVX2 name prefix; the values are identical to the
// "clean" ones above.
#define PQCLEAN_SIKE434_AVX2_CRYPTO_SECRETKEYBYTES (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
#define PQCLEAN_SIKE434_AVX2_CRYPTO_PUBLICKEYBYTES SIKE_PUB_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_BYTES SIKE_SS_BYTESZ
#define PQCLEAN_SIKE434_AVX2_CRYPTO_ALGNAME "SIKE/p434"

// PQClean-style key generation wrapper around SIKE_keypair().
//
// pk receives SIKE_PUB_BYTESZ bytes of public key.
// sk must have room for SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ
// bytes: SIKE_keypair() writes the private key at the start of sk, and a
// copy of the public key is stored at offset
// SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ.
// NOTE(review): this wrapper does not itself write the SIKE_MSG_BYTESZ bytes
// between the private key and the public-key copy — confirm whether
// SIKE_keypair() fills them.
//
// Returns 0 on success and -1 if SIKE_keypair() reports failure (it returns
// 1 on success, 0 otherwise).
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    if (SIKE_keypair(sk, pk) != 1) {
        // Do not report success for a key pair that was not generated.
        return -1;
    }
    // KATs require the public key to be concatenated after private key
    memcpy(&sk[SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ], pk, SIKE_PUB_BYTESZ);
    return 0;
}
// PQClean-style encapsulation wrapper: forwards to SIKE_encaps(), which
// writes the shared secret to ss and the ciphertext to ct for public key pk.
// Note the argument order differs: this wrapper takes (ct, ss, pk) while
// SIKE_encaps() takes (shared key, ciphertext, public key).
// Always returns 0, since SIKE_encaps() returns void.
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
SIKE_encaps(ss,ct,pk);
return 0;
}

// PQClean-style decapsulation wrapper: forwards to SIKE_decaps(), which
// writes the shared secret to ss.
// sk is expected in the layout produced by the keypair wrapper in this
// header: the private key at the start and the public key at offset
// SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ.
// Always returns 0, since SIKE_decaps() returns void.
static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
// Third argument is the public-key copy inside sk; fourth is the private key.
SIKE_decaps(ss, ct, &sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], sk);
return 0;
}


#endif

+ 926
- 0
src/kem/sike/p434/fp-x86_64.S View File

@@ -0,0 +1,926 @@
.text

/*
 * Constant used by sike_fpadd_asm / sike_fpsub_asm (2*p434 according to the label),
 * least significant quad first. Only six quads are stored: those routines apply the
 * quad at offset 8 to both limb 1 and limb 2, so the seven-limb value they work with
 * is {q0, q1, q1, q2, q3, q4, q5}.
 */
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE
.quad 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF
.quad 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC
.quad 0x0004683E4E2EE688


/*
 * Four-quad multiplicand used by sike_fprdc_asm.
 * NOTE(review): presumably the non-zero upper limbs of p434 + 1 (the portable code
 * skips the low zero limbs of params.prime_p1) - confirm against the parameter set.
 */
.Lp434p1:
.quad 0xFDC1767AE3000000
.quad 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056
.quad 0x0002341F27177344

/*
 * void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c)
 * Assuming the System V AMD64 calling convention: %rdi = a, %rsi = b, %rdx = c,
 * seven 64-bit limbs each.
 * Computes a + b, subtracts the constant at .Lp434x2 and, when that subtraction
 * borrows, adds the constant back under an all-ones mask. There is no
 * data-dependent branch.
 */
.globl sike_fpadd_asm
.hidden sike_fpadd_asm
.type sike_fpadd_asm,@function
sike_fpadd_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32

/* %rax starts at zero and is turned into the borrow mask further down. */
xorq %rax,%rax

/* r8..r14 = a + b (seven limbs, carry chained through adcq). */
movq 0(%rdi),%r8
addq 0(%rsi),%r8
movq 8(%rdi),%r9
adcq 8(%rsi),%r9
movq 16(%rdi),%r10
adcq 16(%rsi),%r10
movq 24(%rdi),%r11
adcq 24(%rsi),%r11
movq 32(%rdi),%r12
adcq 32(%rsi),%r12
movq 40(%rdi),%r13
adcq 40(%rsi),%r13
movq 48(%rdi),%r14
adcq 48(%rsi),%r14

/* r8..r14 -= constant; the quad at offset 8 is used for limbs 1 and 2. */
movq .Lp434x2(%rip),%rcx
subq %rcx,%r8
movq 8+.Lp434x2(%rip),%rcx
sbbq %rcx,%r9
sbbq %rcx,%r10
movq 16+.Lp434x2(%rip),%rcx
sbbq %rcx,%r11
movq 24+.Lp434x2(%rip),%rcx
sbbq %rcx,%r12
movq 32+.Lp434x2(%rip),%rcx
sbbq %rcx,%r13
movq 40+.Lp434x2(%rip),%rcx
sbbq %rcx,%r14

/* %rax = 0 - borrow: all ones if the subtraction went negative, else zero. */
sbbq $0,%rax

/* Masked copy of the low constant limbs (zero when no correction is needed). */
movq .Lp434x2(%rip),%rdi
andq %rax,%rdi
movq 8+.Lp434x2(%rip),%rsi
andq %rax,%rsi
movq 16+.Lp434x2(%rip),%rcx
andq %rax,%rcx

/* Add the masked constant back to limbs 0..3 and store them. */
addq %rdi,%r8
movq %r8,0(%rdx)
adcq %rsi,%r9
movq %r9,8(%rdx)
adcq %rsi,%r10
movq %r10,16(%rdx)
adcq %rcx,%r11
movq %r11,24(%rdx)

/* Save the carry in %cl while andq clobbers the flags; btq reloads it into CF. */
setc %cl
movq 24+.Lp434x2(%rip),%r8
andq %rax,%r8
movq 32+.Lp434x2(%rip),%r9
andq %rax,%r9
movq 40+.Lp434x2(%rip),%r10
andq %rax,%r10
btq $0,%rcx

/* Limbs 4..6 of the masked add-back. */
adcq %r8,%r12
movq %r12,32(%rdx)
adcq %r9,%r13
movq %r13,40(%rdx)
adcq %r10,%r14
movq %r14,48(%rdx)

popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc

/*
 * void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c)
 * Assuming the System V AMD64 calling convention: %rdi = a, %rsi = b, %rdx = c,
 * seven 64-bit limbs each.
 * Computes a - b and, when the subtraction borrows, adds the constant at
 * .Lp434x2 under an all-ones mask. There is no data-dependent branch.
 */
.globl sike_fpsub_asm
.hidden sike_fpsub_asm
.type sike_fpsub_asm,@function
sike_fpsub_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32

/* %rax starts at zero and is turned into the borrow mask further down. */
xorq %rax,%rax

/* r8..r14 = a - b (seven limbs, borrow chained through sbbq). */
movq 0(%rdi),%r8
subq 0(%rsi),%r8
movq 8(%rdi),%r9
sbbq 8(%rsi),%r9
movq 16(%rdi),%r10
sbbq 16(%rsi),%r10
movq 24(%rdi),%r11
sbbq 24(%rsi),%r11
movq 32(%rdi),%r12
sbbq 32(%rsi),%r12
movq 40(%rdi),%r13
sbbq 40(%rsi),%r13
movq 48(%rdi),%r14
sbbq 48(%rsi),%r14

/* %rax = 0 - borrow: all ones if a < b, else zero. */
sbbq $0x0,%rax

/* Masked copy of the low constant limbs; offset 8 serves limbs 1 and 2. */
movq .Lp434x2(%rip),%rdi
andq %rax,%rdi
movq 8+.Lp434x2(%rip),%rsi
andq %rax,%rsi
movq 16+.Lp434x2(%rip),%rcx
andq %rax,%rcx

/* Add the masked constant to limbs 0..3 and store them. */
addq %rdi,%r8
movq %r8,0(%rdx)
adcq %rsi,%r9
movq %r9,8(%rdx)
adcq %rsi,%r10
movq %r10,16(%rdx)
adcq %rcx,%r11
movq %r11,24(%rdx)

/* Save the carry in %cl while andq clobbers the flags; btq reloads it into CF. */
setc %cl
movq 24+.Lp434x2(%rip),%r8
andq %rax,%r8
movq 32+.Lp434x2(%rip),%r9
andq %rax,%r9
movq 40+.Lp434x2(%rip),%r10
andq %rax,%r10
btq $0x0,%rcx

/* Limbs 4..6 of the masked add-back. */
adcq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
movq %r12,32(%rdx)
movq %r13,40(%rdx)
movq %r14,48(%rdx)

popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c)
 * Assuming the System V AMD64 calling convention: %rdi = a, %rsi = b, %rdx = c.
 * Plain seven-limb addition c = a + b with no modular reduction; the carry out
 * of the top limb is not returned. Only caller-saved registers are used.
 */
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.type sike_mpadd_asm,@function
sike_mpadd_asm:
.cfi_startproc
/* Limbs 0..4. */
movq 0(%rdi),%r8;
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%rcx
addq 0(%rsi),%r8
adcq 8(%rsi),%r9
adcq 16(%rsi),%r10
adcq 24(%rsi),%r11
adcq 32(%rsi),%rcx
movq %r8,0(%rdx)
movq %r9,8(%rdx)
movq %r10,16(%rdx)
movq %r11,24(%rdx)
movq %rcx,32(%rdx)

/* Limbs 5..6; the movq instructions in between leave the carry flag intact. */
movq 40(%rdi),%r8
movq 48(%rdi),%r9
adcq 40(%rsi),%r8
adcq 48(%rsi),%r9
movq %r8,40(%rdx)
movq %r9,48(%rdx)
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c)
 * Assuming the System V AMD64 calling convention: %rdi = a, %rsi = b, %rdx = c.
 * Fourteen-limb subtraction c = a - b. Returns in %rax an all-ones mask when the
 * subtraction borrowed out of the top limb and zero otherwise.
 */
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.type sike_mpsubx2_asm,@function
sike_mpsubx2_asm:
.cfi_startproc
xorq %rax,%rax

/* Limbs 0..4. */
movq 0(%rdi),%r8
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%rcx
subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%rcx
movq %r8,0(%rdx)
movq %r9,8(%rdx)
movq %r10,16(%rdx)
movq %r11,24(%rdx)
movq %rcx,32(%rdx)

/* Limbs 5..9; the borrow is carried across the intervening movq instructions. */
movq 40(%rdi),%r8
movq 48(%rdi),%r9
movq 56(%rdi),%r10
movq 64(%rdi),%r11
movq 72(%rdi),%rcx
sbbq 40(%rsi),%r8
sbbq 48(%rsi),%r9
sbbq 56(%rsi),%r10
sbbq 64(%rsi),%r11
sbbq 72(%rsi),%rcx
movq %r8,40(%rdx)
movq %r9,48(%rdx)
movq %r10,56(%rdx)
movq %r11,64(%rdx)
movq %rcx,72(%rdx)

/* Limbs 10..13, then %rax = 0 - final borrow (the returned mask). */
movq 80(%rdi),%r8
movq 88(%rdi),%r9
movq 96(%rdi),%r10
movq 104(%rdi),%r11
sbbq 80(%rsi),%r8
sbbq 88(%rsi),%r9
sbbq 96(%rsi),%r10
sbbq 104(%rsi),%r11
sbbq $0x0,%rax
movq %r8,80(%rdx)
movq %r9,88(%rdx)
movq %r10,96(%rdx)
movq %r11,104(%rdx)
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * void sike_mpdblsubx2_asm(a, b, c)
 * Assuming the System V AMD64 calling convention: %rdi = a, %rsi = b, %rdx = c,
 * fourteen 64-bit limbs each.
 * In-place double subtraction c = c - a - b, done as two seven-limb halves.
 * The borrows out of the top limb are discarded.
 */
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.type sike_mpdblsubx2_asm,@function
sike_mpdblsubx2_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24

/* %rax counts the borrows (0, 1 or 2) produced by the low half. */
xorq %rax,%rax


/* Low half: r8..r13,rcx = c[0..6] - a[0..6]. */
movq 0(%rdx),%r8
movq 8(%rdx),%r9
movq 16(%rdx),%r10
movq 24(%rdx),%r11
movq 32(%rdx),%r12
movq 40(%rdx),%r13
movq 48(%rdx),%rcx
subq 0(%rdi),%r8
sbbq 8(%rdi),%r9
sbbq 16(%rdi),%r10
sbbq 24(%rdi),%r11
sbbq 32(%rdi),%r12
sbbq 40(%rdi),%r13
sbbq 48(%rdi),%rcx
adcq $0x0,%rax


/* Low half: subtract b[0..6] as well and record the second borrow. */
subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%r12
sbbq 40(%rsi),%r13
sbbq 48(%rsi),%rcx
adcq $0x0,%rax


movq %r8,0(%rdx)
movq %r9,8(%rdx)
movq %r10,16(%rdx)
movq %r11,24(%rdx)
movq %r12,32(%rdx)
movq %r13,40(%rdx)
movq %rcx,48(%rdx)


/* High half: load c[7..13]. */
movq 56(%rdx),%r8
movq 64(%rdx),%r9
movq 72(%rdx),%r10
movq 80(%rdx),%r11
movq 88(%rdx),%r12
movq 96(%rdx),%r13
movq 104(%rdx),%rcx

/*
 * Apply the accumulated low-half borrows, then subtract a[7..13].
 * NOTE(review): the subq and the following sbbq both act on %r8, so a borrow
 * produced by the subq (only when c[7] < %rax, i.e. c[7] is 0 or 1) is
 * subtracted from the same limb again instead of propagating to %r9. Confirm
 * that the callers' operand ranges exclude that case.
 */
subq %rax,%r8
sbbq 56(%rdi),%r8
sbbq 64(%rdi),%r9
sbbq 72(%rdi),%r10
sbbq 80(%rdi),%r11
sbbq 88(%rdi),%r12
sbbq 96(%rdi),%r13
sbbq 104(%rdi),%rcx


/* Subtract b[7..13]. */
subq 56(%rsi),%r8
sbbq 64(%rsi),%r9
sbbq 72(%rsi),%r10
sbbq 80(%rsi),%r11
sbbq 88(%rsi),%r12
sbbq 96(%rsi),%r13
sbbq 104(%rsi),%rcx


movq %r8,56(%rdx)
movq %r9,64(%rdx)
movq %r10,72(%rdx)
movq %r11,80(%rdx)
movq %r12,88(%rdx)
movq %r13,96(%rdx)
movq %rcx,104(%rdx)

popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc

/*
 * void sike_fprdc_asm(ma, mc)
 * Assuming the System V AMD64 calling convention: %rdi = ma (fourteen limbs),
 * %rsi = mc (seven limbs).
 * Reduction in four rounds. Each round multiplies one or two limbs of ma by the
 * four-limb constant .Lp434p1 and adds the product into higher limbs of ma.
 * The buffer at %rdi is overwritten with intermediate values, so the input is
 * destroyed. The seven result limbs are written to mc in rounds 3 and 4.
 * Uses mulxq (BMI2) and adcxq/adoxq (ADX); the caller must check for both.
 * %r14 and %r15 are saved and restored but not referenced in the body.
 */
.globl sike_fprdc_asm
.hidden sike_fprdc_asm
.type sike_fprdc_asm,@function
sike_fprdc_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15, -40

/* Round 1: r8..r13 = (ma[0], ma[1]) * .Lp434p1. xorq clears CF/OF for the adcx/adox chains. */
xorq %rax,%rax
movq 0+0(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

xorq %rax,%rax
movq 0+8(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r13,%rcx
adcxq %r13,%r9
adcxq %rcx,%r10

mulxq 8+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r11
adoxq %rcx,%r10

mulxq 16+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r12
adoxq %rcx,%r11

mulxq 24+.Lp434p1(%rip),%rcx,%r13
adcxq %rax,%r13
adoxq %rcx,%r12
adoxq %rax,%r13

/* Add the round-1 product into ma[3..9] and ripple the carry through ma[10..13]. */
xorq %rcx,%rcx
addq 24(%rdi),%r8
adcq 32(%rdi),%r9
adcq 40(%rdi),%r10
adcq 48(%rdi),%r11
adcq 56(%rdi),%r12
adcq 64(%rdi),%r13
adcq 72(%rdi),%rcx
movq %r8,24(%rdi)
movq %r9,32(%rdi)
movq %r10,40(%rdi)
movq %r11,48(%rdi)
movq %r12,56(%rdi)
movq %r13,64(%rdi)
movq %rcx,72(%rdi)
movq 80(%rdi),%r8
movq 88(%rdi),%r9
movq 96(%rdi),%r10
movq 104(%rdi),%r11
adcq $0x0,%r8
adcq $0x0,%r9
adcq $0x0,%r10
adcq $0x0,%r11
movq %r8,80(%rdi)
movq %r9,88(%rdi)
movq %r10,96(%rdi)
movq %r11,104(%rdi)

/* Round 2: r8..r13 = (ma[2], ma[3]) * .Lp434p1, using the ma[3] updated in round 1. */
xorq %rax,%rax
movq 16+0(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

xorq %rax,%rax
movq 16+8(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r13,%rcx
adcxq %r13,%r9
adcxq %rcx,%r10

mulxq 8+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r11
adoxq %rcx,%r10

mulxq 16+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r12
adoxq %rcx,%r11

mulxq 24+.Lp434p1(%rip),%rcx,%r13
adcxq %rax,%r13
adoxq %rcx,%r12
adoxq %rax,%r13

/* Add the round-2 product into ma[5..11] and ripple the carry through ma[12..13]. */
xorq %rcx,%rcx
addq 40(%rdi),%r8
adcq 48(%rdi),%r9
adcq 56(%rdi),%r10
adcq 64(%rdi),%r11
adcq 72(%rdi),%r12
adcq 80(%rdi),%r13
adcq 88(%rdi),%rcx
movq %r8,40(%rdi)
movq %r9,48(%rdi)
movq %r10,56(%rdi)
movq %r11,64(%rdi)
movq %r12,72(%rdi)
movq %r13,80(%rdi)
movq %rcx,88(%rdi)
movq 96(%rdi),%r8
movq 104(%rdi),%r9
adcq $0x0,%r8
adcq $0x0,%r9
movq %r8,96(%rdi)
movq %r9,104(%rdi)

/* Round 3: r8..r13 = (ma[4], ma[5]) * .Lp434p1. */
xorq %rax,%rax
movq 32+0(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

xorq %rax,%rax
movq 32+8(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r13,%rcx
adcxq %r13,%r9
adcxq %rcx,%r10

mulxq 8+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r11
adoxq %rcx,%r10

mulxq 16+.Lp434p1(%rip),%rcx,%r13
adcxq %r13,%r12
adoxq %rcx,%r11

mulxq 24+.Lp434p1(%rip),%rcx,%r13
adcxq %rax,%r13
adoxq %rcx,%r12
adoxq %rax,%r13

/* Add into ma[7..13]; the two lowest sums are final and go to mc[0], mc[1]. */
xorq %rcx,%rcx
addq 56(%rdi),%r8
adcq 64(%rdi),%r9
adcq 72(%rdi),%r10
adcq 80(%rdi),%r11
adcq 88(%rdi),%r12
adcq 96(%rdi),%r13
adcq 104(%rdi),%rcx
movq %r8,0(%rsi)
movq %r9,8(%rsi)
movq %r10,72(%rdi)
movq %r11,80(%rdi)
movq %r12,88(%rdi)
movq %r13,96(%rdi)
movq %rcx,104(%rdi)

/* Round 4: r8..r12 = ma[6] * .Lp434p1 (single limb). */
xorq %rax,%rax
movq 48(%rdi),%rdx
mulxq 0+.Lp434p1(%rip),%r8,%r9
mulxq 8+.Lp434p1(%rip),%r12,%r10
mulxq 16+.Lp434p1(%rip),%r13,%r11

adoxq %r12,%r9
adoxq %r13,%r10

mulxq 24+.Lp434p1(%rip),%r13,%r12
adoxq %r13,%r11
adoxq %rax,%r12

/* Add ma[9..13] and store the remaining result limbs mc[2..6]. */
addq 72(%rdi),%r8
adcq 80(%rdi),%r9
adcq 88(%rdi),%r10
adcq 96(%rdi),%r11
adcq 104(%rdi),%r12
movq %r8,16(%rsi)
movq %r9,24(%rsi)
movq %r10,32(%rsi)
movq %r11,40(%rsi)
movq %r12,48(%rsi)


popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc
/*
 * void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c)
 * Assuming the System V AMD64 calling convention: %rdi = a, %rsi = b, %rdx = c
 * (copied to %rcx because mulxq takes one operand implicitly from %rdx).
 * 7x7-limb multiplication producing 14 limbs, done with one level of Karatsuba:
 * each operand is split into a low part of four limbs (AL, BL) and a high part
 * of three limbs (AH, BH).
 * Stack scratch area (96 bytes):
 *    0..31(%rsp)  AL+AH, later the low half of (AL+AH)*(BL+BH)
 *   32..63(%rsp)  BL+BH, later the high half of (AL+AH)*(BL+BH)
 *   64..95(%rsp)  masked cross terms that account for the carries of the two sums
 * Uses mulxq (BMI2) and adcxq/adoxq (ADX); the caller must check for both.
 */
.globl sike_mpmul_asm
.hidden sike_mpmul_asm
.type sike_mpmul_asm,@function
sike_mpmul_asm:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12, -16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13, -24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14, -32
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15, -40


/* Keep the output pointer in %rcx; %rax will hold the carry mask of AL+AH. */
movq %rdx,%rcx
xorq %rax,%rax


/* r8..r11 = AL */
movq 0(%rdi),%r8
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11

pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx, -48
pushq %rbp
.cfi_offset rbp, -56
.cfi_adjust_cfa_offset 8
subq $96,%rsp
.cfi_adjust_cfa_offset 96

/* r8..r11 = AL + AH, %rax = 0 - carry; sum stored at 0..31(%rsp). */
addq 32(%rdi),%r8
adcq 40(%rdi),%r9
adcq 48(%rdi),%r10
adcq $0x0,%r11
sbbq $0x0,%rax
movq %r8,0(%rsp)
movq %r9,8(%rsp)
movq %r10,16(%rsp)
movq %r11,24(%rsp)


/* r12..r15 = BL + BH, %rbx = 0 - carry; sum stored at 32..63(%rsp). */
xorq %rbx,%rbx
movq 0(%rsi),%r12
movq 8(%rsi),%r13
movq 16(%rsi),%r14
movq 24(%rsi),%r15
addq 32(%rsi),%r12
adcq 40(%rsi),%r13
adcq 48(%rsi),%r14
adcq $0x0,%r15
sbbq $0x0,%rbx
movq %r12,32(%rsp)
movq %r13,40(%rsp)
movq %r14,48(%rsp)
movq %r15,56(%rsp)


/* (BL+BH) masked by the carry of AL+AH. */
andq %rax,%r12
andq %rax,%r13
andq %rax,%r14
andq %rax,%r15


/* (AL+AH) masked by the carry of BL+BH. */
andq %rbx,%r8
andq %rbx,%r9
andq %rbx,%r10
andq %rbx,%r11


/* Sum of the two masked terms, kept at 64..95(%rsp) for the final combination. */
addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
movq %r8,64(%rsp)
movq %r9,72(%rsp)
movq %r10,80(%rsp)
movq %r11,88(%rsp)


/* 4x4-limb product (AL+AH)*(BL+BH), written over 0..63(%rsp). */
movq 0+0(%rsp),%rdx
mulxq 32+0(%rsp),%r9,%r8
movq %r9,0+0(%rsp)
mulxq 32+8(%rsp),%r10,%r9
xorq %rax,%rax
adoxq %r10,%r8
mulxq 32+16(%rsp),%r11,%r10
adoxq %r11,%r9
mulxq 32+24(%rsp),%r12,%r11
adoxq %r12,%r10

movq 0+8(%rsp),%rdx
mulxq 32+0(%rsp),%r12,%r13
adoxq %rax,%r11
xorq %rax,%rax
mulxq 32+8(%rsp),%r15,%r14
adoxq %r8,%r12
movq %r12,0+8(%rsp)
adcxq %r15,%r13
mulxq 32+16(%rsp),%rbx,%r15
adcxq %rbx,%r14
adoxq %r9,%r13
mulxq 32+24(%rsp),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r10,%r14

movq 0+16(%rsp),%rdx
mulxq 32+0(%rsp),%r8,%r9
adoxq %r11,%r15
adoxq %rax,%rbx
xorq %rax,%rax
mulxq 32+8(%rsp),%r11,%r10
adoxq %r13,%r8
movq %r8,0+16(%rsp)
adcxq %r11,%r9
mulxq 32+16(%rsp),%r12,%r11
adcxq %r12,%r10
adoxq %r14,%r9
mulxq 32+24(%rsp),%rbp,%r12
adcxq %rbp,%r11
adcxq %rax,%r12

adoxq %r15,%r10
adoxq %rbx,%r11
adoxq %rax,%r12

movq 0+24(%rsp),%rdx
mulxq 32+0(%rsp),%r8,%r13
xorq %rax,%rax
mulxq 32+8(%rsp),%r15,%r14
adcxq %r15,%r13
adoxq %r8,%r9
mulxq 32+16(%rsp),%rbx,%r15
adcxq %rbx,%r14
adoxq %r13,%r10
mulxq 32+24(%rsp),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r14,%r11
adoxq %r15,%r12
adoxq %rax,%rbx
movq %r9,0+24(%rsp)
movq %r10,0+32(%rsp)
movq %r11,0+40(%rsp)
movq %r12,0+48(%rsp)
movq %rbx,0+56(%rsp)



/* 4x4-limb product AL*BL, written to c[0..7]. */
movq 0+0(%rdi),%rdx
mulxq 0+0(%rsi),%r9,%r8
movq %r9,0+0(%rcx)
mulxq 0+8(%rsi),%r10,%r9
xorq %rax,%rax
adoxq %r10,%r8
mulxq 0+16(%rsi),%r11,%r10
adoxq %r11,%r9
mulxq 0+24(%rsi),%r12,%r11
adoxq %r12,%r10

movq 0+8(%rdi),%rdx
mulxq 0+0(%rsi),%r12,%r13
adoxq %rax,%r11
xorq %rax,%rax
mulxq 0+8(%rsi),%r15,%r14
adoxq %r8,%r12
movq %r12,0+8(%rcx)
adcxq %r15,%r13
mulxq 0+16(%rsi),%rbx,%r15
adcxq %rbx,%r14
adoxq %r9,%r13
mulxq 0+24(%rsi),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r10,%r14

movq 0+16(%rdi),%rdx
mulxq 0+0(%rsi),%r8,%r9
adoxq %r11,%r15
adoxq %rax,%rbx
xorq %rax,%rax
mulxq 0+8(%rsi),%r11,%r10
adoxq %r13,%r8
movq %r8,0+16(%rcx)
adcxq %r11,%r9
mulxq 0+16(%rsi),%r12,%r11
adcxq %r12,%r10
adoxq %r14,%r9
mulxq 0+24(%rsi),%rbp,%r12
adcxq %rbp,%r11
adcxq %rax,%r12

adoxq %r15,%r10
adoxq %rbx,%r11
adoxq %rax,%r12

movq 0+24(%rdi),%rdx
mulxq 0+0(%rsi),%r8,%r13
xorq %rax,%rax
mulxq 0+8(%rsi),%r15,%r14
adcxq %r15,%r13
adoxq %r8,%r9
mulxq 0+16(%rsi),%rbx,%r15
adcxq %rbx,%r14
adoxq %r13,%r10
mulxq 0+24(%rsi),%rbp,%rbx
adcxq %rbp,%r15
adcxq %rax,%rbx
adoxq %r14,%r11
adoxq %r15,%r12
adoxq %rax,%rbx
movq %r9,0+24(%rcx)
movq %r10,0+32(%rcx)
movq %r11,0+40(%rcx)
movq %r12,0+48(%rcx)
movq %rbx,0+56(%rcx)



/* 3x3-limb product AH*BH, written to c[8..13]. */
movq 32+0(%rdi),%rdx
mulxq 32+0(%rsi),%r9,%r8
movq %r9,64+0(%rcx)
mulxq 32+8(%rsi),%r10,%r9
xorq %rax,%rax
adoxq %r10,%r8
mulxq 32+16(%rsi),%r11,%r10
adoxq %r11,%r9

movq 32+8(%rdi),%rdx
mulxq 32+0(%rsi),%r12,%r11
adoxq %rax,%r10
xorq %rax,%rax

mulxq 32+8(%rsi),%r14,%r13
adoxq %r8,%r12
movq %r12,64+8(%rcx)
adcxq %r14,%r11

mulxq 32+16(%rsi),%r8,%r14
adoxq %r9,%r11
adcxq %r8,%r13
adcxq %rax,%r14
adoxq %r10,%r13

movq 32+16(%rdi),%rdx
mulxq 32+0(%rsi),%r8,%r9
adoxq %rax,%r14
xorq %rax,%rax

mulxq 32+8(%rsi),%r10,%r12
adoxq %r11,%r8
movq %r8,64+16(%rcx)
adcxq %r13,%r9

mulxq 32+16(%rsi),%r11,%r8
adcxq %r14,%r12
adcxq %rax,%r8
adoxq %r10,%r9
adoxq %r12,%r11
adoxq %rax,%r8
movq %r9,64+24(%rcx)
movq %r11,64+32(%rcx)
movq %r8,64+40(%rcx)




/* r8..r11 = masked cross terms + high half of the (AL+AH)*(BL+BH) product. */
movq 64(%rsp),%r8
movq 72(%rsp),%r9
movq 80(%rsp),%r10
movq 88(%rsp),%r11

movq 32(%rsp),%rax
addq %rax,%r8
movq 40(%rsp),%rax
adcq %rax,%r9
movq 48(%rsp),%rax
adcq %rax,%r10
movq 56(%rsp),%rax
adcq %rax,%r11


/* r12..r15 = low half of that product; then subtract AL*BL (c[0..7]). */
movq 0(%rsp),%r12
movq 8(%rsp),%r13
movq 16(%rsp),%r14
movq 24(%rsp),%r15
subq 0(%rcx),%r12
sbbq 8(%rcx),%r13
sbbq 16(%rcx),%r14
sbbq 24(%rcx),%r15
sbbq 32(%rcx),%r8
sbbq 40(%rcx),%r9
sbbq 48(%rcx),%r10
sbbq 56(%rcx),%r11


/* Subtract AH*BH (c[8..13]), leaving the Karatsuba middle term in r12..r15,r8..r11. */
subq 64(%rcx),%r12
sbbq 72(%rcx),%r13
sbbq 80(%rcx),%r14
sbbq 88(%rcx),%r15
sbbq 96(%rcx),%r8
sbbq 104(%rcx),%r9
sbbq $0x0,%r10
sbbq $0x0,%r11

/* Add the middle term into c[4..11] and ripple the carry into c[12..13]. */
addq 32(%rcx),%r12
movq %r12,32(%rcx)
adcq 40(%rcx),%r13
movq %r13,40(%rcx)
adcq 48(%rcx),%r14
movq %r14,48(%rcx)
adcq 56(%rcx),%r15
movq %r15,56(%rcx)
adcq 64(%rcx),%r8
movq %r8,64(%rcx)
adcq 72(%rcx),%r9
movq %r9,72(%rcx)
adcq 80(%rcx),%r10
movq %r10,80(%rcx)
adcq 88(%rcx),%r11
movq %r11,88(%rcx)
movq 96(%rcx),%r12
adcq $0x0,%r12
movq %r12,96(%rcx)
movq 104(%rcx),%r13
adcq $0x0,%r13
movq %r13,104(%rcx)

/* Release the scratch area and restore the callee-saved registers. */
addq $96,%rsp
.cfi_adjust_cfa_offset -96
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_same_value rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_same_value rbx


popq %r15
.cfi_adjust_cfa_offset -8
popq %r14
.cfi_adjust_cfa_offset -8
popq %r13
.cfi_adjust_cfa_offset -8
popq %r12
.cfi_adjust_cfa_offset -8
/* 0xf3 0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.cfi_endproc

+ 207
- 0
src/kem/sike/p434/fp_generic.c View File

@@ -0,0 +1,207 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: portable modular arithmetic for P434
*********************************************************************************************/
#include "common/utils.h"

#include "utils.h"
#include "fpx.h"

// Prototypes of the x86-64 assembly routines. Every call site in this file is compiled
// under PQC_ASM, so the declarations are guarded by the same macro. The reduction input
// and the multiplication output are double-length values (2*NWORDS_FIELD words).
#ifdef PQC_ASM
void sike_fprdc_asm(const dfelm_t ma, felm_t mc);
void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
void sike_fpadd_asm(const felm_t a, const felm_t b, felm_t c);
void sike_fpsub_asm(const felm_t a, const felm_t b, felm_t c);
#endif

// Global constants
extern const struct params_t params;

// Full-width product of two machine words: c[0] receives the low word and c[1] the
// high word of a*b. The operands are split into half-words and the four partial
// products are recombined column by column, so no double-width integer type is needed.
static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
{
    const unsigned int half = (unsigned int)(sizeof(crypto_word_t) * 4);
    const crypto_word_t lo_mask = (crypto_word_t)(-1) >> half;
    const crypto_word_t hi_mask = (crypto_word_t)(-1) << half;

    // Half-word pieces of both operands.
    const crypto_word_t a_lo = a & lo_mask;
    const crypto_word_t a_hi = a >> half;
    const crypto_word_t b_lo = b & lo_mask;
    const crypto_word_t b_hi = b >> half;

    // The four partial products.
    const crypto_word_t ll = a_lo * b_lo;
    const crypto_word_t lh = a_lo * b_hi;
    const crypto_word_t hl = a_hi * b_lo;
    const crypto_word_t hh = a_hi * b_hi;

    // Second half-word column; its overflow feeds the next column.
    const crypto_word_t col1 = (ll >> half) + (hl & lo_mask) + (lh & lo_mask);
    // Third half-word column, including the overflow of the previous one.
    const crypto_word_t col2 = (hl >> half) + (lh >> half) + (hh & lo_mask) + (col1 >> half);

    c[0] = (ll & lo_mask) ^ (col1 << half);
    c[1] = (col2 & lo_mask) ^ ((hh & hi_mask) + (col2 & hi_mask));
}

// Field addition, c = a+b mod p434, on values kept in the redundant range.
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
// The portable path adds, subtracts 2*p434 and undoes the subtraction under a mask
// when it borrowed.
void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpadd_asm(a,b,c);
#else
    unsigned int k;
    unsigned int cy = 0;
    crypto_word_t undo_mask;

    // c = a + b
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(cy, a[k], b[k], cy, c[k]);
    }

    // c = c - 2*p434; cy ends up holding the final borrow
    cy = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        SUBC(cy, c[k], params.prime_x2[k], cy, c[k]);
    }

    // All ones when the subtraction went negative, zero otherwise.
    undo_mask = 0 - (crypto_word_t)cy;

    // c = c + (2*p434 & undo_mask)
    cy = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(cy, c[k], params.prime_x2[k] & undo_mask, cy, c[k]);
    }
#endif
}

// Field subtraction, c = a-b mod p434, on values kept in the redundant range.
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
// The portable path subtracts and then adds 2*p434 under a mask when the
// subtraction borrowed.
void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
{
#ifdef PQC_ASM
    sike_fpsub_asm(a,b,c);
#else
    unsigned int k;
    unsigned int bw = 0;
    crypto_word_t fix_mask;

    // c = a - b; bw ends up holding the final borrow
    for (k = 0; k < NWORDS_FIELD; k++) {
        SUBC(bw, a[k], b[k], bw, c[k]);
    }

    // All ones when a < b, zero otherwise.
    fix_mask = 0 - (crypto_word_t)bw;

    // c = c + (2*p434 & fix_mask)
    bw = 0;
    for (k = 0; k < NWORDS_FIELD; k++) {
        ADDC(bw, c[k], params.prime_x2[k] & fix_mask, bw, c[k]);
    }
#endif
}

// Multiprecision multiplication, c = a*b, where a and b have NWORDS_FIELD words and
// c receives 2*NWORDS_FIELD words.
// With PQC_ASM the assembly routine is used when the CPU reports both BMI2 and ADX;
// otherwise the portable comba (column-wise) multiplication below runs.
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
{
#ifdef PQC_ASM
    if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
        sike_mpmul_asm(a,b,c);
        return;
    }
#endif

    // Three-word column accumulator (acc_hi:acc_mid:acc_lo) and a two-word partial
    // product. The product stays an array: MUL is given its two words as prod+1 and prod[0].
    crypto_word_t acc_hi = 0, acc_mid = 0, acc_lo = 0, prod[2];
    unsigned int carry = 0;
    unsigned int col, row;

    // Columns 0 .. NWORDS_FIELD-1: every pair with row + (col-row) == col.
    for (col = 0; col < NWORDS_FIELD; col++) {
        for (row = 0; row <= col; row++) {
            MUL(a[row], b[col-row], prod+1, prod[0]);
            ADDC(0, prod[0], acc_lo, carry, acc_lo);
            ADDC(carry, prod[1], acc_mid, carry, acc_mid);
            acc_hi += carry;
        }
        c[col] = acc_lo;
        // Shift the accumulator down by one word for the next column.
        acc_lo = acc_mid;
        acc_mid = acc_hi;
        acc_hi = 0;
    }

    // Columns NWORDS_FIELD .. 2*NWORDS_FIELD-2: the row index starts later so that
    // col-row stays below NWORDS_FIELD.
    for (col = NWORDS_FIELD; col < 2*NWORDS_FIELD-1; col++) {
        for (row = col-NWORDS_FIELD+1; row < NWORDS_FIELD; row++) {
            MUL(a[row], b[col-row], prod+1, prod[0]);
            ADDC(0, prod[0], acc_lo, carry, acc_lo);
            ADDC(carry, prod[1], acc_mid, carry, acc_mid);
            acc_hi += carry;
        }
        c[col] = acc_lo;
        acc_lo = acc_mid;
        acc_mid = acc_hi;
        acc_hi = 0;
    }

    // The top word is whatever is left in the accumulator.
    c[2*NWORDS_FIELD-1] = acc_lo;
}

// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.
void sike_fprdc(const felm_t ma, felm_t mc)
{
#ifdef PQC_ASM
if (get_cpu_caps()->bmi2 && get_cpu_caps()->adx) {
sike_fprdc_asm(ma, mc);
return;
}
#endif
unsigned int i, j, carry, count = ZERO_WORDS;
crypto_word_t UV[2], t = 0, u = 0, v = 0;

for (i = 0; i < NWORDS_FIELD; i++) {
mc[i] = 0;
}

for (i = 0; i < NWORDS_FIELD; i++) {
for (j = 0; j < i; j++) {
if (j < (i-ZERO_WORDS+1)) {
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i] = v;
v = u;
u = t;
t = 0;
}

for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
if (count > 0) {
count -= 1;
}
for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
if (j < (NWORDS_FIELD-count)) {
MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
ADDC(0, UV[0], v, carry, v);
ADDC(carry, UV[1], u, carry, u);
t += carry;
}
}
ADDC(0, v, ma[i], carry, v);
ADDC(carry, u, 0, carry, u);
t += carry;
mc[i-NWORDS_FIELD] = v;
v = u;
u = t;
t = 0;
}
ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);
mc[NWORDS_FIELD-1] = v;
}

+ 282
- 0
src/kem/sike/p434/fpx.c View File

@@ -0,0 +1,282 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: core functions over GF(p) and GF(p^2)
*********************************************************************************************/
#include <stddef.h>
#include "utils.h"
#include "fpx.h"

extern const struct params_t params;

// Montgomery squaring, mc = ma^2 mod p: a full double-length product of ma with itself
// followed by Montgomery reduction.
static void fpsqr_mont(const felm_t ma, felm_t mc)
{
    dfelm_t square = {0};

    sike_mpmul(ma, ma, square);
    sike_fprdc(square, mc);
}

// Chain to compute a^(p-3)/4 using Montgomery arithmetic.
// The result replaces a. The chain is a fixed sequence of squarings and table
// multiplications, so the operations performed do not depend on the value of a.
static void fpinv_chain_mont(felm_t a)
{
unsigned int i, j;
felm_t t[31], tt;

// Precomputed table
// tt = a^2, t[0] = a^3 and t[i+1] = t[i]*a^2, i.e. t[k] = a^(2k+3) for k = 0..30.
fpsqr_mont(a, tt);
sike_fpmul_mont(a, tt, t[0]);
for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]);

// From here on tt is the running value: each step squares it a fixed number of
// times and then multiplies it by one table entry (or by a itself).
sike_fpcopy(a, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[5], tt, tt);
for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[14], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[3], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[23], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[13], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[24], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[7], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[12], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[1], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[21], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[2], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[19], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[1], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[24], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[26], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[16], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[10], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[6], tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[0], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[20], tt, tt);
for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[9], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[25], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[26], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
// This step multiplies by a itself rather than by a table entry.
sike_fpmul_mont(a, tt, tt);
for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[28], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[6], tt, tt);
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[10], tt, tt);
for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[22], tt, tt);
// Tail of the chain: 35 repetitions of "six squarings, multiply by t[30]".
for (j = 0; j < 35; j++) {
for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
sike_fpmul_mont(t[30], tt, tt);
}
sike_fpcopy(tt, a);
}

// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p.
// Runs the fixed exponentiation chain on a copy of a, squares the result twice and
// multiplies by a once more, which gives a^(p-2).
static void fpinv_mont(felm_t a)
{
    felm_t acc = {0};
    unsigned int round;

    sike_fpcopy(a, acc);
    fpinv_chain_mont(acc);          // acc = a^((p-3)/4)

    for (round = 0; round < 2; round++) {
        fpsqr_mont(acc, acc);       // after both rounds: acc = a^(p-3)
    }

    sike_fpmul_mont(a, acc, a);     // a = a * a^(p-3)
}

// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
#ifndef PQC_ASM
// Portable word-by-word addition of two nwords-long operands; the final carry (0 or 1)
// is the return value.
inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint8_t cy = 0;
    size_t idx = 0;

    while (idx < nwords) {
        ADDC(cy, a[idx], b[idx], cy, c[idx]);
        idx++;
    }
    return cy;
}

// Portable word-by-word subtraction, c = a-b, of two nwords-long operands; the final
// borrow (0 or 1) is the return value.
inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
    uint32_t bw = 0;
    size_t idx = 0;

    while (idx < nwords) {
        SUBC(bw, a[idx], b[idx], bw, c[idx]);
        idx++;
    }
    return bw;
}
#endif

// Field-length addition without reduction, c = a+b over NWORDS_FIELD words.
// The carry out of the top word is dropped. Dispatches to the assembly routine when
// PQC_ASM is defined and to the portable loop otherwise.
inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
{
#ifndef PQC_ASM
    (void)mp_add(a, b, c, NWORDS_FIELD);
#else
    sike_mpadd_asm(a, b, c);
#endif
}

// Double-length subtraction, c = a-b over 2*NWORDS_FIELD words.
// Returns an all-ones mask (0xFF..F) when the result is negative and 0x00..0 otherwise.
inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    // Turn the 0/1 borrow into a 0 / all-ones mask.
    const crypto_word_t final_borrow = (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD);
    return 0 - final_borrow;
#else
    return sike_mpsubx2_asm(a, b, c);
#endif
}

// In-place double subtraction, c = c-a-b over 2*NWORDS_FIELD words.
// Callers are expected to pass c > a and c > b; the borrows are not reported.
inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
#ifndef PQC_ASM
    (void)mp_sub(c, a, c, 2*NWORDS_FIELD);   // c -= a
    (void)mp_sub(c, b, c, 2*NWORDS_FIELD);   // c -= b
#else
    sike_mpdblsubx2_asm(a, b, c);
#endif
}

// Copies the NWORDS_FIELD words of field element a into c, lowest word first.
void sike_fpcopy(const felm_t a, felm_t c) {
    size_t idx = 0;

    while (idx < NWORDS_FIELD) {
        c[idx] = a[idx];
        idx++;
    }
}

// Field multiplication using Montgomery arithmetic, mc = ma*mb*R^-1 mod prime.
// R is the radix of the Montgomery reduction in sike_fprdc (documented there as 2^448).
// Implemented as a double-length product followed by that reduction.
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
{
    dfelm_t product = {0};

    sike_mpmul(ma, mb, product);
    sike_fprdc(product, mc);
}

// Leaves the Montgomery domain: c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
// A Montgomery multiplication by the plain integer 1 strips the factor R; the final
// correction brings the result into [0, p-1].
void sike_from_mont(const felm_t ma, felm_t c)
{
    // Only the lowest word is set; the remaining words are zero-initialized.
    felm_t unit = {1};

    sike_fpmul_mont(ma, unit, c);
    sike_fpcorrection(c);
}

// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Uses (a0+a1)(a0-a1) for the real part and (2*a0)*a1 for the imaginary part. All
// values derived from a0 are computed before c is written, and a1 is last read before
// c->c1 is written, so c may be the same object as a.
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
    felm_t sum, diff, twice_a0;

    mp_addfast(a->c0, a->c0, twice_a0);         // twice_a0 = 2*a0
    mp_addfast(a->c0, a->c1, sum);              // sum  = a0+a1
    sike_fpsub(a->c0, a->c1, diff);             // diff = a0-a1

    sike_fpmul_mont(sum, diff, c->c0);          // c0 = (a0+a1)(a0-a1)
    sike_fpmul_mont(twice_a0, a->c1, c->c1);    // c1 = 2a0*a1
}

// Modular negation in place: a is replaced by params.prime_x2 - a (twice the prime, per
// the field name), which is -a modulo the prime.
// Input: a in [0, 2*p434-1]
void sike_fpneg(felm_t a) {
    uint32_t bw = 0;
    size_t idx;

    for (idx = 0; idx < NWORDS_FIELD; idx++) {
        SUBC(bw, params.prime_x2[idx], a[idx], bw, a[idx]);
    }
}

// Modular division by two, c = a/2 mod p434.
// Input : a in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
// When a is odd the prime is added first, so the value being shifted is even.
void sike_fpdiv2(const felm_t a, felm_t c) {
    // All ones if the lowest bit of a is set, zero otherwise.
    const crypto_word_t odd_mask = 0 - (crypto_word_t)(a[0] & 1);
    uint32_t cy = 0;
    size_t idx;

    // c = a + (p & odd_mask)
    for (idx = 0; idx < NWORDS_FIELD; idx++) {
        ADDC(cy, a[idx], params.prime[idx] & odd_mask, cy, c[idx]);
    }

    // Shift the whole number right by one bit; each word takes its new top bit from
    // the lowest bit of the next word up.
    for (idx = 0; idx + 1 < NWORDS_FIELD; idx++) {
        c[idx] = (c[idx] >> 1) ^ (c[idx+1] << (RADIX - 1));
    }
    c[NWORDS_FIELD-1] >>= 1;
}

// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1].
// Subtracts the prime unconditionally and adds it back under a mask when the
// subtraction borrowed.
void sike_fpcorrection(felm_t a) {
    uint32_t bw = 0;
    crypto_word_t restore_mask;
    size_t idx;

    // a = a - p
    for (idx = 0; idx < NWORDS_FIELD; idx++) {
        SUBC(bw, a[idx], params.prime[idx], bw, a[idx]);
    }

    // All ones when a was below p, zero otherwise.
    restore_mask = 0 - (crypto_word_t)bw;

    // a = a + (p & restore_mask)
    bw = 0;
    for (idx = 0; idx < NWORDS_FIELD; idx++) {
        ADDC(bw, a[idx], params.prime[idx] & restore_mask, bw, a[idx]);
    }
}

// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2).
// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
// Three double-length products are formed (Karatsuba style) and reduced only at the
// end. Every read of a and b happens before c is written, so c may alias an input.
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
    felm_t sum_a, sum_b, p_if_negative;
    dfelm_t prod00, prod11, cross;
    crypto_word_t neg_mask;
    size_t idx;

    mp_addfast(a->c0, a->c1, sum_a);              // sum_a = a0+a1
    mp_addfast(b->c0, b->c1, sum_b);              // sum_b = b0+b1
    sike_mpmul(a->c0, b->c0, prod00);             // prod00 = a0*b0
    sike_mpmul(a->c1, b->c1, prod11);             // prod11 = a1*b1
    sike_mpmul(sum_a, sum_b, cross);              // cross = (a0+a1)*(b0+b1)
    mp_dblsubfast(prod00, prod11, cross);         // cross = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
    neg_mask = mp_subfast(prod00, prod11, prod00);  // prod00 = a0*b0 - a1*b1; mask is all ones if negative

    // The prime when the real part went negative, zero otherwise.
    for (idx = 0; idx < NWORDS_FIELD; idx++) {
        p_if_negative[idx] = params.prime[idx] & neg_mask;
    }

    sike_fprdc(cross, c->c1);                     // c1 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
    // Add the masked prime to the upper half of the real part before reducing it.
    mp_addfast(&prod00[NWORDS_FIELD], p_if_negative, &prod00[NWORDS_FIELD]);
    sike_fprdc(prod00, c->c0);                    // c0 = a0*b0 - a1*b1
}

// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2).
// The norm a0^2+a1^2 is inverted in GF(p) and the conjugate of a is scaled by that
// inverse, in place.
void sike_fp2inv_mont(f2elm_t a) {
    f2elm_t norm;

    fpsqr_mont(a->c0, norm->c0);                     // norm.c0 = a0^2
    fpsqr_mont(a->c1, norm->c1);                     // norm.c1 = a1^2
    sike_fpadd(norm->c0, norm->c1, norm->c0);        // norm.c0 = a0^2+a1^2
    fpinv_mont(norm->c0);                            // norm.c0 = (a0^2+a1^2)^-1

    sike_fpneg(a->c1);                               // a = a0-i*a1
    sike_fpmul_mont(a->c0, norm->c0, a->c0);
    sike_fpmul_mont(a->c1, norm->c0, a->c1);         // a = (a0-i*a1)*(a0^2+a1^2)^-1
}

+ 110
- 0
src/kem/sike/p434/fpx.h View File

@@ -0,0 +1,110 @@
#ifndef FPX_H_
#define FPX_H_

#include "utils.h"

#if defined(__cplusplus)
extern "C" {
#endif

// Modular addition, c = a+b mod p.
void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
// Modular subtraction, c = a-b mod p.
void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
// Modular division by two, c = a/2 mod p.
void sike_fpdiv2(const felm_t a, felm_t c);
// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
void sike_fpcorrection(felm_t a);
// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD and c is double-length.
void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
// Montgomery reduction of a double-length value, c = a*R^-1 mod p.
void sike_fprdc(const dfelm_t a, felm_t c);
// Double-length multiprecision subtraction, c = c-a-b (operands have 2*NWORDS_FIELD words).
void sike_mpdblsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
// Double-length multiprecision subtraction, c = a-b; returns an all-ones mask on borrow, else 0.
crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
// Field-length multiprecision addition without reduction, c = a+b
void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
// Modular negation, a = -a mod p.
void sike_fpneg(felm_t a);
// Copy of a field element, c = a
void sike_fpcopy(const felm_t a, felm_t c);
// Presumably sets a field element to zero (per its name).
// NOTE(review): no definition is visible in fpx.c - confirm where it lives.
void sike_fpzero(felm_t a);
// Conversion from Montgomery representation to standard representation,
// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
void sike_from_mont(const felm_t ma, felm_t c);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p
void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2)
void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
void sike_fp2inv_mont(f2elm_t a);
// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
// Modular correction, a = a in GF(p^2).
// NOTE(review): a function-like macro with the same name is defined later in this
// header and takes precedence for every use that follows it.
void sike_fp2correction(f2elm_t a);

#if defined(__cplusplus)
} // extern C
#endif

// GF(p^2) addition, c = a+b in GF(p^2): the two coordinates are added independently.
// Arguments are parenthesized so that expressions such as `p + 1` or `&x` can be passed.
// Each argument is expanded twice, so it must not have side effects.
#define sike_fp2add(a, b, c)                      \
    do {                                          \
        sike_fpadd((a)->c0, (b)->c0, (c)->c0);    \
        sike_fpadd((a)->c1, (b)->c1, (c)->c1);    \
    } while(0)

// GF(p^2) subtraction, c = a-b in GF(p^2): the two coordinates are subtracted independently.
// Arguments are parenthesized so that expressions such as `p + 1` or `&x` can be passed.
// Each argument is expanded twice, so it must not have side effects.
#define sike_fp2sub(a, b, c)                      \
    do {                                          \
        sike_fpsub((a)->c0, (b)->c0, (c)->c0);    \
        sike_fpsub((a)->c1, (b)->c1, (c)->c1);    \
    } while(0)

// Copy a GF(p^2) element, c = a: both coordinates are copied with sike_fpcopy.
// Arguments are parenthesized so that expressions such as `p + 1` or `&x` can be passed.
// Each argument is expanded twice, so it must not have side effects.
#define sike_fp2copy(a, c)                \
    do {                                  \
        sike_fpcopy((a)->c0, (c)->c0);    \
        sike_fpcopy((a)->c1, (c)->c1);    \
    } while(0)

// GF(p^2) negation, a = -a in GF(p^2).
// Both components are negated in place with sike_fpneg().
// The parameter is parenthesized so that arguments such as `&x[0]` or `*p`
// expand correctly. The argument is evaluated twice: do not pass an
// expression with side effects.
#define sike_fp2neg(a)             \
    do {                           \
        sike_fpneg((a)->c0);       \
        sike_fpneg((a)->c1);       \
    } while (0)

// GF(p^2) division by two, c = a/2 in GF(p^2).
// Both components are halved independently with sike_fpdiv2().
// Every parameter is parenthesized so that arguments such as `&x[0]` or `*p`
// expand correctly. Each argument is evaluated twice: do not pass
// expressions with side effects.
#define sike_fp2div2(a, c)                 \
    do {                                   \
        sike_fpdiv2((a)->c0, (c)->c0);     \
        sike_fpdiv2((a)->c1, (c)->c1);     \
    } while (0)

// Modular correction, a = a in GF(p^2).
// Applies sike_fpcorrection() to both components in place.
// NOTE(review): a function prototype with this same name appears earlier in
// this header; from this point on every call expands to this macro.
// The parameter is parenthesized so that arguments such as `&x[0]` or `*p`
// expand correctly. The argument is evaluated twice: do not pass an
// expression with side effects.
#define sike_fp2correction(a)             \
    do {                                  \
        sike_fpcorrection((a)->c0);       \
        sike_fpcorrection((a)->c1);       \
    } while (0)

// Conversion of a GF(p^2) element to Montgomery representation,
// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
// Each component is multiplied by params.mont_R2 with sike_fpmul_mont(), so a
// `params` object must be visible at the point of use.
// Every parameter is parenthesized so that arguments such as `&x[0]` or `*p`
// expand correctly. Each argument is evaluated twice: do not pass
// expressions with side effects.
#define sike_to_fp2mont(a, mc)                                   \
    do {                                                         \
        sike_fpmul_mont((a)->c0, params.mont_R2, (mc)->c0);      \
        sike_fpmul_mont((a)->c1, params.mont_R2, (mc)->c1);      \
    } while (0)

// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
// c_i = ma_i*R^(-1) = a_i in GF(p^2).
// Each component is converted independently with sike_from_mont().
// Every parameter is parenthesized so that arguments such as `&x[0]` or `*p`
// expand correctly. Each argument is evaluated twice: do not pass
// expressions with side effects.
#define sike_from_fp2mont(ma, c)                  \
    do {                                          \
        sike_from_mont((ma)->c0, (c)->c0);        \
        sike_from_mont((ma)->c1, (c)->c1);        \
    } while (0)

#endif // FPX_H_

+ 262
- 0
src/kem/sike/p434/isogeny.c View File

@@ -0,0 +1,262 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: elliptic curve and isogeny functions
*********************************************************************************************/
#include <stddef.h>
#include <string.h>
#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
{
    // x-only doubling of a Montgomery point in projective (X:Z) coordinates.
    // Inputs : P = (X1:Z1) with x1 = X1/Z1, and the curve constants
    //          A24plus (A+2C) and C24 (4C).
    // Output : Q = 2*P = (X2:Z2).
    // Both coordinates of P are read before anything is stored into Q, so
    // the caller may pass the same point for P and Q.
    f2elm_t u, v;

    sike_fp2add(P->X, P->Z, v);           // v  = X1+Z1
    sike_fp2sub(P->X, P->Z, u);           // u  = X1-Z1
    sike_fp2sqr_mont(v, v);               // v  = (X1+Z1)^2
    sike_fp2sqr_mont(u, u);               // u  = (X1-Z1)^2
    sike_fp2mul_mont(C24, u, Q->Z);       // Z2 = C24*(X1-Z1)^2
    sike_fp2mul_mont(v, Q->Z, Q->X);      // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
    sike_fp2sub(v, u, v);                 // v  = (X1+Z1)^2-(X1-Z1)^2
    sike_fp2mul_mont(A24plus, v, u);      // u  = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
    sike_fp2add(Q->Z, u, Q->Z);           // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
    sike_fp2mul_mont(Q->Z, v, Q->Z);      // Z2 = Z2*[(X1+Z1)^2-(X1-Z1)^2]
}

void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
{
    // Repeated doubling: Q <- [2^e]P on a Montgomery curve given by the
    // projective constants A24plus (A+2C) and C24 (4C).
    // P = (XP:ZP) with xP = XP/ZP. P and Q may be the same object: the
    // initial copy uses memmove and each doubling step works on Q in place.
    // With e == 0 the result is simply a copy of P.
    size_t remaining = e;

    memmove(Q, P, sizeof(*P));
    while (remaining != 0) {
        xDBL(Q, Q, A24plus, C24);
        remaining--;
    }
}

void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
// Input: projective point of order four P = (X4:Z4).
// Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients
// that are used to evaluate the isogeny at a point in eval_4_isog().
// Values written: coeff[0] = 4*Z4^2, coeff[1] = X4-Z4, coeff[2] = X4+Z4,
// A24plus = 4*X4^4 and C24 = 4*Z4^4. coeff must provide room for 3 elements.

sike_fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4
sike_fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4
sike_fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2
sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2
sike_fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4
sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2
sike_fp2sqr_mont(P->X, A24plus); // A24plus = X4^2
sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2
sike_fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4
}

void eval_4_isog(point_proj_t P, f2elm_t* coeff)
{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
// by the 3 coefficients in coeff (computed in the function get_4_isog()).
// Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
// Output: the projective point P = phi(P) = (X:Z) in the codomain.
// P is updated in place; coeff is only read.
// In the comments below X and Z on the right-hand side denote the input coordinates.
f2elm_t t0, t1;

sike_fp2add(P->X, P->Z, t0); // t0 = X+Z
sike_fp2sub(P->X, P->Z, t1); // t1 = X-Z
sike_fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1]
sike_fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2]
sike_fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z)
sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z)
sike_fp2add(P->X, P->Z, t1); // t1 = (X+Z)*coeff[1] + (X-Z)*coeff[2]
sike_fp2sub(P->X, P->Z, P->Z); // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2] (sign is irrelevant: squared below)
sike_fp2sqr_mont(t1, t1); // t1 = [(X+Z)*coeff[1] + (X-Z)*coeff[2]]^2
sike_fp2sqr_mont(P->Z, P->Z); // Z = [(X+Z)*coeff[1] - (X-Z)*coeff[2]]^2
sike_fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X+Z)*coeff[1] + (X-Z)*coeff[2]]^2
sike_fp2sub(P->Z, t0, t0); // t0 = [(X+Z)*coeff[1] - (X-Z)*coeff[2]]^2 - coeff[0]*(X+Z)*(X-Z)
sike_fp2mul_mont(P->X, t1, P->X); // Xfinal = X*t1
sike_fp2mul_mont(P->Z, t0, P->Z); // Zfinal = Z*t0
}


void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
{ // Tripling of a Montgomery point in projective coordinates (X:Z).
// Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
// Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
// P is only read in the first three statements and Q is only written in the last steps,
// so Q may be the same object as P (xTPLe() calls it that way).
f2elm_t t0, t1, t2, t3, t4, t5, t6;

sike_fp2sub(P->X, P->Z, t0); // t0 = X-Z
sike_fp2sqr_mont(t0, t2); // t2 = (X-Z)^2
sike_fp2add(P->X, P->Z, t1); // t1 = X+Z
sike_fp2sqr_mont(t1, t3); // t3 = (X+Z)^2
sike_fp2add(t0, t1, t4); // t4 = 2*X
sike_fp2sub(t1, t0, t0); // t0 = 2*Z
sike_fp2sqr_mont(t4, t1); // t1 = 4*X^2
sike_fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2
sike_fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
sike_fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2
sike_fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^4
sike_fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2
sike_fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^4
sike_fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
sike_fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
sike_fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
sike_fp2add(t3, t1, t2); // t2 = t3 + t1
sike_fp2sqr_mont(t2, t2); // t2 = t2^2
sike_fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2
sike_fp2sub(t3, t1, t1); // t1 = t3 - t1
sike_fp2sqr_mont(t1, t1); // t1 = t1^2
sike_fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1
}

void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
{
    // Repeated tripling: Q <- [3^e]P on a Montgomery curve given by the
    // projective constants A24plus (A+2C) and A24minus (A-2C).
    // P = (XP:ZP) with xP = XP/ZP. P and Q may be the same object: the
    // initial copy uses memmove and each tripling step works on Q in place.
    // With e == 0 the result is simply a copy of P.
    size_t remaining = e;

    memmove(Q, P, sizeof(*P));
    while (remaining != 0) {
        xTPL(Q, Q, A24minus, A24plus);
        remaining--;
    }
}

void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
// Input: projective point of order three P = (X3:Z3).
// Output: the constants A24minus and A24plus of the 3-isogenous Montgomery curve, and the two
// coefficients coeff[0] = X-Z and coeff[1] = X+Z that eval_3_isog() uses to evaluate the isogeny.
// In the comments below X and Z denote the coordinates of P.
f2elm_t t0, t1, t2, t3, t4;

sike_fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z
sike_fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2
sike_fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z
sike_fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2
sike_fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2
sike_fp2add(coeff[0], coeff[1], t3); // t3 = 2*X
sike_fp2sqr_mont(t3, t3); // t3 = 4*X^2
sike_fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
sike_fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2
sike_fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2
sike_fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
sike_fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
sike_fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
sike_fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
sike_fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
sike_fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
sike_fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
sike_fp2add(A24minus, t0, A24plus); // A24plus = A24minus + t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
}


void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
{ // Evaluates a 3-isogeny phi at the projective point Q = (X:Z).
// Inputs: the point Q = (X:Z) and the 2 coefficients in coeff that define the isogeny
// (computed in the function get_3_isog() from a point of order 3).
// Output: the projective point Q <- phi(Q), updated in place; coeff is only read.
// In the comments below X and Z denote the input coordinates of Q.
f2elm_t t0, t1, t2;

sike_fp2add(Q->X, Q->Z, t0); // t0 = X+Z
sike_fp2sub(Q->X, Q->Z, t1); // t1 = X-Z
sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z)
sike_fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z)
sike_fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z)
sike_fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z)
sike_fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
sike_fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
sike_fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
sike_fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
}


void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
{
    // Simultaneous inversion of three GF(p^2) elements using one call to
    // sike_fp2inv_mont() plus multiplications.
    // Input : z1, z2, z3.
    // Output: 1/z1, 1/z2, 1/z3, stored over the inputs.
    f2elm_t prod12, inv_all, inv12, inv1;

    sike_fp2mul_mont(z1, z2, prod12);          // prod12  = z1*z2
    sike_fp2mul_mont(z3, prod12, inv_all);     // inv_all = z1*z2*z3
    sike_fp2inv_mont(inv_all);                 // inv_all = 1/(z1*z2*z3)
    sike_fp2mul_mont(z3, inv_all, inv12);      // inv12   = 1/(z1*z2)
    sike_fp2mul_mont(inv12, z2, inv1);         // inv1    = 1/z1 (held back: z1 is still read below)
    sike_fp2mul_mont(inv12, z1, z2);           // z2      = 1/z2
    sike_fp2mul_mont(prod12, inv_all, z3);     // z3      = 1/z3
    sike_fp2copy(inv1, z1);                    // z1      = 1/z1
}


void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
// Input: the x-coordinates xP, xQ, and xR of the points P, Q and R.
// Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
// The statements below evaluate
//   A = (xP*xQ + xP*xR + xQ*xR - 1)^2 / (4*xP*xQ*xR) - (xP + xQ + xR)
// and cost one GF(p^2) inversion.
// NOTE(review): `one` relies on F2ELM_INIT clearing the c1 component; only c0 is set explicitly.
f2elm_t t0, t1, one = F2ELM_INIT;

extern const struct params_t params;
sike_fpcopy(params.mont_one, one->c0);
sike_fp2add(xP, xQ, t1); // t1 = xP+xQ
sike_fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ
sike_fp2mul_mont(xR, t1, A); // A = xR*t1
sike_fp2add(t0, A, A); // A = A+t0
sike_fp2mul_mont(t0, xR, t0); // t0 = t0*xR
sike_fp2sub(A, one, A); // A = A-1
sike_fp2add(t0, t0, t0); // t0 = t0+t0
sike_fp2add(t1, xR, t1); // t1 = t1+xR
sike_fp2add(t0, t0, t0); // t0 = t0+t0
sike_fp2sqr_mont(A, A); // A = A^2
sike_fp2inv_mont(t0); // t0 = 1/t0
sike_fp2mul_mont(A, t0, A); // A = A*t0
sike_fp2sub(A, t1, A); // Afinal = A-t1
}


void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
{ // Computes the j-invariant of a Montgomery curve with projective constant.
// Input: A,C in GF(p^2).
// Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
// The denominator is accumulated in jinv and the numerator in t0; one GF(p^2) inversion is used.
f2elm_t t0, t1;

sike_fp2sqr_mont(A, jinv); // jinv = A^2
sike_fp2sqr_mont(C, t1); // t1 = C^2
sike_fp2add(t1, t1, t0); // t0 = 2*C^2
sike_fp2sub(jinv, t0, t0); // t0 = A^2 - 2*C^2
sike_fp2sub(t0, t1, t0); // t0 = A^2 - 3*C^2
sike_fp2sub(t0, t1, jinv); // jinv = A^2 - 4*C^2
sike_fp2sqr_mont(t1, t1); // t1 = C^4
sike_fp2mul_mont(jinv, t1, jinv); // jinv = C^4*(A^2 - 4*C^2)
sike_fp2add(t0, t0, t0); // t0 = 2*(A^2 - 3*C^2)
sike_fp2add(t0, t0, t0); // t0 = 4*(A^2 - 3*C^2)
sike_fp2sqr_mont(t0, t1); // t1 = 16*(A^2 - 3*C^2)^2
sike_fp2mul_mont(t0, t1, t0); // t0 = 64*(A^2 - 3*C^2)^3
sike_fp2add(t0, t0, t0); // t0 = 128*(A^2 - 3*C^2)^3
sike_fp2add(t0, t0, t0); // t0 = 256*(A^2 - 3*C^2)^3
sike_fp2inv_mont(jinv); // jinv = 1/(C^4*(A^2 - 4*C^2))
sike_fp2mul_mont(jinv, t0, jinv); // jinv = 256*(A^2 - 3*C^2)^3/(C^4*(A^2 - 4*C^2))
}


void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
{ // Simultaneous doubling and differential addition.
// Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
// Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
// Both P and Q are updated in place. Only ZQ is multiplied by xPQ here; XQ is left as the
// squared sum computed below.
f2elm_t t0, t1, t2;

sike_fp2add(P->X, P->Z, t0); // t0 = XP+ZP
sike_fp2sub(P->X, P->Z, t1); // t1 = XP-ZP
sike_fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2
sike_fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ
sike_fp2correction(t2); // modular correction of t2 (see sike_fpcorrection) before it is used as a multiplicand
sike_fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ
sike_fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ)
sike_fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2
sike_fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ)
sike_fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2
sike_fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
sike_fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
sike_fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
sike_fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
sike_fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
sike_fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
sike_fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
sike_fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
sike_fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
}

+ 49
- 0
src/kem/sike/p434/isogeny.h View File

@@ -0,0 +1,49 @@
#ifndef ISOGENY_H_
#define ISOGENY_H_

// Elliptic-curve and isogeny primitives implemented in isogeny.c.
// This header has no #include lines of its own: point_proj_t, f2elm_t and size_t
// must already be declared by the including file (isogeny.c includes <stddef.h>
// and "utils.h" before this header).

// Computes [2^e](X:Z) on Montgomery curve with projective
// constant via e repeated doublings.
// A24plus = A+2C and C24 = 4C. Q receives the result; P and Q may be the same point.
void xDBLe(
const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
const f2elm_t C24, size_t e);
// Simultaneous doubling and differential addition.
// P <- 2*P and Q <- P+Q, with xPQ the affine x-coordinate of P-Q and A24 = (A+2)/4.
void xDBLADD(
point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
const f2elm_t A24);
// Tripling of a Montgomery point in projective coordinates (X:Z).
// Q <- 3*P, with A24minus = A-2C and A24plus = A+2C.
void xTPL(
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
const f2elm_t A24plus);
// Computes [3^e](X:Z) on Montgomery curve with projective constant
// via e repeated triplings.
void xTPLe(
const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
const f2elm_t A24plus, size_t e);
// Given the x-coordinates of P, Q, and R, returns the value A
// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
void get_A(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
// Computes the j-invariant of a Montgomery curve with projective constant.
// jinv = 256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)).
void j_inv(
const f2elm_t A, const f2elm_t C, f2elm_t jinv);
// Computes the corresponding 4-isogeny of a projective Montgomery
// point (X4:Z4) of order 4.
// Writes A24plus, C24 and coeff[0..2] (the coefficients consumed by eval_4_isog()).
void get_4_isog(
const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
// Computes the corresponding 3-isogeny of a projective Montgomery
// point (X3:Z3) of order 3.
// Writes A24minus, A24plus and coeff[0..1] (the coefficients consumed by eval_3_isog()).
void get_3_isog(
const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
f2elm_t* coeff);
// Evaluates the 3-isogeny defined by the coefficients in coeff (from get_3_isog())
// at the projective point Q = (X:Z); Q is updated in place.
void eval_3_isog(
point_proj_t Q, f2elm_t* coeff);
// Evaluates the 4-isogeny defined by the coefficients in coeff (from get_4_isog())
// at the point (X:Z) in the domain of the isogeny; P is updated in place.
void eval_4_isog(
point_proj_t P, f2elm_t* coeff);
// 3-way simultaneous inversion
// z1, z2, z3 are overwritten with 1/z1, 1/z2, 1/z3.
void inv_3_way(
f2elm_t z1, f2elm_t z2, f2elm_t z3);

#endif // ISOGENY_H_

+ 128
- 0
src/kem/sike/p434/params.c View File

@@ -0,0 +1,128 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny parameters and generation of functions for P434
*********************************************************************************************/

#include "utils.h"

// Parameters for isogeny system "SIKE"
// All multi-word values are listed least-significant 64-bit chunk first, 7 chunks per
// field element, each chunk wrapped in U64_TO_WORDS (presumably splitting it into
// native crypto_word_t words - confirm in utils.h).
const struct params_t params = {
// The field prime p (434 bits). Its low 216 bits are all ones.
// NOTE(review): per the SIKE specification p434 = 2^216*3^137 - 1 - confirm.
.prime = {
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF),
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
U64_TO_WORDS(0x0002341F27177344)
},
// p + 1 (the low 216 bits are zero).
.prime_p1 = {
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000),
U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
U64_TO_WORDS(0x0002341F27177344)
},
// 2*p.
.prime_x2 = {
U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF),
U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC),
U64_TO_WORDS(0x0004683E4E2EE688)
},
// Alice's generators: x-coordinates of PA, QA and RA, each as (c0, c1) of a GF(p^2)
// element. sike_init_basis() reads them as six consecutive field elements.
// NOTE(review): presumably stored in Montgomery representation - confirm.
.A_gen = {
U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B),
U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1),
U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F),
U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0
U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B),
U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA),
U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C),
U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1
U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6),
U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03),
U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215),
U64_TO_WORDS(0x0001C4CB77542876), // XQA0
U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050),
U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374),
U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508),
U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1
U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611),
U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD),
U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3),
U64_TO_WORDS(0x00022A81D8D55643), // XRA0
U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED),
U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4),
U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2),
U64_TO_WORDS(0x0000B87FC716C0C6) // XRA1
},
// Bob's generators: x-coordinates of PB, QB and RB in the same layout as A_gen.
// The c1 components of XPB and XQB are zero.
.B_gen = {
U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05),
U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F),
U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9),
U64_TO_WORDS(0x0001BED4772E551F), // XPB0
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), // XPB1
U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C),
U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62),
U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F),
U64_TO_WORDS(0x000034080181D8AE), // XQB0
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), // XQB1
U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647),
U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D),
U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504),
U64_TO_WORDS(0x00007E8A50F02E37), // XRB0
U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230),
U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53),
U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B),
U64_TO_WORDS(0x000173FA910377D3) // XRB1
},
// Montgomery constant R^2 mod p; multiplying by it with sike_fpmul_mont() converts a
// value into Montgomery representation (see sike_to_fp2mont).
.mont_R2 = {
U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2),
U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B),
U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A),
U64_TO_WORDS(0x000025A89BCDD12A)
},
// The value 1 in Montgomery representation, i.e. R mod p with R = 2^448.
.mont_one = {
U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000),
U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C),
U64_TO_WORDS(0x0000ECEEA7BD2EDA)
},
// The value 6 in Montgomery representation, i.e. 6*R mod p.
.mont_six = {
U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000),
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000),
U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0),
U64_TO_WORDS(0x00012559A0403298)
},
// Traversal strategy for Alice's isogeny tree: consumed one entry at a time (A_strat[ii++])
// by the 4-isogeny tree walks in sike.c, where each entry m selects 2*m doublings.
.A_strat = {
0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01,
0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02,
0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01,
0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01,
0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03,
0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04,
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01
},
// Traversal strategy for Bob's isogeny tree: consumed one entry at a time (B_strat[ii++])
// by the 3-isogeny tree walks in sike.c, where each entry m selects m triplings.
.B_strat = {
0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01,
0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01,
0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02,
0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10,
0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01,
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01,
0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02,
0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01,
0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
0x02, 0x01, 0x01, 0x02, 0x01, 0x01
}
};

+ 505
- 0
src/kem/sike/p434/sike.c View File

@@ -0,0 +1,505 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
*********************************************************************************************/

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <randombytes.h>
#include <common/fips202.h>

#include "utils.h"
#include "isogeny.h"
#include "fpx.h"

extern const struct params_t params;

// SIDH_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
// It is also the stride between the three encoded GF(p^2) elements of a public key.
#define SIDH_JINV_BYTESZ 110U
// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
#define SIDH_PRV_A_BITSZ 216U
// SIDH_PRV_B_BITSZ is a number of bits of SIDH private key (3-isogeny)
#define SIDH_PRV_B_BITSZ 217U
// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
#define MAX_INT_POINTS_ALICE 7U
// MAX_INT_POINTS_BOB is a number of points used in 3-isogeny tree computation
#define MAX_INT_POINTS_BOB 8U

// Conditional swap of two projective points using a word mask (no branch on option).
// option == 0        : P and Q are left unchanged.
// option == 0xFF..FF : the contents of P and Q are exchanged.
// For every word of the X and Z coordinates (both c0 and c1 components) the
// masked XOR difference of the two points is folded back into each of them.
static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
{
    for (size_t w = 0; w < NWORDS_FIELD; w++) {
        const crypto_word_t dx0 = option & (P->X->c0[w] ^ Q->X->c0[w]);
        P->X->c0[w] ^= dx0;
        Q->X->c0[w] ^= dx0;

        const crypto_word_t dz0 = option & (P->Z->c0[w] ^ Q->Z->c0[w]);
        P->Z->c0[w] ^= dz0;
        Q->Z->c0[w] ^= dz0;

        const crypto_word_t dx1 = option & (P->X->c1[w] ^ Q->X->c1[w]);
        P->X->c1[w] ^= dx1;
        Q->X->c1[w] ^= dx1;

        const crypto_word_t dz1 = option & (P->Z->c1[w] ^ Q->Z->c1[w]);
        P->Z->c1[w] ^= dz1;
        Q->Z->c1[w] ^= dz1;
    }
}

// Three-point ladder: starting from R0 = (xQ:1), R2 = (xPQ:1) and R = (xP:1), processes the
// scalar m bit by bit (bit i is bit (i & 7) of byte m[i >> 3]) and leaves the result in R.
// Inputs: xP, xQ, xPQ - affine x-coordinates; m - scalar bytes; is_A - non-zero selects
//         SIDH_PRV_A_BITSZ bits, zero selects SIDH_PRV_B_BITSZ bits; A - curve coefficient.
// Output: R (fully overwritten; the caller may pass it uninitialized).
// NOTE(review): by the usual three-point-ladder convention R ends up as P + [m]Q - confirm
// against the specification.
static void ladder3Pt(
const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
int is_A, point_proj_t R, const f2elm_t A) {
point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
f2elm_t A24 = F2ELM_INIT;
crypto_word_t mask;
int bit, swap, prevbit = 0;

const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;

// Initializing constant
sike_fpcopy(params.mont_one, A24[0].c0); // A24 = 1
sike_fp2add(A24, A24, A24); // A24 = 2
sike_fp2add(A, A24, A24); // A24 = A+2
sike_fp2div2(A24, A24); // A24 = (A+2)/2
sike_fp2div2(A24, A24); // A24 = (A+2)/4

// Initializing points
// Only the c0 part of each Z is set to one; for R0 and R2 the c1 part relies on
// POINT_PROJ_INIT (presumably zero-initialization - confirm), for the caller-provided R
// it is cleared explicitly below.
sike_fp2copy(xQ, R0->X);
sike_fpcopy(params.mont_one, R0->Z[0].c0);
sike_fp2copy(xPQ, R2->X);
sike_fpcopy(params.mont_one, R2->Z[0].c0);
sike_fp2copy(xP, R->X);
sike_fpcopy(params.mont_one, R->Z[0].c0);
memset(R->Z->c1, 0, sizeof(R->Z->c1));

// Main loop
for (size_t i = 0; i < nbits; i++) {
bit = (m[i >> 3] >> (i & 7)) & 1;
// Swap only when the current bit differs from the previous one; the mask is
// all-zeros or all-ones so the swap is done without branching on the scalar.
swap = bit ^ prevbit;
prevbit = bit;
mask = 0 - (crypto_word_t)swap;

sike_fp2cswap(R, R2, mask);
xDBLADD(R0, R2, R->X, A24);
sike_fp2mul_mont(R2->X, R->Z, R2->X);
}
// Undo the swap left pending by the last processed bit.
swap = 0 ^ prevbit;
mask = 0 - (crypto_word_t)swap;
sike_fp2cswap(R, R2, mask);
}

// Loads the three basis x-coordinates from a generator table.
// gen holds six consecutive field elements of NWORDS_FIELD words each, in the
// order XP.c0, XP.c1, XQ.c0, XQ.c1, XR.c0, XR.c1; they are copied verbatim
// with sike_fpcopy().
static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
    const crypto_word_t *src = gen;

    sike_fpcopy(src, XP->c0);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XP->c1);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XQ->c0);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XQ->c1);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XR->c0);
    src += NWORDS_FIELD;
    sike_fpcopy(src, XR->c1);
}

// Conversion of GF(p^2) element from Montgomery to standard representation.
static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
f2elm_t t;
sike_from_fp2mont(x, t);

// convert to bytes in little endian form
for (size_t i=0; i<FIELD_BYTESZ; i++) {
enc[i+ 0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
}
}

// Parses a byte sequence back into a GF(p^2) element and converts it to
// Montgomery representation.
// enc holds 2*FIELD_BYTESZ bytes: the c0 component followed by the c1
// component, each in little-endian byte order (the least significant octet
// is located in the lowest memory address).
// The decoded integers are not compared against the prime here.
static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
    const uint8_t *lo = enc;                  // bytes of the c0 component
    const uint8_t *hi = enc + FIELD_BYTESZ;   // bytes of the c1 component

    memset(t->c0, 0, sizeof(t->c0));
    memset(t->c1, 0, sizeof(t->c1));
    for (size_t i = 0; i < FIELD_BYTESZ; i++) {
        const size_t word = i / LSZ;         // word that receives byte i
        const size_t shift = 8 * (i % LSZ);  // bit offset of byte i inside that word
        t->c0[word] |= ((crypto_word_t)lo[i]) << shift;
        t->c1[word] |= ((crypto_word_t)hi[i]) << shift;
    }
    sike_to_fp2mont(t, t);
}

// Alice's ephemeral public key generation
// Input: a private key skA; ladder3Pt() consumes its first SIDH_PRV_A_BITSZ (216) bits.
// Output: the public key pkA consisting of 3 GF(p^2) elements (the images of Bob's basis
// x-coordinates under Alice's isogeny), written by sike_fp2_encode() at offsets
// 0, SIDH_JINV_BYTESZ and 2*SIDH_JINV_BYTESZ.
// NOTE(review): A_max comes from outside this file; presumably the number of 4-isogeny
// steps - confirm.
static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
{
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
point_proj_t phiP = POINT_PROJ_INIT;
point_proj_t phiQ = POINT_PROJ_INIT;
point_proj_t phiR = POINT_PROJ_INIT;
f2elm_t XPA, XQA, XRA, coeff[3];
f2elm_t A24plus = F2ELM_INIT;
f2elm_t C24 = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;

// Initialize basis points
// XPA/XQA/XRA drive the kernel computation; phiP/phiQ/phiR start as Bob's basis with Z = 1
// and are pushed through every isogeny step below.
sike_init_basis(params.A_gen, XPA, XQA, XRA);
sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X);
sike_fpcopy(params.mont_one, (phiP->Z)->c0);
sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
sike_fpcopy(params.mont_one, (phiR->Z)->c0);

// Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
sike_fpcopy(params.mont_one, A24plus->c0); // A24plus = 1
sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2
sike_fp2add(A24plus, A24plus, C24); // C24 = 4
sike_fp2add(A24plus, C24, A); // A = 6
sike_fp2add(C24, C24, A24plus); // A24plus = 8

// Retrieve kernel point
ladder3Pt(XPA, XQA, XRA, skA, 1, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < A_max; row++) {
while (index < A_max-row) {
// Save the current point, then multiply R by 4^m as directed by the strategy table.
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.A_strat[ii++];
xDBLe(R, R, A24plus, C24, (2*m));
index += m;
}
get_4_isog(R, A24plus, C24, coeff);

// Push all saved points and the three basis images through the new isogeny.
for (size_t i = 0; i < npts; i++) {
eval_4_isog(pts[i], coeff);
}
eval_4_isog(phiP, coeff);
eval_4_isog(phiQ, coeff);
eval_4_isog(phiR, coeff);

// Resume from the most recently saved point.
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_4_isog(R, A24plus, C24, coeff);
eval_4_isog(phiP, coeff);
eval_4_isog(phiQ, coeff);
eval_4_isog(phiR, coeff);

// Convert the three images to affine x = X/Z with a single shared inversion.
inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);

// Format public key
sike_fp2_encode(phiP->X, pkA);
sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ);
sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ);
}

// Bob's ephemeral public key generation
// Input: a private key skB; ladder3Pt() consumes its first SIDH_PRV_B_BITSZ (217) bits.
// Output: the public key pkB consisting of 3 GF(p^2) elements (the images of Alice's basis
// x-coordinates under Bob's isogeny), written by sike_fp2_encode() at offsets
// 0, SIDH_JINV_BYTESZ and 2*SIDH_JINV_BYTESZ.
// NOTE(review): B_max comes from outside this file; presumably the number of 3-isogeny
// steps - confirm.
static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
{
point_proj_t R, pts[MAX_INT_POINTS_BOB];
point_proj_t phiP = POINT_PROJ_INIT;
point_proj_t phiQ = POINT_PROJ_INIT;
point_proj_t phiR = POINT_PROJ_INIT;
f2elm_t XPB, XQB, XRB, coeff[3];
f2elm_t A24plus = F2ELM_INIT;
f2elm_t A24minus = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;

// Initialize basis points
// XPB/XQB/XRB drive the kernel computation; phiP/phiQ/phiR start as Alice's basis with Z = 1
// and are pushed through every isogeny step below.
sike_init_basis(params.B_gen, XPB, XQB, XRB);
sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X);
sike_fpcopy(params.mont_one, (phiP->Z)->c0);
sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
sike_fpcopy(params.mont_one, (phiR->Z)->c0);

// Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
sike_fpcopy(params.mont_one, A24plus->c0); // A24plus = 1
sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2
sike_fp2add(A24plus, A24plus, A24minus); // A24minus = 4
sike_fp2add(A24plus, A24minus, A); // A = 6
sike_fp2add(A24minus, A24minus, A24plus); // A24plus = 8

// Retrieve kernel point
ladder3Pt(XPB, XQB, XRB, skB, 0, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < B_max; row++) {
while (index < B_max-row) {
// Save the current point, then multiply R by 3^m as directed by the strategy table.
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.B_strat[ii++];
xTPLe(R, R, A24minus, A24plus, m);
index += m;
}
get_3_isog(R, A24minus, A24plus, coeff);

// Push all saved points and the three basis images through the new isogeny.
for (size_t i = 0; i < npts; i++) {
eval_3_isog(pts[i], coeff);
}
eval_3_isog(phiP, coeff);
eval_3_isog(phiQ, coeff);
eval_3_isog(phiR, coeff);

// Resume from the most recently saved point.
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_3_isog(R, A24minus, A24plus, coeff);
eval_3_isog(phiP, coeff);
eval_3_isog(phiQ, coeff);
eval_3_isog(phiR, coeff);

// Convert the three images to affine x = X/Z with a single shared inversion.
inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);

// Format public key
sike_fp2_encode(phiP->X, pkB);
sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ);
sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ);
}

// Alice's ephemeral shared secret computation
// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
// Inputs: Alice's skA; ladder3Pt() consumes its first SIDH_PRV_A_BITSZ (216) bits.
//         Bob's pkB consists of 3 GF(p^2) elements read by fp2_decode() at offsets
//         0, SIDH_JINV_BYTESZ and 2*SIDH_JINV_BYTESZ.
// Output: a shared secret ssA that consists of one GF(p^2) element (the j-invariant of the
//         final curve) written by sike_fp2_encode().
// NOTE(review): pkB is used as decoded, without any validation in this function.
static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
{
point_proj_t R, pts[MAX_INT_POINTS_ALICE];
f2elm_t coeff[3], PKB[3], jinv;
f2elm_t A24plus = F2ELM_INIT;
f2elm_t C24 = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;

// Initialize images of Bob's basis
fp2_decode(pkB, PKB[0]);
fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]);
fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]);

// Initialize constants
// Recover the curve coefficient A from the three x-coordinates, then set
// A24plus = A+2C and C24 = 4C with C = 1.
get_A(PKB[0], PKB[1], PKB[2], A);
sike_fpadd(params.mont_one, params.mont_one, C24->c0); // C24 = 2
sike_fp2add(A, C24, A24plus); // A24plus = A+2
sike_fpadd(C24->c0, C24->c0, C24->c0); // C24 = 4

// Retrieve kernel point
ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < A_max; row++) {
while (index < A_max-row) {
// Save the current point, then multiply R by 4^m as directed by the strategy table.
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
m = params.A_strat[ii++];
xDBLe(R, R, A24plus, C24, (2*m));
index += m;
}
get_4_isog(R, A24plus, C24, coeff);

// Push all saved points through the new isogeny.
for (size_t i = 0; i < npts; i++) {
eval_4_isog(pts[i], coeff);
}

// Resume from the most recently saved point.
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_4_isog(R, A24plus, C24, coeff);
// Turn (A24plus, C24) = (A+2C, 4C) into the pair (4A, 4C) expected by j_inv():
// 2*(2*A24plus - C24) = 4*(A+2C) - 8C = 4A.
sike_fp2add(A24plus, A24plus, A24plus);
sike_fp2sub(A24plus, C24, A24plus);
sike_fp2add(A24plus, A24plus, A24plus);
j_inv(A24plus, C24, jinv);
sike_fp2_encode(jinv, ssA);
}

// Bob's ephemeral shared secret computation
// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
// Inputs: skB - Bob's secret scalar, handed unchanged to ladder3Pt().
//         pkA - Alice's public key: three GF(p^2) elements, each occupying
//               SIDH_JINV_BYTESZ bytes, stored back to back.
// Output: ssB - one GF(p^2) element, the value produced by j_inv(), written
//               with sike_fp2_encode().
// NOTE(review): this comment used to quote 3^159 and the sizes 378/126 bytes
// for GF(p503^2); confirm the concrete figures for the parameter set this file
// is built for before restating them here.
static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
{
point_proj_t R, pts[MAX_INT_POINTS_BOB];
// Note: despite its name, PKB holds the three elements decoded from pkA.
f2elm_t coeff[3], PKB[3], jinv;
f2elm_t A24plus = F2ELM_INIT;
f2elm_t A24minus = F2ELM_INIT;
f2elm_t A = F2ELM_INIT;
unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;

// Initialize images of Alice's basis
fp2_decode(pkA, PKB[0]);
fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]);
fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]);

// Initialize constants
// Assuming the add/sub helpers store their result in the third argument:
// A24minus = 2, A24plus = A + 2, then A24minus = A - 2.
get_A(PKB[0], PKB[1], PKB[2], A);
sike_fpadd(params.mont_one, params.mont_one, A24minus->c0);
sike_fp2add(A, A24minus, A24plus);
sike_fp2sub(A, A24minus, A24minus);

// Retrieve kernel point
ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A);

// Traverse tree
index = 0;
for (size_t row = 1; row < B_max; row++) {
while (index < B_max-row) {
// Remember the current point and its position so it can be resumed
// after the next isogeny has been applied.
sike_fp2copy(R->X, pts[npts]->X);
sike_fp2copy(R->Z, pts[npts]->Z);
pts_index[npts++] = index;
// The precomputed strategy tells how far to advance from here.
m = params.B_strat[ii++];
xTPLe(R, R, A24minus, A24plus, m);
index += m;
}
get_3_isog(R, A24minus, A24plus, coeff);

// Push every remembered point through the isogeny just computed.
for (size_t i = 0; i < npts; i++) {
eval_3_isog(pts[i], coeff);
}

// Resume from the most recently remembered point.
sike_fp2copy(pts[npts-1]->X, R->X);
sike_fp2copy(pts[npts-1]->Z, R->Z);
index = pts_index[npts-1];
npts -= 1;
}

get_3_isog(R, A24minus, A24plus, coeff);
// A <- 2*(A24plus + A24minus), A24plus <- A24plus - A24minus; these two
// values are the inputs of j_inv().
sike_fp2add(A24plus, A24minus, A);
sike_fp2add(A, A, A);
sike_fp2sub(A24plus, A24minus, A24plus);
j_inv(A, A24plus, jinv);
sike_fp2_encode(jinv, ssB);
}

// Generates a key pair.
//
// The private key buffer is filled in two parts:
//   [0, SIKE_MSG_BYTESZ)   random bytes; SIKE_decaps() substitutes them for
//                          the recovered message when its ciphertext check fails
//   [SIKE_MSG_BYTESZ, ...) SIKE_PRV_BYTESZ random bytes used as the secret
//                          scalar passed to gen_iso_B()
// All but the lowest bit of the 28th scalar byte are cleared before use.
//
// NOTE(review): the declared bound of |out_priv| is SIKE_PRV_BYTESZ, yet
// SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ bytes are written (the same layout that
// SIKE_decaps() declares for |priv_key|). Confirm callers size the buffer
// accordingly. The literal 28 is presumably the scalar length in bytes -
// TODO confirm and replace with a named constant.
//
// Always returns 1.
int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
                 uint8_t out_pub[SIKE_PUB_BYTESZ]) {
    uint8_t *const scalar = out_priv + SIKE_MSG_BYTESZ;

    randombytes(out_priv, SIKE_MSG_BYTESZ);
    randombytes(scalar, SIKE_PRV_BYTESZ);

    // Keep only the least significant bit of the top scalar byte.
    scalar[28 - 1] &= 0x01;

    gen_iso_B(scalar, out_pub);
    return 1;
}

// Encapsulation.
//
// Draws a random message m of SIKE_MSG_BYTESZ bytes and produces
//   out_ciphertext = c0 || c1, where
//     c0 = gen_iso_A(SHAKE256(m || pub_key))
//     c1 = m XOR SHAKE256(j), j being the output of ex_iso_A()
//   out_shared_key = first SIKE_SS_BYTESZ bytes of SHAKE256(m || out_ciphertext)
void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
                 uint8_t out_ciphertext[SIKE_CT_BYTESZ],
                 const uint8_t pub_key[SIKE_PUB_BYTESZ]) {
    // |digest| is reused for every SHAKE256 output: first the ephemeral
    // secret scalar, then the mask for c1, finally the shared key material.
    // NOTE(review): its size is fixed at 32 bytes; it has to cover
    // SIKE_MSG_BYTESZ, SIKE_SS_BYTESZ and the scalar consumed by gen_iso_A()
    // and ex_iso_A() - confirm against the parameter set.
    uint8_t digest[32];
    uint8_t jinv[SIDH_JINV_BYTESZ];
    uint8_t msg[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ];
    shake256incctx xof;
    uint8_t *const c1 = out_ciphertext + SIKE_PUB_BYTESZ;

    // Random message m
    randombytes(msg, SIKE_MSG_BYTESZ);

    // Ephemeral secret = SHAKE256(m || pub_key)
    shake256_inc_init(&xof);
    shake256_inc_absorb(&xof, msg, SIKE_MSG_BYTESZ);
    shake256_inc_absorb(&xof, pub_key, SIKE_PUB_BYTESZ);
    shake256_inc_finalize(&xof);
    shake256_inc_squeeze(digest, sizeof digest, &xof);
    shake256_inc_ctx_release(&xof);

    // c0: public key derived from the ephemeral secret
    gen_iso_A(digest, out_ciphertext);

    // h = SHAKE256(j-invariant); the ephemeral secret is overwritten by h
    ex_iso_A(digest, pub_key, jinv);
    shake256(digest, sizeof digest, jinv, sizeof jinv);

    // c1 = h ^ m
    for (size_t k = 0; k < SIKE_MSG_BYTESZ; k++) {
        c1[k] = msg[k] ^ digest[k];
    }

    // Shared secret = SHAKE256(m || out_ciphertext)
    shake256_inc_init(&xof);
    shake256_inc_absorb(&xof, msg, SIKE_MSG_BYTESZ);
    shake256_inc_absorb(&xof, out_ciphertext, SIKE_CT_BYTESZ);
    shake256_inc_finalize(&xof);
    shake256_inc_squeeze(digest, sizeof digest, &xof);
    shake256_inc_ctx_release(&xof);

    memcpy(out_shared_key, digest, SIKE_SS_BYTESZ);
}

// Decapsulation.
//
// ciphertext = c0 || c1 with c0 of SIKE_PUB_BYTESZ bytes.
//   1. m' = c1 XOR SHAKE256(j), j being the output of ex_iso_B() for the
//      scalar stored at priv_key + SIKE_MSG_BYTESZ and c0.
//   2. c0 is recomputed from SHAKE256(m' || pub_key) and compared with the
//      received c0 without data-dependent branches.
//   3. On mismatch m' is replaced by the first SIKE_MSG_BYTESZ bytes of
//      priv_key.
//   4. out_shared_key = first SIKE_SS_BYTESZ bytes of
//      SHAKE256(m' || ciphertext).
void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
                 const uint8_t ciphertext[SIKE_CT_BYTESZ],
                 const uint8_t pub_key[SIKE_PUB_BYTESZ],
                 const uint8_t priv_key[SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ]) {
    // |digest| is reused for every SHAKE256 output produced below.
    // NOTE(review): its size is fixed at 32 bytes; confirm it covers
    // SIKE_MSG_BYTESZ, SIKE_SS_BYTESZ and the scalar read by gen_iso_A().
    uint8_t digest[32];
    uint8_t jinv[SIDH_JINV_BYTESZ];
    uint8_t c0_check[SIKE_PUB_BYTESZ];
    uint8_t msg[SIKE_MSG_BYTESZ];
    shake256incctx xof;
    const uint8_t *const c1 = ciphertext + sizeof(c0_check);

    // Recover m' = SHAKE256(j-invariant(c0, priv_key)) ^ c1
    ex_iso_B(priv_key + SIKE_MSG_BYTESZ, ciphertext, jinv);
    shake256(digest, sizeof digest, jinv, sizeof jinv);
    for (size_t k = 0; k < SIKE_MSG_BYTESZ; k++) {
        msg[k] = c1[k] ^ digest[k];
    }

    // Ephemeral secret candidate = SHAKE256(m' || pub_key)
    shake256_inc_init(&xof);
    shake256_inc_absorb(&xof, msg, SIKE_MSG_BYTESZ);
    shake256_inc_absorb(&xof, pub_key, SIKE_PUB_BYTESZ);
    shake256_inc_finalize(&xof);
    shake256_inc_squeeze(digest, sizeof digest, &xof);
    shake256_inc_ctx_release(&xof);

    // Recompute c0 and compare it with the received one
    gen_iso_A(digest, c0_check);
    const crypto_word_t ok = ct_uint_eq(
        ct_mem_eq(c0_check, ciphertext, SIKE_PUB_BYTESZ), 1);

    // Keep m' when the check passed, otherwise fall back to priv_key[0..]
    for (size_t k = 0; k < SIKE_MSG_BYTESZ; k++) {
        msg[k] = ct_select_8(ok, msg[k], priv_key[k]);
    }

    // Shared secret = SHAKE256(m' || ciphertext)
    shake256_inc_init(&xof);
    shake256_inc_absorb(&xof, msg, SIKE_MSG_BYTESZ);
    shake256_inc_absorb(&xof, ciphertext, SIKE_CT_BYTESZ);
    shake256_inc_finalize(&xof);
    shake256_inc_squeeze(digest, sizeof digest, &xof);
    shake256_inc_ctx_release(&xof);

    memcpy(out_shared_key, digest, SIKE_SS_BYTESZ);
}

+ 214
- 0
src/kem/sike/p434/utils.h View File

@@ -0,0 +1,214 @@
/********************************************************************************************
* SIDH: an efficient supersingular isogeny cryptography library
*
* Abstract: internal header file for P434
*********************************************************************************************/

#ifndef UTILS_H_
#define UTILS_H_

#include <stddef.h>
#include <kem/sike/includes/sike/sike.h>

// Conversion macro from number of bits to number of bytes (rounds up)
#define BITS_TO_BYTES(nbits) (((nbits)+7)/8)

// Bit size of the field
#define BITS_FIELD 434
// Byte size of the field
#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD)
// Number of 64-bit words of a 224-bit element
#define NBITS_ORDER 224
#define NWORDS64_ORDER ((NBITS_ORDER+63)/64)
// Bound of Alice's tree traversal; her strategy table holds A_max-1 entries
#define A_max 108
// Bound of Bob's tree traversal; his strategy table holds B_max-1 entries
#define B_max 137
// Word size in bits. The expansion is parenthesised so that the macro acts as
// a single operand: without the parentheses `x / RADIX` would expand to
// `x / sizeof(crypto_word_t) * 8`.
#define RADIX (sizeof(crypto_word_t)*8)
// Byte size of a limb
#define LSZ (sizeof(crypto_word_t))

#if defined(CPU_64_BIT)
typedef uint64_t crypto_word_t;
// Number of words of a 434-bit field element
#define NWORDS_FIELD 7
// Number of "0" digits in the least significant part of p434 + 1
#define ZERO_WORDS 3
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
#define U64_TO_WORDS(x) UINT64_C(x)
#else
typedef uint32_t crypto_word_t;
// Number of words of a 434-bit field element
#define NWORDS_FIELD 14
// Number of "0" digits in the least significant part of p434 + 1
#define ZERO_WORDS 6
// U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
#define U64_TO_WORDS(x) \
(uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32)
#endif

// Extended datatype support
#if !defined(HAS_UINT128)
typedef uint64_t uint128_t[2];
#endif

// Word-level arithmetic helpers. The carry/borrow outputs of ADDC and SUBC
// are plain bits: 1 when a carry/borrow occurred, 0 otherwise.

// Digit multiplication: forwards both operands and the address of |lo| to
// digit_x_digit().
// NOTE(review): |hi| is accepted but not referenced by the expansion, and the
// expansion already ends with ';' - check the call sites before changing
// either.
#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));

// Mask-to-bit: shifts |x| right by RADIX-1, leaving only its most significant
// bit. An all-ones mask becomes 1, a zero mask becomes 0.
#define M2B(x) ((x)>>(RADIX-1))

// Digit addition with carry:
//   sumOut   = addend1 + addend2 + carryIn   (wrapping at the word size)
//   carryOut = 1 if either of the two partial additions wrapped, else 0
// |tempReg| is declared inside the macro's own do { } while(0) scope.
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
do { \
crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \
(sumOut) = (addend2) + tempReg; \
(carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) | \
ct_uint_lt((sumOut), tempReg)); \
} while(0)

// Digit subtraction with borrow:
//   differenceOut = minuend - subtrahend - borrowIn   (wrapping at the word size)
//   borrowOut     = 1 if minuend < subtrahend, or if the two are equal and
//                   borrowIn is set; else 0
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
do { \
crypto_word_t tempReg = (minuend) - (subtrahend); \
crypto_word_t borrowReg = M2B(ct_uint_lt((minuend), (subtrahend))); \
borrowReg |= ((borrowIn) & ct_uint_eq(tempReg, 0)); \
(differenceOut) = tempReg - (crypto_word_t)(borrowIn); \
(borrowOut) = borrowReg; \
} while(0)

/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
which violates C11 as described in 6.7.9, 21 (similarly C99, 6.7.8).
The defines below work around the bug and provide a way
to initialize f2elm_t and point_proj_t structs.
The bug has been fixed in GCC6 (debian stretch).
*/
#define F2ELM_INIT {{ {0}, {0} }}
#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}

// Datatype for representing 434-bit field elements (448-bit max.)
// Elements over GF(p434) are encoded in 55 octets (FIELD_BYTESZ) in little endian format
// (i.e., the least significant octet is located in the lowest memory address).
typedef crypto_word_t felm_t[NWORDS_FIELD];

// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e.
// Fp2 element = c0 + c1*i in F_{p^2}
// Datatype for representing double-precision 2x434-bit field elements (448-bit max.)
// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are
// encoded as {a, b}, with a in the lowest memory portion.
typedef struct {
felm_t c0;
felm_t c1;
} fp2;

// Our F_{p^2} element type is a pointer to the struct.
typedef fp2 f2elm_t[1];

// Datatype for representing double-precision 2x434-bit
// field elements in contiguous memory.
typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];

// Constants used during SIKE computation.
struct params_t {
// Stores a prime
const crypto_word_t prime[NWORDS_FIELD];
// Stores prime + 1
const crypto_word_t prime_p1[NWORDS_FIELD];
// Stores prime * 2
const crypto_word_t prime_x2[NWORDS_FIELD];
// Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i}
// in GF(prime^2), expressed in Montgomery representation
const crypto_word_t A_gen[6*NWORDS_FIELD];
// Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i}
// in GF(prime^2), expressed in Montgomery representation
const crypto_word_t B_gen[6*NWORDS_FIELD];
// Montgomery constant mont_R2 = (2^448)^2 mod prime
const crypto_word_t mont_R2[NWORDS_FIELD];
// Value 'one' in Montgomery representation
const crypto_word_t mont_one[NWORDS_FIELD];
// Value '6' in Montgomery representation
const crypto_word_t mont_six[NWORDS_FIELD];
// Fixed parameters for isogeny tree computation
const unsigned int A_strat[A_max-1];
const unsigned int B_strat[B_max-1];
};

// Point representation in projective XZ Montgomery coordinates.
typedef struct {
f2elm_t X;
f2elm_t Z;
} point_proj;
typedef point_proj point_proj_t[1];

// Branch-free equality test for two words.
// Returns 1 when x == y, otherwise 0.
static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
{
    // Zero exactly when the operands are equal.
    const crypto_word_t diff = x ^ y;
    // (diff >> 1) - diff stays 0 for diff == 0; for any other value the
    // subtraction wraps around and leaves the most significant bit set.
    const crypto_word_t folded = (diff >> 1) - diff;
    // Invert and bring the most significant bit down to bit 0.
    return ((~folded) >> (RADIX-1));
}
// Constant time select between two bytes.
// Only the least significant bit of |flag| is examined:
//   bit set   -> returns in1
//   bit clear -> returns in2
// The mask is built with unsigned arithmetic only, so the result does not
// depend on how the compiler narrows to a signed type or right-shifts a
// negative value.
static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
    // 0 - 1 wraps to all-ones, 0 - 0 stays zero.
    const uint8_t mask = (uint8_t)(0u - (flag & 1u));
    return (uint8_t)((in1 & mask) | (in2 & (uint8_t)~mask));
}

// Constant time memory comparison. Returns 1 if the first |n| bytes at |p|
// and |q| are identical, otherwise 0. All |n| bytes are always visited; the
// loop never exits early on a mismatch. For n == 0 the result is 1.
static inline int ct_mem_eq(const void *p, const void *q, size_t n)
{
    // Keep the const qualifier of the inputs: they are only read.
    const uint8_t *lhs = (const uint8_t *)p;
    const uint8_t *rhs = (const uint8_t *)q;
    uint8_t acc = 0;

    // Accumulate every differing bit; acc stays 0 only if all bytes match.
    for (size_t i = 0; i < n; i++) {
        acc |= (uint8_t)(lhs[i] ^ rhs[i]);
    }
    return (int)ct_uint_eq(acc, 0);
}

// Spreads the most significant bit of |a| over the whole word:
// returns all-ones if that bit is set, 0 otherwise.
static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
    const crypto_word_t top_bit = a >> (sizeof(a) * 8 - 1);
    // 0 - 1 wraps to all-ones, 0 - 0 stays zero.
    return 0u - top_bit;
}

// ct_uint_lt returns 0xff..f if x < y and 0 otherwise.
static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
{
    // Consider the two cases of the problem:
    //   msb(x) == msb(y): x < y iff the MSB of x - y is set.
    //   msb(x) != msb(y): x < y iff the MSB of y is set.
    //
    // If msb(x) == msb(y) then the following evaluates as:
    //   msb(x^((x^y)|((x-y)^x))) ==
    //   msb(x^((x-y) ^ x))       ==   (because msb(x^y) == 0)
    //   msb(x^x^(x-y))           ==   (rearranging)
    //   msb(x-y)                      (because v^v == 0 for every v)
    //
    // Else, if msb(x) != msb(y) then the following evaluates as:
    //   msb(x^((x^y)|((x-y)^x))) ==
    //   msb(x^(T | ((x-y)^x)))   ==   (because msb(x^y) == 1; T stands for
    //                                  a value whose MSB is 1)
    //   msb(x^T)                 ==   (because ORing with 1 results in 1)
    //   msb(y)
    //
    // Here is an SMT-LIB verification of this formula:
    //
    // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
    //   (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
    // )
    //
    // (declare-fun a () (_ BitVec 32))
    // (declare-fun b () (_ BitVec 32))
    //
    // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
    // (check-sat)
    // (get-model)
    const crypto_word_t sign_differs = x ^ y;
    const crypto_word_t sub_flips = (x - y) ^ x;
    return constant_time_msb_w(x ^ (sign_differs | sub_flips));
}
#endif // UTILS_H_

+ 4
- 2
src/rustapi/pqc-sys/src/bindings.rs View File

@@ -235,7 +235,8 @@ pub const SPHINCSSHA256256SROBUST: ::std::os::raw::c_uint = 28;
pub const SPHINCSSHA256128SROBUST: ::std::os::raw::c_uint = 29;
pub const SPHINCSSHA256128FSIMPLE: ::std::os::raw::c_uint = 30;
pub const SPHINCSSHA256192FROBUST: ::std::os::raw::c_uint = 31;
pub const PQC_ALG_SIG_MAX: ::std::os::raw::c_uint = 32;
pub const PICNIC3L1: ::std::os::raw::c_uint = 32;
pub const PQC_ALG_SIG_MAX: ::std::os::raw::c_uint = 33;
pub type _bindgen_ty_1 = ::std::os::raw::c_uint;
pub const FRODOKEM976SHAKE: ::std::os::raw::c_uint = 0;
pub const FRODOKEM1344SHAKE: ::std::os::raw::c_uint = 1;
@@ -256,7 +257,8 @@ pub const SABER: ::std::os::raw::c_uint = 15;
pub const HQCRMRS128: ::std::os::raw::c_uint = 16;
pub const HQCRMRS192: ::std::os::raw::c_uint = 17;
pub const HQCRMRS256: ::std::os::raw::c_uint = 18;
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 19;
pub const SIKE434: ::std::os::raw::c_uint = 19;
pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 20;
pub type _bindgen_ty_2 = ::std::os::raw::c_uint;
#[repr(C)]
#[derive(Debug, Copy, Clone)]


+ 4
- 2
src/rustapi/pqc-sys/src/build.rs View File

@@ -4,12 +4,14 @@ extern crate bindgen;

fn main() {
let dst = Config::new("../../../")
.profile("Release")
.profile("Debug")
.very_verbose(true)
.build();
.build();

println!("cargo:rustc-link-search=native={}/lib", dst.display());
println!("cargo:rustc-link-lib=static=pqc_s");
// For some reason GetX86Info symbol is undefined in the pqc_s. Hence this line
println!("cargo:rustc-link-lib=static=cpu_features");
println!("cargo:rerun-if-changed=../../../capi/*,../../../kem/*,../../../sign/*,../../../../public/pqc/pqc.h");

// The bindgen::Builder is the main entry point


+ 10
- 0
src/sign/picnic/AUTHORS View File

@@ -0,0 +1,10 @@
Greg Zaverucha
Sebastian Ramacher
Daniel Kales
Steven Goldfeder

This reference implementation is derived from the earlier Picnic implementation
at https://github.com/Microsoft/Picnic by Steven Goldfeder and Greg Zaverucha.

The SHA-3 implementation redistributed here is from the Keccak Code Package,
see https://github.com/gvanas/KeccakCodePackage for authorship.

+ 21
- 0
src/sign/picnic/LICENSE View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) Steven Goldfeder and Microsoft Corporation. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

+ 277
- 0
src/sign/picnic/picnic3l1/avx2/NIST-KATs/PQCgenKAT_sign.c View File

@@ -0,0 +1,277 @@

//
// PQCgenKAT_sign.c
//
// Created by Bassham, Lawrence E (Fed) on 8/29/17.
// Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "rng.h"
#include "api.h"

#define MAX_MARKER_LEN 50

#define KAT_SUCCESS 0
#define KAT_FILE_OPEN_ERROR -1
#define KAT_DATA_ERROR -3
#define KAT_CRYPTO_FAILURE -4

int FindMarker(FILE *infile, const char *marker);
int ReadHex(FILE *infile, unsigned char *A, int Length, char *str);
void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L);

char AlgName[] = CRYPTO_ALGNAME;
static const char* l1 = "L1";
static const char* l3 = "L3";
static const char* l5 = "L5";
static const char* unknown = "UNKNOWN_PARAM_SET";

// Generates the NIST known-answer-test files for the signature scheme.
//
// Step 1 writes PQCsignKAT_<suffix>.req with 100 (seed, message) pairs drawn
// from the DRBG. Step 2 re-reads that file and, for every entry, reseeds the
// DRBG, generates a key pair, signs the message, verifies the signature and
// writes everything to PQCsignKAT_<suffix>.rsp.
//
// Returns KAT_SUCCESS, or one of the KAT_* error codes. Every exit path
// releases the per-entry buffers and closes both files.
int
main(void)
{
    char                fn_req[33], fn_rsp[33];
    FILE                *fp_req = NULL, *fp_rsp = NULL;
    unsigned char       seed[48];
    unsigned char       msg[3300];
    unsigned char       entropy_input[48];
    unsigned char       *m = NULL, *sm = NULL, *m1 = NULL;
    unsigned long long  mlen, smlen, mlen1;
    int                 count;
    unsigned char       pk[CRYPTO_PUBLICKEYBYTES], sk[CRYPTO_SECRETKEYBYTES];
    int                 ret_val;
    int                 status = KAT_SUCCESS;
    const char          *suffix;

    // The parameter set is recognised by the public key size.
    switch (CRYPTO_PUBLICKEYBYTES) {
    case 1 + 2 * 16:
    case 1 + 2 * 17:
        suffix = l1;
        break;
    case 1 + 2 * 24:
        suffix = l3;
        break;
    case 1 + 2 * 32:
        suffix = l5;
        break;
    default:
        suffix = unknown;
        break;
    }

    // Create the REQUEST file
    snprintf(fn_req, sizeof fn_req, "PQCsignKAT_%s.req", suffix);
    if ( (fp_req = fopen(fn_req, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn_req);
        status = KAT_FILE_OPEN_ERROR;
        goto cleanup;
    }
    snprintf(fn_rsp, sizeof fn_rsp, "PQCsignKAT_%s.rsp", suffix);
    if ( (fp_rsp = fopen(fn_rsp, "w")) == NULL ) {
        printf("Couldn't open <%s> for write\n", fn_rsp);
        status = KAT_FILE_OPEN_ERROR;
        goto cleanup;
    }

    for (int i=0; i<48; i++)
        entropy_input[i] = (unsigned char)i;

    randombytes_init(entropy_input, NULL, 256);
    for (int i=0; i<100; i++) {
        fprintf(fp_req, "count = %d\n", i);
        randombytes(seed, 48);
        fprintBstr(fp_req, "seed = ", seed, 48);
        mlen = 33*(i+1);    // at most 3300, the size of msg
        fprintf(fp_req, "mlen = %llu\n", mlen);
        randombytes(msg, mlen);
        fprintBstr(fp_req, "msg = ", msg, mlen);
        fprintf(fp_req, "pk =\n");
        fprintf(fp_req, "sk =\n");
        fprintf(fp_req, "smlen =\n");
        fprintf(fp_req, "sm =\n\n");
    }
    fclose(fp_req);
    fp_req = NULL;

    //Create the RESPONSE file based on what's in the REQUEST file
    if ( (fp_req = fopen(fn_req, "r")) == NULL ) {
        printf("Couldn't open <%s> for read\n", fn_req);
        status = KAT_FILE_OPEN_ERROR;
        goto cleanup;
    }

    fprintf(fp_rsp, "# %s\n\n", CRYPTO_ALGNAME);
    for (;;) {
        // No further "count" marker means the request file is exhausted.
        if ( !FindMarker(fp_req, "count = ") )
            break;
        if ( fscanf(fp_req, "%d", &count) != 1 ) {
            printf("ERROR: unable to read 'count' from <%s>\n", fn_req);
            status = KAT_DATA_ERROR;
            goto cleanup;
        }
        fprintf(fp_rsp, "count = %d\n", count);

        if ( !ReadHex(fp_req, seed, 48, "seed = ") ) {
            printf("ERROR: unable to read 'seed' from <%s>\n", fn_req);
            status = KAT_DATA_ERROR;
            goto cleanup;
        }
        fprintBstr(fp_rsp, "seed = ", seed, 48);

        randombytes_init(seed, NULL, 256);

        if ( !FindMarker(fp_req, "mlen = ") ||
             fscanf(fp_req, "%llu", &mlen) != 1 ) {
            printf("ERROR: unable to read 'mlen' from <%s>\n", fn_req);
            status = KAT_DATA_ERROR;
            goto cleanup;
        }
        fprintf(fp_rsp, "mlen = %llu\n", mlen);

        // Allocate at least one byte: ReadHex() stores a single zero byte for
        // a zero-length message, and calloc(0, ...) may legitimately return NULL.
        m = calloc(mlen ? mlen : 1, sizeof(unsigned char));
        m1 = calloc(mlen ? mlen : 1, sizeof(unsigned char));
        sm = calloc(mlen+CRYPTO_BYTES, sizeof(unsigned char));
        if ( m == NULL || m1 == NULL || sm == NULL ) {
            printf("ERROR: out of memory while processing <%s>\n", fn_req);
            status = KAT_DATA_ERROR;
            goto cleanup;
        }

        if ( !ReadHex(fp_req, m, (int)mlen, "msg = ") ) {
            printf("ERROR: unable to read 'msg' from <%s>\n", fn_req);
            status = KAT_DATA_ERROR;
            goto cleanup;
        }
        fprintBstr(fp_rsp, "msg = ", m, mlen);

        // Generate the public/private keypair
        if ( (ret_val = crypto_sign_keypair(pk, sk)) != 0) {
            printf("crypto_sign_keypair returned <%d>\n", ret_val);
            status = KAT_CRYPTO_FAILURE;
            goto cleanup;
        }
        fprintBstr(fp_rsp, "pk = ", pk, CRYPTO_PUBLICKEYBYTES);
        fprintBstr(fp_rsp, "sk = ", sk, CRYPTO_SECRETKEYBYTES);

        if ( (ret_val = crypto_sign(sm, &smlen, m, mlen, sk)) != 0) {
            printf("crypto_sign returned <%d>\n", ret_val);
            status = KAT_CRYPTO_FAILURE;
            goto cleanup;
        }
        fprintf(fp_rsp, "smlen = %llu\n", smlen);
        fprintBstr(fp_rsp, "sm = ", sm, smlen);
        fprintf(fp_rsp, "\n");

        if ( (ret_val = crypto_sign_open(m1, &mlen1, sm, smlen, pk)) != 0) {
            printf("crypto_sign_open returned <%d>\n", ret_val);
            status = KAT_CRYPTO_FAILURE;
            goto cleanup;
        }

        if ( mlen != mlen1 ) {
            printf("crypto_sign_open returned bad 'mlen': Got <%llu>, expected <%llu>\n", mlen1, mlen);
            status = KAT_CRYPTO_FAILURE;
            goto cleanup;
        }

        if ( memcmp(m, m1, mlen) ) {
            printf("crypto_sign_open returned bad 'm' value\n");
            status = KAT_CRYPTO_FAILURE;
            goto cleanup;
        }

        free(m);
        free(m1);
        free(sm);
        m = m1 = sm = NULL;
    }

cleanup:
    // free(NULL) is a no-op, so this is safe on every path.
    free(m);
    free(m1);
    free(sm);
    if ( fp_req != NULL )
        fclose(fp_req);
    if ( fp_rsp != NULL )
        fclose(fp_rsp);

    return status;
}

//
// Advances |infile| until just past the first occurrence of |marker|.
// Returns 1 when the marker was found, 0 when end of file (or a read error)
// was hit first. Only the first MAX_MARKER_LEN-1 characters of |marker| are
// compared. An empty marker matches immediately.
//
int
FindMarker(FILE *infile, const char *marker)
{
    char    line[MAX_MARKER_LEN];
    int     i, len, ch;

    len = (int)strlen(marker);
    if ( len > MAX_MARKER_LEN-1 )
        len = MAX_MARKER_LEN-1;

    // fgetc() returns an int so that EOF can be told apart from every byte
    // value; the result is compared before it is narrowed to char.
    for ( i=0; i<len; i++ ) {
        if ( (ch = fgetc(infile)) == EOF )
            return 0;
        line[i] = (char)ch;
    }
    line[len] = '\0';

    for (;;) {
        if ( !strncmp(line, marker, len) )
            return 1;

        // Slide the window by one character. len is at least 1 here: a
        // zero-length marker has already matched above.
        memmove(line, line + 1, (size_t)(len - 1));
        if ( (ch = fgetc(infile)) == EOF )
            return 0;
        line[len-1] = (char)ch;
        line[len] = '\0';
    }
}

//
// Reads the hexadecimal value that follows the marker |str| in |infile| into
// the |Length|-byte buffer |A|.
//
// The buffer is zeroed first. Every hex digit shifts the whole buffer left by
// one nibble and is placed in the low nibble of the last byte, so the value
// ends up right-aligned. Characters between the marker and the first digit
// are skipped, except that a newline stops the scan; after the first digit
// any non-hex character stops it.
//
// Returns 1 on success, 0 if the marker is not found. For Length == 0 a single
// zero byte is stored in A[0] and 1 is returned without touching the file.
//
int
ReadHex(FILE *infile, unsigned char *A, int Length, char *str)
{
    if ( Length == 0 ) {
        A[0] = 0x00;
        return 1;
    }
    memset(A, 0x00, Length);

    if ( !FindMarker(infile, str) )
        return 0;

    int seen_digit = 0;
    for (;;) {
        const int ch = fgetc(infile);
        if ( ch == EOF )
            break;

        if ( !isxdigit(ch) ) {
            if ( seen_digit || ch == '\n' )
                break;
            continue;
        }
        seen_digit = 1;

        unsigned char nibble;
        if ( (ch >= '0') && (ch <= '9') ) {
            nibble = ch - '0';
        } else if ( (ch >= 'A') && (ch <= 'F') ) {
            nibble = ch - 'A' + 10;
        } else if ( (ch >= 'a') && (ch <= 'f') ) {
            nibble = ch - 'a' + 10;
        } else {
            // shouldn't ever get here
            nibble = 0;
        }

        // Shift the whole buffer left by four bits and append the new digit.
        for ( int k=0; k<Length-1; k++ )
            A[k] = (A[k] << 4) | (A[k+1] >> 4);
        A[Length-1] = (A[Length-1] << 4) | nibble;
    }

    return 1;
}

// Writes the label |S| followed by the |L| bytes at |A| as upper-case hex
// digits and a newline to |fp|. An empty byte string is printed as "00".
void
fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L)
{
    fprintf(fp, "%s", S);

    if ( L == 0 ) {
        fprintf(fp, "00");
    } else {
        const unsigned char *const end = A + L;
        for ( const unsigned char *p = A; p != end; ++p )
            fprintf(fp, "%02X", *p);
    }

    fprintf(fp, "\n");
}


+ 222
- 0
src/sign/picnic/picnic3l1/avx2/NIST-KATs/rng.c View File

@@ -0,0 +1,222 @@
//
// rng.c
//
// Created by Bassham, Lawrence E (Fed) on 8/29/17.
// Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.
//

#include <string.h>
#include "rng.h"
#include <openssl/conf.h>
#include <openssl/evp.h>
#include <openssl/err.h>

AES256_CTR_DRBG_struct DRBG_ctx;

void AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer);

/*
 seedexpander_init()
 ctx - stores the current state of an instance of the seed expander
 seed - a 32 byte random value
 diversifier - an 8 byte diversifier
 maxlen - maximum number of bytes (less than 2**32) generated under this seed and diversifier

 Returns RNG_BAD_MAXLEN when maxlen does not fit in 32 bits, RNG_SUCCESS
 otherwise.
 */
int
seedexpander_init(AES_XOF_struct *ctx,
                  unsigned char *seed,
                  unsigned char *diversifier,
                  unsigned long maxlen)
{
    if ( maxlen >= 0x100000000 )
        return RNG_BAD_MAXLEN;

    ctx->length_remaining = maxlen;
    memcpy(ctx->key, seed, 32);

    // Counter block layout:
    //   ctr[0..7]   diversifier
    //   ctr[8..11]  maxlen, most significant byte first
    //   ctr[12..15] zero
    memcpy(ctx->ctr, diversifier, 8);
    for (int pos = 11; pos >= 8; pos--) {
        ctx->ctr[pos] = maxlen % 256;
        maxlen >>= 8;
    }
    memset(ctx->ctr+12, 0x00, 4);

    // buffer_pos == 16 marks the output buffer as fully consumed.
    memset(ctx->buffer, 0x00, 16);
    ctx->buffer_pos = 16;

    return RNG_SUCCESS;
}

/*
 seedexpander()
 ctx - stores the current state of an instance of the seed expander
 x - returns the XOF data
 xlen - number of bytes to return

 Returns RNG_BAD_OUTBUF when x is NULL, RNG_BAD_REQ_LEN when xlen is not
 smaller than the remaining budget, RNG_SUCCESS otherwise.
 NOTE(review): the length check uses >=, so a request for exactly the
 remaining number of bytes is rejected - confirm this is intended.
 */
int
seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen)
{
    if ( x == NULL )
        return RNG_BAD_OUTBUF;
    if ( xlen >= ctx->length_remaining )
        return RNG_BAD_REQ_LEN;

    ctx->length_remaining -= xlen;

    unsigned char *out = x;
    while ( xlen > 0 ) {
        // Bytes of the current block that have not been handed out yet.
        const int avail = 16 - ctx->buffer_pos;

        if ( xlen <= avail ) { // buffer has what we need
            memcpy(out, ctx->buffer + ctx->buffer_pos, xlen);
            ctx->buffer_pos += xlen;
            return RNG_SUCCESS;
        }

        // take what's in the buffer
        memcpy(out, ctx->buffer + ctx->buffer_pos, avail);
        xlen -= avail;
        out += avail;

        // produce the next block
        AES256_ECB(ctx->key, ctx->ctr, ctx->buffer);
        ctx->buffer_pos = 0;

        // increment the counter held in ctr[12..15], last byte first
        for (int pos = 15; pos >= 12; pos--) {
            if ( ++ctx->ctr[pos] != 0x00 )
                break;
        }
    }

    return RNG_SUCCESS;
}


// Error sink for the OpenSSL calls in this file: prints the OpenSSL error
// queue to stderr and terminates the process with abort(). Does not return.
void handleErrors(void)
{
ERR_print_errors_fp(stderr);
abort();
}

// Use whatever AES implementation you have. This uses AES from openSSL library
// key - 256-bit AES key
// ctr - a 128-bit plaintext value
// buffer - a 128-bit ciphertext value
//
// Encrypts the single 16-byte block |ctr| under |key| and stores the result
// in |buffer|. Any OpenSSL failure ends the process through handleErrors().
void
AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer)
{
    EVP_CIPHER_CTX *ctx;
    int len;    // byte count reported by EVP_EncryptUpdate; not needed further

    /* Create and initialise the context */
    if(!(ctx = EVP_CIPHER_CTX_new())) handleErrors();

    if(1 != EVP_EncryptInit_ex(ctx, EVP_aes_256_ecb(), NULL, key, NULL))
        handleErrors();

    if(1 != EVP_EncryptUpdate(ctx, buffer, &len, ctr, 16))
        handleErrors();

    /* Clean up */
    EVP_CIPHER_CTX_free(ctx);
}

// (Re)seeds the global DRBG state DRBG_ctx.
// entropy_input          - 48 bytes of seed material
// personalization_string - optional 48 bytes XORed into the seed material;
//                          may be NULL
// security_strength      - not used by this implementation
void
randombytes_init(unsigned char *entropy_input,
                 unsigned char *personalization_string,
                 int security_strength)
{
    unsigned char seed_material[48];

    (void)security_strength;

    for (int k = 0; k < 48; k++) {
        seed_material[k] = entropy_input[k];
        if (personalization_string)
            seed_material[k] ^= personalization_string[k];
    }

    // Start from an all-zero key and counter, then mix in the seed material.
    memset(DRBG_ctx.Key, 0x00, 32);
    memset(DRBG_ctx.V, 0x00, 16);
    AES256_CTR_DRBG_Update(seed_material, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter = 1;
}

// Fills |x| with |xlen| bytes from the global DRBG state DRBG_ctx.
// Each 16-byte block is obtained by incrementing V and encrypting it; after
// the request has been served the state is advanced with
// AES256_CTR_DRBG_Update() and the reseed counter is incremented.
// Always returns RNG_SUCCESS.
int
randombytes(unsigned char *x, unsigned long long xlen)
{
    unsigned char block[16];
    // size_t rather than int: the offset must be able to follow any request
    // size without overflowing a signed integer.
    size_t offset = 0;

    while ( xlen > 0 ) {
        //increment V (big-endian counter, last byte first)
        for (int j=15; j>=0; j--) {
            if ( ++DRBG_ctx.V[j] != 0x00 )
                break;
        }
        AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, block);

        // Copy a whole block, or only the tail that is still missing.
        const size_t take = (xlen > 15) ? 16 : (size_t)xlen;
        memcpy(x + offset, block, take);
        offset += take;
        xlen -= take;
    }
    AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V);
    DRBG_ctx.reseed_counter++;

    return RNG_SUCCESS;
}

// Derives a new (Key, V) pair in place.
// Three successive counter values are encrypted under the current key to
// obtain 48 bytes; when |provided_data| is not NULL its 48 bytes are XORed
// in. The first 32 bytes become the new Key, the last 16 the new V.
void
AES256_CTR_DRBG_Update(unsigned char *provided_data,
                       unsigned char *Key,
                       unsigned char *V)
{
    unsigned char temp[48];

    for (int blk = 0; blk < 3; blk++) {
        //increment V (big-endian counter, last byte first)
        for (int j = 15; j >= 0; j--) {
            if ( ++V[j] != 0x00 )
                break;
        }

        AES256_ECB(Key, V, temp + 16*blk);
    }

    if ( provided_data != NULL ) {
        for (int k = 0; k < 48; k++)
            temp[k] ^= provided_data[k];
    }

    memcpy(Key, temp, 32);
    memcpy(V, temp+32, 16);
}










+ 55
- 0
src/sign/picnic/picnic3l1/avx2/NIST-KATs/rng.h View File

@@ -0,0 +1,55 @@
//
// rng.h
//
// Created by Bassham, Lawrence E (Fed) on 8/29/17.
// Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved.
//

#ifndef rng_h
#define rng_h

#include <stdio.h>

#define RNG_SUCCESS 0
#define RNG_BAD_MAXLEN -1
#define RNG_BAD_OUTBUF -2
#define RNG_BAD_REQ_LEN -3

// State of one seed expander instance (see seedexpander_init / seedexpander).
typedef struct {
// most recently generated 16-byte output block
unsigned char buffer[16];
// index of the next unread byte in buffer; 16 means the block is used up
int buffer_pos;
// number of bytes that may still be requested
unsigned long length_remaining;
// AES-256 key, copied from the 32-byte seed
unsigned char key[32];
// counter block: 8-byte diversifier, 4-byte maxlen, 4-byte block counter
unsigned char ctr[16];
} AES_XOF_struct;

// State of the AES-256 CTR DRBG behind randombytes().
typedef struct {
// current AES-256 key
unsigned char Key[32];
// current counter block, incremented before every encryption
unsigned char V[16];
// set to 1 by randombytes_init(), incremented by every randombytes() call
int reseed_counter;
} AES256_CTR_DRBG_struct;


void
AES256_CTR_DRBG_Update(unsigned char *provided_data,
unsigned char *Key,
unsigned char *V);

int
seedexpander_init(AES_XOF_struct *ctx,
unsigned char *seed,
unsigned char *diversifier,
unsigned long maxlen);

int
seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen);

void
randombytes_init(unsigned char *entropy_input,
unsigned char *personalization_string,
int security_strength);

int
randombytes(unsigned char *x, unsigned long long xlen);

#endif /* rng_h */

+ 73
- 0
src/sign/picnic/picnic3l1/avx2/aligned_alloc.c View File

@@ -0,0 +1,73 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/* define HAVE_* for more known good configurations */
#if !defined(HAVE_POSIX_MEMALIGN) && \
((defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || defined(__APPLE__))
/* defined in POSIX and available on OS X */
#define HAVE_POSIX_MEMALIGN
#endif

#if !defined(HAVE_MEMALIGN) && defined(__linux__)
/* always available on Linux */
#define HAVE_MEMALIGN
#endif

#include "compat.h"
#if !defined(HAVE_ALIGNED_ALLOC)
#include <errno.h>
#include <stdlib.h>
/* <malloc.h> provides memalign and the MinGW/MSVC aligned allocators used below */
#if !defined(HAVE_POSIX_MEMALIGN) || defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)
#include <malloc.h>
#endif

/* Fallback for C11 aligned_alloc.
 *
 * Fails with errno = EINVAL unless |alignment| is a power of two and |size|
 * is a multiple of it. Otherwise the request is forwarded to whichever
 * platform allocator was selected at build time; when none is available the
 * function returns NULL (with errno = ENOMEM for a non-zero size). */
void* aligned_alloc(size_t alignment, size_t size) {
  /* For a power of two, alignment - 1 has exactly the low bits set. */
  const size_t low_bits = alignment - 1;
  if ((alignment & low_bits) != 0 || (size & low_bits) != 0) {
    errno = EINVAL;
    return NULL;
  }

#if defined(HAVE_POSIX_MEMALIGN)
  /* posix_memalign needs an alignment of at least sizeof(void*) */
  const size_t min_alignment = sizeof(void*);
  if (alignment < min_alignment) {
    alignment = min_alignment;
  }

  void* ptr     = NULL;
  const int err = posix_memalign(&ptr, alignment, size);
  if (err) {
    /* posix_memalign reports failures through its return value */
    errno = err;
  }
  return ptr;
#elif defined(HAVE_MEMALIGN)
  return memalign(alignment, size);
#elif defined(__MINGW32__) || defined(__MINGW64__)
  return __mingw_aligned_malloc(size, alignment);
#elif defined(_MSC_VER)
  return _aligned_malloc(size, alignment);
#else
  if (size > 0) {
    errno = ENOMEM;
  }
  return NULL;
#endif
}

/* Releases memory obtained from aligned_alloc() in this file, using the
 * deallocator that matches the allocator selected at build time. When none of
 * the platform macros below is defined the function does nothing. */
void aligned_free(void* ptr) {
#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
free(ptr);
#elif defined(__MINGW32__) || defined(__MINGW64__)
__mingw_aligned_free(ptr);
#elif defined(_MSC_VER)
_aligned_free(ptr);
#endif
}

#endif

+ 16
- 0
src/sign/picnic/picnic3l1/avx2/api.h View File

@@ -0,0 +1,16 @@
#ifndef PICNIC3_L1_FS_API_H
#define PICNIC3_L1_FS_API_H

#define CRYPTO_SECRETKEYBYTES (1 + 2 * 17 + 17)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 17)
#define CRYPTO_BYTES (4 + 14608)
#define CRYPTO_ALGNAME "picnic3l1"
#define CRYPTO_DETERMINISTIC 1

int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);
int crypto_sign(unsigned char* sm, unsigned long long* smlen, const unsigned char* m,
unsigned long long mlen, const unsigned char* sk);
int crypto_sign_open(unsigned char* m, unsigned long long* mlen, const unsigned char* sm,
unsigned long long smlen, const unsigned char* pk);

#endif

+ 188
- 0
src/sign/picnic/picnic3l1/avx2/bitstream.c View File

@@ -0,0 +1,188 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include "bitstream.h"
#include "macros.h"

/* Reads the next num_bits (1..64) bits from the stream, most significant bit
 * first, and advances the stream position by num_bits. */
uint64_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 64);

  const uint8_t* src = &bs->buffer.r[bs->position / 8];
  /* number of not yet consumed bits in the first byte */
  const unsigned int head_bits = 8 - (bs->position % 8);

  bs->position += num_bits;

  /* drop the bits of the first byte that were consumed earlier */
  uint64_t acc = *src & ((1 << head_bits) - 1);
  ++src;

  if (num_bits <= head_bits) {
    /* everything requested lives in the first byte */
    return acc >> (head_bits - num_bits);
  }

  /* whole bytes */
  unsigned int remaining = num_bits - head_bits;
  while (remaining >= 8) {
    acc = (acc << 8) | *src;
    ++src;
    remaining -= 8;
  }

  /* leading bits of the last, partially used byte */
  if (remaining > 0) {
    acc = (acc << remaining) | ((*src >> (8 - remaining)) & ((1 << remaining) - 1));
  }

  return acc;
}

/*
 * Read num_bits (1..8) bits from the stream, most significant bit first, and
 * advance bs->position by num_bits. The bits span at most two bytes.
 */
uint8_t bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 8);

  const size_t start         = bs->position;
  const uint8_t* src         = bs->buffer.r + start / 8;
  const unsigned int consumed = start % 8;
  const unsigned int avail    = 8 - consumed;

  bs->position = start + num_bits;

  /* unread lower bits of the first byte */
  uint8_t acc = (src[0] & ((1 << avail) - 1));

  if (num_bits > avail) {
    /* the rest comes from the top of the following byte */
    const unsigned int rest = num_bits - avail;
    acc = acc << rest | ((src[1] >> (8 - rest)) & ((1 << rest) - 1));
    return acc;
  }

  return acc >> (avail - num_bits);
}

/*
 * Read num_bits (1..32) bits from the stream, most significant bit first,
 * starting at bs->position, and advance the position by num_bits.
 */
uint32_t bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 32);

  const size_t start         = bs->position;
  const uint8_t* src         = bs->buffer.r + start / 8;
  const unsigned int consumed = start % 8;
  const unsigned int avail    = 8 - consumed;

  bs->position = start + num_bits;

  /* unread lower bits of the first byte */
  uint32_t acc = (src[0] & ((1 << avail) - 1));
  if (num_bits <= avail) {
    return acc >> (avail - num_bits);
  }

  unsigned int remaining = num_bits - avail;
  size_t idx             = 1;
  while (remaining >= 8) {
    acc = acc << 8 | src[idx];
    ++idx;
    remaining -= 8;
  }

  if (remaining > 0) {
    /* leading bits of the last, partially read byte */
    acc = acc << remaining | ((src[idx] >> (8 - remaining)) & ((1 << remaining) - 1));
  }

  return acc;
}

/*
 * Write the low num_bits (1..64) bits of value to the stream, most significant
 * bit first, starting at bs->position, and advance the position by num_bits.
 * Target bits in partially written bytes are cleared before being set, so
 * existing stream content can be overwritten.
 */
void bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 64);

  const size_t start      = bs->position;
  const unsigned int used = start % 8;
  uint8_t* dst            = bs->buffer.w + start / 8;

  bs->position = start + num_bits;

  if (used != 0) {
    /* the upper `used` bits of the current byte are already occupied */
    const unsigned int room = 8 - used;
    const unsigned int head = (num_bits < room) ? num_bits : room;

    /* clear the target bits, keep everything around them */
    *dst &= (0xFF << room) | (0xFF >> (used + head));
    *dst |= (value >> (num_bits - head)) << (room - head);
    ++dst;
    num_bits -= head;
  }

  while (num_bits >= 8) {
    /* whole bytes */
    *dst = value >> (num_bits - 8);
    ++dst;
    num_bits -= 8;
  }

  if (num_bits != 0) {
    /* trailing bits go to the top of the last byte; its lower bits are kept */
    *dst &= (0xFF >> num_bits);
    *dst |= (value & ((1 << num_bits) - 1)) << (8 - num_bits);
  }
}

/*
 * Write the low num_bits (1..8) bits of value to the stream, most significant
 * bit first, and advance bs->position by num_bits. Bits in a partially used
 * byte are OR-ed in without clearing; a freshly started byte is overwritten.
 */
void bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 8);

  const size_t start      = bs->position;
  const unsigned int used = start % 8;
  uint8_t* dst            = bs->buffer.w + start / 8;

  bs->position = start + num_bits;

  if (used != 0) {
    /* the upper `used` bits of the current byte are already occupied */
    const unsigned int room = 8 - used;
    const unsigned int head = (num_bits < room) ? num_bits : room;

    *dst |= (value >> (num_bits - head)) << (room - head);
    ++dst;
    num_bits -= head;
  }

  if (num_bits != 0) {
    *dst = (value & ((1 << num_bits) - 1)) << (8 - num_bits);
  }
}

/*
 * Write the low num_bits (1..32) bits of value to the stream, most significant
 * bit first, and advance bs->position by num_bits. Bits in a partially used
 * byte are OR-ed in without clearing; every following byte is overwritten.
 */
void bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits) {
  ASSUME(1 <= num_bits && num_bits <= 32);

  const size_t start      = bs->position;
  const unsigned int used = start % 8;
  uint8_t* dst            = bs->buffer.w + start / 8;

  bs->position = start + num_bits;

  if (used != 0) {
    /* the upper `used` bits of the current byte are already occupied */
    const unsigned int room = 8 - used;
    const unsigned int head = (num_bits < room) ? num_bits : room;

    *dst |= (value >> (num_bits - head)) << (room - head);
    ++dst;
    num_bits -= head;
  }

  while (num_bits >= 8) {
    /* whole bytes */
    *dst = value >> (num_bits - 8);
    ++dst;
    num_bits -= 8;
  }

  if (num_bits != 0) {
    *dst = (value & ((1 << num_bits) - 1)) << (8 - num_bits);
  }
}

/*
 * Serialize the top `size` bits of the first block of v into the bitstream.
 *
 * Words are emitted from w64[width - 1] downwards; a trailing partial word
 * contributes only its most significant `size % 64` bits.
 *
 * The words are addressed by index rather than by a decrementing pointer:
 * when `size` is a multiple of 64 and consumes all `width` words, a pointer
 * cursor would be stepped to one element before the array, which is undefined
 * behaviour even if it is never dereferenced.
 */
void mzd_to_bitstream(bitstream_t* bs, const mzd_local_t* v, const size_t width,
                      const size_t size) {
  const size_t word_bits      = sizeof(uint64_t) * 8;
  const uint64_t* const words = CONST_BLOCK(v, 0)->w64;

  /* number of words not yet emitted; the next one is words[next - 1] */
  size_t next = width;
  size_t bits = size;
  for (; bits >= word_bits; bits -= word_bits) {
    --next;
    bitstream_put_bits(bs, words[next], word_bits);
  }
  if (bits) {
    bitstream_put_bits(bs, words[next - 1] >> (word_bits - bits), bits);
  }
}

/*
 * Deserialize `size` bits from the bitstream into the first block of v.
 *
 * Words are filled from w64[width - 1] downwards; a trailing partial word
 * receives its bits in the most significant positions. All remaining lower
 * words are set to zero.
 *
 * The words are addressed by index rather than by a decrementing pointer:
 * the pointer-based loop `for (; d >= f; --d)` necessarily steps the cursor
 * to one element before the array and compares it, which is undefined
 * behaviour.
 */
void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t width, const size_t size) {
  const size_t word_bits = sizeof(uint64_t) * 8;
  uint64_t* const words  = BLOCK(v, 0)->w64;

  /* number of words not yet written; the next one is words[next - 1] */
  size_t next = width;
  size_t bits = size;
  for (; bits >= word_bits; bits -= word_bits) {
    --next;
    words[next] = bitstream_get_bits(bs, word_bits);
  }
  if (bits) {
    --next;
    words[next] = bitstream_get_bits(bs, bits) << (word_bits - bits);
  }
  /* clear whatever was not covered by the stream */
  while (next > 0) {
    --next;
    words[next] = 0;
  }
}

+ 35
- 0
src/sign/picnic/picnic3l1/avx2/bitstream.h View File

@@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef BITSTREAM_H
#define BITSTREAM_H

#include <stddef.h>
#include <stdint.h>
#include "mzd_additional.h"

/* Cursor over a byte buffer that is read or written bit by bit. */
typedef struct {
/* the same buffer, viewed as writable (w) or read-only (r) */
union {
uint8_t* w;
const uint8_t* r;
} buffer;
/* current offset into the buffer, counted in bits */
size_t position;
} bitstream_t;

/* Read num_bits bits (most significant first) at bs->position and advance the
 * position. The accepted range is 1..64, 1..8 and 1..32 respectively. */
uint64_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits);
uint8_t bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits);
uint32_t bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits);
/* Write the low num_bits bits of value (most significant first) at
 * bs->position and advance the position. Only bitstream_put_bits clears the
 * target bits of partially written bytes before setting them. */
void bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits);
void bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits);
void bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits);

/* Transfer `size` bits between the bitstream and the first block of v, starting
 * with 64-bit word index width - 1 and moving towards index 0. */
void mzd_to_bitstream(bitstream_t* bs, const mzd_local_t* v, const size_t width, const size_t size);
void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t width, const size_t size);

#endif

+ 104
- 0
src/sign/picnic/picnic3l1/avx2/compat.h View File

@@ -0,0 +1,104 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC_COMPAT_H
#define PICNIC_COMPAT_H

/* in case cmake checks were not run, define HAVE_* for known good configurations */

#include "macros.h"
#if defined(__OpenBSD__)
#include <sys/param.h>
#endif /* __OpenBSD__ */

#if !defined(HAVE_ALIGNED_ALLOC) && !defined(__APPLE__) && !defined(__MINGW32__) && \
!defined(__MINGW64__) && \
(defined(_ISOC11_SOURCE) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L))
/* aligned_alloc was introduced in ISO C 2011 */
#define HAVE_ALIGNED_ALLOC
#endif /* HAVE_ALIGNED_ALLOC */

#if !defined(HAVE_EXPLICIT_BZERO) && \
(GLIBC_CHECK(2, 25) || (defined(__OpenBSD__) && OpenBSD >= 201405) || FREEBSD_CHECK(11, 0) || \
NETBSD_CHECK(8, 0))
/* explicit_bzero was introduced in glibc 2.25, OpenBSD 5.5, FreeBSD 11.0 and NetBSD 8.0 */
#define HAVE_EXPLICIT_BZERO
#endif /* HAVE_EXPLICIT_BZERO */

#if !defined(HAVE_CONSTTIME_MEMEQUAL) && NETBSD_CHECK(7, 0)
/* consttime_memequal was introduced in NetBSD 7.0 */
#define HAVE_CONSTTIME_MEMEQUAL
#endif /* HAVE_CONSTTIME_MEMEQUAL */

#if !defined(HAVE_TIMINGSAFE_BCMP) && ((defined(__OpenBSD__) && OpenBSD >= 201105) || \
FREEBSD_CHECK(12, 0) || MACOSX_CHECK(10, 12, 1))
/* timingsafe_bcmp was introduced in OpenBSD 4.9, FreeBSD 12.0, and MacOS X 10.12 */
#define HAVE_TIMINGSAFE_BCMP
#endif /* HAVE_TIMINGSAFE_BCMP */

#if defined(HAVE_ALIGNED_ALLOC)
#include <stdlib.h>

#define aligned_free(ptr) free((ptr))
#else
#include <stddef.h>

/**
* Compatibility implementation of aligned_alloc from ISO C 2011.
*/
void* aligned_alloc(size_t alignment, size_t size);
/**
* Some aligned_alloc compatibility implementations require custom free
* functions, so we provide one too.
*/
void aligned_free(void* ptr);
#endif /* HAVE_ALIGNED_ALLOC */

#include "endian_compat.h"

#if !defined(HAVE_TIMINGSAFE_BCMP)
/**
 * Compatibility implementation of timingsafe_bcmp from OpenBSD 4.9 and FreeBSD 12.0.
 *
 * Returns 0 if the first len bytes of a and b are equal and a non-zero value
 * otherwise. Every byte is always inspected; there is no early exit.
 */
static inline int timingsafe_bcmp(const void* a, const void* b, size_t len) {
#if defined(HAVE_CONSTTIME_MEMEQUAL)
  return !consttime_memequal(a, b, len);
#else
  const unsigned char* lhs = a;
  const unsigned char* rhs = b;

  /* accumulate all byte differences */
  unsigned int diff = 0;
  for (size_t i = 0; i < len; ++i) {
    diff |= lhs[i] ^ rhs[i];
  }
  return diff;
#endif
}
#endif /* HAVE_TIMINGSAFE_BCMP */

#if !defined(HAVE_EXPLICIT_BZERO)
#if defined(_WIN32)
#include <windows.h>
#endif
/**
 * Compatibility implementation of explicit_bzero.
 *
 * Sets len bytes starting at a to zero. On Windows this is delegated to
 * SecureZeroMemory; elsewhere the bytes are written through a volatile
 * pointer so that the stores are not optimized away.
 */
static inline void explicit_bzero(void* a, size_t len) {
#if defined(_WIN32)
  SecureZeroMemory(a, len);
#else
  volatile char* p = a;
  /* advance the write cursor p (not a), otherwise only the first byte is cleared */
  for (; len; ++p, --len) {
    *p = 0;
  }
#endif
}
#endif /* HAVE_EXPLICIT_BZERO */

#endif

+ 122
- 0
src/sign/picnic/picnic3l1/avx2/cpu.c View File

@@ -0,0 +1,122 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/* If cmake checks were not run, define some known values. */
#if !defined(HAVE_SYS_AUXV_H) && defined(__linux__)
#define HAVE_SYS_AUXV_H
#endif

#if !defined(HAVE_ASM_HWCAP_H) && defined(__linux__) && defined(__arm__)
#define HAVE_ASM_HWCAP_H
#endif

#include "cpu.h"

#if !defined(BUILTIN_CPU_SUPPORTED) || defined(BUILTIN_CPU_SUPPORTED_BROKEN_BMI2)
#if defined(__arm__) && defined(HAVE_SYS_AUXV_H) && defined(HAVE_ASM_HWCAP_H)
#include <asm/hwcap.h>
#include <sys/auxv.h>

/* Detect CPU capabilities on ARM/Linux from the auxiliary vector: reports
 * CPU_CAP_NEON if the HWCAP_NEON bit is set in AT_HWCAP, otherwise nothing. */
static unsigned int init_caps(void) {
  const unsigned long hwcap = getauxval(AT_HWCAP);
  return (hwcap & HWCAP_NEON) ? CPU_CAP_NEON : 0;
}

#elif (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_AMD64)) && (defined(__GNUC__) || defined(_MSC_VER))

#ifdef _MSC_VER
#include <intrin.h>

/*
 * Detect CPU capabilities via the MSVC __cpuid/__cpuidex intrinsics.
 *
 * Returns a bit mask of CPU_CAP_* flags (SSE2, POPCNT, AVX2, BMI2).
 */
static unsigned init_caps(void) {
  unsigned int caps = 0;

  union {
    struct {
      unsigned int eax, ebx, ecx, edx;
    };
    int data[4];
  } regs = {0};

  /* leaf 0: EAX holds the highest supported standard leaf */
  __cpuid(regs.data, 0);
  unsigned int max = regs.eax;

  if (max >= 1) {
    /* leaf 1 carries the feature bits; querying leaf 0 again would test
     * bits of the vendor string instead */
    __cpuid(regs.data, 1);
    if (regs.edx & (1 << 26)) {
      caps |= CPU_CAP_SSE2;
    }
    if (regs.ecx & (1 << 23)) {
      caps |= CPU_CAP_POPCNT;
    }
  }

  if (max >= 7) {
    /* leaf 7, sub-leaf 0: extended feature bits in EBX */
    __cpuidex(regs.data, 7, 0);
    if (regs.ebx & (1 << 5)) {
      caps |= CPU_CAP_AVX2;
    }
    if (regs.ebx & (1 << 8)) {
      caps |= CPU_CAP_BMI2;
    }
  }

  return caps;
}
#else
#include <cpuid.h>

/*
 * Detect CPU capabilities via the helpers from <cpuid.h> (GCC/clang).
 *
 * Returns a bit mask of CPU_CAP_* flags (SSE2, POPCNT, AVX2, BMI2).
 */
static unsigned init_caps(void) {
  unsigned int caps = 0;
  unsigned int eax, ebx, ecx, edx;

  /* leaf 1: basic feature bits in EDX/ECX */
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    if (edx & (1 << 26)) {
      caps |= CPU_CAP_SSE2;
    }
    if (ecx & (1 << 23)) {
      caps |= CPU_CAP_POPCNT;
    }
  }

  /* Leaf 7 is indexed by a sub-leaf in ECX. __get_cpuid() does not set ECX
   * before executing cpuid, so check the maximum leaf and request sub-leaf 0
   * explicitly. __get_cpuid_max/__cpuid_count are used instead of
   * __get_cpuid_count because they are also available in older compilers. */
  if (__get_cpuid_max(0, &eax) >= 7) {
    __cpuid_count(7, 0, eax, ebx, ecx, edx);
    if (ebx & (1 << 5)) {
      caps |= CPU_CAP_AVX2;
    }
    if (ebx & (1 << 8)) {
      caps |= CPU_CAP_BMI2;
    }
  }

  return caps;
}
#endif

#else

/* Fallback for platforms without a detection method above: report no capabilities. */
static unsigned init_caps(void) {
return 0;
}

#endif

#include <limits.h>

static unsigned int cpu_caps = UINT_MAX;

/*
 * Return true iff every capability bit requested in caps was detected.
 * Detection runs on the first call (cpu_caps still holds the UINT_MAX
 * sentinel) and the result is cached in cpu_caps.
 */
bool cpu_supports(unsigned int caps) {
  unsigned int detected = cpu_caps;
  if (detected == UINT_MAX) {
    detected = init_caps();
    cpu_caps = detected;
  }

  /* no requested bit may be missing from the detected set */
  return (caps & ~detected) == 0;
}
#endif

+ 45
- 0
src/sign/picnic/picnic3l1/avx2/cpu.h View File

@@ -0,0 +1,45 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef CPU_H
#define CPU_H

#include "macros.h"

#if defined(__GNUC__) && !(defined(__APPLE__) && (__clang_major__ <= 8)) && \
!defined(__MINGW32__) && !defined(__MINGW64__)
#define BUILTIN_CPU_SUPPORTED
#endif

#if defined(BUILTIN_CPU_SUPPORTED) && GNUC_CHECK(4, 9) && !GNUC_CHECK(5, 0)
/* gcc 4.9's __builtin_cpu_supports does not support "bmi2" */
#define BUILTIN_CPU_SUPPORTED_BROKEN_BMI2
#endif

#if !defined(BUILTIN_CPU_SUPPORTED) || defined(BUILTIN_CPU_SUPPORTED_BROKEN_BMI2)
#include <stdbool.h>

/* CPU supports SSE2 */
#define CPU_CAP_SSE2 0x00000001
/* CPU supports popcnt */
#define CPU_CAP_POPCNT 0x00000002
/* CPU supports AVX2 */
#define CPU_CAP_AVX2 0x00000004
/* CPU supports BMI2 */
#define CPU_CAP_BMI2 0x00000010
/* CPU supports NEON */
#define CPU_CAP_NEON 0x00000008

/**
* Helper function in case __builtin_cpu_supports is not available.
*
* caps is a bitwise OR of CPU_CAP_* flags; the result is true only if all of
* the requested capabilities are available.
*/
bool cpu_supports(unsigned int caps);
#endif

#endif

+ 6
- 0
src/sign/picnic/picnic3l1/avx2/crypto_sign.h View File

@@ -0,0 +1,6 @@
#ifndef CRYPTO_SIGN_H
#define CRYPTO_SIGN_H

#include "api.h"

#endif

+ 173
- 0
src/sign/picnic/picnic3l1/avx2/endian_compat.h View File

@@ -0,0 +1,173 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC_COMPAT_ENDIAN_H
#define PICNIC_COMPAT_ENDIAN_H

#include <stdint.h>
#include "macros.h"

#if defined(__GNUC__) || defined(__clang__)
#define bswap16(x) __builtin_bswap16(x)
#define bswap32(x) __builtin_bswap32(x)
#define bswap64(x) __builtin_bswap64(x)
#elif defined(_MSC_VER)
#include <stdlib.h>

#define bswap16(x) _byteswap_ushort(x)
#define bswap32(x) _byteswap_ulong(x)
#define bswap64(x) _byteswap_uint64(x)
#else
/* Portable fallback: exchange the two bytes of a 16-bit value. */
static inline uint16_t ATTR_CONST bswap16(uint16_t x) {
  const unsigned int hi = (x >> 8) & 0xffu;
  const unsigned int lo = x & 0xffu;
  return (uint16_t)((lo << 8) | hi);
}

/* Portable fallback: reverse the byte order of a 32-bit value. */
static inline uint32_t ATTR_CONST bswap32(uint32_t x) {
  uint32_t swapped = 0;
  /* peel bytes off the low end of x and push them into swapped */
  for (unsigned int i = 0; i < sizeof(x); ++i) {
    swapped = (swapped << 8) | (x & UINT32_C(0xff));
    x >>= 8;
  }
  return swapped;
}

/* Portable fallback: reverse the byte order of a 64-bit value. */
static inline uint64_t ATTR_CONST bswap64(uint64_t x) {
  uint64_t swapped = 0;
  /* peel bytes off the low end of x and push them into swapped */
  for (unsigned int i = 0; i < sizeof(x); ++i) {
    swapped = (swapped << 8) | (x & UINT64_C(0xff));
    x >>= 8;
  }
  return swapped;
}
#endif

/* Linux / GLIBC */
#if defined(__linux__) || defined(__GLIBC__)
#include <endian.h>
/* endian.h only provides conversion functions if built with one of these defines */
#if defined(_DEFAULT_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
#define HAVE_HOSTSWAP
#endif
#endif

/* Windows */
#if defined(_WIN16) || defined(_WIN32) || defined(_WIN64)
#if defined(__MINGW32__) || defined(__MINGW64__)
#include <sys/param.h>
#else
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#endif

/* OS X */
#if defined(__APPLE__)
#include <machine/endian.h>
#endif

/* OpenBSD */
#if defined(__OpenBSD__)
#include <machine/endian.h>
#define HAVE_HOSTSWAP
#endif

/* other BSDs */
#if defined(__FreeBSD__) || defined(__NETBSD__) || defined(__NetBSD__)
#include <sys/endian.h>
#define HAVE_HOSTSWAP
#endif

#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN)
#if defined(BIG_ENDIAN) && defined(LITTLE_ENDIAN)
#if defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN
#define PICNIC_IS_BIG_ENDIAN
#elif defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#elif defined(BIG_ENDIAN)
#define PICNIC_IS_BIG_ENDIAN
#elif defined(LITTLE_ENDIAN)
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#endif

#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN)
#if defined(_BIG_ENDIAN) && defined(_LITTLE_ENDIAN)
#if defined(_BYTE_ORDER) && _BYTE_ORDER == _BIG_ENDIAN
#define PICNIC_IS_BIG_ENDIAN
#elif defined(_BYTE_ORDER) && _BYTE_ORDER == _LITTLE_ENDIAN
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#elif defined(_BIG_ENDIAN)
#define PICNIC_IS_BIG_ENDIAN
#elif defined(_LITTLE_ENDIAN)
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#endif

#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN)
#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN
#define PICNIC_IS_BIG_ENDIAN
#elif defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#elif defined(__BIG_ENDIAN)
#define PICNIC_IS_BIG_ENDIAN
#elif defined(__LITTLE_ENDIAN)
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#endif

#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN)
#if defined(__BIG_ENDIAN__) && defined(__LITTLE_ENDIAN__)
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __BIG_ENDIAN__
#define PICNIC_IS_BIG_ENDIAN
#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#elif defined(__BIG_ENDIAN__)
#define PICNIC_IS_BIG_ENDIAN
#elif defined(__LITTLE_ENDIAN__)
#define PICNIC_IS_LITTLE_ENDIAN
#endif
#endif

#if !defined(PICNIC_IS_LITTLE_ENDIAN) && !defined(PICNIC_IS_BIG_ENDIAN)
#error "Unknown platform!"
#endif

#if !defined(HAVE_HOSTSWAP)
#if defined(PICNIC_IS_LITTLE_ENDIAN)
#define htobe16(x) bswap16((x))
#define htole16(x) ((uint16_t)(x))
#define be16toh(x) bswap16((x))
#define le16toh(x) ((uint16_t)(x))

#define htobe32(x) bswap32((x))
#define htole32(x) ((uint32_t)(x))
#define be32toh(x) bswap32((x))
#define le32toh(x) ((uint32_t)(x))

#define htobe64(x) bswap64((x))
#define htole64(x) ((uint64_t)(x))
#define be64toh(x) bswap64((x))
#define le64toh(x) ((uint64_t)(x))
#elif defined(PICNIC_IS_BIG_ENDIAN)
#define htobe16(x) ((uint16_t)(x))
#define htole16(x) bswap16((x))
#define be16toh(x) ((uint16_t)(x))
#define le16toh(x) bswap16((x))

#define htobe32(x) ((uint32_t)(x))
#define htole32(x) bswap32((x))
#define be32toh(x) ((uint32_t)(x))
#define le32toh(x) bswap32((x))

#define htobe64(x) ((uint64_t)(x))
#define htole64(x) bswap64((x))
#define be64toh(x) ((uint64_t)(x))
#define le64toh(x) bswap64((x))
#endif
#endif

#endif

+ 43
- 0
src/sign/picnic/picnic3l1/avx2/io.c View File

@@ -0,0 +1,43 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include "io.h"

#include <string.h>
#include "compat.h"

void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t len) {
const size_t word_count = (len + 7) / sizeof(uint64_t);
const block_t* block = CONST_BLOCK(data, 0);

for (size_t i = word_count; i; --i, dst += sizeof(uint64_t), len -= sizeof(uint64_t)) {
const uint64_t tmp = htobe64(block->w64[i - 1]);
memcpy(dst, &tmp, MIN(sizeof(tmp), len));
}
}

/*
 * Load len bytes from data into the first block of result. The input is
 * consumed in 8-byte big-endian groups that fill the 64-bit words from the
 * highest used index down to 0; a short final group is zero-padded.
 */
void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len) {
  uint64_t* words = BLOCK(result, 0)->w64;

  size_t idx = (len + 7) / sizeof(uint64_t);
  while (idx > 0) {
    --idx;
    uint64_t be = 0;
    memcpy(&be, data, MIN(sizeof(be), len));
    words[idx] = be64toh(be);
    data += sizeof(uint64_t);
    len -= sizeof(uint64_t);
  }
}

#if defined(PICNIC_STATIC) || !defined(NDEBUG)
/* Write len bytes from data to out as upper-case hex, two digits per byte
 * and without separators. */
void print_hex(FILE* out, const uint8_t* data, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    fprintf(out, "%02X", data[i]);
  }
}
#endif

+ 40
- 0
src/sign/picnic/picnic3l1/avx2/io.h View File

@@ -0,0 +1,40 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef IO_H
#define IO_H

#include <stdint.h>
#include <stdio.h>

#include "mzd_additional.h"

/* Convert between the first block of an mzd_local_t and a byte array of the
 * given length; words are stored in big-endian byte order, highest word first. */
void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t numbytes);
void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len);

/* Get one bit from a byte array; bit 0 is the most significant bit of array[0] */
static inline uint8_t getBit(const uint8_t* array, size_t bitNumber) {
  const uint8_t byte       = array[bitNumber >> 3];
  const unsigned int shift = 7u - (unsigned int)(bitNumber & 7u);
  return (byte >> shift) & 0x01;
}

/* Set a specific bit in a byte array to a given value; bit 0 is the most
 * significant bit of bytes[0] */
static inline void setBit(uint8_t* bytes, size_t bitNumber, uint8_t val) {
  const size_t index       = bitNumber >> 3;
  const unsigned int shift = 7u - (unsigned int)(bitNumber & 7u);

  /* clear the target bit first, then merge in the new value */
  const uint8_t cleared = bytes[index] & ~(1 << shift);
  bytes[index]          = cleared | (val << shift);
}

/* Return the lowest diff bits of byte; the result is non-zero iff any of
 * them is set. */
static inline int check_padding_bits(const uint8_t byte, const unsigned int diff) {
  const int padding_mask = ~(UINT8_C(0xff) << diff);
  return byte & padding_mask;
}

#if defined(PICNIC_STATIC) || !defined(NDEBUG)
void print_hex(FILE* out, const uint8_t* data, size_t len);
#endif

#endif

+ 159
- 0
src/sign/picnic/picnic3l1/avx2/kdf_shake.h View File

@@ -0,0 +1,159 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef KDF_SHAKE_H
#define KDF_SHAKE_H

#include <stdint.h>

#include "macros.h"
#include "endian_compat.h"

#if defined(WITH_SHAKE_S390_CPACF)
/* use the KIMD/KLMD instructions from CPACF for SHAKE support on S390 */
#include "sha3/s390_cpacf.h"
#else
#if !defined(SUPERCOP)
/* use SHAKE implementation in sha3/ */
#include "sha3/KeccakHash.h"
#else
/* use SUPERCOP implementation */
#include <libkeccak.a.headers/KeccakHash.h>
#endif

/* use the Keccakx4 implementation */
#include "KeccakHashtimes4.h"

typedef Keccak_HashInstance hash_context ATTR_ALIGNED(32);

/**
 * Initialize hash context based on the digest size used by Picnic: a digest
 * size of 32 bytes selects SHAKE128, every other size selects SHAKE256.
 */
static inline void hash_init(hash_context* ctx, size_t digest_size) {
  if (digest_size != 32) {
    Keccak_HashInitialize_SHAKE256(ctx);
    return;
  }
  Keccak_HashInitialize_SHAKE128(ctx);
}

/* Feed size bytes from data into the hash. The length is forwarded as
 * size << 3 (presumably a bit count -- confirm against KeccakHash.h). */
static inline void hash_update(hash_context* ctx, const uint8_t* data, size_t size) {
Keccak_HashUpdate(ctx, data, size << 3);
}

/* Finish the input phase; no output buffer is passed here (NULL), output is
 * obtained separately via hash_squeeze. */
static inline void hash_final(hash_context* ctx) {
Keccak_HashFinal(ctx, NULL);
}

/* Produce buflen bytes of output into buffer. The length is forwarded as
 * buflen << 3 (presumably a bit count -- confirm against KeccakHash.h). */
static inline void hash_squeeze(hash_context* ctx, uint8_t* buffer, size_t buflen) {
Keccak_HashSqueeze(ctx, buffer, buflen << 3);
}
#endif

/* Feed a 16-bit value into the hash in little-endian byte order,
 * independent of the host byte order. */
static inline void hash_update_uint16_le(hash_context* ctx, uint16_t data) {
const uint16_t data_le = htole16(data);
hash_update(ctx, (const uint8_t*)&data_le, sizeof(data_le));
}

/* Initialize the context as hash_init does and immediately absorb the single
 * byte prefix as the first input. */
static inline void hash_init_prefix(hash_context* ctx, size_t digest_size,
const uint8_t prefix) {
hash_init(ctx, digest_size);
hash_update(ctx, &prefix, sizeof(prefix));
}

typedef hash_context kdf_shake_t;

#define kdf_shake_init(ctx, digest_size) hash_init((ctx), (digest_size))
#define kdf_shake_init_prefix(ctx, digest_size, prefix) hash_init_prefix((ctx), (digest_size), (prefix))
#define kdf_shake_update_key(ctx, key, keylen) hash_update((ctx), (key), (keylen))
#define kdf_shake_update_key_uint16_le(ctx, key) hash_update_uint16_le((ctx), (key))
#define kdf_shake_finalize_key(ctx) hash_final((ctx))
#define kdf_shake_get_randomness(ctx, dst, count) hash_squeeze((ctx), (dst), (count))
#define kdf_shake_clear(ctx)

/* Instances that work with 4 states in parallel. */
typedef Keccak_HashInstancetimes4 hash_context_x4 ATTR_ALIGNED(32);

/* Initialize a 4-way context: a digest size of 32 bytes selects SHAKE128,
 * every other size selects SHAKE256. */
static inline void hash_init_x4(hash_context_x4* ctx, size_t digest_size) {
  if (digest_size != 32) {
    Keccak_HashInitializetimes4_SHAKE256(ctx);
    return;
  }
  Keccak_HashInitializetimes4_SHAKE128(ctx);
}

/* Feed size bytes into the 4-way hash; data is an array of input pointers
 * (the callers in this file pass four, one per state). The length is
 * forwarded as size << 3 (presumably a bit count -- confirm against
 * KeccakHashtimes4.h). */
static inline void hash_update_x4(hash_context_x4* ctx, const uint8_t** data, size_t size) {
Keccak_HashUpdatetimes4(ctx, data, size << 3);
}

/* Convenience wrapper around hash_update_x4 taking the four inputs (size
 * bytes each) as separate pointers. */
static inline void hash_update_x4_4(hash_context_x4* ctx, const uint8_t* data0,
const uint8_t* data1, const uint8_t* data2,
const uint8_t* data3, size_t size) {
const uint8_t* data[4] = { data0, data1, data2, data3 };
hash_update_x4(ctx, data, size);
}

/* Feed the same size bytes from data into all four states. */
static inline void hash_update_x4_1(hash_context_x4* ctx, const uint8_t* data, size_t size) {
const uint8_t* tmp[4] = { data, data, data, data };
hash_update_x4(ctx, tmp, size);
}

/* Initialize the 4-way context as hash_init_x4 does and absorb the single
 * byte prefix into all four states. */
static inline void hash_init_prefix_x4(hash_context_x4* ctx, size_t digest_size,
const uint8_t prefix) {
hash_init_x4(ctx, digest_size);
hash_update_x4_1(ctx, &prefix, sizeof(prefix));
}

/* Finish the input phase of the 4-way hash; no output buffer is passed here
 * (NULL), output is obtained separately via hash_squeeze_x4. */
static inline void hash_final_x4(hash_context_x4* ctx) {
Keccak_HashFinaltimes4(ctx, NULL);
}

/* Produce buflen bytes of output per state; buffer is an array of output
 * pointers (the callers in this file pass four). The length is forwarded as
 * buflen << 3 (presumably a bit count -- confirm against KeccakHashtimes4.h). */
static inline void hash_squeeze_x4(hash_context_x4* ctx, uint8_t** buffer, size_t buflen) {
Keccak_HashSqueezetimes4(ctx, buffer, buflen << 3);
}

/* Convenience wrapper around hash_squeeze_x4 taking the four output buffers
 * (buflen bytes each) as separate pointers. */
static inline void hash_squeeze_x4_4(hash_context_x4* ctx, uint8_t* buffer0, uint8_t* buffer1,
uint8_t* buffer2, uint8_t* buffer3, size_t buflen) {
uint8_t* buffer[4] = { buffer0, buffer1, buffer2, buffer3 };
hash_squeeze_x4(ctx, buffer, buflen);
}

/* Feed the same 16-bit value, in little-endian byte order, into all four states. */
static inline void hash_update_x4_uint16_le(hash_context_x4* ctx, uint16_t data) {
const uint16_t data_le = htole16(data);
hash_update_x4_1(ctx, (const uint8_t*)&data_le, sizeof(data_le));
}

/* Feed four 16-bit values, one per state and each in little-endian byte
 * order, into the 4-way hash. */
static inline void hash_update_x4_uint16s_le(hash_context_x4* ctx, const uint16_t data[4]) {
  const uint16_t le[4] = {htole16(data[0]), htole16(data[1]), htole16(data[2]),
                          htole16(data[3])};
  hash_update_x4_4(ctx, (const uint8_t*)&le[0], (const uint8_t*)&le[1], (const uint8_t*)&le[2],
                   (const uint8_t*)&le[3], sizeof(data[0]));
}

typedef hash_context_x4 kdf_shake_x4_t;

#define kdf_shake_x4_init(ctx, digest_size) hash_init_x4((ctx), (digest_size))
#define kdf_shake_x4_init_prefix(ctx, digest_size, prefix) \
hash_init_prefix_x4((ctx), (digest_size), (prefix))
#define kdf_shake_x4_update_key(ctx, key, keylen) hash_update_x4((ctx), (key), (keylen))
#define kdf_shake_x4_update_key_4(ctx, key0, key1, key2, key3, keylen) \
hash_update_x4_4((ctx), (key0), (key1), (key2), (key3), (keylen))
#define kdf_shake_x4_update_key_1(ctx, key, keylen) hash_update_x4_1((ctx), (key), (keylen))
#define kdf_shake_x4_update_key_uint16_le(ctx, key) hash_update_x4_uint16_le((ctx), (key))
#define kdf_shake_x4_update_key_uint16s_le(ctx, keys) hash_update_x4_uint16s_le((ctx), (keys))
#define kdf_shake_x4_finalize_key(ctx) hash_final_x4((ctx))
#define kdf_shake_x4_get_randomness(ctx, dst, count) hash_squeeze_x4((ctx), (dst), (count))
#define kdf_shake_x4_get_randomness_4(ctx, dst0, dst1, dst2, dst3, count) \
hash_squeeze_x4_4((ctx), (dst0), (dst1), (dst2), (dst3), (count))
#define kdf_shake_x4_clear(ctx)

#endif

+ 511
- 0
src/sign/picnic/picnic3l1/avx2/lowmc.c View File

@@ -0,0 +1,511 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include "io.h"
#include "lowmc.h"
#include "mzd_additional.h"
#include "bitstream.h"
#include "picnic3_impl.h"
#include "picnic3_types.h"
#include "simd.h"

#if !defined(_MSC_VER)
#include <stdalign.h>
#endif
#include <string.h>
#include <assert.h>

#include "lowmc_129_129_4.h"


#if !defined(NO_UINT64_FALLBACK)
/**
* S-box for m = 43
*
* Applies the S-box layer to `in` in place using the uint64 helpers. The
* three masks select the a, b and c bits; a and b are shifted left by 2 and 1
* so that they line up with c before the three output bits are computed, and
* the first two outputs are shifted back afterwards.
*/
static void sbox_uint64_lowmc_129_129_4(mzd_local_t* in) {
mzd_local_t x0m[1], x1m[1], x2m[1];
// a
mzd_and_uint64_192(x0m, mask_129_129_43_a, in);
// b
mzd_and_uint64_192(x1m, mask_129_129_43_b, in);
// c
mzd_and_uint64_192(x2m, mask_129_129_43_c, in);

// align a and b with the positions of c
mzd_shift_left_uint64_192(x0m, x0m, 2);
mzd_shift_left_uint64_192(x1m, x1m, 1);

mzd_local_t t0[1], t1[1], t2[1];
// b & c
mzd_and_uint64_192(t0, x1m, x2m);
// c & a
mzd_and_uint64_192(t1, x0m, x2m);
// a & b
mzd_and_uint64_192(t2, x0m, x1m);

// (b & c) ^ a
mzd_xor_uint64_192(t0, t0, x0m);

// (c & a) ^ a ^ b
mzd_xor_uint64_192(t1, t1, x0m);
mzd_xor_uint64_192(t1, t1, x1m);

// (a & b) ^ a ^ b ^c
mzd_xor_uint64_192(t2, t2, x0m);
mzd_xor_uint64_192(t2, t2, x1m);
mzd_xor_uint64_192(t2, t2, x2m);

// move the first two outputs back to the a and b positions
mzd_shift_right_uint64_192(t0, t0, 2);
mzd_shift_right_uint64_192(t1, t1, 1);

// combine the three outputs into the new state
mzd_xor_uint64_192(t2, t2, t1);
mzd_xor_uint64_192(in, t2, t0);
}


#endif /* NO_UINT_FALLBACK */

/*
 * S-box layer on word128[2] values, applied to `in` in place. mask_a, mask_b
 * and mask_c select the a, b and c bits. Computes the same three outputs as
 * the uint64 variant: (b & c) ^ a, (c & a) ^ a ^ b and (a & b) ^ a ^ b ^ c.
 */
ATTR_TARGET_S128
static inline void sbox_s128_full(mzd_local_t* in, const word128* mask_a, const word128* mask_b,
const word128* mask_c) {
word128 x0m[2] ATTR_ALIGNED(alignof(word128)), x1m[2] ATTR_ALIGNED(alignof(word128)),
x2m[2] ATTR_ALIGNED(alignof(word128));
/* a, b, c */
mm128_and_256(x0m, CONST_BLOCK(in, 0)->w128, mask_a);
mm128_and_256(x1m, CONST_BLOCK(in, 0)->w128, mask_b);
mm128_and_256(x2m, CONST_BLOCK(in, 0)->w128, mask_c);

/* align a and b with the positions of c */
mm128_shift_left_256(x0m, x0m, 2);
mm128_shift_left_256(x1m, x1m, 1);

word128 t0[2] ATTR_ALIGNED(alignof(word128)), t1[2] ATTR_ALIGNED(alignof(word128)),
t2[2] ATTR_ALIGNED(alignof(word128));
/* b & c, c & a, a & b */
mm128_and_256(t0, x1m, x2m);
mm128_and_256(t1, x0m, x2m);
mm128_and_256(t2, x0m, x1m);

/* (b & c) ^ a -- must happen before x0m is overwritten below */
mm128_xor_256(t0, t0, x0m);

/* from here on x0m holds a ^ b */
mm128_xor_256(x0m, x0m, x1m);
/* (c & a) ^ a ^ b */
mm128_xor_256(t1, t1, x0m);

/* (a & b) ^ a ^ b ^ c */
mm128_xor_256(t2, t2, x0m);
mm128_xor_256(t2, t2, x2m);

/* move the first two outputs back to the a and b positions */
mm128_shift_right_256(t0, t0, 2);
mm128_shift_right_256(t1, t1, 1);

mm128_xor_256(t0, t0, t1);
mm128_xor_256(in->w128, t0, t2);
}

/* S-box layer for the 129-bit instance: sbox_s128_full with the
 * mask_129_129_43_{a,b,c} masks. */
ATTR_TARGET_S128
static inline void sbox_s128_lowmc_129_129_4(mzd_local_t* in) {
sbox_s128_full(in, mask_129_129_43_a->w128, mask_129_129_43_b->w128, mask_129_129_43_c->w128);
}



/*
 * S-box layer on a single word256. Returns the new state for input `min`;
 * mask_a, mask_b and mask_c select the a, b and c bits. Unlike the s128 and
 * uint64 variants, the alignment uses rotate instead of shift helpers.
 */
ATTR_TARGET_AVX2
static inline word256 sbox_s256_lowmc_full(const word256 min, const word256 mask_a,
const word256 mask_b, const word256 mask_c) {
/* a, b, c */
word256 x0m ATTR_ALIGNED(alignof(word256)) = mm256_and(min, mask_a);
word256 x1m ATTR_ALIGNED(alignof(word256)) = mm256_and(min, mask_b);
word256 x2m ATTR_ALIGNED(alignof(word256)) = mm256_and(min, mask_c);

/* align a and b with the positions of c */
x0m = mm256_rotate_left(x0m, 2);
x1m = mm256_rotate_left(x1m, 1);

/* b & c, c & a, a & b */
word256 t0 ATTR_ALIGNED(alignof(word256)) = mm256_and(x1m, x2m);
word256 t1 ATTR_ALIGNED(alignof(word256)) = mm256_and(x0m, x2m);
word256 t2 ATTR_ALIGNED(alignof(word256)) = mm256_and(x0m, x1m);

/* (b & c) ^ a -- must happen before x0m is overwritten below */
t0 = mm256_xor(t0, x0m);

/* from here on x0m holds a ^ b */
x0m = mm256_xor(x0m, x1m);
/* (c & a) ^ a ^ b */
t1 = mm256_xor(t1, x0m);

/* (a & b) ^ a ^ b ^ c */
t2 = mm256_xor(t2, x0m);
t2 = mm256_xor(t2, x2m);

/* move the first two outputs back to the a and b positions */
t0 = mm256_rotate_right(t0, 2);
t1 = mm256_rotate_right(t1, 1);

return mm256_xor(mm256_xor(t0, t1), t2);
}

/* S-box layer for the 129-bit instance: applies sbox_s256_lowmc_full with the
 * mask_129_129_43_{a,b,c} masks to the first block of `in`, in place. */
ATTR_TARGET_AVX2
static inline void sbox_s256_lowmc_129_129_4(mzd_local_t* in) {
BLOCK(in, 0)->w256 = sbox_s256_lowmc_full(
BLOCK(in, 0)->w256, CONST_BLOCK(mask_129_129_43_a, 0)->w256,
CONST_BLOCK(mask_129_129_43_b, 0)->w256, CONST_BLOCK(mask_129_129_43_c, 0)->w256);
}



#if !defined(NO_UINT64_FALLBACK)
/*
 * Body of the auxiliary S-box computation, parameterized over the state
 * width LOWMC_N, the XOR/AND/SHL/SHR helpers and the three S-box bit masks.
 *
 * The expansion relies on `statein`, `stateout` and `tapes` being in scope at
 * the point of use. It derives `aux` from the masked bits of statein (a, b, c)
 * and stateout (d, e, f), XORs in LOWMC_N bits read from tapes->parity_tapes
 * and from tapes->tape[15] at tapes->pos, then writes `aux` back to
 * tapes->tape[15] at tapes->pos and to tapes->aux_bits at tapes->aux_pos.
 * tapes->aux_pos is advanced by LOWMC_N; tapes->pos is not modified here.
 */
#define picnic3_aux_sbox_bitsliced(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, bitmask_c) \
do { \
mzd_local_t a[1], b[1], c[1]; \
/* a */ \
AND(a, bitmask_a, statein); \
/* b */ \
AND(b, bitmask_b, statein); \
/* c */ \
AND(c, bitmask_c, statein); \
\
SHL(a, a, 2); \
SHL(b, b, 1); \
mzd_local_t d[1], e[1], f[1]; \
/* a */ \
AND(d, bitmask_a, stateout); \
/* b */ \
AND(e, bitmask_b, stateout); \
/* c */ \
AND(f, bitmask_c, stateout); \
\
SHL(d, d, 2); \
SHL(e, e, 1); \
\
mzd_local_t fresh_output_ab[1], fresh_output_bc[1], fresh_output_ca[1]; \
XOR(fresh_output_ab, a, b); \
XOR(fresh_output_ca, e, fresh_output_ab); \
XOR(fresh_output_bc, d, a); \
XOR(fresh_output_ab, fresh_output_ab, c); \
XOR(fresh_output_ab, fresh_output_ab, f); \
\
mzd_local_t t0[1], t1[1], t2[1], aux[1]; \
SHR(t2, fresh_output_ca, 2); \
SHR(t1, fresh_output_bc, 1); \
XOR(t2, t2, t1); \
XOR(aux, t2, fresh_output_ab); \
/* a & b */ \
AND(t0, a, b); \
/* b & c */ \
AND(t1, b, c); \
/* c & a */ \
AND(t2, c, a); \
SHR(t2, t2, 2); \
SHR(t1, t1, 1); \
XOR(t2, t2, t1); \
XOR(t2, t2, t0); \
XOR(aux, aux, t2); \
\
bitstream_t parity_tape = {{tapes->parity_tapes}, tapes->pos}; \
bitstream_t last_party_tape = {{tapes->tape[15]}, tapes->pos}; \
\
/* calculate aux_bits to fix and_helper */ \
mzd_from_bitstream(&parity_tape, t0, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
XOR(aux, aux, t0); \
mzd_from_bitstream(&last_party_tape, t1, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
XOR(aux, aux, t1); \
\
last_party_tape.position = tapes->pos; \
mzd_to_bitstream(&last_party_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
bitstream_t aux_tape = {{tapes->aux_bits}, tapes->aux_pos}; \
mzd_to_bitstream(&aux_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
\
tapes->aux_pos += LOWMC_N; \
} while (0)

/* Auxiliary S-box computation for the 129-bit instance with the uint64
 * helpers. The parameter names statein, stateout and tapes are referenced
 * directly by the picnic3_aux_sbox_bitsliced expansion and must not be renamed. */
static void sbox_aux_uint64_lowmc_129_129_4(mzd_local_t* statein, mzd_local_t* stateout,
randomTape_t* tapes) {
picnic3_aux_sbox_bitsliced(LOWMC_129_129_4_N, mzd_xor_uint64_192, mzd_and_uint64_192,
mzd_shift_left_uint64_192, mzd_shift_right_uint64_192,
mask_129_129_43_a, mask_129_129_43_b, mask_129_129_43_c);
}
#endif /* !NO_UINT64_FALLBACK */

#if !defined(NO_UINT64_FALLBACK)
// uint64 based implementation
#define IMPL uint64

/*
 * Each pair below selects a function set (the *_fns_uint64.h header) and then
 * expands the lowmc.c.i template with it; IMPL becomes part of the generated
 * function names.
 * NOTE(review): lowmc.c.i is guarded by `#if defined(LOWMC_INSTANCE)` and, of the
 * function-set headers in this tree, only the lowmc_129_129_4 ones define
 * LOWMC_INSTANCE. The remaining includes therefore appear to generate no code;
 * confirm before relying on any of those variants.
 */
#include "lowmc_129_129_4_fns_uint64.h"
#include "lowmc.c.i"

#include "lowmc_192_192_4_fns_uint64.h"
#include "lowmc.c.i"

#include "lowmc_255_255_4_fns_uint64.h"
#include "lowmc.c.i"

#include "lowmc_128_128_20_fns_uint64.h"
#include "lowmc.c.i"

#include "lowmc_192_192_30_fns_uint64.h"
#include "lowmc.c.i"

#include "lowmc_256_256_38_fns_uint64.h"
#include "lowmc.c.i"
#endif

/*
 * Switch the templates to the s128 variant: generated functions are prefixed with
 * FN_ATTR (the lowmc_impl*.c.i templates emit it when defined) and carry `s128`
 * in their names via IMPL.
 */
#define FN_ATTR ATTR_TARGET_S128
#undef IMPL
#define IMPL s128

/*
 * Auxiliary S-box step on pairs of word128 values. The expansion site must have
 * `statein`, `stateout` (accessed through ->w128) and `tapes` in scope.
 *
 * What the body computes, in order:
 *  - a, b, c: statein masked with bitmask_a/b/c; a is then passed through SHL by 2
 *    and b through SHL by 1.
 *  - d, e, f: the same masking and shifting applied to stateout.
 *  - aux = SHR(e ^ a ^ b, 2) ^ SHR(d ^ a, 1) ^ (a ^ b ^ c ^ f)
 *          ^ SHR(c & a, 2) ^ SHR(b & c, 1) ^ (a & b)
 *  - aux is further XORed with two values obtained via mzd_from_bitstream from
 *    tapes->parity_tapes and tapes->tape[15], both bitstreams set up with tapes->pos.
 *  - aux is passed to mzd_to_bitstream twice: for tapes->tape[15] (position reset to
 *    tapes->pos first) and for tapes->aux_bits at tapes->aux_pos.
 *  - tapes->aux_pos is advanced by LOWMC_N; tapes->pos is not modified here.
 *
 * LOWMC_N is used as a bit count; (LOWMC_N + 63) / 64 is the matching number of
 * 64-bit words handed to the bitstream helpers.
 */
#define picnic3_aux_sbox_bitsliced_mm128(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, \
bitmask_c) \
do { \
word128 a[2] ATTR_ALIGNED(alignof(word128)); \
word128 b[2] ATTR_ALIGNED(alignof(word128)); \
word128 c[2] ATTR_ALIGNED(alignof(word128)); \
/* a */ \
AND(a, bitmask_a->w128, statein->w128); \
/* b */ \
AND(b, bitmask_b->w128, statein->w128); \
/* c */ \
AND(c, bitmask_c->w128, statein->w128); \
\
SHL(a, a, 2); \
SHL(b, b, 1); \
word128 d[2] ATTR_ALIGNED(alignof(word128)); \
word128 e[2] ATTR_ALIGNED(alignof(word128)); \
word128 f[2] ATTR_ALIGNED(alignof(word128)); \
/* a */ \
AND(d, bitmask_a->w128, stateout->w128); \
/* b */ \
AND(e, bitmask_b->w128, stateout->w128); \
/* c */ \
AND(f, bitmask_c->w128, stateout->w128); \
\
SHL(d, d, 2); \
SHL(e, e, 1); \
\
word128 fresh_output_ab[2] ATTR_ALIGNED(alignof(word128)); \
word128 fresh_output_bc[2] ATTR_ALIGNED(alignof(word128)); \
word128 fresh_output_ca[2] ATTR_ALIGNED(alignof(word128)); \
XOR(fresh_output_ab, a, b); \
XOR(fresh_output_ca, e, fresh_output_ab); \
XOR(fresh_output_bc, d, a); \
XOR(fresh_output_ab, fresh_output_ab, c); \
XOR(fresh_output_ab, fresh_output_ab, f); \
\
word128 t0[2] ATTR_ALIGNED(alignof(word128)); \
word128 t1[2] ATTR_ALIGNED(alignof(word128)); \
word128 t2[2] ATTR_ALIGNED(alignof(word128)); \
mzd_local_t tmp[1], aux[1]; \
SHR(t2, fresh_output_ca, 2); \
SHR(t1, fresh_output_bc, 1); \
XOR(t2, t2, t1); \
XOR(aux->w128, t2, fresh_output_ab); \
\
/* a & b */ \
AND(t0, a, b); \
/* b & c */ \
AND(t1, b, c); \
/* c & a */ \
AND(t2, c, a); \
SHR(t2, t2, 2); \
SHR(t1, t1, 1); \
XOR(t2, t2, t1); \
XOR(t2, t2, t0); \
XOR(aux->w128, aux->w128, t2); \
\
bitstream_t parity_tape = {{tapes->parity_tapes}, tapes->pos}; \
bitstream_t last_party_tape = {{tapes->tape[15]}, tapes->pos}; \
\
/* calculate aux_bits to fix and_helper */ \
mzd_from_bitstream(&parity_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
XOR(aux->w128, aux->w128, tmp->w128); \
mzd_from_bitstream(&last_party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
XOR(aux->w128, aux->w128, tmp->w128); \
\
last_party_tape.position = tapes->pos; \
mzd_to_bitstream(&last_party_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
bitstream_t aux_tape = {{tapes->aux_bits}, tapes->aux_pos}; \
mzd_to_bitstream(&aux_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
\
tapes->aux_pos += LOWMC_N; \
} while (0)

/*
 * s128 variant of the auxiliary S-box step for the 129-bit LowMC instance:
 * one expansion of picnic3_aux_sbox_bitsliced_mm128 with the mm128 256-bit helpers
 * and the 129-bit S-box masks. The macro body uses `statein`, `stateout` and
 * `tapes` by name.
 */
ATTR_TARGET_S128
static void sbox_aux_s128_lowmc_129_129_4(mzd_local_t* statein, mzd_local_t* stateout,
randomTape_t* tapes) {
picnic3_aux_sbox_bitsliced_mm128(LOWMC_129_129_4_N, mm128_xor_256, mm128_and_256,
mm128_shift_left_256, mm128_shift_right_256, mask_129_129_43_a,
mask_129_129_43_b, mask_129_129_43_c);
}

/*
 * s128 instantiations: each function-set header is followed by one expansion of
 * the lowmc.c.i template.
 * NOTE(review): only lowmc_129_129_4_fns_s128.h defines LOWMC_INSTANCE, which
 * lowmc.c.i requires before it emits anything; the other pairs appear to be no-ops.
 */
#include "lowmc_129_129_4_fns_s128.h"
#include "lowmc.c.i"

#include "lowmc_192_192_4_fns_s128.h"
#include "lowmc.c.i"

#include "lowmc_255_255_4_fns_s128.h"
#include "lowmc.c.i"

#include "lowmc_128_128_20_fns_s128.h"
#include "lowmc.c.i"

#include "lowmc_192_192_30_fns_s128.h"
#include "lowmc.c.i"

#include "lowmc_256_256_38_fns_s128.h"
#include "lowmc.c.i"

/*
 * Switch the templates to the s256 variant: generated functions get the AVX2
 * target attribute through FN_ATTR and `s256` in their names through IMPL.
 */
#undef FN_ATTR
#define FN_ATTR ATTR_TARGET_AVX2
#undef IMPL
#define IMPL s256

/*
 * Auxiliary S-box step on single word256 values. Unlike the mm128 macro, the
 * XOR/AND/ROL/ROR arguments are used as value-returning expressions
 * (`x = OP(...)`). The expansion site must have `statein`, `stateout` (accessed
 * through ->w256) and `tapes` in scope.
 *
 * The parameters are called ROL/ROR, but the only expansion in this file passes
 * mm256_shift_left / mm256_shift_right for them.
 *
 * What the body computes, in order:
 *  - a, b, c: statein masked with bitmask_a/b/c; then a = ROL(a, 2), b = ROL(b, 1).
 *  - d, e, f: the same for stateout.
 *  - aux = ROR(e ^ a ^ b, 2) ^ ROR(d ^ a, 1) ^ (a ^ b ^ c ^ f)
 *          ^ ROR(c & a, 2) ^ ROR(b & c, 1) ^ (a & b)
 *  - aux is further XORed with two values obtained via mzd_from_bitstream from
 *    tapes->parity_tapes and tapes->tape[15], both bitstreams set up with tapes->pos.
 *  - aux is passed to mzd_to_bitstream twice: for tapes->tape[15] (position reset to
 *    tapes->pos first) and for tapes->aux_bits at tapes->aux_pos.
 *  - tapes->aux_pos is advanced by LOWMC_N; tapes->pos is not modified here.
 */
#define picnic3_aux_sbox_bitsliced_mm256(LOWMC_N, XOR, AND, ROL, ROR, bitmask_a, bitmask_b, \
bitmask_c) \
do { \
word256 a ATTR_ALIGNED(alignof(word256)); \
word256 b ATTR_ALIGNED(alignof(word256)); \
word256 c ATTR_ALIGNED(alignof(word256)); \
/* a */ \
a = AND(bitmask_a->w256, statein->w256); \
/* b */ \
b = AND(bitmask_b->w256, statein->w256); \
/* c */ \
c = AND(bitmask_c->w256, statein->w256); \
\
a = ROL(a, 2); \
b = ROL(b, 1); \
word256 d ATTR_ALIGNED(alignof(word256)); \
word256 e ATTR_ALIGNED(alignof(word256)); \
word256 f ATTR_ALIGNED(alignof(word256)); \
/* d */ \
d = AND(bitmask_a->w256, stateout->w256); \
/* e */ \
e = AND(bitmask_b->w256, stateout->w256); \
/* f */ \
f = AND(bitmask_c->w256, stateout->w256); \
\
d = ROL(d, 2); \
e = ROL(e, 1); \
\
word256 fresh_output_ab ATTR_ALIGNED(alignof(word256)); \
word256 fresh_output_bc ATTR_ALIGNED(alignof(word256)); \
word256 fresh_output_ca ATTR_ALIGNED(alignof(word256)); \
fresh_output_ab = XOR(a, b); \
fresh_output_ca = XOR(e, fresh_output_ab); \
fresh_output_bc = XOR(d, a); \
fresh_output_ab = XOR(fresh_output_ab, c); \
fresh_output_ab = XOR(fresh_output_ab, f); \
\
word256 t0 ATTR_ALIGNED(alignof(word256)); \
word256 t1 ATTR_ALIGNED(alignof(word256)); \
word256 t2 ATTR_ALIGNED(alignof(word256)); \
mzd_local_t tmp[1], aux[1]; \
t2 = ROR(fresh_output_ca, 2); \
t1 = ROR(fresh_output_bc, 1); \
t2 = XOR(t2, t1); \
aux->w256 = XOR(t2, fresh_output_ab); \
\
/* a & b */ \
t0 = AND(a, b); \
/* b & c */ \
t1 = AND(b, c); \
/* c & a */ \
t2 = AND(c, a); \
t2 = ROR(t2, 2); \
t1 = ROR(t1, 1); \
t2 = XOR(t2, t1); \
t2 = XOR(t2, t0); \
aux->w256 = XOR(aux->w256, t2); \
\
bitstream_t parity_tape = {{tapes->parity_tapes}, tapes->pos}; \
bitstream_t last_party_tape = {{tapes->tape[15]}, tapes->pos}; \
\
/* calculate aux_bits to fix and_helper */ \
mzd_from_bitstream(&parity_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
aux->w256 = XOR(aux->w256, tmp->w256); \
mzd_from_bitstream(&last_party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
aux->w256 = XOR(aux->w256, tmp->w256); \
\
last_party_tape.position = tapes->pos; \
mzd_to_bitstream(&last_party_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
bitstream_t aux_tape = {{tapes->aux_bits}, tapes->aux_pos}; \
mzd_to_bitstream(&aux_tape, aux, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
\
tapes->aux_pos += LOWMC_N; \
} while (0)

/*
 * s256 (AVX2) variant of the auxiliary S-box step for the 129-bit LowMC instance:
 * one expansion of picnic3_aux_sbox_bitsliced_mm256 with the mm256 helpers and the
 * 129-bit S-box masks. The macro body uses `statein`, `stateout` and `tapes` by name.
 */
ATTR_TARGET_AVX2
static void sbox_aux_s256_lowmc_129_129_4(mzd_local_t* statein, mzd_local_t* stateout,
randomTape_t* tapes) {
picnic3_aux_sbox_bitsliced_mm256(LOWMC_129_129_4_N, mm256_xor, mm256_and, mm256_shift_left,
mm256_shift_right, mask_129_129_43_a, mask_129_129_43_b,
mask_129_129_43_c);
}

/*
 * s256 instantiations: each function-set header is followed by one expansion of
 * the lowmc.c.i template.
 * NOTE(review): only lowmc_129_129_4_fns_s256.h defines LOWMC_INSTANCE, which
 * lowmc.c.i requires before it emits anything; the other pairs appear to be no-ops.
 */
#include "lowmc_129_129_4_fns_s256.h"
#include "lowmc.c.i"

#include "lowmc_192_192_4_fns_s256.h"
#include "lowmc.c.i"

#include "lowmc_255_255_4_fns_s256.h"
#include "lowmc.c.i"

#include "lowmc_128_128_20_fns_s256.h"
#include "lowmc.c.i"

#include "lowmc_192_192_30_fns_s256.h"
#include "lowmc.c.i"

#include "lowmc_256_256_38_fns_s256.h"
#include "lowmc.c.i"

/**
 * Pick the LowMC evaluation routine for the given parameter set.
 *
 * Candidates are tried from the widest vector unit down to the plain uint64
 * code (the latter only when NO_UINT64_FALLBACK is not defined). Although the
 * assertion admits several parameter sets, only n = 129 / m = 43 has routines
 * wired up here.
 *
 * @param lowmc parameter set to look up; must not be NULL
 * @return matching routine, or NULL when none is available
 */
lowmc_implementation_f lowmc_get_implementation(const lowmc_parameters_t* lowmc) {
  assert((lowmc->m == 43 && lowmc->n == 129) || (lowmc->m == 64 && lowmc->n == 192) ||
         (lowmc->m == 85 && lowmc->n == 255) ||
         (lowmc->m == 10 && (lowmc->n == 128 || lowmc->n == 192 || lowmc->n == 256)));

  /* instance with full Sbox layer, 129-bit block */
  const int wants_129_43 = (lowmc->n == 129) && (lowmc->m == 43);

  /* AVX2 */
  if ((CPU_SUPPORTS_AVX2) && wants_129_43) {
    return lowmc_s256_lowmc_129_129_4;
  }

  /* SSE2/NEON */
  if ((CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) && wants_129_43) {
    return lowmc_s128_lowmc_129_129_4;
  }

#if !defined(NO_UINT64_FALLBACK)
  /* plain uint64_t code */
  if (wants_129_43) {
    return lowmc_uint64_lowmc_129_129_4;
  }
#endif

  return NULL;
}


/**
 * Pick the auxiliary-bit computation routine for the given parameter set.
 *
 * Same selection order as lowmc_get_implementation: AVX2, then SSE2/NEON, then
 * the uint64 code (unless NO_UINT64_FALLBACK is defined). Only n = 129 / m = 43
 * has routines wired up here.
 *
 * @param lowmc parameter set to look up; must not be NULL
 * @return matching routine, or NULL when none is available
 */
lowmc_compute_aux_implementation_f
lowmc_compute_aux_get_implementation(const lowmc_parameters_t* lowmc) {
  assert((lowmc->m == 43 && lowmc->n == 129) || (lowmc->m == 64 && lowmc->n == 192) ||
         (lowmc->m == 85 && lowmc->n == 255));

  const int wants_129_43 = (lowmc->n == 129) && (lowmc->m == 43);

  if ((CPU_SUPPORTS_AVX2) && wants_129_43) {
    return lowmc_compute_aux_s256_lowmc_129_129_4;
  }

  if ((CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) && wants_129_43) {
    return lowmc_compute_aux_s128_lowmc_129_129_4;
  }

#if !defined(NO_UINT64_FALLBACK)
  if (wants_129_43) {
    return lowmc_compute_aux_uint64_lowmc_129_129_4;
  }
#endif

  return NULL;
}

+ 38
- 0
src/sign/picnic/picnic3l1/avx2/lowmc.c.i View File

@@ -0,0 +1,38 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/*
 * Template driver. It is included once per (IMPL, function set) combination and
 * emits nothing unless the function-set header defined LOWMC_INSTANCE.
 *
 * First pass: N_LOWMC names the cipher function (lowmc / IMPL / LOWMC_INSTANCE
 * joined by CONCAT) and SBOX is bound either to sbox_layer_10_uint64 on one
 * 64-bit word of the state (LOWMC_PARTIAL) or to the per-instance sbox function;
 * then the matching implementation template is included.
 *
 * Second pass (only without LOWMC_PARTIAL): the helper macros are rebound to the
 * three-argument sbox_aux form and lowmc_impl_aux.c.i is included, producing the
 * lowmc_compute_aux function.
 */
#if defined(LOWMC_INSTANCE)
#define N_LOWMC CONCAT(lowmc, CONCAT(IMPL, LOWMC_INSTANCE))
#define SBOX_FUNC CONCAT(sbox, CONCAT(IMPL, LOWMC_INSTANCE))
#if defined(LOWMC_PARTIAL)
#define SBOX(x) sbox_layer_10_uint64(&BLOCK(x, 0)->w64[(LOWMC_N / (sizeof(word) * 8)) - 1])
#include "lowmc_impl_partial.c.i"
#else
#define SBOX(x) SBOX_FUNC(BLOCK(x, 0))
#include "lowmc_impl.c.i"
#endif

#if !defined(LOWMC_PARTIAL)
/* drop the first-pass bindings before rebinding them for the aux computation */
#undef N_LOWMC
#undef RECORD_STATE
#undef SBOX
#undef SBOX_FUNC
#define SBOX_FUNC CONCAT(sbox_aux, CONCAT(IMPL, LOWMC_INSTANCE))
#define SBOX(x, y, tapes) SBOX_FUNC(BLOCK(x, 0), BLOCK(y, 0), tapes)
#define N_LOWMC CONCAT(lowmc_compute_aux, CONCAT(IMPL, LOWMC_INSTANCE))
#include "lowmc_impl_aux.c.i"
#endif

/* leave no template-local macros behind for the next inclusion */
#undef N_LOWMC
#undef RECORD_STATE
#undef SBOX
#undef SBOX_FUNC
#endif

// vim: ft=c

+ 31
- 0
src/sign/picnic/picnic3l1/avx2/lowmc.h View File

@@ -0,0 +1,31 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef LOWMC_H
#define LOWMC_H

#include "lowmc_pars.h"

typedef struct {
mzd_local_t state[(MAX_LOWMC_BLOCK_SIZE + 255) / 256];
} recorded_state_t;

// Forward declaration of the type from picnic3_types.h; including that header here
// would create a cyclic dependency.
typedef struct randomTape_t randomTape_t;

// Cipher evaluation: (key, input block, output block).
typedef void (*lowmc_implementation_f)(lowmc_key_t const*, mzd_local_t const*, mzd_local_t*);
// Cipher evaluation that writes recorded states instead of an output block.
typedef void (*lowmc_store_implementation_f)(lowmc_key_t const*, mzd_local_t const*,
recorded_state_t* state);
// Auxiliary-bit computation; note that the key argument is not const.
typedef void (*lowmc_compute_aux_implementation_f)(lowmc_key_t*, randomTape_t* tapes);

/*
 * Look up the routine for a parameter set. lowmc_get_implementation and
 * lowmc_compute_aux_get_implementation return NULL when nothing matches.
 * NOTE(review): no definition of lowmc_store_get_implementation is visible next to
 * the other two in lowmc.c; confirm it exists before calling it.
 */
lowmc_implementation_f lowmc_get_implementation(const lowmc_parameters_t* lowmc);
lowmc_store_implementation_f lowmc_store_get_implementation(const lowmc_parameters_t* lowmc);
lowmc_compute_aux_implementation_f lowmc_compute_aux_get_implementation(const lowmc_parameters_t* lowmc);

#endif

+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_128_128_20_fns_s128.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s128 routines for the 128-bit variant.
 * SHUFFLE, MUL_MC, ADDMUL_R, MUL_Z and XOR_MC are the names consumed by
 * lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s128_128
#define MUL mzd_mul_v_s128_128
#define SHUFFLE mzd_shuffle_128_30
#define XOR mzd_xor_s128_128
#define COPY mzd_copy_s128_128

#define MUL_MC mzd_mul_v_s128_128_640
#define ADDMUL_R mzd_addmul_v_s128_30_128
#define MUL_Z mzd_mul_v_parity_uint64_128_30
#define XOR_MC mzd_xor_s128_640


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_128_128_20_fns_s256.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s256 routines for the 128-bit variant.
 * SHUFFLE, MUL_MC, ADDMUL_R, MUL_Z and XOR_MC are the names consumed by
 * lowmc_impl_partial.c.i; SHUFFLE uses the `pext` flavour here.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s256_128
#define MUL mzd_mul_v_s256_128
#define SHUFFLE mzd_shuffle_pext_128_30
#define XOR mzd_xor_s256_128
#define COPY mzd_copy_s256_128

#define MUL_MC mzd_mul_v_s256_128_768
#define ADDMUL_R mzd_addmul_v_s256_30_128
#define MUL_Z mzd_mul_v_parity_uint64_128_30
#define XOR_MC mzd_xor_s256_768


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_128_128_20_fns_uint64.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> uint64 routines for the 128-bit variant.
 * SHUFFLE, MUL_MC, ADDMUL_R, MUL_Z and XOR_MC are the names consumed by
 * lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_uint64_128
#define MUL mzd_mul_v_uint64_128
#define XOR mzd_xor_uint64_128
#define SHUFFLE mzd_shuffle_128_30
#define COPY mzd_copy_uint64_128

#define MUL_MC mzd_mul_v_uint64_128_640
#define ADDMUL_R mzd_addmul_v_uint64_30_128
#define MUL_Z mzd_mul_v_parity_uint64_128_30
#define XOR_MC mzd_xor_uint64_640


+ 2768
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4.c
File diff suppressed because it is too large
View File


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4.h View File

@@ -0,0 +1,22 @@
#ifndef LOWMC_129_129_4_H
#define LOWMC_129_129_4_H

#include "lowmc_pars.h"

/*
 * Parameters of the lowmc_129_129_4 instance. N is a bit count (it sizes the
 * state and the tape reads), R is the number of rounds the templates iterate
 * over; M and K fill the `m` and `k` fields of lowmc_parameters_t.
 */
#define LOWMC_129_129_4_N 129
#define LOWMC_129_129_4_M 43
#define LOWMC_129_129_4_K LOWMC_129_129_4_N
#define LOWMC_129_129_4_R 4

/* matrices and round data of the instance; defined in lowmc_129_129_4.c */
extern const lowmc_t lowmc_129_129_4;
/* brace initializer in lowmc_parameters_t field order: m, n, r, k */
#define lowmc_parameters_129_129_4                                                                 \
  {LOWMC_129_129_4_M, LOWMC_129_129_4_N, LOWMC_129_129_4_R, LOWMC_129_129_4_K}

/*
 * Bit masks handed to the S-box macros as bitmask_a / bitmask_b / bitmask_c.
 * The three masks are pairwise disjoint: in 64-bit words 1 and 2 each selects every
 * third bit and together they cover both words completely; mask a additionally
 * has the top bit of word 0 set. Word 3 is zero in all of them.
 * Being `static const` in a header, every including translation unit gets its own copy.
 */
static const mzd_local_t mask_129_129_43_a[1] = {
{{UINT64_C(0x8000000000000000), UINT64_C(0x4924924924924924), UINT64_C(0x2492492492492492),
UINT64_C(0x0)}}};
static const mzd_local_t mask_129_129_43_b[1] = {
{{UINT64_C(0x0), UINT64_C(0x9249249249249249), UINT64_C(0x4924924924924924), UINT64_C(0x0)}}};
static const mzd_local_t mask_129_129_43_c[1] = {
{{UINT64_C(0x0), UINT64_C(0x2492492492492492), UINT64_C(0x9249249249249249), UINT64_C(0x0)}}};

#endif

+ 21
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4_fns_s128.h View File

@@ -0,0 +1,21 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s128 routines for lowmc_129_129_4.
 * ADDMUL, MUL, XOR and COPY are the names used by lowmc_impl.c.i and
 * lowmc_impl_aux.c.i.
 */
#define ADDMUL mzd_addmul_v_s128_129
#define MUL mzd_mul_v_s128_129
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256
#define MPC_MUL mpc_matrix_mul_s128_129

/* Defining LOWMC_INSTANCE is what makes a following lowmc.c.i include emit code. */
#define LOWMC_INSTANCE lowmc_129_129_4
#define LOWMC_N LOWMC_129_129_4_N
#define LOWMC_R LOWMC_129_129_4_R
#define LOWMC_M LOWMC_129_129_4_M

+ 21
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4_fns_s256.h View File

@@ -0,0 +1,21 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s256 routines for lowmc_129_129_4.
 * ADDMUL, MUL, XOR and COPY are the names used by lowmc_impl.c.i and
 * lowmc_impl_aux.c.i.
 */
#define ADDMUL mzd_addmul_v_s256_129
#define MUL mzd_mul_v_s256_129
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256
#define MPC_MUL mpc_matrix_mul_s256_129

/* Defining LOWMC_INSTANCE is what makes a following lowmc.c.i include emit code. */
#define LOWMC_INSTANCE lowmc_129_129_4
#define LOWMC_N LOWMC_129_129_4_N
#define LOWMC_R LOWMC_129_129_4_R
#define LOWMC_M LOWMC_129_129_4_M

+ 21
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_129_129_4_fns_uint64.h View File

@@ -0,0 +1,21 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> uint64 routines for lowmc_129_129_4.
 * XOR and COPY use the 192-bit helpers, i.e. three 64-bit words for the 129-bit state.
 */
#define ADDMUL mzd_addmul_v_uint64_129
#define MUL mzd_mul_v_uint64_129
#define XOR mzd_xor_uint64_192
#define COPY mzd_copy_uint64_192
#define MPC_MUL mpc_matrix_mul_uint64_129

/* Defining LOWMC_INSTANCE is what makes a following lowmc.c.i include emit code. */
#define LOWMC_INSTANCE lowmc_129_129_4
#define LOWMC_N LOWMC_129_129_4_N
#define LOWMC_R LOWMC_129_129_4_R
#define LOWMC_M LOWMC_129_129_4_M

+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_192_192_30_fns_s128.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s128 routines for the 192-bit variant,
 * including the names consumed by lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s128_192
#define MUL mzd_mul_v_s128_192
#define SHUFFLE mzd_shuffle_192_30
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256

#define MUL_MC mzd_mul_v_s128_192_1024
#define ADDMUL_R mzd_addmul_v_s128_30_192
#define MUL_Z mzd_mul_v_parity_uint64_192_30
#define XOR_MC mzd_xor_s128_1024


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_192_192_30_fns_s256.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s256 routines for the 192-bit variant,
 * including the names consumed by lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s256_192
#define MUL mzd_mul_v_s256_192
#define SHUFFLE mzd_shuffle_pext_192_30
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256

#define MUL_MC mzd_mul_v_s256_192_1024
#define ADDMUL_R mzd_addmul_v_s256_30_192
#define MUL_Z mzd_mul_v_parity_uint64_192_30
#define XOR_MC mzd_xor_s256_1024


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_192_192_30_fns_uint64.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> uint64 routines for the 192-bit variant,
 * including the names consumed by lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_uint64_192
#define MUL mzd_mul_v_uint64_192
#define SHUFFLE mzd_shuffle_192_30
#define XOR mzd_xor_uint64_192
#define COPY mzd_copy_uint64_192

#define MUL_MC mzd_mul_v_uint64_192_960
#define ADDMUL_R mzd_addmul_v_uint64_30_192
#define MUL_Z mzd_mul_v_parity_uint64_192_30
#define XOR_MC mzd_xor_uint64_960


+ 17
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_192_192_4_fns_s128.h View File

@@ -0,0 +1,17 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s128 routines for the 192-bit variant.
 * NOTE(review): LOWMC_INSTANCE, LOWMC_N, LOWMC_R and LOWMC_M are not defined here,
 * so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s128_192
#define MUL mzd_mul_v_s128_192
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256
#define MPC_MUL mpc_matrix_mul_s128_192


+ 17
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_192_192_4_fns_s256.h View File

@@ -0,0 +1,17 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s256 routines for the 192-bit variant.
 * NOTE(review): LOWMC_INSTANCE, LOWMC_N, LOWMC_R and LOWMC_M are not defined here,
 * so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s256_192
#define MUL mzd_mul_v_s256_192
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256
#define MPC_MUL mpc_matrix_mul_s256_192


+ 17
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_192_192_4_fns_uint64.h View File

@@ -0,0 +1,17 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> uint64 routines for the 192-bit variant.
 * NOTE(review): LOWMC_INSTANCE, LOWMC_N, LOWMC_R and LOWMC_M are not defined here,
 * so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_uint64_192
#define MUL mzd_mul_v_uint64_192
#define XOR mzd_xor_uint64_192
#define COPY mzd_copy_uint64_192
#define MPC_MUL mpc_matrix_mul_uint64_192


+ 17
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_255_255_4_fns_s128.h View File

@@ -0,0 +1,17 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set for the 255-bit variant, bound to the 256-bit s128 routines.
 * NOTE(review): LOWMC_INSTANCE, LOWMC_N, LOWMC_R and LOWMC_M are not defined here,
 * so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s128_256
#define MUL mzd_mul_v_s128_256
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256
#define MPC_MUL mpc_matrix_mul_s128_256


+ 17
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_255_255_4_fns_s256.h View File

@@ -0,0 +1,17 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set for the 255-bit variant, bound to the 256-bit s256 routines.
 * NOTE(review): LOWMC_INSTANCE, LOWMC_N, LOWMC_R and LOWMC_M are not defined here,
 * so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s256_256
#define MUL mzd_mul_v_s256_256
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256
#define MPC_MUL mpc_matrix_mul_s256_256


+ 17
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_255_255_4_fns_uint64.h View File

@@ -0,0 +1,17 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set for the 255-bit variant, bound to the 256-bit uint64 routines.
 * NOTE(review): LOWMC_INSTANCE, LOWMC_N, LOWMC_R and LOWMC_M are not defined here,
 * so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_uint64_256
#define MUL mzd_mul_v_uint64_256
#define XOR mzd_xor_uint64_256
#define COPY mzd_copy_uint64_256
#define MPC_MUL mpc_matrix_mul_uint64_256


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_256_256_38_fns_s128.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s128 routines for the 256-bit variant,
 * including the names consumed by lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s128_256
#define MUL mzd_mul_v_s128_256
#define SHUFFLE mzd_shuffle_256_30
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256

#define MUL_MC mzd_mul_v_s128_256_1280
#define ADDMUL_R mzd_addmul_v_s128_30_256
#define MUL_Z mzd_mul_v_parity_uint64_256_30
#define XOR_MC mzd_xor_s128_1280


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_256_256_38_fns_s256.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> s256 routines for the 256-bit variant,
 * including the names consumed by lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_s256_256
#define MUL mzd_mul_v_s256_256
#define SHUFFLE mzd_shuffle_pext_256_30
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256

#define MUL_MC mzd_mul_v_s256_256_1280
#define ADDMUL_R mzd_addmul_v_s256_30_256
#define MUL_Z mzd_mul_v_parity_uint64_256_30
#define XOR_MC mzd_xor_s256_1280


+ 22
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_256_256_38_fns_uint64.h View File

@@ -0,0 +1,22 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#include "lowmc_fns_undef.h"

/*
 * Function set: generic operation names -> uint64 routines for the 256-bit variant,
 * including the names consumed by lowmc_impl_partial.c.i.
 * NOTE(review): LOWMC_INSTANCE is cleared by lowmc_fns_undef.h and not redefined
 * here, so a following include of lowmc.c.i emits no code.
 */
#define ADDMUL mzd_addmul_v_uint64_256
#define MUL mzd_mul_v_uint64_256
#define SHUFFLE mzd_shuffle_256_30
#define XOR mzd_xor_uint64_256
#define COPY mzd_copy_uint64_256

#define MUL_MC mzd_mul_v_uint64_256_1216
#define ADDMUL_R mzd_addmul_v_uint64_30_256
#define MUL_Z mzd_mul_v_parity_uint64_256_30
#define XOR_MC mzd_xor_uint64_1216


+ 24
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_fns_undef.h View File

@@ -0,0 +1,24 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/*
 * Clears every macro a function-set header may define. Each *_fns_*.h header
 * includes this file first, so no binding leaks from one instantiation into the
 * next. In particular LOWMC_INSTANCE is cleared, and lowmc.c.i emits code only
 * when a function-set header defines it again.
 */
#undef ADDMUL
#undef COPY
#undef LOWMC_INSTANCE
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_M
#undef LOWMC_PARTIAL
#undef MUL
#undef MUL_MC
#undef ADDMUL_R
#undef MUL_Z
#undef SHUFFLE
#undef XOR_MC
#undef XOR
#undef MPC_MUL

+ 44
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_impl.c.i View File

@@ -0,0 +1,44 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


/*
 * Template body of the LowMC evaluation with a full S-box layer.
 *
 * The state starts as p plus the key times k0_matrix; every round applies SBOX,
 * multiplies by the round's l_matrix, XORs the round constant and adds the key
 * times the round's k_matrix. With RECORD_STATE defined the state before each
 * S-box layer and the final state are stored into `state`; otherwise the final
 * state is copied to `c`.
 */
#if defined(FN_ATTR)
FN_ATTR
#endif
#if defined(RECORD_STATE)
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, recorded_state_t* state)
#else
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, mzd_local_t* c)
#endif
{
  mzd_local_t cur[((LOWMC_N) + 255) / 256];
  mzd_local_t lin[((LOWMC_N) + 255) / 256];

  /* initial key addition */
  COPY(cur, p);
  ADDMUL(cur, lowmc_key, LOWMC_INSTANCE.k0_matrix);

  for (unsigned rnd = 0; rnd < LOWMC_R; ++rnd) {
    lowmc_round_t const* const params = &LOWMC_INSTANCE.rounds[rnd];

#if defined(RECORD_STATE)
    COPY(state[rnd].state, cur);
#endif
    SBOX(cur);

    /* linear layer, round constant, round key */
    MUL(lin, cur, params->l_matrix);
    XOR(cur, lin, params->constant);
    ADDMUL(cur, lowmc_key, params->k_matrix);
  }

#if defined(RECORD_STATE)
  COPY(state[LOWMC_R].state, cur);
#else
  COPY(c, cur);
#endif
}

// vim: ft=c

+ 39
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_impl_aux.c.i View File

@@ -0,0 +1,39 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/*
 * Template body of the auxiliary-bit computation (lowmc_compute_aux_*).
 *
 * lowmc_key is overwritten in place with key * ki0_matrix; the caller's original
 * value is kept in key0. The rounds are then walked from last to first. For each
 * one, x accumulates the key times the round's k_matrix and y receives x times
 * the round's li_matrix. x is then reloaded with the S-box input (from
 * tapes->parity_tapes, or key0 for the first cipher round), tapes->pos and
 * tapes->aux_pos are set for that round, and SBOX(x, y, tapes) is run.
 *
 * lowmc_key: in/out, replaced as described above.
 * tapes:     pos and aux_pos are rewritten on every round; what SBOX does with
 *            the tapes is defined by the sbox_aux_* function bound to it.
 */
#if defined(FN_ATTR)
FN_ATTR
#endif
static void N_LOWMC(lowmc_key_t* lowmc_key, randomTape_t* tapes) {
  mzd_local_t x[((LOWMC_N) + 255) / 256] = {{{0, 0, 0, 0}}};
  mzd_local_t y[((LOWMC_N) + 255) / 256];
  mzd_local_t key0[((LOWMC_N) + 255) / 256];

  COPY(key0, lowmc_key);
  MUL(lowmc_key, key0, LOWMC_INSTANCE.ki0_matrix);

  for (unsigned r = 0; r < LOWMC_R; ++r) {
    /* Index the round directly instead of decrementing a pointer in the loop
     * header: the decrement after the final iteration would form &rounds[-1],
     * which is undefined even if never dereferenced. */
    lowmc_round_t const* round = &LOWMC_INSTANCE.rounds[LOWMC_R - 1 - r];

    ADDMUL(x, lowmc_key, round->k_matrix);
    MUL(y, x, round->li_matrix);

    // recover input masks from tapes, only in first round we use the key as input
    if (r == LOWMC_R - 1) {
      COPY(x, key0);
    } else {
      bitstream_t bs = {{tapes->parity_tapes}, LOWMC_N * 2 * (LOWMC_R - 1 - r)};
      mzd_from_bitstream(&bs, x, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N);
    }
    tapes->pos     = LOWMC_N * 2 * (LOWMC_R - 1 - r) + LOWMC_N;
    tapes->aux_pos = LOWMC_N * (LOWMC_R - 1 - r);
    SBOX(x, y, tapes);
  }
}

// vim: ft=c

+ 67
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_impl_partial.c.i View File

@@ -0,0 +1,67 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


/*
 * Template body of the LowMC evaluation with a partial S-box layer.
 *
 * The contribution of key and constants to the non-linear part of every round is
 * precomputed once into nl_part (32 bits per round, two rounds per 64-bit word,
 * eight rounds per 256-bit block) and XORed into the top 32 bits of the last
 * state word after each S-box layer. With RECORD_STATE defined the state before
 * each S-box layer and the final state are stored into `state`; otherwise the
 * final state is copied to `c`.
 */
#if defined(FN_ATTR)
FN_ATTR
#endif
#if defined(RECORD_STATE)
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, recorded_state_t* state) {
#else
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, mzd_local_t* c) {
#endif
mzd_local_t x[((LOWMC_N) + 255) / 256];
mzd_local_t y[((LOWMC_N) + 255) / 256];
mzd_local_t nl_part[(LOWMC_R * 32 + 255) / 256];

XOR(x, p, LOWMC_INSTANCE.precomputed_constant_linear);
ADDMUL(x, lowmc_key, LOWMC_INSTANCE.k0_matrix);
MUL_MC(nl_part, lowmc_key, LOWMC_INSTANCE.precomputed_non_linear_part_matrix);
XOR_MC(nl_part, nl_part, LOWMC_INSTANCE.precomputed_constant_non_linear);

// multiply non-linear part of state with Z0 matrix
lowmc_partial_round_t const* round = LOWMC_INSTANCE.rounds;
for (unsigned i = 0; i < LOWMC_R - 1; ++i, ++round) {
#if defined(RECORD_STATE)
COPY(state[i].state, x);
#endif
SBOX(x);

// pick the 64-bit word holding round i's 32 bits; `*` binds tighter than `<<`,
// so the shift is 32 for even i and 0 for odd i before the top half is masked out
const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] ^=
(nl << (1 - (i & 1)) * 32) & WORD_C(0xFFFFFFFF00000000);

MUL_Z(y, x, round->z_matrix);
SHUFFLE(x, round->r_mask);
ADDMUL_R(y, x, round->r_matrix);

BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] &=
WORD_C(0x00000003FFFFFFFF); // clear nl part
XOR(x, y, x);
}
// last round: same S-box and nl_part handling, but the combined zr_matrix is used
// for the linear layer
#if defined(RECORD_STATE)
COPY(state[LOWMC_R - 1].state, x);
#endif
SBOX(x);

unsigned int i = (LOWMC_R - 1);
const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] ^=
(nl << (1 - (i & 1)) * 32) & WORD_C(0xFFFFFFFF00000000);
MUL(y, x, LOWMC_INSTANCE.zr_matrix);
COPY(x, y);

#if defined(RECORD_STATE)
COPY(state[LOWMC_R].state, x);
#else
COPY(c, x);
#endif
}

// vim: ft=c

+ 84
- 0
src/sign/picnic/picnic3l1/avx2/lowmc_pars.h View File

@@ -0,0 +1,84 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef LOWMC_PARS_H
#define LOWMC_PARS_H

#include <stddef.h>

#include "mzd_additional.h"

/* A LowMC key uses the same block type as the cipher state. */
typedef mzd_local_t lowmc_key_t;

/* Upper bounds over the LowMC instances; sizes are in bytes unless suffixed _BITS. */
#define MAX_LOWMC_BLOCK_SIZE 32
#define MAX_LOWMC_BLOCK_SIZE_BITS (MAX_LOWMC_BLOCK_SIZE * 8)
#define MAX_LOWMC_KEY_SIZE MAX_LOWMC_BLOCK_SIZE
#define MAX_LOWMC_KEY_SIZE_BITS (MAX_LOWMC_KEY_SIZE * 8)
#define MAX_LOWMC_ROUNDS 38

/**
 * Masks for 10 S-boxes.
 *
 * MASK_X0I, MASK_X1I and MASK_X2I are pairwise disjoint, each selects every third
 * bit, and together they cover the top 30 bits of a 64-bit word. MASK_MASK is the
 * complement of their union (the low 34 bits).
 */
#define MASK_X0I UINT64_C(0x2492492400000000)
#define MASK_X1I UINT64_C(0x4924924800000000)
#define MASK_X2I UINT64_C(0x9249249000000000)
#define MASK_MASK UINT64_C(0x00000003ffffffff)

/**
 * LowMC parameters
 *
 * The implementation selectors dispatch on `m` and `n`; the per-instance
 * initializer macros fill the fields in the order m, n, r, k.
 */
typedef struct {
unsigned int m;
unsigned int n;
unsigned int r;
unsigned int k;
} lowmc_parameters_t;

/**
 * LowMC round with full Sbox layer
 *
 * k_matrix is multiplied with the key, l_matrix with the state and `constant` is
 * XORed in during evaluation; li_matrix is used by the aux computation, which
 * walks the rounds backwards.
 */
typedef struct {
const mzd_local_t* k_matrix;
const mzd_local_t* l_matrix;
const mzd_local_t* li_matrix;
const mzd_local_t* constant;
} lowmc_round_t;

/**
 * LowMC definition with full Sbox layer
 *
 * `rounds` is indexed by round number; the templates access entries 0 .. LOWMC_R - 1.
 */
typedef struct {
const mzd_local_t* k0_matrix; // K_0
const mzd_local_t* ki0_matrix; // inverse of K_0
const lowmc_round_t* rounds;
} lowmc_t;

/**
 * LowMC round with partial Sbox layer
 *
 * Consumed by lowmc_impl_partial.c.i: z_matrix by MUL_Z, r_mask by SHUFFLE and
 * r_matrix by ADDMUL_R.
 */
typedef struct {
const mzd_local_t* z_matrix;
const mzd_local_t* r_matrix;
const word r_mask;
} lowmc_partial_round_t;

/**
 * LowMC definition with partial Sbox layer
 *
 * The precomputed_* members feed the one-time setup at the top of
 * lowmc_impl_partial.c.i; zr_matrix is applied after the last S-box layer.
 */
typedef struct {
const mzd_local_t* k0_matrix; // K_0 + precomputed
const mzd_local_t* zr_matrix; // combined linear layers
const lowmc_partial_round_t* rounds;

const mzd_local_t* precomputed_non_linear_part_matrix;
const mzd_local_t* precomputed_constant_linear;
const mzd_local_t* precomputed_constant_non_linear;
} lowmc_partial_t;

#endif

+ 312
- 0
src/sign/picnic/picnic3l1/avx2/macros.h View File

@@ -0,0 +1,312 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC_MACROS_H
#define PICNIC_MACROS_H

/* __FUNCTION__ generates a warning on Linux with -Wpedantic and newer versions
 * of GCC (tested with 5.4). So we use __func__ in all source and define it on
 * Windows.
 *
 * NOTE(review): __func__ is a predefined identifier since C99; redefining it as a
 * macro is only meant for Windows toolchains that lack it.
 */
#if defined(__WINDOWS__)
#define __func__ __FUNCTION__
#endif

/* compatibility with clang and other compilers: when the compiler does not
 * provide these feature-test macros, make them evaluate to 0 so they can be used
 * unconditionally in #if expressions below */
#if !defined(__has_attribute)
#define __has_attribute(a) 0
#endif

#if !defined(__has_builtin)
#define __has_builtin(b) 0
#endif

/* gcc version check macro: true when the compiler reports a GNU C version of at
 * least maj.min; always 0 when __GNUC__/__GNUC_MINOR__ are not defined. Major and
 * minor are packed as (major << 20) + (minor << 10) for the comparison. */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#define GNUC_CHECK(maj, min) \
(((__GNUC__ << 20) + (__GNUC_MINOR__ << 10)) >= (((maj) << 20) + ((min) << 10)))
#else
#define GNUC_CHECK(maj, min) 0
#endif

/* glibc version check macro: forwards to __GLIBC_PREREQ when __GLIBC__ is
 * defined, otherwise evaluates to 0 */
#if defined(__GLIBC__)
#define GLIBC_CHECK(maj, min) __GLIBC_PREREQ(maj, min)
#else
#define GLIBC_CHECK(maj, min) 0
#endif

/* FreeBSD version check macro: compares __FreeBSD__ against maj only.
 * NOTE(review): the `min` argument is accepted but not used. */
#if defined(__FreeBSD__)
#define FREEBSD_CHECK(maj, min) (__FreeBSD__ >= (maj))
#else
#define FREEBSD_CHECK(maj, min) 0
#endif

/* NetBSD version check macro: compares __NetBSD_Version__ (from <sys/param.h>)
 * against maj * 1000000000 + min * 10000000; 0 on other systems */
#if defined(__NetBSD__)
#include <sys/param.h>
#define NETBSD_CHECK(maj, min) (__NetBSD_Version__ >= ((maj)*1000000000 + (min)*10000000))
#else
#define NETBSD_CHECK(maj, min) 0
#endif

/* Apple version check macro:
* non-zero when the minimum required macOS version of the build
* (__MAC_OS_X_VERSION_MIN_REQUIRED from <Availability.h>) is at least
* maj.min.rev, encoded as maj*10000 + min*100 + rev. Evaluates to 0 when
* __APPLE__ is not defined. */
#if defined(__APPLE__)
#include <Availability.h>
#define MACOSX_CHECK(maj, min, rev) \
(__MAC_OS_X_VERSION_MIN_REQUIRED >= ((maj)*10000 + (min)*100 + (rev)))
#else
#define MACOSX_CHECK(maj, min, rev) 0
#endif

/* Minimum / maximum of two values. Only defined when the including
* environment has not already provided them. Both are plain macros: each
* argument may be evaluated twice, so do not pass expressions with side
* effects. */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* assume:
 * ASSUME(p) tells the optimizer that p holds at this point. It must be used
 * as a statement (ASSUME(cond);).
 *
 * The GCC/clang variant is wrapped in do { } while (0) so that the macro
 * behaves like a single statement; a bare `if` would capture a following
 * `else` when ASSUME is used inside an unbraced if/else.
 */
#if GNUC_CHECK(4, 5) || __has_builtin(__builtin_unreachable)
#define ASSUME(p)                                                                                  \
  do {                                                                                             \
    if (!(p)) {                                                                                    \
      __builtin_unreachable();                                                                     \
    }                                                                                              \
  } while (0)
#elif defined(_MSC_VER)
#define ASSUME(p) __assume(p)
#else
/* no optimizer hint available: just evaluate p */
#define ASSUME(p) ((void)(p))
#endif

/* nonnull attribute:
* ATTR_NONNULL marks all pointer parameters of a function as non-null,
* ATTR_NONNULL_ARG(i) only the i-th parameter. Both expand to nothing when
* the compiler does not support the attribute. */
#if GNUC_CHECK(3, 3) || __has_attribute(nonnull)
#define ATTR_NONNULL __attribute__((nonnull))
#define ATTR_NONNULL_ARG(i) __attribute__((nonnull(i)))
#else
#define ATTR_NONNULL
#define ATTR_NONNULL_ARG(i)
#endif

/* destructor attribute:
* expands to __attribute__((destructor)) where supported and to nothing
* otherwise. */
#if GNUC_CHECK(2, 7) || __has_attribute(destructor)
#define ATTR_DTOR __attribute__((destructor))
#else
#define ATTR_DTOR
#endif

/* assumed aligned attribute:
* ATTR_ASSUME_ALIGNED(i) expands to __attribute__((assume_aligned(i)))
* where supported and to nothing otherwise. */
#if GNUC_CHECK(4, 9) || __has_attribute(assume_aligned)
#define ATTR_ASSUME_ALIGNED(i) __attribute__((assume_aligned(i)))
#else
#define ATTR_ASSUME_ALIGNED(i)
#endif

/* aligned attribute:
* ATTR_ALIGNED(i) requests i-byte alignment for a declaration. When the
* attribute is available HAVE_USEFUL_ATTR_ALIGNED is defined as well, so
* other code can test whether the request has any effect. Without compiler
* support ATTR_ALIGNED expands to nothing. The MSVC variant below is
* intentionally disabled. */
/* note that C11's alignas will only do the job once DR 444 is implemented */
#if GNUC_CHECK(4, 9) || __has_attribute(aligned)
#define ATTR_ALIGNED(i) __attribute__((aligned((i))))
#define HAVE_USEFUL_ATTR_ALIGNED
/* #elif defined(_MSC_VER)
#define ATTR_ALIGNED(i) __declspec(align((i)))
#define HAVE_USEFUL_ATTR_ALIGNED */
#else
#define ATTR_ALIGNED(i)
#endif

/* round size to meet alignment requirements:
* ALIGNT(s, t) rounds s up to the next multiple of sizeof(t). The bit mask
* only gives a correct result when sizeof(t) is a power of two.
* ALIGNU64T(s) is the same for uint64_t. */
#define ALIGNT(s, t) (((s) + sizeof(t) - 1) & ~(sizeof(t) - 1))
#define ALIGNU64T(s) ALIGNT(s, uint64_t)

/* unreachable builtin:
* UNREACHABLE is only defined when __builtin_unreachable is available;
* there is deliberately no fallback, so users must test
* defined(UNREACHABLE) first. The MSVC variant is disabled. */
#if GNUC_CHECK(4, 5) || __has_builtin(__builtin_unreachable)
#define UNREACHABLE __builtin_unreachable()
/* #elif defined(_MSC_VER)
#define UNREACHABLE __assume(0) */
#endif

/* assume aligned builtin:
* ASSUME_ALIGNED(p, a) yields p while telling the optimizer that p is
* aligned to a bytes. Three variants, in order of preference:
*  1. __builtin_assume_aligned,
*  2. an explicit alignment test whose failing branch is UNREACHABLE
*     (evaluates p more than once and needs uintptr_t),
*  3. plain p without any hint. */
#if GNUC_CHECK(4, 9) || __has_builtin(__builtin_assume_aligned)
#define ASSUME_ALIGNED(p, a) __builtin_assume_aligned((p), (a))
#elif defined(UNREACHABLE) && defined(HAVE_USEFUL_ATTR_ALIGNED)
#define ASSUME_ALIGNED(p, a) (((((uintptr_t)(p)) % (a)) == 0) ? (p) : (UNREACHABLE, (p)))
#else
#define ASSUME_ALIGNED(p, a) (p)
#endif

/* always inline attribute:
* uses __attribute__((always_inline)) on GCC-compatible compilers,
* __forceinline on MSVC and expands to nothing elsewhere. */
#if GNUC_CHECK(4, 0) || __has_attribute(always_inline)
#define ATTR_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER)
#define ATTR_ALWAYS_INLINE __forceinline
#else
#define ATTR_ALWAYS_INLINE
#endif

/* pure attribute:
* expands to __attribute__((pure)) on any GNU C compatible compiler (no
* version check) or when __has_attribute reports support; empty otherwise. */
#if defined(__GNUC__) || __has_attribute(pure)
#define ATTR_PURE __attribute__((pure))
#else
#define ATTR_PURE
#endif

/* const attribute:
* expands to __attribute__((const)) on any GNU C compatible compiler (no
* version check) or when __has_attribute reports support; empty otherwise. */
#if defined(__GNUC__) || __has_attribute(const)
#define ATTR_CONST __attribute__((const))
#else
#define ATTR_CONST
#endif

/* target attribute:
* ATTR_TARGET(x) compiles a single function for the instruction set
* extensions named in the string x; expands to nothing without compiler
* support. */
#if defined(__GNUC__) || __has_attribute(target)
#define ATTR_TARGET(x) __attribute__((target((x))))
#else
#define ATTR_TARGET(x)
#endif

/* artificial attribute:
* expands to __attribute__((__artificial__)) where supported and to
* nothing otherwise. */
#if GNUC_CHECK(4, 7) || __has_attribute(__artificial__)
#define ATTR_ARTIFICIAL __attribute__((__artificial__))
#else
#define ATTR_ARTIFICIAL
#endif

/* Per-instruction-set target attributes. The AVX2 variant enables BMI2 as
* well. */
#define ATTR_TARGET_AVX2 ATTR_TARGET("avx2,bmi2")
#define ATTR_TARGET_SSE2 ATTR_TARGET("sse2")

/* Attribute bundles for small SIMD helper functions: artificial +
* always-inline + the matching target. The NEON bundle carries no target
* attribute. */
#define FN_ATTRIBUTES_AVX2 ATTR_ARTIFICIAL ATTR_ALWAYS_INLINE ATTR_TARGET_AVX2
#define FN_ATTRIBUTES_SSE2 ATTR_ARTIFICIAL ATTR_ALWAYS_INLINE ATTR_TARGET_SSE2
#define FN_ATTRIBUTES_NEON ATTR_ARTIFICIAL ATTR_ALWAYS_INLINE

/* Same bundles with ATTR_PURE added. */
#define FN_ATTRIBUTES_AVX2_PURE FN_ATTRIBUTES_AVX2 ATTR_PURE
#define FN_ATTRIBUTES_SSE2_PURE FN_ATTRIBUTES_SSE2 ATTR_PURE
#define FN_ATTRIBUTES_NEON_PURE FN_ATTRIBUTES_NEON ATTR_PURE

/* Same bundles with ATTR_CONST added. */
#define FN_ATTRIBUTES_AVX2_CONST FN_ATTRIBUTES_AVX2 ATTR_CONST
#define FN_ATTRIBUTES_SSE2_CONST FN_ATTRIBUTES_SSE2 ATTR_CONST
#define FN_ATTRIBUTES_NEON_CONST FN_ATTRIBUTES_NEON ATTR_CONST

/* concatenation:
* CONCAT(a, b) joins two tokens with an underscore in between (a_b).
* The extra level of indirection through CONCAT2 makes sure that a and b
* are macro-expanded before they are pasted. */
#define CONCAT2(a, b) a##_##b
#define CONCAT(a, b) CONCAT2(a, b)

/* helper macros/functions for checked integer subtraction
 *
 * sub_overflow_size_t(x, y, diff) stores x - y (modulo the range of size_t)
 * in *diff and returns true when the subtraction wrapped around, i.e. when
 * x < y.
 *
 * The feature test asks for __builtin_sub_overflow, which is the builtin
 * that is actually used.
 */
#if GNUC_CHECK(5, 0) || __has_builtin(__builtin_sub_overflow)
#define sub_overflow_size_t(x, y, diff) __builtin_sub_overflow(x, y, diff)
#else
#include <stdbool.h>
#include <stddef.h>

ATTR_ARTIFICIAL
static inline bool sub_overflow_size_t(const size_t x, const size_t y, size_t* diff) {
  /* unsigned subtraction wraps; the wrap happened exactly when x < y */
  *diff = x - y;
  return x < y;
}
#endif

#include <stdint.h>

/* helper functions for parity computations
 *
 * Each function returns 1 when the argument has an odd number of set bits
 * and 0 otherwise. Compilers with __builtin_parity use the builtin, all
 * others use portable bit tricks.
 */
#if GNUC_CHECK(4, 9) || __has_builtin(__builtin_parity)
ATTR_CONST ATTR_ARTIFICIAL static inline uint8_t parity64_uint8(uint8_t in) {
  const int odd = __builtin_parity(in);
  return odd;
}

ATTR_CONST ATTR_ARTIFICIAL static inline uint16_t parity64_uint16(uint16_t in) {
  const int odd = __builtin_parity(in);
  return odd;
}

ATTR_CONST ATTR_ARTIFICIAL static inline uint64_t parity64_uint64(uint64_t in) {
  const int odd = __builtin_parityll(in);
  return odd;
}
#else
ATTR_CONST ATTR_ARTIFICIAL static inline uint8_t parity64_uint8(uint8_t in) {
  /* byte parity from: https://graphics.stanford.edu/~seander/bithacks.html#ParityWith64Bits */
  const uint64_t replicated = in * UINT64_C(0x0101010101010101);
  const uint64_t selected   = replicated & UINT64_C(0x8040201008040201);
  return (selected % 0x1FF) & 1;
}

ATTR_CONST ATTR_ARTIFICIAL static inline uint16_t parity64_uint16(uint16_t in) {
  /* fold neighbouring bits so every nibble's lowest bit holds its parity */
  in = in ^ (in >> 1);
  in = in ^ (in >> 2);
  /* the multiplication sums the four nibble parities into bit 12 */
  in = (in & 0x1111) * 0x1111;
  return (in >> 12) & 1;
}

ATTR_CONST ATTR_ARTIFICIAL static inline uint64_t parity64_uint64(uint64_t in) {
  /* fold neighbouring bits so every nibble's lowest bit holds its parity */
  in = in ^ (in >> 1);
  in = in ^ (in >> 2);
  /* the multiplication sums the sixteen nibble parities into bit 60 */
  in = (in & UINT64_C(0x1111111111111111)) * UINT64_C(0x1111111111111111);
  return (in >> 60) & 1;
}
#endif

/* helper functions to compute number of leading zeroes
 *
 * clz(x) returns the number of leading zero bits of the 32 bit value x and
 * 32 for x == 0.
 */
#if GNUC_CHECK(4, 7) || __has_builtin(__builtin_clz)
ATTR_CONST ATTR_ARTIFICIAL static inline uint32_t clz(uint32_t x) {
  /* the builtin is undefined for 0, so that case is handled separately */
  if (x == 0) {
    return 32;
  }
  return __builtin_clz(x);
}
#else
/* Number of leading zeroes of x.
 * From the book
 * H.S. Warren, *Hacker's Delight*, Pearson Education, 2003.
 * http://www.hackersdelight.org/hdcodetxt/nlz.c.txt
 */
ATTR_CONST ATTR_ARTIFICIAL static inline uint32_t clz(uint32_t x) {
  if (x == 0) {
    return 32;
  }

  /* binary search: whenever the upper part is empty, count it and shift
   * the lower part up */
  uint32_t zeroes = 1;
  if ((x >> 16) == 0) {
    zeroes += 16;
    x <<= 16;
  }
  if ((x >> 24) == 0) {
    zeroes += 8;
    x <<= 8;
  }
  if ((x >> 28) == 0) {
    zeroes += 4;
    x <<= 4;
  }
  if ((x >> 30) == 0) {
    zeroes += 2;
    x <<= 2;
  }
  /* counting started at 1; take it back if the top bit is set */
  zeroes -= x >> 31;

  return zeroes;
}
#endif

/* Smallest n with 2^n >= x, computed as 32 - clz(x - 1).
 * Returns 0 for x == 0 (and for x == 1). */
ATTR_CONST ATTR_ARTIFICIAL static inline uint32_t ceil_log2(uint32_t x) {
  return (x == 0) ? 0 : 32 - clz(x - 1);
}

/* printf conversion specification for size_t: "%Iu" when __WIN32__ is
* defined, the standard "%zu" everywhere else. */
#if defined(__WIN32__)
#define SIZET_FMT "%Iu"
#else
#define SIZET_FMT "%zu"
#endif

/* crypto_declassify wrapper:
* picnic_declassify(x, len) maps to crypto_declassify when built with
* TIMECOP, to VALGRIND_MAKE_MEM_DEFINED when built with WITH_VALGRIND, and
* expands to nothing in regular builds. */
#if defined(TIMECOP)
#include "crypto_declassify.h"
#define picnic_declassify(x, len) crypto_declassify(x, len)
#elif defined(WITH_VALGRIND)
#include <valgrind/memcheck.h>
#define picnic_declassify(x, len) VALGRIND_MAKE_MEM_DEFINED(x, len)
#else
#define picnic_declassify(x, len)
#endif

#endif

+ 912
- 0
src/sign/picnic/picnic3l1/avx2/mzd_additional.c View File

@@ -0,0 +1,912 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#if !defined(_MSC_VER)
#include <stdalign.h>
#endif
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "compat.h"
#include "mzd_additional.h"

/* Provide static_assert through the C11 keyword when <assert.h> did not
* define the macro (non-MSVC compilers only). */
#if !defined(_MSC_VER) && !defined(static_assert)
#define static_assert _Static_assert
#endif

/* Compile-time layout check: sizeof(mzd_local_t), rounded up to a multiple
* of 32, must be exactly 32, i.e. the type occupies between 1 and 32 bytes. */
static_assert(((sizeof(mzd_local_t) + 0x1f) & ~0x1f) == 32, "sizeof mzd_local_t not supported");

#include "simd.h"
/* Number of words that fit into 128 bits; rows wider than this are padded
* to 32 bytes instead of 16 (see calculate_rowstride). */
static const unsigned int align_bound = 128 / (8 * sizeof(word));

/* Returns the row stride in words for a row that holds `width` words.
 *
 * As soon as we hit the AVX bound (more than align_bound words), rows are
 * padded to a multiple of 32 bytes. Narrower rows are padded to 16 bytes for
 * SSE2 and 128 bit vectors. */
static size_t calculate_rowstride(size_t width) {
  const size_t row_bytes = width * sizeof(word);
  const size_t alignment = (width > align_bound) ? 32 : 16;

  const size_t padded_bytes = (row_bytes + alignment - 1) & ~(alignment - 1);
  return padded_bytes / sizeof(word);
}

/* Returns the number of words needed to store c bits (c divided by the
 * number of bits per word, rounded up). */
static size_t calculate_width(size_t c) {
  const size_t bits_per_word = sizeof(word) * 8;
  return (c + bits_per_word - 1) / bits_per_word;
}

// Notes on the memory layout: mzd_init allocates multiple memory blocks (one
// for mzd_local_t, one for rows and multiple for the buffers). We use one memory
// block for mzd_local_t, rows and the buffer. This improves memory locality and
// requires fewer calls to malloc.
//
// In mzd_local_init_multiple we do the same, but store n mzd_local_t instances in one
// memory block.

/**
 * Allocates storage for a matrix with r rows and c columns (bits).
 *
 * The buffer size is r times the padded row stride, rounded up to a multiple
 * of 32 bytes, and the buffer itself is aligned to 32 bytes.
 *
 * @param r     number of rows
 * @param c     number of columns
 * @param clear if true, the whole buffer is set to zero
 * @return the new matrix, or NULL if the allocation failed; release it with
 *         mzd_local_free
 */
mzd_local_t* mzd_local_init_ex(unsigned int r, unsigned int c, bool clear) {
  const size_t rowstride = calculate_rowstride(calculate_width(c));

  const size_t buffer_size = r * rowstride * sizeof(word);
  const size_t alloc_size  = (buffer_size + 31) & ~31;

  /* We always align mzd_local_ts to 32 bytes. Thus the first row is always
   * aligned to 32 bytes as well. For 128 bit and SSE all other rows are then
   * aligned to 16 bytes. */
  unsigned char* buffer = aligned_alloc(32, alloc_size);
  if (!buffer) {
    /* never hand a null pointer to memset */
    return NULL;
  }
  if (clear) {
    memset(buffer, 0, alloc_size);
  }

  return (mzd_local_t*)buffer;
}

/* Releases the matrix v by passing it to aligned_free, the counterpart of
* the aligned_alloc call in mzd_local_init_ex. */
void mzd_local_free(mzd_local_t* v) {
aligned_free(v);
}

/**
 * Allocates n matrices with r rows and c columns each in one memory block.
 *
 * Every matrix occupies the same number of bytes (a multiple of 32), so all
 * of them are aligned to 32 bytes. dst[0] is the start of the block, which
 * is why mzd_local_free_multiple only releases the first entry.
 *
 * @param dst   receives the n matrix pointers
 * @param n     number of matrices
 * @param r     number of rows per matrix
 * @param c     number of columns per matrix
 * @param clear if true, the whole block is set to zero
 *
 * If the allocation fails, all n entries of dst are set to NULL.
 */
void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, unsigned int r, unsigned int c, bool clear) {
  const size_t rowstride = calculate_rowstride(calculate_width(c));

  const size_t buffer_size   = r * rowstride * sizeof(word);
  const size_t size_per_elem = (buffer_size + 31) & ~31;

  unsigned char* full_buffer = aligned_alloc(32, size_per_elem * n);
  if (!full_buffer) {
    /* report the failure through dst instead of computing offsets from a
     * null pointer */
    for (size_t s = 0; s < n; ++s) {
      dst[s] = NULL;
    }
    return;
  }
  if (clear) {
    memset(full_buffer, 0, size_per_elem * n);
  }

  for (size_t s = 0; s < n; ++s, full_buffer += size_per_elem) {
    dst[s] = (mzd_local_t*)full_buffer;
  }
}

/* Releases matrices allocated together by mzd_local_init_multiple_ex. Only
 * vs[0] is freed because it is the start of the shared memory block. A NULL
 * array is ignored. */
void mzd_local_free_multiple(mzd_local_t** vs) {
  if (vs == NULL) {
    return;
  }
  aligned_free(vs[0]);
}

/* implementation of copy */

/* Copies the first two 64 bit words (128 bits) of src to dst. */
void mzd_copy_uint64_128(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w64[0] = from->w64[0];
  to->w64[1] = from->w64[1];
}

/* Copies the first three 64 bit words (192 bits) of src to dst. */
void mzd_copy_uint64_192(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w64[0] = from->w64[0];
  to->w64[1] = from->w64[1];
  to->w64[2] = from->w64[2];
}

/* Copies the first four 64 bit words (256 bits) of src to dst. */
void mzd_copy_uint64_256(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w64[0] = from->w64[0];
  to->w64[1] = from->w64[1];
  to->w64[2] = from->w64[2];
  to->w64[3] = from->w64[3];
}

/* Copies the first 128 bit lane of src to dst. */
ATTR_TARGET_S128
void mzd_copy_s128_128(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w128[0] = from->w128[0];
}

/* Copies both 128 bit lanes (256 bits) of src to dst. */
ATTR_TARGET_S128
void mzd_copy_s128_256(mzd_local_t* dst, mzd_local_t const* src) {
  dst->w128[0] = src->w128[0];
  dst->w128[1] = src->w128[1];
}

/* Copies the first 128 bit lane of src to dst (AVX2 build of the copy). */
ATTR_TARGET_AVX2
void mzd_copy_s256_128(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w128[0] = from->w128[0];
}

/* Copies the 256 bit vector of the first block of src to dst. */
ATTR_TARGET_AVX2
void mzd_copy_s256_256(mzd_local_t* dst, mzd_local_t const* src) {
  const block_t* from = CONST_BLOCK(src, 0);
  block_t* to         = BLOCK(dst, 0);

  to->w256 = from->w256;
}

/* implementation of mzd_xor and variants */

/* res = first ^ second on the first 128 bit lane. */
ATTR_TARGET_S128
void mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);
  block_t* out       = BLOCK(res, 0);

  out->w128[0] = mm128_xor(lhs->w128[0], rhs->w128[0]);
}

/* XORs `count` consecutive blocks of fblock and sblock into rblock, handling
 * both 128 bit lanes of every block. */
ATTR_TARGET_S128
static void mzd_xor_s128_blocks(block_t* rblock, const block_t* fblock, const block_t* sblock,
                                unsigned int count) {
  for (unsigned int b = 0; b < count; ++b) {
    rblock[b].w128[0] = mm128_xor(fblock[b].w128[0], sblock[b].w128[0]);
    rblock[b].w128[1] = mm128_xor(fblock[b].w128[1], sblock[b].w128[1]);
  }
}

/* res = first ^ second on one full block (256 bits), using 128 bit lanes. */
ATTR_TARGET_S128
void mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_s128_blocks(out, lhs, rhs, 1);
}




/* res = first ^ second on the first 128 bit lane (AVX2 build). */
ATTR_TARGET_AVX2
void mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);
  block_t* out       = BLOCK(res, 0);

  out->w128[0] = mm128_xor(lhs->w128[0], rhs->w128[0]);
}

/* XORs `count` consecutive 256 bit blocks of fblock and sblock into rblock. */
ATTR_TARGET_AVX2
static void mzd_xor_s256_blocks(block_t* rblock, const block_t* fblock, const block_t* sblock,
                                unsigned int count) {
  for (unsigned int b = 0; b < count; ++b) {
    rblock[b].w256 = mm256_xor(fblock[b].w256, sblock[b].w256);
  }
}

/* res = first ^ second on one 256 bit block. */
ATTR_TARGET_AVX2
void mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_s256_blocks(out, lhs, rhs, 1);
}

/* res = first ^ second on three consecutive 256 bit blocks (768 bits). */
ATTR_TARGET_AVX2
void mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_s256_blocks(out, lhs, rhs, 3);
}

void mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
mzd_xor_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 4);
}

void mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
mzd_xor_s256_blocks(BLOCK(res, 0), CONST_BLOCK(first, 0), CONST_BLOCK(second, 0), 5);
}

/* XORs the first `len` 64 bit words of fblock and sblock into rblock. */
static void mzd_xor_uint64_block(block_t* rblock, const block_t* fblock, const block_t* sblock,
                                 const unsigned int len) {
  unsigned int i = 0;
  while (i < len) {
    rblock->w64[i] = fblock->w64[i] ^ sblock->w64[i];
    ++i;
  }
}


/* res = first ^ second on two 64 bit words (128 bits). */
void mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_uint64_block(out, lhs, rhs, 2);
}

/* res = first ^ second on three 64 bit words (192 bits). */
void mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_uint64_block(out, lhs, rhs, 3);
}

/* res = first ^ second on four 64 bit words (256 bits). */
void mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_xor_uint64_block(out, lhs, rhs, 4);
}




/* implementation of mzd_and_* and variants */

/* res = first & second on the first 128 bit lane. */
ATTR_TARGET_S128
void mzd_and_s128_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);
  block_t* out       = BLOCK(res, 0);

  out->w128[0] = mm128_and(lhs->w128[0], rhs->w128[0]);
}

/* ANDs `count` consecutive blocks of fblock and sblock into rblock, handling
 * both 128 bit lanes of every block. */
ATTR_TARGET_S128
static inline void mzd_and_s128_blocks(block_t* rblock, const block_t* fblock,
                                       const block_t* sblock, unsigned int count) {
  for (unsigned int b = 0; b < count; ++b) {
    rblock[b].w128[0] = mm128_and(fblock[b].w128[0], sblock[b].w128[0]);
    rblock[b].w128[1] = mm128_and(fblock[b].w128[1], sblock[b].w128[1]);
  }
}

/* res = first & second on one full block (256 bits), using 128 bit lanes. */
ATTR_TARGET_S128
void mzd_and_s128_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_s128_blocks(out, lhs, rhs, 1);
}

/* res = first & second on the first 128 bit lane (AVX2 build). */
ATTR_TARGET_AVX2
void mzd_and_s256_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);
  block_t* out       = BLOCK(res, 0);

  out->w128[0] = mm128_and(lhs->w128[0], rhs->w128[0]);
}

/* ANDs `count` consecutive 256 bit blocks of fblock and sblock into rblock. */
ATTR_TARGET_AVX2
static inline void mzd_and_s256_blocks(block_t* rblock, const block_t* fblock,
                                       const block_t* sblock, unsigned int count) {
  for (unsigned int b = 0; b < count; ++b) {
    rblock[b].w256 = mm256_and(fblock[b].w256, sblock[b].w256);
  }
}

/* res = first & second on one 256 bit block. */
ATTR_TARGET_AVX2
void mzd_and_s256_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_s256_blocks(out, lhs, rhs, 1);
}

/* ANDs the first `len` 64 bit words of fblock and sblock into rblock. */
static inline void mzd_and_uint64_block(block_t* rblock, const block_t* fblock,
                                        const block_t* sblock, const unsigned int len) {
  unsigned int i = 0;
  while (i < len) {
    rblock->w64[i] = fblock->w64[i] & sblock->w64[i];
    ++i;
  }
}

/* res = first & second on two 64 bit words (128 bits). */
void mzd_and_uint64_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_uint64_block(out, lhs, rhs, 2);
}

/* res = first & second on three 64 bit words (192 bits). */
void mzd_and_uint64_192(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_uint64_block(out, lhs, rhs, 3);
}

/* res = first & second on four 64 bit words (256 bits). */
void mzd_and_uint64_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
  block_t* out       = BLOCK(res, 0);
  const block_t* lhs = CONST_BLOCK(first, 0);
  const block_t* rhs = CONST_BLOCK(second, 0);

  mzd_and_uint64_block(out, lhs, rhs, 4);
}

/* shifts and rotations */

/* res = val << count on a two-word (128 bit) value; w64[0] is the least
 * significant word.
 * count must be greater than 0 and smaller than the bit width of a word:
 * the carry is moved with a shift by (8 * sizeof(word) - count), which is
 * not a valid shift amount for count == 0. */
void mzd_shift_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in              = CONST_BLOCK(val, 0);
  block_t* out                   = BLOCK(res, 0);
  const unsigned int carry_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];

  out->w64[0] = w0 << count;
  out->w64[1] = (w1 << count) | (w0 >> carry_shift);
}

/* res = val >> count on a two-word (128 bit) value; w64[0] is the least
 * significant word.
 * count must be greater than 0 and smaller than the bit width of a word:
 * the carry is moved with a shift by (8 * sizeof(word) - count), which is
 * not a valid shift amount for count == 0. */
void mzd_shift_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in              = CONST_BLOCK(val, 0);
  block_t* out                   = BLOCK(res, 0);
  const unsigned int carry_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];

  out->w64[0] = (w0 >> count) | (w1 << carry_shift);
  out->w64[1] = w1 >> count;
}

/* res = val << count on a three-word (192 bit) value; w64[0] is the least
 * significant word.
 * count must be greater than 0 and smaller than the bit width of a word:
 * the carry is moved with a shift by (8 * sizeof(word) - count), which is
 * not a valid shift amount for count == 0. */
void mzd_shift_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in              = CONST_BLOCK(val, 0);
  block_t* out                   = BLOCK(res, 0);
  const unsigned int carry_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];

  out->w64[0] = w0 << count;
  out->w64[1] = (w1 << count) | (w0 >> carry_shift);
  out->w64[2] = (w2 << count) | (w1 >> carry_shift);
}

/* res = val >> count on a three-word (192 bit) value; w64[0] is the least
 * significant word.
 * count must be greater than 0 and smaller than the bit width of a word:
 * the carry is moved with a shift by (8 * sizeof(word) - count), which is
 * not a valid shift amount for count == 0. */
void mzd_shift_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in              = CONST_BLOCK(val, 0);
  block_t* out                   = BLOCK(res, 0);
  const unsigned int carry_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];

  out->w64[0] = (w0 >> count) | (w1 << carry_shift);
  out->w64[1] = (w1 >> count) | (w2 << carry_shift);
  out->w64[2] = w2 >> count;
}

/* res = val << count on a four-word (256 bit) value; w64[0] is the least
 * significant word.
 * count must be greater than 0 and smaller than the bit width of a word:
 * the carry is moved with a shift by (8 * sizeof(word) - count), which is
 * not a valid shift amount for count == 0. */
void mzd_shift_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in              = CONST_BLOCK(val, 0);
  block_t* out                   = BLOCK(res, 0);
  const unsigned int carry_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];
  const word w3 = in->w64[3];

  out->w64[0] = w0 << count;
  out->w64[1] = (w1 << count) | (w0 >> carry_shift);
  out->w64[2] = (w2 << count) | (w1 >> carry_shift);
  out->w64[3] = (w3 << count) | (w2 >> carry_shift);
}

/* res = val >> count on a four-word (256 bit) value; w64[0] is the least
 * significant word.
 * count must be greater than 0 and smaller than the bit width of a word:
 * the carry is moved with a shift by (8 * sizeof(word) - count), which is
 * not a valid shift amount for count == 0. */
void mzd_shift_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in              = CONST_BLOCK(val, 0);
  block_t* out                   = BLOCK(res, 0);
  const unsigned int carry_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];
  const word w3 = in->w64[3];

  out->w64[0] = (w0 >> count) | (w1 << carry_shift);
  out->w64[1] = (w1 >> count) | (w2 << carry_shift);
  out->w64[2] = (w2 >> count) | (w3 << carry_shift);
  out->w64[3] = w3 >> count;
}

#if defined(PICNIC_STATIC)
/* res = val rotated left by count bits within two words (128 bits); the
 * bits shifted out of the top word re-enter at the bottom of w64[0].
 * count must be greater than 0 and smaller than the bit width of a word,
 * because (8 * sizeof(word) - count) is used as a shift amount. */
void mzd_rotate_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in             = CONST_BLOCK(val, 0);
  block_t* out                  = BLOCK(res, 0);
  const unsigned int wrap_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];

  out->w64[0] = (w0 << count) | (w1 >> wrap_shift);
  out->w64[1] = (w1 << count) | (w0 >> wrap_shift);
}

/* res = val rotated right by count bits within two words (128 bits); the
 * bits shifted out of w64[0] re-enter at the top of the last word.
 * count must be greater than 0 and smaller than the bit width of a word,
 * because (8 * sizeof(word) - count) is used as a shift amount. */
void mzd_rotate_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in             = CONST_BLOCK(val, 0);
  block_t* out                  = BLOCK(res, 0);
  const unsigned int wrap_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];

  out->w64[0] = (w0 >> count) | (w1 << wrap_shift);
  out->w64[1] = (w1 >> count) | (w0 << wrap_shift);
}

/* res = val rotated left by count bits within three words (192 bits); the
 * bits shifted out of the top word re-enter at the bottom of w64[0].
 * count must be greater than 0 and smaller than the bit width of a word,
 * because (8 * sizeof(word) - count) is used as a shift amount. */
void mzd_rotate_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in             = CONST_BLOCK(val, 0);
  block_t* out                  = BLOCK(res, 0);
  const unsigned int wrap_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];

  out->w64[0] = (w0 << count) | (w2 >> wrap_shift);
  out->w64[1] = (w1 << count) | (w0 >> wrap_shift);
  out->w64[2] = (w2 << count) | (w1 >> wrap_shift);
}

/* res = val rotated right by count bits within three words (192 bits); the
 * bits shifted out of w64[0] re-enter at the top of the last word.
 * count must be greater than 0 and smaller than the bit width of a word,
 * because (8 * sizeof(word) - count) is used as a shift amount. */
void mzd_rotate_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in             = CONST_BLOCK(val, 0);
  block_t* out                  = BLOCK(res, 0);
  const unsigned int wrap_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];

  out->w64[0] = (w0 >> count) | (w1 << wrap_shift);
  out->w64[1] = (w1 >> count) | (w2 << wrap_shift);
  out->w64[2] = (w2 >> count) | (w0 << wrap_shift);
}

/* res = val rotated left by count bits within four words (256 bits); the
 * bits shifted out of the top word re-enter at the bottom of w64[0].
 * count must be greater than 0 and smaller than the bit width of a word,
 * because (8 * sizeof(word) - count) is used as a shift amount. */
void mzd_rotate_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in             = CONST_BLOCK(val, 0);
  block_t* out                  = BLOCK(res, 0);
  const unsigned int wrap_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];
  const word w3 = in->w64[3];

  out->w64[0] = (w0 << count) | (w3 >> wrap_shift);
  out->w64[1] = (w1 << count) | (w0 >> wrap_shift);
  out->w64[2] = (w2 << count) | (w1 >> wrap_shift);
  out->w64[3] = (w3 << count) | (w2 >> wrap_shift);
}

/* res = val rotated right by count bits within four words (256 bits); the
 * bits shifted out of w64[0] re-enter at the top of the last word.
 * count must be greater than 0 and smaller than the bit width of a word,
 * because (8 * sizeof(word) - count) is used as a shift amount. */
void mzd_rotate_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count) {
  const block_t* in             = CONST_BLOCK(val, 0);
  block_t* out                  = BLOCK(res, 0);
  const unsigned int wrap_shift = 8 * sizeof(word) - count;

  /* read everything first so that res may be the same object as val */
  const word w0 = in->w64[0];
  const word w1 = in->w64[1];
  const word w2 = in->w64[2];
  const word w3 = in->w64[3];

  out->w64[0] = (w0 >> count) | (w1 << wrap_shift);
  out->w64[1] = (w1 >> count) | (w2 << wrap_shift);
  out->w64[2] = (w2 >> count) | (w3 << wrap_shift);
  out->w64[3] = (w3 >> count) | (w0 << wrap_shift);
}
#endif




/* Expands bit number `bit` of idx into a mask: the negated bit (all ones
 * if the bit is set, zero otherwise) is handed to mm128_broadcast_u64. */
ATTR_TARGET_S128 ATTR_ARTIFICIAL ATTR_CONST static inline word128
mm128_compute_mask(const word idx, const size_t bit) {
  const word selected = (idx >> bit) & 1;
  return mm128_broadcast_u64(-selected);
}

/* c = v * A for a 128 bit vector v, 128 bit SIMD version.
 *
 * Walks over the 128 bits of v, lowest bit of w64[0] first. A stores two
 * 128 bit rows per block; every bit of v selects (through a mask) whether
 * its row takes part in the result (presumably mm128_xor_mask XORs the
 * masked row into the accumulator - see simd.h). Two accumulators are used
 * in turn and combined at the end. */
ATTR_TARGET_S128
void mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero};
  for (unsigned int w = 0; w < 2; ++w) {
    word bits = vec[w];
    /* four bits of v, i.e. two blocks of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 4) {
      acc[0] = mm128_xor_mask(acc[0], rows[0].w128[0], mm128_compute_mask(bits, 0));
      acc[1] = mm128_xor_mask(acc[1], rows[0].w128[1], mm128_compute_mask(bits, 1));
      acc[0] = mm128_xor_mask(acc[0], rows[1].w128[0], mm128_compute_mask(bits, 2));
      acc[1] = mm128_xor_mask(acc[1], rows[1].w128[1], mm128_compute_mask(bits, 3));
      bits >>= 4;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[1]);
}

/* c = c + v * A for a 128 bit vector v, 128 bit SIMD version.
 *
 * Same loop as mzd_mul_v_s128_128, but the first accumulator starts with
 * the current value of c instead of zero. */
ATTR_TARGET_S128
void mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {out->w128[0], mm128_zero, mm128_zero,
                                                   mm128_zero};
  for (unsigned int w = 0; w < 2; ++w) {
    word bits = vec[w];
    /* four bits of v, i.e. two blocks of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 4) {
      acc[0] = mm128_xor_mask(acc[0], rows[0].w128[0], mm128_compute_mask(bits, 0));
      acc[1] = mm128_xor_mask(acc[1], rows[0].w128[1], mm128_compute_mask(bits, 1));
      acc[0] = mm128_xor_mask(acc[0], rows[1].w128[0], mm128_compute_mask(bits, 2));
      acc[1] = mm128_xor_mask(acc[1], rows[1].w128[1], mm128_compute_mask(bits, 3));
      bits >>= 4;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[1]);
}

/* c = v * A for a 129 bit vector v, 128 bit SIMD version.
 *
 * Of the first word of v only the top bit (bit 63) is used; it selects
 * block 63 of A. The following two words are processed completely,
 * starting at block 64. Every block of A holds one row (two 128 bit
 * lanes). Even and odd bits use separate accumulator pairs that are
 * combined at the end. */
ATTR_TARGET_S128
void mzd_mul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero};

  /* single leading bit */
  mm128_xor_mask_region(&acc[0], rows[63].w128, mm128_compute_mask(vec[0] >> 63, 0), 2);
  rows += 64;

  for (unsigned int w = 1; w <= 2; ++w) {
    word bits = vec[w];
    /* two bits of v, i.e. two rows of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 2) {
      mm128_xor_mask_region(&acc[0], rows[0].w128, mm128_compute_mask(bits, 0), 2);
      mm128_xor_mask_region(&acc[2], rows[1].w128, mm128_compute_mask(bits, 1), 2);
      bits >>= 2;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[2]);
  out->w128[1] = mm128_xor(acc[1], acc[3]);
}

/* c = c + v * A for a 129 bit vector v, 128 bit SIMD version.
 *
 * Same traversal as mzd_mul_v_s128_129 (top bit of the first word, then two
 * full words), but the first accumulator pair starts with the current
 * value of c. */
ATTR_TARGET_S128
void mzd_addmul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {out->w128[0], out->w128[1], mm128_zero,
                                                   mm128_zero};

  /* single leading bit */
  mm128_xor_mask_region(&acc[0], rows[63].w128, mm128_compute_mask(vec[0] >> 63, 0), 2);
  rows += 64;

  for (unsigned int w = 1; w <= 2; ++w) {
    word bits = vec[w];
    /* two bits of v, i.e. two rows of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 2) {
      mm128_xor_mask_region(&acc[0], rows[0].w128, mm128_compute_mask(bits, 0), 2);
      mm128_xor_mask_region(&acc[2], rows[1].w128, mm128_compute_mask(bits, 1), 2);
      bits >>= 2;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[2]);
  out->w128[1] = mm128_xor(acc[1], acc[3]);
}

/* c = v * A for a 192 bit vector v (three words), 128 bit SIMD version.
 *
 * Every block of A holds one row (two 128 bit lanes). Even and odd bits of
 * v use separate accumulator pairs that are combined at the end. */
ATTR_TARGET_S128
void mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero};
  for (unsigned int w = 0; w < 3; ++w) {
    word bits = vec[w];
    /* two bits of v, i.e. two rows of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 2) {
      mm128_xor_mask_region(&acc[0], rows[0].w128, mm128_compute_mask(bits, 0), 2);
      mm128_xor_mask_region(&acc[2], rows[1].w128, mm128_compute_mask(bits, 1), 2);
      bits >>= 2;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[2]);
  out->w128[1] = mm128_xor(acc[1], acc[3]);
}

/* c = c + v * A for a 192 bit vector v (three words), 128 bit SIMD version.
 *
 * Same loop as mzd_mul_v_s128_192, but the first accumulator pair starts
 * with the current value of c. */
ATTR_TARGET_S128
void mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {out->w128[0], out->w128[1], mm128_zero,
                                                   mm128_zero};
  for (unsigned int w = 0; w < 3; ++w) {
    word bits = vec[w];
    /* two bits of v, i.e. two rows of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 2) {
      mm128_xor_mask_region(&acc[0], rows[0].w128, mm128_compute_mask(bits, 0), 2);
      mm128_xor_mask_region(&acc[2], rows[1].w128, mm128_compute_mask(bits, 1), 2);
      bits >>= 2;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[2]);
  out->w128[1] = mm128_xor(acc[1], acc[3]);
}

/* c = v * A for a 256 bit vector v (four words), 128 bit SIMD version.
 *
 * Every block of A holds one row (two 128 bit lanes). Even and odd bits of
 * v use separate accumulator pairs that are combined at the end. */
ATTR_TARGET_S128
void mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {mm128_zero, mm128_zero, mm128_zero, mm128_zero};
  for (unsigned int w = 0; w < 4; ++w) {
    word bits = vec[w];
    /* two bits of v, i.e. two rows of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 2) {
      mm128_xor_mask_region(&acc[0], rows[0].w128, mm128_compute_mask(bits, 0), 2);
      mm128_xor_mask_region(&acc[2], rows[1].w128, mm128_compute_mask(bits, 1), 2);
      bits >>= 2;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[2]);
  out->w128[1] = mm128_xor(acc[1], acc[3]);
}

/* c = c + v * A for a 256 bit vector v (four words), 128 bit SIMD version.
 *
 * Same loop as mzd_mul_v_s128_256, but the first accumulator pair starts
 * with the current value of c. */
ATTR_TARGET_S128
void mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word128 acc[4] ATTR_ALIGNED(alignof(word128)) = {out->w128[0], out->w128[1], mm128_zero,
                                                   mm128_zero};
  for (unsigned int w = 0; w < 4; ++w) {
    word bits = vec[w];
    /* two bits of v, i.e. two rows of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 2) {
      mm128_xor_mask_region(&acc[0], rows[0].w128, mm128_compute_mask(bits, 0), 2);
      mm128_xor_mask_region(&acc[2], rows[1].w128, mm128_compute_mask(bits, 1), 2);
      bits >>= 2;
      rows += 2;
    }
  }
  out->w128[0] = mm128_xor(acc[0], acc[2]);
  out->w128[1] = mm128_xor(acc[1], acc[3]);
}




/* Expands bit number `bit` of idx into a 256 bit mask: all four 64 bit
 * lanes are all ones if the bit is set and zero otherwise. */
ATTR_TARGET_AVX2 ATTR_ARTIFICIAL ATTR_CONST static inline word256
mm256_compute_mask(const word idx, const size_t bit) {
  const word selected = (idx >> bit) & 1;
  return _mm256_set1_epi64x(-selected);
}

/* Builds a 256 bit mask from two neighbouring bits of idx: the lower two
 * 64 bit lanes follow bit `bit`, the upper two lanes follow bit `bit + 1`
 * (all ones if the bit is set, zero otherwise). */
ATTR_TARGET_AVX2 ATTR_ARTIFICIAL ATTR_CONST static inline word256
mm256_compute_mask_2(const word idx, const size_t bit) {
  const uint64_t low_lanes  = -((idx >> bit) & 1);
  const uint64_t high_lanes = -((idx >> (bit + 1)) & 1);
  /* _mm256_set_epi64x takes the most significant lane first */
  return _mm256_set_epi64x(high_lanes, high_lanes, low_lanes, low_lanes);
}

/* c = c + v * A for a 128 bit vector v, AVX2 version.
 *
 * Every 256 bit block of A holds two 128 bit rows, so one masked operation
 * handles two bits of v (mask built by mm256_compute_mask_2). The first
 * accumulator starts with c in its lower half. At the end both accumulators
 * are combined and the upper 128 bits are folded onto the lower 128 bits. */
ATTR_TARGET_AVX2
void mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {_mm256_setr_m128i(out->w128[0], mm128_zero),
                                                   mm256_zero};
  for (unsigned int w = 0; w < 2; ++w) {
    word bits = vec[w];
    /* eight bits of v, i.e. four blocks of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 8) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask_2(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask_2(bits, 2));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask_2(bits, 4));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask_2(bits, 6));
      bits >>= 8;
      rows += 4;
    }
  }
  acc[0] = mm256_xor(acc[0], acc[1]);

  /* move the upper half down and fold it onto the lower half */
  const word256 upper = _mm256_permute4x64_epi64(acc[0], _MM_SHUFFLE(3, 2, 3, 2));
  out->w128[0]        = _mm256_extracti128_si256(mm256_xor(acc[0], upper), 0);
}

/* c = v * A for a 128 bit vector v, AVX2 version.
 *
 * Every 256 bit block of A holds two 128 bit rows, so one masked operation
 * handles two bits of v (mask built by mm256_compute_mask_2). At the end
 * both accumulators are combined and the upper 128 bits are folded onto the
 * lower 128 bits. */
ATTR_TARGET_AVX2
void mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero};
  for (unsigned int w = 0; w < 2; ++w) {
    word bits = vec[w];
    /* eight bits of v, i.e. four blocks of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 8) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask_2(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask_2(bits, 2));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask_2(bits, 4));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask_2(bits, 6));
      bits >>= 8;
      rows += 4;
    }
  }
  acc[0] = mm256_xor(acc[0], acc[1]);

  /* move the upper half down and fold it onto the lower half */
  const word256 upper = _mm256_permute4x64_epi64(acc[0], _MM_SHUFFLE(3, 2, 3, 2));
  out->w128[0]        = _mm256_extracti128_si256(mm256_xor(acc[0], upper), 0);
}

/* c = c + v * A for a 129 bit vector v, AVX2 version.
 *
 * Of the first word of v only the top bit (bit 63) is used; it selects
 * block 63 of A. The following two words are processed completely,
 * starting at block 64, with one 256 bit block of A per bit. The first
 * accumulator starts with the current value of c. */
ATTR_TARGET_AVX2
void mzd_addmul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {out->w256, mm256_zero};

  /* single leading bit */
  acc[0] = mm256_xor_mask(acc[0], rows[63].w256, mm256_compute_mask(vec[0] >> 63, 0));
  rows += 64;

  for (unsigned int w = 1; w <= 2; ++w) {
    word bits = vec[w];
    /* four bits of v, i.e. four blocks of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 4) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask(bits, 1));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask(bits, 2));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask(bits, 3));
      bits >>= 4;
      rows += 4;
    }
  }
  out->w256 = mm256_xor(acc[0], acc[1]);
}

/* c = v * A for a 129 bit vector v, AVX2 version.
 *
 * Of the first word of v only the top bit (bit 63) is used; it selects
 * block 63 of A. The following two words are processed completely,
 * starting at block 64, with one 256 bit block of A per bit. */
ATTR_TARGET_AVX2
void mzd_mul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* out        = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero};

  /* single leading bit */
  acc[0] = mm256_xor_mask(acc[0], rows[63].w256, mm256_compute_mask(vec[0] >> 63, 0));
  rows += 64;

  for (unsigned int w = 1; w <= 2; ++w) {
    word bits = vec[w];
    /* four bits of v, i.e. four blocks of A, per iteration */
    for (unsigned int left = sizeof(word) * 8; left != 0; left -= 4) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask(bits, 1));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask(bits, 2));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask(bits, 3));
      bits >>= 4;
      rows += 4;
    }
  }
  out->w256 = mm256_xor(acc[0], acc[1]);
}

/**
 * AVX2 routine computing c += v * A where v supplies 3 * 64 = 192 selector bits.
 *
 * Selector bits are consumed from the least significant bit of the first word
 * upwards, one 256-bit block of A per bit.
 * NOTE(review): mm256_xor_mask / mm256_compute_mask are defined elsewhere;
 * presumably they XOR a block into the accumulator iff the chosen bit is set.
 */
ATTR_TARGET_AVX2
void mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out  = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  /* Two accumulators are used alternately; the first one is seeded with c. */
  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {out->w256, mm256_zero};

  for (unsigned int word_no = 0; word_no < 3; ++word_no) {
    word bits = vec[word_no];
    /* four bits of v and four blocks of A per step */
    for (unsigned int step = 0; step < sizeof(word) * 8 / 4; ++step) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask(bits, 1));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask(bits, 2));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask(bits, 3));
      bits >>= 4;
      rows += 4;
    }
  }

  out->w256 = mm256_xor(acc[0], acc[1]);
}

/**
 * AVX2 routine computing c = v * A where v supplies 3 * 64 = 192 selector bits.
 *
 * Both accumulators start at zero, so the previous contents of c are
 * overwritten. Selector bits are consumed from the least significant bit of
 * the first word upwards, one 256-bit block of A per bit.
 * NOTE(review): mm256_xor_mask / mm256_compute_mask are defined elsewhere;
 * presumably they XOR a block into the accumulator iff the chosen bit is set.
 */
ATTR_TARGET_AVX2
void mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out  = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero};

  for (unsigned int word_no = 0; word_no < 3; ++word_no) {
    word bits = vec[word_no];
    /* four bits of v and four blocks of A per step */
    for (unsigned int step = 0; step < sizeof(word) * 8 / 4; ++step) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask(bits, 1));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask(bits, 2));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask(bits, 3));
      bits >>= 4;
      rows += 4;
    }
  }

  out->w256 = mm256_xor(acc[0], acc[1]);
}

/**
 * AVX2 routine computing c += v * A where v supplies 4 * 64 = 256 selector bits.
 *
 * Selector bits are consumed from the least significant bit of the first word
 * upwards, one 256-bit block of A per bit.
 * NOTE(review): mm256_xor_mask / mm256_compute_mask are defined elsewhere;
 * presumably they XOR a block into the accumulator iff the chosen bit is set.
 */
ATTR_TARGET_AVX2
void mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out  = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  /* Two accumulators are used alternately; the first one is seeded with c. */
  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {out->w256, mm256_zero};

  for (unsigned int word_no = 0; word_no < 4; ++word_no) {
    word bits = vec[word_no];
    /* four bits of v and four blocks of A per step */
    for (unsigned int step = 0; step < sizeof(word) * 8 / 4; ++step) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask(bits, 1));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask(bits, 2));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask(bits, 3));
      bits >>= 4;
      rows += 4;
    }
  }

  out->w256 = mm256_xor(acc[0], acc[1]);
}

/**
 * AVX2 routine computing c = v * A where v supplies 4 * 64 = 256 selector bits.
 *
 * Both accumulators start at zero, so the previous contents of c are
 * overwritten. Selector bits are consumed from the least significant bit of
 * the first word upwards, one 256-bit block of A per bit.
 * NOTE(review): mm256_xor_mask / mm256_compute_mask are defined elsewhere;
 * presumably they XOR a block into the accumulator iff the chosen bit is set.
 */
ATTR_TARGET_AVX2
void mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out  = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  word256 acc[2] ATTR_ALIGNED(alignof(word256)) = {mm256_zero, mm256_zero};

  for (unsigned int word_no = 0; word_no < 4; ++word_no) {
    word bits = vec[word_no];
    /* four bits of v and four blocks of A per step */
    for (unsigned int step = 0; step < sizeof(word) * 8 / 4; ++step) {
      acc[0] = mm256_xor_mask(acc[0], rows[0].w256, mm256_compute_mask(bits, 0));
      acc[1] = mm256_xor_mask(acc[1], rows[1].w256, mm256_compute_mask(bits, 1));
      acc[0] = mm256_xor_mask(acc[0], rows[2].w256, mm256_compute_mask(bits, 2));
      acc[1] = mm256_xor_mask(acc[1], rows[3].w256, mm256_compute_mask(bits, 3));
      bits >>= 4;
      rows += 4;
    }
  }

  out->w256 = mm256_xor(acc[0], acc[1]);
}




/* Set the first idx 64-bit words of block to zero; the remaining words are left untouched. */
static void clear_uint64_block(block_t* block, const unsigned int idx) {
  word* cursor = block->w64;
  for (unsigned int left = idx; left != 0; --left) {
    *cursor++ = 0;
  }
}


/*
 * XOR the first idx 64-bit words of fblock, each ANDed with mask, into the
 * corresponding words of rblock. With mask all-ones this is a plain XOR, with
 * mask zero rblock is left unchanged.
 */
static void mzd_xor_mask_uint64_block(block_t* rblock, const block_t* fblock, const word mask,
                                      const unsigned int idx) {
  word* dst       = rblock->w64;
  const word* src = fblock->w64;
  for (unsigned int left = idx; left != 0; --left) {
    *dst++ ^= *src++ & mask;
  }
}

/*
 * Portable (uint64 only) routine computing c += v * A where v supplies
 * 2 * 64 = 128 selector bits.
 *
 * Every block of A is treated as two 128-bit rows: words 0-1 are selected by
 * the even selector bit, words 2-3 by the following odd bit. Only the first
 * two 64-bit words of c are updated.
 */
void mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out  = BLOCK(c, 0);
  const word* vec     = CONST_BLOCK(v, 0)->w64;
  const block_t* rows = CONST_BLOCK(A, 0);

  for (unsigned int word_no = 0; word_no < 2; ++word_no) {
    word bits = vec[word_no];
    /* two selector bits, i.e. one block of A, per step */
    for (unsigned int step = 0; step < sizeof(word) * 8 / 2; ++step) {
      /* all-ones when the selector bit is set, zero otherwise */
      const uint64_t even_mask = -(bits & 1);
      const uint64_t odd_mask  = -((bits >> 1) & 1);

      out->w64[0] ^= (rows->w64[0] & even_mask) ^ (rows->w64[2] & odd_mask);
      out->w64[1] ^= (rows->w64[1] & even_mask) ^ (rows->w64[3] & odd_mask);

      bits >>= 2;
      rows += 1;
    }
  }
}

/*
 * Compute c = v * A (uint64 variant, 128 selector bits): the first two 64-bit
 * words of c are zeroed and the product is then accumulated into c.
 */
void mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
clear_uint64_block(BLOCK(c, 0), 2);
mzd_addmul_v_uint64_128(c, v, A);
}

/*
 * Portable (uint64 only) routine computing c += v * A where v supplies 129
 * selector bits: bit 63 of its first 64-bit word, followed by all bits of the
 * next two words.
 *
 * The bit from the first word is paired with block 63 of A, the remaining 128
 * bits with blocks 64..191. Three 64-bit words of c are updated per block.
 */
void mzd_addmul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out = BLOCK(c, 0);
  const word* vec    = CONST_BLOCK(v, 0)->w64;
  const block_t* row = CONST_BLOCK(A, 0);

  /* Leading word: only its most significant bit takes part. */
  {
    const word top        = vec[0] >> 63;
    const uint64_t select = -(top & 1);
    mzd_xor_mask_uint64_block(out, row + 63, select, 3);
  }
  vec += 1;
  row += 64;

  for (unsigned int word_no = 0; word_no < 2; ++word_no) {
    word bits = vec[word_no];
    for (unsigned int bit = 0; bit < sizeof(word) * 8; ++bit) {
      /* all-ones when the selector bit is set, zero otherwise */
      const uint64_t select = -(bits & 1);
      mzd_xor_mask_uint64_block(out, row, select, 3);
      bits >>= 1;
      ++row;
    }
  }
}

/*
 * Compute c = v * A (uint64 variant, 129 selector bits): the first three
 * 64-bit words of c are zeroed and the product is then accumulated into c.
 */
void mzd_mul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
clear_uint64_block(BLOCK(c, 0), 3);
mzd_addmul_v_uint64_129(c, v, A);
}

/*
 * Portable (uint64 only) routine computing c += v * A where v supplies
 * 3 * 64 = 192 selector bits, one block of A per bit. Three 64-bit words of c
 * are updated per block.
 */
void mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out = BLOCK(c, 0);
  const word* vec    = CONST_BLOCK(v, 0)->w64;
  const block_t* row = CONST_BLOCK(A, 0);

  for (unsigned int word_no = 0; word_no < 3; ++word_no) {
    word bits = vec[word_no];
    for (unsigned int bit = 0; bit < sizeof(word) * 8; ++bit) {
      /* all-ones when the selector bit is set, zero otherwise */
      const uint64_t select = -(bits & 1);
      mzd_xor_mask_uint64_block(out, row, select, 3);
      bits >>= 1;
      ++row;
    }
  }
}

/*
 * Compute c = v * A (uint64 variant, 192 selector bits): the first three
 * 64-bit words of c are zeroed and the product is then accumulated into c.
 */
void mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
clear_uint64_block(BLOCK(c, 0), 3);
mzd_addmul_v_uint64_192(c, v, A);
}

/*
 * Portable (uint64 only) routine computing c += v * A where v supplies
 * 4 * 64 = 256 selector bits, one block of A per bit. All four 64-bit words
 * of c are updated per block.
 */
void mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
  block_t* const out = BLOCK(c, 0);
  const word* vec    = CONST_BLOCK(v, 0)->w64;
  const block_t* row = CONST_BLOCK(A, 0);

  for (unsigned int word_no = 0; word_no < 4; ++word_no) {
    word bits = vec[word_no];
    for (unsigned int bit = 0; bit < sizeof(word) * 8; ++bit) {
      /* all-ones when the selector bit is set, zero otherwise */
      const uint64_t select = -(bits & 1);
      mzd_xor_mask_uint64_block(out, row, select, 4);
      bits >>= 1;
      ++row;
    }
  }
}

/*
 * Compute c = v * A (uint64 variant, 256 selector bits): all four 64-bit
 * words of c are zeroed and the product is then accumulated into c.
 */
void mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
clear_uint64_block(BLOCK(c, 0), 4);
mzd_addmul_v_uint64_256(c, v, A);
}








// no SIMD















+ 247
- 0
src/sign/picnic/picnic3l1/avx2/mzd_additional.h View File

@@ -0,0 +1,247 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/* Inspired by m4ri's mzd implementation, but completely re-written for our use-case. */

#ifndef MZD_ADDITIONAL_H
#define MZD_ADDITIONAL_H

#include "macros.h"

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint64_t word;
#define WORD_C(v) UINT64_C(v)

#include "simd.h"

/*
 * One 256-bit storage unit, declared with 32-byte alignment. The same bytes
 * can be viewed as four 64-bit words, as two word128 values or as a single
 * word256 value (word128/word256 come from simd.h).
 */
typedef union {
word w64[4];
word128 w128[2];
word256 w256;
} block_t ATTR_ALIGNED(32);

/**
* Representation of matrices and vectors
*
* The basic memory unit is a block of 256 bits. Each row is stored in (possibly multiple) blocks
* depending on the number of columns. Matrices with up to 128 columns are the only exception. In
* this case a block actually contains two rows. The row with even index is contained in w64[0] and
* w64[1], the row with odd index is contained in w64[2] and w64[3].
*/
typedef block_t mzd_local_t;

mzd_local_t* mzd_local_init_ex(unsigned int r, unsigned int c, bool clear) ATTR_ASSUME_ALIGNED(32);

#define mzd_local_init(r, c) mzd_local_init_ex(r, c, true)

void mzd_local_free(mzd_local_t* v);

void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, unsigned int r, unsigned int c, bool clear)
ATTR_NONNULL_ARG(1);

#define mzd_local_init_multiple(dst, n, r, c) mzd_local_init_multiple_ex(dst, n, r, c, true)

/**
* mzd_local_free for mzd_local_init_multiple.
*/
void mzd_local_free_multiple(mzd_local_t** vs);

void mzd_copy_uint64_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_uint64_192(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_uint64_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s128_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s128_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s256_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s256_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;

/**
* mzd_xor variants
*/
void mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_640(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_960(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_1216(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_640(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1024(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1280(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;

/**
* mzd_and variants
*/
void mzd_and_uint64_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_uint64_192(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_uint64_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_s128_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_s128_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_s256_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_s256_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;

/**
* shifts and rotations
*/
void mzd_shift_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
#if defined(PICNIC_STATIC)
/* only needed for tests */
void mzd_rotate_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
#endif

/**
* Compute v * A optimized for v being a vector.
*/
void mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_128_640(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192_960(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_128_640(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192_1024(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256_1280(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_128_768(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s256_192_1024(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s256_256_1280(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;

/**
* Compute v * A optimized for v being a vector, for specific sizes depending on instance
* Only works for specific sizes and the RLL_NEXT algorithm using uint64 operations
*/
void mzd_addmul_v_uint64_30_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;

/**
* Use SSE2 or NEON
*/
void mzd_addmul_v_s128_30_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;

/**
* Use AVX2
*/
void mzd_addmul_v_s256_30_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;

/**
* Compute using parity based algorithm
* */
void mzd_mul_v_parity_uint64_128_30(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_192_30(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_256_30(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;

/**
* Compute c + v * A optimized for c and v being vectors.
*/
void mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;

/**
* Shuffle vector x according to info in mask. Needed for OLLE optimizations.
*/
void mzd_shuffle_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;

/*
 * Access block number b of the matrix/vector v as a (const) block_t pointer.
 * The address is passed through ASSUME_ALIGNED(..., 32), i.e. callers must
 * only use these macros on storage that really is 32-byte aligned.
 */
#define BLOCK(v, b) ((block_t*)ASSUME_ALIGNED(&(v)[(b)], 32))
#define CONST_BLOCK(v, b) ((const block_t*)ASSUME_ALIGNED(&(v)[(b)], 32))

#endif

+ 390
- 0
src/sign/picnic/picnic3l1/avx2/picnic.c View File

@@ -0,0 +1,390 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include "picnic.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "compat.h"
#include "io.h"
#include "lowmc.h"
#include "picnic_instances.h"
#include "picnic3_impl.h"
#include "randomness.h"

// Public and private keys are serialized as follows:
// - public key: instance || C || p
// - secret key: instance || sk || C || p

/*
 * Accessors for the fields of a serialized key. Each macro yields a pointer
 * into the key's data array, right after the leading parameter-set byte.
 *
 * SK_C, SK_PT and PK_PT use local variables named input_size and/or
 * output_size, which must be in scope at the point of use.
 *
 * The expansions are fully parenthesized so that the result can safely be
 * indexed or dereferenced (e.g. SK_C(sk)[i]).
 */
#define SK_SK(sk) (&(sk)->data[1])
#define SK_C(sk) (&(sk)->data[1 + input_size])
#define SK_PT(sk) (&(sk)->data[1 + input_size + output_size])

#define PK_SK(pk) (&(pk)->data[1])
#define PK_C(pk) (&(pk)->data[1])
#define PK_PT(pk) (&(pk)->data[1 + output_size])

/* Return the instance's output_size for the parameter set, or 0 if the parameter set is unknown. */
size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t param) {
  const picnic_instance_t* instance = picnic_instance_get(param);
  return instance ? instance->output_size : 0;
}

/* Return the instance's max_signature_size for the parameter set, or 0 if the parameter set is unknown. */
size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t param) {
  const picnic_instance_t* instance = picnic_instance_get(param);
  return instance ? instance->max_signature_size : 0;
}

/*
 * Return the serialized private key size: the public key size plus the
 * instance's input_size. Returns 0 if the parameter set is unknown.
 */
size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param) {
  const picnic_instance_t* instance = picnic_instance_get(param);
  return instance ? picnic_get_public_key_size(param) + instance->input_size : 0;
}

/*
 * Return the serialized public key size: one leading byte plus twice the
 * instance's output_size. Returns 0 if the parameter set is unknown.
 */
size_t PICNIC_CALLING_CONVENTION picnic_get_public_key_size(picnic_params_t param) {
  const picnic_instance_t* instance = picnic_instance_get(param);
  return instance ? 1 + (instance->output_size << 1) : 0;
}

/*
 * Generate a key pair for the given parameter set.
 *
 * The private key is filled with the parameter byte, a random secret key of
 * lowmc.k bits and a random plaintext of lowmc.n bits; the public key is then
 * derived with picnic_sk_to_pk() and its ciphertext part is copied back into
 * the private key.
 *
 * Returns 0 on success and -1 if an argument is NULL, the parameter set is
 * unknown, or any of the steps fails.
 */
int PICNIC_CALLING_CONVENTION picnic_keygen(picnic_params_t param, picnic_publickey_t* pk,
                                            picnic_privatekey_t* sk) {
  if (pk == NULL || sk == NULL) {
    return -1;
  }

  const picnic_instance_t* instance = picnic_instance_get(param);
  if (instance == NULL) {
    return -1;
  }

  /* The SK_* / PK_* accessor macros refer to these two locals by name. */
  const size_t input_size  = instance->input_size;
  const size_t output_size = instance->output_size;

  sk->data[0] = param;

  /* Evaluated left to right, stopping at the first failure:
   * random secret key, random plaintext, encryption under the secret key. */
  if (rand_bits(SK_SK(sk), instance->lowmc.k) || rand_bits(SK_PT(sk), instance->lowmc.n) ||
      picnic_sk_to_pk(sk, pk)) {
    return -1;
  }

  /* The private key carries a copy of the ciphertext as well. */
  memcpy(SK_C(sk), PK_C(pk), output_size);
  return 0;
}

/*
 * Derive the public key belonging to a private key.
 *
 * The plaintext stored in sk is encrypted under the secret key stored in sk
 * using the instance's LowMC implementation. The public key receives the
 * parameter byte, the resulting ciphertext and a copy of the plaintext.
 *
 * Returns 0 on success and -1 if an argument is NULL or the parameter byte of
 * sk does not name a known instance.
 */
int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk,
                                              picnic_publickey_t* pk) {
  if (!sk || !pk) {
    return -1;
  }

  const picnic_params_t param       = sk->data[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (!instance) {
    return -1;
  }

  /* The SK_* / PK_* accessor macros refer to these two locals by name. */
  const size_t input_size  = instance->input_size;
  const size_t output_size = instance->output_size;

  const uint8_t* sk_sk = SK_SK(sk);
  uint8_t* pk_c        = PK_C(pk);
  uint8_t* pk_pt       = PK_PT(pk);
  const uint8_t* sk_pt = SK_PT(sk);

  mzd_local_t plaintext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t privkey[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t ciphertext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];

  mzd_from_char_array(plaintext, sk_pt, output_size);
  mzd_from_char_array(privkey, sk_sk, input_size);

  // compute public key
  instance->impls.lowmc(privkey, plaintext, ciphertext);

  pk->data[0] = param;
  memcpy(pk_pt, sk_pt, output_size);
  mzd_to_char_array(pk_c, ciphertext, output_size);

  /* Do not leave the unpacked secret key behind on the stack. */
  explicit_bzero(privkey, sizeof(privkey));

  return 0;
}

/*
 * Check that a private key and a public key belong together.
 *
 * First the parameter byte, plaintext and ciphertext stored in both keys are
 * compared; then the ciphertext is recomputed from the secret key and the
 * plaintext and compared against the one in the public key.
 *
 * Returns 0 if the keys match. Returns -1 if an argument is NULL, the
 * parameter set is unknown or the stored fields differ; if only the recomputed
 * ciphertext differs, the (nonzero) memcmp result is returned.
 */
int PICNIC_CALLING_CONVENTION picnic_validate_keypair(const picnic_privatekey_t* sk,
                                                      const picnic_publickey_t* pk) {
  if (!sk || !pk) {
    return -1;
  }

  const picnic_params_t param       = sk->data[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (!instance) {
    return -1;
  }

  /* The SK_* / PK_* accessor macros refer to these two locals by name. */
  const size_t input_size  = instance->input_size;
  const size_t output_size = instance->output_size;
  const uint8_t* sk_sk     = SK_SK(sk);
  const uint8_t* sk_pt     = SK_PT(sk);
  const uint8_t* sk_c      = SK_C(sk);
  const uint8_t* pk_pt     = PK_PT(pk);
  const uint8_t* pk_c      = PK_C(pk);

  // check param and plaintext
  if (param != pk->data[0] || memcmp(sk_pt, pk_pt, output_size) != 0 ||
      memcmp(sk_c, pk_c, output_size) != 0) {
    return -1;
  }

  mzd_local_t plaintext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t privkey[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];
  mzd_local_t ciphertext[(MAX_LOWMC_BLOCK_SIZE_BITS + 255) / 256];

  mzd_from_char_array(plaintext, sk_pt, instance->output_size);
  mzd_from_char_array(privkey, sk_sk, instance->input_size);

  // compute public key
  instance->impls.lowmc(privkey, plaintext, ciphertext);

  uint8_t buffer[MAX_LOWMC_BLOCK_SIZE];
  mzd_to_char_array(buffer, ciphertext, output_size);

  /* Do not leave the unpacked secret key behind on the stack. */
  explicit_bzero(privkey, sizeof(privkey));

  return memcmp(buffer, pk_c, output_size);
}

/*
 * Sign a message with the given private key.
 *
 * Only the Picnic3 parameter sets (L1, L3, L5) are handled here; they are
 * forwarded to impl_sign_picnic3(). Any other parameter set yields -1, as do
 * a NULL sk, signature or signature_len and an unknown parameter byte.
 */
int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, const uint8_t* message,
                                          size_t message_len, uint8_t* signature,
                                          size_t* signature_len) {
  if (sk == NULL || signature == NULL || signature_len == NULL) {
    return -1;
  }

  const picnic_params_t param       = sk->data[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (instance == NULL) {
    return -1;
  }

  /* The SK_* accessor macros refer to these two locals by name. */
  const size_t output_size = instance->output_size;
  const size_t input_size  = instance->input_size;

  switch (param) {
  case Picnic3_L1:
  case Picnic3_L3:
  case Picnic3_L5:
    return impl_sign_picnic3(instance, SK_PT(sk), SK_SK(sk), SK_C(sk), message, message_len,
                             signature, signature_len);
  default:
    return -1;
  }
}

/*
 * Verify a signature over a message with the given public key.
 *
 * Only the Picnic3 parameter sets (L1, L3, L5) are handled here; they are
 * forwarded to impl_verify_picnic3(). Any other parameter set yields -1, as
 * do a NULL pk or signature, a zero signature_len and an unknown parameter
 * byte.
 */
int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const uint8_t* message,
                                            size_t message_len, const uint8_t* signature,
                                            size_t signature_len) {
  if (pk == NULL || signature == NULL || signature_len == 0) {
    return -1;
  }

  const picnic_params_t param       = pk->data[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (instance == NULL) {
    return -1;
  }

  /* The PK_PT accessor macro refers to this local by name. */
  const size_t output_size = instance->output_size;

  switch (param) {
  case Picnic3_L1:
  case Picnic3_L3:
  case Picnic3_L5:
    return impl_verify_picnic3(instance, PK_PT(pk), PK_C(pk), message, message_len, signature,
                               signature_len);
  default:
    return -1;
  }
}

/*
 * Map a parameter set to its name. Values without an entry in the table
 * (including anything outside the table) yield "Unknown parameter set".
 */
const char* PICNIC_CALLING_CONVENTION picnic_get_param_name(picnic_params_t parameters) {
  /* Indexed by parameter set; slots without an initializer stay NULL. */
  static const char* const names[] = {
      [Picnic_L1_FS]   = "Picnic_L1_FS",
      [Picnic_L1_UR]   = "Picnic_L1_UR",
      [Picnic_L3_FS]   = "Picnic_L3_FS",
      [Picnic_L3_UR]   = "Picnic_L3_UR",
      [Picnic_L5_FS]   = "Picnic_L5_FS",
      [Picnic_L5_UR]   = "Picnic_L5_UR",
      [Picnic3_L1]     = "Picnic3_L1",
      [Picnic3_L3]     = "Picnic3_L3",
      [Picnic3_L5]     = "Picnic3_L5",
      [Picnic_L1_full] = "Picnic_L1_full",
      [Picnic_L3_full] = "Picnic_L3_full",
      [Picnic_L5_full] = "Picnic_L5_full",
  };

  const char* name = NULL;
  /* The cast makes negative values fall outside the table as well. */
  if ((size_t)parameters < sizeof(names) / sizeof(names[0])) {
    name = names[parameters];
  }
  return name ? name : "Unknown parameter set";
}

/*
 * Serialize a public key into buf.
 *
 * Copies 1 + 2 * output_size bytes and returns that count. Returns -1 if an
 * argument is NULL, the key's parameter byte is unknown, or buflen is too
 * small.
 */
int PICNIC_CALLING_CONVENTION picnic_write_public_key(const picnic_publickey_t* key, uint8_t* buf,
                                                      size_t buflen) {
  if (key == NULL || buf == NULL) {
    return -1;
  }

  const picnic_instance_t* instance = picnic_instance_get((picnic_params_t)key->data[0]);
  if (instance == NULL) {
    return -1;
  }

  const size_t output_size    = instance->output_size;
  const size_t bytes_required = 1 + 2 * output_size;
  if (bytes_required > buflen) {
    return -1;
  }

  memcpy(buf, key->data, bytes_required);
  return (int)bytes_required;
}

/*
 * Deserialize a public key from buf.
 *
 * The first byte selects the parameter set; 1 + 2 * output_size bytes are
 * required. For the parameter sets listed in the switch below, the last byte
 * of each of the two fields is additionally run through check_padding_bits().
 *
 * Returns 0 on success and -1 on any failure; key is only written on success.
 */
int PICNIC_CALLING_CONVENTION picnic_read_public_key(picnic_publickey_t* key, const uint8_t* buf,
                                                     size_t buflen) {
  if (key == NULL || buf == NULL || buflen < 1) {
    return -1;
  }

  const picnic_params_t param       = buf[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (instance == NULL) {
    return -1;
  }

  const size_t output_size    = instance->output_size;
  const size_t bytes_required = 1 + 2 * output_size;
  if (bytes_required > buflen) {
    return -1;
  }

  switch (param) {
  case Picnic_L1_full:
  case Picnic_L5_full:
  case Picnic3_L1:
  case Picnic3_L5: {
    /* number of unused bits in the last byte of a field */
    const unsigned int diff = output_size * 8 - instance->lowmc.n;
    /* buf[output_size] and buf[2 * output_size] are the last bytes of the two fields */
    if (check_padding_bits(buf[output_size], diff) ||
        check_padding_bits(buf[2 * output_size], diff)) {
      return -1;
    }
    break;
  }
  default:
    break;
  }

  memcpy(key->data, buf, bytes_required);
  return 0;
}

/*
 * Serialize a private key into buf.
 *
 * Copies 1 + input_size + 2 * output_size bytes and returns that count.
 * Returns -1 if an argument is NULL, the key's parameter byte is unknown, or
 * buflen is too small.
 */
int PICNIC_CALLING_CONVENTION picnic_write_private_key(const picnic_privatekey_t* key, uint8_t* buf,
                                                       size_t buflen) {
  if (key == NULL || buf == NULL) {
    return -1;
  }

  const picnic_instance_t* instance = picnic_instance_get((picnic_params_t)key->data[0]);
  if (instance == NULL) {
    return -1;
  }

  const size_t bytes_required = 1 + instance->input_size + 2 * instance->output_size;
  if (bytes_required > buflen) {
    return -1;
  }

  memcpy(buf, key->data, bytes_required);
  return (int)bytes_required;
}

/*
 * Deserialize a private key from buf.
 *
 * The first byte selects the parameter set; 1 + input_size + 2 * output_size
 * bytes are required. For the parameter sets listed in the switch below, the
 * last byte of each of the three fields is run through check_padding_bits();
 * the three results are combined without short-circuiting and only the
 * combined value is declassified and branched on.
 *
 * Returns 0 on success and -1 on any failure; key is only written on success.
 */
int PICNIC_CALLING_CONVENTION picnic_read_private_key(picnic_privatekey_t* key, const uint8_t* buf,
                                                      size_t buflen) {
  if (key == NULL || buf == NULL || buflen < 1) {
    return -1;
  }

  const picnic_params_t param       = buf[0];
  const picnic_instance_t* instance = picnic_instance_get(param);
  if (instance == NULL) {
    return -1;
  }

  const size_t input_size     = instance->input_size;
  const size_t output_size    = instance->output_size;
  const size_t bytes_required = 1 + input_size + 2 * output_size;
  if (bytes_required > buflen) {
    return -1;
  }

  switch (param) {
  case Picnic_L1_full:
  case Picnic_L5_full:
  case Picnic3_L1:
  case Picnic3_L5: {
    /* number of unused bits in the last byte of a field */
    const unsigned int diff = output_size * 8 - instance->lowmc.n;
    assert(diff == input_size * 8 - instance->lowmc.k);
    /* last bytes of the secret key, ciphertext and plaintext fields */
    const int check = check_padding_bits(buf[input_size], diff) |
                      check_padding_bits(buf[input_size + output_size], diff) |
                      check_padding_bits(buf[input_size + 2 * output_size], diff);
    picnic_declassify(&check, sizeof(check));
    if (check) {
      return -1;
    }
    break;
  }
  default:
    break;
  }

  memcpy(key->data, buf, bytes_required);
  return 0;
}

/*
 * Overwrite a private key with zeros using explicit_bzero(), so the wipe is
 * not optimized away. A NULL key is ignored, matching the NULL tolerance of
 * the other API entry points.
 */
void PICNIC_CALLING_CONVENTION picnic_clear_private_key(picnic_privatekey_t* key) {
  if (!key) {
    return;
  }
  explicit_bzero(key, sizeof(*key));
}


+ 285
- 0
src/sign/picnic/picnic3l1/avx2/picnic.h View File

@@ -0,0 +1,285 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC_H
#define PICNIC_H

#if !defined(PICNIC_EXPORT)
#if !defined(PICNIC_STATIC) && (defined(_WIN16) || defined(_WIN32) || defined(_WIN64))
#define PICNIC_EXPORT __declspec(dllimport)
#else
#define PICNIC_EXPORT
#endif
#endif

#if defined(_WIN16) || defined(_WIN32) || defined(_WIN64)
#define PICNIC_CALLING_CONVENTION __stdcall
#else
#define PICNIC_CALLING_CONVENTION
#endif

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

#define PICNIC_CONCAT2(a, b) a##_##b
#define PICNIC_CONCAT(a, b) PICNIC_CONCAT2(a, b)

/* Block sizes of the LowMC ciphers per parameter */
#define LOWMC_BLOCK_SIZE_Picnic_L1_FS 16
#define LOWMC_BLOCK_SIZE_Picnic_L1_UR 16
#define LOWMC_BLOCK_SIZE_Picnic_L3_FS 24
#define LOWMC_BLOCK_SIZE_Picnic_L3_UR 24
#define LOWMC_BLOCK_SIZE_Picnic_L5_FS 32
#define LOWMC_BLOCK_SIZE_Picnic_L5_UR 32
#define LOWMC_BLOCK_SIZE_Picnic3_L1 17
#define LOWMC_BLOCK_SIZE_Picnic3_L3 24
#define LOWMC_BLOCK_SIZE_Picnic3_L5 32
#define LOWMC_BLOCK_SIZE_Picnic_L1_full 17
#define LOWMC_BLOCK_SIZE_Picnic_L3_full 24
#define LOWMC_BLOCK_SIZE_Picnic_L5_full 32

#define LOWMC_BLOCK_SIZE(p) PICNIC_CONCAT(LOWMC_BLOCK_SIZE, p)

/* Serialized key sizes in bytes for parameter set p: one leading byte plus
 * three (private key) or two (public key) LowMC blocks. */
#define PICNIC_PRIVATE_KEY_SIZE(p) (1 + 3 * LOWMC_BLOCK_SIZE(p))
#define PICNIC_PUBLIC_KEY_SIZE(p) (1 + 2 * LOWMC_BLOCK_SIZE(p))

/* Max. signature sizes per parameter */
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_FS 34032
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_UR 53961
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_FS 76772
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_UR 121845
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_FS 132856
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_UR 209506
#define PICNIC_SIGNATURE_SIZE_Picnic3_L1 14608
#define PICNIC_SIGNATURE_SIZE_Picnic3_L3 35024
#define PICNIC_SIGNATURE_SIZE_Picnic3_L5 61024
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_full 32061
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_full 71179
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_full 126286

#define PICNIC_SIGNATURE_SIZE(p) PICNIC_CONCAT(PICNIC_SIGNATURE_SIZE, p)

#define PICNIC_MAX_LOWMC_BLOCK_SIZE LOWMC_BLOCK_SIZE(Picnic_L5_UR)
#define PICNIC_MAX_PRIVATEKEY_SIZE PICNIC_PRIVATE_KEY_SIZE(Picnic_L5_UR)
#define PICNIC_MAX_PUBLICKEY_SIZE PICNIC_PUBLIC_KEY_SIZE(Picnic_L5_UR)
#define PICNIC_MAX_SIGNATURE_SIZE PICNIC_SIGNATURE_SIZE(Picnic_L5_UR)

/** Parameter set names.
*
* The enumerator names match the suffixes of the LOWMC_BLOCK_SIZE_* and
* PICNIC_SIGNATURE_SIZE_* macros, so they can be passed to the
* LOWMC_BLOCK_SIZE() / PICNIC_SIGNATURE_SIZE() lookup macros.
*/
typedef enum {
/* Value 0 does not name a parameter set */
PARAMETER_SET_INVALID = 0,
/* ZKB++ with LowMC m=10 */
Picnic_L1_FS = 1,
Picnic_L1_UR = 2,
Picnic_L3_FS = 3,
Picnic_L3_UR = 4,
Picnic_L5_FS = 5,
Picnic_L5_UR = 6,
/* KKW with full LowMC */
Picnic3_L1 = 7,
Picnic3_L3 = 8,
Picnic3_L5 = 9,
/* ZKB++ with full LowMC */
Picnic_L1_full = 10,
Picnic_L3_full = 11,
Picnic_L5_full = 12,
/* One past the largest parameter set value */
PARAMETER_SET_MAX_INDEX = 13
} picnic_params_t;

/** Public key.
*
* Fixed-size byte buffer of PICNIC_MAX_PUBLICKEY_SIZE bytes, i.e. sized for the
* parameter set from which that maximum is derived, independent of the
* parameter set actually in use.
*/
typedef struct {
uint8_t data[PICNIC_MAX_PUBLICKEY_SIZE];
} picnic_publickey_t;

/** Private key.
*
* Fixed-size byte buffer of PICNIC_MAX_PRIVATEKEY_SIZE bytes, i.e. sized for
* the parameter set from which that maximum is derived, independent of the
* parameter set actually in use.
*/
typedef struct {
uint8_t data[PICNIC_MAX_PRIVATEKEY_SIZE];
} picnic_privatekey_t;

/**
* Get a string representation of the parameter set.
*
* @param parameters A parameter set
*
* @return A null-terminated string describing the parameter set.
*/
PICNIC_EXPORT const char* PICNIC_CALLING_CONVENTION
picnic_get_param_name(picnic_params_t parameters);

/* Signature API */

/**
* Key generation function.
* Generates a public and private key pair, for the specified parameter set.
*
* @param[in] parameters The parameter set to use when generating a key.
* @param[out] pk The new public key.
* @param[out] sk The new private key.
*
* @return Returns 0 for success, or a nonzero value indicating an error.
*
* @see picnic_verify(), picnic_sign()
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_keygen(picnic_params_t parameters,
picnic_publickey_t* pk,
picnic_privatekey_t* sk);

/**
* Signature function.
* Signs a message with the given keypair.
*
* @param[in] sk The signer's private key.
* @param[in] message The message to be signed.
* @param[in] message_len The length of the message, in bytes.
* @param[out] signature A buffer to hold the signature. The required size does
* not exceed PICNIC_MAX_SIGNATURE_SIZE bytes. The specific max number of
* bytes required for a parameter set is given by picnic_signature_size(). Note
* that the length of each signature varies slightly, for the parameter sets
* using the FS transform. The parameter sets using the Unruh transform have a
* fixed length.
* @param[in,out] signature_len The length of the provided signature buffer.
* On success, this is set to the number of bytes written to the signature buffer.
*
* @return Returns 0 for success, or a nonzero value indicating an error.
*
* @see picnic_verify(), picnic_keygen(), picnic_signature_size()
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk,
const uint8_t* message, size_t message_len,
uint8_t* signature, size_t* signature_len);

/**
* Get the number of bytes required to hold a signature.
*
* @param[in] parameters The parameter set of the signature.
*
* @return The number of bytes required to hold the signature created by
* picnic_sign
*
* @note The size of signatures with parameter sets using the FS transform varies
* slightly based on the random choices made during signing. This function
* will return a sufficient number of bytes to hold a signature, and the
* picnic_sign() function returns the exact number used for a given signature.
*
* @see picnic_sign()
*/
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t parameters);

/**
* Verification function.
* Verifies a signature is valid with respect to a public key and message.
*
* @param[in] pk The signer's public key.
* @param[in] message The message the signature purportedly signs.
* @param[in] message_len The length of the message, in bytes.
* @param[in] signature The signature to verify.
* @param[in] signature_len The length of the signature.
*
* @return Returns 0 for success, indicating a valid signature, or a nonzero
* value indicating an error or an invalid signature.
*
* @see picnic_sign(), picnic_keygen()
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk,
const uint8_t* message,
size_t message_len,
const uint8_t* signature,
size_t signature_len);

/**
* Serialize a public key.
*
* @param[in] key The public key to serialize
* @param[out] buf The buffer to write the key to.
* Must have size at least PICNIC_MAX_PUBLICKEY_SIZE bytes.
* @param[in] buflen The length of buf, in bytes
*
* @return Returns the number of bytes written.
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_write_public_key(const picnic_publickey_t* key,
uint8_t* buf, size_t buflen);

/**
* De-serialize a public key.
*
* @param[out] key The public key object to be populated.
* @param[in] buf The buffer to read the public key from.
* Must be at least PICNIC_MAX_PUBLICKEY_SIZE bytes.
* @param[in] buflen The length of buf, in bytes
*
* @return Returns 0 on success, or a nonzero value indicating an error.
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_read_public_key(picnic_publickey_t* key,
const uint8_t* buf,
size_t buflen);

/**
* Serialize a private key.
*
* @param[in] key The private key to serialize
* @param[out] buf The buffer to write the key to.
* Must have size at least PICNIC_MAX_PRIVATEKEY_SIZE bytes.
* @param[in] buflen The length of buf, in bytes
*
* @return Returns the number of bytes written.
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_write_private_key(const picnic_privatekey_t* key,
uint8_t* buf, size_t buflen);

/**
* De-serialize a private key.
*
* @param[out] key The private key object to be populated
* @param[in] buf The buffer to read the key from.
* Must have size at least PICNIC_MAX_PRIVATEKEY_SIZE bytes.
* @param[in] buflen The length of buf, in bytes
*
* @return Returns 0 on success, or a nonzero value indicating an error.
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_read_private_key(picnic_privatekey_t* key,
const uint8_t* buf,
size_t buflen);

/**
* Check that a key pair is valid.
*
* @param[in] privatekey The private key to check
* @param[in] publickey The public key to check
*
* @return Returns 0 if the key pair is valid, or a nonzero value indicating an error
*/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION
picnic_validate_keypair(const picnic_privatekey_t* privatekey, const picnic_publickey_t* publickey);

/**
* Clear data of a private key.
*
* @param[out] key The private key to clear
*/
PICNIC_EXPORT void PICNIC_CALLING_CONVENTION picnic_clear_private_key(picnic_privatekey_t* key);

/**
* Compute public key from private key.
*
* @param[in] privatekey The private key
* @param[out] publickey The public key to be populated
* @return Returns 0 on success, or a nonzero value indicating an error.
**/
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* privatekey,
picnic_publickey_t* publickey);

#ifdef __cplusplus
}
#endif

#endif

+ 971
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_impl.c View File

@@ -0,0 +1,971 @@
/*! @file picnic3_impl.c
* @brief This is the main file of the signature scheme for the Picnic3
* parameter sets.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "io.h"
#include "kdf_shake.h"
#include "macros.h"
#include "picnic.h"
#include "picnic3_impl.h"
#include "picnic3_tree.h"
#include "picnic3_types.h"

/* Helper functions */

ATTR_CONST
/* Number of bytes needed to hold numBits bits, i.e. numBits / 8 rounded up.
 *
 * Computed as quotient plus "is there a remainder" so that inputs close to
 * UINT32_MAX do not wrap around (adding 7 first would overflow and yield 0). */
static uint32_t numBytes(uint32_t numBits) {
  return numBits / 8 + (uint32_t)(numBits % 8 != 0);
}

/* Set up the random tapes of one parallel repetition.
 *
 * tapes is first passed to allocateRandomTape(); afterwards the tape of every
 * party is squeezed (2 * view_size bytes each) from a hash over that party's
 * seed, the salt, the repetition index t and the party index. Parties are
 * processed in batches of four with the x4 hash API, so the number of parties
 * must be a multiple of four. */
static void createRandomTapes(randomTape_t* tapes, uint8_t** seeds, uint8_t* salt, size_t t,
                              const picnic_instance_t* params) {
  const size_t tapeLen    = 2 * params->view_size;
  const uint8_t* salts[4] = {salt, salt, salt, salt};

  allocateRandomTape(tapes, params);
  assert(params->num_MPC_parties % 4 == 0);

  size_t party = 0;
  while (party < params->num_MPC_parties) {
    hash_context_x4 ctx;
    const uint8_t* seedBatch[4];
    uint8_t* tapeBatch[4];
    uint16_t partyIdx[4];

    /* Gather the four lanes of this batch */
    for (size_t lane = 0; lane < 4; lane++) {
      seedBatch[lane] = seeds[party + lane];
      tapeBatch[lane] = tapes->tape[party + lane];
      partyIdx[lane]  = (uint16_t)(party + lane);
    }

    hash_init_x4(&ctx, params->digest_size);
    hash_update_x4(&ctx, seedBatch, params->seed_size);
    hash_update_x4(&ctx, salts, SALT_SIZE);
    hash_update_x4_uint16_le(&ctx, t);
    hash_update_x4_uint16s_le(&ctx, partyIdx);
    hash_final_x4(&ctx);
    hash_squeeze_x4(&ctx, tapeBatch, tapeLen);

    party += 4;
  }
}

/* Input is the tapes for one parallel repetition; i.e., tapes[t]
 * Updates the random tapes of all players with the mask values for the output of
 * AND gates, and computes the N-th party's share such that the AND gate invariant
 * holds on the mask values.
 *
 * If input_masks is not NULL, the key masks produced by the LowMC aux
 * evaluation are written to it (input_size bytes).
 */
static void computeAuxTape(randomTape_t* tapes, uint8_t* input_masks,
                           const picnic_instance_t* params) {
  const size_t tapeLen = 2 * params->view_size;
  mzd_local_t lowmc_key[1];

  /* XOR every party's tape into the parity tape */
  for (size_t party = 0; party < params->num_MPC_parties; party++) {
    for (size_t b = 0; b < tapeLen; b++) {
      tapes->parity_tapes[b] ^= tapes->tape[party][b];
    }
  }

  /* The leading bits of the parity tape form the key for the plain evaluation */
  mzd_from_char_array(lowmc_key, tapes->parity_tapes, params->input_size);
  tapes->aux_pos = 0;
  tapes->pos     = params->lowmc.n;
  memset(tapes->aux_bits, 0, params->view_size);

  /* Perform LowMC evaluation and fix AND masks for all AND gates */
  params->impls.lowmc_aux(lowmc_key, tapes);

  /* Hand the key masks back to the caller when requested */
  if (input_masks) {
    mzd_to_char_array(input_masks, lowmc_key, params->input_size);
  }

  /* Rewind the tape position so that the online execution consumes the same
   * random bits that were used while computing the aux shares */
  tapes->pos = 0;
}

/* Compute the commitment C[t][j] of a single party.
 *
 * digest = H(seed || [aux] || salt || t || j), where the aux bits
 * (view_size bytes) are only absorbed when aux is not NULL and t and j are
 * absorbed as little-endian 16-bit values. digest_size bytes are written. */
static void commit(uint8_t* digest, const uint8_t* seed, const uint8_t* aux, const uint8_t* salt,
                   size_t t, size_t j, const picnic_instance_t* params) {
  hash_context hctx;
  hash_init(&hctx, params->digest_size);

  hash_update(&hctx, seed, params->seed_size);
  if (aux) {
    /* aux is optional */
    hash_update(&hctx, aux, params->view_size);
  }
  hash_update(&hctx, salt, SALT_SIZE);
  hash_update_uint16_le(&hctx, t);
  hash_update_uint16_le(&hctx, j);

  hash_final(&hctx);
  hash_squeeze(&hctx, digest, params->digest_size);
}

/* Four-way variant of commit() without aux bits.
 *
 * Computes the commitments of parties j, j + 1, j + 2 and j + 3 of repetition t
 * in one batched hash call: digest[k] = H(seed[k] || salt || t || j + k). */
static void commit_x4(uint8_t** digest, const uint8_t** seed, const uint8_t* salt, size_t t,
                      size_t j, const picnic_instance_t* params) {
  const uint8_t* salts[4] = {salt, salt, salt, salt};
  uint16_t partyIdx[4];
  for (size_t lane = 0; lane < 4; lane++) {
    partyIdx[lane] = (uint16_t)(j + lane);
  }

  hash_context_x4 hctx;
  hash_init_x4(&hctx, params->digest_size);
  hash_update_x4(&hctx, seed, params->seed_size);
  hash_update_x4(&hctx, salts, SALT_SIZE);
  hash_update_x4_uint16_le(&hctx, t);
  hash_update_x4_uint16s_le(&hctx, partyIdx);
  hash_final_x4(&hctx);
  hash_squeeze_x4(&hctx, digest, params->digest_size);
}

/* Hash the commitments of all parties of one repetition into a single digest:
 * digest = H(C->hashes[0] || ... || C->hashes[num_MPC_parties - 1]). */
static void commit_h(uint8_t* digest, const commitments_t* C, const picnic_instance_t* params) {
  hash_context hctx;
  hash_init(&hctx, params->digest_size);

  size_t party = 0;
  while (party < params->num_MPC_parties) {
    hash_update(&hctx, C->hashes[party], params->digest_size);
    party++;
  }

  hash_final(&hctx);
  hash_squeeze(&hctx, digest, params->digest_size);
}

/* Four-way variant of commit_h(): C points to four consecutive commitment
 * sets, and digest[k] receives the hash over all party commitments of C[k]. */
static void commit_h_x4(uint8_t** digest, const commitments_t* C, const picnic_instance_t* params) {
  hash_context_x4 hctx;
  hash_init_x4(&hctx, params->digest_size);

  for (size_t party = 0; party < params->num_MPC_parties; party++) {
    /* One commitment per lane, all belonging to the same party index */
    const uint8_t* lanes[4];
    for (size_t k = 0; k < 4; k++) {
      lanes[k] = C[k].hashes[party];
    }
    hash_update_x4(&hctx, lanes, params->digest_size);
  }

  hash_final_x4(&hctx);
  hash_squeeze_x4(&hctx, digest, params->digest_size);
}

// Commit to the views for one parallel rep:
// digest = H(input || msgs of party 0 || ... || msgs of the last party),
// where numBytes(msgs->pos) bytes are absorbed per party.
static void commit_v(uint8_t* digest, const uint8_t* input, const msgs_t* msgs,
                     const picnic_instance_t* params) {
  const uint32_t msgLen = numBytes(msgs->pos);
  hash_context hctx;

  hash_init(&hctx, params->digest_size);
  hash_update(&hctx, input, params->input_size);

  size_t party = 0;
  while (party < params->num_MPC_parties) {
    hash_update(&hctx, msgs->msgs[party], msgLen);
    party++;
  }

  hash_final(&hctx);
  hash_squeeze(&hctx, digest, params->digest_size);
}

/* Four-way variant of commit_v(): msgs and input each refer to four
 * consecutive repetitions, and digest[k] receives the view commitment of
 * repetition k. All four message sets must have the same position. */
static void commit_v_x4(uint8_t** digest, const uint8_t** input, const msgs_t* msgs,
                        const picnic_instance_t* params) {
  hash_context_x4 hctx;

  hash_init_x4(&hctx, params->digest_size);
  hash_update_x4(&hctx, input, params->input_size);

  for (size_t party = 0; party < params->num_MPC_parties; party++) {
    assert(msgs[0].pos == msgs[1].pos && msgs[2].pos == msgs[3].pos && msgs[0].pos == msgs[2].pos);

    /* One message buffer per lane, all belonging to the same party index */
    const uint8_t* lanes[4];
    for (size_t k = 0; k < 4; k++) {
      lanes[k] = msgs[k].msgs[party];
    }
    hash_update_x4(&hctx, lanes, numBytes(msgs->pos));
  }

  hash_final_x4(&hctx);
  hash_squeeze_x4(&hctx, digest, params->digest_size);
}

/* Byte-wise XOR: out[k] = in1[k] ^ in2[k] for the first length bytes.
 * Bytes are processed front to back, so out may be the same buffer as in1
 * or in2. */
static void xor_byte_array(uint8_t* out, const uint8_t* in1, const uint8_t* in2, uint32_t length) {
  uint32_t idx = 0;
  while (idx != length) {
    out[idx] = (uint8_t)(in1[idx] ^ in2[idx]);
    ++idx;
  }
}

/* Linear membership test: returns 1 if value occurs among the first len
 * entries of list, 0 otherwise. */
static int contains(const uint16_t* list, size_t len, uint16_t value) {
  int found = 0;
  for (size_t idx = 0; idx < len && !found; idx++) {
    found = (list[idx] == value);
  }
  return found;
}

/* Return the index of the first occurrence of value among the first len
 * entries of list.
 *
 * Callers must only ask for values that are present: a miss trips an
 * assertion, and returns -1 when assertions are compiled out. */
static int indexOf(const uint16_t* list, size_t len, uint16_t value) {
  size_t idx = 0;
  while (idx < len && list[idx] != value) {
    idx++;
  }
  if (idx < len) {
    return (int)idx;
  }
  assert(!"indexOf called on list where value is not found. (caller bug)");
  return -1;
}

/* Copy lowmc.r * lowmc.n consecutive bits from input into the tape of the last
 * party. The n bits belonging to round j are written starting at tape bit
 * n + 2 * n * j, where n is lowmc.n. */
static void setAuxBits(randomTape_t* tapes, uint8_t* input, const picnic_instance_t* params) {
  const size_t n    = params->lowmc.n;
  uint8_t* lastTape = tapes->tape[params->num_MPC_parties - 1];
  size_t srcBit     = 0;

  for (size_t round = 0; round < params->lowmc.r; round++) {
    const size_t dstBase = n + n * 2 * round;
    for (size_t k = 0; k < n; k++) {
      setBit(lastTape, dstBase + k, getBit(input, srcBit));
      srcBit++;
    }
  }
}

/* Split the first inputLen bytes of input into consecutive chunks of
 * chunkLenBits bits each and store them in chunks[]. Within a chunk, the j-th
 * bit read becomes bit j of the value. Trailing bits that do not fill a whole
 * chunk are ignored.
 *
 * Returns the number of chunks written, or 0 (after tripping an assertion)
 * when the input holds fewer than chunkLenBits bits. */
static size_t bitsToChunks(size_t chunkLenBits, const uint8_t* input, size_t inputLen,
                           uint16_t* chunks) {
  const size_t totalBits = inputLen * 8;
  if (chunkLenBits > totalBits) {
    assert(!"Invalid input to bitsToChunks: not enough input");
    return 0;
  }

  const size_t chunkCount = totalBits / chunkLenBits;
  size_t bitPos           = 0; /* always equals i * chunkLenBits + j */

  for (size_t i = 0; i < chunkCount; i++) {
    chunks[i] = 0;
    for (size_t j = 0; j < chunkLenBits; j++) {
      chunks[i] = (uint16_t)(chunks[i] + (getBit(input, bitPos) << j));
      assert(chunks[i] < (1 << chunkLenBits));
      bitPos++;
    }
  }

  return chunkCount;
}

/* Append value at list[position] unless it already occurs among the first
 * position entries. Returns the new number of used entries: position + 1 when
 * the value was appended, position when it was a duplicate. */
static size_t appendUnique(uint16_t* list, uint16_t value, size_t position) {
  size_t idx = 0;
  while (idx < position && list[idx] != value) {
    idx++;
  }
  if (idx < position) {
    /* already present */
    return position;
  }

  list[position] = value;
  return position + 1;
}

/* Replace h by a fresh digest computed over the current h, using a hash
 * context initialised with HASH_PREFIX_1. digest_size bytes are read and
 * written. */
static void rehashChallengeDigest(hash_context* ctx, uint8_t* h, const picnic_instance_t* params) {
  hash_init_prefix(ctx, params->digest_size, HASH_PREFIX_1);
  hash_update(ctx, h, params->digest_size);
  hash_final(ctx);
  hash_squeeze(ctx, h, params->digest_size);
}

/* Expand the challenge digest sigH into the two challenge lists.
 *
 * challengeC receives num_opened_rounds distinct values below num_rounds;
 * challengeP receives num_opened_rounds values below num_MPC_parties
 * (duplicates allowed). Candidates are read as fixed-width bit chunks from a
 * working copy of the digest; out-of-range candidates are rejected, and the
 * working digest is re-hashed after every pass over its chunks. */
static void expandChallenge(uint16_t* challengeC, uint16_t* challengeP, const uint8_t* sigH,
                            const picnic_instance_t* params) {
  uint8_t h[MAX_DIGEST_SIZE] = {0};
  hash_context ctx;

  memcpy(h, sigH, params->digest_size);

  const uint32_t bitsPerChunkC = ceil_log2(params->num_rounds);
  const uint32_t bitsPerChunkP = ceil_log2(params->num_MPC_parties);
  /* Sized for the narrower chunk width, which yields the larger chunk count */
  uint16_t* chunks =
      calloc(params->digest_size * 8 / MIN(bitsPerChunkP, bitsPerChunkC), sizeof(uint16_t));

  /* Populate C */
  size_t countC = 0;
  while (countC < params->num_opened_rounds) {
    const size_t numChunks = bitsToChunks(bitsPerChunkC, h, params->digest_size, chunks);
    for (size_t i = 0; i < numChunks && countC != params->num_opened_rounds; i++) {
      if (chunks[i] < params->num_rounds) {
        countC = appendUnique(challengeC, chunks[i], countC);
      }
    }
    rehashChallengeDigest(&ctx, h, params);
  }

  /* Populate P. Note that h has always been re-hashed once more after C was
   * completed. */
  size_t countP = 0;
  while (countP < params->num_opened_rounds) {
    const size_t numChunks = bitsToChunks(bitsPerChunkP, h, params->digest_size, chunks);
    for (size_t i = 0; i < numChunks && countP != params->num_opened_rounds; i++) {
      if (chunks[i] < params->num_MPC_parties) {
        challengeP[countP++] = chunks[i];
      }
    }
    rehashChallengeDigest(&ctx, h, params);
  }

  free(chunks);
}

/* Compute the challenge digest and expand it into the challenge lists.
 *
 * sigH = H(Ch->hashes[0..num_rounds-1] || hCv || salt || pubKey || plaintext ||
 * message); sigH is then declassified and handed to expandChallenge(), which
 * fills challengeC and challengeP. */
static void HCP(uint8_t* sigH, uint16_t* challengeC, uint16_t* challengeP, commitments_t* Ch,
                uint8_t* hCv, uint8_t* salt, const uint8_t* pubKey, const uint8_t* plaintext,
                const uint8_t* message, size_t messageByteLength, const picnic_instance_t* params) {
  assert(params->num_opened_rounds < params->num_rounds);

  hash_context hctx;
  hash_init(&hctx, params->digest_size);

  /* Per-round commitment digests first ... */
  size_t round = 0;
  while (round < params->num_rounds) {
    hash_update(&hctx, Ch->hashes[round], params->digest_size);
    round++;
  }

  /* ... followed by the remaining public inputs */
  hash_update(&hctx, hCv, params->digest_size);
  hash_update(&hctx, salt, SALT_SIZE);
  hash_update(&hctx, pubKey, params->input_size);
  hash_update(&hctx, plaintext, params->input_size);
  hash_update(&hctx, message, messageByteLength);
  hash_final(&hctx);
  hash_squeeze(&hctx, sigH, params->digest_size);

  /* parts of this hash will be published as challenge so is public anyway */
  picnic_declassify(sigH, params->digest_size);

  expandChallenge(challengeC, challengeP, sigH, params);
}

/* Build the list of round indices in [0, num_rounds) that do NOT occur in
 * challengeC, in ascending order.
 *
 * The returned array has num_rounds - num_opened_rounds entries, is heap
 * allocated, and must be released by the caller with free(). */
static uint16_t* getMissingLeavesList(uint16_t* challengeC, const picnic_instance_t* params) {
  const size_t opened = params->num_opened_rounds;
  uint16_t* missing   = calloc(params->num_rounds - opened, sizeof(uint16_t));
  size_t used         = 0;

  for (size_t round = 0; round < params->num_rounds; round++) {
    if (contains(challengeC, opened, round)) {
      continue;
    }
    missing[used++] = (uint16_t)round;
  }

  return missing;
}

/* Verify a Picnic3 signature.
 *
 * Recomputes, from the data in sig, the per-round commitments (Ch) and the
 * view commitments of the opened rounds (Cv), rebuilds the Merkle root over
 * the view commitments, derives the challenge from them and compares it with
 * the challenge stored in the signature.
 *
 * @param sig                deserialized signature to check
 * @param pubKey             public key bytes (input_size bytes are hashed)
 * @param plaintext          plaintext bytes belonging to the public key
 * @param message            signed message
 * @param messageByteLength  length of message in bytes
 * @param params             parameter set instance
 *
 * @return EXIT_SUCCESS if the signature is valid, -1 otherwise (including
 *         malformed seed/Merkle information and failed allocations of the
 *         local work buffers).
 *
 * Note on cleanup: tapes[] is allocated uninitialised and filled one round at
 * a time, so the number of rounds for which createRandomTapes() has run is
 * tracked and only those entries are passed to freeRandomTape(). Early exits
 * therefore never release tape entries that were never set up.
 */
static int verify_picnic3(signature2_t* sig, const uint8_t* pubKey, const uint8_t* plaintext,
                          const uint8_t* message, size_t messageByteLength,
                          const picnic_instance_t* params) {
  /* Four commitment sets, used round-robin (t % 4) so that four rounds can be
   * hashed with a single commit_h_x4() call */
  commitments_t C[4];
  allocateCommitments2(&C[0], params, params->num_MPC_parties);
  allocateCommitments2(&C[1], params, params->num_MPC_parties);
  allocateCommitments2(&C[2], params, params->num_MPC_parties);
  allocateCommitments2(&C[3], params, params->num_MPC_parties);
  msgs_t* msgs              = allocateMsgsVerify(params);
  tree_t* treeCv            = createTree(params->num_rounds, params->digest_size);
  size_t challengeSizeBytes = params->num_opened_rounds * sizeof(uint16_t);
  uint16_t* challengeC      = malloc(challengeSizeBytes);
  uint16_t* challengeP      = malloc(challengeSizeBytes);
  uint8_t challenge[MAX_DIGEST_SIZE];
  randomTape_t* tapes = malloc(params->num_rounds * sizeof(randomTape_t));
  /* Number of leading entries of tapes[] that have been set up so far */
  size_t tapesCreated = 0;
  tree_t* iSeedsTree  = createTree(params->num_rounds, params->seed_size);
  int ret = reconstructSeeds(iSeedsTree, sig->challengeC, params->num_opened_rounds, sig->iSeedInfo,
                             sig->iSeedInfoLen, sig->salt, 0, params);
  const size_t last                      = params->num_MPC_parties - 1;
  lowmc_simulate_online_f simulateOnline = params->impls.lowmc_simulate_online;

  commitments_t Ch;
  allocateCommitments2(&Ch, params, params->num_rounds);
  commitments_t Cv;
  allocateCommitments2(&Cv, params, params->num_rounds);
  mzd_local_t m_plaintext[1];
  mzd_local_t m_maskedKey[1];
  mzd_from_char_array(m_plaintext, plaintext, params->output_size);

  /* Bail out on bad iSeed information or if a work buffer could not be
   * allocated; everything released at Exit is valid at this point */
  if (ret != 0 || tapes == NULL || challengeC == NULL || challengeP == NULL) {
    ret = -1;
    goto Exit;
  }

  /* Populate seeds with values from the signature */
  for (size_t t = 0; t < params->num_rounds; t++) {
    tree_t* seed = NULL;
    if (!contains(sig->challengeC, params->num_opened_rounds, t)) {
      /* Expand iSeed[t] to seeds for each parties, using a seed tree */
      seed = generateSeeds(params->num_MPC_parties, getLeaf(iSeedsTree, t), sig->salt, t, params);
    } else {
      /* We don't have the initial seed for the round, but instead a seed
       * for each unopened party */
      seed           = createTree(params->num_MPC_parties, params->seed_size);
      size_t P_index = indexOf(sig->challengeC, params->num_opened_rounds, t);
      uint16_t hideList[1];
      hideList[0] = sig->challengeP[P_index];
      ret = reconstructSeeds(seed, hideList, 1, sig->proofs[t].seedInfo, sig->proofs[t].seedInfoLen,
                             sig->salt, t, params);
      if (ret != 0) {
#if !defined(NDEBUG)
        printf("Failed to reconstruct seeds for round " SIZET_FMT "\n", t);
#endif
        freeTree(seed);
        ret = -1;
        goto Exit;
      }
    }
    /* Commit */

    /* Compute random tapes for all parties. One party for each repetition
     * challengeC will have a bogus seed; but we won't use that party's
     * random tape. */
    createRandomTapes(&tapes[t], getLeaves(seed), sig->salt, t, params);
    tapesCreated = t + 1;

    if (!contains(sig->challengeC, params->num_opened_rounds, t)) {
      /* We're given iSeed, have expanded the seeds, compute aux from scratch so we can compute
       * Com[t] */
      computeAuxTape(&tapes[t], NULL, params);
      for (size_t j = 0; j < params->num_MPC_parties; j += 4) {
        const uint8_t* seed_ptr[4] = {getLeaf(seed, j + 0), getLeaf(seed, j + 1),
                                      getLeaf(seed, j + 2), getLeaf(seed, j + 3)};
        commit_x4(C[t % 4].hashes + j, seed_ptr, sig->salt, t, j, params);
      }
      /* The last party's commitment additionally covers the aux bits, so it is
       * recomputed on top of the batched result */
      commit(C[t % 4].hashes[last], getLeaf(seed, last), tapes[t].aux_bits, sig->salt, t, last,
             params);
      /* after we have checked the tape, we do not need it anymore for this opened iteration */
    } else {
      /* We're given all seeds and aux bits, except for the unopened
       * party, we get their commitment */
      size_t unopened = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)];
      for (size_t j = 0; j < params->num_MPC_parties; j += 4) {
        const uint8_t* seed_ptr[4] = {getLeaf(seed, j + 0), getLeaf(seed, j + 1),
                                      getLeaf(seed, j + 2), getLeaf(seed, j + 3)};
        commit_x4(C[t % 4].hashes + j, seed_ptr, sig->salt, t, j, params);
      }
      if (last != unopened) {
        commit(C[t % 4].hashes[last], getLeaf(seed, last), sig->proofs[t].aux, sig->salt, t, last,
               params);
      }

      /* The unopened party's commitment is taken from the signature */
      memcpy(C[t % 4].hashes[unopened], sig->proofs[t].C, params->digest_size);
    }
    /* hash commitments every four iterations if possible, for the last few do single commitments
     */
    if (t >= params->num_rounds / 4 * 4) {
      commit_h(Ch.hashes[t], &C[t % 4], params);
    } else if ((t + 1) % 4 == 0) {
      size_t t4 = t / 4 * 4;
      commit_h_x4(&Ch.hashes[t4], &C[0], params);
    }
    freeTree(seed);
  }

  /* Commit to the views */
  for (size_t t = 0; t < params->num_rounds; t++) {
    if (!contains(sig->challengeC, params->num_opened_rounds, t)) {
      Cv.hashes[t] = NULL;
    }
  }

  for (size_t i = 0; i < params->num_opened_rounds; i++) {
    /* 2. When t is in C, we have everything we need to re-compute the view, as an honest signer
     * would.
     * We simulate the MPC with one fewer party; the unopened party's values are all set to zero.
     */
    size_t t       = sig->challengeC[i];
    int unopened   = sig->challengeP[i];
    uint8_t* input = sig->proofs[t].input;
    setAuxBits(&tapes[t], sig->proofs[t].aux, params);
    memset(tapes[t].tape[unopened], 0, 2 * params->view_size);
    memcpy(msgs->msgs[unopened], sig->proofs[t].msgs, params->view_size);
    mzd_from_char_array(m_maskedKey, input, params->input_size);
    msgs->unopened = unopened;
    msgs->pos      = 0;
    ret = simulateOnline(m_maskedKey, &tapes[t], msgs, m_plaintext, pubKey, params);

    if (ret != 0) {
#if !defined(NDEBUG)
      printf("MPC simulation failed for round " SIZET_FMT ", signature invalid\n", i);
#endif
      ret = -1;
      goto Exit;
    }
    commit_v(Cv.hashes[t], sig->proofs[t].input, msgs, params);
  }

  size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds;
  uint16_t* missingLeaves  = getMissingLeavesList(sig->challengeC, params);
  ret = addMerkleNodes(treeCv, missingLeaves, missingLeavesSize, sig->cvInfo, sig->cvInfoLen);
  free(missingLeaves);
  if (ret != 0) {
    ret = -1;
    goto Exit;
  }

  ret = verifyMerkleTree(treeCv, Cv.hashes, sig->salt, params);
  if (ret != 0) {
    ret = -1;
    goto Exit;
  }

  /* Compute the challenge; two lists of integers */
  HCP(challenge, challengeC, challengeP, &Ch, treeCv->nodes[0], sig->salt, pubKey, plaintext,
      message, messageByteLength, params);

  /* Compare to challenge from signature */
  if (memcmp(sig->challenge, challenge, params->digest_size) != 0) {
#if !defined(NDEBUG)
    printf("Challenge does not match, signature invalid\n");
#endif
    ret = -1;
    goto Exit;
  }

  ret = EXIT_SUCCESS;

Exit:
  /* Only the tapes that were actually created may be released */
  for (size_t t = 0; t < tapesCreated; t++) {
    freeRandomTape(&tapes[t]);
  }

  freeCommitments2(&Cv);
  freeCommitments2(&Ch);
  freeTree(iSeedsTree);
  free(tapes);
  free(challengeP);
  free(challengeC);
  freeTree(treeCv);
  freeMsgs(msgs);
  freeCommitments2(&C[3]);
  freeCommitments2(&C[2]);
  freeCommitments2(&C[1]);
  freeCommitments2(&C[0]);

  return ret;
}

/* Derive the salt and the root seed for signing.
 *
 * Squeezes saltAndRootLength bytes into saltAndRoot from a hash over
 * privateKey || message || pubKey || plaintext || lowmc.n, where lowmc.n is
 * absorbed as a little-endian 16-bit value. The output depends only on these
 * inputs, i.e. no fresh randomness is drawn here. */
static void computeSaltAndRootSeed(uint8_t* saltAndRoot, size_t saltAndRootLength,
                                   const uint8_t* privateKey, const uint8_t* pubKey,
                                   const uint8_t* plaintext, const uint8_t* message,
                                   size_t messageByteLength, const picnic_instance_t* params) {
  const uint16_t stateBits = (uint16_t)params->lowmc.n;
  hash_context hctx;

  hash_init(&hctx, params->digest_size);

  hash_update(&hctx, privateKey, params->input_size);
  hash_update(&hctx, message, messageByteLength);
  hash_update(&hctx, pubKey, params->input_size);
  hash_update(&hctx, plaintext, params->input_size);
  hash_update_uint16_le(&hctx, stateBits);

  hash_final(&hctx);
  hash_squeeze(&hctx, saltAndRoot, saltAndRootLength);
}

/* Shrink a heap buffer to len bytes.
 *
 * Returns the (possibly moved) buffer. If the reallocation fails, or len is 0
 * (where the result of realloc() is implementation-defined), the original
 * buffer is returned untouched; it is still valid and at least len bytes
 * large, so the caller never loses its only pointer to the data. */
static uint8_t* shrinkToFit(uint8_t* buf, size_t len) {
  if (len == 0) {
    return buf;
  }
  uint8_t* shrunk = realloc(buf, len);
  return shrunk != NULL ? shrunk : buf;
}

/* Create a Picnic3 signature.
 *
 * Derives the salt and the per-round seeds from the inputs, runs the
 * preprocessing and the online MPC simulation for every round, commits to the
 * results, derives the challenge and finally assembles the proofs for the
 * rounds selected by the challenge.
 *
 * @param privateKey         private key bytes
 * @param pubKey             public key bytes
 * @param plaintext          plaintext bytes belonging to the public key
 * @param message            message to sign
 * @param messageByteLength  length of message in bytes
 * @param sig                signature object to fill; sig->proofs must already
 *                           point to an array with one entry per round. The
 *                           buffers stored in sig (iSeedInfo, cvInfo and the
 *                           per-proof data) are allocated here and not
 *                           released by this function.
 * @param params             parameter set instance
 *
 * @return 0 on success, -1 if the MPC simulation failed in any round.
 */
static int sign_picnic3(const uint8_t* privateKey, const uint8_t* pubKey, const uint8_t* plaintext,
                        const uint8_t* message, size_t messageByteLength, signature2_t* sig,
                        const picnic_instance_t* params) {
  int ret              = 0;
  uint8_t* saltAndRoot = malloc(params->seed_size + SALT_SIZE);

  computeSaltAndRootSeed(saltAndRoot, params->seed_size + SALT_SIZE, privateKey, pubKey, plaintext,
                         message, messageByteLength, params);
  /* The first SALT_SIZE bytes are the salt, the remainder is the root seed */
  memcpy(sig->salt, saltAndRoot, SALT_SIZE);
  tree_t* iSeedsTree =
      generateSeeds(params->num_rounds, saltAndRoot + SALT_SIZE, sig->salt, 0, params);
  uint8_t** iSeeds = getLeaves(iSeedsTree);
  free(saltAndRoot);

  randomTape_t* tapes = malloc(params->num_rounds * sizeof(randomTape_t));
  tree_t** seeds      = malloc(params->num_rounds * sizeof(tree_t*));
  commitments_t* C    = allocateCommitments(params, 0);

  lowmc_simulate_online_f simulateOnline = params->impls.lowmc_simulate_online;
  inputs_t inputs                        = allocateInputs(params);
  msgs_t* msgs                           = allocateMsgs(params);

  /* Commitments to the commitments and views */
  commitments_t Ch;
  allocateCommitments2(&Ch, params, params->num_rounds);
  commitments_t Cv;
  allocateCommitments2(&Cv, params, params->num_rounds);

  mzd_local_t m_plaintext[1];
  mzd_local_t m_maskedKey[1];

  mzd_from_char_array(m_plaintext, plaintext, params->output_size);

  for (size_t t = 0; t < params->num_rounds; t++) {
    seeds[t] = generateSeeds(params->num_MPC_parties, iSeeds[t], sig->salt, t, params);
    createRandomTapes(&tapes[t], getLeaves(seeds[t]), sig->salt, t, params);
    /* Preprocessing; compute aux tape for the N-th player, for each parallel rep */
    computeAuxTape(&tapes[t], inputs[t], params);
    /* Commit to seeds and aux bits */
    assert(params->num_MPC_parties % 4 == 0);
    for (size_t j = 0; j < params->num_MPC_parties; j += 4) {
      const uint8_t* seed_ptr[4] = {getLeaf(seeds[t], j + 0), getLeaf(seeds[t], j + 1),
                                    getLeaf(seeds[t], j + 2), getLeaf(seeds[t], j + 3)};
      commit_x4(C[t].hashes + j, seed_ptr, sig->salt, t, j, params);
    }
    /* The last party's commitment additionally covers the aux bits, so it is
     * recomputed on top of the batched result */
    const size_t last = params->num_MPC_parties - 1;
    commit(C[t].hashes[last], getLeaf(seeds[t], last), tapes[t].aux_bits, sig->salt, t, last,
           params);
  }

  for (size_t t = 0; t < params->num_rounds; t++) {
    /* Simulate the online phase of the MPC */
    uint8_t* maskedKey = inputs[t];

    xor_byte_array(maskedKey, maskedKey, privateKey,
                   params->input_size); // maskedKey += privateKey
    /* Clear the padding bits beyond the lowmc.n state bits */
    for (size_t i = params->lowmc.n; i < params->input_size * 8; i++) {
      setBit(maskedKey, i, 0);
    }
    mzd_from_char_array(m_maskedKey, maskedKey, params->input_size);

    int rv = simulateOnline(m_maskedKey, &tapes[t], &msgs[t], m_plaintext, pubKey, params);
    if (rv != 0) {
#if !defined(NDEBUG)
      printf("MPC simulation failed in round " SIZET_FMT ", aborting signature\n", t);
#endif
      ret = -1;
    }
  }
  /* Commit to the commitments and views */
  {
    size_t t = 0;
    /* Full groups of four rounds use the batched hash ... */
    for (; t < params->num_rounds / 4 * 4; t += 4) {
      commit_h_x4(&Ch.hashes[t], &C[t], params);
      commit_v_x4(&Cv.hashes[t], (const uint8_t**)&inputs[t], &msgs[t], params);
    }
    /* ... and the remaining rounds are hashed one by one */
    for (; t < params->num_rounds; t++) {
      commit_h(Ch.hashes[t], &C[t], params);
      commit_v(Cv.hashes[t], inputs[t], &msgs[t], params);
    }
  }
  /* Create a Merkle tree with Cv as the leaves */
  tree_t* treeCv = createTree(params->num_rounds, params->digest_size);
  buildMerkleTree(treeCv, Cv.hashes, sig->salt, params);

  /* Compute the challenge; two lists of integers */
  uint16_t* challengeC = sig->challengeC;
  uint16_t* challengeP = sig->challengeP;
  HCP(sig->challenge, challengeC, challengeP, &Ch, treeCv->nodes[0], sig->salt, pubKey, plaintext,
      message, messageByteLength, params);

  /* Send information required for checking commitments with Merkle tree.
   * The commitments the verifier will be missing are those not in challengeC. */
  size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds;
  uint16_t* missingLeaves  = getMissingLeavesList(challengeC, params);
  size_t cvInfoLen         = 0;
  uint8_t* cvInfo = openMerkleTree(treeCv, missingLeaves, missingLeavesSize, &cvInfoLen);
  sig->cvInfo     = cvInfo;
  sig->cvInfoLen  = cvInfoLen;
  free(missingLeaves);

  /* Reveal iSeeds for unopened rounds, those in {0..T-1} \ ChallengeC. */
  sig->iSeedInfo    = malloc(params->num_rounds * params->seed_size);
  sig->iSeedInfoLen = revealSeeds(iSeedsTree, challengeC, params->num_opened_rounds, sig->iSeedInfo,
                                  params->num_rounds * params->seed_size, params);
  /* Trim the worst-case sized buffer; the data stays intact if trimming fails */
  sig->iSeedInfo = shrinkToFit(sig->iSeedInfo, sig->iSeedInfoLen);

  /* Assemble the proof */
  proof2_t* proofs = sig->proofs;
  for (size_t t = 0; t < params->num_rounds; t++) {
    if (contains(challengeC, params->num_opened_rounds, t)) {
      allocateProof2(&proofs[t], params);
      size_t P_index          = indexOf(challengeC, params->num_opened_rounds, t);
      proofs[t].unOpenedIndex = challengeP[P_index];

      uint16_t hideList[1];
      hideList[0]           = challengeP[P_index];
      proofs[t].seedInfo    = malloc(params->num_MPC_parties * params->seed_size);
      proofs[t].seedInfoLen = revealSeeds(seeds[t], hideList, 1, proofs[t].seedInfo,
                                          params->num_MPC_parties * params->seed_size, params);
      /* Trim the worst-case sized buffer; the data stays intact if trimming fails */
      proofs[t].seedInfo = shrinkToFit(proofs[t].seedInfo, proofs[t].seedInfoLen);

      /* The aux bits are only included when the last party is opened */
      size_t last = params->num_MPC_parties - 1;
      if (challengeP[P_index] != last) {
        memcpy(proofs[t].aux, tapes[t].aux_bits, params->view_size);
      }

      memcpy(proofs[t].input, inputs[t], params->input_size);
      memcpy(proofs[t].msgs, msgs[t].msgs[challengeP[P_index]], params->view_size);

      /* recompute commitment of unopened party since we did not store it for memory optimization
       */
      if (proofs[t].unOpenedIndex == params->num_MPC_parties - 1) {
        commit(proofs[t].C, getLeaf(seeds[t], proofs[t].unOpenedIndex), tapes[t].aux_bits,
               sig->salt, t, proofs[t].unOpenedIndex, params);
      } else {
        commit(proofs[t].C, getLeaf(seeds[t], proofs[t].unOpenedIndex), NULL, sig->salt, t,
               proofs[t].unOpenedIndex, params);
      }
    }
  }

  sig->proofs = proofs;

  freeTree(treeCv);
  for (size_t t = 0; t < params->num_rounds; t++) {
    freeRandomTape(&tapes[t]);
    freeTree(seeds[t]);
  }
  freeCommitments2(&Cv);
  freeCommitments2(&Ch);
  freeMsgs(msgs);
  freeInputs(inputs);
  freeCommitments(C);
  free(seeds);
  free(tapes);
  freeTree(iSeedsTree);

  return ret;
}

/* Check the unused trailing bits of a bit string stored in byteLength bytes
 * of which only bitLength bits are significant.
 *
 * Passes the last byte and the number of padding bits to check_padding_bits()
 * and returns the logical negation of its result; callers treat a zero return
 * value as a malformed encoding. byteLength must be at least 1. */
static int arePaddingBitsZero(uint8_t* data, size_t byteLength, size_t bitLength) {
  const uint8_t lastByte   = data[byteLength - 1];
  const size_t paddingBits = byteLength * 8 - bitLength;

  return !check_padding_bits(lastByte, paddingBits);
}

/* Parse a serialized Picnic3 signature into an already-allocated signature2_t.
 *
 * Layout read from sigBytes: challenge (digest_size) || salt (SALT_SIZE) ||
 * iSeedInfo || cvInfo || one proof per opened round, each consisting of
 * seedInfo, aux (only when the unopened party is not the last one), masked
 * input, broadcast msgs and the commitment C.
 *
 * The expected total length is recomputed from the challenge and must match
 * sigBytesLen exactly before any variable-length field is copied.
 *
 * sig         - destination; challenge/challengeC/challengeP/proofs must already
 *               be allocated by the caller. Buffers allocated here (iSeedInfo,
 *               cvInfo, proofs[t].seedInfo) are left attached to sig on every
 *               return path, so the caller releases them together with sig.
 * sigBytes    - serialized signature
 * sigBytesLen - length of sigBytes in bytes
 * params      - parameter set
 *
 * Returns EXIT_SUCCESS on success. Returns EXIT_FAILURE on a length mismatch
 * or allocation failure and -1 when padding bits are not zero; callers should
 * test for != EXIT_SUCCESS.
 */
static int deserializeSignature2(signature2_t* sig, const uint8_t* sigBytes, size_t sigBytesLen,
                                 const picnic_instance_t* params) {
  /* Read the challenge and salt */
  size_t bytesRequired = params->digest_size + SALT_SIZE;

  if (sigBytesLen < bytesRequired) {
    return EXIT_FAILURE;
  }

  memcpy(sig->challenge, sigBytes, params->digest_size);
  sigBytes += params->digest_size;
  memcpy(sig->salt, sigBytes, SALT_SIZE);
  sigBytes += SALT_SIZE;

  expandChallenge(sig->challengeC, sig->challengeP, sig->challenge, params);

  /* Add size of iSeeds tree data */
  sig->iSeedInfoLen =
      revealSeedsSize(params->num_rounds, sig->challengeC, params->num_opened_rounds, params);
  bytesRequired += sig->iSeedInfoLen;

  /* Add the size of the Cv Merkle tree data */
  size_t missingLeavesSize = params->num_rounds - params->num_opened_rounds;
  uint16_t* missingLeaves  = getMissingLeavesList(sig->challengeC, params);
  if (missingLeaves == NULL && missingLeavesSize != 0) {
    /* Could not build the list; do not hand a NULL list to the size computation */
    return EXIT_FAILURE;
  }
  sig->cvInfoLen = openMerkleTreeSize(params->num_rounds, missingLeaves, missingLeavesSize, params);
  bytesRequired += sig->cvInfoLen;
  free(missingLeaves);

  /* Compute the number of bytes required for the proofs */
  uint16_t hideList[1] = {0};
  size_t seedInfoLen   = revealSeedsSize(params->num_MPC_parties, hideList, 1, params);
  for (size_t t = 0; t < params->num_rounds; t++) {
    if (contains(sig->challengeC, params->num_opened_rounds, t)) {
      size_t P_t = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)];
      if (P_t != (params->num_MPC_parties - 1)) {
        bytesRequired += params->view_size;
      }
      bytesRequired += params->digest_size;
      bytesRequired += params->input_size;
      bytesRequired += params->view_size;
      bytesRequired += seedInfoLen;
    }
  }

  /* Fail if the signature does not have the exact number of bytes we expect */
  if (sigBytesLen != bytesRequired) {
#if !defined(NDEBUG)
    printf("%s: sigBytesLen = " SIZET_FMT ", expected bytesRequired = " SIZET_FMT "\n", __func__,
           sigBytesLen, bytesRequired);
#endif
    return EXIT_FAILURE;
  }

  /* malloc(0) may legitimately return NULL, so only a NULL result for a
   * non-empty request is treated as an allocation failure. */
  sig->iSeedInfo = malloc(sig->iSeedInfoLen);
  if (sig->iSeedInfo == NULL && sig->iSeedInfoLen != 0) {
    return EXIT_FAILURE;
  }
  if (sig->iSeedInfoLen != 0) {
    memcpy(sig->iSeedInfo, sigBytes, sig->iSeedInfoLen);
  }
  sigBytes += sig->iSeedInfoLen;

  sig->cvInfo = malloc(sig->cvInfoLen);
  if (sig->cvInfo == NULL && sig->cvInfoLen != 0) {
    return EXIT_FAILURE;
  }
  if (sig->cvInfoLen != 0) {
    memcpy(sig->cvInfo, sigBytes, sig->cvInfoLen);
  }
  sigBytes += sig->cvInfoLen;

  /* Read the proofs */
  for (size_t t = 0; t < params->num_rounds; t++) {
    if (contains(sig->challengeC, params->num_opened_rounds, t)) {
      allocateProof2(&sig->proofs[t], params);
      sig->proofs[t].seedInfoLen = seedInfoLen;
      sig->proofs[t].seedInfo    = malloc(sig->proofs[t].seedInfoLen);
      if (sig->proofs[t].seedInfo == NULL && sig->proofs[t].seedInfoLen != 0) {
        return EXIT_FAILURE;
      }
      if (sig->proofs[t].seedInfoLen != 0) {
        memcpy(sig->proofs[t].seedInfo, sigBytes, sig->proofs[t].seedInfoLen);
      }
      sigBytes += sig->proofs[t].seedInfoLen;

      size_t P_t = sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, t)];
      if (P_t != (params->num_MPC_parties - 1)) {
        memcpy(sig->proofs[t].aux, sigBytes, params->view_size);
        sigBytes += params->view_size;
        if (!arePaddingBitsZero(sig->proofs[t].aux, params->view_size,
                                3 * params->lowmc.r * params->lowmc.m)) {
#if !defined(NDEBUG)
          printf("%s: failed while deserializing aux bits\n", __func__);
#endif
          return -1;
        }
      }

      memcpy(sig->proofs[t].input, sigBytes, params->input_size);
      if (!arePaddingBitsZero(sig->proofs[t].input, params->input_size, params->lowmc.n)) {
#if !defined(NDEBUG)
        printf("%s: failed while deserializing input bits\n", __func__);
#endif
        return -1;
      }
      sigBytes += params->input_size;

      size_t msgsByteLength = params->view_size;
      memcpy(sig->proofs[t].msgs, sigBytes, msgsByteLength);
      sigBytes += msgsByteLength;
      size_t msgsBitLength = 3 * params->lowmc.r * params->lowmc.m;
      if (!arePaddingBitsZero(sig->proofs[t].msgs, msgsByteLength, msgsBitLength)) {
#if !defined(NDEBUG)
        printf("%s: failed while deserializing msgs bits\n", __func__);
#endif
        return -1;
      }

      memcpy(sig->proofs[t].C, sigBytes, params->digest_size);
      sigBytes += params->digest_size;
    }
  }

  return EXIT_SUCCESS;
}

/* Write a Picnic3 signature to a flat byte buffer.
 *
 * Output layout: challenge || salt || iSeedInfo || cvInfo || for each opened
 * round: seedInfo, aux (skipped when the unopened party is the last party),
 * masked input, msgs, commitment C.
 *
 * sig         - signature to encode
 * sigBytes    - destination buffer
 * sigBytesLen - capacity of sigBytes in bytes
 * params      - parameter set
 *
 * Returns the number of bytes written, or -1 if sigBytesLen is too small.
 */
static int serializeSignature2(const signature2_t* sig, uint8_t* sigBytes, size_t sigBytesLen,
                               const picnic_instance_t* params) {
  const size_t lastParty = params->num_MPC_parties - 1;
  uint8_t* cursor        = sigBytes;

  /* First pass: total size. The lengths of iSeedInfo/cvInfo are not encoded;
   * the deserializer recomputes them from the challenge. */
  size_t needed = params->digest_size + SALT_SIZE;
  needed += sig->iSeedInfoLen;
  needed += sig->cvInfoLen;

  for (size_t round = 0; round < params->num_rounds; round++) {
    if (!contains(sig->challengeC, params->num_opened_rounds, round)) {
      continue;
    }
    const size_t unopened =
        sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, round)];

    needed += sig->proofs[round].seedInfoLen;
    if (unopened != lastParty) {
      needed += params->view_size; /* aux */
    }
    needed += params->digest_size; /* C */
    needed += params->input_size;  /* input */
    needed += params->view_size;   /* msgs */
  }

  if (sigBytesLen < needed) {
    return -1;
  }

  /* Second pass: emit the fixed header fields. */
  memcpy(cursor, sig->challenge, params->digest_size);
  cursor += params->digest_size;

  memcpy(cursor, sig->salt, SALT_SIZE);
  cursor += SALT_SIZE;

  memcpy(cursor, sig->iSeedInfo, sig->iSeedInfoLen);
  cursor += sig->iSeedInfoLen;

  memcpy(cursor, sig->cvInfo, sig->cvInfoLen);
  cursor += sig->cvInfoLen;

  /* Emit one proof per opened round. */
  for (size_t round = 0; round < params->num_rounds; round++) {
    if (!contains(sig->challengeC, params->num_opened_rounds, round)) {
      continue;
    }

    memcpy(cursor, sig->proofs[round].seedInfo, sig->proofs[round].seedInfoLen);
    cursor += sig->proofs[round].seedInfoLen;

    const size_t unopened =
        sig->challengeP[indexOf(sig->challengeC, params->num_opened_rounds, round)];

    if (unopened != lastParty) {
      memcpy(cursor, sig->proofs[round].aux, params->view_size);
      cursor += params->view_size;
    }

    memcpy(cursor, sig->proofs[round].input, params->input_size);
    cursor += params->input_size;

    memcpy(cursor, sig->proofs[round].msgs, params->view_size);
    cursor += params->view_size;

    memcpy(cursor, sig->proofs[round].C, params->digest_size);
    cursor += params->digest_size;
  }

  return (int)(cursor - sigBytes);
}

/* Create a Picnic3 signature over msg and serialize it into `signature`.
 *
 * instance      - parameter set
 * plaintext     - plaintext block forwarded to sign_picnic3()
 * private_key   - signer's private key
 * public_key    - signer's public key
 * msg, msglen   - message to sign
 * signature     - output buffer
 * signature_len - in: capacity of `signature`; out: number of bytes written
 *                 (only updated on success)
 *
 * Returns 0 on success, -1 on allocation, signing or serialization failure.
 */
int impl_sign_picnic3(const picnic_instance_t* instance, const uint8_t* plaintext,
                      const uint8_t* private_key, const uint8_t* public_key, const uint8_t* msg,
                      size_t msglen, uint8_t* signature, size_t* signature_len) {
  int result = -1;

  signature2_t* sig = malloc(sizeof(*sig));
  if (sig == NULL) {
    /* Must be checked before allocateSignature2() writes through sig */
    return -1;
  }
  allocateSignature2(sig, instance);

  int ret = sign_picnic3(private_key, public_key, plaintext, msg, msglen, sig, instance);
  picnic_declassify(&ret, sizeof(ret));
  if (ret != EXIT_SUCCESS) {
#if !defined(NDEBUG)
    fprintf(stderr, "Failed to create signature\n");
    fflush(stderr);
#endif
    goto cleanup;
  }

  ret = serializeSignature2(sig, signature, *signature_len, instance);
  if (ret == -1) {
#if !defined(NDEBUG)
    fprintf(stderr, "Failed to serialize signature\n");
    fflush(stderr);
#endif
    goto cleanup;
  }

  *signature_len = (size_t)ret;
  result         = 0;

cleanup:
  freeSignature2(sig, instance);
  free(sig);
  return result;
}

/* Deserialize and verify a Picnic3 signature.
 *
 * instance      - parameter set
 * plaintext     - plaintext block forwarded to verify_picnic3()
 * public_key    - signer's public key
 * msg, msglen   - message that was signed
 * signature     - serialized signature
 * signature_len - length of `signature` in bytes
 *
 * Returns 0 when the signature verifies; -1 when allocation fails, the
 * signature cannot be deserialized, or verification fails.
 */
int impl_verify_picnic3(const picnic_instance_t* instance, const uint8_t* plaintext,
                        const uint8_t* public_key, const uint8_t* msg, size_t msglen,
                        const uint8_t* signature, size_t signature_len) {
  int result = -1;

  signature2_t* sig = malloc(sizeof(*sig));
  if (sig == NULL) {
    /* Must be checked before allocateSignature2() writes through sig */
    return -1;
  }
  allocateSignature2(sig, instance);

  int ret = deserializeSignature2(sig, signature, signature_len, instance);
  if (ret != EXIT_SUCCESS) {
#if !defined(NDEBUG)
    fprintf(stderr, "Failed to deserialize signature\n");
    fflush(stderr);
#endif
    goto cleanup;
  }

  ret = verify_picnic3(sig, public_key, plaintext, msg, msglen, instance);
  if (ret != EXIT_SUCCESS) {
    /* Signature is invalid, or verify function failed */
    goto cleanup;
  }

  result = 0;

cleanup:
  freeSignature2(sig, instance);
  free(sig);
  return result;
}

+ 52
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_impl.h View File

@@ -0,0 +1,52 @@
/*! @file picnic3_impl.h
* @brief This is the main implementation file of the signature scheme for
* the Picnic3 parameter sets.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC3_IMPL_H
#define PICNIC3_IMPL_H

#include <stdint.h>
#include <stddef.h>
#include "picnic_instances.h"

/* Per-round proof data carried in a Picnic3 signature (one entry per opened
 * round). All pointer members refer to separately allocated buffers. */
typedef struct proof2_t {
uint16_t unOpenedIndex; // P[t], index of the party that is not opened.
uint8_t* seedInfo; // Information required to compute the tree with seeds of all opened parties
size_t seedInfoLen; // Length of seedInfo buffer
uint8_t* aux; // Last party's correction bits; NULL if P[t] == N-1
uint8_t* C; // Commitment to preprocessing step of unopened party
uint8_t* input; // Masked input used in online execution
uint8_t* msgs; // Broadcast messages of unopened party P[t]
} proof2_t;

/* In-memory form of a Picnic3 signature. The pointer members are set up by
 * allocateSignature2() and during signing/deserialization, and released with
 * freeSignature2(). */
typedef struct signature2_t {
uint8_t salt[SALT_SIZE];
uint8_t* iSeedInfo; // Info required to recompute the tree of all initial seeds
size_t iSeedInfoLen; // Length of iSeedInfo in bytes
uint8_t* cvInfo; // Info required to check commitments to views (reconstruct Merkle tree)
size_t cvInfoLen; // Length of cvInfo in bytes
uint8_t* challenge; // output of HCP
uint16_t* challengeC; // Opened round indices, expanded from challenge
uint16_t* challengeP; // Unopened party index for each entry of challengeC
proof2_t* proofs; // One proof for each online execution the verifier checks
} signature2_t;

/* Sign msg and write the serialized signature to sig.
 * On entry *siglen is the capacity of sig; on success it is set to the number
 * of bytes written. Returns 0 on success and -1 on failure. */
int impl_sign_picnic3(const picnic_instance_t* pp, const uint8_t* plaintext,
const uint8_t* private_key, const uint8_t* public_key, const uint8_t* msg,
size_t msglen, uint8_t* sig, size_t* siglen);
/* Deserialize and verify a signature over msg.
 * Returns 0 if the signature is valid, -1 if it is malformed or invalid. */
int impl_verify_picnic3(const picnic_instance_t* instance, const uint8_t* plaintext,
const uint8_t* public_key, const uint8_t* msg, size_t msglen,
const uint8_t* signature, size_t signature_len);

/* Set up / release the buffers referenced by a signature2_t for the given
 * parameter set. The signature2_t object itself is owned by the caller. */
void allocateSignature2(signature2_t* sig, const picnic_instance_t* params);
void freeSignature2(signature2_t* sig, const picnic_instance_t* params);

#endif /* PICNIC3_IMPL_H */

+ 516
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_simulate.c View File

@@ -0,0 +1,516 @@
/*! @file picnic3_simulate.c
 * @brief Online-phase MPC simulation of the LowMC circuit for the Picnic3
 * parameter sets.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if !defined(_MSC_VER)
#include <stdalign.h>
#endif

#include "compat.h"
#include "bitstream.h"
#include "io.h"
#include "picnic3_simulate.h"
#include "picnic3_types.h"
#include "simd.h"

/* Bitsliced MPC S-box layer for the uint64 (mzd_local_t) backend.
 *
 * The macro body refers to `statein`, `tapes` and `msgs`, which must be in
 * scope at the expansion site. XOR/AND take (dst, src1, src2) and SHL/SHR take
 * (dst, src, count). bitmask_a/b/c select the three input bits of each S-box.
 *
 * For each of the 16 parties (the party count is hard-coded in the loop):
 *  - the party equal to msgs->unopened has its broadcast bits read from
 *    msgs->msgs[i] at msgs->pos and folded into s_ab/s_bc/s_ca;
 *  - every other party reads two LOWMC_N-bit words from tapes->tape[i]
 *    starting at tapes->pos (input masks, then AND helper bits), computes its
 *    broadcast share and writes it to msgs->msgs[i] at msgs->pos.
 * Afterwards tapes->pos advances by 2 * LOWMC_N, msgs->pos by LOWMC_N, and the
 * new state is stored in statein.
 */
#define picnic3_mpc_sbox_bitsliced(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, bitmask_c) \
do { \
mzd_local_t a[1], b[1], c[1]; \
/* a */ \
AND(a, bitmask_a, statein); \
/* b */ \
AND(b, bitmask_b, statein); \
/* c */ \
AND(c, bitmask_c, statein); \
\
SHL(a, a, 2); \
SHL(b, b, 1); \
\
mzd_local_t t0[1], t1[1], t2[1]; \
\
mzd_local_t s_ab[1], s_bc[1], s_ca[1]; \
/* b & c */ \
AND(s_bc, b, c); \
/* c & a */ \
AND(s_ca, c, a); \
/* a & b */ \
AND(s_ab, a, b); \
for (int i = 0; i < 16; i++) { \
mzd_local_t tmp[1]; \
bitstream_t party_msgs = {{msgs->msgs[i]}, msgs->pos}; \
if (i == msgs->unopened) { \
/* we are in verify, just grab the broadcast s from the msgs array */ \
mzd_from_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
AND(t0, bitmask_a, tmp); \
/* b */ \
AND(t1, bitmask_b, tmp); \
/* c */ \
AND(t2, bitmask_c, tmp); \
SHL(t0, t0, 2); \
SHL(t1, t1, 1); \
XOR(s_ab, t2, s_ab); \
XOR(s_bc, t1, s_bc); \
XOR(s_ca, t0, s_ca); \
\
continue; \
} \
bitstream_t party_tape = {{tapes->tape[i]}, tapes->pos}; \
/* make a mzd_local from tape[i] for input_masks */ \
mzd_local_t mask_a[1], mask_b[1], mask_c[1]; \
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
AND(mask_a, bitmask_a, tmp); \
/* b */ \
AND(mask_b, bitmask_b, tmp); \
/* c */ \
AND(mask_c, bitmask_c, tmp); \
SHL(mask_a, mask_a, 2); \
SHL(mask_b, mask_b, 1); \
\
/* make a mzd_local from tape[i] for and_helper */ \
mzd_local_t and_helper_ab[1], and_helper_bc[1], and_helper_ca[1]; \
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
AND(and_helper_ab, bitmask_c, tmp); \
/* b */ \
AND(and_helper_bc, bitmask_b, tmp); \
/* c */ \
AND(and_helper_ca, bitmask_a, tmp); \
SHL(and_helper_ca, and_helper_ca, 2); \
SHL(and_helper_bc, and_helper_bc, 1); \
\
/* s_ab */ \
AND(t0, a, mask_b); \
AND(t1, b, mask_a); \
XOR(t0, t0, t1); \
XOR(tmp, t0, and_helper_ab); \
XOR(s_ab, tmp, s_ab); \
/* s_bc */ \
AND(t0, b, mask_c); \
AND(t1, c, mask_b); \
XOR(t0, t0, t1); \
XOR(t0, t0, and_helper_bc); \
XOR(s_bc, t0, s_bc); \
\
SHR(t0, t0, 1); \
XOR(tmp, tmp, t0); \
/* s_ca */ \
AND(t0, c, mask_a); \
AND(t1, a, mask_c); \
XOR(t0, t0, t1); \
XOR(t0, t0, and_helper_ca); \
XOR(s_ca, t0, s_ca); \
\
SHR(t0, t0, 2); \
XOR(tmp, tmp, t0); \
mzd_to_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
} \
tapes->pos += LOWMC_N; \
tapes->pos += LOWMC_N; \
msgs->pos += LOWMC_N; \
\
/* (b & c) ^ a */ \
XOR(t0, s_bc, a); \
\
/* (c & a) ^ a ^ b */ \
XOR(a, a, b); \
XOR(t1, s_ca, a); \
\
/* (a & b) ^ a ^ b ^c */ \
XOR(t2, s_ab, a); \
XOR(t2, t2, c); \
\
SHR(t0, t0, 2); \
SHR(t1, t1, 1); \
\
XOR(t2, t2, t1); \
XOR(statein, t2, t0); \
} while (0)

#include "lowmc_129_129_4.h"

#if !defined(NO_UINT64_FALLBACK)
/* MPC S-box layer for LowMC-129-129-4 using the uint64 backend: expands
 * picnic3_mpc_sbox_bitsliced with the 192-bit mzd_*_uint64 helpers and the
 * mask_129_129_43_{a,b,c} bit masks. Updates statein in place and advances
 * tapes->pos and msgs->pos. */
static void picnic3_mpc_sbox_uint64_lowmc_129_129_4(mzd_local_t* statein, randomTape_t* tapes,
msgs_t* msgs) {

picnic3_mpc_sbox_bitsliced(LOWMC_129_129_4_N, mzd_xor_uint64_192, mzd_and_uint64_192,
mzd_shift_left_uint64_192, mzd_shift_right_uint64_192,
mask_129_129_43_a, mask_129_129_43_b, mask_129_129_43_c);
}


#define IMPL uint64
/* PICNIC3_L1_FS */
#include "lowmc_129_129_4_fns_uint64.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_uint64_129_43
#include "picnic3_simulate.c.i"

/* PICNIC3_L3_FS */
#include "lowmc_192_192_4_fns_uint64.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_uint64_192_64
#include "picnic3_simulate.c.i"

/* PICNIC3_L5_FS */
#include "lowmc_255_255_4_fns_uint64.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_uint64_255_85
#include "picnic3_simulate.c.i"
#undef IMPL
#endif

/* Bitsliced MPC S-box layer for the 128-bit SIMD backend.
 *
 * Same algorithm as picnic3_mpc_sbox_bitsliced, but intermediate values are
 * held in word128[2] arrays and operands are accessed through the ->w128
 * member of mzd_local_t. XOR/AND take (dst, src1, src2) and SHL/SHR take
 * (dst, src, count).
 *
 * The macro body refers to `statein`, `tapes` and `msgs`, which must be in
 * scope at the expansion site. The loop runs over a hard-coded 16 parties; the
 * party equal to msgs->unopened is read from msgs, all others are derived from
 * their tapes and written to msgs. On exit tapes->pos has advanced by
 * 2 * LOWMC_N, msgs->pos by LOWMC_N, and statein->w128 holds the new state.
 */
#define picnic3_mpc_sbox_bitsliced_mm128(LOWMC_N, XOR, AND, SHL, SHR, bitmask_a, bitmask_b, \
bitmask_c) \
do { \
word128 a[2] ATTR_ALIGNED(alignof(word128)); \
word128 b[2] ATTR_ALIGNED(alignof(word128)); \
word128 c[2] ATTR_ALIGNED(alignof(word128)); \
/* a */ \
AND(a, bitmask_a->w128, statein->w128); \
/* b */ \
AND(b, bitmask_b->w128, statein->w128); \
/* c */ \
AND(c, bitmask_c->w128, statein->w128); \
\
SHL(a, a, 2); \
SHL(b, b, 1); \
\
word128 t0[2] ATTR_ALIGNED(alignof(word128)); \
word128 t1[2] ATTR_ALIGNED(alignof(word128)); \
word128 t2[2] ATTR_ALIGNED(alignof(word128)); \
word128 s_ab[2] ATTR_ALIGNED(alignof(word128)); \
word128 s_bc[2] ATTR_ALIGNED(alignof(word128)); \
word128 s_ca[2] ATTR_ALIGNED(alignof(word128)); \
\
/* b & c */ \
AND(s_bc, b, c); \
/* c & a */ \
AND(s_ca, c, a); \
/* a & b */ \
AND(s_ab, a, b); \
for (int i = 0; i < 16; i++) { \
mzd_local_t tmp[1]; \
bitstream_t party_msgs = {{msgs->msgs[i]}, msgs->pos}; \
if (i == msgs->unopened) { \
/* we are in verify, just grab the broadcast s from the msgs array */ \
mzd_from_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
AND(t0, bitmask_a->w128, tmp->w128); \
/* b */ \
AND(t1, bitmask_b->w128, tmp->w128); \
/* c */ \
AND(t2, bitmask_c->w128, tmp->w128); \
SHL(t0, t0, 2); \
SHL(t1, t1, 1); \
XOR(s_ab, t2, s_ab); \
XOR(s_bc, t1, s_bc); \
XOR(s_ca, t0, s_ca); \
\
continue; \
} \
bitstream_t party_tape = {{tapes->tape[i]}, tapes->pos}; \
/* make a mzd_local from tape[i] for input_masks */ \
word128 mask_a[2] ATTR_ALIGNED(alignof(word128)); \
word128 mask_b[2] ATTR_ALIGNED(alignof(word128)); \
word128 mask_c[2] ATTR_ALIGNED(alignof(word128)); \
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
AND(mask_a, bitmask_a->w128, tmp->w128); \
/* b */ \
AND(mask_b, bitmask_b->w128, tmp->w128); \
/* c */ \
AND(mask_c, bitmask_c->w128, tmp->w128); \
SHL(mask_a, mask_a, 2); \
SHL(mask_b, mask_b, 1); \
\
/* make a mzd_local from tape[i] for and_helper */ \
word128 and_helper_ab[2] ATTR_ALIGNED(alignof(word128)); \
word128 and_helper_bc[2] ATTR_ALIGNED(alignof(word128)); \
word128 and_helper_ca[2] ATTR_ALIGNED(alignof(word128)); \
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
AND(and_helper_ab, bitmask_c->w128, tmp->w128); \
/* b */ \
AND(and_helper_bc, bitmask_b->w128, tmp->w128); \
/* c */ \
AND(and_helper_ca, bitmask_a->w128, tmp->w128); \
SHL(and_helper_ca, and_helper_ca, 2); \
SHL(and_helper_bc, and_helper_bc, 1); \
\
/* s_ab */ \
AND(t0, a, mask_b); \
AND(t1, b, mask_a); \
XOR(t0, t0, t1); \
XOR(tmp->w128, t0, and_helper_ab); \
XOR(s_ab, tmp->w128, s_ab); \
/* s_bc */ \
AND(t0, b, mask_c); \
AND(t1, c, mask_b); \
XOR(t0, t0, t1); \
XOR(t0, t0, and_helper_bc); \
XOR(s_bc, t0, s_bc); \
\
SHR(t0, t0, 1); \
XOR(tmp->w128, tmp->w128, t0); \
/* s_ca */ \
AND(t0, c, mask_a); \
AND(t1, a, mask_c); \
XOR(t0, t0, t1); \
XOR(t0, t0, and_helper_ca); \
XOR(s_ca, t0, s_ca); \
\
SHR(t0, t0, 2); \
XOR(tmp->w128, tmp->w128, t0); \
mzd_to_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
} \
tapes->pos += LOWMC_N; \
tapes->pos += LOWMC_N; \
msgs->pos += LOWMC_N; \
\
/* (b & c) ^ a */ \
XOR(t0, s_bc, a); \
\
/* (c & a) ^ a ^ b */ \
XOR(a, a, b); \
XOR(t1, s_ca, a); \
\
/* (a & b) ^ a ^ b ^c */ \
XOR(t2, s_ab, a); \
XOR(t2, t2, c); \
\
SHR(t0, t0, 2); \
SHR(t1, t1, 1); \
\
XOR(t2, t2, t1); \
XOR(statein->w128, t2, t0); \
} while (0)

/* MPC S-box layer for LowMC-129-129-4 using the 128-bit SIMD backend: expands
 * picnic3_mpc_sbox_bitsliced_mm128 with the mm128_*_256 helpers and the
 * mask_129_129_43_{a,b,c} bit masks. Updates statein in place and advances
 * tapes->pos and msgs->pos. */
ATTR_TARGET_S128
static void picnic3_mpc_sbox_s128_lowmc_129_129_4(mzd_local_t* statein, randomTape_t* tapes,
msgs_t* msgs) {
picnic3_mpc_sbox_bitsliced_mm128(LOWMC_129_129_4_N, mm128_xor_256, mm128_and_256,
mm128_shift_left_256, mm128_shift_right_256, mask_129_129_43_a,
mask_129_129_43_b, mask_129_129_43_c);
}



#define IMPL s128
#undef FN_ATTR
#define FN_ATTR ATTR_TARGET_S128
/* PICNIC3_L1_FS */
#include "lowmc_129_129_4_fns_s128.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_s128_129_43
#include "picnic3_simulate.c.i"

/* PICNIC3_L3_FS */
#include "lowmc_192_192_4_fns_s128.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_s128_192_64
#include "picnic3_simulate.c.i"

/* PICNIC3_L5_FS */
#include "lowmc_255_255_4_fns_s128.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_s128_255_85
#include "picnic3_simulate.c.i"

#undef IMPL

/* Bitsliced MPC S-box layer for the 256-bit SIMD backend.
 *
 * Same algorithm as the uint64 and 128-bit variants, but every intermediate is
 * a single word256 and the operators are value-returning: XOR/AND take
 * (src1, src2) and ROL/ROR take (src, count) and return the result. Operands
 * are accessed through the ->w256 member of mzd_local_t. Note that this
 * variant is parameterised with rotate operations (ROL/ROR) where the other
 * variants use shifts.
 *
 * The macro body refers to `statein`, `tapes` and `msgs`, which must be in
 * scope at the expansion site. The loop runs over a hard-coded 16 parties; the
 * party equal to msgs->unopened is read from msgs, all others are derived from
 * their tapes and written to msgs. On exit tapes->pos has advanced by
 * 2 * LOWMC_N, msgs->pos by LOWMC_N, and statein->w256 holds the new state.
 */
#define picnic3_mpc_sbox_bitsliced_mm256(LOWMC_N, XOR, AND, ROL, ROR, bitmask_a, bitmask_b, \
bitmask_c) \
do { \
word256 a ATTR_ALIGNED(alignof(word256)); \
word256 b ATTR_ALIGNED(alignof(word256)); \
word256 c ATTR_ALIGNED(alignof(word256)); \
/* a */ \
a = AND(bitmask_a->w256, statein->w256); \
/* b */ \
b = AND(bitmask_b->w256, statein->w256); \
/* c */ \
c = AND(bitmask_c->w256, statein->w256); \
\
a = ROL(a, 2); \
b = ROL(b, 1); \
\
word256 t0 ATTR_ALIGNED(alignof(word256)); \
word256 t1 ATTR_ALIGNED(alignof(word256)); \
word256 t2 ATTR_ALIGNED(alignof(word256)); \
word256 s_ab ATTR_ALIGNED(alignof(word256)); \
word256 s_bc ATTR_ALIGNED(alignof(word256)); \
word256 s_ca ATTR_ALIGNED(alignof(word256)); \
\
/* b & c */ \
s_bc = AND(b, c); \
/* c & a */ \
s_ca = AND(c, a); \
/* a & b */ \
s_ab = AND(a, b); \
for (int i = 0; i < 16; i++) { \
mzd_local_t tmp[1]; \
bitstream_t party_msgs = {{msgs->msgs[i]}, msgs->pos}; \
if (i == msgs->unopened) { \
/* we are in verify, just grab the broadcast s from the msgs array */ \
mzd_from_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
t0 = AND(bitmask_a->w256, tmp->w256); \
/* b */ \
t1 = AND(bitmask_b->w256, tmp->w256); \
/* c */ \
t2 = AND(bitmask_c->w256, tmp->w256); \
t0 = ROL(t0, 2); \
t1 = ROL(t1, 1); \
s_ab = XOR(t2, s_ab); \
s_bc = XOR(t1, s_bc); \
s_ca = XOR(t0, s_ca); \
\
continue; \
} \
bitstream_t party_tape = {{tapes->tape[i]}, tapes->pos}; \
/* make a mzd_local from tape[i] for input_masks */ \
word256 mask_a ATTR_ALIGNED(alignof(word256)); \
word256 mask_b ATTR_ALIGNED(alignof(word256)); \
word256 mask_c ATTR_ALIGNED(alignof(word256)); \
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
mask_a = AND(bitmask_a->w256, tmp->w256); \
/* b */ \
mask_b = AND(bitmask_b->w256, tmp->w256); \
/* c */ \
mask_c = AND(bitmask_c->w256, tmp->w256); \
mask_a = ROL(mask_a, 2); \
mask_b = ROL(mask_b, 1); \
\
/* make a mzd_local from tape[i] for and_helper */ \
word256 and_helper_ab ATTR_ALIGNED(alignof(word256)); \
word256 and_helper_bc ATTR_ALIGNED(alignof(word256)); \
word256 and_helper_ca ATTR_ALIGNED(alignof(word256)); \
mzd_from_bitstream(&party_tape, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
/* a */ \
and_helper_ab = AND(bitmask_c->w256, tmp->w256); \
/* b */ \
and_helper_bc = AND(bitmask_b->w256, tmp->w256); \
/* c */ \
and_helper_ca = AND(bitmask_a->w256, tmp->w256); \
and_helper_ca = ROL(and_helper_ca, 2); \
and_helper_bc = ROL(and_helper_bc, 1); \
\
/* s_ab */ \
t0 = AND(a, mask_b); \
t1 = AND(b, mask_a); \
t0 = XOR(t0, t1); \
tmp->w256 = XOR(t0, and_helper_ab); \
s_ab = XOR(tmp->w256, s_ab); \
/* s_bc */ \
t0 = AND(b, mask_c); \
t1 = AND(c, mask_b); \
t0 = XOR(t0, t1); \
t0 = XOR(t0, and_helper_bc); \
s_bc = XOR(t0, s_bc); \
\
t0 = ROR(t0, 1); \
tmp->w256 = XOR(tmp->w256, t0); \
/* s_ca */ \
t0 = AND(c, mask_a); \
t1 = AND(a, mask_c); \
t0 = XOR(t0, t1); \
t0 = XOR(t0, and_helper_ca); \
s_ca = XOR(t0, s_ca); \
\
t0 = ROR(t0, 2); \
tmp->w256 = XOR(tmp->w256, t0); \
mzd_to_bitstream(&party_msgs, tmp, (LOWMC_N + 63) / (sizeof(uint64_t) * 8), LOWMC_N); \
} \
tapes->pos += LOWMC_N; \
tapes->pos += LOWMC_N; \
msgs->pos += LOWMC_N; \
\
/* (b & c) ^ a */ \
t0 = XOR(s_bc, a); \
\
/* (c & a) ^ a ^ b */ \
a = XOR(a, b); \
t1 = XOR(s_ca, a); \
\
/* (a & b) ^ a ^ b ^c */ \
t2 = XOR(s_ab, a); \
t2 = XOR(t2, c); \
\
t0 = ROR(t0, 2); \
t1 = ROR(t1, 1); \
\
t2 = XOR(t2, t1); \
statein->w256 = XOR(t2, t0); \
} while (0)

/* MPC S-box layer for LowMC-129-129-4 using the AVX2 backend: expands
 * picnic3_mpc_sbox_bitsliced_mm256 with mm256_xor/mm256_and and the
 * mm256_rotate_left/right helpers plus the mask_129_129_43_{a,b,c} bit masks.
 * Updates statein in place and advances tapes->pos and msgs->pos. */
ATTR_TARGET_AVX2
static void picnic3_mpc_sbox_s256_lowmc_129_129_4(mzd_local_t* statein, randomTape_t* tapes,
msgs_t* msgs) {
picnic3_mpc_sbox_bitsliced_mm256(LOWMC_129_129_4_N, mm256_xor, mm256_and, mm256_rotate_left,
mm256_rotate_right, mask_129_129_43_a, mask_129_129_43_b,
mask_129_129_43_c);
}



#define IMPL s256
#undef FN_ATTR
#define FN_ATTR ATTR_TARGET_AVX2
/* PICNIC3_L1_FS */
#include "lowmc_129_129_4_fns_s256.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_s256_129_43
#include "picnic3_simulate.c.i"

/* PICNIC3_L3_FS */
#include "lowmc_192_192_4_fns_s256.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_s256_192_64
#include "picnic3_simulate.c.i"

/* PICNIC3_L5_FS */
#include "lowmc_255_255_4_fns_s256.h"
#undef SIM_ONLINE
#define SIM_ONLINE lowmc_simulate_online_s256_255_85
#include "picnic3_simulate.c.i"

#undef IMPL

/* Select the online-simulation routine for the given LowMC parameters.
 *
 * Only the n = 129, m = 43 instance is dispatched here. The widest backend the
 * CPU supports is preferred: AVX2, then SSE2/NEON, then (unless
 * NO_UINT64_FALLBACK is defined) the plain uint64 implementation.
 *
 * Returns the chosen function pointer, or NULL when no implementation is
 * available for the parameters.
 */
lowmc_simulate_online_f lowmc_simulate_online_get_implementation(const lowmc_parameters_t* lowmc) {
  assert((lowmc->m == 43 && lowmc->n == 129) || (lowmc->m == 64 && lowmc->n == 192) ||
         (lowmc->m == 85 && lowmc->n == 255));

  const int isL1Instance = (lowmc->n == 129) && (lowmc->m == 43);

  if (CPU_SUPPORTS_AVX2 && isL1Instance) {
    return lowmc_simulate_online_s256_129_43;
  }

  if ((CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) && isL1Instance) {
    return lowmc_simulate_online_s128_129_43;
  }

#if !defined(NO_UINT64_FALLBACK)
  if (isL1Instance) {
    return lowmc_simulate_online_uint64_129_43;
  }
#endif

  return NULL;
}

+ 57
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_simulate.c.i View File

@@ -0,0 +1,57 @@
/*! @file picnic3_simulate.c.i
 * @brief Template for the online MPC simulation function of the Picnic3
 * parameter sets; included once per LowMC instance and implementation.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

/* Template for the online phase of the MPC simulation of LowMC.
 *
 * The including file must define SIM_ONLINE (name of the generated function)
 * and IMPL; LOWMC_INSTANCE, LOWMC_N, LOWMC_R, MUL, XOR and ADDMUL are also
 * expected to be defined at this point (presumably by the lowmc_*_fns_*.h
 * header included just before this file - confirm). Nothing is emitted when
 * LOWMC_INSTANCE is undefined.
 *
 * Generated function:
 *   maskedKey - masked key, multiplied with the key matrices
 *   tapes     - per-party random tapes consumed by the S-box layer
 *   msgs      - per-party broadcast messages read/written by the S-box layer
 *   plaintext - plaintext block XORed into the initial state
 *   pubKey    - expected output, params->output_size bytes
 *   params    - parameter set
 * Returns 0 if the simulated output equals pubKey, -1 otherwise.
 */
#if defined(LOWMC_INSTANCE)
#if defined(FN_ATTR)
FN_ATTR
#endif
static int SIM_ONLINE(mzd_local_t* maskedKey, randomTape_t* tapes, msgs_t* msgs,
const mzd_local_t* plaintext, const uint8_t* pubKey,
const picnic_instance_t* params) {

/* S-box routine matching the current IMPL and LOWMC_INSTANCE */
#define mpc_sbox CONCAT(picnic3_mpc_sbox, CONCAT(IMPL, LOWMC_INSTANCE))
int ret = 0;
mzd_local_t state[(LOWMC_N + 255) / 256];
mzd_local_t temp[(LOWMC_N + 255) / 256];

// MPC_MUL(temp, maskedKey, LOWMC_INSTANCE.k0_matrix,
// mask_shares); // roundKey = maskedKey * KMatrix[0]
MUL(temp, maskedKey, LOWMC_INSTANCE.k0_matrix);
XOR(state, temp, plaintext);

for (uint32_t r = 0; r < LOWMC_R; r++) {
mpc_sbox(state, tapes, msgs);
// MPC_MUL(state, state, LOWMC_INSTANCE.rounds[r].l_matrix,
// mask_shares); // state = state * LMatrix (r-1)
MUL(temp, state, LOWMC_INSTANCE.rounds[r].l_matrix);
XOR(state, temp, LOWMC_INSTANCE.rounds[r].constant);
ADDMUL(state, maskedKey, LOWMC_INSTANCE.rounds[r].k_matrix);
}

/* check that the output is correct */
uint8_t output[MAX_LOWMC_BLOCK_SIZE];
mzd_to_char_array(output, state, params->output_size);

if (timingsafe_bcmp(output, pubKey, params->output_size) != 0) {
#if !defined(NDEBUG)
printf("%s: output does not match pubKey\n", __func__);
printf("pubKey: ");
print_hex(stdout, pubKey, params->output_size);
printf("\noutput: ");
print_hex(stdout, output, params->output_size);
printf("\n");
#endif
ret = -1;
}
return ret;
}
#endif

+ 25
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_simulate.h View File

@@ -0,0 +1,25 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC3_SIMULATE_H
#define PICNIC3_SIMULATE_H

#include "lowmc_pars.h"

/* Forward declarations; only pointers to these types are used in this header. */
typedef struct randomTape_t randomTape_t;
typedef struct msgs_t msgs_t;
typedef struct picnic_instance_t picnic_instance_t;

/* Signature of an online MPC simulation routine. Implementations return 0
 * when the simulated LowMC output matches pubKey and -1 otherwise. */
typedef int (*lowmc_simulate_online_f)(mzd_local_t* maskedKey, randomTape_t* tapes, msgs_t* msgs,
const mzd_local_t* plaintext, const uint8_t* pubKey,
const picnic_instance_t* params);

/* Returns the simulation routine to use for the given LowMC parameters, or
 * NULL if none is available. */
lowmc_simulate_online_f lowmc_simulate_online_get_implementation(const lowmc_parameters_t* lowmc);

#endif

+ 612
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_tree.c View File

@@ -0,0 +1,612 @@
/*! @file picnic3_tree.c
* @brief This file has the tree implementation used to generate random seeds
* and commit to multiple values with a Merkle tree.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include <assert.h>
#include <limits.h>
#include <stdlib.h>

#include "endian_compat.h"
#include "kdf_shake.h"
#include "picnic.h"
#include "picnic3_tree.h"
#include "picnic3_types.h"

/* Linear membership test.
 *
 * list  - array of len entries
 * len   - number of entries to examine
 * value - value to look for
 *
 * Returns 1 if value occurs among the first len entries of list, else 0.
 */
static int contains(size_t* list, size_t len, size_t value) {
  int found  = 0;
  size_t idx = 0;

  while (!found && idx < len) {
    found = (list[idx] == value);
    idx++;
  }
  return found;
}

/* Returns 1 if node index i lies inside the tree and is flagged in
 * tree->exists, 0 otherwise (including for out-of-range indices). */
static int exists(tree_t* tree, size_t i) {
  return (i < tree->numNodes && tree->exists[i]) ? 1 : 0;
}

/* Allocate a binary tree with numLeaves leaves, stored in heap order (the
 * children of node i are 2i+1 and 2i+2). The leaves occupy the last numLeaves
 * node slots.
 *
 * numLeaves - number of leaves
 * dataSize  - number of bytes of storage per node
 *
 * All node data lives in one zero-initialised slab; tree->nodes[i] points into
 * it, so tree->nodes[0] is the base of the slab. tree->haveNode starts out all
 * zero. tree->exists marks the leaves, every node with at least one existing
 * child, and the root.
 *
 * Returns the new tree (release with freeTree()), or NULL if any allocation
 * fails; nothing is leaked in that case.
 */
tree_t* createTree(size_t numLeaves, size_t dataSize) {
  tree_t* tree = malloc(sizeof(*tree));
  if (tree == NULL) {
    return NULL;
  }

  tree->depth = ceil_log2(numLeaves) + 1;
  /* Num nodes in complete - number of missing leaves.
   * Shift a size_t rather than a signed int. */
  tree->numNodes = (((size_t)1 << (tree->depth)) - 1) -
                   (((size_t)1 << (tree->depth - 1)) - numLeaves);
  tree->numLeaves = numLeaves;
  tree->dataSize  = dataSize;

  tree->nodes    = malloc(tree->numNodes * sizeof(uint8_t*));
  uint8_t* slab  = calloc(tree->numNodes, dataSize);
  tree->haveNode = calloc(tree->numNodes, 1);
  /* Depending on the number of leaves, the tree may not be complete */
  tree->exists = calloc(tree->numNodes, 1);

  if (tree->nodes == NULL || slab == NULL || tree->haveNode == NULL || tree->exists == NULL) {
    /* freeTree() dereferences tree->nodes, so unwind by hand here */
    free(slab);
    free(tree->exists);
    free(tree->haveNode);
    free(tree->nodes);
    free(tree);
    return NULL;
  }

  for (size_t i = 0; i < tree->numNodes; i++) {
    tree->nodes[i] = slab;
    slab += dataSize;
  }

  memset(tree->exists + tree->numNodes - tree->numLeaves, 1, tree->numLeaves); /* Set leaves */
  /* Walk from the first leaf slot towards the root, marking parents of existing nodes */
  for (size_t i = tree->numNodes - tree->numLeaves; i > 0; i--) {
    if (exists(tree, 2 * i + 1) || exists(tree, 2 * i + 2)) {
      tree->exists[i] = 1;
    }
  }
  tree->exists[0] = 1;

  return tree;
}

/* Release a tree created by createTree(). A NULL tree is ignored.
 * tree->nodes[0] is freed as the base of the node data storage. */
void freeTree(tree_t* tree) {
  if (tree == NULL) {
    return;
  }

  free(tree->nodes[0]);
  free(tree->nodes);
  free(tree->haveNode);
  free(tree->exists);
  free(tree);
}

/* Returns 1 if node is a left child, i.e. has an odd index in the heap
 * layout, and 0 otherwise. Must not be called for the root (index 0). */
static int isLeftChild(size_t node) {
  assert(node != 0);
  return (node & 1u) ? 1 : 0;
}

/* Returns 1 when the right-child index of node (2 * node + 2) is within the
 * tree and node itself is flagged as existing; 0 otherwise. */
static int hasRightChild(tree_t* tree, size_t node) {
  if (2 * node + 2 >= tree->numNodes) {
    return 0;
  }
  return exists(tree, node) ? 1 : 0;
}

/* Returns the index of the parent of node in the heap layout.
 * Must not be called for the root (index 0). */
static size_t getParent(size_t node) {
  assert(node != 0);

  /* Children of p are 2p + 1 and 2p + 2, so both map back to (node + 1) / 2 - 1 */
  const size_t oneBased = node + 1;
  return oneBased / 2 - 1;
}

/* Returns a pointer to the entry of tree->nodes that belongs to the first
 * leaf; the numLeaves leaf pointers follow consecutively from there. */
uint8_t** getLeaves(tree_t* tree) {
  const size_t firstLeaf = tree->numNodes - tree->numLeaves;
  return tree->nodes + firstLeaf;
}

/* Returns the data pointer of leaf number leafIndex (0-based among the
 * leaves). leafIndex must be smaller than tree->numLeaves. */
uint8_t* getLeaf(tree_t* tree, size_t leafIndex) {
  assert(leafIndex < tree->numLeaves);

  uint8_t** leaves = tree->nodes + (tree->numNodes - tree->numLeaves);
  return leaves[leafIndex];
}

/* Derive the two child seeds of one tree node.
 *
 * Hashes hashPrefix || inputSeed (params->seed_size bytes) || salt (SALT_SIZE
 * bytes) || repIndex || nodeIndex and squeezes 2 * params->seed_size bytes
 * into digest (left child seed followed by right child seed, as consumed by
 * expandSeeds()).
 *
 * repIndex and nodeIndex are passed to hash_update_uint16_le(); presumably
 * only their low 16 bits are absorbed - confirm against that helper.
 */
static void hashSeed(uint8_t* digest, const uint8_t* inputSeed, uint8_t* salt, uint8_t hashPrefix,
size_t repIndex, size_t nodeIndex, const picnic_instance_t* params) {
hash_context ctx;

hash_init_prefix(&ctx, params->digest_size, hashPrefix);
hash_update(&ctx, inputSeed, params->seed_size);
hash_update(&ctx, salt, SALT_SIZE);
hash_update_uint16_le(&ctx, repIndex);
hash_update_uint16_le(&ctx, nodeIndex);
hash_final(&ctx);
hash_squeeze(&ctx, digest, 2 * params->seed_size);
}

/* Four-way variant of hashSeed(): derives the child seeds of the four
 * consecutive nodes nodeIndex .. nodeIndex + 3 in one call.
 *
 * digest    - four output buffers, each receiving 2 * params->seed_size bytes
 * inputSeed - four input seeds, one per lane
 * salt      - shared salt, absorbed into every lane
 * repIndex  - absorbed into every lane
 * nodeIndex - index of the first node; lane k uses nodeIndex + k
 *
 * The node indices are stored in a uint16_t array before hashing, so values
 * are narrowed to 16 bits.
 */
static void hashSeed_x4(uint8_t** digest, const uint8_t** inputSeed, uint8_t* salt,
uint8_t hashPrefix, size_t repIndex, size_t nodeIndex,
const picnic_instance_t* params) {
hash_context_x4 ctx;

hash_init_prefix_x4(&ctx, params->digest_size, hashPrefix);
hash_update_x4(&ctx, inputSeed, params->seed_size);

const uint8_t* salts[4] = {salt, salt, salt, salt};
hash_update_x4(&ctx, salts, SALT_SIZE);
hash_update_x4_uint16_le(&ctx, repIndex);

const uint16_t nodes[4] = {nodeIndex, nodeIndex + 1, nodeIndex + 2, nodeIndex + 3};
hash_update_x4_uint16s_le(&ctx, nodes);

hash_final_x4(&ctx);
hash_squeeze_x4(&ctx, digest, 2 * params->seed_size);
}

/* Fill in every seed that can be derived from the seeds already present.
 *
 * Visits the non-leaf nodes in increasing index order. For each node whose
 * seed is available (haveNode set), the node is hashed and the two halves of
 * the output become the seeds of its left and right child, unless a child
 * already has a seed. A right child is only written if it exists in the tree.
 *
 * The walk is split into three phases so that most nodes can use the 4-way
 * hash: nodes 0..2 one at a time, then groups of four, then the remaining
 * nodes one at a time.
 *
 * tree     - tree to expand; nodes/haveNode are updated in place
 * salt     - salt absorbed into every hash
 * repIndex - repetition index absorbed into every hash
 * params   - parameter set (seed_size, digest_size)
 */
static void expandSeeds(tree_t* tree, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params) {
/* Scratch space for four hash outputs of 2 * MAX_SEED_SIZE_BYTES each */
uint8_t tmp[4 * 2 * MAX_SEED_SIZE_BYTES];
uint8_t* tmp_ptr[4] = {&tmp[0], &tmp[2 * MAX_SEED_SIZE_BYTES], &tmp[2 * 2 * MAX_SEED_SIZE_BYTES],
&tmp[3 * 2 * MAX_SEED_SIZE_BYTES]};

/* Walk the tree, expanding seeds where possible. Compute children of
* non-leaf nodes. */
size_t lastNonLeaf = getParent(tree->numNodes - 1);
size_t i = 0;
/* expand the first seeds (nodes 0..2) individually so the grouped loop starts at node 3 */
for (; i <= MIN(2,lastNonLeaf); i++) {
if (!tree->haveNode[i]) {
continue;
}

hashSeed(tmp, tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params);

if (!tree->haveNode[2 * i + 1]) {
/* left child = H_left(seed_i || salt || t || i) */
memcpy(tree->nodes[2 * i + 1], tmp, params->seed_size);
tree->haveNode[2 * i + 1] = 1;
}

/* The last non-leaf node will only have a left child when there are an odd number of leaves */
if (exists(tree, 2 * i + 2) && !tree->haveNode[2 * i + 2]) {
/* right child = H_right(seed_i || salt || t || i) */
memcpy(tree->nodes[2 * i + 2], tmp + params->seed_size, params->seed_size);
tree->haveNode[2 * i + 2] = 1;
}
}
/* now hash in groups of 4 for faster hashing */
/* All four nodes of a group are hashed unconditionally; results for nodes
* without a seed are discarded below.
* NOTE(review): a group may extend up to two indices past lastNonLeaf; this
* relies on haveNode being 0 for those entries - confirm. */
for (; i <= lastNonLeaf / 4 * 4; i += 4) {

hashSeed_x4(tmp_ptr, (const uint8_t**) &tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params);

for (size_t j = i; j < i + 4; j++) {
if (!tree->haveNode[j]) {
continue;
}
if (!tree->haveNode[2 * j + 1]) {
/* left child = H_left(seed_i || salt || t || j) */
memcpy(tree->nodes[2 * j + 1], tmp_ptr[j-i], params->seed_size);
tree->haveNode[2 * j + 1] = 1;
}

/* The last non-leaf node will only have a left child when there are an odd number of leaves
*/
if (exists(tree, 2 * j + 2) && !tree->haveNode[2 * j + 2]) {
/* right child = H_right(seed_i || salt || t || j) */
memcpy(tree->nodes[2 * j + 2], tmp_ptr[j-i] + params->seed_size, params->seed_size);
tree->haveNode[2 * j + 2] = 1;
}
}
}
/* handle last few, which are not a multiple of 4 */
for (; i <= lastNonLeaf; i++) {
if (!tree->haveNode[i]) {
continue;
}

hashSeed(tmp, tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params);

if (!tree->haveNode[2 * i + 1]) {
/* left child = H_left(seed_i || salt || t || i) */
memcpy(tree->nodes[2 * i + 1], tmp, params->seed_size);
tree->haveNode[2 * i + 1] = 1;
}

/* The last non-leaf node will only have a left child when there are an odd number of leaves */
if (exists(tree, 2 * i + 2) && !tree->haveNode[2 * i + 2]) {
/* right child = H_right(seed_i || salt || t || i) */
memcpy(tree->nodes[2 * i + 2], tmp + params->seed_size, params->seed_size);
tree->haveNode[2 * i + 2] = 1;
}
}
}

/* Build a seed tree with nSeeds leaves from a single root seed.
 *
 * nSeeds   - number of leaf seeds to derive
 * rootSeed - params->seed_size bytes placed at the root
 * salt     - salt used when expanding the tree
 * repIndex - repetition index used when expanding the tree
 * params   - parameter set
 *
 * Returns the expanded tree; the caller releases it with freeTree().
 */
tree_t* generateSeeds(size_t nSeeds, uint8_t* rootSeed, uint8_t* salt, size_t repIndex,
                      const picnic_instance_t* params) {
  tree_t* seedTree = createTree(nSeeds, params->seed_size);
  uint8_t* root    = seedTree->nodes[0];

  /* Install the root, then derive everything below it */
  memcpy(root, rootSeed, params->seed_size);
  seedTree->haveNode[0] = 1;
  expandSeeds(seedTree, salt, repIndex, params);

  return seedTree;
}

/* Returns 1 if node has no child slot inside the tree (its left-child index
 * 2 * node + 1 is at or beyond numNodes), 0 otherwise. */
static int isLeafNode(tree_t* tree, size_t node) {
  const size_t leftChild = 2 * node + 1;
  return (tree->numNodes <= leftChild) ? 1 : 0;
}

/* Returns 1 if node exists and has a sibling in the tree, 0 otherwise.
 * A right child always has a sibling; a left child has one only when the
 * node at index node + 1 exists. */
static int hasSibling(tree_t* tree, size_t node) {
  if (!exists(tree, node)) {
    return 0;
  }

  const int loneLeftChild = isLeftChild(node) && !exists(tree, node + 1);
  return loneLeftChild ? 0 : 1;
}

/* Returns the index of the sibling of node. The node must be inside the
 * tree, must not be the root and must have a sibling (see hasSibling()).
 * If a left child without a right neighbour is passed in a build without
 * assertions, 0 is returned. */
static size_t getSibling(tree_t* tree, size_t node) {
  assert(node < tree->numNodes);
  assert(node != 0);
  assert(hasSibling(tree, node));

  if (!isLeftChild(node)) {
    /* right child: the sibling sits immediately before it */
    return node - 1;
  }

  if (node + 1 >= tree->numNodes) {
    assert(!"getSibling: request for node with not sibling");
    return 0;
  }
  return node + 1;
}

/* Determine which tree nodes must be revealed so that all leaves except the
 * hidden ones can be recomputed.
 *
 * tree         - tree whose shape is used (node data is not read)
 * hideList     - leaf indices (0-based among the leaves) that stay hidden
 * hideListSize - number of entries in hideList
 * outputSize   - receives the number of entries in the returned list
 *
 * Returns a malloc'ed array of node indices that the caller must free(). If
 * an allocation fails, NULL is returned and *outputSize is set to 0.
 */
static size_t* getRevealedNodes(tree_t* tree, uint16_t* hideList, size_t hideListSize,
                                size_t* outputSize) {
  /* Compute paths up from hideList to root, store as sets of nodes */
  size_t pathLen = tree->depth - 1;

  *outputSize = 0;

  /* pathSets[i][0...hideListSize] stores the nodes in the path at depth i
   * for each of the leaf nodes in hideListSize */
  size_t** pathSets = malloc(pathLen * sizeof(size_t*));
  size_t* slab      = malloc(hideListSize * pathLen * sizeof(size_t));
  size_t* revealed  = malloc(tree->numLeaves * sizeof(size_t));

  /* An empty hide list makes the slab request zero-sized, for which malloc
   * may legitimately return NULL; that case is not an error. */
  if (pathSets == NULL || (slab == NULL && hideListSize != 0) || revealed == NULL) {
    free(pathSets);
    free(slab);
    free(revealed);
    return NULL;
  }

  size_t* row = slab;
  for (size_t i = 0; i < pathLen; i++) {
    pathSets[i] = row;
    row += hideListSize;
  }

  /* Compute the paths back to the root */
  for (size_t i = 0; i < hideListSize; i++) {
    size_t pos = 0;
    size_t node =
        hideList[i] +
        (tree->numNodes - tree->numLeaves); /* input lists leaf indexes, translate to nodes */
    pathSets[pos][i] = node;
    pos++;
    while ((node = getParent(node)) != 0) {
      pathSets[pos][i] = node;
      pos++;
    }
  }

  /* Determine seeds to reveal */
  size_t revealedPos = 0;
  for (size_t d = 0; d < pathLen; d++) {
    for (size_t i = 0; i < hideListSize; i++) {
      if (!hasSibling(tree, pathSets[d][i])) {
        continue;
      }
      size_t sibling = getSibling(tree, pathSets[d][i]);
      if (!contains(pathSets[d], hideListSize, sibling)) {
        // Determine the seed to reveal
        while (!hasRightChild(tree, sibling) && !isLeafNode(tree, sibling)) {
          sibling = 2 * sibling + 1; // sibling = leftChild(sibling)
        }
        // Only reveal if we haven't already
        if (!contains(revealed, revealedPos, sibling)) {
          revealed[revealedPos] = sibling;
          revealedPos++;
        }
      }
    }
  }

  /* Free via the saved base pointer instead of reading pathSets[0] */
  free(slab);
  free(pathSets);

  *outputSize = revealedPos;
  return revealed;
}

/* Compute how many bytes revealSeeds() would produce for the given hide list.
 *
 * A scratch tree is created from numNodes (passed to createTree() as its
 * first argument) purely to obtain the tree shape; it is released before
 * returning. The result is the number of revealed nodes times
 * params->seed_size. */
size_t revealSeedsSize(size_t numNodes, uint16_t* hideList, size_t hideListSize,
                       const picnic_instance_t* params) {
  size_t revealedCount = 0;
  tree_t* scratch      = createTree(numNodes, params->seed_size);
  size_t* nodeList     = getRevealedNodes(scratch, hideList, hideListSize, &revealedCount);

  free(nodeList);
  freeTree(scratch);

  return revealedCount * params->seed_size;
}

/* Serialize the seeds that must be revealed so that all leaves except those
 * in hideList can be recomputed.
 *
 *   tree          seed tree holding the node data to copy from
 *   hideList      leaf indexes to keep hidden
 *   hideListSize  number of entries in hideList
 *   output        destination buffer
 *   outputSize    capacity of output, in bytes
 *   params        supplies seed_size, the number of bytes copied per node
 *
 * Returns the number of bytes written to output. If the buffer is too small
 * for all revealed seeds the function asserts and returns 0 (bytes copied
 * before the shortfall was detected remain in output).
 *
 * The remaining capacity is tracked as size_t, so buffers larger than INT_MAX
 * are handled normally and no negative value is ever returned through the
 * unsigned return type. */
size_t revealSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* output,
                   size_t outputSize, const picnic_instance_t* params) {
  const size_t seedBytes = params->seed_size;
  size_t remaining       = outputSize;
  size_t written         = 0;
  size_t revealedSize    = 0;

  size_t* revealed = getRevealedNodes(tree, hideList, hideListSize, &revealedSize);
  for (size_t i = 0; i < revealedSize; i++) {
    if (remaining < seedBytes) {
      assert(!"Insufficient sized buffer provided to revealSeeds");
      free(revealed);
      return 0;
    }
    memcpy(output + written, tree->nodes[revealed[i]], seedBytes);
    written += seedBytes;
    remaining -= seedBytes;
  }

  free(revealed);
  return written;
}

/* Load revealed seeds into `tree` and re-derive everything below them.
 *
 * The nodes expected in `input` are determined from hideList exactly as on
 * the signer's side; each one consumes params->seed_size bytes of input and
 * is marked present. On success expandSeeds() is run to fill in descendants.
 *
 * Returns 0 on success, -1 if inputLen exceeds INT_MAX or input is too short
 * for the revealed nodes (nodes copied before the shortfall stay in the tree
 * and expandSeeds() is not run). */
int reconstructSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* input,
                     size_t inputLen, uint8_t* salt, size_t repIndex,
                     const picnic_instance_t* params) {
  if (inputLen > INT_MAX) {
    return -1;
  }

  int status          = 0;
  int remaining       = (int)inputLen;
  size_t revealedSize = 0;
  size_t* revealed    = getRevealedNodes(tree, hideList, hideListSize, &revealedSize);

  for (size_t i = 0; i < revealedSize; i++) {
    remaining -= params->seed_size;
    if (remaining < 0) {
      status = -1;
      break;
    }
    const size_t node = revealed[i];
    memcpy(tree->nodes[node], input, params->seed_size);
    tree->haveNode[node] = 1;
    input += params->seed_size;
  }

  if (status == 0) {
    expandSeeds(tree, salt, repIndex, params);
  }

  free(revealed);
  return status;
}

/* Compute the hash stored at the parent of `child`, if it can be computed.
 *
 * Nothing happens when the child does not exist, when the parent is already
 * present, or when a child of the parent that exists in the tree is still
 * missing its data. Otherwise the parent becomes
 *   H(left child data || [right child data] || salt || parent idx)
 * using params->digest_size bytes per node, and is marked as present. */
static void computeParentHash(tree_t* tree, size_t child, uint8_t* salt,
                              const picnic_instance_t* params) {
  if (!exists(tree, child)) {
    return;
  }

  const size_t parent = getParent(child);
  const size_t left   = 2 * parent + 1;
  const size_t right  = 2 * parent + 2;

  /* Already known, or not all inputs are available yet. */
  if (tree->haveNode[parent]) {
    return;
  }
  if (!tree->haveNode[left]) {
    return;
  }
  if (exists(tree, right) && !tree->haveNode[right]) {
    return;
  }

  hash_context ctx;

  hash_init_prefix(&ctx, params->digest_size, HASH_PREFIX_3);
  hash_update(&ctx, tree->nodes[left], params->digest_size);
  /* One node may not have a right child when there's an odd number of leaves */
  if (hasRightChild(tree, parent)) {
    hash_update(&ctx, tree->nodes[right], params->digest_size);
  }
  hash_update(&ctx, salt, SALT_SIZE);
  hash_update_uint16_le(&ctx, parent);
  hash_final(&ctx);
  hash_squeeze(&ctx, tree->nodes[parent], params->digest_size);

  tree->haveNode[parent] = 1;
}

/* Create a Merkle tree by hashing up all nodes.
 *
 * leafData is read at indexes 0 .. tree->numLeaves - 1; entries may be NULL,
 * in which case the corresponding leaf is left unset. Each non-NULL entry
 * supplies tree->dataSize bytes. The data being committed to has already been
 * hashed, according to the spec. */
void buildMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt,
                     const picnic_instance_t* params) {
  const size_t firstLeaf = tree->numNodes - tree->numLeaves;

  /* Install the leaves we were given. */
  for (size_t leaf = 0; leaf < tree->numLeaves; leaf++) {
    if (leafData[leaf] == NULL) {
      continue;
    }
    memcpy(tree->nodes[firstLeaf + leaf], leafData[leaf], tree->dataSize);
    tree->haveNode[firstLeaf + leaf] = 1;
  }

  /* Walk node indexes from numNodes down to 1 so children are visited before
   * their parents, computing each parent hash as soon as it is possible. */
  int node = (int)tree->numNodes;
  while (node > 0) {
    computeParentHash(tree, node, salt, params);
    node--;
  }
}

/* Determine which Merkle nodes must be sent so the tree can be recomputed
 * without the leaves listed in missingLeaves.
 *
 * Returns a malloc'd array of node indexes and stores its length in
 * *outputSize; the caller frees the array. The marking loop stops before
 * index 0, so the root is never marked missing and is never output.
 * Allocation results are not checked. */
static size_t* getRevealedMerkleNodes(tree_t* tree, uint16_t* missingLeaves,
                                      size_t missingLeavesSize, size_t* outputSize) {
  const size_t firstLeaf = tree->numNodes - tree->numLeaves;
  uint8_t* isMissing     = calloc(tree->numNodes, 1);

  /* Mark leaves that are missing */
  for (size_t k = 0; k < missingLeavesSize; k++) {
    isMissing[firstLeaf + missingLeaves[k]] = 1;
  }

  /* A non-leaf node is missing when all of its existing children are missing.
   * Walk upward (high index to low) so children are settled first. */
  int lastNonLeaf = getParent(tree->numNodes - 1);
  for (int n = lastNonLeaf; n > 0; n--) {
    if (!exists(tree, n)) {
      continue;
    }
    const int hasRight = exists(tree, 2 * n + 2);
    const int allChildrenMissing =
        hasRight ? (isMissing[2 * n + 1] && isMissing[2 * n + 2]) : isMissing[2 * n + 1];
    if (allChildrenMissing) {
      isMissing[n] = 1;
    }
  }

  /* For each missing leaf, climb until the parent is no longer missing; the
   * node reached is the highest missing node on that path and is revealed. */
  size_t* revealed = malloc(tree->numLeaves * sizeof(size_t));
  size_t count     = 0;
  for (size_t k = 0; k < missingLeavesSize; k++) {
    size_t node = missingLeaves[k] + firstLeaf; /* leaf index -> node index */
    for (;;) {
      const size_t parent = getParent(node);
      if (!isMissing[parent]) {
        if (!contains(revealed, count, node)) {
          revealed[count] = node;
          count++;
        }
        break;
      }
      node = parent;
      if (node == 0) {
        break;
      }
    }
  }

  free(isMissing);
  *outputSize = count;
  return revealed;
}

/* Compute the number of bytes openMerkleTree() would output for the given
 * list of missing leaves. A scratch tree is created from numNodes (passed to
 * createTree() as its first argument) only to obtain the tree shape. The
 * result is the number of revealed nodes times params->digest_size. */
size_t openMerkleTreeSize(size_t numNodes, uint16_t* missingLeaves, size_t missingLeavesSize,
                          const picnic_instance_t* params) {
  size_t revealedCount = 0;
  tree_t* scratch      = createTree(numNodes, params->digest_size);
  size_t* nodeList =
      getRevealedMerkleNodes(scratch, missingLeaves, missingLeavesSize, &revealedCount);

  free(nodeList);
  freeTree(scratch);

  return revealedCount * params->digest_size;
}

/* Serialize the missing nodes that the verifier will require to check
 * commitments for non-missing leaves.
 *
 * Returns a malloc'd buffer holding the selected nodes back to back,
 * tree->dataSize bytes each, and stores the buffer length in
 * *outputSizeBytes. The caller frees the buffer. The malloc result is not
 * checked. */
uint8_t* openMerkleTree(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize,
                        size_t* outputSizeBytes) {
  size_t revealedSize = 0;
  size_t* revealed =
      getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &revealedSize);

  *outputSizeBytes    = revealedSize * tree->dataSize;
  uint8_t* serialized = malloc(*outputSizeBytes);

  uint8_t* cursor = serialized;
  for (size_t i = 0; i < revealedSize; i++, cursor += tree->dataSize) {
    memcpy(cursor, tree->nodes[revealed[i]], tree->dataSize);
  }

  free(revealed);
  return serialized;
}

/* addMerkleNodes: deserialize and add the data for nodes provided by the
 * committer.
 *
 * The set of expected nodes is recomputed from missingLeaves; each consumes
 * tree->dataSize bytes of input and is marked present. Returns 0 when input
 * is consumed exactly, -1 when inputSize exceeds INT_MAX, is too short, or
 * has bytes left over (nodes copied before the error stay in the tree). */
int addMerkleNodes(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, uint8_t* input,
                   size_t inputSize) {
  assert(missingLeavesSize < tree->numLeaves);

  if (inputSize > INT_MAX) {
    return -1;
  }
  int remaining = (int)inputSize;

  size_t revealedSize = 0;
  size_t* revealed =
      getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &revealedSize);
  assert(!contains(revealed, revealedSize, 0));

  /* Deserialize input */
  for (size_t i = 0; i < revealedSize; i++) {
    remaining -= tree->dataSize;
    if (remaining < 0) {
      free(revealed);
      return -1;
    }
    const size_t node = revealed[i];
    memcpy(tree->nodes[node], input, tree->dataSize);
    input += tree->dataSize;
    tree->haveNode[node] = 1;
  }

  /* Trailing bytes mean the input does not match the expected node set. */
  const int status = (remaining != 0) ? -1 : 0;

  free(revealed);
  return status;
}

/* verifyMerkleTree: install the recomputed leaves and hash up to the root.
 *
 * leafData is read at indexes 0 .. tree->numLeaves - 1; NULL entries are
 * skipped. Returns -1 if a recomputed leaf collides with a node that is
 * already present (i.e. one supplied by the prover), or if the root could not
 * be computed; returns 0 otherwise. The root value itself is not compared
 * against anything here. */
int verifyMerkleTree(tree_t* tree, /* uint16_t* missingLeaves, size_t missingLeavesSize, */
                     uint8_t** leafData, uint8_t* salt, const picnic_instance_t* params) {
  const size_t firstLeaf = tree->numNodes - tree->numLeaves;

  /* Copy the leaf data, where we have it. The actual data being committed to
   * has already been hashed, according to the spec. */
  for (size_t leaf = 0; leaf < tree->numLeaves; leaf++) {
    uint8_t* recomputed = leafData[leaf];
    if (recomputed == NULL) {
      continue;
    }

    const size_t node = firstLeaf + leaf;
    if (tree->haveNode[node] == 1) {
      return -1; /* A leaf was assigned from the prover for a node we've recomputed */
    }
    memcpy(tree->nodes[node], recomputed, tree->dataSize);
    tree->haveNode[node] = 1;
  }

  /* The tree now has some leaves and some intermediate nodes; work upward
   * computing every node that can be derived. */
  int node = (int)tree->numNodes;
  while (node > 0) {
    computeParentHash(tree, node, salt, params);
    node--;
  }

  /* Fail if the root was not computed. */
  return tree->haveNode[0] ? 0 : -1;
}

+ 83
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_tree.h View File

@@ -0,0 +1,83 @@
/*! @file picnic3_tree.h
* @brief This file has part of the tree implementation used to generate
* random seeds and commit to multiple values with a Merkle tree.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC3_TREE_H
#define PICNIC3_TREE_H

#include "picnic_instances.h"

/*
* Represents a (nearly) complete binary tree, stored in memory as an array.
* The root is at nodes[0]; the left child of node k is at index 2k + 1 and the
* right child is at index 2k + 2. The same structure is used both for seed
* trees (node data = seed) and for Merkle trees (node data = hash).
*/
typedef struct tree_t {
size_t depth; /* The depth of the tree */
uint8_t** nodes; /* The data for each node; nodes[i] points to dataSize bytes */
size_t dataSize; /* The size of the data at each node, in bytes */
uint8_t* haveNode; /* If we have the data (seed or hash) for node i, haveNode[i] is 1 */
uint8_t* exists; /* Since the tree is not always complete, nodes marked 0 don't exist */
size_t numNodes; /* The total number of nodes in the tree */
size_t numLeaves; /* The total number of leaves in the tree */
} tree_t;

/* The largest seed size is 256 bits, for the Picnic3-L5-FS parameter set. */
#define MAX_SEED_SIZE_BYTES (32)

tree_t* createTree(size_t numLeaves, size_t dataSize);
void freeTree(tree_t* tree);
uint8_t** getLeaves(tree_t* tree);
/* Get one leaf, leafIndex must be in [0, tree->numLeaves -1] */
uint8_t* getLeaf(tree_t* tree, size_t leafIndex);

/* Functions for trees used to derive seeds.
* Signer's usage: generateSeeds -> revealSeeds -> freeTree
* Verifier's usage: createTree -> reconstructSeeds -> freeTree
*/

/* Creates a tree with nSeeds leaves, sets its root to rootSeed and derives
* all other nodes from it using salt and repIndex. Returns the new tree. */
tree_t* generateSeeds(size_t nSeeds, uint8_t* rootSeed, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params);
/* Returns the number of bytes written to output. A safe number of bytes for
* callers to allocate is numLeaves*params->seed_size, or call revealSeedsSize. */
size_t revealSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* output,
size_t outputLen, const picnic_instance_t* params);
/* Returns the exact number of bytes revealSeeds would write for this hide list. */
size_t revealSeedsSize(size_t numNodes, uint16_t* hideList, size_t hideListSize,
const picnic_instance_t* params);
/* Loads the revealed seeds from input into tree and re-derives their
* descendants. Returns 0 on success, -1 if input is too short or too large. */
int reconstructSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* input,
size_t inputLen, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params);

/* Functions for Merkle hash trees used for commitments.
 *
 * Signer call sequence:
 *   1. createTree
 *   2. buildMerkleTree  with all commitments as leaf nodes
 *   3. openMerkleTree   with missingLeaves - list of commitments the verifier
 *                       won't recompute
 *   4. freeTree
 * Verifier call sequence:
 *   1. createTree
 *   2. addMerkleNodes   with the output of the signer
 *   3. verifyMerkleTree checks that all leaf nodes present are correct
 *                       commitments
 *   4. freeTree
 *
 * In every function below, missingLeaves lists leaf indexes (relative to the
 * first leaf) and missingLeavesSize is the number of entries in that list.
 */
void buildMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt,
                     const picnic_instance_t* params);
/* Returns a malloc'd buffer (caller frees) and stores its length in
 * *outputSizeBytes. */
uint8_t* openMerkleTree(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize,
                        size_t* outputSizeBytes);
/* Returns the number of bytes openMerkleTree would produce for the same list
 * of missing leaves. */
size_t openMerkleTreeSize(size_t numNodes, uint16_t* missingLeaves, size_t missingLeavesSize,
                          const picnic_instance_t* params);
/* Returns 0 on success, -1 if input does not match the expected size. */
int addMerkleNodes(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, uint8_t* input,
                   size_t inputSize);
/* Returns 0 if the root could be computed, -1 otherwise. */
int verifyMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt,
                     const picnic_instance_t* params);

#endif

+ 203
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_types.c View File

@@ -0,0 +1,203 @@
/*! @file picnic3_types.c
* @brief Functions to allocate/free data types used in the Picnic signature
* scheme implementation.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "compat.h"
#include "picnic3_types.h"

/* Allocate the buffers of a randomTape_t for params->num_MPC_parties parties.
 *
 * Layout produced:
 *   tape->tape[i]       one zeroed region of 2 * view_size bytes per party,
 *                       all carved from a single slab (tape->tape[0] is its
 *                       base)
 *   tape->parity_tapes  2 * view_size zeroed bytes
 *   tape->aux_bits      view_size zeroed bytes
 *   tape->buffer        16 uint16_t values, 32-byte aligned
 * Both position counters start at 0. Allocation results are not checked.
 * Release with freeRandomTape(). */
void allocateRandomTape(randomTape_t* tape, const picnic_instance_t* params) {
  tape->nTapes         = params->num_MPC_parties;
  tape->tape           = malloc(tape->nTapes * sizeof(uint8_t*));
  tape->aux_bits       = calloc(1, params->view_size);
  tape->buffer         = aligned_alloc(32, 16 * sizeof(uint16_t));
  size_t tapeSizeBytes = 2 * params->view_size;
  tape->parity_tapes   = calloc(1, tapeSizeBytes);
  uint8_t* slab        = calloc(1, tape->nTapes * tapeSizeBytes);

  /* The index has the same width as nTapes so the loop terminates for any
   * party count (a uint8_t counter could never reach 256 or more). */
  for (size_t i = 0; i < tape->nTapes; i++) {
    tape->tape[i] = slab;
    slab += tapeSizeBytes;
  }

  tape->pos     = 0;
  tape->aux_pos = 0;
}

/* Release the buffers owned by a randomTape_t; the struct itself is not
 * freed. A NULL tape is ignored. tape->tape[0] is the base of the slab shared
 * by all per-party tapes, so freeing it releases every tape. */
void freeRandomTape(randomTape_t* tape) {
  if (tape == NULL) {
    return;
  }

  free(tape->tape[0]);
  free(tape->tape);
  free(tape->parity_tapes);
  aligned_free(tape->buffer);
  free(tape->aux_bits);
}

/* Initialise a proof2_t: zero the whole object, then allocate its C
 * (digest_size bytes), input (input_size bytes), aux and msgs (view_size
 * bytes each) buffers. seedInfo is left NULL for the sign/verify code to set.
 * Allocation results are not checked. */
void allocateProof2(proof2_t* proof, const picnic_instance_t* params) {
  memset(proof, 0, sizeof(proof2_t));

  proof->seedInfoLen   = 0;
  proof->seedInfo      = NULL; /* Sign/verify code sets it */
  proof->unOpenedIndex = 0;

  proof->msgs  = malloc(params->view_size);
  proof->aux   = malloc(params->view_size);
  proof->input = malloc(params->input_size);
  proof->C     = malloc(params->digest_size);
}

/* Release every buffer owned by a proof2_t (the struct itself is not freed).
 * Safe on a zero-initialised proof because free(NULL) is a no-op. */
static void freeProof2(proof2_t* proof) {
  free(proof->msgs);
  free(proof->aux);
  free(proof->input);
  free(proof->C);
  free(proof->seedInfo);
}

/* Initialise a signature2_t: allocate the challenge (digest_size bytes), the
 * two challenge lists (num_opened_rounds uint16_t entries each) and a zeroed
 * array of num_rounds proofs. iSeedInfo and cvInfo start out NULL and are set
 * by the sign/verify code. Individual proofs are allocated during signature
 * generation, only for the rounds that need them. Allocation results are not
 * checked. */
void allocateSignature2(signature2_t* sig, const picnic_instance_t* params) {
  const size_t listBytes = params->num_opened_rounds * sizeof(uint16_t);

  sig->iSeedInfo    = NULL;
  sig->iSeedInfoLen = 0;
  sig->cvInfo       = NULL; /* Sign/verify code sets it */
  sig->cvInfoLen    = 0;

  sig->challenge  = malloc(params->digest_size);
  sig->challengeC = malloc(listBytes);
  sig->challengeP = malloc(listBytes);
  sig->proofs     = calloc(params->num_rounds, sizeof(proof2_t));
}

/* Release everything owned by a signature2_t, including the buffers of each
 * of its params->num_rounds proofs. The struct itself is not freed. */
void freeSignature2(signature2_t* sig, const picnic_instance_t* params) {
  /* Proofs first: each one owns its own buffers. */
  for (size_t round = 0; round < params->num_rounds; round++) {
    freeProof2(sig->proofs + round);
  }
  free(sig->proofs);

  free(sig->challengeP);
  free(sig->challengeC);
  free(sig->challenge);
  free(sig->cvInfo);
  free(sig->iSeedInfo);
}

/* Allocate one commitments_t object with capacity for numCommitments values.
 *
 * A single allocation holds the table of numCommitments pointers followed by
 * numCommitments hashes of params->digest_size bytes each; commitments->hashes
 * is the base of that allocation. Release with freeCommitments2(). The malloc
 * result is not checked. */
void allocateCommitments2(commitments_t* commitments, const picnic_instance_t* params,
                          size_t numCommitments) {
  const size_t tableBytes = numCommitments * sizeof(uint8_t*);
  uint8_t* slab           = malloc(numCommitments * params->digest_size + tableBytes);
  uint8_t* hashData       = slab + tableBytes;

  commitments->nCommitments = numCommitments;
  commitments->hashes       = (uint8_t**)slab;

  for (size_t i = 0; i < numCommitments; i++) {
    commitments->hashes[i] = hashData + i * params->digest_size;
  }
}

/* Release the single allocation made by allocateCommitments2(); the
 * commitments_t object itself is not freed. A NULL argument is ignored. */
void freeCommitments2(commitments_t* commitments) {
  if (commitments == NULL) {
    return;
  }
  free(commitments->hashes);
}

/* Allocate one zeroed input buffer of params->input_size bytes per round.
 *
 * A single calloc'd block holds the table of num_rounds pointers followed by
 * the buffers themselves; the returned inputs_t is the base of that block and
 * is released with freeInputs(). The calloc result is not checked. */
inputs_t allocateInputs(const picnic_instance_t* params) {
  uint8_t* slab = calloc(1, params->num_rounds * (params->input_size + sizeof(uint8_t*)));

  inputs_t inputs = (uint8_t**)slab;
  uint8_t* data   = slab + params->num_rounds * sizeof(uint8_t*);

  for (uint32_t round = 0; round < params->num_rounds; round++) {
    inputs[round] = data + (size_t)round * params->input_size;
  }

  return inputs;
}

/* Release the block returned by allocateInputs(). The pointer table and all
* input buffers share that one allocation, so a single free() suffices. */
void freeInputs(inputs_t inputs) {
free(inputs);
}

msgs_t* allocateMsgs(const picnic_instance_t* params) {
msgs_t* msgs = malloc(params->num_rounds * sizeof(msgs_t));

uint8_t* slab =
calloc(1, params->num_rounds * (params->num_MPC_parties * ((params->view_size + 7) / 8 * 8) +
params->num_MPC_parties * sizeof(uint8_t*)));

for (uint32_t i = 0; i < params->num_rounds; i++) {
msgs[i].pos = 0;
msgs[i].unopened = -1;
msgs[i].msgs = (uint8_t**)slab;
slab += params->num_MPC_parties * sizeof(uint8_t*);

for (uint32_t j = 0; j < params->num_MPC_parties; j++) {
msgs[i].msgs[j] = slab;
slab += (params->view_size + 7) / 8 * 8;
}
}

return msgs;
}

/* Allocate a single msgs_t (as opposed to one per round).
 *
 * One zeroed slab holds the table of num_MPC_parties pointers followed by
 * num_MPC_parties message buffers, each view_size bytes rounded up to a
 * multiple of 8. pos starts at 0 and unopened at -1. Release with freeMsgs().
 * Allocation results are not checked. */
msgs_t* allocateMsgsVerify(const picnic_instance_t* params) {
  const uint32_t paddedView = (params->view_size + 7) / 8 * 8;
  const size_t tableBytes   = params->num_MPC_parties * sizeof(uint8_t*);

  msgs_t* msgs    = malloc(sizeof(msgs_t));
  uint8_t* cursor = calloc(1, (params->num_MPC_parties * paddedView + tableBytes));

  msgs->pos      = 0;
  msgs->unopened = -1;
  msgs->msgs     = (uint8_t**)cursor;
  cursor += tableBytes;

  for (uint32_t party = 0; party < params->num_MPC_parties; party++) {
    msgs->msgs[party] = cursor;
    cursor += paddedView;
  }

  return msgs;
}

/* Release what allocateMsgs() or allocateMsgsVerify() allocated.
* msgs[0].msgs is the base of the single slab that holds every pointer table
* and message buffer, so freeing it releases the storage of all entries;
* the msgs array itself is freed afterwards. msgs must not be NULL. */
void freeMsgs(msgs_t* msgs) {
free(msgs[0].msgs);
free(msgs);
}

/* Allocate an array of params->num_rounds commitments_t, one per round.
 *
 *   numCommitments  hashes per round; 0 selects params->num_MPC_parties
 *
 * All hash storage lives in one slab. For each round the slab holds a table
 * of pointers followed by the hashes (params->digest_size bytes each).
 * nCommitments is set on EVERY element of the returned array, not only the
 * first, so no entry is left with an indeterminate count. Release with
 * freeCommitments(). Allocation results are not checked. */
commitments_t* allocateCommitments(const picnic_instance_t* params, size_t numCommitments) {
  const size_t perRound = (numCommitments) ? numCommitments : params->num_MPC_parties;

  commitments_t* commitments = malloc(params->num_rounds * sizeof(commitments_t));

  uint8_t* slab = malloc(params->num_rounds *
                         (perRound * params->digest_size + perRound * sizeof(uint8_t*)));

  for (uint32_t i = 0; i < params->num_rounds; i++) {
    commitments[i].nCommitments = perRound;
    commitments[i].hashes       = (uint8_t**)slab;
    slab += perRound * sizeof(uint8_t*);

    for (size_t j = 0; j < perRound; j++) {
      commitments[i].hashes[j] = slab;
      slab += params->digest_size;
    }
  }

  return commitments;
}

/* Release what allocateCommitments() allocated. commitments[0].hashes is the
* base of the single slab shared by all rounds, so freeing it releases every
* round's pointer table and hashes; the array itself is freed afterwards.
* commitments must not be NULL. */
void freeCommitments(commitments_t* commitments) {
free(commitments[0].hashes);
free(commitments);
}

+ 63
- 0
src/sign/picnic/picnic3l1/avx2/picnic3_types.h View File

@@ -0,0 +1,63 @@
/*! @file picnic3_types.h
* @brief Functions to allocate/free data types used in the Picnic signature
* scheme implementation.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC_TYPES_H
#define PICNIC_TYPES_H

#include "picnic3_impl.h"

/* Type definitions */

/* Random tapes for one round, as set up by allocateRandomTape(). */
typedef struct randomTape_t {
uint8_t** tape; /* nTapes pointers, one tape of 2 * view_size bytes per party */
uint8_t* aux_bits; /* view_size bytes, zero-initialised on allocation */
uint8_t* parity_tapes; /* 2 * view_size bytes, zero-initialised on allocation */
uint32_t pos; /* starts at 0; presumably the current read position in the tapes */
uint32_t aux_pos; /* starts at 0; presumably the current position in aux_bits */
size_t nTapes; /* number of tapes, set to params->num_MPC_parties */
uint16_t* buffer; /* 16 uint16_t values, allocated with 32-byte alignment */
} randomTape_t;

/* A list of commitment hashes. */
typedef struct commitments_t {
uint8_t** hashes; /* nCommitments pointers, each to params->digest_size bytes */
size_t nCommitments; /* number of entries in hashes */
} commitments_t;

typedef uint8_t** inputs_t;

/* Per-party message buffers; see allocateMsgs() / allocateMsgsVerify(). */
typedef struct msgs_t {
uint8_t** msgs; // One for each player
size_t pos; // Initialised to 0 by the allocators
int unopened; // Index of the unopened party, or -1 if all parties opened (when signing)
} msgs_t;

#define UNUSED_PARAMETER(x) (void)(x)

void allocateRandomTape(randomTape_t* tape, const picnic_instance_t* params);
void freeRandomTape(randomTape_t* tape);

void allocateProof2(proof2_t* proof, const picnic_instance_t* params);

commitments_t* allocateCommitments(const picnic_instance_t* params, size_t nCommitments);
void freeCommitments(commitments_t* commitments);

void allocateCommitments2(commitments_t* commitments, const picnic_instance_t* params,
size_t nCommitments);
void freeCommitments2(commitments_t* commitments);

inputs_t allocateInputs(const picnic_instance_t* params);
void freeInputs(inputs_t inputs);

msgs_t* allocateMsgs(const picnic_instance_t* params);
msgs_t* allocateMsgsVerify(const picnic_instance_t* params);
void freeMsgs(msgs_t* msgs);

#endif /* PICNIC_TYPES_H */

+ 95
- 0
src/sign/picnic/picnic3l1/avx2/picnic_instances.c View File

@@ -0,0 +1,95 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include "picnic_instances.h"

// instance handling
//
// Parameter sets whose LowMC description is defined as all zeros below are
// unavailable: create_instance() refuses any entry whose LowMC m, n, r or k
// field is zero.

// L1, L3, and L5 instances with partial Sbox layer (stubbed out: all zeros)
#define lowmc_parameters_128_128_20 { 0, 0, 0, 0 }
#define lowmc_parameters_192_192_30 { 0, 0, 0, 0 }
#define lowmc_parameters_256_256_38 { 0, 0, 0, 0 }

// L1, L3, and L5 instances with full Sbox layer. Only the 129-bit variant has
// a header included here (presumably the one defining lowmc_parameters_129_129_4);
// the 192- and 255-bit variants are stubbed out.
#include "lowmc_129_129_4.h"
#define lowmc_parameters_192_192_4 { 0, 0, 0, 0 }
#define lowmc_parameters_255_255_4 { 0, 0, 0, 0 }

// ZKB++ is switched off: the macro discards its argument and yields all-zero
// LowMC parameters.
#define ENABLE_ZKBPP(x) { 0, 0, 0, 0 }

// KKW is switched on: the macro passes its argument through unchanged.
#define ENABLE_KKW(x) x

// Initialiser for the three function pointers in picnic_instance_t.impls;
// they are filled in lazily by create_instance().
#define NULL_FNS \
{ NULL, NULL, NULL }

/* Table of parameter sets, indexed by picnic_params_t.
*
* Each row initialises a picnic_instance_t in field order:
*   lowmc, digest_size, seed_size, num_rounds, num_opened_rounds,
*   num_MPC_parties, input_size, output_size, view_size, view_round_size,
*   collapsed_challenge_size, unruh_without_input_bytes_size,
*   unruh_with_input_bytes_size, max_signature_size, params, impls
*
* Rows built with ENABLE_ZKBPP, or with a stubbed-out LowMC parameter macro,
* carry all-zero LowMC parameters and are rejected by create_instance(). */
static picnic_instance_t instances[PARAMETER_SET_MAX_INDEX] = {
/* Index 0: placeholder for PARAMETER_SET_INVALID */
{{0, 0, 0, 0}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, PARAMETER_SET_INVALID, NULL_FNS},
/* ZKB++ with partial LowMC instances */
{ENABLE_ZKBPP(lowmc_parameters_128_128_20), 32, 16, 219, 219, 3, 16, 16, 75, 30, 55, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic_L1_FS, Picnic_L1_FS, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_128_128_20), 32, 16, 219, 219, 3, 16, 16, 75, 30, 55, 91, 107,
PICNIC_SIGNATURE_SIZE_Picnic_L1_UR, Picnic_L1_UR, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_192_192_30), 48, 24, 329, 329, 3, 24, 24, 113, 30, 83, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic_L3_FS, Picnic_L3_FS, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_192_192_30), 48, 24, 329, 329, 3, 24, 24, 113, 30, 83, 137, 161,
PICNIC_SIGNATURE_SIZE_Picnic_L3_UR, Picnic_L3_UR, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_256_256_38), 64, 32, 438, 438, 3, 32, 32, 143, 30, 110, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic_L5_FS, Picnic_L5_FS, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_256_256_38), 64, 32, 438, 438, 3, 32, 32, 143, 30, 110, 175, 207,
PICNIC_SIGNATURE_SIZE_Picnic_L5_UR, Picnic_L5_UR, NULL_FNS},
/* KKW with full LowMC instances */
{ENABLE_KKW(lowmc_parameters_129_129_4), 32, 16, 250, 36, 16, 17, 17, 65, 129, 55, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic3_L1, Picnic3_L1, NULL_FNS},
{ENABLE_KKW(lowmc_parameters_192_192_4), 48, 24, 419, 52, 16, 24, 24, 96, 192, 83, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic3_L3, Picnic3_L3, NULL_FNS},
{ENABLE_KKW(lowmc_parameters_255_255_4), 64, 32, 601, 68, 16, 32, 32, 128, 255, 110, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic3_L5, Picnic3_L5, NULL_FNS},
/* ZKB++ with full LowMC instances */
{ENABLE_ZKBPP(lowmc_parameters_129_129_4), 32, 16, 219, 219, 3, 17, 17, 65, 129, 55, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic_L1_full, Picnic_L1_full, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_192_192_4), 48, 24, 329, 329, 3, 24, 24, 96, 192, 83, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic_L3_full, Picnic_L3_full, NULL_FNS},
{ENABLE_ZKBPP(lowmc_parameters_255_255_4), 64, 32, 438, 438, 3, 32, 32, 128, 255, 110, 0, 0,
PICNIC_SIGNATURE_SIZE_Picnic_L5_full, Picnic_L5_full, NULL_FNS},
};
/* instance_initialized[p] becomes true once create_instance() has succeeded
* for instances[p]; see picnic_instance_get(). */
static bool instance_initialized[PARAMETER_SET_MAX_INDEX];

/* Finish initialising a table entry by resolving its implementation hooks.
 *
 * Returns false, leaving the entry untouched, when any of the LowMC m, n, r
 * or k parameters is zero or when the entry is one of the Unruh (UR)
 * parameter sets. Otherwise the LowMC implementation is looked up and, for
 * the Picnic3 sets only, the aux and simulate-online implementations too. */
static bool create_instance(picnic_instance_t* pp) {
  const bool lowmc_unset = !pp->lowmc.m || !pp->lowmc.n || !pp->lowmc.r || !pp->lowmc.k;
  if (lowmc_unset) {
    return false;
  }

  switch (pp->params) {
  case Picnic_L1_UR:
  case Picnic_L3_UR:
  case Picnic_L5_UR:
    return false;
  default:
    break;
  }

  pp->impls.lowmc = lowmc_get_implementation(&pp->lowmc);

  const bool is_picnic3 = pp->params >= Picnic3_L1 && pp->params <= Picnic3_L5;
  if (is_picnic3) {
    pp->impls.lowmc_aux             = lowmc_compute_aux_get_implementation(&pp->lowmc);
    pp->impls.lowmc_simulate_online = lowmc_simulate_online_get_implementation(&pp->lowmc);
  }

  return true;
}

/* Look up the instance description for a parameter set.
 *
 * Returns NULL when param is out of range (at or below PARAMETER_SET_INVALID,
 * or at or above PARAMETER_SET_MAX_INDEX) or when the entry cannot be
 * initialised by create_instance(). An entry is initialised on first request
 * and the result is remembered in instance_initialized[]. */
const picnic_instance_t* picnic_instance_get(picnic_params_t param) {
  if (param <= PARAMETER_SET_INVALID || param >= PARAMETER_SET_MAX_INDEX) {
    return NULL;
  }

  picnic_instance_t* entry = &instances[param];
  if (instance_initialized[param]) {
    return entry;
  }

  if (!create_instance(entry)) {
    return NULL;
  }
  instance_initialized[param] = true;
  return entry;
}

+ 62
- 0
src/sign/picnic/picnic3l1/avx2/picnic_instances.h View File

@@ -0,0 +1,62 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef PICNIC_INSTANCES_H
#define PICNIC_INSTANCES_H

#include "lowmc.h"
#include "picnic3_simulate.h"
#include "picnic.h"

#define SALT_SIZE 32
#define MAX_DIGEST_SIZE 64

/* Description of one Picnic parameter set: LowMC parameters, the sizes used
* by the signature scheme, and the implementation hooks selected for it.
* Instances are obtained through picnic_instance_get(). */
typedef struct picnic_instance_t {
lowmc_parameters_t lowmc; /* LowMC parameters; all-zero marks an unavailable set */

uint32_t digest_size; /* bytes */
uint32_t seed_size; /* bytes */
uint32_t num_rounds; // T
uint32_t num_opened_rounds; // u
uint32_t num_MPC_parties; // N

uint32_t input_size; /* bytes */
uint32_t output_size; /* bytes */
uint32_t view_size; /* bytes */
uint32_t view_round_size; /* bits (per round) */

uint32_t collapsed_challenge_size; /* bytes */
uint32_t unruh_without_input_bytes_size; /* bytes */
uint32_t unruh_with_input_bytes_size; /* bytes */
uint32_t max_signature_size; /* bytes */

picnic_params_t params; /* identifier of this parameter set */

/* Implementation hooks; NULL until the instance has been initialised.
* lowmc_aux and lowmc_simulate_online are only set for Picnic3 sets. */
struct {
lowmc_implementation_f lowmc;
lowmc_compute_aux_implementation_f lowmc_aux;
lowmc_simulate_online_f lowmc_simulate_online;
} impls;
} picnic_instance_t;

const picnic_instance_t* picnic_instance_get(picnic_params_t param);

PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t param);
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param);
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_public_key_size(picnic_params_t param);

/* Prefix values for domain separation */
static const uint8_t HASH_PREFIX_0 = 0;
static const uint8_t HASH_PREFIX_1 = 1;
static const uint8_t HASH_PREFIX_2 = 2;
static const uint8_t HASH_PREFIX_3 = 3;
static const uint8_t HASH_PREFIX_4 = 4;
static const uint8_t HASH_PREFIX_5 = 5;

#endif

+ 35
- 0
src/sign/picnic/picnic3l1/avx2/randomness.c View File

@@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/


#include "randomness.h"
#include "macros.h"

// randombytes from the NIST framework / SUPERCOP; defined outside this file.
extern void randombytes(unsigned char* x, unsigned long long xlen);

/* Fill dst with len bytes obtained from randombytes().
* Always returns 0: randombytes() returns void, so no failure can be reported. */
int rand_bytes(uint8_t* dst, size_t len) {
randombytes(dst, len);
return 0;
}

/* Fill dst with num_bits random bits, using ceil(num_bits / 8) bytes.
 *
 * When num_bits is not a multiple of 8, the unused low-order bits of the last
 * byte are cleared, so the random bits occupy the most significant positions
 * of that byte. Returns 0 on success, -1 if rand_bytes() reports failure. */
int rand_bits(uint8_t* dst, size_t num_bits) {
  const size_t tail_bits  = num_bits % 8;
  const size_t byte_count = (num_bits + 7) / 8;

  if (rand_bytes(dst, byte_count) != 0) {
    return -1;
  }

  if (tail_bits != 0) {
    /* Keep the top tail_bits bits of the final byte, zero the rest. */
    const size_t unused_bits = 8 - tail_bits;
    dst[byte_count - 1] &= UINT8_C(0xff) << unused_bits;
  }

  return 0;
}

+ 19
- 0
src/sign/picnic/picnic3l1/avx2/randomness.h View File

@@ -0,0 +1,19 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/

#ifndef RANDOMNESS_H
#define RANDOMNESS_H

#include <stddef.h>
#include <stdint.h>

int rand_bytes(uint8_t* dst, size_t len);
int rand_bits(uint8_t* dst, size_t num_bits);

#endif

+ 81
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakHash.c View File

@@ -0,0 +1,81 @@
/*
The eXtended Keccak Code Package (XKCP)
https://github.com/XKCP/XKCP

Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by the designers, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#include <string.h>
#include "KeccakHash.h"

/* ---------------------------------------------------------------- */

/* Initialise a hash instance on top of a Keccak-f[1600] sponge.
 * Fails with KECCAK_FAIL when delimitedSuffix is 0; otherwise returns the
 * result of the sponge initialisation, and on success records the output
 * length (in bits) and the suffix in the instance. */
HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
{
    HashReturn status;

    if (delimitedSuffix == 0) {
        return KECCAK_FAIL;
    }

    status = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity);
    if (status == KECCAK_SUCCESS) {
        instance->fixedOutputLength = hashbitlen;
        instance->delimitedSuffix = delimitedSuffix;
    }
    return status;
}

/* ---------------------------------------------------------------- */

/* Absorb databitlen bits of input.
 *
 * Whole bytes are absorbed directly. If databitlen is not a multiple of 8,
 * the trailing partial byte is merged with the instance's delimited suffix
 * instead of being absorbed, so it is emitted when the hash is finalised.
 * Returns the status of the sponge absorb call(s). */
HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen)
{
if ((databitlen % 8) == 0)
return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
else {
/* Absorb the whole bytes first; the partial byte is handled below. */
HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
if (ret == KECCAK_SUCCESS) {
/* The last partial byte is assumed to be aligned on the least significant bits */
unsigned char lastByte = data[databitlen/8];
/* Concatenate the last few bits provided here with those of the suffix:
 * the data bits stay in the low positions and the suffix is shifted
 * up by the number of data bits. */
unsigned short delimitedLastBytes = (unsigned short)((unsigned short)(lastByte & ((1 << (databitlen % 8)) - 1)) | ((unsigned short)instance->delimitedSuffix << (databitlen % 8)));
if ((delimitedLastBytes & 0xFF00) == 0x0000) {
/* Everything still fits in one byte: it becomes the new suffix. */
instance->delimitedSuffix = delimitedLastBytes & 0xFF;
}
else {
/* Overflowed into a second byte: absorb the full low byte now and
 * keep the high byte as the new suffix. */
unsigned char oneByte[1];
oneByte[0] = delimitedLastBytes & 0xFF;
ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1);
instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF;
}
}
return ret;
}
}

/* ---------------------------------------------------------------- */

/* Finish absorbing by feeding the delimited suffix to the sponge, then
 * squeeze fixedOutputLength / 8 bytes into hashval. Returns the status of
 * the failing step, or of the squeeze when absorbing succeeded. */
HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
{
    const HashReturn absorbed = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);

    if (absorbed != KECCAK_SUCCESS) {
        return absorbed;
    }
    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8);
}

/* ---------------------------------------------------------------- */

/* Squeeze databitlen bits of output into data. Only whole bytes are
 * supported: a databitlen that is not a multiple of 8 yields KECCAK_FAIL
 * without touching the sponge. */
HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen)
{
    const int isWholeBytes = (databitlen % 8) == 0;

    if (!isWholeBytes) {
        return KECCAK_FAIL;
    }
    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8);
}

+ 125
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakHash.h View File

@@ -0,0 +1,125 @@
/*
The eXtended Keccak Code Package (XKCP)
https://github.com/XKCP/XKCP

Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by the designers, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#ifndef _KeccakHashInterface_h_
#define _KeccakHashInterface_h_

#include "config.h"
#ifdef XKCP_has_KeccakP1600

#include <stdint.h>
#include <string.h>
#include "KeccakSponge.h"

#ifndef _Keccak_BitTypes_
#define _Keccak_BitTypes_
typedef uint8_t BitSequence;

typedef size_t BitLength;
#endif

typedef enum { KECCAK_SUCCESS = 0, KECCAK_FAIL = 1, KECCAK_BAD_HASHLEN = 2 } HashReturn;

/* State of one sequential hashing operation. */
typedef struct {
KeccakWidth1600_SpongeInstance sponge; /* underlying sponge state */
unsigned int fixedOutputLength; /* output length in bits, as passed to Keccak_HashInitialize */
unsigned char delimitedSuffix; /* suffix bits absorbed at finalisation; Keccak_HashUpdate may fold a trailing partial byte into it */
} Keccak_HashInstance;

/**
* Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
* @param hashInstance Pointer to the hash instance to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @param hashbitlen The desired number of output bits,
* or 0 for an arbitrarily-long output.
* @param delimitedSuffix Bits that will be automatically appended to the end
* of the input message, as in domain separation.
* This is a byte containing from 0 to 7 bits
* formatted like the @a delimitedData parameter of
* the Keccak_SpongeAbsorbLastFewBits() function.
* @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);

/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F)

/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F)

/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06)

/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06)

/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06)

/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06)

/**
* Function to give input data to be absorbed.
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the least significant bits of the last byte (little-endian convention).
* In this case, the (8 - @a databitlen mod 8) most significant bits
* of the last byte are ignored.
* @param databitlen The number of input bits provided in the input data.
* @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen);

/**
* Function to call after all input blocks have been input and to get
* output bits if the length was specified when calling Keccak_HashInitialize().
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
* If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of
* output bits is equal to @a hashbitlen.
* If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits
* must be extracted using the Keccak_HashSqueeze() function.
* @param hashval Pointer to the buffer where to store the output data.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);

/**
* Function to squeeze output data.
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
* @param data Pointer to the buffer where to store the output data.
* @param databitlen The number of output bits desired (must be a multiple of 8).
* @pre Keccak_HashFinal() must have been already called.
* @pre @a databitlen is a multiple of 8.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen);

#else
#error This requires an implementation of Keccak-p[1600]
#endif

#endif

+ 60
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakHashtimes4.c View File

@@ -0,0 +1,60 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".

For more information, feedback or questions, please refer to our website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#include <string.h>
#include "KeccakHashtimes4.h"

/* ---------------------------------------------------------------- */

/*
 * Prepare a 4-way hash instance for the given rate/capacity pair.
 *
 * A zero delimitedSuffix is rejected with KECCAK_FAIL before anything is
 * initialized. If the sponge initialization fails, its status is returned
 * and the output length and suffix fields are left untouched. On success
 * hashbitlen and delimitedSuffix are recorded in the instance.
 */
HashReturn Keccak_HashInitializetimes4(Keccak_HashInstancetimes4 *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
{
    HashReturn spongeStatus;

    if (delimitedSuffix == 0) {
        return KECCAK_FAIL;
    }

    spongeStatus = (HashReturn)KeccakWidth1600times4_SpongeInitialize(&instance->sponge, rate, capacity);
    if (spongeStatus == KECCAK_SUCCESS) {
        instance->delimitedSuffix = delimitedSuffix;
        instance->fixedOutputLength = hashbitlen;
    }
    return spongeStatus;
}

/* ---------------------------------------------------------------- */

/*
 * Absorb databitlen bits from the input pointers in data into the 4-way
 * sponge.
 *
 * Only whole bytes are accepted: a databitlen that is not a multiple of 8
 * yields KECCAK_FAIL without touching the sponge. Otherwise the sponge's
 * own status is returned.
 */
HashReturn Keccak_HashUpdatetimes4(Keccak_HashInstancetimes4 *instance, const BitSequence **data, BitLength databitlen)
{
    return ((databitlen % 8) == 0)
        ? (HashReturn)KeccakWidth1600times4_SpongeAbsorb(&instance->sponge, data, databitlen / 8)
        : KECCAK_FAIL;
}

/* ---------------------------------------------------------------- */

/*
 * Finish the absorbing phase of the 4-way instance and emit the
 * fixed-length digests.
 *
 * The stored delimited suffix is handed to the sponge as the last few input
 * bits. If that succeeds, fixedOutputLength/8 bytes are squeezed through
 * hashval (zero bytes when fixedOutputLength is 0). A failure while
 * absorbing the suffix is returned unchanged and nothing is squeezed.
 */
HashReturn Keccak_HashFinaltimes4(Keccak_HashInstancetimes4 *instance, BitSequence **hashval)
{
    const HashReturn suffixStatus =
        (HashReturn)KeccakWidth1600times4_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);

    if (suffixStatus != KECCAK_SUCCESS) {
        return suffixStatus;
    }

    return (HashReturn)KeccakWidth1600times4_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength / 8);
}

/* ---------------------------------------------------------------- */

/*
 * Squeeze databitlen bits of output from the 4-way sponge through the
 * output pointers in data.
 *
 * Only whole bytes are supported: a databitlen that is not a multiple of 8
 * yields KECCAK_FAIL without touching the sponge. Otherwise the sponge's
 * own status is returned.
 */
HashReturn Keccak_HashSqueezetimes4(Keccak_HashInstancetimes4 *instance, BitSequence **data, BitLength databitlen)
{
    return ((databitlen % 8) == 0)
        ? (HashReturn)KeccakWidth1600times4_SpongeSqueeze(&instance->sponge, data, databitlen / 8)
        : KECCAK_FAIL;
}

+ 112
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakHashtimes4.h View File

@@ -0,0 +1,112 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".

For more information, feedback or questions, please refer to our website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#ifndef _KeccakHashInterfacetimes4_h_
#define _KeccakHashInterfacetimes4_h_

#include "config.h"
#ifdef XKCP_has_KeccakP1600times4

#if !defined(SUPERCOP)
#include "KeccakHash.h"
#else
#include <libkeccak.a.headers/KeccakHash.h>
#endif
#include "KeccakSpongetimes4.h"

/** Context of one 4-way hashing session on top of the width-1600 times4 sponge. */
typedef struct {
/* Underlying sponge state, passed to the KeccakWidth1600times4_Sponge* functions. */
KeccakWidth1600times4_SpongeInstance sponge;
/* Digest length in bits as given to Keccak_HashInitializetimes4();
 * Keccak_HashFinaltimes4() squeezes fixedOutputLength/8 bytes. */
unsigned int fixedOutputLength;
/* Suffix bits absorbed by Keccak_HashFinaltimes4(); never 0, because
 * Keccak_HashInitializetimes4() rejects a zero suffix. */
unsigned char delimitedSuffix;
} Keccak_HashInstancetimes4;

/**
* Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
* @param hashInstance Pointer to the hash instance to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @param hashbitlen The desired number of output bits,
* or 0 for an arbitrarily-long output.
* @param delimitedSuffix Bits that will be automatically appended to the end
* of the input message, as in domain separation.
* This is a byte containing from 0 to 7 bits
* formatted like the @a delimitedData parameter of
* the Keccak_SpongeAbsorbLastFewBits() function.
* @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashInitializetimes4(Keccak_HashInstancetimes4 *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);

/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHAKE128(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1344, 256, 0, 0x1F)

/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHAKE256(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1088, 512, 0, 0x1F)

/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_224(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1152, 448, 224, 0x06)

/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_256(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1088, 512, 256, 0x06)

/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_384(hashInstance) Keccak_HashInitializetimes4(hashInstance, 832, 768, 384, 0x06)

/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_512(hashInstance) Keccak_HashInitializetimes4(hashInstance, 576, 1024, 512, 0x06)

/**
* Function to give input data to be absorbed.
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitializetimes4().
* @param data Array of 4 pointers to the input data.
* @param databitlen The number of input bits provided in the input data, must be a multiple of 8.
* @pre @a databitlen is a multiple of 8.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashUpdatetimes4(Keccak_HashInstancetimes4 *hashInstance, const BitSequence **data, BitLength databitlen);

/**
* Function to call after all input blocks have been input and to get
* output bits if the length was specified when calling Keccak_HashInitializetimes4().
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitializetimes4().
* If @a hashbitlen was not 0 in the call to Keccak_HashInitializetimes4(), the number of
* output bits is equal to @a hashbitlen.
* If @a hashbitlen was 0 in the call to Keccak_HashInitializetimes4(), the output bits
* must be extracted using the Keccak_HashSqueezetimes4() function.
* @param hashval Array of 4 pointers to the buffers where to store the output data.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashFinaltimes4(Keccak_HashInstancetimes4 *hashInstance, BitSequence **hashval);

/**
* Function to squeeze output data.
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitializetimes4().
* @param data Array of 4 pointers to the buffers where to store the output data.
* @param databitlen The number of output bits desired (must be a multiple of 8).
* @pre Keccak_HashFinaltimes4() must have been already called.
* @pre @a databitlen is a multiple of 8.
* @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
*/
HashReturn Keccak_HashSqueezetimes4(Keccak_HashInstancetimes4 *hashInstance, BitSequence **data, BitLength databitlen);

#else
#error This requires an implementation of Keccak-p[1600]x4
#endif

#endif

+ 1149
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-AVX2.s
File diff suppressed because it is too large
View File


+ 46
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-SnP.h View File

@@ -0,0 +1,46 @@
/*
The eXtended Keccak Code Package (XKCP)
https://github.com/XKCP/XKCP

The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/

---

Please refer to SnP-documentation.h for more details.
*/

#ifndef _KeccakP_1600_SnP_h_
#define _KeccakP_1600_SnP_h_

#include <stddef.h>

#define KeccakP1600_implementation "AVX2 optimized implementation"
#define KeccakP1600_stateSizeInBytes 200
#define KeccakP1600_stateAlignment 32
#define KeccakF1600_FastLoop_supported
#define KeccakP1600_12rounds_FastLoop_supported

#define KeccakP1600_StaticInitialize()
void KeccakP1600_Initialize(void *state);
void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
void KeccakP1600_Permute_12rounds(void *state);
void KeccakP1600_Permute_24rounds(void *state);
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);

#endif

+ 1317
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-times4-SIMD256.c
File diff suppressed because it is too large
View File


+ 55
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-times4-SnP.h View File

@@ -0,0 +1,55 @@
/*
The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/

---

Please refer to PlSnP-documentation.h for more details.
*/

#ifndef _KeccakP_1600_times4_SnP_h_
#define _KeccakP_1600_times4_SnP_h_

#include <stdint.h>
#include "SIMD256-config.h"

#define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")"
#define KeccakP1600times4_statesSizeInBytes 800
#define KeccakP1600times4_statesAlignment 32
#define KeccakF1600times4_FastLoop_supported
#define KeccakP1600times4_12rounds_FastLoop_supported
#define KeccakF1600times4_FastKravatte_supported

#include <stddef.h>

#define KeccakP1600times4_StaticInitialize()
void KeccakP1600times4_InitializeAll(void *states);
/* XORs `byte` into byte position `offset` of the state selected by
 * `instanceIndex`. The byte index is computed as
 *   (offset)/8 * 4*8   - 32-byte group holding 8-byte word number offset/8,
 * + (instanceIndex)*8  - 8-byte slot of this instance inside that group,
 * + (offset)%8         - byte within the 8-byte word,
 * i.e. the 8-byte words of the four states are stored interleaved.
 * Note: `offset` is evaluated twice, so pass an expression without side
 * effects. */
#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \
((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte)
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
void KeccakP1600times4_PermuteAll_4rounds(void *states);
void KeccakP1600times4_PermuteAll_6rounds(void *states);
void KeccakP1600times4_PermuteAll_12rounds(void *states);
void KeccakP1600times4_PermuteAll_24rounds(void *states);
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
size_t KeccakP1600times4_KravatteCompress(uint64_t *xAccu, uint64_t *kRoll, const unsigned char *input, size_t inputByteLen);
size_t KeccakP1600times4_KravatteExpand(uint64_t *yAccu, const uint64_t *kRoll, unsigned char *output, size_t outputByteLen);

#endif

+ 305
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakP-1600-unrolling.macros View File

@@ -0,0 +1,305 @@
/*
The eXtended Keccak Code Package (XKCP)
https://github.com/XKCP/XKCP

The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

/*
 * Round-sequence macros rounds24, rounds12, rounds6 and rounds4.
 *
 * Each macro expands to prepareTheta followed by the rounds with indices
 * 0..23, 12..23, 18..23 and 20..23 respectively. The including file must
 * provide prepareTheta, thetaRhoPiChiIotaPrepareTheta, thetaRhoPiChiIota and
 * copyStateVariables, plus a loop variable `i` for the variants that loop.
 *
 * Successive rounds alternate the argument pairs (A, E) and (E, A)
 * (presumably source and destination state variable sets - confirm against
 * the including file). When a loop body holds an odd number of rounds it ends
 * with copyStateVariables(A, E) so the next iteration can start from the same
 * pair again.
 *
 * The variant is selected by FullUnrolling or by the value of Unrolling
 * (12, 6, 4, 3, 2 or 1); any other setting is a compile-time error.
 */
#if (defined(FullUnrolling))
/* Fully unrolled: every round is written out, no loop variable is used. */
#define rounds24 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(10, A, E) \
thetaRhoPiChiIotaPrepareTheta(11, E, A) \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#define rounds12 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#define rounds6 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#elif (Unrolling == 12)
/* 12 rounds per loop iteration; rounds12, rounds6 and rounds4 stay unrolled. */
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=12) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
} \

#define rounds12 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#define rounds6 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#elif (Unrolling == 6)
/* 6 rounds per loop iteration; rounds6 and rounds4 stay unrolled. */
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \

#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \

#define rounds6 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#elif (Unrolling == 4)
/* 4 rounds per loop iteration; rounds6 loops two rounds at a time because 6
 * is not a multiple of 4, and rounds4 stays unrolled. */
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \

#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \

#define rounds6 \
prepareTheta \
for(i=18; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \

#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \

#elif (Unrolling == 3)
/* 3 rounds per loop iteration, each followed by copyStateVariables(A, E);
 * rounds4 loops two rounds at a time because 4 is not a multiple of 3. */
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \

#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \

#define rounds6 \
prepareTheta \
for(i=18; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \

#define rounds4 \
prepareTheta \
for(i=20; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \

#elif (Unrolling == 2)
/* 2 rounds per loop iteration for every macro. */
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \

#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \

#define rounds6 \
prepareTheta \
for(i=18; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \

#define rounds4 \
prepareTheta \
for(i=20; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \

#elif (Unrolling == 1)
/* 1 round per loop iteration, each followed by copyStateVariables(A, E). */
#define rounds24 \
prepareTheta \
for(i=0; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \

#define rounds12 \
prepareTheta \
for(i=12; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \

#define rounds6 \
prepareTheta \
for(i=18; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \

#define rounds4 \
prepareTheta \
for(i=20; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \

#else
#error "Unrolling is not correctly specified!"
#endif

/*
 * Runs the rounds with indices 24-__nrounds .. 23 for a round count given at
 * run time.
 *
 * After prepareTheta, `i` (a variable supplied by the including code) is set
 * to the first round index. If that index is odd, one round is run on its own
 * and followed by copyStateVariables(A, E); the remaining rounds then run two
 * per loop iteration, alternating the (A, E) and (E, A) argument pairs.
 *
 * The expansion is a plain statement sequence, not wrapped in do { } while (0),
 * so it must be used where several statements are allowed.
 * NOTE(review): presumably requires __nrounds <= 24 - confirm at call sites.
 */
#define roundsN(__nrounds) \
prepareTheta \
i = 24 - (__nrounds); \
if ((i&1) != 0) { \
thetaRhoPiChiIotaPrepareTheta(i, A, E) \
copyStateVariables(A, E) \
++i; \
} \
for( /* empty */; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
}

+ 111
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakSponge.c View File

@@ -0,0 +1,111 @@
/*
The eXtended Keccak Code Package (XKCP)
https://github.com/XKCP/XKCP

Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by the designers, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#include "KeccakSponge.h"

#ifdef KeccakReference
#include "displayIntermediateValues.h"
#endif

/*
 * Sponge instantiations.
 *
 * Each section below is compiled only when the matching XKCP_has_KeccakP*
 * macro is defined. It sets the template parameters (prefix, SnP, SnP_width,
 * SnP_Permute and, when the permutation header advertises a fast loop,
 * SnP_FastLoop_Absorb), includes KeccakSponge.inc (presumably the generic
 * sponge code written in terms of those macros), and then undefines the
 * parameters so the next section starts from a clean slate.
 */

/* Width 200, permutation KeccakP200_Permute_18rounds. */
#ifdef XKCP_has_KeccakP200
#include "KeccakP-200-SnP.h"

#define prefix KeccakWidth200
#define SnP KeccakP200
#define SnP_width 200
#define SnP_Permute KeccakP200_Permute_18rounds
#if defined(KeccakF200_FastLoop_supported)
#define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb
#endif
#include "KeccakSponge.inc"
#undef prefix
#undef SnP
#undef SnP_width
#undef SnP_Permute
#undef SnP_FastLoop_Absorb
#endif

/* Width 400, permutation KeccakP400_Permute_20rounds. */
#ifdef XKCP_has_KeccakP400
#include "KeccakP-400-SnP.h"

#define prefix KeccakWidth400
#define SnP KeccakP400
#define SnP_width 400
#define SnP_Permute KeccakP400_Permute_20rounds
#if defined(KeccakF400_FastLoop_supported)
#define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb
#endif
#include "KeccakSponge.inc"
#undef prefix
#undef SnP
#undef SnP_width
#undef SnP_Permute
#undef SnP_FastLoop_Absorb
#endif

/* Width 800, permutation KeccakP800_Permute_22rounds. */
#ifdef XKCP_has_KeccakP800
#include "KeccakP-800-SnP.h"

#define prefix KeccakWidth800
#define SnP KeccakP800
#define SnP_width 800
#define SnP_Permute KeccakP800_Permute_22rounds
#if defined(KeccakF800_FastLoop_supported)
#define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb
#endif
#include "KeccakSponge.inc"
#undef prefix
#undef SnP
#undef SnP_width
#undef SnP_Permute
#undef SnP_FastLoop_Absorb
#endif

/* Width 1600, permutation KeccakP1600_Permute_24rounds (prefix KeccakWidth1600). */
#ifdef XKCP_has_KeccakP1600
#include "KeccakP-1600-SnP.h"

#define prefix KeccakWidth1600
#define SnP KeccakP1600
#define SnP_width 1600
#define SnP_Permute KeccakP1600_Permute_24rounds
#if defined(KeccakF1600_FastLoop_supported)
#define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb
#endif
#include "KeccakSponge.inc"
#undef prefix
#undef SnP
#undef SnP_width
#undef SnP_Permute
#undef SnP_FastLoop_Absorb
#endif

/* Width 1600 again, but with KeccakP1600_Permute_12rounds and the separate
 * prefix KeccakWidth1600_12rounds so both variants can coexist. */
#ifdef XKCP_has_KeccakP1600
#include "KeccakP-1600-SnP.h"

#define prefix KeccakWidth1600_12rounds
#define SnP KeccakP1600
#define SnP_width 1600
#define SnP_Permute KeccakP1600_Permute_12rounds
#if defined(KeccakP1600_12rounds_FastLoop_supported)
#define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb
#endif
#include "KeccakSponge.inc"
#undef prefix
#undef SnP
#undef SnP_width
#undef SnP_Permute
#undef SnP_FastLoop_Absorb
#endif

+ 76
- 0
src/sign/picnic/picnic3l1/avx2/sha3/KeccakSponge.h View File

@@ -0,0 +1,76 @@
/*
The eXtended Keccak Code Package (XKCP)
https://github.com/XKCP/XKCP

Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.

Implementation by the designers, hereby denoted as "the implementer".

For more information, feedback or questions, please refer to the Keccak Team website:
https://keccak.team/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

#ifndef _KeccakSponge_h_
#define _KeccakSponge_h_

/* For the documentation, please follow the link: */
/* #include "KeccakSponge-documentation.h" */

#include <string.h>
#include "align.h"
#include "config.h"

/*
 * Declares the sponge instance type `prefix##_SpongeInstance`
 * (struct tag `prefix##_SpongeInstanceStruct`).
 *
 * prefix    - name prefix of the generated type.
 * size      - number of bytes in the `state` array.
 * alignment - value passed to ALIGN() (from align.h) for the declaration.
 *
 * Besides `state`, the struct carries `rate`, `byteIOIndex` and `squeezing`;
 * presumably the sponge rate, the current byte position within the rate and
 * a flag for the squeezing phase - confirm against KeccakSponge.inc.
 */
#define XKCP_DeclareSpongeStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \
unsigned char state[size]; \
unsigned int rate; \
unsigned int byteIOIndex; \
int squeezing; \
} prefix##_SpongeInstance;

/*
 * Declares the prototypes of the sponge API for one `prefix`:
 *   prefix##_Sponge                  - one-call form taking input, suffix and output buffers;
 *   prefix##_SpongeInitialize        - set up an instance for a rate/capacity pair;
 *   prefix##_SpongeAbsorb            - feed dataByteLen bytes of input;
 *   prefix##_SpongeAbsorbLastFewBits - feed the final delimited bits;
 *   prefix##_SpongeSqueeze           - produce dataByteLen bytes of output.
 * All five return int. Requires the `prefix##_SpongeInstance` type to have
 * been declared first.
 */
#define XKCP_DeclareSpongeFunctions(prefix) \
int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \
int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \
int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \
int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \
int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);

/*
 * Per-width declarations. For every permutation enabled through an
 * XKCP_has_KeccakP* macro, the matching SnP header supplies the state size
 * and alignment, the instance type and function prototypes are declared, and
 * an XKCP_has_Sponge_Keccak_width* macro advertises the result.
 */

/* Width 200. */
#ifdef XKCP_has_KeccakP200
#include "KeccakP-200-SnP.h"
XKCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment)
XKCP_DeclareSpongeFunctions(KeccakWidth200)
#define XKCP_has_Sponge_Keccak_width200
#endif

/* Width 400. */
#ifdef XKCP_has_KeccakP400
#include "KeccakP-400-SnP.h"
XKCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment)
XKCP_DeclareSpongeFunctions(KeccakWidth400)
#define XKCP_has_Sponge_Keccak_width400
#endif

/* Width 800. */
#ifdef XKCP_has_KeccakP800
#include "KeccakP-800-SnP.h"
XKCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment)
XKCP_DeclareSpongeFunctions(KeccakWidth800)
#define XKCP_has_Sponge_Keccak_width800
#endif

/* Width 1600. */
#ifdef XKCP_has_KeccakP1600
#include "KeccakP-1600-SnP.h"
XKCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
XKCP_DeclareSpongeFunctions(KeccakWidth1600)
#define XKCP_has_Sponge_Keccak_width1600
#endif

/* Width 1600 under the KeccakWidth1600_12rounds prefix; same state size and
 * alignment as above. No XKCP_has_Sponge_* macro is defined for this one. */
#ifdef XKCP_has_KeccakP1600
#include "KeccakP-1600-SnP.h"
XKCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
XKCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds)
#endif

#endif

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save