@@ -171,14 +171,6 @@ FetchContent_Declare( | |||
) | |||
FetchContent_Populate(gbench) | |||
# Declare the cpu_features dependency: its sources are placed in 3rd/cpu_features
# and pinned to one fixed commit (GIT_TAG) of the repository in GIT_REPOSITORY.
FetchContent_Declare( | |||
cpu_features | |||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/cpu_features | |||
GIT_REPOSITORY https://github.com/kriskwiatkowski/cpu_features.git | |||
GIT_TAG 38f4324533390b09079a38b524be8b178be8e435 | |||
) | |||
# Fetch the declared content so the 3rd/cpu_features directory is available to
# the commands that reference it later in this file.
FetchContent_Populate(cpu_features) | |||
# When PQC_WEAK_RANDOMBYTES is set, pass it on to the sources as a preprocessor
# define by appending it to the flag string shared by the C and C++ compilers.
if(PQC_WEAK_RANDOMBYTES) | |||
string(APPEND PQC_CMAKE_C_CXX_FLAGS " -DPQC_WEAK_RANDOMBYTES") | |||
endif() | |||
@@ -187,7 +179,6 @@ endif() | |||
# Give the C and the C++ compiler the same flag set: the project-wide flags
# followed by any extra flags supplied through EXTRA_C_CXX_FLAGS.
set(CMAKE_C_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_C_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_C_CXX_FLAGS}")
# NOTE(review): BUILD_PIC is presumably an option read by the cpu_features
# subproject added on the next line - confirm against its CMakeLists.
set(BUILD_PIC ON CACHE BOOL "")
add_subdirectory(3rd/cpu_features)
# PQC library | |||
@@ -200,7 +191,6 @@ include_directories( | |||
public | |||
src/common/ | |||
src | |||
3rd/cpu_features/include | |||
) | |||
# Define sources of the components | |||
@@ -251,45 +241,6 @@ add_subdirectory(src/kem/mceliece/mceliece6960119f/clean) | |||
add_subdirectory(src/kem/mceliece/mceliece8192128f/clean) | |||
# Hardware optimized targets | |||
# The avx2 variants below are added to the build only when ARCH is ARCH_x86_64.
if(${ARCH} STREQUAL "ARCH_x86_64") | |||
# Extra source recorded in COMMON_EXTRA_SRC; judging by the file name this is a
# 4-way Keccak using 256-bit SIMD (NOTE(review): confirm where it is consumed).
set(COMMON_EXTRA_SRC "src/common/keccak4x/KeccakP-1600-times4-SIMD256.c") | |||
# Sign | |||
add_subdirectory(src/sign/dilithium/dilithium2/avx2) | |||
add_subdirectory(src/sign/dilithium/dilithium3/avx2) | |||
add_subdirectory(src/sign/dilithium/dilithium5/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-128s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-128f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-128s-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-128f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-192s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-192f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-192s-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-192f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-256f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-256f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-256s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-shake256-256s-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-128f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-128s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-128s-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-128f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-192s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-192s-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-robust/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-simple/avx2) | |||
add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/avx2) | |||
# KEMs | |||
add_subdirectory(src/kem/kyber/kyber512/avx2) | |||
add_subdirectory(src/kem/kyber/kyber768/avx2) | |||
add_subdirectory(src/kem/kyber/kyber1024/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2) | |||
endif() | |||
# The rest of the library | |||
add_library( | |||
@@ -321,14 +272,12 @@ target_link_libraries( | |||
pqc | |||
${OBJ_LIBS} | |||
cpu_features | |||
common | |||
) | |||
# Link the pqc_s target against cpu_features, the common component and every
# library listed in OBJ_LIBS.
target_link_libraries( | |||
pqc_s | |||
cpu_features | |||
common | |||
${OBJ_LIBS} | |||
) | |||
@@ -1,7 +1,6 @@ | |||
#include <stdint.h> | |||
#include <stdbool.h> | |||
#include <pqc/pqc.h> | |||
#include <cpuinfo_x86.h> | |||
#include <common/utils.h> | |||
#include "schemes.h" | |||
@@ -68,13 +67,6 @@ const pqc_sig_ctx_t sigs[] = { | |||
PQC_SUPPORTED_SIGS(REG_SIG) | |||
}; | |||
// Contains capabilities on x86 CPU on which implementation is running | |||
// NOTE(review): nothing in this view writes to CPU_CAPS; presumably it is
// filled in elsewhere before being read - confirm.
X86Features CPU_CAPS; | |||
// Returns the address of the file-level CPU_CAPS object as a pointer to const,
// so callers get a read-only view of the capability data.
const X86Features * get_cpu_caps(void) { | |||
return &CPU_CAPS; | |||
} | |||
const pqc_ctx_t *pqc_kem_alg_by_id(uint8_t id) { | |||
int i; | |||
for(i=0; i<PQC_ALG_KEM_MAX; i++) { | |||
@@ -3,7 +3,6 @@ | |||
#include <stdint.h> | |||
#include <stddef.h> | |||
#include <cpuinfo_x86.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
@@ -38,13 +37,6 @@ extern "C" { | |||
(((uint16_t)(x)[0])<<8 | \ | |||
((uint16_t)(x)[1])<<0) \ | |||
/**
 * \brief Declaration of the accessor for the x86 CPU feature structure.
 *
 * The return type is spelled with the cpu_features namespace when this header
 * is compiled as C++, and as the plain X86Features type when compiled as C.
 * \returns pointer to a const X86Features object
 */
#ifdef __cplusplus | |||
const cpu_features::X86Features* | |||
#else | |||
const X86Features* | |||
#endif | |||
get_cpu_caps(void); | |||
/** | |||
* \brief Compares two arrays in constant time. | |||
* \param [in] a first array | |||
@@ -10,9 +10,7 @@ endif() | |||
# Defines the "bench" executable from the listed source files.
# NOTE(review): two closing source lines appear here ("sphincs.cc)" and
# "main.cc)"); this looks like the old and the new version of the list shown
# together - confirm which one the real file contains.
add_executable( | |||
bench | |||
kyber.cc | |||
main.cc | |||
sphincs.cc) | |||
main.cc) | |||
target_link_libraries( | |||
bench | |||
@@ -1,128 +0,0 @@ | |||
#include <array> | |||
#include <stdint.h> | |||
#include <utility> | |||
#include <benchmark/benchmark.h> | |||
#include <benchmark/../../src/statistics.h> | |||
#include <benchmark/../../src/cycleclock.h> | |||
#include "kem/kyber/kyber512/avx2/polyvec.h" | |||
extern "C" { | |||
#include "kem/kyber/kyber512/avx2/indcpa.h" | |||
#include "kem/kyber/kyber512/avx2/kem.h" | |||
#include "kem/kyber/kyber512/avx2/rejsample.h" | |||
#include "kem/kyber/kyber512/avx2/ntt.h" | |||
} | |||
// Stores the accumulated cycle count in the user counter "CPU cycles: mean" of
// the given benchmark state. kAvgIterations marks the value as a per-iteration
// average. NOTE(review): kResultNoFormat presumably disables human-readable
// formatting of the number - confirm against the benchmark library version used.
static auto cpucycle = [](benchmark::State &st, int64_t cycles) { | |||
st.counters["CPU cycles: mean"] = benchmark::Counter( | |||
cycles, benchmark::Counter::kAvgIterations | benchmark::Counter::kResultNoFormat); | |||
}; | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_gen_matrix on an all-zero 32-byte seed.
// Only the call itself sits between the two cycleclock::Now() reads; the summed
// cycle count is reported through cpucycle() once the loop has finished.
static void BenchKyberMatK2(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
polyvec a[KYBER_K]; | |||
uint8_t seed[32] = {0}; | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_gen_matrix(a, seed, 0); | |||
total += benchmark::cycleclock::Now() - t; | |||
// Keep the generated matrix observable so the call is not optimized away.
benchmark::DoNotOptimize(a); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_rej_uniform_avx with a zero-filled input
// buffer of 168*3 bytes and a 256-element int16_t output array.
// Cycles spent inside the call are summed and reported through cpucycle().
static void BenchKyberRejSampling(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
int16_t a[256] = {0}; | |||
uint8_t buf[168*3] = {0}; | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_rej_uniform_avx(a, buf); | |||
total += benchmark::cycleclock::Now() - t; | |||
// Keep the output array observable so the call is not optimized away.
benchmark::DoNotOptimize(a); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_crypto_kem_keypair. A fresh key pair is
// written into the same 800-byte pk / 1632-byte sk buffers on every iteration;
// cycles spent inside the call are summed and reported through cpucycle().
static void BenchKyberKeygen(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
uint8_t sk[1632]; | |||
uint8_t pk[800]; | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk); | |||
total += benchmark::cycleclock::Now() - t; | |||
// Both outputs are kept observable so the call is not optimized away.
benchmark::DoNotOptimize(pk); | |||
benchmark::DoNotOptimize(sk); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_crypto_kem_enc. The key pair is generated
// once, outside the timed loop, so only encapsulation is measured; the summed
// cycle count is reported through cpucycle().
static void BenchKyberEncaps(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
uint8_t sk[1632]; | |||
uint8_t pk[800]; | |||
uint8_t ct[768]; | |||
uint8_t ss[32]; | |||
PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk); | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_crypto_kem_enc(ss, ct, pk); | |||
total += benchmark::cycleclock::Now() - t; | |||
// NOTE(review): only the input pk is passed to DoNotOptimize here, not the
// outputs ss/ct - confirm this is intended.
benchmark::DoNotOptimize(pk); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_crypto_kem_dec. Key generation and
// encapsulation run once before the timed loop so that a ciphertext for the
// key pair exists; each iteration then decapsulates that same ciphertext.
static void BenchKyberDecaps(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
uint8_t sk[1632]; | |||
uint8_t pk[800]; | |||
uint8_t ct[768]; | |||
uint8_t ss[32]; | |||
PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk); | |||
PQCLEAN_KYBER512_AVX2_crypto_kem_enc(ss, ct, pk); | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_crypto_kem_dec(ss, ct, sk); | |||
total += benchmark::cycleclock::Now() - t; | |||
// NOTE(review): only the input sk is passed to DoNotOptimize here, not the
// output ss - confirm this is intended.
benchmark::DoNotOptimize(sk); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_basemul_avx on four arrays of 32 __m256i
// values; cycles spent inside the call are summed and reported via cpucycle().
// NOTE(review): r, a, b and data have no initializer, so the routine runs on
// indeterminate contents - confirm that is acceptable for this measurement.
static void BenchKyberBaseMulAVX(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
__m256i r[32],a[32],b[32],data[32]; | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_basemul_avx(r,a,b,data); | |||
total += benchmark::cycleclock::Now() - t; | |||
// Keep the result observable so the call is not optimized away.
benchmark::DoNotOptimize(r); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks PQCLEAN_KYBER512_AVX2_ntt_avx on two arrays of 32 __m256i values;
// cycles spent inside the call are summed and reported via cpucycle().
// NOTE(review): r and data have no initializer, so the routine runs on
// indeterminate contents - confirm that is acceptable for this measurement.
static void BenchKyberNttAVX(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
__m256i r[32],data[32]; | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
PQCLEAN_KYBER512_AVX2_ntt_avx(r, data); | |||
total += benchmark::cycleclock::Now() - t; | |||
// Keep the result observable so the call is not optimized away.
benchmark::DoNotOptimize(r); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Register every Kyber512 AVX2 benchmark defined in this file with the
// benchmark library.
BENCHMARK(BenchKyberMatK2); | |||
BENCHMARK(BenchKyberRejSampling); | |||
BENCHMARK(BenchKyberKeygen); | |||
BENCHMARK(BenchKyberBaseMulAVX); | |||
BENCHMARK(BenchKyberNttAVX); | |||
// TODO: not sure why but memcheck fails in INDCPA encryption | |||
BENCHMARK(BenchKyberEncaps); | |||
BENCHMARK(BenchKyberDecaps); |
@@ -1,10 +1,8 @@ | |||
#include <benchmark/benchmark.h> | |||
// Declared here, defined in another translation unit: registers benchmarks at
// run time (as opposed to the static BENCHMARK() macro).
void register_sphincs_benches(); | |||
// Benchmark entry point: performs the run-time registrations first, then lets
// the benchmark library parse its command-line flags and run whatever
// benchmarks were selected.
int main(int argc, char** argv) | |||
{ | |||
register_sphincs_benches(); | |||
benchmark::Initialize(&argc, argv); | |||
benchmark::RunSpecifiedBenchmarks(); | |||
} |
@@ -1,139 +0,0 @@ | |||
#include <algorithm> | |||
#include <array> | |||
#include <random> | |||
#include <utility> | |||
#include <sstream> | |||
#include <pqc/pqc.h> | |||
#include <benchmark/benchmark.h> | |||
#include <benchmark/../../src/statistics.h> | |||
#include <benchmark/../../src/cycleclock.h> | |||
// Number of elements in the array X. X must be a real array, not a pointer.
// The expansion and the argument are parenthesized so the macro stays correct
// inside larger expressions such as `n / ARRAY_LEN(a)`.
#define ARRAY_LEN(X) (sizeof(X)/sizeof((X)[0]))
// Stores the accumulated cycle count in the user counter "CPU cycles: mean" of
// the given benchmark state. kAvgIterations marks the value as a per-iteration
// average. NOTE(review): kResultNoFormat presumably disables human-readable
// formatting of the number - confirm against the benchmark library version used.
static auto cpucycle = [](benchmark::State &st, int64_t cycles) { | |||
st.counters["CPU cycles: mean"] = benchmark::Counter( | |||
cycles, benchmark::Counter::kAvgIterations | benchmark::Counter::kResultNoFormat); | |||
}; | |||
// One benchmarked scheme: the numeric algorithm id and a printable name used
// when building benchmark labels.
struct scheme_t { | |||
uint8_t id; | |||
const char* name; | |||
}; | |||
// Expands one scheme identifier into a scheme_t initializer: the identifier
// itself as the id and its stringified spelling as the name.
#define SCH(SCHEME) {SCHEME, #SCHEME}, | |||
// X-macro list of the SPHINCS signature scheme identifiers; the argument `_`
// is applied to every identifier in turn.
#define SIG_LIST(_) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256128FSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256128SSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256128FROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256128SROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256192FSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256192SSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256192FROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256192SROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256256FSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256256SSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256256FROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHAKE256256SROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256128FSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256128SSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256128FROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256128SROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256192FSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256192SSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256192FROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256192SROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256256FSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256256SSIMPLE) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256256FROBUST) \ | |||
_(PQC_ALG_SIG_SPHINCSSHA256256SROBUST) | |||
// Table of all schemes to benchmark, generated by applying SCH to every entry
// of SIG_LIST (one {id, "name"} element per identifier).
static const struct scheme_t sig_schemes[] = { | |||
SIG_LIST(SCH) | |||
}; | |||
// Benchmarks pqc_keygen for the signature scheme whose id is passed as
// benchmark argument 0. Key buffers are sized from the scheme context once,
// before the timed loop; cycles spent inside pqc_keygen are summed and
// reported through cpucycle().
static void BenchKeyPair(benchmark::State &st) { | |||
int64_t t, total = 0; | |||
uint32_t id = st.range(0); | |||
const pqc_ctx_t *ctx; | |||
ctx = pqc_sig_alg_by_id(id); | |||
std::vector<uint8_t> pk(pqc_public_key_bsz(ctx)); | |||
std::vector<uint8_t> sk(pqc_private_key_bsz(ctx)); | |||
for (auto _ : st) { | |||
t = benchmark::cycleclock::Now(); | |||
pqc_keygen(ctx, pk.data(), sk.data()); | |||
total += benchmark::cycleclock::Now() - t; | |||
// Keep both key buffers observable so the call is not optimized away.
benchmark::DoNotOptimize(pk); | |||
benchmark::DoNotOptimize(sk); | |||
} | |||
cpucycle(st, total); | |||
} | |||
// Benchmarks pqc_sig_create for the signature scheme whose id is passed as
// benchmark argument 0. A key pair is generated once before the timed loop;
// each iteration signs the same zero-filled 2048-byte message with it.
// Cycles spent inside pqc_sig_create are summed and reported via cpucycle().
static void BenchSign(benchmark::State &st) {
    int64_t t, total = 0;
    uint32_t id = st.range(0);
    uint8_t msg[2048] = {0};
    const pqc_ctx_t *ctx;

    ctx = pqc_sig_alg_by_id(id);
    std::vector<uint8_t> sign(pqc_signature_bsz(ctx));
    std::vector<uint8_t> pk(pqc_public_key_bsz(ctx));
    std::vector<uint8_t> sk(pqc_private_key_bsz(ctx));
    pqc_keygen(ctx, pk.data(), sk.data());

    // Passed by address to pqc_sig_create; starts out as the buffer size.
    size_t se_len = sign.size();
    for (auto _ : st) {
        t = benchmark::cycleclock::Now();
        pqc_sig_create(ctx, sign.data(), &se_len, msg, sizeof msg, sk.data());
        total += benchmark::cycleclock::Now() - t;
    }
    cpucycle(st, total);
}
// Benchmarks pqc_sig_verify for the signature scheme whose id is passed as
// benchmark argument 0. Key generation and signing of a zero-filled 2048-byte
// message run once before the timed loop; each iteration verifies that same
// signature. Cycles spent inside pqc_sig_verify are summed and reported via
// cpucycle().
static void BenchVerify(benchmark::State &st) {
    int64_t t, total = 0;
    uint32_t id = st.range(0);
    const pqc_ctx_t *ctx;
    uint8_t msg[2048] = {0};

    ctx = pqc_sig_alg_by_id(id);
    std::vector<uint8_t> sign(pqc_signature_bsz(ctx));
    std::vector<uint8_t> pk(pqc_public_key_bsz(ctx));
    std::vector<uint8_t> sk(pqc_private_key_bsz(ctx));
    pqc_keygen(ctx, pk.data(), sk.data());

    // Passed by address to pqc_sig_create; the resulting value is then used
    // as the signature length for verification.
    size_t se_len = sign.size();
    pqc_sig_create(ctx, sign.data(), &se_len, msg, sizeof msg, sk.data());
    for (auto _ : st) {
        t = benchmark::cycleclock::Now();
        pqc_sig_verify(ctx, sign.data(), se_len, msg, sizeof msg, pk.data());
        total += benchmark::cycleclock::Now() - t;
    }
    cpucycle(st, total);
}
void register_sphincs_benches() { | |||
for (size_t i=0; i<ARRAY_LEN(sig_schemes); i++) { | |||
std::stringstream s; | |||
s << "BenchKeyPair<" << sig_schemes[i].name << ">"; | |||
RegisterBenchmark(s.str().c_str(), BenchKeyPair) | |||
->Unit(benchmark::kMicrosecond) | |||
->Arg(sig_schemes[i].id)->ArgName(""); | |||
s.str(""); s.clear(); | |||
s << "BenchSign<" << sig_schemes[i].name << ">"; | |||
RegisterBenchmark(s.str().c_str(), BenchSign) | |||
->Unit(benchmark::kMicrosecond) | |||
->Arg(sig_schemes[i].id)->ArgName(""); | |||
s.str(""); s.clear(); | |||
s << "BenchVerify<" << sig_schemes[i].name << ">"; | |||
RegisterBenchmark(s.str().c_str(), BenchVerify) | |||
->Unit(benchmark::kMicrosecond) | |||
->Arg(sig_schemes[i].id)->ArgName(""); | |||
} | |||
} |