diff --git a/CMakeLists.txt b/CMakeLists.txt index 2115d968..51e272b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,14 +171,6 @@ FetchContent_Declare( ) FetchContent_Populate(gbench) -FetchContent_Declare( - cpu_features - SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/cpu_features - GIT_REPOSITORY https://github.com/kriskwiatkowski/cpu_features.git - GIT_TAG 38f4324533390b09079a38b524be8b178be8e435 -) -FetchContent_Populate(cpu_features) - if(PQC_WEAK_RANDOMBYTES) string(APPEND PQC_CMAKE_C_CXX_FLAGS " -DPQC_WEAK_RANDOMBYTES") endif() @@ -187,7 +179,6 @@ endif() set(CMAKE_C_FLAGS "${PQC_CMAKE_C_CXX_FLAGS} ${EXTRA_C_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "$${PQC_CMAKE_C_CXX_FLAGS} {EXTRA_C_CXX_FLAGS}") set(BUILD_PIC ON CACHE BOOL "") -add_subdirectory(3rd/cpu_features) # PQC library @@ -200,7 +191,6 @@ include_directories( public src/common/ src - 3rd/cpu_features/include ) # Define sources of the components @@ -251,45 +241,6 @@ add_subdirectory(src/kem/mceliece/mceliece6960119f/clean) add_subdirectory(src/kem/mceliece/mceliece8192128f/clean) # Hardware optimized targets -if(${ARCH} STREQUAL "ARCH_x86_64") -set(COMMON_EXTRA_SRC "src/common/keccak4x/KeccakP-1600-times4-SIMD256.c") - -# Sign -add_subdirectory(src/sign/dilithium/dilithium2/avx2) -add_subdirectory(src/sign/dilithium/dilithium3/avx2) -add_subdirectory(src/sign/dilithium/dilithium5/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-128s-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-128f-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-128s-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-128f-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-192s-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-192f-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-192s-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-192f-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-256f-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-256f-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-256s-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-shake256-256s-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-128f-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-128s-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-128s-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-128f-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-192s-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-192s-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-robust/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-256f-simple/avx2) -add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/avx2) -# KEMs -add_subdirectory(src/kem/kyber/kyber512/avx2) -add_subdirectory(src/kem/kyber/kyber768/avx2) -add_subdirectory(src/kem/kyber/kyber1024/avx2) -add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2) -add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2) -add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2) -endif() # The rest of the library add_library( @@ -321,14 +272,12 @@ target_link_libraries( pqc ${OBJ_LIBS} - cpu_features common ) target_link_libraries( pqc_s - cpu_features common ${OBJ_LIBS} ) diff --git a/src/capi/pqapi.c b/src/capi/pqapi.c index c0d0fc90..4b2ec551 100644 --- a/src/capi/pqapi.c +++ b/src/capi/pqapi.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include "schemes.h" @@ -68,13 +67,6 @@ const pqc_sig_ctx_t sigs[] = { PQC_SUPPORTED_SIGS(REG_SIG) }; -// Contains capabilities on x86 CPU on which implementation is running -X86Features CPU_CAPS; - -const X86Features * get_cpu_caps(void) { - return &CPU_CAPS; -} - const pqc_ctx_t *pqc_kem_alg_by_id(uint8_t id) { int i; for(i=0; i #include -#include #ifdef __cplusplus extern "C" { @@ -38,13 +37,6 @@ extern "C" { (((uint16_t)(x)[0])<<8 | \ ((uint16_t)(x)[1])<<0) \ -#ifdef __cplusplus -const cpu_features::X86Features* -#else -const X86Features* -#endif -get_cpu_caps(void); - /** * \brief Compares two arrays in constant time. * \param [in] a first array diff --git a/test/bench/CMakeLists.txt b/test/bench/CMakeLists.txt index e80f5931..eb29f80e 100644 --- a/test/bench/CMakeLists.txt +++ b/test/bench/CMakeLists.txt @@ -10,9 +10,7 @@ endif() add_executable( bench - kyber.cc - main.cc - sphincs.cc) + main.cc) target_link_libraries( bench diff --git a/test/bench/kyber.cc b/test/bench/kyber.cc deleted file mode 100644 index ca748a4e..00000000 --- a/test/bench/kyber.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include "kem/kyber/kyber512/avx2/polyvec.h" - -extern "C" { - #include "kem/kyber/kyber512/avx2/indcpa.h" - #include "kem/kyber/kyber512/avx2/kem.h" - #include "kem/kyber/kyber512/avx2/rejsample.h" - #include "kem/kyber/kyber512/avx2/ntt.h" -} - -static auto cpucycle = [](benchmark::State &st, int64_t cycles) { - st.counters["CPU cycles: mean"] = benchmark::Counter( - cycles, benchmark::Counter::kAvgIterations | benchmark::Counter::kResultNoFormat); -}; - -static void BenchKyberMatK2(benchmark::State &st) { - int64_t t, total = 0; - polyvec a[KYBER_K]; - uint8_t seed[32] = {0}; - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_gen_matrix(a, seed, 0); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(a); - } - cpucycle(st, total); -} - -static void BenchKyberRejSampling(benchmark::State &st) { - int64_t t, total = 0; - int16_t a[256] = {0}; - uint8_t buf[168*3] = {0}; - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_rej_uniform_avx(a, buf); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(a); - } - cpucycle(st, total); -} - -static void BenchKyberKeygen(benchmark::State &st) { - int64_t t, total = 0; - uint8_t sk[1632]; - uint8_t pk[800]; - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(pk); - benchmark::DoNotOptimize(sk); - } - cpucycle(st, total); -} - -static void BenchKyberEncaps(benchmark::State &st) { - int64_t t, total = 0; - uint8_t sk[1632]; - uint8_t pk[800]; - uint8_t ct[768]; - uint8_t ss[32]; - PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk); - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_crypto_kem_enc(ss, ct, pk); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(pk); - } - cpucycle(st, total); -} - -static void BenchKyberDecaps(benchmark::State &st) { - int64_t t, total = 0; - uint8_t sk[1632]; - uint8_t pk[800]; - uint8_t ct[768]; - uint8_t ss[32]; - PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk); - PQCLEAN_KYBER512_AVX2_crypto_kem_enc(ss, ct, pk); - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_crypto_kem_dec(ss, ct, sk); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(sk); - } - cpucycle(st, total); -} - -static void BenchKyberBaseMulAVX(benchmark::State &st) { - int64_t t, total = 0; - __m256i r[32],a[32],b[32],data[32]; - - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_basemul_avx(r,a,b,data); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(r); - } - cpucycle(st, total); -} - -static void BenchKyberNttAVX(benchmark::State &st) { - int64_t t, total = 0; - __m256i r[32],data[32]; - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - PQCLEAN_KYBER512_AVX2_ntt_avx(r, data); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(r); - } - cpucycle(st, total); -} - -BENCHMARK(BenchKyberMatK2); -BENCHMARK(BenchKyberRejSampling); -BENCHMARK(BenchKyberKeygen); -BENCHMARK(BenchKyberBaseMulAVX); -BENCHMARK(BenchKyberNttAVX); - -// TODO: not sure why but memcheck fails in INDCPA encryption -BENCHMARK(BenchKyberEncaps); -BENCHMARK(BenchKyberDecaps); diff --git a/test/bench/main.cc b/test/bench/main.cc index 5111f0ae..f7231f0a 100644 --- a/test/bench/main.cc +++ b/test/bench/main.cc @@ -1,10 +1,8 @@ #include -void register_sphincs_benches(); int main(int argc, char** argv) { - register_sphincs_benches(); benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); } diff --git a/test/bench/sphincs.cc b/test/bench/sphincs.cc deleted file mode 100644 index 8d6664f5..00000000 --- a/test/bench/sphincs.cc +++ /dev/null @@ -1,139 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ARRAY_LEN(X) sizeof(X)/sizeof(X[0]) - - -static auto cpucycle = [](benchmark::State &st, int64_t cycles) { - st.counters["CPU cycles: mean"] = benchmark::Counter( - cycles, benchmark::Counter::kAvgIterations | benchmark::Counter::kResultNoFormat); -}; - -struct scheme_t { - uint8_t id; - const char* name; -}; - -#define SCH(SCHEME) {SCHEME, #SCHEME}, - -#define SIG_LIST(_) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256128FSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256128SSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256128FROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256128SROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256192FSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256192SSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256192FROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256192SROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256256FSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256256SSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256256FROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHAKE256256SROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHA256128FSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHA256128SSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHA256128FROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHA256128SROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHA256192FSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHA256192SSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHA256192FROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHA256192SROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHA256256FSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHA256256SSIMPLE) \ - _(PQC_ALG_SIG_SPHINCSSHA256256FROBUST) \ - _(PQC_ALG_SIG_SPHINCSSHA256256SROBUST) - - -static const struct scheme_t sig_schemes[] = { - SIG_LIST(SCH) -}; - -static void BenchKeyPair(benchmark::State &st) { - int64_t t, total = 0; - uint32_t id = st.range(0); - - const pqc_ctx_t *ctx; - ctx = pqc_sig_alg_by_id(id); - std::vector pk(pqc_public_key_bsz(ctx)); - std::vector sk(pqc_private_key_bsz(ctx)); - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - pqc_keygen(ctx, pk.data(), sk.data()); - total += benchmark::cycleclock::Now() - t; - benchmark::DoNotOptimize(pk); - benchmark::DoNotOptimize(sk); - } - cpucycle(st, total); -} - -static void BenchSign(benchmark::State &st) { - int64_t t, total = 0; - struct pqcl_asym_t *key_pair = nullptr; - uint32_t id = st.range(0); - uint8_t msg[2048] = {0}; - const pqc_ctx_t *ctx; - - ctx = pqc_sig_alg_by_id(id); - std::vector sign(pqc_signature_bsz(ctx)); - std::vector pk(pqc_public_key_bsz(ctx)); - std::vector sk(pqc_private_key_bsz(ctx)); - pqc_keygen(ctx, pk.data(), sk.data()); - size_t se_len = sign.size(); - - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - pqc_sig_create(ctx, sign.data(), &se_len, msg, sizeof msg, sk.data()); - total += benchmark::cycleclock::Now() - t; - } - cpucycle(st, total); -} - -static void BenchVerify(benchmark::State &st) { - int64_t t, total = 0; - struct pqcl_asym_t *key_pair = nullptr; - uint32_t id = st.range(0); - const pqc_ctx_t *ctx; - uint8_t msg[2048] = {0}; - - ctx = pqc_sig_alg_by_id(id); - std::vector sign(pqc_signature_bsz(ctx)); - std::vector pk(pqc_public_key_bsz(ctx)); - std::vector sk(pqc_private_key_bsz(ctx)); - pqc_keygen(ctx, pk.data(), sk.data()); - - size_t se_len = sign.size(); - pqc_sig_create(ctx, sign.data(), &se_len, msg, sizeof msg, sk.data()); - - for (auto _ : st) { - t = benchmark::cycleclock::Now(); - pqc_sig_verify(ctx, sign.data(), se_len, msg, sizeof msg, pk.data()); - total += benchmark::cycleclock::Now() - t; - } - cpucycle(st, total); -} - -void register_sphincs_benches() { - for (size_t i=0; i"; - RegisterBenchmark(s.str().c_str(), BenchKeyPair) - ->Unit(benchmark::kMicrosecond) - ->Arg(sig_schemes[i].id)->ArgName(""); - s.str(""); s.clear(); - s << "BenchSign<" << sig_schemes[i].name << ">"; - RegisterBenchmark(s.str().c_str(), BenchSign) - ->Unit(benchmark::kMicrosecond) - ->Arg(sig_schemes[i].id)->ArgName(""); - s.str(""); s.clear(); - s << "BenchVerify<" << sig_schemes[i].name << ">"; - RegisterBenchmark(s.str().c_str(), BenchVerify) - ->Unit(benchmark::kMicrosecond) - ->Arg(sig_schemes[i].id)->ArgName(""); - } -}