Kris Kwiatkowski
77ca982b4c
The test programs use the googletest and google-benchmark libraries in order to ensure the right level of optimization and proper unit testing. Those two libraries are written in C++ and use the C++ standard library. If you want MemorySanitizer to work properly and not produce any false positives, you must ensure that all the code in your program and in the libraries it uses is instrumented. That includes the C++ standard library (see https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo). With this change, the MemorySanitizer build (enabled by -DMEMSAN=1) will also build an MSan-instrumented libc++ from LLVM and use it as the C++ standard library when building unit tests and benchmarks. In particular, what I do is this: 1. Clone the LLVM project and build libcxx and libcxxabi with MSan enabled. 2. Build GTEST and GBENCH with -fsanitize=memory and -stdlib=libc++, and additionally link against -lc++abi. 3. Use this special version of libc++ and GTEST/GBENCH to build the final binaries containing the unit and benchmark tests. The actual tests with MemorySanitizer are disabled, as I'm getting some errors which need to be investigated first. Additionally, I've split the single build into multiple ones: release, debug, clang, gcc and AddressSanitizer. On an unrelated note, I've also added flags to ignore some errors which I'm getting when using newer GCC (see GH#10, GH#11).
127 lines
3.6 KiB
C++
127 lines
3.6 KiB
C++
#include <array>
|
|
#include <stdint.h>
|
|
#include <utility>
|
|
|
|
#include <benchmark/benchmark.h>
|
|
#include <benchmark/../../src/statistics.h>
|
|
#include <benchmark/../../src/cycleclock.h>
|
|
#include "kem/kyber/kyber512/avx2/polyvec.h"
|
|
|
|
extern "C" {
|
|
#include "kem/kyber/kyber512/avx2/indcpa.h"
|
|
#include "kem/kyber/kyber512/avx2/kem.h"
|
|
#include "kem/kyber/kyber512/avx2/rejsample.h"
|
|
#include "kem/kyber/kyber512/avx2/ntt.h"
|
|
}
|
|
|
|
// Publishes an accumulated cycle count on the benchmark state as the custom
// counter "CPU cycles: mean".
//
// st     - benchmark state whose counter map receives the value.
// cycles - total cycles summed over all iterations of the benchmark loop.
//
// kAvgIterations makes the framework report the value divided by the number
// of iterations. NOTE(review): kResultNoFormat is not a flag documented by
// upstream google-benchmark; presumably it comes from the bundled copy of the
// library and suppresses unit scaling in the output - confirm.
auto cpucycle = [](benchmark::State &st, int64_t cycles) {
    const auto flags =
        benchmark::Counter::kAvgIterations | benchmark::Counter::kResultNoFormat;
    st.counters["CPU cycles: mean"] = benchmark::Counter(cycles, flags);
};
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_gen_matrix.
//
// Each iteration reads the cycle counter immediately before and after the
// call; the differences are summed and handed to cpucycle(), which reports
// them as a per-iteration counter.
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberMatK2(benchmark::State &st) {
    int64_t total = 0;
    polyvec a[KYBER_K];
    // The seed is an input to the routine under test. Zero-fill it so the
    // call never reads indeterminate bytes (which MemorySanitizer reports)
    // and every run works on the same input.
    uint8_t seed[32] = {0};

    for (auto _ : st) {
        const int64_t start = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_gen_matrix(a, seed, 0);
        total += benchmark::cycleclock::Now() - start;
        // Keep the generated matrix observable so the call is not elided.
        benchmark::DoNotOptimize(a);
    }
    cpucycle(st, total);
}
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_rej_uniform_avx on an all-zero input
// buffer of 168*3 bytes, writing into a 256-element int16_t array.
//
// Only the call itself is bracketed by cycle-counter reads; the summed
// cycle count is published through cpucycle().
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberRejSampling(benchmark::State &st) {
    int16_t a[256] = {};
    uint8_t buf[168 * 3] = {};
    int64_t total = 0;

    for (auto _ : st) {
        const int64_t before = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_rej_uniform_avx(a, buf);
        const int64_t after = benchmark::cycleclock::Now();
        total += after - before;
        // Keep the sampled output observable so the call is not elided.
        benchmark::DoNotOptimize(a);
    }
    cpucycle(st, total);
}
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_crypto_kem_keypair.
//
// A fresh key pair is written into the same two buffers on every iteration.
// The cycles spent inside the call are summed and published via cpucycle().
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberKeygen(benchmark::State &st) {
    uint8_t sk[1632];
    uint8_t pk[800];
    int64_t total = 0;

    for (auto _ : st) {
        const int64_t before = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk);
        const int64_t after = benchmark::cycleclock::Now();
        total += after - before;
        // Both key buffers are marked as used so the call cannot be dropped.
        benchmark::DoNotOptimize(pk);
        benchmark::DoNotOptimize(sk);
    }
    cpucycle(st, total);
}
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_crypto_kem_enc.
//
// A key pair is generated once, outside the timed loop. Each iteration then
// calls the encapsulation routine with that public key and adds the cycles
// spent in the call to a running total, which cpucycle() publishes.
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberEncaps(benchmark::State &st) {
    int64_t total = 0;
    uint8_t sk[1632];
    uint8_t pk[800];
    uint8_t ct[768];
    uint8_t ss[32];

    // Setup only: not part of the measurement.
    PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk);

    for (auto _ : st) {
        const int64_t start = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_crypto_kem_enc(ss, ct, pk);
        total += benchmark::cycleclock::Now() - start;
        // Sink the buffers the call writes (not the input key) so the work
        // being measured stays observable to the optimizer.
        benchmark::DoNotOptimize(ct);
        benchmark::DoNotOptimize(ss);
    }
    cpucycle(st, total);
}
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_crypto_kem_dec.
//
// A key pair and one ciphertext are produced once, outside the timed loop.
// Each iteration then calls the decapsulation routine on that ciphertext and
// adds the cycles spent in the call to a running total, which cpucycle()
// publishes.
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberDecaps(benchmark::State &st) {
    int64_t total = 0;
    uint8_t sk[1632];
    uint8_t pk[800];
    uint8_t ct[768];
    uint8_t ss[32];

    // Setup only: not part of the measurement.
    PQCLEAN_KYBER512_AVX2_crypto_kem_keypair(pk, sk);
    PQCLEAN_KYBER512_AVX2_crypto_kem_enc(ss, ct, pk);

    for (auto _ : st) {
        const int64_t start = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_crypto_kem_dec(ss, ct, sk);
        total += benchmark::cycleclock::Now() - start;
        // Sink the buffer the call writes (not the input key) so the work
        // being measured stays observable to the optimizer.
        benchmark::DoNotOptimize(ss);
    }
    cpucycle(st, total);
}
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_basemul_avx.
//
// Each iteration brackets the call with cycle-counter reads; the summed
// cycle count is published through cpucycle().
//
// NOTE(review): the last argument is a local zero-filled buffer, not a
// constant table from the library; confirm that this is acceptable for
// timing and that 32 vectors cover everything the routine reads.
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberBaseMulAVX(benchmark::State &st) {
    int64_t total = 0;
    __m256i r[32];
    // Everything handed to the routine besides the result buffer is
    // zero-filled, so the call never reads indeterminate memory (which
    // MemorySanitizer reports) and every run sees the same input.
    __m256i a[32] = {};
    __m256i b[32] = {};
    __m256i data[32] = {};

    for (auto _ : st) {
        const int64_t start = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_basemul_avx(r, a, b, data);
        total += benchmark::cycleclock::Now() - start;
        // Keep the result observable so the call is not elided.
        benchmark::DoNotOptimize(r);
    }
    cpucycle(st, total);
}
|
|
|
|
// Benchmarks PQCLEAN_KYBER512_AVX2_ntt_avx.
//
// Each iteration brackets the call with cycle-counter reads; the summed
// cycle count is published through cpucycle().
//
// NOTE(review): the second argument is a local zero-filled buffer, not a
// constant table from the library; confirm that this is acceptable for
// timing and that 32 vectors cover everything the routine reads.
//
// st - benchmark state driving the iteration loop and receiving the counter.
static void BenchKyberNttAVX(benchmark::State &st) {
    int64_t total = 0;
    // Both buffers are zero-filled before the first call so the routine
    // never reads indeterminate memory (which MemorySanitizer reports).
    __m256i r[32] = {};
    __m256i data[32] = {};

    for (auto _ : st) {
        const int64_t start = benchmark::cycleclock::Now();
        PQCLEAN_KYBER512_AVX2_ntt_avx(r, data);
        total += benchmark::cycleclock::Now() - start;
        // Keep the result observable so the call is not elided.
        benchmark::DoNotOptimize(r);
    }
    cpucycle(st, total);
}
|
|
|
|
// Register every benchmark defined in this file with google-benchmark.
BENCHMARK(BenchKyberMatK2);
BENCHMARK(BenchKyberRejSampling);
BENCHMARK(BenchKyberKeygen);
BENCHMARK(BenchKyberEncaps);
BENCHMARK(BenchKyberDecaps);
BENCHMARK(BenchKyberBaseMulAVX);
BENCHMARK(BenchKyberNttAVX);
|