diff --git a/CMakeLists.txt b/CMakeLists.txt index b0aeed64..71764d82 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,9 @@ add_subdirectory(src/kem/ntru/ntruhps2048677/clean) add_subdirectory(src/kem/ntru_prime/ntrulpr761/clean) add_subdirectory(src/kem/ntru_prime/ntrulpr653/clean) add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean) +add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean) +add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean) +add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean) # Hardware optimized targets if(${ARCH} STREQUAL "ARCH_x86_64") @@ -196,8 +199,13 @@ add_subdirectory(src/kem/ntru/ntruhps2048677/avx2) add_subdirectory(src/kem/ntru_prime/ntrulpr761/avx2) add_subdirectory(src/kem/ntru_prime/ntrulpr653/avx2) add_subdirectory(src/kem/ntru_prime/ntrulpr857/avx2) +add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2) +add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2) +add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2) endif() + + # The rest of the library set(SRC_COMMON_GENERIC src/common/aes.c @@ -205,6 +213,7 @@ set(SRC_COMMON_GENERIC src/common/sp800-185.c src/common/randombytes.c src/common/sha2.c + src/common/nistseedexpander.c src/capi/pqapi.c ) diff --git a/README.md b/README.md index b117c41f..e76d9517 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Users shouldn't expect any level of security provided by this code. The library | SABER | 3 | x | | FrodoKEM | 3 | | | NTRU Prime | 3 | x | +| HQC-RMRS | 3 | x | | Dilithium | 3 | x | | Falcon | 2 | | | Rainbow | 3 | | diff --git a/public/pqc/pqc.h b/public/pqc/pqc.h index ce32d8d9..290bc4db 100644 --- a/public/pqc/pqc.h +++ b/public/pqc/pqc.h @@ -60,7 +60,10 @@ extern "C" { _(NTRULPR857) \ _(LIGHTSABER) \ _(FIRESABER) \ - _(SABER) + _(SABER) \ + _(HQCRMRS128) \ + _(HQCRMRS192) \ + _(HQCRMRS256) // Defines IDs for each algorithm. 
The // PQC_ALG_SIG/KEM_MAX indicates number diff --git a/src/capi/pqapi.c b/src/capi/pqapi.c index c824777d..8a06fd72 100644 --- a/src/capi/pqapi.c +++ b/src/capi/pqapi.c @@ -113,6 +113,12 @@ #include "kem/saber/firesaber/avx2/api.h" #include "kem/saber/saber/clean/api.h" #include "kem/saber/saber/avx2/api.h" +#include "kem/hqc/hqc-rmrs-128/clean/api.h" +#include "kem/hqc/hqc-rmrs-192/clean/api.h" +#include "kem/hqc/hqc-rmrs-256/clean/api.h" +#include "kem/hqc/hqc-rmrs-128/avx2/api.h" +#include "kem/hqc/hqc-rmrs-192/avx2/api.h" +#include "kem/hqc/hqc-rmrs-256/avx2/api.h" // not proud of this thingy #define OPT_VERSION _CLEAN_ diff --git a/src/common/cpucycles.c b/src/common/cpucycles.c deleted file mode 100644 index d95ba3af..00000000 --- a/src/common/cpucycles.c +++ /dev/null @@ -1,17 +0,0 @@ -#include -#include "cpucycles.h" - -uint64_t cpucycles_overhead(void) { - uint64_t t0, t1, overhead = -1LL; - unsigned int i; - - for(i=0;i<100000;i++) { - t0 = cpucycles(); - __asm__ volatile (""); - t1 = cpucycles(); - if(t1 - t0 < overhead) - overhead = t1 - t0; - } - - return overhead; -} diff --git a/src/common/cpucycles.h b/src/common/cpucycles.h deleted file mode 100644 index 7b7b9f79..00000000 --- a/src/common/cpucycles.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef CPUCYCLES_H -#define CPUCYCLES_H - -#include - -#ifdef USE_RDPMC /* Needs echo 2 > /sys/devices/cpu/rdpmc */ - -static inline uint64_t cpucycles(void) { - const uint32_t ecx = (1U << 30) + 1; - uint64_t result; - - __asm__ volatile ("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax" - : "=a" (result) : "c" (ecx) : "rdx"); - - return result; -} - -#else - -static inline uint64_t cpucycles(void) { - uint64_t result; - - __asm__ volatile ("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax" - : "=a" (result) : : "%rdx"); - - return result; -} - -#endif - -uint64_t cpucycles_overhead(void); - -#endif diff --git a/src/common/speed_print.c b/src/common/speed_print.c deleted file mode 100644 index 59f147de..00000000 --- 
a/src/common/speed_print.c +++ /dev/null @@ -1,51 +0,0 @@ -#include -#include -#include -#include -#include "cpucycles.h" -#include "speed_print.h" - -static int cmp_uint64(const void *a, const void *b) { - if(*(uint64_t *)a < *(uint64_t *)b) return -1; - if(*(uint64_t *)a > *(uint64_t *)b) return 1; - return 0; -} - -static uint64_t median(uint64_t *l, size_t llen) { - qsort(l,llen,sizeof(uint64_t),cmp_uint64); - - if(llen%2) return l[llen/2]; - else return (l[llen/2-1]+l[llen/2])/2; -} - -static uint64_t average(uint64_t *t, size_t tlen) { - size_t i; - uint64_t acc=0; - - for(i=0;i -#include - -void print_results(const char *s, uint64_t *t, size_t tlen); - -#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt b/src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt new file mode 100644 index 00000000..46592a61 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt @@ -0,0 +1,16 @@ +set( + SRC_AVX2_HQCRMRS128 + code.c + fft.c + gf2x.c + gf.c + hqc.c + kem.c + parsing.c + reed_muller.c + reed_solomon.c + vector.c +) + +define_kem_alg(hqcrmrs128_avx2 + PQCLEAN_HQCRMRS128_AVX2 "${SRC_AVX2_HQCRMRS128}" "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/api.h b/src/kem/hqc/hqc-rmrs-128/avx2/api.h new file mode 100644 index 00000000..a29de656 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/api.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_HQCRMRS128_AVX2_API_H +#define PQCLEAN_HQCRMRS128_AVX2_API_H +/** + * @file api.h + * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme + */ + +#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_ALGNAME "HQC-RMRS-128" + +#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES 2289 +#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES 2249 +#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES 64 +#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES 4481 + +// As a technicality, the public key is appended to the secret key in order to respect the NIST API. 
+// Without this constraint, PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32 + +int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk); + +int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/code.c b/src/kem/hqc/hqc-rmrs-128/avx2/code.c new file mode 100644 index 00000000..5059e0f0 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/code.c @@ -0,0 +1,47 @@ +#include "code.h" +#include "parameters.h" +#include "reed_muller.h" +#include "reed_solomon.h" +#include +#include +/** + * @file code.c + * @brief Implementation of concatenated code + */ + + + +/** + * + * @brief Encoding the message m to a code word em using the concatenated code + * + * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain + * a concatenated code word. 
+ * + * @param[out] em Pointer to an array that is the tensor code word + * @param[in] m Pointer to an array that is the message + */ +void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *m) { + uint8_t tmp[8 * VEC_N1_SIZE_64] = {0}; + + PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(tmp, m); + PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(em, tmp); + +} + + + +/** + * @brief Decoding the code word em to a message m using the concatenated code + * + * @param[out] m Pointer to an array that is the message + * @param[in] em Pointer to an array that is the code word + */ +void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em) { + uint8_t tmp[8 * VEC_N1_SIZE_64] = {0}; + + PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(tmp, em); + PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(m, tmp); + + +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/code.h b/src/kem/hqc/hqc-rmrs-128/avx2/code.h new file mode 100644 index 00000000..4824298f --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/code.h @@ -0,0 +1,18 @@ +#ifndef CODE_H +#define CODE_H + + +/** + * @file code.h + * Header file of code.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *message); + +void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/fft.c b/src/kem/hqc/hqc-rmrs-128/avx2/fft.c new file mode 100644 index 00000000..23f2b20b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/fft.c @@ -0,0 +1,351 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include +#include +/** + * @file fft.c + * Implementation of the additive FFT and its transpose. + * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf + */ + + +static void compute_fft_betas(uint16_t *betas); +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size); +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); + + +/** + * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose + * + * @param[out] betas Array of size PARAM_M-1 + */ +static void compute_fft_betas(uint16_t *betas) { + size_t i; + for (i = 0; i < PARAM_M - 1; ++i) { + betas[i] = 1 << (PARAM_M - 1 - i); + } +} + + + +/** + * @brief Computes the subset sums of the given set + * + * The array subset_sums is such that its ith element is + * the subset sum of the set elements given by the binary form of i. 
+ * + * @param[out] subset_sums Array of size 2^set_size receiving the subset sums + * @param[in] set Array of set_size elements + * @param[in] set_size Size of the array set + */ +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) { + uint16_t i, j; + subset_sums[0] = 0; + + for (i = 0; i < set_size; ++i) { + for (j = 0; j < (1 << i); ++j) { + subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; + } + } +} + + + +/** + * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x] + * + * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x) + * as proposed by Bernstein, Chou and Schwabe: + * https://binary.cr.yp.to/mcbits-20130616.pdf + * + * @param[out] f0 Array half the size of f + * @param[out] f1 Array half the size of f + * @param[in] f Array of size a power of 2 + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f + */ +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + switch (m_f) { + case 4: + f0[4] = f[8] ^ f[12]; + f0[6] = f[12] ^ f[14]; + f0[7] = f[14] ^ f[15]; + f1[5] = f[11] ^ f[13]; + f1[6] = f[13] ^ f[14]; + f1[7] = f[15]; + f0[5] = f[10] ^ f[12] ^ f1[5]; + f1[4] = f[9] ^ f[13] ^ f0[5]; + + f0[0] = f[0]; + f1[3] = f[7] ^ f[11] ^ f[15]; + f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3]; + f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3]; + f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3]; + f1[2] = f[3] ^ f1[1] ^ f0[3]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 3: + f0[0] = f[0]; + f0[2] = f[4] ^ f[6]; + f0[3] = f[6] ^ f[7]; + f1[1] = f[3] ^ f[5] ^ f[7]; + f1[2] = f[5] ^ f[6]; + f1[3] = f[7]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 2: + f0[0] = f[0]; + f0[1] = f[2] ^ f[3]; + f1[0] = f[1] ^ f0[1]; + f1[1] = f[3]; + break; + + case 1: + f0[0] = f[0]; + f1[0] = f[1]; + break; + + default: + radix_big(f0, f1, f, m_f); + break; + } +} + +static void radix_big(uint16_t *f0, 
uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1; + n <<= (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0; i < n; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + + + +/** + * @brief Evaluates f at all subset sums of a given set + * + * This function is a subroutine of the function PQCLEAN_HQCRMRS128_AVX2_fft. + * + * @param[out] w Array receiving the 2^m evaluations + * @param[in,out] f Array of 2^{m_f} coefficients, twisted in place when the last beta is not 1 + * @param[in] f_coeffs Number of coefficients of f + * @param[in] m Number of betas + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f + * @param[in] betas FFT constants + */ +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; + uint16_t u[1 << (PARAM_M - 2)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; + size_t x; + + // Step 1 + if (m_f == 1) { + for (i = 0; i < m; ++i) { + tmp[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], f[1]); + } + + w[0] = f[0]; + x = 1; + for (j = 0; j < m; ++j) { + for (k = 0; k < x; ++k) { + w[x + k] = w[k] ^ tmp[j]; + } + x <<= 1; + } + + return; + } + + // Step 2: compute g + if (betas[m - 1] != 1) { + beta_m_pow = 1; + x = 1; + x <<= m_f; + for (i = 1; i < x; 
++i) { + beta_m_pow = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, betas[m - 1]); + f[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, f[i]); + } + } + + // Step 3 + radix(f0, f1, f, m_f); + + // Step 4: compute gammas and deltas + for (i = 0; i + 1 < m; ++i) { + gammas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS128_AVX2_gf_inverse(betas[m - 1])); + deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(gammas[i]) ^ gammas[i]; + } + + // Compute gammas sums + compute_subset_sums(gammas_sums, gammas, m - 1); + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + + k = 1; + k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. + if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant + w[0] = u[0]; + w[k] = u[0] ^ f1[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], f1[0]); + w[k + i] = w[i] ^ f1[0]; + } + } else { + fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas); + + // Step 6 + memcpy(w + k, v, 2 * k); + w[0] = u[0]; + w[k] ^= u[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], v[i]); + w[k + i] ^= w[i]; + } + } +} + + + +/** + * @brief Evaluates f on all field elements using an additive FFT algorithm + * + * f_coeffs is the number of coefficients of f (one more than its degree).
+ * The FFT proceeds recursively to evaluate f at all subset sums of a basis B.
+ * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf
+ * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas, + * meaning the first gammas subset sums are actually the subset sums of betas (except 1).
+ * Also note that f itself is left untouched: only the local halves f0 and f1 are altered during computation (twisted at each level). + * + * @param[out] w Array of 2^PARAM_M elements receiving the evaluations + * @param[in] f Array of 2^PARAM_FFT elements + * @param[in] f_coeffs Number of coefficients of f (i.e. deg(f)+1) + */ +void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; + + // Follows Gao and Mateer algorithm + compute_fft_betas(betas); + + // Step 1: PARAM_FFT > 1, nothing to do + + // Compute gammas sums + compute_subset_sums(betas_sums, betas, PARAM_M - 1); + + // Step 2: beta_m = 1, nothing to do + + // Step 3 + radix(f0, f1, f, PARAM_FFT); + + // Step 4: Compute deltas + for (i = 0; i < PARAM_M - 1; ++i) { + deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(betas[i]) ^ betas[i]; + } + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + + k = 1 << (PARAM_M - 1); + // Step 6, 7 and error polynomial computation + memcpy(w + k, v, 2 * k); + + // Check if 0 is root + w[0] = u[0]; + + // Check if 1 is root + w[k] ^= u[0]; + + // Find other roots + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas_sums[i], v[i]); + w[k + i] ^= w[i]; + } +} + + + +/** + * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements. 
+ * + * @param[out] error Array with the error + * Each bit is XOR-ed into error, so the buffer must be initialised by the caller + * @param[in] w Array of size 2^PARAM_M + */ +void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t k; + size_t i, index; + + compute_fft_betas(gammas); + compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + + k = 1 << (PARAM_M - 1); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); + + for (i = 1; i < k; ++i) { + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]]; + error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); + + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1]; + error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15); + } +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/fft.h b/src/kem/hqc/hqc-rmrs-128/avx2/fft.h new file mode 100644 index 00000000..129cb0a5 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/fft.h @@ -0,0 +1,18 @@ +#ifndef FFT_H +#define FFT_H + + +/** + * @file fft.h + * Header file of fft.c + */ + +#include +#include + +void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs); + +void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/gf.c b/src/kem/hqc/hqc-rmrs-128/avx2/gf.c new file mode 100644 index 00000000..932da629 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf.c @@ -0,0 +1,176 @@ +#include "gf.h" +#include "parameters.h" +#include +/** + * @file gf.c + * Galois field implementation with multiplication using the pclmulqdq instruction + */ + + +static uint16_t gf_reduce(uint64_t x, size_t deg_x); + + + +/** + * Reduces polynomial x modulo primitive polynomial GF_POLY. 
+ * @returns x mod GF_POLY + * @param[in] x Polynomial of degree less than 64 + * @param[in] deg_x The degree of polynomial x + */ +static uint16_t gf_reduce(uint64_t x, size_t deg_x) { + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; + + // Deduce the number of steps of reduction + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); + + // Reduce + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; + x &= (1 << PARAM_M) - 1; + x ^= mod; + + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; + x ^= mod; + rmdr ^= 1 << z2; + z1 = z2; + } + } + + return x; +} + + + +/** + * Multiplies two elements of GF(2^GF_M). + * @returns the product a*b + * @param[in] a Element of GF(2^GF_M) + * @param[in] b Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b) { + __m128i va = _mm_cvtsi32_si128(a); + __m128i vb = _mm_cvtsi32_si128(b); + __m128i vab = _mm_clmulepi64_si128(va, vb, 0); + uint32_t ab = _mm_cvtsi128_si32(vab); + + return gf_reduce(ab, 2 * (PARAM_M - 1)); +} + + + +/** + * Compute 16 products in GF(2^GF_M). 
+ * @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M) + * @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers + * @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer + * + */ +__m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b) { + __m128i al = _mm256_extractf128_si256(a, 0); + __m128i ah = _mm256_extractf128_si256(a, 1); + __m128i bl = _mm256_extractf128_si256(b, 0); + __m128i bh = _mm256_extractf128_si256(b, 1); + + __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0); + abl0 &= CONST128_MIDDLEMASKL; + abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); + + __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11); + abh0 &= CONST128_MIDDLEMASKL; + abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); + + abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL); + abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH); + + __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0); + abl1 &= CONST128_MIDDLEMASKL; + abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); + + __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11); + abh1 &= CONST128_MIDDLEMASKL; + abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); + + abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL); + abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH); + + __m256i ret = _mm256_set_m128i(abl1, abl0); + + __m256i aux = CONST256_MR0; + + for (int32_t i = 0; i < 7; i++) { + ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux); + aux = aux << 1; + } + + ret &= CONST256_LASTMASK; + return ret; +} + + + +/** + * Squares an element of GF(2^GF_M). 
+ * @returns a^2 + * @param[in] a Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a) { + uint32_t b = a; + uint32_t s = b & 1; + for (size_t i = 1; i < PARAM_M; ++i) { + b <<= 1; + s ^= b & (1 << 2 * i); + } + + return gf_reduce(s, 2 * (PARAM_M - 1)); +} + + + +/** + * Computes the inverse of an element of GF(2^8), + * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254 + * @returns the inverse of a, computed as a^254 (so 0 is mapped to 0) + * @param[in] a Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a) { + uint16_t inv; + uint16_t tmp1, tmp2; + + inv = PQCLEAN_HQCRMRS128_AVX2_gf_square(a); /* a^2 */ + tmp1 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inv, a); /* a^3 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_square(inv); /* a^4 */ + tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inv, tmp1); /* a^7 */ + tmp1 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inv, tmp2); /* a^11 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp1, inv); /* a^15 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_square(inv); /* a^30 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_square(inv); /* a^60 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_square(inv); /* a^120 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inv, tmp2); /* a^127 */ + inv = PQCLEAN_HQCRMRS128_AVX2_gf_square(inv); /* a^254 */ + return inv; +} + + + +/** + * Returns i modulo 2^GF_M-1. + * i must be less than 2*(2^GF_M-1). + * Therefore, the return value is either i or i-2^GF_M+1. 
+ * @returns i mod (2^GF_M-1) + * @param[in] i The integer whose modulo is taken + */ +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i) { + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); + + // mask = 0xffff if (i < GF_MUL_ORDER) + uint16_t mask = -(tmp >> 15); + + return tmp + (mask & PARAM_GF_MUL_ORDER); +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/gf.h b/src/kem/hqc/hqc-rmrs-128/avx2/gf.h new file mode 100644 index 00000000..6a568a48 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf.h @@ -0,0 +1,69 @@ +#ifndef GF_H +#define GF_H + + +/** + * @file gf.h + * Header file of gf.c + */ + +#include +#include +#include + +#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + +/** + * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8. + * The last two elements are needed by the PQCLEAN_HQCRMRS128_AVX2_gf_mul function + * (for example if both elements to multiply are zero). + */ +static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 
138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 }; + + + +/** + * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8). + * The logarithm of 0 is set to 0 by convention. + */ +static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 }; + +/** + * Masks needed for the computation of 16 mult in GF(2^M) + */ +#define CONST256_MR0 _mm256_set1_epi64x((long long) 0x0100010001000100) +#define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff) +#define CONST128_MASKL _mm_set1_epi64x((long long) 0x0000ffff0000ffff) +#define CONST128_MASKH _mm_set1_epi64x((long long) 0xffff0000ffff0000) +#define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff) +#define 
CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000) +#define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff) +#define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100) + +/** + * x^i modulo x^8+x^4+x^3+x^2+1 duplicate 4 times to fit a 256-bit register + */ +static const __m256i red[7] = { + {0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL}, + {0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL}, + {0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL}, + {0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL}, + {0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL}, + {0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL}, + {0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL}, + +}; + + +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b); + +__m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b); + +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c b/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c new file mode 100644 index 00000000..74dd0b8b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c @@ -0,0 +1,369 @@ +#include "gf2x.h" +#include "parameters.h" +#include +#include +#include +#include +/** + * \file gf2x.c + * \brief AVX2 implementation of multiplication of two polynomials + */ + + + +#define VEC_N_SPLIT_3x3 CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256) +#define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3) + +static inline void reduce(uint64_t *o, const __m256i *a); +static inline void karat_mult_1(__m128i *C, const 
__m128i *A, const __m128i *B); +static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B); + + +/** + * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$ + * + * This function computes the modular reduction of the polynomial a(x) + * + * @param[out] o Pointer to the result + * @param[in] a Pointer to the polynomial a(x) + */ +static inline void reduce(uint64_t *o, const __m256i *a256) { + size_t i, i2; + __m256i r256, carry256; + __m256i *o256 = (__m256i *)o; + const uint64_t *a64 = (const uint64_t *)a256; + uint64_t r, carry; + + i2 = 0; + for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) { + r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i])); + r256 = _mm256_srli_epi64(r256, PARAM_N & 63); + carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1])); + carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63); + r256 ^= carry256; + _mm256_storeu_si256(&o256[i2], a256[i2] ^ r256); + i2 += 1; + } + + i = i - (PARAM_N >> 6); + for (; i < (PARAM_N >> 6) + 1; i++) { + r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63); + carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63); + r ^= carry; + o[i] = a64[i] ^ r; + } + + o[PARAM_N >> 6] &= RED_MASK; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * A(x) and B(x) are stored in 128-bit registers + * This function computes A(x)*B(x) using Karatsuba + * + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) { + __m128i D1[2]; + __m128i D0[2], D2[2]; + __m128i Al = _mm_loadu_si128(A); + 
__m128i Ah = _mm_loadu_si128(A + 1); + __m128i Bl = _mm_loadu_si128(B); + __m128i Bh = _mm_loadu_si128(B + 1); + + // Compute Al.Bl=D0 + __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0); + __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11); + __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e)); + __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e)); + __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Compute Ah.Bh=D2 + DD0 = _mm_clmulepi64_si128(Ah, Bh, 0); + DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11); + AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e)); + BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e)); + DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Compute AlpAh.BlpBh=D1 + // Initialisation of AlpAh and BlpBh + __m128i AlpAh = _mm_xor_si128(Al, Ah); + __m128i BlpBh = _mm_xor_si128(Bl, Bh); + DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0); + DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11); + AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e)); + BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e)); + DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Final computation of C + __m128i middle = _mm_xor_si128(D0[1], D2[0]); + C[0] = D0[0]; + C[1] = middle ^ D0[0] ^ D1[0]; + C[2] = middle ^ D1[1] ^ D2[1]; + C[3] = D2[1]; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba 
/**
 * @brief Compute C(x) = A(x)*B(x) for operands of two 256-bit words
 *
 * One Karatsuba level built on karat_mult_1: the operands are split into a
 * low and a high 256-bit half, three half-size products are formed, and the
 * four 256-bit words of the result are recombined from them.
 *
 * @param[out] C Pointer to the result (four 256-bit words)
 * @param[in] A Pointer to the polynomial A(x) (two 256-bit words)
 * @param[in] B Pointer to the polynomial B(x) (two 256-bit words)
 */
static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[2], cross[2], high[2];
    __m256i sum_a, sum_b, overlap;
    const __m128i *a_lanes = (const __m128i *)A;
    const __m128i *b_lanes = (const __m128i *)B;

    // low = A0*B0, high = A1*B1
    karat_mult_1((__m128i *) low, a_lanes, b_lanes);
    karat_mult_1((__m128i *) high, a_lanes + 2, b_lanes + 2);

    // cross = (A0 + A1)*(B0 + B1)
    sum_a = _mm256_xor_si256(A[0], A[1]);
    sum_b = _mm256_xor_si256(B[0], B[1]);
    karat_mult_1((__m128i *) cross, (__m128i *) &sum_a, (__m128i *) &sum_b);

    // Term shared by both middle words of the result
    overlap = _mm256_xor_si256(low[1], high[0]);

    C[0] = low[0];
    C[1] = _mm256_xor_si256(overlap, _mm256_xor_si256(low[0], cross[0]));
    C[2] = _mm256_xor_si256(overlap, _mm256_xor_si256(cross[1], high[1]));
    C[3] = high[1];
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of four 256-bit words
 *
 * One Karatsuba level built on karat_mult_2: three products of two-word
 * halves are recombined into the eight 256-bit words of the result.
 *
 * @param[out] C Pointer to the result (eight 256-bit words)
 * @param[in] A Pointer to the polynomial A(x) (four 256-bit words)
 * @param[in] B Pointer to the polynomial B(x) (four 256-bit words)
 */
static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[4], cross[4], high[4];
    __m256i sum_a[2], sum_b[2];
    __m256i overlap;
    size_t k;

    // low = A_lo*B_lo, high = A_hi*B_hi
    karat_mult_2(low, A, B);
    karat_mult_2(high, A + 2, B + 2);

    // cross = (A_lo + A_hi)*(B_lo + B_hi)
    for (k = 0; k < 2; k++) {
        sum_a[k] = _mm256_xor_si256(A[k], A[k + 2]);
        sum_b[k] = _mm256_xor_si256(B[k], B[k + 2]);
    }
    karat_mult_2(cross, sum_a, sum_b);

    // Recombine; all inputs were consumed above, so C is only written here
    for (k = 0; k < 2; k++) {
        overlap = _mm256_xor_si256(low[k + 2], high[k]);

        C[k] = low[k];
        C[k + 2] = _mm256_xor_si256(overlap, _mm256_xor_si256(low[k], cross[k]));
        C[k + 4] = _mm256_xor_si256(overlap, _mm256_xor_si256(cross[k + 2], high[k + 2]));
        C[k + 6] = high[k + 2];
    }
}
/**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * This function computes A(x)*B(x) using Karatsuba: the operands are split
 * into two halves of four 256-bit words each and three karat_mult_4 products
 * are recombined.
 *
 * @param[out] C Pointer to the result (sixteen 256-bit words are written)
 * @param[in] A Pointer to the polynomial A(x) (eight 256-bit words are read)
 * @param[in] B Pointer to the polynomial B(x) (eight 256-bit words are read)
 */
static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) {
    size_t i, is, is2, is3;
    __m256i D0[8], D1[8], D2[8], SAA[4], SBB[4];
    __m256i middle;

    // D0 = low halves product, D2 = high halves product
    karat_mult_4(D0, A, B);
    karat_mult_4(D2, A + 4, B + 4);

    // SAA/SBB = (low half) + (high half) of each operand
    for (i = 0; i < 4; i++) {
        is = i + 4;
        SAA[i] = A[i] ^ A[is];
        SBB[i] = B[i] ^ B[is];
    }

    // D1 = product of the half sums
    karat_mult_4(D1, SAA, SBB);

    // Recombine the three products into the four quarters of C
    for (i = 0; i < 4; i++) {
        is = i + 4;
        is2 = is + 4;
        is3 = is2 + 4;

        // Term shared by the two middle quarters
        middle = _mm256_xor_si256(D0[is], D2[i]);

        C[i] = D0[i];
        C[is] = middle ^ D0[i] ^ D1[i];
        C[is2] = middle ^ D1[is] ^ D2[is];
        C[is3] = D2[is];
    }
}



/**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * This function computes A(x)*B(x) using Karatsuba 3 part split:
 * each operand is cut into three parts of VEC_N_SPLIT_3x3 256-bit words,
 * six karat_mult_8 products are formed and then recombined.
 *
 * NOTE(review): the sub-products are computed with karat_mult_8, which
 * always reads 8 and writes 16 words, so this presumably requires
 * VEC_N_SPLIT_3x3 == 8 -- confirm against the macro definition.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) {
    size_t i, j;
    const __m256i *a0, *b0, *a1, *b1, *a2, *b2;
    __m256i aa01[VEC_N_SPLIT_3x3], bb01[VEC_N_SPLIT_3x3], aa02[VEC_N_SPLIT_3x3], bb02[VEC_N_SPLIT_3x3], aa12[VEC_N_SPLIT_3x3], bb12[VEC_N_SPLIT_3x3];
    __m256i D0[2 * VEC_N_SPLIT_3x3], D1[2 * VEC_N_SPLIT_3x3], D2[2 * VEC_N_SPLIT_3x3], D3[2 * VEC_N_SPLIT_3x3], D4[2 * VEC_N_SPLIT_3x3], D5[2 * VEC_N_SPLIT_3x3];
    __m256i ro256[6 * VEC_N_SPLIT_3x3];
    __m256i middle0;

    // The three parts of each operand
    a0 = A;
    a1 = A + VEC_N_SPLIT_3x3;
    a2 = A + (VEC_N_SPLIT_3x3 << 1);

    b0 = B;
    b1 = B + VEC_N_SPLIT_3x3;
    b2 = B + (VEC_N_SPLIT_3x3 << 1);

    // Pairwise sums of the parts
    for (i = 0; i < VEC_N_SPLIT_3x3; i++) {
        aa01[i] = a0[i] ^ a1[i];
        bb01[i] = b0[i] ^ b1[i];

        aa12[i] = a2[i] ^ a1[i];
        bb12[i] = b2[i] ^ b1[i];

        aa02[i] = a0[i] ^ a2[i];
        bb02[i] = b0[i] ^ b2[i];
    }

    // D0..D2: products of matching parts; D3..D5: products of the pairwise sums
    karat_mult_8(D0, a0, b0);
    karat_mult_8(D1, a1, b1);
    karat_mult_8(D2, a2, b2);

    karat_mult_8(D3, aa01, bb01);
    karat_mult_8(D4, aa02, bb02);
    karat_mult_8(D5, aa12, bb12);

    // Recombine into the six VEC_N_SPLIT_3x3-word slices of the result.
    // The result is staged in ro256 so that C is written only after all
    // reads of A and B are done.
    for (i = 0; i < VEC_N_SPLIT_3x3; i++) {
        j = i + VEC_N_SPLIT_3x3;
        middle0 = D0[i] ^ D1[i] ^ D0[j];
        ro256[i] = D0[i];
        ro256[j] = D3[i] ^ middle0;
        ro256[j + VEC_N_SPLIT_3x3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0;
        middle0 = D1[j] ^ D2[i] ^ D2[j];
        ro256[j + (VEC_N_SPLIT_3x3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0;
        ro256[i + (VEC_N_SPLIT_3x3 << 2)] = D5[j] ^ middle0;
        ro256[j + (VEC_N_SPLIT_3x3 << 2)] = D2[j];
    }

    // NOTE(review): copies 2 * VEC_N_SPLIT_3 words out of a buffer declared
    // with 6 * VEC_N_SPLIT_3x3 words; this assumes
    // VEC_N_SPLIT_3 == 3 * VEC_N_SPLIT_3x3 -- confirm against the macros.
    for (i = 0; i < 2 * VEC_N_SPLIT_3; i++) {
        C[i] = ro256[i];
    }
}
VEC_N_SPLIT_3; i++) { + j = i + VEC_N_SPLIT_3; + middle0 = D0[i] ^ D1[i] ^ D0[j]; + C[i] = D0[i]; + C[j] = D3[i] ^ middle0; + C[j + VEC_N_SPLIT_3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0; + middle0 = D1[j] ^ D2[i] ^ D2[j]; + C[j + (VEC_N_SPLIT_3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0; + C[i + (VEC_N_SPLIT_3 << 2)] = D5[j] ^ middle0; + C[j + (VEC_N_SPLIT_3 << 2)] = D2[j]; + } +} + + + +/** + * @brief Multiply two polynomials modulo \f$ X^n - 1\f$. + * + * This functions multiplies a dense polynomial a1 (of Hamming weight equal to weight) + * and a dense polynomial a2. The multiplication is done modulo \f$ X^n - 1\f$. + * + * @param[out] o Pointer to the result + * @param[in] a1 Pointer to a polynomial + * @param[in] a2 Pointer to a polynomial + */ +void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) { + __m256i a1_times_a2[2 * PARAM_N_MULT + 1] = {0}; + karat_mult9(a1_times_a2, a1, a2); + reduce(o, a1_times_a2); +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h b/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h new file mode 100644 index 00000000..c1c5fd2d --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h @@ -0,0 +1,21 @@ +#ifndef GF2X_H +#define GF2X_H + + +/** + * @file gf2x.h + * @brief Header file for gf2x.c + */ +#include "parameters.h" +#include +#include + +typedef union { + uint64_t arr64[VEC_N_256_SIZE_64]; + __m256i dummy; +} aligned_vec_t; + +void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/hqc.c b/src/kem/hqc/hqc-rmrs-128/avx2/hqc.c new file mode 100644 index 00000000..66af36a3 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/hqc.c @@ -0,0 +1,168 @@ +#include "code.h" +#include "gf2x.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +#include +/** + * @file hqc.c + * @brief 
Implementation of hqc.h + */ + + + +/** + * @brief Keygen of the HQC_PKE IND_CPA scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) { + AES_XOF_struct sk_seedexpander; + AES_XOF_struct pk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + uint8_t pk_seed[SEED_BYTES] = {0}; + aligned_vec_t vx = {0}; + uint64_t *x = vx.arr64; + aligned_vec_t vy = {0}; + uint64_t *y = vy.arr64; + aligned_vec_t vh = {0}; + uint64_t *h = vh.arr64; + aligned_vec_t vs = {0}; + uint64_t *s = vs.arr64; + aligned_vec_t vtmp = {0}; + uint64_t *tmp = vtmp.arr64; + + // Create seed_expanders for public key and secret key + randombytes(sk_seed, SEED_BYTES); + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + randombytes(pk_seed, SEED_BYTES); + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + // Compute secret key + PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA); + + // Compute public key + PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&pk_seedexpander, h); + PQCLEAN_HQCRMRS128_AVX2_vect_mul(tmp, &vy, &vh); + PQCLEAN_HQCRMRS128_AVX2_vect_add(s, x, tmp, VEC_N_256_SIZE_64); + + // Parse keys to string + PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(pk, pk_seed, s); + PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(sk, sk_seed, pk); + +} + + + +/** + * @brief Encryption of the HQC_PKE IND_CPA scheme + * + * The cihertext is composed of vectors u and v. 
/**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors u and v:
 *   u = r1 + r2.h
 *   v = m.G + s.r2 + e   (truncated to PARAM_N1N2 bits)
 * where r1, r2 and e are derived deterministically from theta.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct seedexpander;
    // Each vector lives in an aligned_vec_t; the uint64_t pointers are plain
    // views onto the same storage for the helpers that take uint64_t *.
    aligned_vec_t vh = {0};
    uint64_t *h = vh.arr64;
    aligned_vec_t vs = {0};
    uint64_t *s = vs.arr64;
    aligned_vec_t vr1 = {0};
    uint64_t *r1 = vr1.arr64;
    aligned_vec_t vr2 = {0};
    uint64_t *r2 = vr2.arr64;
    aligned_vec_t ve = {0};
    uint64_t *e = ve.arr64;
    aligned_vec_t vtmp1 = {0};
    uint64_t *tmp1 = vtmp1.arr64;
    aligned_vec_t vtmp2 = {0};
    uint64_t *tmp2 = vtmp2.arr64;
    aligned_vec_t vtmp3 = {0};
    uint64_t *tmp3 = vtmp3.arr64;

    // Create seed_expander from theta
    // (theta and theta + 32 are presumably seed and diversifier -- confirm
    // against nistseedexpander.h)
    seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(h, s, pk);

    // Generate r1, r2 and e (order matters: all three come from one expander)
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&seedexpander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E);



    // Compute u = r1 + r2.h
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(tmp1, &vr2, &vh);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(u, r1, tmp1, VEC_N_256_SIZE_64);

    // Compute v = m.G by encoding the message
    // The codeword is written as bytes into v and then repacked in place
    // into 64-bit words (input and output of load8_arr alias here).
    PQCLEAN_HQCRMRS128_AVX2_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

    // Compute v = m.G + s.r2 + e
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(tmp2, &vr2, &vs);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(tmp3, e, tmp2, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

}



/**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Recomputes y from the secret key, forms v - u.y and decodes it.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    aligned_vec_t vx = {0};
    uint64_t *x = vx.arr64;
    aligned_vec_t vy = {0};
    uint64_t *y = vy.arr64;
    aligned_vec_t vtmp1 = {0};
    uint64_t *tmp1 = vtmp1.arr64;
    aligned_vec_t vtmp2 = {0};
    uint64_t *tmp2 = vtmp2.arr64;
    aligned_vec_t vtmp3 = {0};
    uint64_t *tmp3 = vtmp3.arr64;

    // Retrieve x, y, pk from secret key
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(x, y, pk, sk);

    // Compute v - u.y
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);
    // vect_mul takes aligned_vec_t operands, so u is first copied into one
    for (size_t i = 0; i < VEC_N_256_SIZE_64; i++) {
        tmp2[i] = u[i];
    }
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(tmp3, &vy, &vtmp2);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64);


    // Compute m by decoding v - u.y
    // tmp1 is reused as a byte buffer for the decoder input
    PQCLEAN_HQCRMRS128_AVX2_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_code_decode(m, (uint8_t *)tmp1);
}
uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk); + +void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/kem.c b/src/kem/hqc/hqc-rmrs-128/avx2/kem.c new file mode 100644 index 00000000..bb33e92f --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/kem.c @@ -0,0 +1,140 @@ +#include "api.h" +#include "fips202.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "sha2.h" +#include "vector.h" +#include +#include +/** + * @file kem.c + * @brief Implementation of api.h + */ + + + +/** + * @brief Keygen of the HQC_KEM IND_CAA2 scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. 
+ * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + * @returns 0 if keygen is successful + */ +int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + + PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(pk, sk); + return 0; +} + + + +/** + * @brief Encapsulation of the HQC_KEM IND_CAA2 scheme + * + * @param[out] ct String containing the ciphertext + * @param[out] ss String containing the shared secret + * @param[in] pk String containing the public key + * @returns 0 if encapsulation is successful + */ +int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + + uint8_t theta[SHA512_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + static uint64_t u[VEC_N_256_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Computing m + randombytes(m, VEC_K_SIZE_BYTES); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m + PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u, v, m, theta, pk); + + // Computing d + sha512(d, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Computing ciphertext + PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(ct, u, v, d); + + + return 0; +} + + + +/** + * @brief Decapsulation of the HQC_KEM IND_CAA2 scheme + * + * @param[out] ss String containing the shared secret + * @param[in] ct String containing the cipĥertext + * @param[in] sk String containing the secret key + * @returns 0 if decapsulation is successful, -1 otherwise 
+ */ +int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + + uint8_t result; + uint64_t u[VEC_N_256_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char pk[PUBLIC_KEY_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint8_t theta[SHA512_BYTES] = {0}; + uint64_t u2[VEC_N_256_SIZE_64] = {0}; + uint64_t v2[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d2[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Retrieving u, v and d from ciphertext + PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(u, v, d, ct); + + // Retrieving pk from sk + memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES); + + // Decryting + PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(m, u, v, sk); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m' + PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk); + + // Computing d' + sha512(d2, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Abort if c != c' or d != d' + result = PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS128_AVX2_vect_compare(d, d2, SHA512_BYTES); + result = (uint8_t) (-((int16_t) result) >> 15); + for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { + ss[i] &= ~result; + } + + + return -(result & 1); +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/parameters.h b/src/kem/hqc/hqc-rmrs-128/avx2/parameters.h new file mode 
#ifndef HQC_PARAMETERS_H
#define HQC_PARAMETERS_H


/**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
#include "api.h"


#define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

/*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_256                        Define the size of the array used to store a PARAM_N sized vector in 256 bits

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_N_MULT                          Define a length in bits: 9*256 times the number of 256-bit words needed for ceil(PARAM_N/9) bits
  #define VEC_N_256_SIZE_64                     Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
  #define VEC_N1N2_256_SIZE_64                  Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_POLY_WT                      Hamming weight of PARAM_GF_POLY
  #define PARAM_GF_POLY_M2                      Distance between the primitive polynomial first two set bits
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information part of the Reed-Solomon code (VEC_K_SIZE_BYTES is defined as PARAM_K)
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=15
                                                The smallest power of 2 greater than 15+1 is 32=2^5
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
*/

#define PARAM_N                                 17669
#define PARAM_N1                                46
#define PARAM_N2                                384
#define PARAM_N1N2                              17664
#define PARAM_OMEGA                             66
#define PARAM_OMEGA_E                           75
#define PARAM_OMEGA_R                           75
#define PARAM_SECURITY                          128
#define PARAM_DFR_EXP                           128

#define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES
#define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES
#define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES
#define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES

#define UTILS_REJECTION_THRESHOLD               16767881
#define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
#define VEC_K_SIZE_BYTES                        PARAM_K
#define VEC_N1_SIZE_BYTES                       PARAM_N1
#define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

#define VEC_N_SIZE_256                          CEIL_DIVIDE(PARAM_N, 256)

#define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
#define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
#define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
#define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

#define PARAM_N_MULT                            (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256))
#define VEC_N_256_SIZE_64                       (PARAM_N_MULT / 64)
#define VEC_N1N2_256_SIZE_64                    (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

#define PARAM_DELTA                             15
#define PARAM_M                                 8
#define PARAM_GF_POLY                           0x11D
#define PARAM_GF_POLY_WT                        5
#define PARAM_GF_POLY_M2                        4
#define PARAM_GF_MUL_ORDER                      255
#define PARAM_K                                 16
#define PARAM_G                                 31
#define PARAM_FFT                               5
#define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

#define RED_MASK                                0x1f
#define SHA512_BYTES                            64
#define SEED_BYTES                              40
#define SEEDEXPANDER_MAX_LENGTH                 4294967295

#endif
/**
 * @brief Store a 64-bit word as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer (8 bytes are written)
 * @param[in] in Word to store
 */
void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in) {
    for (size_t k = 0; k < 8; k++) {
        out[k] = (unsigned char)((in >> (8 * k)) & 0xFF);
    }
}


/**
 * @brief Load a 64-bit word from 8 bytes, least significant byte first
 *
 * @param[in] in Source buffer (8 bytes are read)
 * @returns the assembled word
 */
uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in) {
    uint64_t word = 0;

    for (size_t k = 8; k-- > 0;) {
        word = (word << 8) | in[k];
    }

    return word;
}

/**
 * @brief Pack a byte string into 64-bit words, least significant byte first
 *
 * Full groups of 8 bytes are converted with load8. If fewer than 8 bytes
 * remain and an output word is still available, they are packed into one
 * last word whose upper bytes are zero. Output words beyond that are left
 * untouched.
 *
 * The function may be called in place (in8 pointing at the storage of
 * out64): every output word is fully assembled from its input bytes before
 * it is written, including the final partial word.
 *
 * @param[out] out64 Destination array
 * @param[in] outlen Capacity of out64 in 64-bit words
 * @param[in] in8 Source bytes
 * @param[in] inlen Number of source bytes
 */
void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t index_in = 0;
    size_t index_out = 0;
    uint64_t tail = 0;

    // first copy by 8 bytes
    while (index_out < outlen && index_in + 8 <= inlen) {
        out64[index_out] = PQCLEAN_HQCRMRS128_AVX2_load8(in8 + index_in);

        index_in += 8;
        index_out += 1;
    }

    // we now need to do the last 7 bytes if necessary
    if (index_in >= inlen || index_out >= outlen) {
        return;
    }

    // Assemble the partial word in a local first: writing out64[index_out]
    // before all remaining bytes are read would clobber them when the
    // buffers alias.
    for (size_t k = inlen; k-- > index_in;) {
        tail = (tail << 8) | in8[k];
    }
    out64[index_out] = tail;
}

/**
 * @brief Unpack 64-bit words into a byte string, least significant byte first
 *
 * Stops as soon as either outlen bytes have been written or inlen words
 * have been consumed.
 *
 * @param[out] out8 Destination bytes
 * @param[in] outlen Capacity of out8 in bytes
 * @param[in] in64 Source words
 * @param[in] inlen Number of source words
 */
void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t index_out = 0; index_out < outlen && (index_out / 8) < inlen; index_out++) {
        out8[index_out] = (uint8_t)((in64[index_out / 8] >> ((index_out % 8) * 8)) & 0xFF);
    }
}
+ * + * @param[out] sk String containing the secret key + * @param[in] sk_seed Seed used to generate the secret key + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) { + memcpy(sk, sk_seed, SEED_BYTES); + sk += SEED_BYTES; + memcpy(sk, pk, PUBLIC_KEY_BYTES); +} + +/** + * @brief Parse a secret key from a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] x uint64_t representation of vector x + * @param[out] y uint64_t representation of vector y + * @param[out] pk String containing the public key + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) { + AES_XOF_struct sk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + + memcpy(sk_seed, sk, SEED_BYTES); + sk += SEED_BYTES; + memcpy(pk, sk, PUBLIC_KEY_BYTES); + + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA); +} + +/** + * @brief Parse a public key into a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] pk String containing the public key + * @param[in] pk_seed Seed used to generate the public key + * @param[in] s uint8_t representation of vector s + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) { + memcpy(pk, pk_seed, SEED_BYTES); + PQCLEAN_HQCRMRS128_AVX2_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64); +} + + + +/** + * @brief Parse a public key from a 
string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] h uint8_t representation of vector h + * @param[out] s uint8_t representation of vector s + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) { + AES_XOF_struct pk_seedexpander; + uint8_t pk_seed[SEED_BYTES] = {0}; + + memcpy(pk_seed, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_HQCRMRS128_AVX2_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES); + + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&pk_seedexpander, h); +} + + +/** + * @brief Parse a ciphertext into a string + * + * The ciphertext is composed of vectors u, v and hash d. + * + * @param[out] ct String containing the ciphertext + * @param[in] u uint8_t representation of vector u + * @param[in] v uint8_t representation of vector v + * @param[in] d String containing the hash d + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) { + PQCLEAN_HQCRMRS128_AVX2_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS128_AVX2_store8_arr(ct, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(ct, d, SHA512_BYTES); +} + + +/** + * @brief Parse a ciphertext from a string + * + * The ciphertext is composed of vectors u, v and hash d. 
+ * + * @param[out] u uint8_t representation of vector u + * @param[out] v uint8_t representation of vector v + * @param[out] d String containing the hash d + * @param[in] ct String containing the ciphertext + */ +void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) { + PQCLEAN_HQCRMRS128_AVX2_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, ct, VEC_N1N2_SIZE_BYTES); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(d, ct, SHA512_BYTES); +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/parsing.h b/src/kem/hqc/hqc-rmrs-128/avx2/parsing.h new file mode 100644 index 00000000..f2f0b5a1 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/parsing.h @@ -0,0 +1,36 @@ +#ifndef PARSING_H +#define PARSING_H + + +/** + * @file parsing.h + * @brief Header file for parsing.c + */ + +#include + +void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in); + +uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in); + +void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen); + +void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen); + + +void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk); + +void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk); + + +void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s); + +void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk); + + +void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d); + +void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct); + + +#endif diff 
--git a/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c b/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c new file mode 100644 index 00000000..85afd331 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c @@ -0,0 +1,389 @@ +#include "parameters.h" +#include "reed_muller.h" +#include +#include +#include +/** + * @file reed_muller.c + * Constant time implementation of Reed-Muller code RM(1,7) + */ + + +// number of repeated code words +#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) + +// copy bit 0 into all bits of a 64 bit value +#define BIT0MASK(x) (int64_t)(-((x) & 1)) + +static void encode(uint8_t *word, uint8_t message); +static void expand_and_sum(__m256i *dst, const uint64_t *src); +static void hadamard(__m256i *src, __m256i *dst); +static uint32_t find_peaks(__m256i *transform); + + + +/** + * @brief Encode a single byte into a single codeword using RM(1,7) + * + * Encoding matrix of this code: + * bit pattern (note that bits are numbered big endian) + * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa + * 1 cccccccc cccccccc cccccccc cccccccc + * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0 + * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00 + * 4 ffff0000 ffff0000 ffff0000 ffff0000 + * 5 00000000 ffffffff 00000000 ffffffff + * 6 00000000 00000000 ffffffff ffffffff + * 7 ffffffff ffffffff ffffffff ffffffff + * + * @param[out] word An RM(1,7) codeword + * @param[in] message A message to encode + */ +static void encode(uint8_t *word, uint8_t message) { + uint32_t e; + // bit 7 flips all the bits, do that first to save work + e = BIT0MASK(message >> 7); + // bits 0, 1, 2, 3, 4 are the same for all four longs + // (Warning: in the bit matrix above, low bits are at the left!) 
+ e ^= BIT0MASK(message >> 0) & 0xaaaaaaaa; + e ^= BIT0MASK(message >> 1) & 0xcccccccc; + e ^= BIT0MASK(message >> 2) & 0xf0f0f0f0; + e ^= BIT0MASK(message >> 3) & 0xff00ff00; + e ^= BIT0MASK(message >> 4) & 0xffff0000; + // we can store this in the first quarter + word[0 + 0] = (e >> 0x00) & 0xff; + word[0 + 1] = (e >> 0x08) & 0xff; + word[0 + 2] = (e >> 0x10) & 0xff; + word[0 + 3] = (e >> 0x18) & 0xff; + // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 + e ^= BIT0MASK(message >> 5); + word[4 + 0] = (e >> 0x00) & 0xff; + word[4 + 1] = (e >> 0x08) & 0xff; + word[4 + 2] = (e >> 0x10) & 0xff; + word[4 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 6); + word[12 + 0] = (e >> 0x00) & 0xff; + word[12 + 1] = (e >> 0x08) & 0xff; + word[12 + 2] = (e >> 0x10) & 0xff; + word[12 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 5); + word[8 + 0] = (e >> 0x00) & 0xff; + word[8 + 1] = (e >> 0x08) & 0xff; + word[8 + 2] = (e >> 0x10) & 0xff; + word[8 + 3] = (e >> 0x18) & 0xff; +} + + + +/** + * @brief Add multiple codewords into expanded codeword + * + * Note: this does not write the codewords as -1 or +1 as the green machine does + * instead, just 0 and 1 is used. 
+ * The resulting hadamard transform has: + * all values are halved + * the first entry is 64 too high + * + * @param[out] dst Structure that contain the expanded codeword + * @param[in] src Structure that contain the codeword + */ +inline void expand_and_sum(__m256i *dst, const uint64_t *src) { + uint16_t v[16]; /* one 0/1 value per 16-bit lane of a __m256i */ + for (size_t part = 0; part < 8; part++) { + dst[part] = _mm256_setzero_si256(); + } + for (size_t copy = 0; copy < MULTIPLICITY; copy++) { /* copy k starts at src[2 * k], i.e. every 128 bits */ + for (size_t part = 0; part < 8; part++) { + for (size_t bit = 0; bit < 16; bit++) { + v[bit] = (((uint16_t *)(&src[2 * copy]))[part] >> bit) & 1; + } + dst[part] += _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8], + v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); /* lane i accumulates bit i of this 16-bit part */ + } + } +} + + + +/** + * @brief Hadamard transform + * + * Perform hadamard transform of src and store result in dst + * src is overwritten: it is also used as intermediate buffer + * Method is best explained if we use H(3) instead of H(7): + * + * The routine multiplies by the matrix H(3): + * [1 1 1 1 1 1 1 1] + * [1 -1 1 -1 1 -1 1 -1] + * [1 1 -1 -1 1 1 -1 -1] + * [a b c d e f g h] * [1 -1 -1 1 1 -1 -1 1] = result of routine + * [1 1 1 1 -1 -1 -1 -1] + * [1 -1 1 -1 -1 1 -1 1] + * [1 1 -1 -1 -1 -1 1 1] + * [1 -1 -1 1 -1 1 1 -1] + * You can do this in three passes, where each pass does this: + * set lower half of buffer to pairwise sums, + * and upper half to differences + * index 0 1 2 3 4 5 6 7 + * input: a, b, c, d, e, f, g, h + * pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h + * pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h + * pass 3: a+b+c+d+e+f+g+h a+b-c-d+e+f-g-h a+b+c+d-e-f-g-h a+b-c-d-e-f+g+h + * a-b+c-d+e-f+g-h a-b-c+d+e-f-g+h a-b+c-d-e+f-g+h a-b-c+d-e+f+g-h + * This order of computation is chosen because it vectorises well. + * Likewise, this routine multiplies by H(7) in seven passes.
+ * + * @param[out] src Structure that contain the expanded codeword + * @param[out] dst Structure that contain the expanded codeword + */ +inline void hadamard(__m256i *src, __m256i *dst) { + // the passes move data (seven passes, so the last one writes into dst): + // src -> dst -> src -> dst -> src -> dst -> src -> dst + // using p1 and p2 alternately + __m256i *p1 = src; + __m256i *p2 = dst; + __m256i *p3; + for (size_t pass = 0; pass < 7; pass++) { + // warning: hadd works "within lanes" as Intel call it + // so you have to swap the middle 64 bit blocks of the result + for (size_t part = 0; part < 4; part++) { + p2[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); + p2[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); + } + // swap p1, p2 for next round + p3 = p1; + p1 = p2; + p2 = p3; + } +} + + + +/** + * @brief Finding the location of the highest value + * + * This is the final step of the green machine: find the location of the highest value, + * and add 128 if the peak is positive + * Notes on decoding + * The standard "Green machine" decoder works as follows: + * if the received codeword is W, compute (2 * W - 1) * H7 + * The entries of the resulting vector are always even and vary from + * -128 (= the complement is a code word, add bit 7 to decode) + * via 0 (this is a different codeword) + * to 128 (this is the code word). + * + * Our decoding differs in two ways: + * - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1) + * - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY) + * This implies that we have to subtract 64M (M=MULTIPLICITY) + * from the first entry to make sure the first codeword is handled properly + * and that the entries vary from -64M to 64M. + * -64M or 64M stands for a perfect codeword.
+ * If there are fewer than 32M errors, there is always a unique codeword + * which has an entry with absolute value > 32M; + * this is because an error changes an entry by 1. + * The highest number that seems to be decodable is 50 errors, so that the + * highest entries in the hadamard transform can be as low as 12. + * But this is different for the repeated code. + * Because multiple codewords are added, this changes: the lowest value of the + * hadamard transform of the sum of six words is seen to be as low as 43 (!), + * which is way less than 12*6. + * + * It is possible that there are more errors, but the word is still uniquely + * decodable: we found a word with distance of 50 from the nearest codeword. + * That means that the highest entry can be as low as 14M. + * Since we have to do binary search, we search for the range 1-64M + * which can be done in 6+l2g(M) steps. + * The binary search is based on (values>32M are unique): + * M 32M min> max> firstStep #steps + * 2 64 1 64 33 +- 16 6 + * 4 128 1 128 65 +- 32 7 + * 6 192 1 192 129 +- 64 8 + * + * As a check, we ran a sample for M=6 to see the peak value; it ranged + * from 43 to 147, so my analysis looks right. Also, it shows that decoding + * far beyond the bound of 32M is needed. + * + * For the vectors, it would be tempting to use 8 bit ints, + * because the values "almost" fit in there. + * We could use some trickery to fit it in 8 bits, like saturated add or + * division by 2 in a late step. + * Unfortunately, these instructions do not exist. + * The saturated add _mm512_adds_epi8 is available only on the latest processors, + * and division, shift, mulhi are not available at all for 8 bits. + * So, we use 16 bit ints. + * + * For the search of the optimal comparison value, + * remember the transform contains 64M-d, + * where d are the distances to the codewords. + * The highest value gives the most likely codeword.
+ * There is not fast vectorized way to find this value, so we search for the + * maximum value itself. + * In each pass, we collect a bit map of the transform values that are, + * say >bound. There are three cases: + * bit map = 0: all code words are further away than 64M-bound (decrease bound) + * bit map has one bit: one unique code word has distance < 64M-bound + * bit map has multiple bits: multiple words (increase bound) + * We will search for the lowest value of bound that gives a nonzero bit map. + * + * @param[in] transform Structure that contain the expanded codeword + */ +inline uint32_t find_peaks(__m256i *transform) { + // a whole lot of vector variables + __m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows; + __m256i tmp = _mm256_setzero_si256(); + __m256i vect_mask; + __m256i res; + int32_t lower; + int32_t width; + uint32_t message; + uint32_t mask; + int8_t index; + int8_t abs_value; + int8_t mask1; + int8_t mask2; + uint16_t result; + + // compute absolute value of transform + for (size_t i = 0; i < 8; i++) { + abs_rows[i] = _mm256_abs_epi16(transform[i]); + } + // compute a vector of 16 elements which contains the maximum somewhere + // (later used to compute bits 0 through 3 of message) + max_abs_rows = abs_rows[0]; + for (size_t i = 1; i < 8; i++) { + max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]); + } + + // do binary search for the highest value that is lower than the maximum + // loop invariant: lower gives bit map = 0, lower + width gives bit map > 0 + lower = 1; + // this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6 + width = 1 << (5 + MULTIPLICITY / 2); + // if you don't unroll this loop, it fits in the loop cache + // uncomment the line below to speeding up the program by a few percent + // #pragma GCC unroll 0 + while (width > 1) { + width >>= 1; + // compare with lower + width; put result in bitmap + // make vector from value of new bound + bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width)); + 
bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound); + // step up if there are any matches + // rely on compiler to use conditional move here + mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap); + mask = ~(uint32_t) ((-(int64_t) mask) >> 63); + lower += mask & width; + } + // lower+width contains the maximum value of the vector + // or less, if the maximum is very high (which is OK) + // normally, there is one maximum, but sometimes there are more + // find where the maxima occur in the maximum vector + // (each determines lower 4 bits of peak position) + // construct vector filled with bound-1 + bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1)); + + // find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message + // find lowest value by searching backwards skip first check to save time + message = 0x70; + for (size_t i = 0; i < 8; i++) { + bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound); + mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap); + mask = ~(uint32_t) ((-(int64_t) mask) >> 63); + message ^= mask & (message ^ ((7 - i) << 4)); + } + // we decided which row of the matrix contains the lowest match + // select proper row + index = message >> 4; + + tmp = _mm256_setzero_si256(); + for (size_t i = 0; i < 8; i++) { + abs_value = (int8_t)(index - i); + mask1 = abs_value >> 7; + abs_value ^= mask1; + abs_value -= mask1; + mask2 = ((uint8_t) - abs_value >> 7); + mask = (-1ULL) + mask2; + vect_mask = _mm256_set1_epi32(mask); + res = _mm256_and_si256(abs_rows[i], vect_mask); + tmp = _mm256_or_si256(tmp, res); + } + + active_row = tmp; + + // get the column number of the vector element + // by setting the bits corresponding to the columns + // and then adding elements within two groups of 8 + vect_mask = _mm256_cmpgt_epi16(active_row, bound); + vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1); + for (size_t i = 0; i < 3; i++) { + vect_mask = 
_mm256_hadd_epi16(vect_mask, vect_mask); + } + // add low 4 bits of message + message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8)); + + // set bit 7 if sign of biggest value is positive + // make sure a jump isn't generated by the compiler + tmp = _mm256_setzero_si256(); + for (size_t i = 0; i < 8; i++) { + mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63); + vect_mask = _mm256_set1_epi32(mask); + tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i])); + } + result = 0; + for (size_t i = 0; i < 16; i++) { + mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63); + result |= mask & ((uint16_t *)&tmp)[i]; + } + message |= (0x8000 & ~result) >> 8; + return message; +} + + + +/** + * @brief Encodes the received word + * + * The message consists of N1 bytes each byte is encoded into PARAM_N2 bits, + * or MULTIPLICITY repeats of 128 bits + * + * @param[out] cdw Array of size VEC_N1N2_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_N1_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) { + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // encode first word + encode(&cdw[16 * i * MULTIPLICITY], msg[i]); + // copy to other identical codewords + for (size_t copy = 1; copy < MULTIPLICITY; copy++) { + memcpy(&cdw[16 * i * MULTIPLICITY + 16 * copy], &cdw[16 * i * MULTIPLICITY], 16); + } + } +} + + + +/** + * @brief Decodes the received word + * + * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane. 
+ * The theory of error-correcting codes codes @cite macwilliams1977theory + * + * @param[out] msg Array of size VEC_N1_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1N2_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) { + __m256i expanded[8]; + __m256i transform[8]; + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // collect the codewords + expand_and_sum(expanded, (uint64_t *)&cdw[16 * i * MULTIPLICITY]); + // apply hadamard transform + hadamard(expanded, transform); + // fix the first entry to get the half Hadamard transform + transform[0] -= _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY); + // finish the decoding + msg[i] = find_peaks(transform); + } +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h b/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h new file mode 100644 index 00000000..4c5936d3 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h @@ -0,0 +1,18 @@ +#ifndef REED_MULLER_H +#define REED_MULLER_H + + +/** + * @file reed_muller.h + * Header file of reed_muller.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c b/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c new file mode 100644 index 00000000..e42c9529 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c @@ -0,0 +1,466 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include "parsing.h" +#include "reed_solomon.h" +#include +#include +#include +/** + * @file reed_solomon.c + * Constant time implementation of Reed-Solomon codes + */ + + +static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); 
+static void compute_roots(uint8_t *error, uint16_t *sigma); +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); +static void correct_errors(uint8_t *cdw, const uint16_t *error_values); + +static const __m256i alpha_ij256_1[45] = { + {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087}, + {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006}, + {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035}, + {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014}, + {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be}, + {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078}, + {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3}, + {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d}, + {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed}, + {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e}, + {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054}, + {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4}, + {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5}, + {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062}, + {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064}, + {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051}, + {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045}, + {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb}, + {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083}, + 
{0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020}, + {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d}, + {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0}, + {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee}, + {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba}, + {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e}, + {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb}, + {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9}, + {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd}, + {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec}, + {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9}, + {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052}, + {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1}, + {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1}, + {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc}, + {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c}, + {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2}, + {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048}, + {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016}, + {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad}, + {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074}, + {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9}, + {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025}, + {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 
0x00ab00a9005b008c}, + {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de}, + {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f} +}; +static const __m256i alpha_ij256_2[45] = { + {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x0000000000600030}, + {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x0000000000b90069}, + {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x0000000000df007f}, + {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00000000003b00f8}, + {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x000000000055004d}, + {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x00000000009600f1}, + {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x00000000005900e0}, + {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x00000000002c00f7}, + {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0000000000260040}, + {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x0000000000c1009c}, + {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00000000000f005f}, + {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x00000000001a00b6}, + {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x0000000000a900ec}, + {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x00000000009100aa}, + {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0000000000640096}, + {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00000000002400a2}, + {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x000000000001000b}, + {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00000000006000cd}, + {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x0000000000b900d4}, + {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x0000000000df005e}, + {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00000000003b0086}, 
+ {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0000000000550085}, + {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x00000000009600d5}, + {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000000000059006e}, + {0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x00000000002c00ef}, + {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00000000002600fa}, + {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0000000000c1002d}, + {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x00000000000f0023}, + {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x00000000001a001e}, + {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0000000000a9001a}, + {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x00000000009100da}, + {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0000000000640063}, + {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0000000000240082}, + {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0000000000010045}, + {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x000000000060006c}, + {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x0000000000b9008f}, + {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x0000000000df0028}, + {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00000000003b00d3}, + {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x00000000005500ce}, + {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x0000000000960084}, + {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x00000000005900e5}, + {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x00000000002c0007}, + {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0000000000260090}, + {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x0000000000c10002}, + {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 
0x00000000000f0060} +}; + + +/** + * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes + * + * Following @cite lin1983error (Chapter 4 - Cyclic Codes), + * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register + * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code. + * + * @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_K_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) { + size_t i, k; + uint8_t gate_value = 0; + uint8_t prev, x; + + union { + uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)]; + __m256i dummy; + } tmp = {0}; + + union { + uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)]; + __m256i dummy; + } PARAM_RS_POLY = {{ RS_POLY_COEFS }}; + + __m256i *tmp256 = (__m256i *)tmp.arr16; + __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16; + + for (i = 0; i < PARAM_K; ++i) { + gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]); + _mm256_storeu_si256(&tmp256[0], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[0])); + _mm256_storeu_si256(&tmp256[1], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[1])); + + prev = 0; + for (k = 0; k < PARAM_N1 - PARAM_K; k++) { + x = cdw[k]; + cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]); + prev = x; + } + } + + memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K); +} + + + +/** + * @brief Computes 2 * PARAM_DELTA syndromes + * + * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes + * @param[in] cdw Array of size PARAM_N1 storing the received vector + */ +void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { + __m256i *syndromes256 = (__m256i *) syndromes; + __m256i last_syndromes256; + syndromes256[0] = _mm256_set1_epi16(cdw[0]); + + for (size_t i = 0; i < PARAM_N1 
- 1; ++i) { + syndromes256[0] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_1[i]); + } + + last_syndromes256 = _mm256_set1_epi16(cdw[0]); + + for (size_t i = 0; i < PARAM_N1 - 1; ++i) { + last_syndromes256 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_2[i]); + } + + __m128i *s128 = (__m128i *) &last_syndromes256; + _mm_store_si128((__m128i *) (syndromes + 16), *s128); + + uint64_t *s8 = (uint64_t *) (syndromes + 24); + s8[0] = _mm_extract_epi64(s128[1], 0); + + uint32_t *s12 = (uint32_t *) (syndromes + 28); + uint32_t *s32 = ((uint32_t *) &last_syndromes256) + 6; + s12[0] = *s32; +} + + + +/** + * @brief Computes the error locator polynomial (ELP) sigma + * + * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes).
+ * We use the letter p for rho, which is initialized to -1.
+ * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
+ * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
+ * sigma_copy holds a temporary copy of sigma in case X_sigma_p needs to be updated.
+ * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA. + * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value + * and we only need to save its first PARAM_DELTA - 1 coefficients. + * + * @returns the degree of the ELP sigma + * @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP + * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes + */ +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { + uint16_t deg_sigma = 0; + uint16_t deg_sigma_p = 0; + uint16_t deg_sigma_copy = 0; + uint16_t sigma_copy[PARAM_DELTA + 1] = {0}; + uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; + uint16_t pp = (uint16_t) -1; // 2*rho + uint16_t d_p = 1; + uint16_t d = syndromes[0]; + + uint16_t mask1, mask2, mask12; + uint16_t deg_X, deg_X_sigma_p; + uint16_t dd; + uint16_t mu; + + uint16_t i; + + sigma[0] = 1; + for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { + // Save sigma in case we need it to update X_sigma_p + memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); + deg_sigma_copy = deg_sigma; + + dd = PQCLEAN_HQCRMRS128_AVX2_gf_mul(d, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(d_p)); + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + sigma[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(dd, X_sigma_p[i]); + } + + deg_X = mu - pp; + deg_X_sigma_p = deg_X + deg_sigma_p; + + // mask1 = 0xffff if(d != 0) and 0 otherwise + mask1 = -((uint16_t) - d >> 15); + + // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise + mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15); + + // mask12 = 0xffff if the deg_sigma increased and 0 otherwise + mask12 = mask1 & mask2; + deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma); + + if (mu == (2 * PARAM_DELTA - 1)) { + break; + } + + pp ^= mask12 & (mu ^ pp); + d_p ^= mask12 & (d ^ d_p); + for (i = PARAM_DELTA; i; --i) { + X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); + } + + deg_sigma_p ^= mask12 & 
(deg_sigma_copy ^ deg_sigma_p); + d = syndromes[mu + 1]; + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + d ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]); + } + } + + return deg_sigma; +} + + + +/** + * @brief Computes the error polynomial error from the error locator polynomial sigma + * + * Passes sigma through PQCLEAN_HQCRMRS128_AVX2_fft into a local buffer w, then calls PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(error, w); see those functions for more details. + * + * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial + * @note There is no error_compact parameter in this signature; the function takes only error and sigma. + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial (the FFT is called with PARAM_DELTA + 1 as its third argument) + */ +static void compute_roots(uint8_t *error, uint16_t *sigma) { + uint16_t w[1 << PARAM_M] = {0}; + + PQCLEAN_HQCRMRS128_AVX2_fft(w, sigma, PARAM_DELTA + 1); + PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(error, w); +} + + + +/** + * @brief Computes the polynomial z(x) + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
+ * + * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x) + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + * @param[in] degree Integer that is the degree of polynomial sigma + * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes + */ +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) { + size_t i, j; + uint16_t mask; + + z[0] = 1; + + for (i = 1; i < PARAM_DELTA + 1; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] = mask & sigma[i]; + } + + z[1] ^= syndromes[0]; + + for (i = 2; i <= PARAM_DELTA; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] ^= mask & syndromes[i - 1]; + + for (j = 1; j < i; ++j) { + z[i] ^= mask & PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]); + } + } +} + + + +/** + * @brief Computes the error values + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. + * + * @param[out] error_values Array of PARAM_DELTA elements receiving the error values + * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x) + * @param[in] z_degree Integer that is the degree of polynomial z(x) + * @param[in] error_compact Array of PARAM_DELTA + PARAM_N1 storing compact representation of the error + */ +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) { + uint16_t beta_j[PARAM_DELTA] = {0}; + uint16_t e_j[PARAM_DELTA] = {0}; + + uint16_t delta_counter; + uint16_t delta_real_value; + uint16_t found; + uint16_t mask1; + uint16_t mask2; + uint16_t tmp1; + uint16_t tmp2; + uint16_t inverse; + uint16_t inverse_power_j; + + // Compute the beta_{j_i} page 31 of the documentation + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; i++) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ 
delta_counter) >> 31)); // j == delta_counter + beta_j[j] += mask1 & mask2 & gf_exp[i]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } + delta_real_value = delta_counter; + + // Compute the e_{j_i} page 31 of the documentation + for (size_t i = 0; i < PARAM_DELTA; ++i) { + tmp1 = 1; + tmp2 = 1; + inverse = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(beta_j[i]); + inverse_power_j = 1; + + for (size_t j = 1; j <= PARAM_DELTA; ++j) { + inverse_power_j = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, inverse); + tmp1 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, z[j]); + } + for (size_t k = 1; k < PARAM_DELTA; ++k) { + tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); + } + mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value + e_j[i] = mask1 & PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(tmp2)); + } + + // Place the delta e_{j_i} values at the right coordinates of the output vector + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; ++i) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter + error_values[i] += mask1 & mask2 & e_j[j]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } +} + + + +/** + * @brief Correct the errors + * + * XORs error_values[i] into cdw[i] for every index i below PARAM_N1. + * + * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector; it is corrected in place + * @param[in] error_values Array of PARAM_N1 elements storing the error values + */ +static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { + for (size_t i = 0; i < PARAM_N1; ++i) { + cdw[i] ^= error_values[i]; + } +} + + + +/** + * @brief Decodes the received word + * + * This function relies on six steps: + *
    + *
  1. The first step is the computation of the 2*PARAM_DELTA syndromes. + *
  2. The second step is the computation of the error-locator polynomial sigma. + *
  3. The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses. + *
  4. The fourth step is the computation of the polynomial z(x). + *
  5. The fifth step is the computation of the error values. + *
  6. The sixth step is the correction of the errors in the received polynomial. + *
+ * For a more complete picture on Reed-Solomon decoding, see Shu Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error + * + * @param[out] msg Buffer receiving the PARAM_K bytes of the decoded message + * @param[in,out] cdw Received word; its first PARAM_N1 bytes are corrected in place, then the message is copied out of it starting at offset PARAM_G - 1 + */ +void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) { + uint16_t syndromes[2 * PARAM_DELTA] = {0}; + uint16_t sigma[1 << PARAM_FFT] = {0}; + uint8_t error[1 << PARAM_M] = {0}; + uint16_t z[PARAM_N1] = {0}; + uint16_t error_values[PARAM_N1] = {0}; + uint16_t deg; + + // Calculate the 2*PARAM_DELTA syndromes + compute_syndromes(syndromes, cdw); + + // Compute the error locator polynomial sigma + // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room + deg = compute_elp(sigma, syndromes); + + // Compute the error polynomial error + compute_roots(error, sigma); + + // Compute the polynomial z(x) + compute_z_poly(z, sigma, deg, syndromes); + + // Compute the error values + compute_error_values(error_values, z, error); + + // Correct the errors + correct_errors(cdw, error_values); + + // Retrieve the message from the decoded codeword + memcpy(msg, cdw + (PARAM_G - 1), PARAM_K); + +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h b/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h new file mode 100644 index 00000000..f42de097 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h @@ -0,0 +1,20 @@ +#ifndef REED_SOLOMON_H +#define REED_SOLOMON_H + + +/** + * @file reed_solomon.h + * Header file of reed_solomon.c + */ +#include "parameters.h" +#include +#include + +static const uint16_t alpha_ij_pow [48][79] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95,
190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223, 91, 113, 217, 67, 17, 68, 13, 52, 208, 103, 129, 62, 248, 199, 59, 236, 151, 102, 133, 46, 184, 218, 79, 33, 132, 42, 168, 154, 82, 85, 73, 57, 228, 183}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169, 33, 21, 168, 41, 85, 146, 228, 115, 191, 145, 252, 179, 241, 219, 150, 196, 110, 87, 130, 100, 7, 56, 221, 166, 89, 242, 195, 86, 138, 36, 61, 245, 251, 139}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150, 149, 165, 130, 200, 28, 221, 81, 121, 195, 172, 18, 61, 247, 203, 44, 250, 27, 173, 2, 32, 58, 135, 152, 117, 3, 48, 39, 74, 212, 193, 140, 40, 186, 111}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36, 244, 235, 44, 233, 108, 1, 32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38, 117, 12, 39, 53, 193, 10, 186, 161, 47, 15, 231, 127, 182, 134, 26, 206, 237, 197, 23, 169, 21, 41, 146, 115, 145, 179, 219, 196, 87, 100, 56, 166, 242, 86}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 
87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 3, 157, 53, 159, 40, 185, 194, 137, 231, 254, 226, 68, 189, 248, 197, 46, 158, 168, 170, 183, 145, 123, 75, 110, 25, 28, 166, 249, 69, 61, 235, 176, 54, 2, 29, 38, 234, 48, 37, 119}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26, 31, 118, 23, 158, 77, 146, 209, 229, 219, 55, 25, 56, 162, 155, 36, 243, 88, 54, 4, 116, 45, 6, 78, 181, 5, 105, 97, 137, 211, 223, 67, 52, 62, 236}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85, 115, 252, 219, 110, 100, 221, 242, 138, 245, 44, 54, 8, 205, 117, 96, 53, 70, 186, 97, 15, 107, 182, 68, 206, 59, 23, 33, 41, 228, 145, 241, 196, 130, 56}, {116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100, 167, 239, 36, 235, 233, 1, 116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44, 216, 128, 45, 48, 106, 10, 222, 202, 107, 226, 52, 237, 133, 66, 85, 209, 123, 196, 50, 167, 195, 144, 11, 54, 32, 76, 12, 148, 140, 185, 188, 211, 182, 13}, {205, 143, 37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221, 195, 61, 44, 173, 58, 117, 39, 193, 186, 47, 231, 182, 26, 237, 23, 21, 146, 145, 219, 87, 56, 242, 36, 139, 54, 64, 45, 96, 181, 80, 97, 120, 223, 68, 62, 102, 33, 85, 191, 241, 110, 7, 89, 138, 251, 207, 8, 38, 12, 53, 10, 161, 
15, 127, 134, 206, 197, 169, 41, 115, 179, 196}, {135, 6, 53, 20, 190, 120, 163, 13, 237, 46, 84, 228, 229, 98, 100, 81, 69, 251, 131, 32, 45, 192, 238, 186, 94, 187, 217, 189, 236, 169, 82, 209, 241, 220, 28, 242, 72, 22, 173, 116, 201, 37, 140, 222, 15, 254, 34, 62, 204, 132, 146, 63, 75, 130, 167, 43, 245, 250, 4, 38, 24, 212, 80, 194, 253, 182, 52, 147, 184, 77, 183, 179, 149, 141, 89, 9, 203, 54, 128}, {19, 24, 181, 93, 94, 107, 67, 129, 102, 132, 57, 252, 98, 200, 89, 18, 11, 173, 232, 3, 53, 40, 194, 231, 226, 189, 197, 158, 170, 145, 75, 25, 166, 69, 235, 54, 29, 234, 37, 5, 95, 120, 91, 52, 59, 218, 82, 191, 227, 174, 221, 43, 247, 207, 32, 90, 39, 35, 111, 15, 225, 136, 237, 92, 77, 115, 246, 220, 56, 239, 122, 125, 4, 76, 96, 238, 105, 101, 177}, {38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145}, {76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, 2, 152, 39, 140, 190, 231, 175, 31, 23, 77, 209, 219, 25, 162, 36, 88, 4, 45, 78, 5, 97, 211, 67, 62, 46, 154, 191, 171, 50, 89, 72, 176, 8, 90, 156, 10, 194, 187, 134, 124, 92, 41, 99, 75, 100, 178, 144, 125, 16, 180, 37, 20, 153, 107, 17, 248, 184, 82, 198, 150, 200, 121, 61, 250}, {152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153}, {45, 37, 80, 101, 223, 208, 102, 168, 191, 150, 7, 195, 251, 173, 38, 39, 10, 47, 127, 26, 197, 21, 115, 219, 100, 242, 245, 54, 205, 96, 70, 97, 107, 68, 59, 33, 228, 241, 
130, 89, 61, 207, 58, 12, 193, 161, 231, 134, 237, 169, 146, 179, 87, 166, 36, 125, 64, 143, 181, 185, 120, 217, 62, 184, 85, 252, 110, 221, 138, 44, 8, 117, 53, 186, 15, 182, 206, 23, 41}, {90, 148, 186, 30, 226, 62, 109, 73, 179, 174, 162, 61, 131, 232, 96, 140, 153, 127, 52, 51, 168, 99, 98, 56, 172, 22, 8, 234, 212, 185, 240, 67, 237, 79, 114, 241, 25, 121, 245, 108, 19, 39, 20, 188, 223, 189, 133, 41, 63, 55, 221, 9, 176, 64, 3, 238, 161, 211, 34, 59, 66, 183, 219, 200, 239, 251, 71, 152, 37, 160, 137, 182, 129, 92, 85, 229, 165, 166, 72}, {180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108, 38, 156, 160, 15, 226, 124, 169, 114, 255, 100, 239, 235, 1, 180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5}, {117, 181, 161, 107, 26, 102, 41, 252, 87, 89, 245, 173, 45, 53, 185, 231, 68, 197, 168, 145, 110, 166, 61, 54, 38, 37, 186, 120, 134, 59, 21, 191, 196, 221, 36, 207, 205, 39, 80, 15, 217, 237, 33, 115, 150, 56, 138, 125, 58, 96, 10, 101, 182, 62, 169, 228, 219, 7, 86, 44, 64, 12, 70, 47, 223, 206, 184, 146, 241, 100, 195, 139, 8, 143, 193, 97, 127, 208, 23}, {234, 238, 97, 254, 103, 184, 57, 227, 7, 172, 176, 58, 192, 40, 15, 175, 147, 21, 99, 55, 166, 122, 216, 45, 106, 222, 107, 52, 133, 85, 123, 50, 195, 11, 32, 12, 140, 188, 182, 124, 158, 115, 49, 224, 36, 131, 19, 37, 105, 253, 68, 151, 154, 252, 174, 121, 251, 2, 201, 193, 194, 225, 206, 109, 114, 219, 14, 69, 125, 116, 157, 80, 30, 67, 59, 42, 198, 110, 81}, {201, 159, 47, 91, 124, 33, 209, 149, 166, 244, 71, 117, 238, 194, 223, 31, 79, 115, 98, 167, 61, 216, 90, 181, 190, 254, 206, 218, 213, 150, 224, 72, 54, 152, 106, 161, 177, 189, 184, 114, 171, 56, 18, 131, 38, 148, 111, 107, 104, 46, 146, 227, 14, 138, 233, 135, 37, 210, 211, 26, 133, 170, 241, 141, 172, 125, 232, 78, 186, 253, 136, 102, 164, 123, 100, 43, 
88, 58, 157}, {143, 70, 101, 217, 59, 168, 252, 130, 195, 44, 58, 39, 186, 231, 26, 23, 146, 219, 56, 36, 54, 45, 181, 97, 223, 62, 33, 191, 110, 89, 251, 8, 12, 10, 15, 134, 197, 41, 179, 100, 86, 125, 205, 37, 185, 107, 208, 184, 228, 150, 221, 61, 173, 117, 193, 47, 182, 237, 21, 145, 87, 242, 139, 64, 96, 80, 120, 68, 102, 85, 241, 7, 138, 207, 38, 53, 161, 127, 206}, {3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55, 89, 235, 32, 96, 160, 253, 26, 46, 114, 150, 167, 244, 1, 3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174}, {6, 20, 120, 13, 46, 228, 98, 81, 251, 32, 192, 186, 187, 189, 169, 209, 220, 242, 22, 116, 37, 222, 254, 62, 132, 63, 130, 43, 250, 38, 212, 194, 182, 147, 77, 179, 141, 9, 54, 180, 159, 101, 67, 151, 85, 227, 112, 61, 142, 3, 10, 60, 136, 23, 114, 49, 166, 243, 16, 96, 93, 211, 208, 218, 230, 110, 121, 11, 58, 156, 111, 127, 31, 66, 145, 65, 155, 125, 19}, {12, 80, 231, 208, 169, 191, 87, 195, 125, 38, 181, 47, 217, 197, 85, 219, 221, 245, 8, 96, 186, 107, 206, 33, 145, 130, 86, 207, 45, 193, 101, 134, 102, 146, 150, 166, 251, 64, 39, 185, 127, 62, 21, 252, 100, 138, 54, 117, 70, 15, 68, 23, 228, 196, 89, 139, 58, 37, 161, 223, 237, 168, 179, 7, 36, 173, 143, 10, 120, 26, 184, 115, 110, 242, 44, 205, 53, 97, 182}, {24, 93, 107, 129, 132, 252, 200, 18, 173, 3, 40, 231, 189, 158, 145, 25, 69, 54, 234, 5, 120, 52, 218, 191, 174, 43, 207, 90, 35, 15, 136, 92, 115, 220, 239, 125, 76, 238, 101, 17, 133, 228, 149, 121, 44, 135, 212, 47, 175, 51, 146, 49, 162, 139, 116, 148, 97, 113, 236, 85, 171, 83, 251, 128, 156, 161, 163, 147, 41, 255, 224, 245, 16, 157, 185, 254, 248, 168, 123}, {48, 105, 127, 248, 77, 241, 224, 247, 64, 156, 95, 182, 236, 170, 150, 162, 11, 205, 212, 94, 134, 133, 213, 110, 239, 250, 45, 35, 30, 26, 218, 99, 130, 69, 108, 143, 
40, 211, 206, 132, 229, 7, 144, 2, 96, 210, 254, 237, 154, 255, 221, 243, 128, 37, 190, 113, 197, 73, 49, 89, 22, 135, 181, 188, 17, 23, 183, 220, 195, 233, 90, 70, 60, 52, 169, 198, 25, 138, 216}, {96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15}, {192, 222, 182, 151, 114, 110, 155, 27, 143, 160, 177, 237, 82, 75, 89, 88, 152, 70, 240, 103, 21, 123, 224, 251, 116, 212, 101, 136, 218, 145, 200, 144, 8, 78, 190, 217, 204, 183, 87, 172, 216, 12, 105, 225, 59, 170, 98, 242, 250, 180, 10, 211, 31, 168, 255, 83, 139, 135, 238, 15, 52, 158, 252, 14, 244, 64, 74, 153, 134, 46, 209, 130, 9, 142, 96, 111, 91, 197, 57}, {157, 95, 217, 133, 230, 130, 18, 2, 39, 190, 175, 23, 209, 25, 36, 4, 78, 97, 67, 46, 191, 50, 72, 8, 156, 194, 134, 92, 99, 100, 144, 16, 37, 153, 17, 184, 198, 200, 61, 32, 74, 47, 34, 109, 145, 141, 122, 64, 148, 94, 68, 218, 63, 7, 244, 128, 53, 188, 136, 169, 126, 14, 245, 29, 106, 101, 13, 79, 252, 28, 247, 58, 212, 202, 26, 158, 229, 56, 243}, {39, 97, 134, 184, 145, 7, 245, 58, 181, 15, 208, 21, 241, 166, 44, 45, 10, 107, 237, 85, 196, 195, 54, 12, 185, 182, 102, 115, 130, 36, 8, 37, 47, 68, 169, 252, 56, 251, 205, 193, 120, 206, 168, 219, 89, 125, 117, 80, 127, 59, 146, 110, 86, 173, 96, 161, 217, 23, 191, 100, 61, 64, 53, 101, 26, 33, 179, 221, 139, 38, 70, 231, 62, 41, 150, 242, 207, 143, 186}, {78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79}, {156, 94, 26, 
132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15, 103, 77, 150, 239, 108, 96, 190, 17, 169, 215, 167, 44, 180, 160, 223, 51, 230, 100, 244, 116, 193, 253, 124, 85, 55, 172, 1, 156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15, 103, 77, 150, 239}, {37, 101, 208, 168, 150, 195, 173, 39, 47, 26, 21, 219, 242, 54, 96, 97, 68, 33, 241, 89, 207, 12, 161, 134, 169, 179, 166, 125, 143, 185, 217, 184, 252, 221, 44, 117, 186, 182, 23, 145, 56, 139, 45, 80, 223, 102, 191, 7, 251, 38, 10, 127, 197, 115, 100, 245, 205, 70, 107, 59, 228, 130, 61, 58, 193, 231, 237, 146, 87, 36, 64, 181, 120, 62, 85, 110, 138, 8, 53}, {74, 137, 206, 82, 55, 138, 16, 212, 120, 124, 73, 87, 72, 29, 193, 211, 147, 228, 25, 244, 205, 140, 177, 197, 230, 141, 251, 76, 40, 223, 204, 198, 56, 11, 180, 186, 113, 92, 252, 167, 176, 143, 111, 67, 169, 123, 162, 207, 24, 190, 68, 66, 227, 242, 108, 157, 47, 52, 84, 150, 155, 142, 37, 202, 103, 41, 149, 69, 8, 106, 60, 62, 170, 165, 36, 128, 238, 231, 199}, {148, 30, 62, 73, 174, 61, 232, 140, 127, 51, 99, 56, 22, 234, 185, 67, 79, 241, 121, 108, 39, 188, 189, 41, 55, 9, 64, 238, 211, 59, 183, 200, 251, 152, 160, 182, 92, 229, 166, 233, 24, 97, 13, 42, 150, 43, 2, 53, 60, 124, 146, 65, 122, 205, 5, 254, 102, 198, 112, 44, 201, 111, 134, 158, 255, 242, 216, 78, 101, 103, 82, 110, 18, 128, 193, 187, 118, 115, 141}, {53, 120, 237, 228, 100, 251, 45, 186, 217, 169, 241, 242, 173, 37, 15, 62, 146, 130, 245, 38, 80, 182, 184, 179, 89, 54, 39, 101, 206, 85, 87, 61, 205, 10, 223, 23, 252, 166, 207, 96, 47, 208, 41, 110, 36, 58, 70, 127, 102, 145, 221, 125, 12, 97, 26, 168, 196, 138, 64, 193, 107, 197, 191, 56, 44, 143, 161, 68, 21, 150, 86, 8, 181, 231, 59, 115, 7, 139, 117}, {106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180, 185, 17, 132, 150, 172, 32, 193, 214, 51, 145, 167, 233, 96, 94, 103, 85, 174, 
244, 38, 160, 226, 169, 255, 239, 1, 106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180, 185, 17}, {212, 211, 197, 198, 167, 207, 157, 202, 62, 114, 200, 139, 201, 95, 26, 154, 220, 61, 19, 160, 217, 158, 171, 86, 32, 159, 127, 133, 229, 89, 216, 74, 120, 147, 230, 56, 176, 24, 47, 103, 170, 130, 243, 90, 185, 34, 42, 196, 18, 116, 10, 91, 109, 241, 239, 2, 181, 187, 151, 145, 83, 131, 39, 137, 124, 228, 141, 11, 143, 190, 52, 41, 165, 122, 38, 93, 175, 33, 75}, {181, 107, 102, 252, 89, 173, 53, 231, 197, 145, 166, 54, 37, 120, 59, 191, 221, 207, 39, 15, 237, 115, 56, 125, 96, 101, 62, 228, 7, 44, 12, 47, 206, 146, 100, 139, 143, 97, 208, 85, 130, 251, 117, 161, 26, 41, 87, 245, 45, 185, 68, 168, 110, 61, 38, 186, 134, 21, 196, 36, 205, 80, 217, 33, 150, 138, 58, 10, 182, 169, 219, 86, 64, 70, 223, 184, 241, 195, 8}, {119, 177, 23, 123, 239, 8, 159, 225, 184, 255, 43, 64, 140, 91, 169, 171, 69, 58, 20, 226, 33, 49, 18, 205, 160, 67, 21, 149, 144, 38, 105, 34, 168, 220, 244, 45, 111, 13, 41, 174, 243, 117, 95, 104, 85, 25, 203, 143, 194, 103, 146, 200, 22, 12, 94, 31, 228, 14, 176, 96, 202, 248, 115, 112, 233, 39, 30, 147, 191, 167, 27, 37, 240, 236, 145, 81, 216, 53, 211}, {238, 254, 184, 227, 172, 58, 40, 175, 21, 55, 122, 45, 222, 52, 85, 50, 11, 12, 188, 124, 115, 224, 131, 37, 253, 151, 252, 121, 2, 193, 225, 109, 219, 69, 116, 80, 67, 42, 110, 244, 90, 161, 104, 170, 100, 22, 24, 101, 248, 230, 221, 27, 74, 231, 51, 229, 242, 4, 159, 223, 218, 171, 138, 232, 160, 134, 84, 220, 245, 180, 95, 208, 73, 200, 44, 48, 202, 237, 209}, {193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44}, {159, 91, 33, 149, 244, 117, 
194, 31, 115, 167, 216, 181, 254, 218, 150, 72, 152, 161, 189, 114, 56, 131, 148, 107, 46, 227, 138, 135, 210, 26, 170, 141, 125, 78, 253, 102, 123, 43, 58, 160, 34, 41, 25, 22, 96, 30, 236, 252, 249, 32, 10, 175, 84, 87, 235, 6, 101, 199, 198, 89, 2, 35, 182, 66, 55, 245, 234, 153, 62, 230, 83, 173, 119, 225, 169, 49, 144, 45, 95}, {35, 113, 21, 165, 235, 12, 137, 118, 252, 239, 128, 80, 34, 82, 100, 176, 78, 231, 133, 255, 138, 19, 111, 208, 114, 112, 54, 212, 254, 169, 98, 122, 117, 153, 124, 191, 162, 2, 70, 226, 42, 87, 203, 24, 15, 236, 229, 195, 29, 160, 68, 164, 200, 125, 156, 211, 23, 227, 9, 38, 222, 189, 228, 224, 108, 181, 225, 79, 196, 244, 234, 47, 248, 99, 89, 4, 140, 217, 84}, {70, 217, 168, 130, 44, 39, 231, 23, 219, 36, 45, 97, 62, 191, 89, 8, 10, 134, 41, 100, 125, 37, 107, 184, 150, 61, 117, 47, 237, 145, 242, 64, 80, 68, 85, 7, 207, 53, 127, 169, 196, 245, 143, 101, 59, 252, 195, 58, 186, 26, 146, 56, 54, 181, 223, 33, 110, 251, 12, 15, 197, 179, 86, 205, 185, 208, 228, 221, 173, 193, 182, 21, 87, 139, 96, 120, 102, 241, 138}}; + +void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/vector.c b/src/kem/hqc/hqc-rmrs-128/avx2/vector.c new file mode 100644 index 00000000..1fe644ec --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/vector.c @@ -0,0 +1,178 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +#include +/** + * @file vector.c + * @brief Implementation of vectors sampling and some utilities for the HQC scheme + */ + + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. 
+ * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose that PARAM_N is equal to \f$ 70853 \f$; to select a position \f$ r\f$ the function works as follows: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It returns \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and is denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). + * + * A sampled position that has already been drawn is discarded and drawn again, so weight distinct positions are produced. + * + * @param[in] ctx Pointer to the context of the seed expander + * @param[in,out] v Pointer to the array into which the bit of each sampled position is XORed + * @param[in] weight Integer that is the Hamming weight; it indexes local arrays of PARAM_OMEGA_R entries, so it must not exceed PARAM_OMEGA_R + */ +void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) { + size_t random_bytes_size = 3 * weight; + uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; + uint32_t tmp[PARAM_OMEGA_R] = {0}; + __m256i bit256[PARAM_OMEGA_R]; + __m256i bloc256[PARAM_OMEGA_R]; + __m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0); + __m256i pos256; + __m256i mask256; + __m256i aux; + __m256i i256; + uint64_t bloc, pos, bit64; + uint8_t inc; + size_t i, j, k; + + i = 0; + j = random_bytes_size; + while (i < weight) { + do { + if (j == random_bytes_size) { + seedexpander(ctx, rand_bytes, random_bytes_size); + j = 0; + } + + tmp[i] = ((uint32_t) rand_bytes[j++]) << 16; + tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8; + tmp[i] |= rand_bytes[j++]; + + } while (tmp[i] >= UTILS_REJECTION_THRESHOLD); + + tmp[i] = tmp[i] % PARAM_N; + + inc = 1; + for (k = 0; k < i; k++) { + if (tmp[k] == tmp[i]) { + inc = 0; + } + } + i += inc; + } + + for (i = 0; i < weight; i++) { + // we store the bloc number and bit position of each vb[i] + bloc = tmp[i] >> 6; + bloc256[i] = _mm256_set1_epi64x(bloc >> 2); + pos = (bloc & 0x3UL); + pos256 = _mm256_set1_epi64x(pos); + mask256 = _mm256_cmpeq_epi64(pos256, posCmp256); +
bit64 = 1ULL << (tmp[i] & 0x3f); + bit256[i] = _mm256_set1_epi64x(bit64)&mask256; + } + + for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) { + aux = _mm256_loadu_si256(((__m256i *)v) + i); + i256 = _mm256_set1_epi64x(i); + + for (j = 0; j < weight; j++) { + mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); + aux ^= bit256[j] & mask256; + } + _mm256_storeu_si256(((__m256i *)v) + i, aux); + } + +} + + + +/** + * @brief Generates a random vector of dimension PARAM_N + * + * This function generates a random binary vector of dimension PARAM_N. It generates a random + * array of VEC_N_SIZE_BYTES bytes using the seedexpander function, loads it into v, and drops the + * extra bits of the last word using the mask RED_MASK. + * + * @param[in] ctx Pointer to the context of the seed expander + * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector + */ +void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) { + uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0}; + + seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES); + + PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES); + v[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief Adds two vectors (word-wise XOR) + * + * @param[out] o Pointer to an array that is the result + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors, counted in 64-bit words + */ +void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { + for (uint32_t i = 0; i < size; ++i) { + o[i] = v1[i] ^ v2[i]; + } +} + + + +/** + * @brief Compares two vectors + * + * All size bytes are always read: the byte-wise differences are OR-ed together and the result is + * then reduced to a single bit, so there is no early exit on the first mismatch. + * + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors in bytes + * @returns 0 if the vectors are equal and 1 otherwise + */ +uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) { + uint64_t r = 0; + for (size_t i = 0; i <
size; i++) { + r |= v1[i] ^ v2[i]; + } + r = (~r + 1) >> 63; + return (uint8_t) r; +} + + + +/** + * @brief Resize a vector so that it contains size_o bits + * + * If size_o < size_v, VEC_N1N2_SIZE_BYTES bytes are copied whatever the value of size_o, and when + * size_o is not a multiple of 64 the 64 - (size_o % 64) most significant bits of word + * VEC_N1N2_SIZE_64 - 1 of o are cleared. + * NOTE(review): the shrinking branch therefore looks correct only when size_o is the N1N2 length - confirm against the callers. + * Otherwise CEIL_DIVIDE(size_v, 8) bytes are copied and the remaining bytes of o are left untouched. + * + * @param[out] o Pointer to the output vector + * @param[in] size_o Integer that is the size of the output vector in bits + * @param[in] v Pointer to the input vector + * @param[in] size_v Integer that is the size of the input vector in bits + */ +void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) { + uint64_t mask = 0x7FFFFFFFFFFFFFFF; + int8_t val = 0; + if (size_o < size_v) { + if (size_o % 64) { + val = 64 - (size_o % 64); + } + + memcpy(o, v, VEC_N1N2_SIZE_BYTES); + + for (int8_t i = 0; i < val; ++i) { + o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); + } + } else { + memcpy(o, v, CEIL_DIVIDE(size_v, 8)); + } +} diff --git a/src/kem/hqc/hqc-rmrs-128/avx2/vector.h b/src/kem/hqc/hqc-rmrs-128/avx2/vector.h new file mode 100644 index 00000000..cba09fef --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/avx2/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + + +/** + * @file vector.h + * @brief Header file for vector.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v); + +void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_from_randombytes(uint64_t *v); + + +void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size); + +uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size); + +void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt b/src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt new file mode 100644 index 00000000..3ff4cbb0 --- /dev/null
+++ b/src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt @@ -0,0 +1,16 @@ +set( + SRC_CLEAN_HQCRMRS128 + code.c + fft.c + gf2x.c + gf.c + hqc.c + kem.c + parsing.c + reed_muller.c + reed_solomon.c + vector.c +) + +define_kem_alg(hqcrmrs128_clean + PQCLEAN_HQCRMRS128_CLEAN "${SRC_CLEAN_HQCRMRS128}" "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/kem/hqc/hqc-rmrs-128/clean/api.h b/src/kem/hqc/hqc-rmrs-128/clean/api.h new file mode 100644 index 00000000..87447acc --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/api.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_HQCRMRS128_CLEAN_API_H +#define PQCLEAN_HQCRMRS128_CLEAN_API_H +/** + * @file api.h + * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme + */ + +#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_ALGNAME "HQC-RMRS-128" + +#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES 2289 +#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES 2249 +#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES 64 +#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES 4481 + +// As a technicality, the public key is appended to the secret key in order to respect the NIST API. 
+// Without this constraint, PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32 + +int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk); + +int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/code.c b/src/kem/hqc/hqc-rmrs-128/clean/code.c new file mode 100644 index 00000000..92a853a4 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/code.c @@ -0,0 +1,46 @@ +#include "code.h" +#include "parameters.h" +#include "reed_muller.h" +#include "reed_solomon.h" +#include +#include +/** + * @file code.c + * @brief Implementation of concatenated code + */ + + + +/** + * + * @brief Encoding the message m to a code word em using the concatenated code + * + * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain + * a concatenated code word. 
+ * + * @param[out] em Pointer to an array that is the concatenated code word + * @param[in] m Pointer to an array that is the message + */ +void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *m) { + uint8_t tmp[VEC_N1_SIZE_BYTES] = {0}; + + PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(tmp, m); + PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(em, tmp); + +} + + + +/** + * @brief Decoding the code word em to a message m using the concatenated code + * + * The Reed-Muller decoder first writes its output into a scratch buffer of VEC_N1_SIZE_BYTES bytes, + * which the Reed-Solomon decoder then decodes into m. + * + * @param[out] m Pointer to an array that is the message + * @param[in] em Pointer to an array that is the code word + */ +void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em) { + uint8_t tmp[VEC_N1_SIZE_BYTES] = {0}; + + PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(tmp, em); + PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(m, tmp); + +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/code.h b/src/kem/hqc/hqc-rmrs-128/clean/code.h new file mode 100644 index 00000000..d7c439fa --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/code.h @@ -0,0 +1,18 @@ +#ifndef CODE_H +#define CODE_H + + +/** + * @file code.h + * Header file of code.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *message); + +void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/fft.c b/src/kem/hqc/hqc-rmrs-128/clean/fft.c new file mode 100644 index 00000000..83a47013 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/fft.c @@ -0,0 +1,351 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include +#include +/** + * @file fft.c + * Implementation of the additive FFT and its transpose. + * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf + */ + + +static void compute_fft_betas(uint16_t *betas); +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size); +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); + + +/** + * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose + * + * @param[out] betas Array of size PARAM_M-1 + */ +static void compute_fft_betas(uint16_t *betas) { + size_t i; + for (i = 0; i < PARAM_M - 1; ++i) { + betas[i] = 1 << (PARAM_M - 1 - i); + } +} + + + +/** + * @brief Computes the subset sums of the given set + * + * The array subset_sums is such that its ith element is + * the subset sum of the set elements given by the binary form of i. 
+ * + * @param[out] subset_sums Array of size 2^set_size receiving the subset sums + * @param[in] set Array of set_size elements + * @param[in] set_size Size of the array set + */ +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) { + uint16_t i, j; + subset_sums[0] = 0; + + for (i = 0; i < set_size; ++i) { + for (j = 0; j < (1 << i); ++j) { + subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; + } + } +} + + + +/** + * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x] + * + * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x) + * as proposed by Bernstein, Chou and Schwabe: + * https://binary.cr.yp.to/mcbits-20130616.pdf + * + * @param[out] f0 Array half the size of f + * @param[out] f1 Array half the size of f + * @param[in] f Array of size a power of 2 + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f + */ +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + switch (m_f) { + case 4: + f0[4] = f[8] ^ f[12]; + f0[6] = f[12] ^ f[14]; + f0[7] = f[14] ^ f[15]; + f1[5] = f[11] ^ f[13]; + f1[6] = f[13] ^ f[14]; + f1[7] = f[15]; + f0[5] = f[10] ^ f[12] ^ f1[5]; + f1[4] = f[9] ^ f[13] ^ f0[5]; + + f0[0] = f[0]; + f1[3] = f[7] ^ f[11] ^ f[15]; + f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3]; + f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3]; + f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3]; + f1[2] = f[3] ^ f1[1] ^ f0[3]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 3: + f0[0] = f[0]; + f0[2] = f[4] ^ f[6]; + f0[3] = f[6] ^ f[7]; + f1[1] = f[3] ^ f[5] ^ f[7]; + f1[2] = f[5] ^ f[6]; + f1[3] = f[7]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 2: + f0[0] = f[0]; + f0[1] = f[2] ^ f[3]; + f1[0] = f[1] ^ f0[1]; + f1[1] = f[3]; + break; + + case 1: + f0[0] = f[0]; + f1[0] = f[1]; + break; + + default: + radix_big(f0, f1, f, m_f); + break; + } +} + +static void radix_big(uint16_t *f0, 
uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1; + n <<= (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0; i < n; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + + + +/** + * @brief Evaluates f at all subset sums of a given set + * + * This function is a subroutine of the function PQCLEAN_HQCRMRS128_CLEAN_fft. + * + * @param[out] w Array of 2^m elements receiving the evaluations of f + * @param[in,out] f Array of coefficients of f (rescaled in place by powers of betas[m-1] when betas[m-1] != 1) + * @param[in] f_coeffs Number of coefficients of f + * @param[in] m Number of betas + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater than or equal to the number of coefficients of f + * @param[in] betas FFT constants + */ +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; + uint16_t u[1 << (PARAM_M - 2)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; + size_t x; + + // Step 1 + if (m_f == 1) { + for (i = 0; i < m; ++i) { + tmp[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], f[1]); + } + + w[0] = f[0]; + x = 1; + for (j = 0; j < m; ++j) { + for (k = 0; k < x; ++k) { + w[x + k] = w[k] ^ tmp[j]; + } + x <<= 1; + } + + return; + } + + // Step 2: compute g + if (betas[m - 1] != 1) { + beta_m_pow = 1; + x = 1; + x <<= m_f; + for (i = 1; i < x; 
++i) { + beta_m_pow = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); + f[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, f[i]); + } + } + + // Step 3 + radix(f0, f1, f, m_f); + + // Step 4: compute gammas and deltas + for (i = 0; i + 1 < m; ++i) { + gammas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(betas[m - 1])); + deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(gammas[i]) ^ gammas[i]; + } + + // Compute gammas sums + compute_subset_sums(gammas_sums, gammas, m - 1); + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + + k = 1; + k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. + if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant + w[0] = u[0]; + w[k] = u[0] ^ f1[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], f1[0]); + w[k + i] = w[i] ^ f1[0]; + } + } else { + fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas); + + // Step 6 + memcpy(w + k, v, 2 * k); + w[0] = u[0]; + w[k] ^= u[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], v[i]); + w[k + i] ^= w[i]; + } + } +} + + + +/** + * @brief Evaluates f on all field elements using an additive FFT algorithm + * + * f_coeffs is the number of coefficients of f (one more than its degree).
+ * The FFT proceeds recursively to evaluate f at all subset sums of a basis B.
+ * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf
+ * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas, + * meaning the first gammas subset sums are actually the subset sums of betas (except 1).
+ * Also note that f is altered during computation (twisted at each level). + * + * @param[out] w Array + * @param[in] f Array of 2^PARAM_FFT elements + * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) + */ +void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; + + // Follows Gao and Mateer algorithm + compute_fft_betas(betas); + + // Step 1: PARAM_FFT > 1, nothing to do + + // Compute gammas sums + compute_subset_sums(betas_sums, betas, PARAM_M - 1); + + // Step 2: beta_m = 1, nothing to do + + // Step 3 + radix(f0, f1, f, PARAM_FFT); + + // Step 4: Compute deltas + for (i = 0; i < PARAM_M - 1; ++i) { + deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(betas[i]) ^ betas[i]; + } + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + + k = 1 << (PARAM_M - 1); + // Step 6, 7 and error polynomial computation + memcpy(w + k, v, 2 * k); + + // Check if 0 is root + w[0] = u[0]; + + // Check if 1 is root + w[k] ^= u[0]; + + // Find other roots + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas_sums[i], v[i]); + w[k + i] ^= w[i]; + } +} + + + +/** + * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements. 
+ * + * @param[out] error Array with the error + * @param[out] error_compact Array with the error in a compact form + * @param[in] w Array of size 2^PARAM_M + */ +void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t k; + size_t i, index; + + compute_fft_betas(gammas); + compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + + k = 1 << (PARAM_M - 1); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); + + for (i = 1; i < k; ++i) { + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]]; + error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); + + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1]; + error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15); + } +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/fft.h b/src/kem/hqc/hqc-rmrs-128/clean/fft.h new file mode 100644 index 00000000..7c8ddd86 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/fft.h @@ -0,0 +1,18 @@ +#ifndef FFT_H +#define FFT_H + + +/** + * @file fft.h + * Header file of fft.c + */ + +#include +#include + +void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs); + +void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/gf.c b/src/kem/hqc/hqc-rmrs-128/clean/gf.c new file mode 100644 index 00000000..a7a3a1de --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/gf.c @@ -0,0 +1,63 @@ +#include "gf.h" +#include "parameters.h" +#include +/** + * @file gf.c + * Galois field implementation with multiplication using lookup tables + */ + + +/** + * @brief Multiplies two elements a and b of GF(2^PARAM_M) + * @returns the product a*b (0 if a or b is zero) + * @param[in] a First element of GF(2^PARAM_M) to multiply (may be zero) + * @param[in] b Second element of GF(2^PARAM_M) to multiply (may be zero) + */ +uint16_t 
PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b) { + uint16_t mask; + mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + mask &= (uint16_t) (-((int32_t) b) >> 31); // b != 0 + return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(gf_log[a] + gf_log[b])]; +} + + + +/** + * @brief Squares an element of GF(2^PARAM_M) + * @returns a^2 (0 when a is 0) + * @param[in] a Element of GF(2^PARAM_M) + */ +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a) { + int16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(2 * gf_log[a])]; +} + + + +/** + * @brief Computes the inverse of an element of GF(2^PARAM_M) + * @returns the inverse of a (0 when a is 0, since the mask clears the result) + * @param[in] a Element of GF(2^PARAM_M) + */ +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a) { + int16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]]; +} + + + +/** + * @brief Returns i modulo 2^PARAM_M-1 + * i must be less than 2*(2^PARAM_M-1). + * Therefore, the return value is either i or i-2^PARAM_M+1. + * @returns i mod (2^PARAM_M-1) + * @param[in] i The integer whose modulo is taken + */ +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i) { + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); + + // mask = 0xffff if(i < PARAM_GF_MUL_ORDER) + uint16_t mask = -(tmp >> 15); + + return tmp + (mask & PARAM_GF_MUL_ORDER); +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/gf.h b/src/kem/hqc/hqc-rmrs-128/clean/gf.h new file mode 100644 index 00000000..16f753ea --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/gf.h @@ -0,0 +1,39 @@ +#ifndef GF_H +#define GF_H + + +/** + * @file gf.h + * Header file of gf.c + */ + +#include +#include + + +/** + * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8. + * The last two elements are needed by the PQCLEAN_HQCRMRS128_CLEAN_gf_mul function + * (for example if both elements to multiply are zero). 
+ */ +static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 }; + + + +/** + * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8). + * The logarithm of 0 is set to 0 by convention. 
+ */ +static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 }; + + +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b); + +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/gf2x.c b/src/kem/hqc/hqc-rmrs-128/clean/gf2x.c new file mode 100644 index 00000000..1923ab2c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/gf2x.c @@ -0,0 +1,154 @@ +#include "gf2x.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include +/** + * \file gf2x.c + * \brief Implementation of multiplication of two polynomials + */ + + +static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2); +static void 
reduce(uint64_t *o, const uint64_t *a); +static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx); + +/** + * @brief swap two elements in a table + * + * This function exchanges tab[elt1] with tab[elt2] + * + * @param[in,out] tab Pointer to the table + * @param[in] elt1 Index of the first element + * @param[in] elt2 Index of the second element + */ +static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) { + uint16_t tmp = tab[elt1]; + + tab[elt1] = tab[elt2]; + tab[elt2] = tmp; +} + + + +/** + * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$ + * + * This function computes the modular reduction of the polynomial a(x) + * + * @param[in] a Pointer to the polynomial a(x) + * @param[out] o Pointer to the result + */ +static void reduce(uint64_t *o, const uint64_t *a) { + size_t i; + uint64_t r; + uint64_t carry; + + for (i = 0; i < VEC_N_SIZE_64; i++) { + r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); + carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); + o[i] = a[i] ^ r ^ carry; + } + + o[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief computes product of the sparse polynomial a1 with the dense polynomial a2(x) + * + * o(x) = a1(x)a2(x) + * + * @param[out] o Pointer to the result (shifted copies of a2 are XOR-accumulated into it) + * @param[in] a1 Pointer to the sparse polynomial a1 (list of degrees of the monomials which appear in a1) + * @param[in] a2 Pointer to the dense polynomial a2(x) + * @param[in] weight Hamming weight of the sparse polynomial a1 + * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process + */ +static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { +//static uint32_t fast_convolution_mult(const uint64_t *A, const uint32_t *vB, uint64_t *C, const uint16_t w, AES_XOF_struct *ctx) + uint64_t carry; + uint32_t dec, s; + uint64_t table[16 * (VEC_N_SIZE_64 + 1)]; + uint16_t permuted_table[16]; + uint16_t permutation_table[16]; + uint16_t 
permutation_table[16]; + uint16_t permuted_sparse_vect[PARAM_OMEGA_E]; + uint16_t permutation_sparse_vect[PARAM_OMEGA_E]; + uint64_t tmp; + uint64_t *pt; + uint8_t *res; + size_t i, j; + + for (i = 0; i < 16; i++) { + permuted_table[i] = (uint16_t) i; + } + + seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); + + for (i = 0; i < 15; i++) { + swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); + } + + pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); + for (j = 0; j < VEC_N_SIZE_64; j++) { + pt[j] = a2[j]; + } + pt[VEC_N_SIZE_64] = 0x0; + + for (i = 1; i < 16; i++) { + carry = 0; + pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); + for (j = 0; j < VEC_N_SIZE_64; j++) { + pt[j] = (a2[j] << i) ^ carry; + carry = (a2[j] >> ((64 - i))); + } + pt[VEC_N_SIZE_64] = carry; + } + + for (i = 0; i < weight; i++) { + permuted_sparse_vect[i] = (uint16_t) i; + } + + seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); + + for (i = 0; i + 1 < weight; i++) { + swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i))); + } + + for (i = 0; i < weight; i++) { + dec = a1[permuted_sparse_vect[i]] & 0xf; + s = a1[permuted_sparse_vect[i]] >> 4; + res = o + 2 * s; + pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); + + for (j = 0; j < VEC_N_SIZE_64 + 1; j++) { + tmp = PQCLEAN_HQCRMRS128_CLEAN_load8(res); + PQCLEAN_HQCRMRS128_CLEAN_store8(res, tmp ^ pt[j]); + res += 8; + } + } +} + + + +/** + * @brief Multiply two polynomials modulo \f$ X^n - 1\f$. + * + * This functions multiplies a sparse polynomial a1 (of Hamming weight equal to weight) + * and a dense polynomial a2. The multiplication is done modulo \f$ X^n - 1\f$. 
+ * + * @param[out] o Pointer to the result + * @param[in] a1 Pointer to the sparse polynomial + * @param[in] a2 Pointer to the dense polynomial + * @param[in] weight Integer that is the weigt of the sparse polynomial + * @param[in] ctx Pointer to the randomness context + */ +void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { + uint64_t tmp[2 * VEC_N_SIZE_64 + 1] = {0}; + + fast_convolution_mult((uint8_t *) tmp, a1, a2, weight, ctx); + PQCLEAN_HQCRMRS128_CLEAN_load8_arr(tmp, 2 * VEC_N_SIZE_64 + 1, (uint8_t *) tmp, sizeof(tmp)); + reduce(o, tmp); +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/gf2x.h b/src/kem/hqc/hqc-rmrs-128/clean/gf2x.h new file mode 100644 index 00000000..1b353318 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/gf2x.h @@ -0,0 +1,16 @@ +#ifndef GF2X_H +#define GF2X_H + + +/** + * @file gf2x.h + * @brief Header file for gf2x.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/hqc.c b/src/kem/hqc/hqc-rmrs-128/clean/hqc.c new file mode 100644 index 00000000..8784986f --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/hqc.c @@ -0,0 +1,144 @@ +#include "code.h" +#include "gf2x.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +/** + * @file hqc.c + * @brief Implementation of hqc.h + */ + + + +/** + * @brief Keygen of the HQC_PKE IND_CPA scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. 
+ * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) { + AES_XOF_struct sk_seedexpander; + AES_XOF_struct pk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + uint8_t pk_seed[SEED_BYTES] = {0}; + uint64_t x[VEC_N_SIZE_64] = {0}; + uint32_t y[PARAM_OMEGA] = {0}; + uint64_t h[VEC_N_SIZE_64] = {0}; + uint64_t s[VEC_N_SIZE_64] = {0}; + + // Create seed_expanders for public key and secret key + randombytes(sk_seed, SEED_BYTES); + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + randombytes(pk_seed, SEED_BYTES); + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + // Compute secret key + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA); + + // Compute public key: s = x + y.h + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&pk_seedexpander, h); + PQCLEAN_HQCRMRS128_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &sk_seedexpander); + PQCLEAN_HQCRMRS128_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64); + + // Parse keys to string + PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(pk, pk_seed, s); + PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(sk, sk_seed, pk); + +} + + + +/** + * @brief Encryption of the HQC_PKE IND_CPA scheme + * + * The ciphertext is composed of vectors u and v. 
+ * + * @param[out] u Vector u (first part of the ciphertext) + * @param[out] v Vector v (second part of the ciphertext) + * @param[in] m Vector representing the message to encrypt + * @param[in] theta Seed used to derive randomness required for encryption + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) { + AES_XOF_struct seedexpander; + uint64_t h[VEC_N_SIZE_64] = {0}; + uint64_t s[VEC_N_SIZE_64] = {0}; + uint64_t r1[VEC_N_SIZE_64] = {0}; + uint32_t r2[PARAM_OMEGA_R] = {0}; + uint64_t e[VEC_N_SIZE_64] = {0}; + uint64_t tmp1[VEC_N_SIZE_64] = {0}; + uint64_t tmp2[VEC_N_SIZE_64] = {0}; + + // Create seed_expander from theta + seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH); + + // Retrieve h and s from public key + PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(h, s, pk); + + // Generate r1, r2 and e + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&seedexpander, r2, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E); + + // Compute u = r1 + r2.h + PQCLEAN_HQCRMRS128_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &seedexpander); + PQCLEAN_HQCRMRS128_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64); + + // Compute v = m.G by encoding the message + PQCLEAN_HQCRMRS128_CLEAN_code_encode((uint8_t *)v, m); + PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES); + PQCLEAN_HQCRMRS128_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + + // Compute v = m.G + s.r2 + e + PQCLEAN_HQCRMRS128_CLEAN_vect_mul(tmp2, r2, s, PARAM_OMEGA_R, &seedexpander); + PQCLEAN_HQCRMRS128_CLEAN_vect_add(tmp2, e, tmp2, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS128_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS128_CLEAN_vect_resize(v, 
PARAM_N1N2, tmp2, PARAM_N); + +} + + + +/** + * @brief Decryption of the HQC_PKE IND_CPA scheme + * + * @param[out] m Vector representing the decrypted message + * @param[in] u Vector u (first part of the ciphertext) + * @param[in] v Vector v (second part of the ciphertext) + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) { + uint8_t pk[PUBLIC_KEY_BYTES] = {0}; + uint64_t tmp1[VEC_N_SIZE_64] = {0}; + uint64_t tmp2[VEC_N_SIZE_64] = {0}; + uint32_t y[PARAM_OMEGA] = {0}; + AES_XOF_struct perm_seedexpander; + uint8_t perm_seed[SEED_BYTES] = {0}; + + // Retrieve x, y, pk from secret key + PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(tmp1, y, pk, sk); + + randombytes(perm_seed, SEED_BYTES); + seedexpander_init(&perm_seedexpander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + // Compute v - u.y + PQCLEAN_HQCRMRS128_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + PQCLEAN_HQCRMRS128_CLEAN_vect_mul(tmp2, y, u, PARAM_OMEGA, &perm_seedexpander); + PQCLEAN_HQCRMRS128_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64); + + + // Compute m by decoding v - u.y + PQCLEAN_HQCRMRS128_CLEAN_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS128_CLEAN_code_decode(m, (uint8_t *)tmp1); +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/hqc.h b/src/kem/hqc/hqc-rmrs-128/clean/hqc.h new file mode 100644 index 00000000..c7344f3a --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/hqc.h @@ -0,0 +1,19 @@ +#ifndef HQC_H +#define HQC_H + + +/** + * @file hqc.h + * @brief Functions of the HQC_PKE IND_CPA scheme + */ + +#include + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk); + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk); + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, 
const uint64_t *v, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/kem.c b/src/kem/hqc/hqc-rmrs-128/clean/kem.c new file mode 100644 index 00000000..dd49c3a6 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/kem.c @@ -0,0 +1,140 @@ +#include "api.h" +#include "fips202.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "sha2.h" +#include "vector.h" +#include +#include +/** + * @file kem.c + * @brief Implementation of api.h + */ + + + +/** + * @brief Keygen of the HQC_KEM IND_CCA2 scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + * @returns 0 if keygen is successful + */ +int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + + PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(pk, sk); + return 0; +} + + + +/** + * @brief Encapsulation of the HQC_KEM IND_CCA2 scheme + * + * @param[out] ct String containing the ciphertext + * @param[out] ss String containing the shared secret + * @param[in] pk String containing the public key + * @returns 0 if encapsulation is successful + */ +int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + + uint8_t theta[SHA512_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint64_t u[VEC_N_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Computing m + randombytes(m, VEC_K_SIZE_BYTES); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // 
Encrypting m + PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk); + + // Computing d + sha512(d, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Computing ciphertext + PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(ct, u, v, d); + + + return 0; +} + + + +/** + * @brief Decapsulation of the HQC_KEM IND_CCA2 scheme + * + * @param[out] ss String containing the shared secret + * @param[in] ct String containing the ciphertext + * @param[in] sk String containing the secret key + * @returns 0 if decapsulation is successful, -1 otherwise + */ +int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + + uint8_t result; + uint64_t u[VEC_N_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char pk[PUBLIC_KEY_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint8_t theta[SHA512_BYTES] = {0}; + uint64_t u2[VEC_N_SIZE_64] = {0}; + uint64_t v2[VEC_N1N2_SIZE_64] = {0}; + unsigned char d2[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Retrieving u, v and d from ciphertext + PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(u, v, d, ct); + + // Retrieving pk from sk + memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES); + + // Decrypting + PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(m, u, v, sk); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m' + PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk); + + // Computing d' + sha512(d2, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, 
VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Abort if c != c' or d != d' + result = PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare(d, d2, SHA512_BYTES); + result = (uint8_t) (-((int16_t) result) >> 15); + for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { + ss[i] &= ~result; + } + + + return -(result & 1); +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/parameters.h b/src/kem/hqc/hqc-rmrs-128/clean/parameters.h new file mode 100644 index 00000000..2ecb16d1 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/parameters.h @@ -0,0 +1,98 @@ +#ifndef HQC_PARAMETERS_H +#define HQC_PARAMETERS_H + + +/** + * @file parameters.h + * @brief Parameters of the HQC_KEM IND-CCA2 scheme + */ +#include "api.h" + + +#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/ + +/* + #define PARAM_N Define the parameter n of the scheme + #define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code) + #define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code) + #define PARAM_N1N2 Define the length in bits of the Concatenated code + #define PARAM_OMEGA Define the parameter omega of the scheme + #define PARAM_OMEGA_E Define the parameter omega_e of the scheme + #define PARAM_OMEGA_R Define the parameter omega_r of the scheme + #define PARAM_SECURITY Define the security level corresponding to the chosen parameters + #define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters + + #define SECRET_KEY_BYTES Define 
the size of the secret key in bytes + #define PUBLIC_KEY_BYTES Define the size of the public key in bytes + #define SHARED_SECRET_BYTES Define the size of the shared secret in bytes + #define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes + + #define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function) + #define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes + #define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes + #define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes + #define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes + + #define VEC_N_SIZE_64 Define the size of the array used to store a PARAM_N sized vector in 64 bits + #define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits + #define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits + #define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits + + #define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code) + #define PARAM_M Define a positive integer + #define PARAM_GF_POLY Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form + #define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1 + #define PARAM_K Define the size of the information bits of the Reed-Solomon code + #define PARAM_G Define the size of the generator polynomial of Reed-Solomon code + #define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input + We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=15 + The smallest power of 2 greater than 15+1 is 32=2^5 + #define 
RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code + + #define RED_MASK A mask fot the higher bits of a vector + #define SHA512_BYTES Define the size of SHA512 output in bytes + #define SEED_BYTES Define the size of the seed in bytes + #define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length +*/ + +#define PARAM_N 17669 +#define PARAM_N1 46 +#define PARAM_N2 384 +#define PARAM_N1N2 17664 +#define PARAM_OMEGA 66 +#define PARAM_OMEGA_E 75 +#define PARAM_OMEGA_R 75 +#define PARAM_SECURITY 128 +#define PARAM_DFR_EXP 128 + +#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES +#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES +#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES +#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES + +#define UTILS_REJECTION_THRESHOLD 16767881 +#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8) +#define VEC_K_SIZE_BYTES PARAM_K +#define VEC_N1_SIZE_BYTES PARAM_N1 +#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8) + +#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64) +#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8) +#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8) +#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64) + +#define PARAM_DELTA 15 +#define PARAM_M 8 +#define PARAM_GF_POLY 0x11D +#define PARAM_GF_MUL_ORDER 255 +#define PARAM_K 16 +#define PARAM_G 31 +#define PARAM_FFT 5 +#define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1 + +#define RED_MASK 0x1f +#define SHA512_BYTES 64 +#define SEED_BYTES 40 +#define SEEDEXPANDER_MAX_LENGTH 4294967295 + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/parsing.c b/src/kem/hqc/hqc-rmrs-128/clean/parsing.c new file mode 100644 index 00000000..fe9e73b7 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/parsing.c @@ -0,0 +1,186 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include 
"parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file parsing.c + * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme + */ + + +void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in) { + out[0] = (in >> 0x00) & 0xFF; + out[1] = (in >> 0x08) & 0xFF; + out[2] = (in >> 0x10) & 0xFF; + out[3] = (in >> 0x18) & 0xFF; + out[4] = (in >> 0x20) & 0xFF; + out[5] = (in >> 0x28) & 0xFF; + out[6] = (in >> 0x30) & 0xFF; + out[7] = (in >> 0x38) & 0xFF; +} + + +uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in) { + uint64_t ret = in[7]; + + for (int8_t i = 6; i >= 0; i--) { + ret <<= 8; + ret |= in[i]; + } + + return ret; +} + +void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) { + size_t index_in = 0; + size_t index_out = 0; + + // first copy by 8 bytes + if (inlen >= 8 && outlen >= 1) { + while (index_out < outlen && index_in + 8 <= inlen) { + out64[index_out] = PQCLEAN_HQCRMRS128_CLEAN_load8(in8 + index_in); + + index_in += 8; + index_out += 1; + } + } + + // we now need to do the last 7 bytes if necessary + if (index_in >= inlen || index_out >= outlen) { + return; + } + out64[index_out] = in8[inlen - 1]; + for (int8_t i = (int8_t)(inlen - index_in) - 2; i >= 0; i--) { + out64[index_out] <<= 8; + out64[index_out] |= in8[index_in + i]; + } +} + +void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) { + for (size_t index_out = 0, index_in = 0; index_out < outlen && index_in < inlen;) { + out8[index_out] = (in64[index_in] >> ((index_out % 8) * 8)) & 0xFF; + index_out++; + if (index_out % 8 == 0) { + index_in++; + } + } +} + + +/** + * @brief Parse a secret key into a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. 
+ * + * @param[out] sk String containing the secret key + * @param[in] sk_seed Seed used to generate the secret key + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) { + memcpy(sk, sk_seed, SEED_BYTES); + sk += SEED_BYTES; + memcpy(sk, pk, PUBLIC_KEY_BYTES); +} + +/** + * @brief Parse a secret key from a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] x uint64_t representation of vector x + * @param[out] y uint32_t representation of vector y + * @param[out] pk String containing the public key + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) { + AES_XOF_struct sk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + + memcpy(sk_seed, sk, SEED_BYTES); + sk += SEED_BYTES; + memcpy(pk, sk, PUBLIC_KEY_BYTES); + + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA); +} + +/** + * @brief Parse a public key into a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] pk String containing the public key + * @param[in] pk_seed Seed used to generate the public key + * @param[in] s uint8_t representation of vector s + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) { + memcpy(pk, pk_seed, SEED_BYTES); + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64); +} + + + +/** + * @brief Parse a 
public key from a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] h uint8_t representation of vector h + * @param[out] s uint8_t representation of vector s + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) { + AES_XOF_struct pk_seedexpander; + uint8_t pk_seed[SEED_BYTES] = {0}; + + memcpy(pk_seed, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_HQCRMRS128_CLEAN_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES); + + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&pk_seedexpander, h); +} + + +/** + * @brief Parse a ciphertext into a string + * + * The ciphertext is composed of vectors u, v and hash d. + * + * @param[out] ct String containing the ciphertext + * @param[in] u uint8_t representation of vector u + * @param[in] v uint8_t representation of vector v + * @param[in] d String containing the hash d + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) { + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS128_CLEAN_store8_arr(ct, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(ct, d, SHA512_BYTES); +} + + +/** + * @brief Parse a ciphertext from a string + * + * The ciphertext is composed of vectors u, v and hash d. 
+ * + * @param[out] u uint8_t representation of vector u + * @param[out] v uint8_t representation of vector v + * @param[out] d String containing the hash d + * @param[in] ct String containing the ciphertext + */ +void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) { + PQCLEAN_HQCRMRS128_CLEAN_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, ct, VEC_N1N2_SIZE_BYTES); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(d, ct, SHA512_BYTES); +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/parsing.h b/src/kem/hqc/hqc-rmrs-128/clean/parsing.h new file mode 100644 index 00000000..f351af7b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/parsing.h @@ -0,0 +1,36 @@ +#ifndef PARSING_H +#define PARSING_H + + +/** + * @file parsing.h + * @brief Header file for parsing.c + */ + +#include + +void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in); + +uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in); + +void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen); + +void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen); + + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk); + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk); + + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s); + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk); + + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d); + +void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct); + + 
+#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c b/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c new file mode 100644 index 00000000..1273d9f9 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c @@ -0,0 +1,237 @@ +#include "parameters.h" +#include "reed_muller.h" +#include +#include +/** + * @file reed_muller.c + * Constant time implementation of Reed-Muller code RM(1,7) + */ + + + +// number of repeated code words +#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) + +// copy bit 0 into all bits of a 32 bit value +#define BIT0MASK(x) (-((x) & 1)) + + +static void encode(uint8_t *word, uint8_t message); +static void hadamard(uint16_t src[128], uint16_t dst[128]); +static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]); +static uint8_t find_peaks(const uint16_t transform[128]); + + + +/** + * @brief Encode a single byte into a single codeword using RM(1,7) + * + * Encoding matrix of this code: + * bit pattern (note that bits are numbered big endian) + * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa + * 1 cccccccc cccccccc cccccccc cccccccc + * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0 + * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00 + * 4 ffff0000 ffff0000 ffff0000 ffff0000 + * 5 ffffffff 00000000 ffffffff 00000000 + * 6 ffffffff ffffffff 00000000 00000000 + * 7 ffffffff ffffffff ffffffff ffffffff + * + * @param[out] word An RM(1,7) codeword + * @param[in] message A message + */ +static void encode(uint8_t *word, uint8_t message) { + uint32_t e; + // bit 7 flips all the bits, do that first to save work + e = BIT0MASK(message >> 7); + // bits 0, 1, 2, 3, 4 are the same for all four longs + // (Warning: in the bit matrix above, low bits are at the left!) 
+ e ^= BIT0MASK(message >> 0) & 0xaaaaaaaa; + e ^= BIT0MASK(message >> 1) & 0xcccccccc; + e ^= BIT0MASK(message >> 2) & 0xf0f0f0f0; + e ^= BIT0MASK(message >> 3) & 0xff00ff00; + e ^= BIT0MASK(message >> 4) & 0xffff0000; + // we can store this in the first quarter + word[0 + 0] = (e >> 0x00) & 0xff; + word[0 + 1] = (e >> 0x08) & 0xff; + word[0 + 2] = (e >> 0x10) & 0xff; + word[0 + 3] = (e >> 0x18) & 0xff; + // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 + e ^= BIT0MASK(message >> 5); + word[4 + 0] = (e >> 0x00) & 0xff; + word[4 + 1] = (e >> 0x08) & 0xff; + word[4 + 2] = (e >> 0x10) & 0xff; + word[4 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 6); + word[12 + 0] = (e >> 0x00) & 0xff; + word[12 + 1] = (e >> 0x08) & 0xff; + word[12 + 2] = (e >> 0x10) & 0xff; + word[12 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 5); + word[8 + 0] = (e >> 0x00) & 0xff; + word[8 + 1] = (e >> 0x08) & 0xff; + word[8 + 2] = (e >> 0x10) & 0xff; + word[8 + 3] = (e >> 0x18) & 0xff; +} + + + +/** + * @brief Hadamard transform + * + * Perform hadamard transform of src and store result in dst + * src is overwritten: it is also used as intermediate buffer + * Method is best explained if we use H(3) instead of H(7): + * + * The routine multiplies by the matrix H(3): + * [1 1 1 1 1 1 1 1] + * [1 -1 1 -1 1 -1 1 -1] + * [1 1 -1 -1 1 1 -1 -1] + * [a b c d e f g h] * [1 -1 -1 1 1 -1 -1 1] = result of routine + * [1 1 1 1 -1 -1 -1 -1] + * [1 -1 1 -1 -1 1 -1 1] + * [1 1 -1 -1 -1 -1 1 1] + * [1 -1 -1 1 -1 1 1 -1] + * You can do this in three passes, where each pass does this: + * set lower half of buffer to pairwise sums, + * and upper half to differences + * index 0 1 2 3 4 5 6 7 + * input: a, b, c, d, e, f, g, h + * pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h + * pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h + * pass 3: a+b+c+d+e+f+g+h a+b-c-d+e+f-g-h a+b+c+d-e-f-g-h a+b-c-d-e+-f+g+h + * a-b+c-d+e-f+g-h a-b-c+d+e-f-g+h a-b+c-d-e+f-g+h 
a-b-c+d-e+f+g-h + * This order of computation is chosen because it vectorises well. + * Likewise, this routine multiplies by H(7) in seven passes. + * + * @param[out] src Structure that contain the expanded codeword + * @param[out] dst Structure that contain the expanded codeword + */ +static void hadamard(uint16_t src[128], uint16_t dst[128]) { + // the passes move data: + // src -> dst -> src -> dst -> src -> dst -> src -> dst + // using p1 and p2 alternately + uint16_t *p1 = src; + uint16_t *p2 = dst; + uint16_t *p3; + for (uint32_t pass = 0; pass < 7; pass++) { + for (uint32_t i = 0; i < 64; i++) { + p2[i] = p1[2 * i] + p1[2 * i + 1]; + p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; + } + // swap p1, p2 for next round + p3 = p1; + p1 = p2; + p2 = p3; + } +} + + + +/** + * @brief Add multiple codewords into expanded codeword + * + * Accesses memory in order + * Note: this does not write the codewords as -1 or +1 as the green machine does + * instead, just 0 and 1 is used. + * The resulting hadamard transform has: + * all values are halved + * the first entry is 64 too high + * + * @param[out] dest Structure that contain the expanded codeword + * @param[in] src Structure that contain the codeword + */ +static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) { + size_t part, bit, copy; + // start with the first copy + for (part = 0; part < 16; part++) { + for (bit = 0; bit < 8; bit++) { + dest[part * 8 + bit] = (uint16_t) ((src[part] >> bit) & 1); + } + } + // sum the rest of the copies + for (copy = 1; copy < MULTIPLICITY; copy++) { + for (part = 0; part < 16; part++) { + for (bit = 0; bit < 8; bit++) { + dest[part * 8 + bit] += (uint16_t) ((src[16 * copy + part] >> bit) & 1); + } + } + } +} + + + +/** + * @brief Finding the location of the highest value + * + * This is the final step of the green machine: find the location of the highest value, + * and add 128 if the peak is positive + * if there are two identical peaks, the peak with 
smallest value + * in the lowest 7 bits it taken + * @param[in] transform Structure that contain the expanded codeword + */ +static uint8_t find_peaks(const uint16_t transform[128]) { + uint16_t peak_abs = 0; + uint16_t peak = 0; + uint16_t pos = 0; + uint16_t t, abs, mask; + for (uint16_t i = 0; i < 128; i++) { + t = transform[i]; + abs = t ^ ((-(t >> 15)) & (t ^ -t)); // t = abs(t) + mask = -(((uint16_t)(peak_abs - abs)) >> 15); + peak ^= mask & (peak ^ t); + pos ^= mask & (pos ^ i); + peak_abs ^= mask & (peak_abs ^ abs); + } + pos |= 128 & ((peak >> 15) - 1); + return (uint8_t) pos; +} + + + + +/** + * @brief Encodes the received word + * + * The message consists of N1 bytes each byte is encoded into PARAM_N2 bits, + * or MULTIPLICITY repeats of 128 bits + * + * @param[out] cdw Array of size VEC_N1N2_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_N1_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) { + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // encode first word + encode(&cdw[16 * i * MULTIPLICITY], msg[i]); + // copy to other identical codewords + for (size_t copy = 1; copy < MULTIPLICITY; copy++) { + memcpy(&cdw[16 * i * MULTIPLICITY + 16 * copy], &cdw[16 * i * MULTIPLICITY], 16); + } + } +} + + + +/** + * @brief Decodes the received word + * + * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane. 
+ * The theory of error-correcting codes codes @cite macwilliams1977theory + * + * @param[out] msg Array of size VEC_N1_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1N2_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) { + uint16_t expanded[128]; + uint16_t transform[128]; + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // collect the codewords + expand_and_sum(expanded, &cdw[16 * i * MULTIPLICITY]); + // apply hadamard transform + hadamard(expanded, transform); + // fix the first entry to get the half Hadamard transform + transform[0] -= 64 * MULTIPLICITY; + // finish the decoding + msg[i] = find_peaks(transform); + } +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h b/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h new file mode 100644 index 00000000..0229e24a --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h @@ -0,0 +1,18 @@ +#ifndef REED_MULLER_H +#define REED_MULLER_H + + +/** + * @file reed_muller.h + * Header file of reed_muller.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c b/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c new file mode 100644 index 00000000..9139ce10 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c @@ -0,0 +1,349 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include "parsing.h" +#include "reed_solomon.h" +#include +#include +#include +/** + * @file reed_solomon.c + * Constant time implementation of Reed-Solomon codes + */ + + +static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); +static void compute_roots(uint8_t *error, uint16_t *sigma); +static 
void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); +static void correct_errors(uint8_t *cdw, const uint16_t *error_values); + +/** + * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes + * + * Following @cite lin1983error (Chapter 4 - Cyclic Codes), + * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register + * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code. + * + * @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_K_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) { + size_t i, j, k; + uint8_t gate_value = 0; + + uint16_t tmp[PARAM_G] = {0}; + uint16_t PARAM_RS_POLY [] = {RS_POLY_COEFS}; + uint8_t prev, x; + + for (i = 0; i < PARAM_N1; ++i) { + cdw[i] = 0; + } + + for (i = 0; i < PARAM_K; ++i) { + gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]); + + for (j = 0; j < PARAM_G; ++j) { + tmp[j] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gate_value, PARAM_RS_POLY[j]); + } + + prev = 0; + for (k = 0; k < PARAM_N1 - PARAM_K; k++) { + x = cdw[k]; + cdw[k] = (uint8_t) (prev ^ tmp[k]); + prev = x; + } + } + + memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K); +} + + + +/** + * @brief Computes 2 * PARAM_DELTA syndromes + * + * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes + * @param[in] cdw Array of size PARAM_N1 storing the received vector + */ +void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { + for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { + for (size_t j = 1; j < PARAM_N1; ++j) { + syndromes[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); + } + syndromes[i] ^= cdw[0]; + } 
+} + + + +/** + * @brief Computes the error locator polynomial (ELP) sigma + * + * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes)).
+ * We use the letter p for rho which is initialized at -1.
+ * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
+ * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
+ * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated.
+ * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA. + * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value + * and we only need to save its first PARAM_DELTA - 1 coefficients. + * + * @returns the degree of the ELP sigma + * @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP + * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes + */ +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { + uint16_t deg_sigma = 0; + uint16_t deg_sigma_p = 0; + uint16_t deg_sigma_copy = 0; + uint16_t sigma_copy[PARAM_DELTA + 1] = {0}; + uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; + uint16_t pp = (uint16_t) -1; // 2*rho + uint16_t d_p = 1; + uint16_t d = syndromes[0]; + + uint16_t mask1, mask2, mask12; + uint16_t deg_X, deg_X_sigma_p; + uint16_t dd; + uint16_t mu; + + uint16_t i; + + sigma[0] = 1; + for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { + // Save sigma in case we need it to update X_sigma_p + memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); + deg_sigma_copy = deg_sigma; + + dd = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(d_p)); + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + sigma[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(dd, X_sigma_p[i]); + } + + deg_X = mu - pp; + deg_X_sigma_p = deg_X + deg_sigma_p; + + // mask1 = 0xffff if(d != 0) and 0 otherwise + mask1 = -((uint16_t) - d >> 15); + + // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise + mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15); + + // mask12 = 0xffff if the deg_sigma increased and 0 otherwise + mask12 = mask1 & mask2; + deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma); + + if (mu == (2 * PARAM_DELTA - 1)) { + break; + } + + pp ^= mask12 & (mu ^ pp); + d_p ^= mask12 & (d ^ d_p); + for (i = PARAM_DELTA; i; --i) { + X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); + } + + deg_sigma_p ^= mask12 & 
(deg_sigma_copy ^ deg_sigma_p); + d = syndromes[mu + 1]; + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + d ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]); + } + } + + return deg_sigma; +} + + + +/** + * @brief Computes the error polynomial error from the error locator polynomial sigma + * + * See function PQCLEAN_HQCRMRS128_CLEAN_fft for more details. + * + * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial + * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + */ +static void compute_roots(uint8_t *error, uint16_t *sigma) { + uint16_t w[1 << PARAM_M] = {0}; + + PQCLEAN_HQCRMRS128_CLEAN_fft(w, sigma, PARAM_DELTA + 1); + PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(error, w); +} + + + +/** + * @brief Computes the polynomial z(x) + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. 
+ *
+ * Coefficients of index greater than degree are forced to zero with a mask.
+ *
+ * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
+ * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
+ * @param[in] degree Integer that is the degree of polynomial sigma
+ * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
+ */
+static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
+    size_t i, j;
+    uint16_t mask;
+
+    z[0] = 1;
+
+    for (i = 1; i < PARAM_DELTA + 1; ++i) {
+        // mask = 0xffff if(i <= degree) and 0 otherwise
+        mask = -((uint16_t) (i - degree - 1) >> 15);
+        z[i] = mask & sigma[i];
+    }
+
+    z[1] ^= syndromes[0];
+
+    for (i = 2; i <= PARAM_DELTA; ++i) {
+        // mask = 0xffff if(i <= degree) and 0 otherwise
+        mask = -((uint16_t) (i - degree - 1) >> 15);
+        z[i] ^= mask & syndromes[i - 1];
+
+        for (j = 1; j < i; ++j) {
+            z[i] ^= mask & PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]);
+        }
+    }
+}
+
+
+
+/**
+ * @brief Computes the error values
+ *
+ * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
+ * The positions are scanned with masks only: every (i, j) pair is visited regardless of
+ * where the nonzero entries of error are.
+ *
+ * @param[in,out] error_values Array of PARAM_N1 elements receiving the error values.
+ *                             Values are accumulated with +=, so the array must be zero on entry.
+ * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
+ * @param[in] error Array of (at least) PARAM_N1 elements, nonzero at the error positions
+ */
+static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
+    uint16_t beta_j[PARAM_DELTA] = {0};
+    uint16_t e_j[PARAM_DELTA] = {0};
+
+    uint16_t delta_counter;
+    uint16_t delta_real_value;
+    uint16_t found;
+    uint16_t mask1;
+    uint16_t mask2;
+    uint16_t tmp1;
+    uint16_t tmp2;
+    uint16_t inverse;
+    uint16_t inverse_power_j;
+
+    // Compute the beta_{j_i} page 31 of the documentation
+    // The delta_counter-th nonzero position i of error stores gf_exp[i] in beta_j[delta_counter]
+    delta_counter = 0;
+    for (size_t i = 0; i < PARAM_N1; i++) {
+        found = 0;
+        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
+        for (size_t j = 0; j < PARAM_DELTA; j++) {
+            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
+            beta_j[j] += mask1 & mask2 & gf_exp[i];
+            found += mask1 & mask2 & 1;
+        }
+        delta_counter += found;
+    }
+    // Number of nonzero positions that were stored in beta_j
+    delta_real_value = delta_counter;
+
+    // Compute the e_{j_i} page 31 of the documentation
+    for (size_t i = 0; i < PARAM_DELTA; ++i) {
+        tmp1 = 1;
+        tmp2 = 1;
+        inverse = PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(beta_j[i]);
+        inverse_power_j = 1;
+
+        // tmp1 = 1 + sum over j of z[j] * inverse^j
+        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
+            inverse_power_j = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, inverse);
+            tmp1 ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, z[j]);
+        }
+        // tmp2 = product over the other beta_j entries of (1 + inverse * beta)
+        for (size_t k = 1; k < PARAM_DELTA; ++k) {
+            tmp2 = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
+        }
+        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
+        e_j[i] = mask1 & PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(tmp2));
+    }
+
+    // Place the delta e_{j_i} values at the right coordinates of the output vector
+    delta_counter = 0;
+    for (size_t i = 0; i < PARAM_N1; ++i) {
+        found = 0;
+        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
+        for (size_t j = 0; j < PARAM_DELTA; j++) {
+            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
+            error_values[i] += mask1 & mask2 & e_j[j];
+            found += mask1 & mask2 & 1;
+        }
+        delta_counter += found;
+    }
+}
+
+
+
+/**
+ * @brief Correct the errors
+ *
+ * XORs error_values[i] into cdw[i] for every i below PARAM_N1.
+ *
+ * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place
+ * @param[in] error_values Array of PARAM_N1 elements storing the error values
+ */
+static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
+    for (size_t i = 0; i < PARAM_N1; ++i) {
+        cdw[i] ^= error_values[i];
+    }
+}
+
+
+
+/**
+ * @brief Decodes the received word
+ *
+ * This function relies on six steps:
+ *
    + *
  1. The first step, is the computation of the 2*PARAM_DELTA syndromes. + *
  2. The second step is the computation of the error-locator polynomial sigma. + *
  3. The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses. + *
  4. The fourth step is the computation of the polynomial z(x). + *
  5. The fifth step, is the computation of the error values. + *
  6. The sixth step is the correction of the errors in the received polynomial. + *
+ * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error + * + * @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) { + uint16_t syndromes[2 * PARAM_DELTA] = {0}; + uint16_t sigma[1 << PARAM_FFT] = {0}; + uint8_t error[1 << PARAM_M] = {0}; + uint16_t z[PARAM_N1] = {0}; + uint16_t error_values[PARAM_N1] = {0}; + uint16_t deg; + + // Calculate the 2*PARAM_DELTA syndromes + compute_syndromes(syndromes, cdw); + + // Compute the error locator polynomial sigma + // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room + deg = compute_elp(sigma, syndromes); + + // Compute the error polynomial error + compute_roots(error, sigma); + + // Compute the polynomial z(x) + compute_z_poly(z, sigma, deg, syndromes); + + // Compute the error values + compute_error_values(error_values, z, error); + + // Correct the errors + correct_errors(cdw, error_values); + + // Retrieve the message from the decoded codeword + memcpy(msg, cdw + (PARAM_G - 1), PARAM_K); + +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h b/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h new file mode 100644 index 00000000..84fc97cc --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h @@ -0,0 +1,20 @@ +#ifndef REED_SOLOMON_H +#define REED_SOLOMON_H + + +/** + * @file reed_solomon.h + * Header file of reed_solomon.c + */ +#include "parameters.h" +#include +#include + +static const uint16_t alpha_ij_pow [30][45] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 
106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 3, 157, 53, 159, 40, 185}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85}, {116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44}, {205, 143, 
37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221, 195, 61, 44, 173, 58, 117, 39, 193, 186, 47, 231, 182, 26, 237, 23, 21, 146, 145, 219, 87, 56, 242, 36, 139, 54, 64, 45, 96}, {135, 6, 53, 20, 190, 120, 163, 13, 237, 46, 84, 228, 229, 98, 100, 81, 69, 251, 131, 32, 45, 192, 238, 186, 94, 187, 217, 189, 236, 169, 82, 209, 241, 220, 28, 242, 72, 22, 173, 116, 201, 37, 140, 222, 15}, {19, 24, 181, 93, 94, 107, 67, 129, 102, 132, 57, 252, 98, 200, 89, 18, 11, 173, 232, 3, 53, 40, 194, 231, 226, 189, 197, 158, 170, 145, 75, 25, 166, 69, 235, 54, 29, 234, 37, 5, 95, 120, 91, 52, 59}, {38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145}, {76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, 2, 152, 39, 140, 190, 231, 175, 31, 23, 77, 209, 219, 25, 162, 36, 88, 4, 45, 78, 5, 97, 211, 67, 62, 46, 154, 191, 171, 50, 89}, {152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1}, {45, 37, 80, 101, 223, 208, 102, 168, 191, 150, 7, 195, 251, 173, 38, 39, 10, 47, 127, 26, 197, 21, 115, 219, 100, 242, 245, 54, 205, 96, 70, 97, 107, 68, 59, 33, 228, 241, 130, 89, 61, 207, 58, 12, 193}, {90, 148, 186, 30, 226, 62, 109, 73, 179, 174, 162, 61, 131, 232, 96, 140, 153, 127, 52, 51, 168, 99, 98, 56, 172, 22, 8, 234, 212, 185, 240, 67, 237, 79, 114, 241, 25, 121, 245, 108, 19, 39, 20, 188, 223}, {180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108, 38, 156, 160, 15, 226, 124, 169}, {117, 181, 161, 107, 26, 102, 41, 252, 87, 89, 245, 173, 45, 53, 185, 231, 68, 197, 168, 145, 110, 166, 61, 54, 38, 37, 186, 120, 134, 59, 21, 191, 196, 221, 36, 207, 205, 
39, 80, 15, 217, 237, 33, 115, 150}, {234, 238, 97, 254, 103, 184, 57, 227, 7, 172, 176, 58, 192, 40, 15, 175, 147, 21, 99, 55, 166, 122, 216, 45, 106, 222, 107, 52, 133, 85, 123, 50, 195, 11, 32, 12, 140, 188, 182, 124, 158, 115, 49, 224, 36}, {201, 159, 47, 91, 124, 33, 209, 149, 166, 244, 71, 117, 238, 194, 223, 31, 79, 115, 98, 167, 61, 216, 90, 181, 190, 254, 206, 218, 213, 150, 224, 72, 54, 152, 106, 161, 177, 189, 184, 114, 171, 56, 18, 131, 38}, {143, 70, 101, 217, 59, 168, 252, 130, 195, 44, 58, 39, 186, 231, 26, 23, 146, 219, 56, 36, 54, 45, 181, 97, 223, 62, 33, 191, 110, 89, 251, 8, 12, 10, 15, 134, 197, 41, 179, 100, 86, 125, 205, 37, 185}, {3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55, 89, 235, 32, 96, 160, 253, 26}, {6, 20, 120, 13, 46, 228, 98, 81, 251, 32, 192, 186, 187, 189, 169, 209, 220, 242, 22, 116, 37, 222, 254, 62, 132, 63, 130, 43, 250, 38, 212, 194, 182, 147, 77, 179, 141, 9, 54, 180, 159, 101, 67, 151, 85}, {12, 80, 231, 208, 169, 191, 87, 195, 125, 38, 181, 47, 217, 197, 85, 219, 221, 245, 8, 96, 186, 107, 206, 33, 145, 130, 86, 207, 45, 193, 101, 134, 102, 146, 150, 166, 251, 64, 39, 185, 127, 62, 21, 252, 100}, {24, 93, 107, 129, 132, 252, 200, 18, 173, 3, 40, 231, 189, 158, 145, 25, 69, 54, 234, 5, 120, 52, 218, 191, 174, 43, 207, 90, 35, 15, 136, 92, 115, 220, 239, 125, 76, 238, 101, 17, 133, 228, 149, 121, 44}, {48, 105, 127, 248, 77, 241, 224, 247, 64, 156, 95, 182, 236, 170, 150, 162, 11, 205, 212, 94, 134, 133, 213, 110, 239, 250, 45, 35, 30, 26, 218, 99, 130, 69, 108, 143, 40, 211, 206, 132, 229, 7, 144, 2, 96}, {96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15}}; + +void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg); + +void 
PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-128/clean/vector.c b/src/kem/hqc/hqc-rmrs-128/clean/vector.c new file mode 100644 index 00000000..8d4485c8 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/vector.c @@ -0,0 +1,176 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file vector.c + * @brief Implementation of vectors sampling and some utilities for the HQC scheme + */ + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. The vector + * is stored by position. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It return \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). 
+ *
+ * A candidate position that equals one already stored is discarded and redrawn,
+ * so the weight positions written to v are pairwise distinct.
+ *
+ * @param[out] v Pointer to an array receiving weight positions
+ * @param[in] weight Integer that is the Hamming weight
+ * @param[in,out] ctx Pointer to the context of the seed expander
+ */
+void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
+    size_t random_bytes_size = 3 * weight;
+    uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
+    uint8_t inc;
+    size_t i, j;
+
+    i = 0;
+    // j == random_bytes_size marks the buffer as exhausted, forcing a refill on first use
+    j = random_bytes_size;
+    while (i < weight) {
+        do {
+            if (j == random_bytes_size) {
+                seedexpander(ctx, rand_bytes, random_bytes_size);
+                j = 0;
+            }
+
+            // Assemble a 24-bit big-endian candidate from three bytes
+            v[i] = ((uint32_t) rand_bytes[j++]) << 16;
+            v[i] |= ((uint32_t) rand_bytes[j++]) << 8;
+            v[i] |= rand_bytes[j++];
+
+        } while (v[i] >= UTILS_REJECTION_THRESHOLD);
+
+        v[i] = v[i] % PARAM_N;
+
+        // Keep the candidate (advance i) only if it differs from every earlier position
+        inc = 1;
+        for (size_t k = 0; k < i; k++) {
+            if (v[k] == v[i]) {
+                inc = 0;
+            }
+        }
+        i += inc;
+    }
+}
+
+
+
+/**
+ * @brief Generates a vector of a given Hamming weight
+ *
+ * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight.
+ * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follows:
+ * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
+ * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$
+ * 3. If \f$ x \geq t\f$, go to 1
+ * 4. It returns \f$ r = x \mod 70853\f$
+ *
+ * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
+ *
+ * The positions are drawn by PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates
+ * and the matching bits are then OR-ed into v, so bits already set in v are kept.
+ *
+ * @param[in,out] v Pointer to an array of 64-bit words in which the bits are set
+ * @param[in] weight Integer that is the Hamming weight
+ * @param[in,out] ctx Pointer to the context of the seed expander
+ */
+void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
+    uint32_t tmp[PARAM_OMEGA_R] = {0};
+
+    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, tmp, weight);
+
+    for (size_t i = 0; i < weight; ++i) {
+        // Position tmp[i] lives in word tmp[i] / 64 at bit tmp[i] % 64
+        int32_t index = tmp[i] / 64;
+        int32_t pos = tmp[i] % 64;
+        v[index] |= ((uint64_t) 1) << pos;
+    }
+}
+
+
+
+/**
+ * @brief Generates a random vector of dimension PARAM_N
+ *
+ * This function generates a random binary vector of dimension PARAM_N. It generates a random
+ * array of bytes using the seedexpander function, and drops the extra bits using a mask.
+ *
+ * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
+ * @param[in,out] ctx Pointer to the context of the seed expander
+ */
+void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
+    uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0};
+
+    seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES);
+
+    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES);
+    // Clear the bits of the last word selected out by RED_MASK
+    v[VEC_N_SIZE_64 - 1] &= RED_MASK;
+}
+
+
+
+/**
+ * @brief Adds two vectors
+ *
+ * The addition is a word-wise XOR.
+ *
+ * @param[out] o Pointer to an array that is the result
+ * @param[in] v1 Pointer to an array that is the first vector
+ * @param[in] v2 Pointer to an array that is the second vector
+ * @param[in] size Integer that is the size of the vectors, in 64-bit words
+ */
+void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
+    for (uint32_t i = 0; i < size; ++i) {
+        o[i] = v1[i] ^ v2[i];
+    }
+}
+
+
+
+/**
+ * @brief Compares two vectors
+ *
+ * @param[in] v1 Pointer to an array that is first vector
+ * @param[in] v2 Pointer to an array that is second vector
+ * @param[in] size Integer that is the size of the vectors, in bytes
+ * @returns 0 if the vectors are equal and 1
otherwise + */ +uint8_t PQCLEAN_HQCRMRS128_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) { + uint64_t r = 0; + for (size_t i = 0; i < size; i++) { + r |= v1[i] ^ v2[i]; + } + r = (~r + 1) >> 63; + return (uint8_t) r; +} + + + +/** + * @brief Resize a vector so that it contains size_o bits + * + * @param[out] o Pointer to the output vector + * @param[in] size_o Integer that is the size of the output vector in bits + * @param[in] v Pointer to the input vector + * @param[in] size_v Integer that is the size of the input vector in bits + */ +void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) { + if (size_o < size_v) { + uint64_t mask = 0x7FFFFFFFFFFFFFFF; + int8_t val = 0; + + if (size_o % 64) { + val = 64 - (size_o % 64); + } + + memcpy(o, v, 8 * VEC_N1N2_SIZE_64); + + for (int8_t i = 0; i < val; ++i) { + o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); + } + } else { + memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64)); + } +} diff --git a/src/kem/hqc/hqc-rmrs-128/clean/vector.h b/src/kem/hqc/hqc-rmrs-128/clean/vector.h new file mode 100644 index 00000000..17344bbb --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-128/clean/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + + +/** + * @file vector.h + * @brief Header file for vector.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v); + + +void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size); + +uint8_t PQCLEAN_HQCRMRS128_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size); + +void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, 
uint32_t size_o, const uint64_t *v, uint32_t size_v); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt b/src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt new file mode 100644 index 00000000..88b1da06 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt @@ -0,0 +1,16 @@ +set( + SRC_AVX2_HQCRMRS192 + code.c + fft.c + gf2x.c + gf.c + hqc.c + kem.c + parsing.c + reed_muller.c + reed_solomon.c + vector.c +) + +define_kem_alg(hqcrmrs192_avx2 + PQCLEAN_HQCRMRS192_CLEAN "${SRC_AVX2_HQCRMRS192}" "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/api.h b/src/kem/hqc/hqc-rmrs-192/avx2/api.h new file mode 100644 index 00000000..d4db5bc5 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/api.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_HQCRMRS192_AVX2_API_H +#define PQCLEAN_HQCRMRS192_AVX2_API_H +/** + * @file api.h + * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme + */ + +#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_ALGNAME "HQC-RMRS-192" + +#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES 4562 +#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_PUBLICKEYBYTES 4522 +#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_BYTES 64 +#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_CIPHERTEXTBYTES 9026 + +// As a technicality, the public key is appended to the secret key in order to respect the NIST API. 
+// Without this constraint, PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32 + +int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk); + +int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/code.c b/src/kem/hqc/hqc-rmrs-192/avx2/code.c new file mode 100644 index 00000000..b44aa4b3 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/code.c @@ -0,0 +1,47 @@ +#include "code.h" +#include "parameters.h" +#include "reed_muller.h" +#include "reed_solomon.h" +#include +#include +/** + * @file code.c + * @brief Implementation of concatenated code + */ + + + +/** + * + * @brief Encoding the message m to a code word em using the concatenated code + * + * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain + * a concatenated code word. 
+ * + * @param[out] em Pointer to an array that is the tensor code word + * @param[in] m Pointer to an array that is the message + */ +void PQCLEAN_HQCRMRS192_AVX2_code_encode(uint8_t *em, const uint8_t *m) { + uint8_t tmp[8 * VEC_N1_SIZE_64] = {0}; + + PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(tmp, m); + PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(em, tmp); + +} + + + +/** + * @brief Decoding the code word em to a message m using the concatenated code + * + * @param[out] m Pointer to an array that is the message + * @param[in] em Pointer to an array that is the code word + */ +void PQCLEAN_HQCRMRS192_AVX2_code_decode(uint8_t *m, const uint8_t *em) { + uint8_t tmp[8 * VEC_N1_SIZE_64] = {0}; + + PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(tmp, em); + PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(m, tmp); + + +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/code.h b/src/kem/hqc/hqc-rmrs-192/avx2/code.h new file mode 100644 index 00000000..ead7caf7 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/code.h @@ -0,0 +1,18 @@ +#ifndef CODE_H +#define CODE_H + + +/** + * @file code.h + * Header file of code.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS192_AVX2_code_encode(uint8_t *em, const uint8_t *message); + +void PQCLEAN_HQCRMRS192_AVX2_code_decode(uint8_t *m, const uint8_t *em); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/fft.c b/src/kem/hqc/hqc-rmrs-192/avx2/fft.c new file mode 100644 index 00000000..c1b44b15 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/fft.c @@ -0,0 +1,351 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include +#include +/** + * @file fft.c + * Implementation of the additive FFT and its transpose. + * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here:
+ * https://binary.cr.yp.to/mcbits-20130616.pdf
+ */
+
+
+static void compute_fft_betas(uint16_t *betas);
+static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
+static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
+static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
+static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);
+
+
+/**
+ * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
+ *
+ * betas[i] is the single-bit value 1 << (PARAM_M - 1 - i), i.e. the bits from
+ * PARAM_M - 1 down to 1 in decreasing order.
+ *
+ * @param[out] betas Array of size PARAM_M-1
+ */
+static void compute_fft_betas(uint16_t *betas) {
+    size_t i;
+    for (i = 0; i < PARAM_M - 1; ++i) {
+        betas[i] = 1 << (PARAM_M - 1 - i);
+    }
+}
+
+
+
+/**
+ * @brief Computes the subset sums of the given set
+ *
+ * The array subset_sums is such that its ith element is
+ * the subset sum of the set elements given by the binary form of i.
+ *
+ * Sums are XORs. The table is built by doubling: adding set[i] to the 2^i sums
+ * already computed yields the next 2^i entries.
+ *
+ * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
+ * @param[in] set Array of set_size elements
+ * @param[in] set_size Size of the array set
+ */
+static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
+    uint16_t i, j;
+    subset_sums[0] = 0;
+
+    for (i = 0; i < set_size; ++i) {
+        for (j = 0; j < (1 << i); ++j) {
+            subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j];
+        }
+    }
+}
+
+
+
+/**
+ * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
+ *
+ * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
+ * as proposed by Bernstein, Chou and Schwabe:
+ * https://binary.cr.yp.to/mcbits-20130616.pdf
+ *
+ * The cases m_f = 1..4 are written out explicitly; larger values are delegated to radix_big.
+ *
+ * @param[out] f0 Array half the size of f
+ * @param[out] f1 Array half the size of f
+ * @param[in] f Array of size a power of 2
+ * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
+ */
+static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
+    switch (m_f) {
+    case 4:
+        f0[4] = f[8] ^ f[12];
+        f0[6] = f[12] ^ f[14];
+        f0[7] = f[14] ^ f[15];
+        f1[5] = f[11] ^ f[13];
+        f1[6] = f[13] ^ f[14];
+        f1[7] = f[15];
+        f0[5] = f[10] ^ f[12] ^ f1[5];
+        f1[4] = f[9] ^ f[13] ^ f0[5];
+
+        f0[0] = f[0];
+        f1[3] = f[7] ^ f[11] ^ f[15];
+        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
+        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
+        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
+        f1[2] = f[3] ^ f1[1] ^ f0[3];
+        f0[1] = f[2] ^ f0[2] ^ f1[1];
+        f1[0] = f[1] ^ f0[1];
+        break;
+
+    case 3:
+        f0[0] = f[0];
+        f0[2] = f[4] ^ f[6];
+        f0[3] = f[6] ^ f[7];
+        f1[1] = f[3] ^ f[5] ^ f[7];
+        f1[2] = f[5] ^ f[6];
+        f1[3] = f[7];
+        f0[1] = f[2] ^ f0[2] ^ f1[1];
+        f1[0] = f[1] ^ f0[1];
+        break;
+
+    case 2:
+        f0[0] = f[0];
+        f0[1] = f[2] ^ f[3];
+        f1[0] = f[1] ^ f0[1];
+        f1[1] = f[3];
+        break;
+
+    case 1:
+        f0[0] = f[0];
+        f1[0] = f[1];
+        break;
+
+    default:
+        radix_big(f0, f1, f, m_f);
+        break;
+    }
+}
+
+// Radix conversion for m_f above 4: splits f (2^m_f coefficients, n = 2^(m_f - 2)) into
+// two half-size polynomials Q and R, converts each with radix(), then interleaves the halves.
+static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
+    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
+    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};
+
+    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
+    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
+    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
+    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};
+
+    size_t i, n;
+
+    n = 1;
+    n <<= (m_f - 2);
+    // memcpy sizes are in bytes: 2 * n bytes = n uint16_t coefficients
+    memcpy(Q, f + 3 * n, 2 * n);
+    memcpy(Q + n, f + 3 * n, 2 * n);
+    memcpy(R, f, 4 * n);
+
+    for (i = 0; i < n; ++i) {
+        Q[i] ^= f[2 * n + i];
+        R[n + i] ^= Q[i];
+    }
+
+    radix(Q0, Q1, Q, m_f - 1);
+    radix(R0, R1, R, m_f - 1);
+
+    memcpy(f0, R0, 2 * n);
+    memcpy(f0 + n, Q0, 2 * n);
+    memcpy(f1, R1, 2 * n);
+    memcpy(f1 + n, Q1, 2 * n);
+}
+
+
+
+/**
+ * @brief Evaluates f at all subset sums of a given set
+ *
+ * This function is a subroutine of the function PQCLEAN_HQCRMRS192_AVX2_fft.
+ *
+ * @param[out] w Array receiving the 2^m evaluations
+ * @param[in,out] f Array of 2^m_f coefficients; it is rescaled in place when betas[m - 1] != 1
+ * @param[in] f_coeffs Number of coefficients of f
+ * @param[in] m Number of betas
+ * @param[in] m_f Base-2 logarithm of the length of f, as expected by radix()
+ * @param[in] betas FFT constants
+ */
+static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
+    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
+    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
+    uint16_t gammas[PARAM_M - 2] = {0};
+    uint16_t deltas[PARAM_M - 2] = {0};
+    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
+    uint16_t u[1 << (PARAM_M - 2)] = {0};
+    uint16_t v[1 << (PARAM_M - 2)] = {0};
+    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};
+
+    uint16_t beta_m_pow;
+    size_t i, j, k;
+    size_t x;
+
+    // Step 1
+    // Base case, f = f[0] + f[1].x: w receives f[0] plus f[1] times every subset sum of betas
+    if (m_f == 1) {
+        for (i = 0; i < m; ++i) {
+            tmp[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], f[1]);
+        }
+
+        w[0] = f[0];
+        x = 1;
+        for (j = 0; j < m; ++j) {
+            for (k = 0; k < x; ++k) {
+                w[x + k] = w[k] ^ tmp[j];
+            }
+            x <<= 1;
+        }
+
+        return;
+    }
+
+    // Step 2: compute g
+    // f[i] is multiplied by betas[m - 1]^i
+    if (betas[m - 1] != 1) {
+        beta_m_pow = 1;
+        x = 1;
+        x <<= m_f;
+        for (i = 1; i < x; ++i) {
+            beta_m_pow = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
+            f[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, f[i]);
+        }
+    }
+
+    // Step 3
+    radix(f0, f1, f, m_f);
+
+    // Step 4: compute gammas and deltas
+    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
+    for (i = 0; i + 1 < m; ++i) {
+        gammas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS192_AVX2_gf_inverse(betas[m - 1]));
+        deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(gammas[i]) ^ gammas[i];
+    }
+
+    // Compute gammas sums
+    compute_subset_sums(gammas_sums, gammas, m - 1);
+
+    // Step 5
+    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);
+
+    k = 1;
+    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
+    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
+        w[0] = u[0];
+        w[k] = u[0] ^ f1[0];
+        for (i = 1; i < k; ++i) {
+            w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], f1[0]);
+            w[k + i] = w[i] ^ f1[0];
+        }
+    } else {
+        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);
+
+        // Step 6
+        // w[i] = u[i] + gammas_sums[i] * v[i] and w[k + i] = w[i] + v[i]
+        memcpy(w + k, v, 2 * k);
+        w[0] = u[0];
+        w[k] ^= u[0];
+        for (i = 1; i < k; ++i) {
+            w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], v[i]);
+            w[k + i] ^= w[i];
+        }
+    }
+}
+
+
+
+/**
+ * @brief Evaluates f on all fields elements using an additive FFT algorithm
+ *
+ * f_coeffs is the number of coefficients of f (one more than its degree).
+ * The FFT proceeds recursively to evaluate f at all subset sums of a basis B.
+ * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf
+ * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas, + * meaning the first gammas subset sums are actually the subset sums of betas (except 1).
+ * Also note that the input f itself is const here: the twisting at each level is applied
+ * to the local halves f0 and f1 that are handed to fft_rec.
+ *
+ * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
+ * @param[in] f Array of 2^PARAM_FFT elements
+ * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
+ */
+void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
+    uint16_t betas[PARAM_M - 1] = {0};
+    uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
+    uint16_t f0[1 << (PARAM_FFT - 1)] = {0};
+    uint16_t f1[1 << (PARAM_FFT - 1)] = {0};
+    uint16_t deltas[PARAM_M - 1] = {0};
+    uint16_t u[1 << (PARAM_M - 1)] = {0};
+    uint16_t v[1 << (PARAM_M - 1)] = {0};
+
+    size_t i, k;
+
+    // Follows Gao and Mateer algorithm
+    compute_fft_betas(betas);
+
+    // Step 1: PARAM_FFT > 1, nothing to do
+
+    // Compute gammas sums
+    compute_subset_sums(betas_sums, betas, PARAM_M - 1);
+
+    // Step 2: beta_m = 1, nothing to do
+
+    // Step 3
+    radix(f0, f1, f, PARAM_FFT);
+
+    // Step 4: Compute deltas
+    // deltas[i] = betas[i]^2 + betas[i]
+    for (i = 0; i < PARAM_M - 1; ++i) {
+        deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(betas[i]) ^ betas[i];
+    }
+
+    // Step 5
+    fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
+    fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
+
+    k = 1 << (PARAM_M - 1);
+    // Step 6, 7 and error polynomial computation
+    // The upper half of w starts as a copy of v (2 * k bytes = k uint16_t values)
+    memcpy(w + k, v, 2 * k);
+
+    // Check if 0 is root
+    w[0] = u[0];
+
+    // Check if 1 is root
+    w[k] ^= u[0];
+
+    // Find other roots
+    for (i = 1; i < k; ++i) {
+        w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas_sums[i], v[i]);
+        w[k + i] ^= w[i];
+    }
+}
+
+
+
+/**
+ * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
+ *
+ * @param[in,out] error Array with the error; a 1 is XORed in wherever the matching entry of w is 0
+ * NOTE(review): an error_compact output used to be documented here but is not part of this signature
+ * @param[in] w Array of size 2^PARAM_M
+ */
+void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
+    uint16_t gammas[PARAM_M - 1] = {0};
+    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
+    uint16_t k;
+    size_t i, index;
+
+    compute_fft_betas(gammas);
+    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
+
+    k = 1 << (PARAM_M - 1);
+    error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); // (uint16_t)-x >> 15 is 1 for nonzero x below 2^15, so this XORs 1 only when w[0] == 0
+    error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15);
+
+    for (i = 1; i < k; ++i) {
+        index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]];
+        error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15);
+
+        index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1];
+        error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15);
+    }
+}
diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/fft.h b/src/kem/hqc/hqc-rmrs-192/avx2/fft.h
new file mode 100644
index 00000000..bb6b3ba1
--- /dev/null
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/fft.h
@@ -0,0 +1,18 @@
+#ifndef FFT_H
+#define FFT_H
+
+
+/**
+ * @file fft.h
+ * Header file of fft.c
+ */
+
+#include
+#include
+
+void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);
+
+void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);
+
+
+#endif
diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/gf.c b/src/kem/hqc/hqc-rmrs-192/avx2/gf.c
new file mode 100644
index 00000000..048e046e
--- /dev/null
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf.c
@@ -0,0 +1,176 @@
+#include "gf.h"
+#include "parameters.h"
+#include
+/**
+ * @file gf.c
+ * Galois field implementation with multiplication using the pclmulqdq instruction
+ */
+
+
+static uint16_t gf_reduce(uint64_t x, size_t deg_x);
+
+
+
+/**
+ * Reduces polynomial x modulo primitive polynomial GF_POLY.
+ * @returns x mod GF_POLY + * @param[in] x Polynomial of degree less than 64 + * @param[in] deg_x The degree of polynomial x + */ +static uint16_t gf_reduce(uint64_t x, size_t deg_x) { + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; + + // Deduce the number of steps of reduction + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); + + // Reduce + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; + x &= (1 << PARAM_M) - 1; + x ^= mod; + + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; + x ^= mod; + rmdr ^= 1 << z2; + z1 = z2; + } + } + + return x; +} + + + +/** + * Multiplies two elements of GF(2^GF_M). + * @returns the product a*b + * @param[in] a Element of GF(2^GF_M) + * @param[in] b Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b) { + __m128i va = _mm_cvtsi32_si128(a); + __m128i vb = _mm_cvtsi32_si128(b); + __m128i vab = _mm_clmulepi64_si128(va, vb, 0); + uint32_t ab = _mm_cvtsi128_si32(vab); + + return gf_reduce(ab, 2 * (PARAM_M - 1)); +} + + + +/** + * Compute 16 products in GF(2^GF_M). 
+ * @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M) + * @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers + * @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer + * + */ +__m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b) { + __m128i al = _mm256_extractf128_si256(a, 0); + __m128i ah = _mm256_extractf128_si256(a, 1); + __m128i bl = _mm256_extractf128_si256(b, 0); + __m128i bh = _mm256_extractf128_si256(b, 1); + + __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0); + abl0 &= CONST128_MIDDLEMASKL; + abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); + + __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11); + abh0 &= CONST128_MIDDLEMASKL; + abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); + + abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL); + abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH); + + __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0); + abl1 &= CONST128_MIDDLEMASKL; + abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); + + __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11); + abh1 &= CONST128_MIDDLEMASKL; + abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); + + abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL); + abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH); + + __m256i ret = _mm256_set_m128i(abl1, abl0); + + __m256i aux = CONST256_MR0; + + for (int32_t i = 0; i < 7; i++) { + ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux); + aux = aux << 1; + } + + ret &= CONST256_LASTMASK; + return ret; +} + + + +/** + * Squares an element of GF(2^GF_M). 
+ * @returns a^2 + * @param[in] a Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a) { + uint32_t b = a; + uint32_t s = b & 1; + for (size_t i = 1; i < PARAM_M; ++i) { + b <<= 1; + s ^= b & (1 << 2 * i); + } + + return gf_reduce(s, 2 * (PARAM_M - 1)); +} + + + +/** + * Computes the inverse of an element of GF(2^8), + * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254 + * @returns the inverse of a + * @param[in] a Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a) { + uint16_t inv = a; + uint16_t tmp1, tmp2; + + inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(a); /* a^2 */ + tmp1 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, a); /* a^3 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^4 */ + tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, tmp1); /* a^7 */ + tmp1 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, tmp2); /* a^11 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp1, inv); /* a^15 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^30 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^60 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^120 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inv, tmp2); /* a^127 */ + inv = PQCLEAN_HQCRMRS192_AVX2_gf_square(inv); /* a^254 */ + return inv; +} + + + +/** + * Returns i modulo 2^GF_M-1. + * i must be less than 2*(2^GF_M-1). + * Therefore, the return value is either i or i-2^GF_M+1. 
+ * @returns i mod (2^GF_M-1) + * @param[in] i The integer whose modulo is taken + */ +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i) { + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); + + // mask = 0xffff if (i < GF_MUL_ORDER) + uint16_t mask = -(tmp >> 15); + + return tmp + (mask & PARAM_GF_MUL_ORDER); +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/gf.h b/src/kem/hqc/hqc-rmrs-192/avx2/gf.h new file mode 100644 index 00000000..603a0884 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf.h @@ -0,0 +1,69 @@ +#ifndef GF_H +#define GF_H + + +/** + * @file gf.h + * Header file of gf.c + */ + +#include +#include +#include + +#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + +/** + * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8. + * The last two elements are needed by the PQCLEAN_HQCRMRS192_AVX2_gf_mul function + * (for example if both elements to multiply are zero). + */ +static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 
138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 }; + + + +/** + * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8). + * The logarithm of 0 is set to 0 by convention. + */ +static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 }; + +/** + * Masks needed for the computation of 16 mult in GF(2^M) + */ +#define CONST256_MR0 _mm256_set1_epi64x((long long) 0x0100010001000100) +#define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff) +#define CONST128_MASKL _mm_set1_epi64x((long long) 0x0000ffff0000ffff) +#define CONST128_MASKH _mm_set1_epi64x((long long) 0xffff0000ffff0000) +#define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff) +#define 
CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000) +#define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff) +#define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100) + +/** + * x^i modulo x^8+x^4+x^3+x^2+1 duplicate 4 times to fit a 256-bit register + */ +static const __m256i red[7] = { + {0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL}, + {0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL}, + {0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL}, + {0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL}, + {0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL}, + {0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL}, + {0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL}, + +}; + + +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b); + +__m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b); + +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c b/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c new file mode 100644 index 00000000..3c6e1fa9 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c @@ -0,0 +1,408 @@ +#include "gf2x.h" +#include "parameters.h" +#include +#include +#include +#include +/** + * \file gf2x.c + * \brief AVX2 implementation of multiplication of two polynomials + */ + + + +#define VEC_N_SPLIT_3x3 CEIL_DIVIDE(PARAM_N/9, 256) +#define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3) + +static inline void reduce(uint64_t *o, const __m256i *a); +static inline void karat_mult_1(__m128i *C, const __m128i *A, const 
__m128i *B); +static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B); + + +/** + * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$ + * + * This function computes the modular reduction of the polynomial a(x) + * + * @param[out] o Pointer to the result + * @param[in] a Pointer to the polynomial a(x) + */ +static inline void reduce(uint64_t *o, const __m256i *a256) { + size_t i, i2; + __m256i r256, carry256; + __m256i *o256 = (__m256i *)o; + const uint64_t *a64 = (const uint64_t *)a256; + uint64_t r, carry; + + i2 = 0; + for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) { + r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i])); + r256 = _mm256_srli_epi64(r256, PARAM_N & 63); + carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1])); + carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63); + r256 ^= carry256; + _mm256_storeu_si256(&o256[i2], a256[i2] ^ r256); + i2 += 1; + } + + i = i - (PARAM_N >> 6); + for (; i < (PARAM_N >> 6) + 1; i++) { + r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63); + carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63); + r ^= carry; + o[i] = a64[i] ^ r; + } + + o[PARAM_N >> 6] &= RED_MASK; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * A(x) and B(x) are stored in 128-bit registers + * This function computes A(x)*B(x) using Karatsuba + * + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) { + __m128i 
D1[2]; + __m128i D0[2], D2[2]; + __m128i Al = _mm_loadu_si128(A); + __m128i Ah = _mm_loadu_si128(A + 1); + __m128i Bl = _mm_loadu_si128(B); + __m128i Bh = _mm_loadu_si128(B + 1); + + // Compute Al.Bl=D0 + __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0); + __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11); + __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e)); + __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e)); + __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Compute Ah.Bh=D2 + DD0 = _mm_clmulepi64_si128(Ah, Bh, 0); + DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11); + AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e)); + BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e)); + DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Compute AlpAh.BlpBh=D1 + // Initialisation of AlpAh and BlpBh + __m128i AlpAh = _mm_xor_si128(Al, Ah); + __m128i BlpBh = _mm_xor_si128(Bl, Bh); + DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0); + DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11); + AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e)); + BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e)); + DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Final comutation of C + __m128i middle = _mm_xor_si128(D0[1], D2[0]); + C[0] = D0[0]; + C[1] = middle ^ D0[0] ^ D1[0]; + C[2] = middle ^ D1[1] ^ D2[1]; + C[3] = D2[1]; +} + + + +/** + * @brief Compute C(x) 
= A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) { + __m256i D0[2], D1[2], D2[2], SAA, SBB; + const __m128i *A128 = (const __m128i *)A; + const __m128i *B128 = (const __m128i *)B; + __m256i middle; + + karat_mult_1((__m128i *) D0, A128, B128); + karat_mult_1((__m128i *) D2, A128 + 2, B128 + 2); + + SAA = A[0] ^ A[1]; + SBB = B[0] ^ B[1]; + karat_mult_1((__m128i *) D1, (__m128i *) &SAA, (__m128i *) &SBB); + middle = _mm256_xor_si256(D0[1], D2[0]); + + C[0] = D0[0]; + C[1] = middle ^ D0[0] ^ D1[0]; + C[2] = middle ^ D1[1] ^ D2[1]; + C[3] = D2[1]; +} + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) { + __m256i D0[4], D1[4], D2[4], SAA[2], SBB[2]; + __m256i middle0; + __m256i middle1; + + karat_mult_2(D0, A, B); + karat_mult_2(D2, A + 2, B + 2); + + SAA[0] = A[0] ^ A[2]; + SBB[0] = B[0] ^ B[2]; + SAA[1] = A[1] ^ A[3]; + SBB[1] = B[1] ^ B[3]; + + karat_mult_2(D1, SAA, SBB); + + middle0 = _mm256_xor_si256(D0[2], D2[0]); + middle1 = _mm256_xor_si256(D0[3], D2[1]); + + C[0] = D0[0]; + C[1] = D0[1]; + C[2] = middle0 ^ D0[0] ^ D1[0]; + C[3] = middle1 ^ D0[1] ^ D1[1]; + C[4] = middle0 ^ D1[2] ^ D2[2]; + C[5] = middle1 ^ D1[3] ^ D2[3]; + C[6] = D2[2]; + C[7] = D2[3]; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * 
@param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) { + size_t i, is, is2, is3; + __m256i D0[8], D1[8], D2[8], SAA[4], SBB[4]; + __m256i middle; + + karat_mult_4(D0, A, B); + karat_mult_4(D2, A + 4, B + 4); + + for (i = 0; i < 4; i++) { + is = i + 4; + SAA[i] = A[i] ^ A[is]; + SBB[i] = B[i] ^ B[is]; + } + + karat_mult_4(D1, SAA, SBB); + + for (i = 0; i < 4; i++) { + is = i + 4; + is2 = is + 4; + is3 = is2 + 4; + + middle = _mm256_xor_si256(D0[is], D2[i]); + + C[i] = D0[i]; + C[is] = middle ^ D0[i] ^ D1[i]; + C[is2] = middle ^ D1[is] ^ D2[is]; + C[is3] = D2[is]; + } +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +inline static void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B) { + size_t i, is, is2, is3; + __m256i middle; + __m256i D0[16], D1[16], D2[16], SAA[8], SBB[8]; + + karat_mult_8(D0, A, B); + karat_mult_8(D2, A + 8, B + 8); + + for (i = 0; i < 8; i++) { + is = i + 8; + SAA[i] = A[i] ^ A[is]; + SBB[i] = B[i] ^ B[is]; + } + + karat_mult_8(D1, SAA, SBB); + + for (i = 0; i < 8; i++) { + is = i + 8; + is2 = is + 8; + is3 = is2 + 8; + + middle = D0[is] ^ D2[i]; + + C[i] = D0[i]; + C[is] = middle ^ D0[i] ^ D1[i]; + C[is2] = middle ^ D1[is] ^ D2[is]; + C[is3] = D2[is]; + } +} + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba 3 part split + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) { + size_t i, j; 
+ const __m256i *a0, *b0, *a1, *b1, *a2, *b2; + __m256i aa01[VEC_N_SPLIT_3x3], bb01[VEC_N_SPLIT_3x3], aa02[VEC_N_SPLIT_3x3], bb02[VEC_N_SPLIT_3x3], aa12[VEC_N_SPLIT_3x3], bb12[VEC_N_SPLIT_3x3]; + __m256i D0[2 * VEC_N_SPLIT_3x3], D1[2 * VEC_N_SPLIT_3x3], D2[2 * VEC_N_SPLIT_3x3], D3[2 * VEC_N_SPLIT_3x3], D4[2 * VEC_N_SPLIT_3x3], D5[2 * VEC_N_SPLIT_3x3]; + __m256i ro256[6 * VEC_N_SPLIT_3x3]; + __m256i middle0; + + a0 = A; + a1 = A + VEC_N_SPLIT_3x3; + a2 = A + (VEC_N_SPLIT_3x3 << 1); + + b0 = B; + b1 = B + VEC_N_SPLIT_3x3; + b2 = B + (VEC_N_SPLIT_3x3 << 1); + + for (i = 0; i < VEC_N_SPLIT_3x3; i++) { + aa01[i] = a0[i] ^ a1[i]; + bb01[i] = b0[i] ^ b1[i]; + + aa12[i] = a2[i] ^ a1[i]; + bb12[i] = b2[i] ^ b1[i]; + + aa02[i] = a0[i] ^ a2[i]; + bb02[i] = b0[i] ^ b2[i]; + } + + karat_mult_16(D0, a0, b0); + karat_mult_16(D1, a1, b1); + karat_mult_16(D2, a2, b2); + + karat_mult_16(D3, aa01, bb01); + karat_mult_16(D4, aa02, bb02); + karat_mult_16(D5, aa12, bb12); + + for (i = 0; i < VEC_N_SPLIT_3x3; i++) { + j = i + VEC_N_SPLIT_3x3; + middle0 = D0[i] ^ D1[i] ^ D0[j]; + ro256[i] = D0[i]; + ro256[j] = D3[i] ^ middle0; + ro256[j + VEC_N_SPLIT_3x3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0; + middle0 = D1[j] ^ D2[i] ^ D2[j]; + ro256[j + (VEC_N_SPLIT_3x3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0; + ro256[i + (VEC_N_SPLIT_3x3 << 2)] = D5[j] ^ middle0; + ro256[j + (VEC_N_SPLIT_3x3 << 2)] = D2[j]; + } + + for (i = 0; i < 2 * VEC_N_SPLIT_3; i++) { + C[i] = ro256[i]; + } +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba 3 part split + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B) { + size_t i, j; + const __m256i *a0, *b0, *a1, *b1, *a2, *b2; + __m256i aa01[VEC_N_SPLIT_3], bb01[VEC_N_SPLIT_3], 
aa02[VEC_N_SPLIT_3], bb02[VEC_N_SPLIT_3], aa12[VEC_N_SPLIT_3], bb12[VEC_N_SPLIT_3]; + __m256i D0[2 * VEC_N_SPLIT_3], D1[2 * VEC_N_SPLIT_3], D2[2 * VEC_N_SPLIT_3], D3[2 * VEC_N_SPLIT_3], D4[2 * VEC_N_SPLIT_3], D5[2 * VEC_N_SPLIT_3]; + __m256i middle0; + + a0 = (__m256i *)(A->arr64); + a1 = a0 + VEC_N_SPLIT_3; + a2 = a0 + (2 * VEC_N_SPLIT_3); + + b0 = (__m256i *)(B->arr64); + b1 = b0 + VEC_N_SPLIT_3; + b2 = b0 + (2 * VEC_N_SPLIT_3); + + for (i = 0; i < VEC_N_SPLIT_3; i++) { + aa01[i] = a0[i] ^ a1[i]; + bb01[i] = b0[i] ^ b1[i]; + + aa12[i] = a2[i] ^ a1[i]; + bb12[i] = b2[i] ^ b1[i]; + + aa02[i] = a0[i] ^ a2[i]; + bb02[i] = b0[i] ^ b2[i]; + } + + karat_three_way_mult(D0, a0, b0); + karat_three_way_mult(D1, a1, b1); + karat_three_way_mult(D2, a2, b2); + + karat_three_way_mult(D3, aa01, bb01); + karat_three_way_mult(D4, aa02, bb02); + karat_three_way_mult(D5, aa12, bb12); + + for (i = 0; i < VEC_N_SPLIT_3; i++) { + j = i + VEC_N_SPLIT_3; + middle0 = D0[i] ^ D1[i] ^ D0[j]; + C[i] = D0[i]; + C[j] = D3[i] ^ middle0; + C[j + VEC_N_SPLIT_3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0; + middle0 = D1[j] ^ D2[i] ^ D2[j]; + C[j + (VEC_N_SPLIT_3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0; + C[i + (VEC_N_SPLIT_3 << 2)] = D5[j] ^ middle0; + C[j + (VEC_N_SPLIT_3 << 2)] = D2[j]; + } +} + + + +/** + * @brief Multiply two polynomials modulo \f$ X^n - 1\f$. + * + * This functions multiplies a dense polynomial a1 (of Hamming weight equal to weight) + * and a dense polynomial a2. The multiplication is done modulo \f$ X^n - 1\f$. 
+ * + * @param[out] o Pointer to the result + * @param[in] a1 Pointer to a polynomial + * @param[in] a2 Pointer to a polynomial + */ +void PQCLEAN_HQCRMRS192_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) { + __m256i a1_times_a2[2 * PARAM_N_MULT + 1] = {0}; + karat_mult9(a1_times_a2, a1, a2); + reduce(o, a1_times_a2); +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h b/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h new file mode 100644 index 00000000..4e6b7fe0 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h @@ -0,0 +1,21 @@ +#ifndef GF2X_H +#define GF2X_H + + +/** + * @file gf2x.h + * @brief Header file for gf2x.c + */ +#include "parameters.h" +#include +#include + +typedef union { + uint64_t arr64[VEC_N_256_SIZE_64]; + __m256i dummy; +} aligned_vec_t; + +void PQCLEAN_HQCRMRS192_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/hqc.c b/src/kem/hqc/hqc-rmrs-192/avx2/hqc.c new file mode 100644 index 00000000..895fd9b1 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/hqc.c @@ -0,0 +1,168 @@ +#include "code.h" +#include "gf2x.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +#include +/** + * @file hqc.c + * @brief Implementation of hqc.h + */ + + + +/** + * @brief Keygen of the HQC_PKE IND_CPA scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. 
+ *
+ * @param[out] pk String containing the public key
+ * @param[out] sk String containing the secret key
+ */
+void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
+    AES_XOF_struct sk_seedexpander;
+    AES_XOF_struct pk_seedexpander;
+    uint8_t sk_seed[SEED_BYTES] = {0};
+    uint8_t pk_seed[SEED_BYTES] = {0};
+    aligned_vec_t vx = {0};
+    uint64_t *x = vx.arr64;
+    aligned_vec_t vy = {0};
+    uint64_t *y = vy.arr64;
+    aligned_vec_t vh = {0};
+    uint64_t *h = vh.arr64;
+    aligned_vec_t vs = {0};
+    uint64_t *s = vs.arr64;
+    aligned_vec_t vtmp = {0};
+    uint64_t *tmp = vtmp.arr64;
+
+    // Create seed_expanders for public key and secret key (each random seed buffer is split at offset 32 into the two inputs of seedexpander_init)
+    randombytes(sk_seed, SEED_BYTES);
+    seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);
+
+    randombytes(pk_seed, SEED_BYTES);
+    seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);
+
+    // Compute secret key: x and y, both drawn from the secret seed expander with weight PARAM_OMEGA
+    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA);
+    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA);
+
+    // Compute public key: h from the public seed expander, then s = x + y.h
+    PQCLEAN_HQCRMRS192_AVX2_vect_set_random(&pk_seedexpander, h);
+    PQCLEAN_HQCRMRS192_AVX2_vect_mul(tmp, &vy, &vh);
+    PQCLEAN_HQCRMRS192_AVX2_vect_add(s, x, tmp, VEC_N_256_SIZE_64);
+
+    // Parse keys to string (sk is built from sk_seed and the already serialized pk)
+    PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(pk, pk_seed, s);
+    PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(sk, sk_seed, pk);
+
+}
+
+
+
+/**
+ * @brief Encryption of the HQC_PKE IND_CPA scheme
+ *
+ * The ciphertext is composed of vectors u and v.
+ * + * @param[out] u Vector u (first part of the ciphertext) + * @param[out] v Vector v (second part of the ciphertext) + * @param[in] m Vector representing the message to encrypt + * @param[in] theta Seed used to derive randomness required for encryption + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) { + AES_XOF_struct seedexpander; + aligned_vec_t vh = {0}; + uint64_t *h = vh.arr64; + aligned_vec_t vs = {0}; + uint64_t *s = vs.arr64; + aligned_vec_t vr1 = {0}; + uint64_t *r1 = vr1.arr64; + aligned_vec_t vr2 = {0}; + uint64_t *r2 = vr2.arr64; + aligned_vec_t ve = {0}; + uint64_t *e = ve.arr64; + aligned_vec_t vtmp1 = {0}; + uint64_t *tmp1 = vtmp1.arr64; + aligned_vec_t vtmp2 = {0}; + uint64_t *tmp2 = vtmp2.arr64; + aligned_vec_t vtmp3 = {0}; + uint64_t *tmp3 = vtmp3.arr64; + + // Create seed_expander from theta + seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH); + + // Retrieve h and s from public key + PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(h, s, pk); + + // Generate r1, r2 and e + PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&seedexpander, r2, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E); + + + + // Compute u = r1 + r2.h + PQCLEAN_HQCRMRS192_AVX2_vect_mul(tmp1, &vr2, &vh); + PQCLEAN_HQCRMRS192_AVX2_vect_add(u, r1, tmp1, VEC_N_256_SIZE_64); + + // Compute v = m.G by encoding the message + PQCLEAN_HQCRMRS192_AVX2_code_encode((uint8_t *)v, m); + PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES); + PQCLEAN_HQCRMRS192_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + + // Compute v = m.G + s.r2 + e + PQCLEAN_HQCRMRS192_AVX2_vect_mul(tmp2, &vr2, &vs); + PQCLEAN_HQCRMRS192_AVX2_vect_add(tmp3, 
e, tmp2, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS192_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS192_AVX2_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N); + +} + + + +/** + * @brief Decryption of the HQC_PKE IND_CPA scheme + * + * @param[out] m Vector representing the decrypted message + * @param[in] u Vector u (first part of the ciphertext) + * @param[in] v Vector v (second part of the ciphertext) + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) { + uint8_t pk[PUBLIC_KEY_BYTES] = {0}; + aligned_vec_t vx = {0}; + uint64_t *x = vx.arr64; + aligned_vec_t vy = {0}; + uint64_t *y = vy.arr64; + aligned_vec_t vtmp1 = {0}; + uint64_t *tmp1 = vtmp1.arr64; + aligned_vec_t vtmp2 = {0}; + uint64_t *tmp2 = vtmp2.arr64; + aligned_vec_t vtmp3 = {0}; + uint64_t *tmp3 = vtmp3.arr64; + + // Retrieve x, y, pk from secret key + PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(x, y, pk, sk); + + // Compute v - u.y + PQCLEAN_HQCRMRS192_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + for (size_t i = 0; i < VEC_N_256_SIZE_64; i++) { + tmp2[i] = u[i]; + } + PQCLEAN_HQCRMRS192_AVX2_vect_mul(tmp3, &vy, &vtmp2); + PQCLEAN_HQCRMRS192_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64); + + + // Compute m by decoding v - u.y + PQCLEAN_HQCRMRS192_AVX2_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS192_AVX2_code_decode(m, (uint8_t *)tmp1); +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/hqc.h b/src/kem/hqc/hqc-rmrs-192/avx2/hqc.h new file mode 100644 index 00000000..0642294d --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/hqc.h @@ -0,0 +1,19 @@ +#ifndef HQC_H +#define HQC_H + + +/** + * @file hqc.h + * @brief Functions of the HQC_PKE IND_CPA scheme + */ + +#include + +void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk); + +void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(uint64_t *u, 
uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk); + +void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/kem.c b/src/kem/hqc/hqc-rmrs-192/avx2/kem.c new file mode 100644 index 00000000..012d8eed --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/kem.c @@ -0,0 +1,140 @@ +#include "api.h" +#include "fips202.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "sha2.h" +#include "vector.h" +#include +#include +/** + * @file kem.c + * @brief Implementation of api.h + */ + + + +/** + * @brief Keygen of the HQC_KEM IND_CCA2 scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API.
+ * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + * @returns 0 if keygen is successful + */ +int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + + PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(pk, sk); + return 0; +} + + + +/** + * @brief Encapsulation of the HQC_KEM IND_CCA2 scheme + * + * @param[out] ct String containing the ciphertext + * @param[out] ss String containing the shared secret + * @param[in] pk String containing the public key + * @returns 0 if encapsulation is successful + */ +int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + + uint8_t theta[SHA512_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint64_t u[VEC_N_256_SIZE_64] = {0}; /* automatic storage, like u2 in crypto_kem_dec: a static buffer would be shared by concurrent callers */ + uint64_t v[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Computing m + randombytes(m, VEC_K_SIZE_BYTES); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m + PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(u, v, m, theta, pk); + + // Computing d + sha512(d, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Computing ciphertext + PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(ct, u, v, d); + + + return 0; +} + + + +/** + * @brief Decapsulation of the HQC_KEM IND_CCA2 scheme + * + * @param[out] ss String containing the shared secret + * @param[in] ct String containing the ciphertext + * @param[in] sk String containing the secret key + * @returns 0 if decapsulation is successful, -1 otherwise 
+ */ +int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + + uint8_t result; + uint64_t u[VEC_N_256_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char pk[PUBLIC_KEY_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint8_t theta[SHA512_BYTES] = {0}; + uint64_t u2[VEC_N_256_SIZE_64] = {0}; + uint64_t v2[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d2[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Retrieving u, v and d from ciphertext + PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(u, v, d, ct); + + // Retrieving pk from sk + memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES); + + // Decrypting + PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(m, u, v, sk); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m' + PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk); + + // Computing d' + sha512(d2, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Abort if c != c' or d != d' + result = PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS192_AVX2_vect_compare(d, d2, SHA512_BYTES); + result = (uint8_t) (-((int16_t) result) >> 15); + for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { + ss[i] &= ~result; + } + + + return -(result & 1); +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/parameters.h b/src/kem/hqc/hqc-rmrs-192/avx2/parameters.h new file mode 
100644 index 00000000..c893b7b7 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/parameters.h @@ -0,0 +1,109 @@ +#ifndef HQC_PARAMETERS_H +#define HQC_PARAMETERS_H + + +/** + * @file parameters.h + * @brief Parameters of the HQC_KEM IND-CCA2 scheme + */ +#include "api.h" + + +#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/ + +/* + #define PARAM_N Define the parameter n of the scheme + #define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code) + #define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code) + #define PARAM_N1N2 Define the length in bits of the Concatenated code + #define PARAM_OMEGA Define the parameter omega of the scheme + #define PARAM_OMEGA_E Define the parameter omega_e of the scheme + #define PARAM_OMEGA_R Define the parameter omega_r of the scheme + #define PARAM_SECURITY Define the security level corresponding to the chosen parameters + #define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters + + #define SECRET_KEY_BYTES Define the size of the secret key in bytes + #define PUBLIC_KEY_BYTES Define the size of the public key in bytes + #define SHARED_SECRET_BYTES Define the size of the shared secret in bytes + #define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes + + #define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function) + #define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes + #define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes + #define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes + #define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes + + #define VEC_N_SIZE_64 Define the size of the array used to store a 
PARAM_N sized vector in 64 bits + #define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits + #define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits + #define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits + + #define VEC_N_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits + #define VEC_N1N2_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits + + #define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code) + #define PARAM_M Define a positive integer + #define PARAM_GF_POLY Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form + #define PARAM_GF_POLY_WT Hamming weight of PARAM_GF_POLY + #define PARAM_GF_POLY_M2 Distance between the primitive polynomial first two set bits + #define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1 + #define PARAM_K Define the size of the information bits of the Reed-Solomon code + #define PARAM_G Define the size of the generator polynomial of Reed-Solomon code + #define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input + We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=16 + The smallest power of 2 greater than 16+1 is 32=2^5 + #define RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code + + #define RED_MASK A mask for the higher bits of a vector + #define SHA512_BYTES Define the size of SHA512 output in bytes + #define SEED_BYTES Define the size of the seed in bytes + #define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length +*/ + +#define PARAM_N 35851 +#define PARAM_N1 56 +#define PARAM_N2 640 +#define PARAM_N1N2 35840 +#define 
PARAM_OMEGA 100 +#define PARAM_OMEGA_E 114 +#define PARAM_OMEGA_R 114 +#define PARAM_SECURITY 192 +#define PARAM_DFR_EXP 192 + +#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES +#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_PUBLICKEYBYTES +#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_BYTES +#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_CIPHERTEXTBYTES + +#define UTILS_REJECTION_THRESHOLD 16742417 +#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8) +#define VEC_K_SIZE_BYTES PARAM_K +#define VEC_N1_SIZE_BYTES PARAM_N1 +#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8) + +#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64) +#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8) +#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8) +#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64) + +#define PARAM_N_MULT (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256)) +#define VEC_N_256_SIZE_64 (PARAM_N_MULT / 64) +#define VEC_N1N2_256_SIZE_64 (CEIL_DIVIDE(PARAM_N1N2, 256) << 2) + +#define PARAM_DELTA 16 +#define PARAM_M 8 +#define PARAM_GF_POLY 0x11D +#define PARAM_GF_POLY_WT 5 +#define PARAM_GF_POLY_M2 4 +#define PARAM_GF_MUL_ORDER 255 +#define PARAM_K 24 +#define PARAM_G 33 +#define PARAM_FFT 5 +#define RS_POLY_COEFS 45,216,239,24,253,104,27,40,107,50,163,210,227,134,224,158,119,13,158,1,238,164,82,43,15,232,246,142,50,189,29,232,1 + +#define RED_MASK 0x7ff +#define SHA512_BYTES 64 +#define SEED_BYTES 40 +#define SEEDEXPANDER_MAX_LENGTH 4294967295 + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/parsing.c b/src/kem/hqc/hqc-rmrs-192/avx2/parsing.c new file mode 100644 index 00000000..5ed99cc6 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/parsing.c @@ -0,0 +1,186 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file parsing.c + * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme + */ + + +void 
PQCLEAN_HQCRMRS192_AVX2_store8(unsigned char *out, uint64_t in) { + out[0] = (in >> 0x00) & 0xFF; + out[1] = (in >> 0x08) & 0xFF; + out[2] = (in >> 0x10) & 0xFF; + out[3] = (in >> 0x18) & 0xFF; + out[4] = (in >> 0x20) & 0xFF; + out[5] = (in >> 0x28) & 0xFF; + out[6] = (in >> 0x30) & 0xFF; + out[7] = (in >> 0x38) & 0xFF; +} + + +uint64_t PQCLEAN_HQCRMRS192_AVX2_load8(const unsigned char *in) { + uint64_t ret = in[7]; + + for (int8_t i = 6; i >= 0; i--) { + ret <<= 8; + ret |= in[i]; + } + + return ret; +} + +void PQCLEAN_HQCRMRS192_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) { + size_t index_in = 0; + size_t index_out = 0; + + // first copy by 8 bytes + if (inlen >= 8 && outlen >= 1) { + while (index_out < outlen && index_in + 8 <= inlen) { + out64[index_out] = PQCLEAN_HQCRMRS192_AVX2_load8(in8 + index_in); + + index_in += 8; + index_out += 1; + } + } + + // we now need to do the last 7 bytes if necessary + if (index_in >= inlen || index_out >= outlen) { + return; + } + out64[index_out] = in8[inlen - 1]; + for (int8_t i = (int8_t)(inlen - index_in) - 2; i >= 0; i--) { + out64[index_out] <<= 8; + out64[index_out] |= in8[index_in + i]; + } +} + +void PQCLEAN_HQCRMRS192_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) { + for (size_t index_out = 0, index_in = 0; index_out < outlen && index_in < inlen;) { + out8[index_out] = (in64[index_in] >> ((index_out % 8) * 8)) & 0xFF; + index_out++; + if (index_out % 8 == 0) { + index_in++; + } + } +} + + +/** + * @brief Parse a secret key into a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. 
+ * + * @param[out] sk String containing the secret key + * @param[in] sk_seed Seed used to generate the secret key + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) { + memcpy(sk, sk_seed, SEED_BYTES); + sk += SEED_BYTES; + memcpy(sk, pk, PUBLIC_KEY_BYTES); +} + +/** + * @brief Parse a secret key from a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] x uint64_t representation of vector x + * @param[out] y uint64_t representation of vector y + * @param[out] pk String containing the public key + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) { + AES_XOF_struct sk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + + memcpy(sk_seed, sk, SEED_BYTES); + sk += SEED_BYTES; + memcpy(pk, sk, PUBLIC_KEY_BYTES); + + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA); +} + +/** + * @brief Parse a public key into a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] pk String containing the public key + * @param[in] pk_seed Seed used to generate the public key + * @param[in] s uint64_t representation of vector s + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) { + memcpy(pk, pk_seed, SEED_BYTES); + PQCLEAN_HQCRMRS192_AVX2_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64); +} + + + +/** + * @brief Parse a public key from a 
string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] h uint64_t representation of vector h + * @param[out] s uint64_t representation of vector s + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) { + AES_XOF_struct pk_seedexpander; + uint8_t pk_seed[SEED_BYTES] = {0}; + + memcpy(pk_seed, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_HQCRMRS192_AVX2_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES); + + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS192_AVX2_vect_set_random(&pk_seedexpander, h); +} + + +/** + * @brief Parse a ciphertext into a string + * + * The ciphertext is composed of vectors u, v and hash d. + * + * @param[out] ct String containing the ciphertext + * @param[in] u uint64_t representation of vector u + * @param[in] v uint64_t representation of vector v + * @param[in] d String containing the hash d + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) { + PQCLEAN_HQCRMRS192_AVX2_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS192_AVX2_store8_arr(ct, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(ct, d, SHA512_BYTES); +} + + +/** + * @brief Parse a ciphertext from a string + * + * The ciphertext is composed of vectors u, v and hash d. 
+ * + * @param[out] u uint64_t representation of vector u + * @param[out] v uint64_t representation of vector v + * @param[out] d String containing the hash d + * @param[in] ct String containing the ciphertext + */ +void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) { + PQCLEAN_HQCRMRS192_AVX2_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, ct, VEC_N1N2_SIZE_BYTES); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(d, ct, SHA512_BYTES); +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/parsing.h b/src/kem/hqc/hqc-rmrs-192/avx2/parsing.h new file mode 100644 index 00000000..2993e88f --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/parsing.h @@ -0,0 +1,36 @@ +#ifndef PARSING_H +#define PARSING_H + + +/** + * @file parsing.h + * @brief Header file for parsing.c + */ + +#include + +void PQCLEAN_HQCRMRS192_AVX2_store8(unsigned char *out, uint64_t in); + +uint64_t PQCLEAN_HQCRMRS192_AVX2_load8(const unsigned char *in); + +void PQCLEAN_HQCRMRS192_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen); + +void PQCLEAN_HQCRMRS192_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen); + + +void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk); + +void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk); + + +void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s); + +void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk); + + +void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d); + +void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct); + + +#endif diff 
--git a/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c b/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c new file mode 100644 index 00000000..dbfd6a29 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c @@ -0,0 +1,389 @@ +#include "parameters.h" +#include "reed_muller.h" +#include +#include +#include +/** + * @file reed_muller.c + * Constant time implementation of Reed-Muller code RM(1,7) + */ + + +// number of repeated code words +#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) + +// copy bit 0 into all bits of a 64 bit value +#define BIT0MASK(x) (int64_t)(-((x) & 1)) + +static void encode(uint8_t *word, uint8_t message); +static void expand_and_sum(__m256i *dst, const uint64_t *src); +static void hadamard(__m256i *src, __m256i *dst); +static uint32_t find_peaks(__m256i *transform); + + + +/** + * @brief Encode a single byte into a single codeword using RM(1,7) + * + * Encoding matrix of this code: + * bit pattern (note that bits are numbered big endian) + * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa + * 1 cccccccc cccccccc cccccccc cccccccc + * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0 + * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00 + * 4 ffff0000 ffff0000 ffff0000 ffff0000 + * 5 00000000 ffffffff 00000000 ffffffff + * 6 00000000 00000000 ffffffff ffffffff + * 7 ffffffff ffffffff ffffffff ffffffff + * + * @param[out] word An RM(1,7) codeword + * @param[in] message A message to encode + */ +static void encode(uint8_t *word, uint8_t message) { + uint32_t e; + // bit 7 flips all the bits, do that first to save work + e = BIT0MASK(message >> 7); + // bits 0, 1, 2, 3, 4 are the same for all four longs + // (Warning: in the bit matrix above, low bits are at the left!) 
+ e ^= BIT0MASK(message >> 0) & 0xaaaaaaaa; + e ^= BIT0MASK(message >> 1) & 0xcccccccc; + e ^= BIT0MASK(message >> 2) & 0xf0f0f0f0; + e ^= BIT0MASK(message >> 3) & 0xff00ff00; + e ^= BIT0MASK(message >> 4) & 0xffff0000; + // we can store this in the first quarter + word[0 + 0] = (e >> 0x00) & 0xff; + word[0 + 1] = (e >> 0x08) & 0xff; + word[0 + 2] = (e >> 0x10) & 0xff; + word[0 + 3] = (e >> 0x18) & 0xff; + // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 + e ^= BIT0MASK(message >> 5); + word[4 + 0] = (e >> 0x00) & 0xff; + word[4 + 1] = (e >> 0x08) & 0xff; + word[4 + 2] = (e >> 0x10) & 0xff; + word[4 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 6); + word[12 + 0] = (e >> 0x00) & 0xff; + word[12 + 1] = (e >> 0x08) & 0xff; + word[12 + 2] = (e >> 0x10) & 0xff; + word[12 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 5); + word[8 + 0] = (e >> 0x00) & 0xff; + word[8 + 1] = (e >> 0x08) & 0xff; + word[8 + 2] = (e >> 0x10) & 0xff; + word[8 + 3] = (e >> 0x18) & 0xff; +} + + + +/** + * @brief Add multiple codewords into expanded codeword + * + * Note: this does not write the codewords as -1 or +1 as the green machine does + * instead, just 0 and 1 is used. 
+ * The resulting hadamard transform has: + * all values are halved + * the first entry is 64 too high + * + * @param[out] dst Structure that contain the expanded codeword + * @param[in] src Structure that contain the codeword + */ +inline void expand_and_sum(__m256i *dst, const uint64_t *src) { + uint16_t v[16]; + for (size_t part = 0; part < 8; part++) { + dst[part] = _mm256_setzero_si256(); + } + for (size_t copy = 0; copy < MULTIPLICITY; copy++) { + for (size_t part = 0; part < 8; part++) { + for (size_t bit = 0; bit < 16; bit++) { + v[bit] = (((uint16_t *)(&src[2 * copy]))[part] >> bit) & 1; + } + dst[part] += _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8], + v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); + } + } +} + + + +/** + * @brief Hadamard transform + * + * Perform hadamard transform of src and store result in dst + * src is overwritten: it is also used as intermediate buffer + * Method is best explained if we use H(3) instead of H(7): + * + * The routine multiplies by the matrix H(3): + * [1 1 1 1 1 1 1 1] + * [1 -1 1 -1 1 -1 1 -1] + * [1 1 -1 -1 1 1 -1 -1] + * [a b c d e f g h] * [1 -1 -1 1 1 -1 -1 1] = result of routine + * [1 1 1 1 -1 -1 -1 -1] + * [1 -1 1 -1 -1 1 -1 1] + * [1 1 -1 -1 -1 -1 1 1] + * [1 -1 -1 1 -1 1 1 -1] + * You can do this in three passes, where each pass does this: + * set lower half of buffer to pairwise sums, + * and upper half to differences + * index 0 1 2 3 4 5 6 7 + * input: a, b, c, d, e, f, g, h + * pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h + * pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h + * pass 3: a+b+c+d+e+f+g+h a+b-c-d+e+f-g-h a+b+c+d-e-f-g-h a+b-c-d-e+-f+g+h + * a-b+c-d+e-f+g-h a-b-c+d+e-f-g+h a-b+c-d-e+f-g+h a-b-c+d-e+f+g-h + * This order of computation is chosen because it vectorises well. + * Likewise, this routine multiplies by H(7) in seven passes. 
+ * + * @param[in,out] src Structure that contains the expanded codeword + * @param[out] dst Structure that contains the expanded codeword + */ +inline void hadamard(__m256i *src, __m256i *dst) { + // the passes move data: + // src -> dst -> src -> dst -> src -> dst -> src -> dst + // using p1 and p2 alternately + __m256i *p1 = src; + __m256i *p2 = dst; + __m256i *p3; + for (size_t pass = 0; pass < 7; pass++) { + // warning: hadd works "within lanes" as Intel call it + // so you have to swap the middle 64 bit blocks of the result + for (size_t part = 0; part < 4; part++) { + p2[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); + p2[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); + } + // swap p1, p2 for next round + p3 = p1; + p1 = p2; + p2 = p3; + } +} + + + +/** + * @brief Finding the location of the highest value + * + * This is the final step of the green machine: find the location of the highest value, + * and add 128 if the peak is positive + * Notes on decoding + * The standard "Green machine" decoder works as follows: + * if the received codeword is W, compute (2 * W - 1) * H7 + * The entries of the resulting vector are always even and vary from + * -128 (= the complement is a code word, add bit 7 to decode) + * via 0 (this is a different codeword) + * to 128 (this is the code word). + * + * Our decoding differs in two ways: + * - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1) + * - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY) + * This implies that we have to subtract 64M (M=MULTIPLICITY) + * from the first entry to make sure the first codeword is handled properly + * and that the entries vary from -64M to 64M. + * -64M or 64M stands for a perfect codeword. 
+ * If there are fewer than 32M errors, there is always a unique codeword + * which an entry with absolute value > 32M; + * this is because an error changes an entry by 1. + * The highest number that seem to be decodable is 50 errors, so that the + * highest entries in the hadamard transform can be as low as 12. + * But this is different for the repeated code. + * Because multiple codewords are added, this changes: the lowest value of the + * hadamard transform of the sum of six words is seen to be as low as 43 (!), + * which is way less than 12*6. + * + * It is possible that there are more errors, but the word is still uniquely + * decodable: we found a word with distance of 50 from the nearest codeword. + * That means that the highest entry can be as low as 14M. + * Since we have to do binary search, we search for the range 1-64M + * which can be done in 6+l2g(M) steps. + * The binary search is based on (values>32M are unique): + * M 32M min> max> firstStep #steps + * 2 64 1 64 33 +- 16 6 + * 4 128 1 128 65 +- 32 7 + * 6 192 1 192 129 +- 64 8 + * + * As a check, we run a sample for M=6 to see the peak value; it ranged + * from 43 to 147, so my analysis looks right. Also, it shows that decoding + * far beyond the bound of 32M is needed. + * + * For the vectors, it would be tempting to use 8 bit ints, + * because the values "almost" fit in there. + * We could use some trickery to fit it in 8 bits, like saturated add or + * division by 2 in a late step. + * Unfortunately, these instructions do not exist. + * the adds _mm512_adds_epi8 is available only on the latest processors, + * and division, shift, mulhi are not available at all for 8 bits. + * So, we use 16 bit ints. + * + * For the search of the optimal comparison value, + * remember the transform contains 64M-d, + * where d are the distances to the codewords. + * The highest value gives the most likely codeword. 
+ * There is not fast vectorized way to find this value, so we search for the + * maximum value itself. + * In each pass, we collect a bit map of the transform values that are, + * say >bound. There are three cases: + * bit map = 0: all code words are further away than 64M-bound (decrease bound) + * bit map has one bit: one unique code word has distance < 64M-bound + * bit map has multiple bits: multiple words (increase bound) + * We will search for the lowest value of bound that gives a nonzero bit map. + * + * @param[in] transform Structure that contain the expanded codeword + */ +inline uint32_t find_peaks(__m256i *transform) { + // a whole lot of vector variables + __m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows; + __m256i tmp = _mm256_setzero_si256(); + __m256i vect_mask; + __m256i res; + int32_t lower; + int32_t width; + uint32_t message; + uint32_t mask; + int8_t index; + int8_t abs_value; + int8_t mask1; + int8_t mask2; + uint16_t result; + + // compute absolute value of transform + for (size_t i = 0; i < 8; i++) { + abs_rows[i] = _mm256_abs_epi16(transform[i]); + } + // compute a vector of 16 elements which contains the maximum somewhere + // (later used to compute bits 0 through 3 of message) + max_abs_rows = abs_rows[0]; + for (size_t i = 1; i < 8; i++) { + max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]); + } + + // do binary search for the highest value that is lower than the maximum + // loop invariant: lower gives bit map = 0, lower + width gives bit map > 0 + lower = 1; + // this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6 + width = 1 << (5 + MULTIPLICITY / 2); + // if you don't unroll this loop, it fits in the loop cache + // uncomment the line below to speeding up the program by a few percent + // #pragma GCC unroll 0 + while (width > 1) { + width >>= 1; + // compare with lower + width; put result in bitmap + // make vector from value of new bound + bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width)); + 
bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound); + // step up if there are any matches + // rely on compiler to use conditional move here + mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap); + mask = ~(uint32_t) ((-(int64_t) mask) >> 63); + lower += mask & width; + } + // lower+width contains the maximum value of the vector + // or less, if the maximum is very high (which is OK) + // normally, there is one maximum, but sometimes there are more + // find where the maxima occur in the maximum vector + // (each determines lower 4 bits of peak position) + // construct vector filled with bound-1 + bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1)); + + // find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message + // find lowest value by searching backwards skip first check to save time + message = 0x70; + for (size_t i = 0; i < 8; i++) { + bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound); + mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap); + mask = ~(uint32_t) ((-(int64_t) mask) >> 63); + message ^= mask & (message ^ ((7 - i) << 4)); + } + // we decided which row of the matrix contains the lowest match + // select proper row + index = message >> 4; + + tmp = _mm256_setzero_si256(); + for (size_t i = 0; i < 8; i++) { + abs_value = (int8_t)(index - i); + mask1 = abs_value >> 7; + abs_value ^= mask1; + abs_value -= mask1; + mask2 = ((uint8_t) - abs_value >> 7); + mask = (-1ULL) + mask2; + vect_mask = _mm256_set1_epi32(mask); + res = _mm256_and_si256(abs_rows[i], vect_mask); + tmp = _mm256_or_si256(tmp, res); + } + + active_row = tmp; + + // get the column number of the vector element + // by setting the bits corresponding to the columns + // and then adding elements within two groups of 8 + vect_mask = _mm256_cmpgt_epi16(active_row, bound); + vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1); + for (size_t i = 0; i < 3; i++) { + vect_mask = 
_mm256_hadd_epi16(vect_mask, vect_mask); + } + // add low 4 bits of message + message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8)); + + // set bit 7 if sign of biggest value is positive + // make sure a jump isn't generated by the compiler + tmp = _mm256_setzero_si256(); + for (size_t i = 0; i < 8; i++) { + mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63); + vect_mask = _mm256_set1_epi32(mask); + tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i])); + } + result = 0; + for (size_t i = 0; i < 16; i++) { + mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63); + result |= mask & ((uint16_t *)&tmp)[i]; + } + message |= (0x8000 & ~result) >> 8; + return message; +} + + + +/** + * @brief Encodes the received word + * + * The message consists of N1 bytes each byte is encoded into PARAM_N2 bits, + * or MULTIPLICITY repeats of 128 bits + * + * @param[out] cdw Array of size VEC_N1N2_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_N1_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) { + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // encode first word + encode(&cdw[16 * i * MULTIPLICITY], msg[i]); + // copy to other identical codewords + for (size_t copy = 1; copy < MULTIPLICITY; copy++) { + memcpy(&cdw[16 * i * MULTIPLICITY + 16 * copy], &cdw[16 * i * MULTIPLICITY], 16); + } + } +} + + + +/** + * @brief Decodes the received word + * + * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane. 
+ * The theory of error-correcting codes @cite macwilliams1977theory + * + * @param[out] msg Array of size VEC_N1_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1N2_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) { + __m256i expanded[8]; + __m256i transform[8]; + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // collect the codewords + expand_and_sum(expanded, (uint64_t *)&cdw[16 * i * MULTIPLICITY]); + // apply hadamard transform + hadamard(expanded, transform); + // fix the first entry to get the half Hadamard transform + transform[0] -= _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY); + // finish the decoding + msg[i] = find_peaks(transform); + } +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h b/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h new file mode 100644 index 00000000..28960b77 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h @@ -0,0 +1,18 @@ +#ifndef REED_MULLER_H +#define REED_MULLER_H + + +/** + * @file reed_muller.h + * Header file of reed_muller.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c b/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c new file mode 100644 index 00000000..41bb74fd --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c @@ -0,0 +1,476 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include "parsing.h" +#include "reed_solomon.h" +#include +#include +#include +/** + * @file reed_solomon.c + * Constant time implementation of Reed-Solomon codes + */ + + +static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); 
+static void compute_roots(uint8_t *error, uint16_t *sigma); +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); +static void correct_errors(uint8_t *cdw, const uint16_t *error_values); + +static const __m256i alpha_ij256_1[55] = { + {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087}, + {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006}, + {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035}, + {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014}, + {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be}, + {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078}, + {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3}, + {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d}, + {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed}, + {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e}, + {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054}, + {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4}, + {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5}, + {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062}, + {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064}, + {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051}, + {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045}, + {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb}, + {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083}, + 
{0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020}, + {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d}, + {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0}, + {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee}, + {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba}, + {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e}, + {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb}, + {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9}, + {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd}, + {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec}, + {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9}, + {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052}, + {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1}, + {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1}, + {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc}, + {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c}, + {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2}, + {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048}, + {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016}, + {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad}, + {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074}, + {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9}, + {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025}, + {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 
0x00ab00a9005b008c}, + {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de}, + {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f}, + {0x00950021005b009f, 0x001f00c2007500f4, 0x00b500d800a70073, 0x0048009600da00fe}, + {0x00a5001500710023, 0x00760089000c00eb, 0x0050008000ef00fc, 0x00b0006400520022}, + {0x008200a800d90046, 0x001700e70027002c, 0x0061002d002400db, 0x0008005900bf003e}, + {0x00c800290043008c, 0x009e00fe003500e9, 0x0078003000eb006e, 0x005a002400e300cc}, + {0x001c005500110005, 0x004d00e200c1006c, 0x00df006a00e90064, 0x009c002c00ae0084}, + {0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd, 0x000a000100dd0092}, + {0x005100e4000d0014, 0x00d100bd00ba0020, 0x003e00de007400f2, 0x00c20026002b003f}, + {0x0079007300340028, 0x00e500f800a10074, 0x006600ca00b4008a, 0x00bb006000f7004b}, + {0x00c300bf00d00050, 0x00db00c5002f0026, 0x0021006b006000f5, 0x008600c100cf0082}, + {0x00ac0091006700a0, 0x0037002e000f00b4, 0x005500e2006a002c, 0x007c00b9002000a7} +}; +static const __m256i alpha_ij256_2[55] = { + {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x009d00c000600030}, + {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x005f00de00b90069}, + {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x00d900b600df007f}, + {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00850097003b00f8}, + {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x00e600720055004d}, + {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x0082006e009600f1}, + {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x0012009b005900e0}, + {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x0002001b002c00f7}, + {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0027008f00260040}, + {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x00be00a000c1009c}, + {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00af00b1000f005f}, 
+ {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x001700ed001a00b6}, + {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x00d1005200a900ec}, + {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x0019004b009100aa}, + {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0024005900640096}, + {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00040058002400a2}, + {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x004e00980001000b}, + {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00610046006000cd}, + {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x004300f000b900d4}, + {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x002e006700df005e}, + {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00bf0015003b0086}, + {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0032007b00550085}, + {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x004800e0009600d5}, + {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000800fb0059006e}, + {0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x009c0074002c00ef}, + {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00c200d4002600fa}, + {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0086006500c1002d}, + {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x005c0088000f0023}, + {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x006300da001a001e}, + {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0064009100a9001a}, + {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x009000c8009100da}, + {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0010009000640063}, + {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0025000800240082}, + {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0099004e00010045}, + {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 
0x001100be0060006c}, + {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x00b800d900b9008f}, + {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x00c600cc00df0028}, + {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00c800b7003b00d3}, + {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x003d0057005500ce}, + {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x002000ac00960084}, + {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x004a00d8005900e5}, + {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x002f000c002c0007}, + {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0022006900260090}, + {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x006d00e100c10002}, + {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x0091003b000f0060}, + {0x007200bd00a10098, 0x006b009400830038, 0x0087008a00e3002e, 0x008d00aa001a00d2}, + {0x00ff008500e7004e, 0x00d0006f0013008a, 0x00d4003600700072, 0x007a006200a900fe}, + {0x006400290086000a, 0x00b8006b0025007d, 0x002f0075003d0096, 0x004000f2009100ed}, + {0x00ef003f00ed0099, 0x00e400680069003a, 0x00af0046008e00a7, 0x009400fa0064009a}, + {0x00eb003700a900d6, 0x0096002e00fd0060, 0x0033000f000300f4, 0x005e00b4002400ff}, + {0x000100dd00920044, 0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd}, + {0x00b4000900b30093, 0x003d00e300970065, 0x00310017003c0003, 0x00da00d3006000f3}, + {0x006a00b00057004f, 0x00ad000e009a00b6, 0x00a200e400880005, 0x003f001f00b90080}, + {0x00b9004000a60092, 0x0075008a00fc003e, 0x008b00c40017000f, 0x000700a800df0025}, + {0x00fd0003002400d7, 0x00c100e900ae00a9, 0x0074005900720011, 0x00f400ff003b00be} +}; + +/** + * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes + * + * Following @cite lin1983error (Chapter 4 - Cyclic Codes), + * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register + * with feedback 
connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code.
+ *
+ * The first PARAM_N1 - PARAM_K bytes of cdw are used as the shift register: they are
+ * read before being overwritten, so their value on entry feeds the result.
+ * NOTE(review): callers presumably pass a zeroed buffer - confirm.
+ *
+ * @param[out] cdw Array of PARAM_N1 bytes receiving the encoded message
+ * @param[in] msg Array of PARAM_K bytes storing the message
+ */
+void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
+    size_t i, k;
+    uint8_t gate_value = 0;
+    uint8_t prev, x;
+
+    // The unions only exist to give the 16-bit arrays the alignment of a __m256i,
+    // so that they can be accessed through the tmp256 / param256 views below.
+    union {
+        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
+        __m256i dummy;
+    } tmp = {0};
+
+    union {
+        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
+        __m256i dummy;
+    } PARAM_RS_POLY = {{ RS_POLY_COEFS }};
+
+    __m256i *tmp256 = (__m256i *)tmp.arr16;
+    __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16;
+
+    for (i = 0; i < PARAM_K; ++i) {
+        gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]);
+        // gate_value * g(x): coefficients 0..31 with two vector multiplications ...
+        tmp256[0] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[0]);
+        tmp256[1] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[1]);
+
+        // ... and the remaining coefficients 32..PARAM_G - 1 one at a time
+        for (size_t j = 32; j < PARAM_G; ++j) {
+            tmp.arr16[j] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(gate_value, PARAM_RS_POLY.arr16[j]);
+        }
+
+        // shift the register by one position while adding the feedback
+        prev = 0;
+        for (k = 0; k < PARAM_N1 - PARAM_K; k++) {
+            x = cdw[k];
+            cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]);
+            prev = x;
+        }
+    }
+
+    // systematic encoding: the message follows the PARAM_N1 - PARAM_K parity bytes
+    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
+}
+
+
+
+/**
+ * @brief Computes 2 * PARAM_DELTA syndromes
+ *
+ * Lane j - 1 of alpha_ij256_1[i - 1] (resp. alpha_ij256_2[i - 1]) holds alpha^(i * j)
+ * for j = 1..16 (resp. j = 17..32), so syndrome j is
+ *     S_j = cdw[0] ^ XOR_{i = 1}^{PARAM_N1 - 1} cdw[i] * alpha^(i * j).
+ * The cdw[0] term (alpha^0 = 1) belongs to every syndrome, hence both 16-lane
+ * accumulators are seeded with it.
+ *
+ * The results are written with unaligned stores because syndromes is a plain
+ * uint16_t array with no 32-byte alignment guarantee.
+ *
+ * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes (32 entries are written)
+ * @param[in] cdw Array of size PARAM_N1 storing the received vector
+ */
+static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
+    // i = 0 term of every syndrome
+    __m256i low = _mm256_set1_epi16(cdw[0]);
+    __m256i high = low;
+
+    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
+        const __m256i coeff = _mm256_set1_epi16(cdw[i + 1]);
+        low = _mm256_xor_si256(low, PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(coeff, alpha_ij256_1[i]));
+        high = _mm256_xor_si256(high, PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(coeff, alpha_ij256_2[i]));
+    }
+
+    _mm256_storeu_si256((__m256i *) syndromes, low);
+    _mm256_storeu_si256((__m256i *) (syndromes + 16), high);
+}
+
+
+
+/**
+ * @brief Computes the error locator polynomial (ELP) sigma
+ *
+ * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes)).
 + * We use the letter p for rho which is initialized at -1.
+ * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
+ * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
+ * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated.
+ * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
+ * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
+ * and we only need to save its first PARAM_DELTA coefficients (indices 0 to PARAM_DELTA - 1),
+ * which is all that the shifted update of X_sigma_p reads from sigma_copy.
+ *
+ * The updates that depend on the discrepancy d are selected with the
+ * 0x0000 / 0xffff masks mask1, mask2 and mask12 instead of branches.
+ *
+ * @returns the degree of the ELP sigma
+ * @param[out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP; sigma[0] is set to 1 and
+ *                   sigma[1..PARAM_DELTA] are updated with XOR, so they must be zero on entry
+ * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
+ */
+static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
+    uint16_t deg_sigma = 0;
+    uint16_t deg_sigma_p = 0;
+    uint16_t deg_sigma_copy = 0;
+    uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
+    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
+    uint16_t pp = (uint16_t) -1; // 2*rho
+    uint16_t d_p = 1;
+    uint16_t d = syndromes[0];
+
+    uint16_t mask1, mask2, mask12;
+    uint16_t deg_X, deg_X_sigma_p;
+    uint16_t dd;
+    uint16_t mu;
+
+    uint16_t i;
+
+    sigma[0] = 1;
+    for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
+        // Save sigma in case we need it to update X_sigma_p
+        // (2 * PARAM_DELTA bytes, i.e. PARAM_DELTA 16-bit coefficients)
+        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
+        deg_sigma_copy = deg_sigma;
+
+        // dd = d / d_p, the factor applied to X_sigma_p in the update of sigma
+        dd = PQCLEAN_HQCRMRS192_AVX2_gf_mul(d, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(d_p));
+
+        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
+            sigma[i] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(dd, X_sigma_p[i]);
+        }
+
+        deg_X = mu - pp;
+        deg_X_sigma_p = deg_X + deg_sigma_p;
+
+        // mask1 = 0xffff if(d != 0) and 0 otherwise
+        mask1 = -((uint16_t) - d >> 15);
+
+        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
+        mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);
+
+        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
+        mask12 = mask1 & mask2;
+        deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);
+
+        // the last iteration only needs the final sigma and its degree
+        if (mu == (2 * PARAM_DELTA - 1)) {
+            break;
+        }
+
+        // when the degree increased, the saved sigma becomes the new sigma_p
+        pp ^= mask12 & (mu ^ pp);
+        d_p ^= mask12 & (d ^ d_p);
+        for (i = PARAM_DELTA; i; --i) {
+            X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
+        }
+
+        deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
+
+        // discrepancy for the next iteration
+        d = syndromes[mu + 1];
+
+        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
+            d ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
+        }
+    }
+
+    return deg_sigma;
+}
+
+
+
+/**
+ * @brief Computes the error polynomial error from the error locator polynomial sigma
+ *
+ * See function PQCLEAN_HQCRMRS192_AVX2_fft for more details.
+ *
+ * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
+ * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
+ */
+static void compute_roots(uint8_t *error, uint16_t *sigma) {
+    // work buffer of 2^PARAM_M entries filled by the FFT and consumed by
+    // fft_retrieve_error_poly
+    uint16_t w[1 << PARAM_M] = {0};
+
+    PQCLEAN_HQCRMRS192_AVX2_fft(w, sigma, PARAM_DELTA + 1);
+    PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(error, w);
+}
+
+
+
+/**
+ * @brief Computes the polynomial z(x)
+ *
+ * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
+ *
+ * Coefficients of index greater than degree are forced to zero with a mask
+ * instead of a branch.
+ *
+ * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
+ * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
+ * @param[in] degree Integer that is the degree of polynomial sigma
+ * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
+ */
+static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
+    size_t i, j;
+    uint16_t mask;
+
+    z[0] = 1;
+
+    for (i = 1; i < PARAM_DELTA + 1; ++i) {
+        // mask = 0xffff if(i <= degree) and 0 otherwise
+        mask = -((uint16_t) (i - degree - 1) >> 15);
+        z[i] = mask & sigma[i];
+    }
+
+    z[1] ^= syndromes[0];
+
+    for (i = 2; i <= PARAM_DELTA; ++i) {
+        // mask = 0xffff if(i <= degree) and 0 otherwise
+        mask = -((uint16_t) (i - degree - 1) >> 15);
+        z[i] ^= mask & syndromes[i - 1];
+
+        for (j = 1; j < i; ++j) {
+            z[i] ^= mask & PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]);
+        }
+    }
+}
+
+
+
+/**
+ * @brief Computes the error values
+ *
+ * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
+ *
+ * @param[out] error_values Array of PARAM_N1 elements receiving the error values; the values are
+ *                          accumulated with +=, so the array must be zero on entry
+ * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
+ * @param[in] error Array of which the first PARAM_N1 entries are read; a nonzero entry marks an error position
+ */
+static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
+    uint16_t beta_j[PARAM_DELTA] = {0};
+    uint16_t e_j[PARAM_DELTA] = {0};
+
+    uint16_t delta_counter;
+    uint16_t delta_real_value;
+    uint16_t found;
+    uint16_t mask1;
+    uint16_t mask2;
+    uint16_t tmp1;
+    uint16_t tmp2;
+    uint16_t inverse;
+    uint16_t inverse_power_j;
+
+    // Compute the beta_{j_i} page 31 of the documentation
+    // (every (i, j) pair is visited; masks select the single slot that is written)
+    delta_counter = 0;
+    for (size_t i = 0; i < PARAM_N1; i++) {
+        found = 0;
+        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
+        for (size_t j = 0; j < PARAM_DELTA; j++) {
+            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
+            beta_j[j] += mask1 & mask2 & gf_exp[i];
+            found += mask1 & mask2 & 1;
+        }
+        delta_counter += found;
+    }
+    // number of error positions that were found
+    delta_real_value = delta_counter;
+
+    // Compute the e_{j_i} page 31 of the documentation
+    for (size_t i = 0; i < PARAM_DELTA; ++i) {
+        tmp1 = 1;
+        tmp2 = 1;
+        inverse = PQCLEAN_HQCRMRS192_AVX2_gf_inverse(beta_j[i]);
+        inverse_power_j = 1;
+
+        // tmp1 = z(beta_j[i]^-1)
+        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
+            inverse_power_j = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, inverse);
+            tmp1 ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, z[j]);
+        }
+        // tmp2 = product over the other slots k of (1 + beta_j[k] * beta_j[i]^-1)
+        for (size_t k = 1; k < PARAM_DELTA; ++k) {
+            tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
+        }
+        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
+        e_j[i] = mask1 & PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(tmp2));
+    }
+
+    // Place the delta e_{j_i} values at the right coordinates of the output vector
+    delta_counter = 0;
+    for (size_t i = 0; i < PARAM_N1; ++i) {
+        found = 0;
+        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
+        for (size_t j = 0; j < PARAM_DELTA; j++) {
+            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
+            error_values[i] += mask1 & mask2 & e_j[j];
+            found += mask1 & mask2 & 1;
+        }
+        delta_counter += found;
+    }
+}
+
+
+
+/**
+ * @brief Correct the errors
+ *
+ * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place
+ * @param[in] error_values Array of PARAM_N1 elements storing the error values
+ */
+static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
+    for (size_t i = 0; i < PARAM_N1; ++i) {
+        cdw[i] ^= error_values[i];
+    }
+}
+
+
+
+/**
+ * @brief Decodes the received word
+ *
+ * This function relies on six steps:
+ *
    + *   1. The first step is the computation of the 2*PARAM_DELTA syndromes.
+ *   2. The second step is the computation of the error-locator polynomial sigma.
+ *   3. The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
+ *   4. The fourth step is the computation of the polynomial z(x).
+ *   5. The fifth step is the computation of the error values.
+ *   6. The sixth step is the correction of the errors in the received polynomial.
+ *
+ * For a more complete picture on Reed-Solomon decoding, see Shu Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
+ *
+ * @param[out] msg Array of PARAM_K bytes receiving the decoded message
+ * @param[in,out] cdw Array of PARAM_N1 bytes storing the received word; it is corrected in place
+ */
+void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
+    // Every work buffer starts zeroed: the helpers below accumulate into them.
+    uint16_t syndromes[2 * PARAM_DELTA] = {0};
+    uint16_t sigma[1 << PARAM_FFT] = {0};
+    uint8_t error[1 << PARAM_M] = {0};
+    uint16_t z[PARAM_N1] = {0};
+    uint16_t error_values[PARAM_N1] = {0};
+    uint16_t deg;
+
+    // Calculate the 2*PARAM_DELTA syndromes
+    compute_syndromes(syndromes, cdw);
+
+    // Compute the error locator polynomial sigma
+    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
+    deg = compute_elp(sigma, syndromes);
+
+    // Compute the error polynomial error
+    compute_roots(error, sigma);
+
+    // Compute the polynomial z(x)
+    compute_z_poly(z, sigma, deg, syndromes);
+
+    // Compute the error values
+    compute_error_values(error_values, z, error);
+
+    // Correct the errors
+    correct_errors(cdw, error_values);
+
+    // Retrieve the message from the decoded codeword
+    // (the message follows the PARAM_G - 1 parity bytes)
+    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
+
+}
diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h b/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h
new file mode 100644
index 00000000..841a148c
--- /dev/null
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h
@@ -0,0 +1,20 @@
+#ifndef REED_SOLOMON_H
+#define REED_SOLOMON_H
+
+
+/**
+ * @file reed_solomon.h
+ * Header file of reed_solomon.c
+ */
+#include "parameters.h"
+#include <stddef.h>
+#include <stdint.h>
+
+static const uint16_t alpha_ij_pow [44][75] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95,
190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223, 91, 113, 217, 67, 17, 68, 13, 52, 208, 103, 129, 62, 248, 199, 59, 236, 151, 102, 133, 46, 184, 218, 79, 33, 132, 42, 168, 154, 82, 85}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169, 33, 21, 168, 41, 85, 146, 228, 115, 191, 145, 252, 179, 241, 219, 150, 196, 110, 87, 130, 100, 7, 56, 221, 166, 89, 242, 195, 86, 138, 36}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150, 149, 165, 130, 200, 28, 221, 81, 121, 195, 172, 18, 61, 247, 203, 44, 250, 27, 173, 2, 32, 58, 135, 152, 117, 3, 48, 39, 74, 212, 193}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36, 244, 235, 44, 233, 108, 1, 32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38, 117, 12, 39, 53, 193, 10, 186, 161, 47, 15, 231, 127, 182, 134, 26, 206, 237, 197, 23, 169, 21, 41, 146, 115, 145, 179, 219, 196, 87, 100}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 3, 157, 53, 159, 40, 185, 194, 137, 231, 254, 226, 
68, 189, 248, 197, 46, 158, 168, 170, 183, 145, 123, 75, 110, 25, 28, 166, 249, 69, 61, 235, 176, 54, 2, 29, 38}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26, 31, 118, 23, 158, 77, 146, 209, 229, 219, 55, 25, 56, 162, 155, 36, 243, 88, 54, 4, 116, 45, 6, 78, 181, 5, 105, 97, 137, 211, 223}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85, 115, 252, 219, 110, 100, 221, 242, 138, 245, 44, 54, 8, 205, 117, 96, 53, 70, 186, 97, 15, 107, 182, 68, 206, 59, 23, 33, 41, 228, 145}, {116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100, 167, 239, 36, 235, 233, 1, 116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44, 216, 128, 45, 48, 106, 10, 222, 202, 107, 226, 52, 237, 133, 66, 85, 209, 123, 196, 50, 167, 195, 144, 11, 54, 32, 76, 12, 148, 140, 185}, {205, 143, 37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221, 195, 61, 44, 173, 58, 117, 39, 193, 186, 47, 231, 182, 26, 237, 23, 21, 146, 145, 219, 87, 56, 242, 36, 139, 54, 64, 45, 96, 181, 80, 97, 120, 223, 68, 62, 102, 33, 85, 191, 241, 110, 7, 89, 138, 251, 207, 8, 38, 12, 53, 10, 161, 15, 127, 134, 206, 197, 169}, {135, 6, 53, 20, 190, 120, 163, 13, 237, 46, 84, 228, 229, 98, 100, 81, 69, 251, 131, 32, 45, 192, 238, 186, 94, 187, 217, 189, 236, 169, 82, 209, 241, 220, 28, 242, 72, 
22, 173, 116, 201, 37, 140, 222, 15, 254, 34, 62, 204, 132, 146, 63, 75, 130, 167, 43, 245, 250, 4, 38, 24, 212, 80, 194, 253, 182, 52, 147, 184, 77, 183, 179, 149, 141, 89}, {19, 24, 181, 93, 94, 107, 67, 129, 102, 132, 57, 252, 98, 200, 89, 18, 11, 173, 232, 3, 53, 40, 194, 231, 226, 189, 197, 158, 170, 145, 75, 25, 166, 69, 235, 54, 29, 234, 37, 5, 95, 120, 91, 52, 59, 218, 82, 191, 227, 174, 221, 43, 247, 207, 32, 90, 39, 35, 111, 15, 225, 136, 237, 92, 77, 115, 246, 220, 56, 239, 122, 125, 4, 76, 96}, {38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26}, {76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, 2, 152, 39, 140, 190, 231, 175, 31, 23, 77, 209, 219, 25, 162, 36, 88, 4, 45, 78, 5, 97, 211, 67, 62, 46, 154, 191, 171, 50, 89, 72, 176, 8, 90, 156, 10, 194, 187, 134, 124, 92, 41, 99, 75, 100, 178, 144, 125, 16, 180, 37, 20, 153, 107, 17, 248, 184, 82, 198, 150}, {152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1}, {45, 37, 80, 101, 223, 208, 102, 168, 191, 150, 7, 195, 251, 173, 38, 39, 10, 47, 127, 26, 197, 21, 115, 219, 100, 242, 245, 54, 205, 96, 70, 97, 107, 68, 59, 33, 228, 241, 130, 89, 61, 207, 58, 12, 193, 161, 231, 134, 237, 169, 146, 179, 87, 166, 36, 125, 64, 143, 181, 185, 120, 217, 62, 184, 85, 252, 110, 221, 138, 44, 8, 117, 53, 186, 15}, {90, 148, 186, 30, 226, 62, 109, 73, 179, 174, 162, 61, 131, 232, 96, 140, 153, 127, 52, 51, 168, 99, 98, 56, 172, 22, 8, 234, 212, 185, 
240, 67, 237, 79, 114, 241, 25, 121, 245, 108, 19, 39, 20, 188, 223, 189, 133, 41, 63, 55, 221, 9, 176, 64, 3, 238, 161, 211, 34, 59, 66, 183, 219, 200, 239, 251, 71, 152, 37, 160, 137, 182, 129, 92, 85}, {180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108, 38, 156, 160, 15, 226, 124, 169, 114, 255, 100, 239, 235, 1, 180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36}, {117, 181, 161, 107, 26, 102, 41, 252, 87, 89, 245, 173, 45, 53, 185, 231, 68, 197, 168, 145, 110, 166, 61, 54, 38, 37, 186, 120, 134, 59, 21, 191, 196, 221, 36, 207, 205, 39, 80, 15, 217, 237, 33, 115, 150, 56, 138, 125, 58, 96, 10, 101, 182, 62, 169, 228, 219, 7, 86, 44, 64, 12, 70, 47, 223, 206, 184, 146, 241, 100, 195, 139, 8, 143, 193}, {234, 238, 97, 254, 103, 184, 57, 227, 7, 172, 176, 58, 192, 40, 15, 175, 147, 21, 99, 55, 166, 122, 216, 45, 106, 222, 107, 52, 133, 85, 123, 50, 195, 11, 32, 12, 140, 188, 182, 124, 158, 115, 49, 224, 36, 131, 19, 37, 105, 253, 68, 151, 154, 252, 174, 121, 251, 2, 201, 193, 194, 225, 206, 109, 114, 219, 14, 69, 125, 116, 157, 80, 30, 67, 59}, {201, 159, 47, 91, 124, 33, 209, 149, 166, 244, 71, 117, 238, 194, 223, 31, 79, 115, 98, 167, 61, 216, 90, 181, 190, 254, 206, 218, 213, 150, 224, 72, 54, 152, 106, 161, 177, 189, 184, 114, 171, 56, 18, 131, 38, 148, 111, 107, 104, 46, 146, 227, 14, 138, 233, 135, 37, 210, 211, 26, 133, 170, 241, 141, 172, 125, 232, 78, 186, 253, 136, 102, 164, 123, 100}, {143, 70, 101, 217, 59, 168, 252, 130, 195, 44, 58, 39, 186, 231, 26, 23, 146, 219, 56, 36, 54, 45, 181, 97, 223, 62, 33, 191, 110, 89, 251, 8, 12, 10, 15, 134, 197, 41, 179, 100, 86, 125, 205, 37, 185, 107, 208, 184, 228, 150, 221, 61, 173, 117, 193, 47, 182, 237, 21, 145, 87, 242, 139, 64, 96, 80, 120, 68, 102, 85, 241, 7, 138, 207, 38}, {3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 
59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55, 89, 235, 32, 96, 160, 253, 26, 46, 114, 150, 167, 244, 1, 3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223}, {6, 20, 120, 13, 46, 228, 98, 81, 251, 32, 192, 186, 187, 189, 169, 209, 220, 242, 22, 116, 37, 222, 254, 62, 132, 63, 130, 43, 250, 38, 212, 194, 182, 147, 77, 179, 141, 9, 54, 180, 159, 101, 67, 151, 85, 227, 112, 61, 142, 3, 10, 60, 136, 23, 114, 49, 166, 243, 16, 96, 93, 211, 208, 218, 230, 110, 121, 11, 58, 156, 111, 127, 31, 66, 145}, {12, 80, 231, 208, 169, 191, 87, 195, 125, 38, 181, 47, 217, 197, 85, 219, 221, 245, 8, 96, 186, 107, 206, 33, 145, 130, 86, 207, 45, 193, 101, 134, 102, 146, 150, 166, 251, 64, 39, 185, 127, 62, 21, 252, 100, 138, 54, 117, 70, 15, 68, 23, 228, 196, 89, 139, 58, 37, 161, 223, 237, 168, 179, 7, 36, 173, 143, 10, 120, 26, 184, 115, 110, 242, 44}, {24, 93, 107, 129, 132, 252, 200, 18, 173, 3, 40, 231, 189, 158, 145, 25, 69, 54, 234, 5, 120, 52, 218, 191, 174, 43, 207, 90, 35, 15, 136, 92, 115, 220, 239, 125, 76, 238, 101, 17, 133, 228, 149, 121, 44, 135, 212, 47, 175, 51, 146, 49, 162, 139, 116, 148, 97, 113, 236, 85, 171, 83, 251, 128, 156, 161, 163, 147, 41, 255, 224, 245, 16, 157, 185}, {48, 105, 127, 248, 77, 241, 224, 247, 64, 156, 95, 182, 236, 170, 150, 162, 11, 205, 212, 94, 134, 133, 213, 110, 239, 250, 45, 35, 30, 26, 218, 99, 130, 69, 108, 143, 40, 211, 206, 132, 229, 7, 144, 2, 96, 210, 254, 237, 154, 255, 221, 243, 128, 37, 190, 113, 197, 73, 49, 89, 22, 135, 181, 188, 17, 23, 183, 220, 195, 233, 90, 70, 60, 52, 169}, {96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89}, 
{192, 222, 182, 151, 114, 110, 155, 27, 143, 160, 177, 237, 82, 75, 89, 88, 152, 70, 240, 103, 21, 123, 224, 251, 116, 212, 101, 136, 218, 145, 200, 144, 8, 78, 190, 217, 204, 183, 87, 172, 216, 12, 105, 225, 59, 170, 98, 242, 250, 180, 10, 211, 31, 168, 255, 83, 139, 135, 238, 15, 52, 158, 252, 14, 244, 64, 74, 153, 134, 46, 209, 130, 9, 142, 96}, {157, 95, 217, 133, 230, 130, 18, 2, 39, 190, 175, 23, 209, 25, 36, 4, 78, 97, 67, 46, 191, 50, 72, 8, 156, 194, 134, 92, 99, 100, 144, 16, 37, 153, 17, 184, 198, 200, 61, 32, 74, 47, 34, 109, 145, 141, 122, 64, 148, 94, 68, 218, 63, 7, 244, 128, 53, 188, 136, 169, 126, 14, 245, 29, 106, 101, 13, 79, 252, 28, 247, 58, 212, 202, 26}, {39, 97, 134, 184, 145, 7, 245, 58, 181, 15, 208, 21, 241, 166, 44, 45, 10, 107, 237, 85, 196, 195, 54, 12, 185, 182, 102, 115, 130, 36, 8, 37, 47, 68, 169, 252, 56, 251, 205, 193, 120, 206, 168, 219, 89, 125, 117, 80, 127, 59, 146, 110, 86, 173, 96, 161, 217, 23, 191, 100, 61, 64, 53, 101, 26, 33, 179, 221, 139, 38, 70, 231, 62, 41, 150}, {78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1}, {156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15, 103, 77, 150, 239, 108, 96, 190, 17, 169, 215, 167, 44, 180, 160, 223, 51, 230, 100, 244, 116, 193, 253, 124, 85, 55, 172, 1, 156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15}, {37, 101, 208, 168, 150, 195, 173, 39, 47, 26, 21, 219, 242, 54, 96, 97, 68, 33, 241, 89, 207, 12, 161, 134, 169, 179, 166, 125, 143, 185, 217, 184, 252, 221, 44, 117, 186, 182, 23, 145, 56, 139, 45, 80, 223, 102, 191, 7, 251, 38, 10, 127, 197, 115, 100, 245, 205, 70, 107, 59, 228, 130, 61, 
58, 193, 231, 237, 146, 87, 36, 64, 181, 120, 62, 85}, {74, 137, 206, 82, 55, 138, 16, 212, 120, 124, 73, 87, 72, 29, 193, 211, 147, 228, 25, 244, 205, 140, 177, 197, 230, 141, 251, 76, 40, 223, 204, 198, 56, 11, 180, 186, 113, 92, 252, 167, 176, 143, 111, 67, 169, 123, 162, 207, 24, 190, 68, 66, 227, 242, 108, 157, 47, 52, 84, 150, 155, 142, 37, 202, 103, 41, 149, 69, 8, 106, 60, 62, 170, 165, 36}, {148, 30, 62, 73, 174, 61, 232, 140, 127, 51, 99, 56, 22, 234, 185, 67, 79, 241, 121, 108, 39, 188, 189, 41, 55, 9, 64, 238, 211, 59, 183, 200, 251, 152, 160, 182, 92, 229, 166, 233, 24, 97, 13, 42, 150, 43, 2, 53, 60, 124, 146, 65, 122, 205, 5, 254, 102, 198, 112, 44, 201, 111, 134, 158, 255, 242, 216, 78, 101, 103, 82, 110, 18, 128, 193}, {53, 120, 237, 228, 100, 251, 45, 186, 217, 169, 241, 242, 173, 37, 15, 62, 146, 130, 245, 38, 80, 182, 184, 179, 89, 54, 39, 101, 206, 85, 87, 61, 205, 10, 223, 23, 252, 166, 207, 96, 47, 208, 41, 110, 36, 58, 70, 127, 102, 145, 221, 125, 12, 97, 26, 168, 196, 138, 64, 193, 107, 197, 191, 56, 44, 143, 161, 68, 21, 150, 86, 8, 181, 231, 59}, {106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180, 185, 17, 132, 150, 172, 32, 193, 214, 51, 145, 167, 233, 96, 94, 103, 85, 174, 244, 38, 160, 226, 169, 255, 239, 1, 106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100}, {212, 211, 197, 198, 167, 207, 157, 202, 62, 114, 200, 139, 201, 95, 26, 154, 220, 61, 19, 160, 217, 158, 171, 86, 32, 159, 127, 133, 229, 89, 216, 74, 120, 147, 230, 56, 176, 24, 47, 103, 170, 130, 243, 90, 185, 34, 42, 196, 18, 116, 10, 91, 109, 241, 239, 2, 181, 187, 151, 145, 83, 131, 39, 137, 124, 228, 141, 11, 143, 190, 52, 41, 165, 122, 38}, {181, 107, 102, 252, 89, 173, 53, 231, 197, 145, 166, 54, 37, 120, 59, 191, 221, 207, 39, 15, 237, 115, 56, 125, 96, 101, 62, 228, 7, 44, 12, 47, 206, 146, 100, 139, 143, 97, 208, 85, 130, 251, 117, 161, 26, 41, 87, 
245, 45, 185, 68, 168, 110, 61, 38, 186, 134, 21, 196, 36, 205, 80, 217, 33, 150, 138, 58, 10, 182, 169, 219, 86, 64, 70, 223}, {119, 177, 23, 123, 239, 8, 159, 225, 184, 255, 43, 64, 140, 91, 169, 171, 69, 58, 20, 226, 33, 49, 18, 205, 160, 67, 21, 149, 144, 38, 105, 34, 168, 220, 244, 45, 111, 13, 41, 174, 243, 117, 95, 104, 85, 25, 203, 143, 194, 103, 146, 200, 22, 12, 94, 31, 228, 14, 176, 96, 202, 248, 115, 112, 233, 39, 30, 147, 191, 167, 27, 37, 240, 236, 145}, {238, 254, 184, 227, 172, 58, 40, 175, 21, 55, 122, 45, 222, 52, 85, 50, 11, 12, 188, 124, 115, 224, 131, 37, 253, 151, 252, 121, 2, 193, 225, 109, 219, 69, 116, 80, 67, 42, 110, 244, 90, 161, 104, 170, 100, 22, 24, 101, 248, 230, 221, 27, 74, 231, 51, 229, 242, 4, 159, 223, 218, 171, 138, 232, 160, 134, 84, 220, 245, 180, 95, 208, 73, 200, 44}}; + +void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/vector.c b/src/kem/hqc/hqc-rmrs-192/avx2/vector.c new file mode 100644 index 00000000..3f58b19c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/vector.c @@ -0,0 +1,178 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +#include +/** + * @file vector.c + * @brief Implementation of vectors sampling and some utilities for the HQC scheme + */ + + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. 
+ * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It return \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). + * + * @param[in] v Pointer to an array + * @param[in] weight Integer that is the Hamming weight + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) { + size_t random_bytes_size = 3 * weight; + uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; + uint32_t tmp[PARAM_OMEGA_R] = {0}; + __m256i bit256[PARAM_OMEGA_R]; + __m256i bloc256[PARAM_OMEGA_R]; + __m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0); + __m256i pos256; + __m256i mask256; + __m256i aux; + __m256i i256; + uint64_t bloc, pos, bit64; + uint8_t inc; + size_t i, j, k; + + i = 0; + j = random_bytes_size; + while (i < weight) { + do { + if (j == random_bytes_size) { + seedexpander(ctx, rand_bytes, random_bytes_size); + j = 0; + } + + tmp[i] = ((uint32_t) rand_bytes[j++]) << 16; + tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8; + tmp[i] |= rand_bytes[j++]; + + } while (tmp[i] >= UTILS_REJECTION_THRESHOLD); + + tmp[i] = tmp[i] % PARAM_N; + + inc = 1; + for (k = 0; k < i; k++) { + if (tmp[k] == tmp[i]) { + inc = 0; + } + } + i += inc; + } + + for (i = 0; i < weight; i++) { + // we store the bloc number and bit position of each vb[i] + bloc = tmp[i] >> 6; + bloc256[i] = _mm256_set1_epi64x(bloc >> 2); + pos = (bloc & 0x3UL); + pos256 = _mm256_set1_epi64x(pos); + mask256 = _mm256_cmpeq_epi64(pos256, posCmp256); + bit64 = 1ULL << (tmp[i] & 0x3f); + bit256[i] = _mm256_set1_epi64x(bit64)&mask256; + } + + for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) { + aux = _mm256_loadu_si256(((__m256i *)v) + i); + i256 = _mm256_set1_epi64x(i); + + for (j = 0; j < weight; j++) { + mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); + aux ^= bit256[j] & 
mask256; + } + _mm256_storeu_si256(((__m256i *)v) + i, aux); + } + +} + + + +/** + * @brief Generates a random vector of dimension PARAM_N + * + * This function generates a random binary vector of dimension PARAM_N. It generates a random + * array of bytes using the seedexpander function, and drops the extra bits of the last word using RED_MASK. + * + * @param[out] v Pointer to the array of VEC_N_SIZE_64 words that receives the vector + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) { + uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0}; + + seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES); + + PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES); + v[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief Adds two vectors (word-wise XOR) + * + * @param[out] o Pointer to an array that is the result + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors, counted in 64-bit words + */ +void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { + for (uint32_t i = 0; i < size; ++i) { + o[i] = v1[i] ^ v2[i]; + } +} + + + +/** + * @brief Compares two vectors without branching on their contents + * + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors, counted in bytes + * @returns 0 if the vectors are equal and 1 otherwise + */ +uint8_t PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) { + uint64_t r = 0; + for (size_t i = 0; i < size; i++) { + r |= v1[i] ^ v2[i]; + } + r = (~r + 1) >> 63; + return (uint8_t) r; +} + + + +/** + * @brief Resize a vector so that it contains size_o bits + * + * @param[out] o Pointer to the output vector + * @param[in] size_o Integer that is the size of the output vector in bits + * @param[in] v Pointer to the input vector
+ * @param[in] size_v Integer that is the size of the input vector in bits + */ +void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) { + uint64_t mask = 0x7FFFFFFFFFFFFFFF; + int8_t val = 0; + if (size_o < size_v) { + if (size_o % 64) { + val = 64 - (size_o % 64); + } + + memcpy(o, v, VEC_N1N2_SIZE_BYTES); + + for (int8_t i = 0; i < val; ++i) { + o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); + } + } else { + memcpy(o, v, CEIL_DIVIDE(size_v, 8)); + } +} diff --git a/src/kem/hqc/hqc-rmrs-192/avx2/vector.h b/src/kem/hqc/hqc-rmrs-192/avx2/vector.h new file mode 100644 index 00000000..5ec58393 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/avx2/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + + +/** + * @file vector.h + * @brief Header file for vector.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v); + +void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_from_randombytes(uint64_t *v); + + +void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size); + +uint8_t PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size); + +void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt b/src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt new file mode 100644 index 00000000..ccc7ba9e --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt @@ -0,0 +1,16 @@ +set( + SRC_CLEAN_HQCRMRS192 + code.c + fft.c + gf2x.c + gf.c + hqc.c + kem.c + parsing.c + reed_muller.c + reed_solomon.c + vector.c +) + +define_kem_alg(hqcrmrs192_clean + PQCLEAN_HQCRMRS192_CLEAN "${SRC_CLEAN_HQCRMRS192}" "${CMAKE_CURRENT_SOURCE_DIR}") 
diff --git a/src/kem/hqc/hqc-rmrs-192/clean/api.h b/src/kem/hqc/hqc-rmrs-192/clean/api.h new file mode 100644 index 00000000..f70271b2 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/api.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_HQCRMRS192_CLEAN_API_H +#define PQCLEAN_HQCRMRS192_CLEAN_API_H +/** + * @file api.h + * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme + */ + +#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_ALGNAME "HQC-RMRS-192" + +#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES 4562 +#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_PUBLICKEYBYTES 4522 +#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_BYTES 64 +#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_CIPHERTEXTBYTES 9026 + +// As a technicality, the public key is appended to the secret key in order to respect the NIST API. +// Without this constraint, PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32 + +int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk); + +int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/code.c b/src/kem/hqc/hqc-rmrs-192/clean/code.c new file mode 100644 index 00000000..31ec8047 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/code.c @@ -0,0 +1,46 @@ +#include "code.h" +#include "parameters.h" +#include "reed_muller.h" +#include "reed_solomon.h" +#include +#include +/** + * @file code.c + * @brief Implementation of concatenated code + */ + + + +/** + * + * @brief Encoding the message m to a code word em using the concatenated code + * + * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain + * a concatenated code word. 
+ * + * @param[out] em Pointer to an array that is the tensor code word + * @param[in] m Pointer to an array that is the message + */ +void PQCLEAN_HQCRMRS192_CLEAN_code_encode(uint8_t *em, const uint8_t *m) { + uint8_t tmp[VEC_N1_SIZE_BYTES] = {0}; + + PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(tmp, m); + PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(em, tmp); + +} + + + +/** + * @brief Decoding the code word em to a message m using the concatenated code + * + * @param[out] m Pointer to an array that is the message + * @param[in] em Pointer to an array that is the code word + */ +void PQCLEAN_HQCRMRS192_CLEAN_code_decode(uint8_t *m, const uint8_t *em) { + uint8_t tmp[VEC_N1_SIZE_BYTES] = {0}; + + PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(tmp, em); + PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(m, tmp); + +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/code.h b/src/kem/hqc/hqc-rmrs-192/clean/code.h new file mode 100644 index 00000000..53bad7bd --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/code.h @@ -0,0 +1,18 @@ +#ifndef CODE_H +#define CODE_H + + +/** + * @file code.h + * Header file of code.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS192_CLEAN_code_encode(uint8_t *em, const uint8_t *message); + +void PQCLEAN_HQCRMRS192_CLEAN_code_decode(uint8_t *m, const uint8_t *em); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/fft.c b/src/kem/hqc/hqc-rmrs-192/clean/fft.c new file mode 100644 index 00000000..62b1c8ba --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/fft.c @@ -0,0 +1,351 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include +#include +/** + * @file fft.c + * Implementation of the additive FFT and its transpose. + * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf + */ + + +static void compute_fft_betas(uint16_t *betas); +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size); +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); + + +/** + * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose + * + * @param[out] betas Array of size PARAM_M-1 + */ +static void compute_fft_betas(uint16_t *betas) { + size_t i; + for (i = 0; i < PARAM_M - 1; ++i) { + betas[i] = 1 << (PARAM_M - 1 - i); + } +} + + + +/** + * @brief Computes the subset sums of the given set + * + * The array subset_sums is such that its ith element is + * the subset sum of the set elements given by the binary form of i. 
+ * + * @param[out] subset_sums Array of size 2^set_size receiving the subset sums + * @param[in] set Array of set_size elements + * @param[in] set_size Size of the array set + */ +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) { + uint16_t i, j; + subset_sums[0] = 0; + + for (i = 0; i < set_size; ++i) { + for (j = 0; j < (1 << i); ++j) { + subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; + } + } +} + + + +/** + * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x] + * + * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x) + * as proposed by Bernstein, Chou and Schwabe: + * https://binary.cr.yp.to/mcbits-20130616.pdf + * + * @param[out] f0 Array half the size of f + * @param[out] f1 Array half the size of f + * @param[in] f Array of size a power of 2 + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f + */ +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + switch (m_f) { + case 4: + f0[4] = f[8] ^ f[12]; + f0[6] = f[12] ^ f[14]; + f0[7] = f[14] ^ f[15]; + f1[5] = f[11] ^ f[13]; + f1[6] = f[13] ^ f[14]; + f1[7] = f[15]; + f0[5] = f[10] ^ f[12] ^ f1[5]; + f1[4] = f[9] ^ f[13] ^ f0[5]; + + f0[0] = f[0]; + f1[3] = f[7] ^ f[11] ^ f[15]; + f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3]; + f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3]; + f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3]; + f1[2] = f[3] ^ f1[1] ^ f0[3]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 3: + f0[0] = f[0]; + f0[2] = f[4] ^ f[6]; + f0[3] = f[6] ^ f[7]; + f1[1] = f[3] ^ f[5] ^ f[7]; + f1[2] = f[5] ^ f[6]; + f1[3] = f[7]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 2: + f0[0] = f[0]; + f0[1] = f[2] ^ f[3]; + f1[0] = f[1] ^ f0[1]; + f1[1] = f[3]; + break; + + case 1: + f0[0] = f[0]; + f1[0] = f[1]; + break; + + default: + radix_big(f0, f1, f, m_f); + break; + } +} + +static void radix_big(uint16_t *f0, 
uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1; + n <<= (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0; i < n; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + + + +/** + * @brief Evaluates f at all subset sums of a given set + * + * This function is a subroutine of the function PQCLEAN_HQCRMRS192_CLEAN_fft. + * + * @param[out] w Array + * @param[in] f Array + * @param[in] f_coeffs Number of coefficients of f + * @param[in] m Number of betas + * @param[in] m_f Number of coefficients of f (one more than its degree) + * @param[in] betas FFT constants + */ +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; + uint16_t u[1 << (PARAM_M - 2)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; + size_t x; + + // Step 1 + if (m_f == 1) { + for (i = 0; i < m; ++i) { + tmp[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], f[1]); + } + + w[0] = f[0]; + x = 1; + for (j = 0; j < m; ++j) { + for (k = 0; k < x; ++k) { + w[x + k] = w[k] ^ tmp[j]; + } + x <<= 1; + } + + return; + } + + // Step 2: compute g + if (betas[m - 1] != 1) { + beta_m_pow = 1; + x = 1; + x <<= m_f; + for (i = 1; i < x; 
++i) { + beta_m_pow = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); + f[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, f[i]); + } + } + + // Step 3 + radix(f0, f1, f, m_f); + + // Step 4: compute gammas and deltas + for (i = 0; i + 1 < m; ++i) { + gammas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(betas[m - 1])); + deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; + } + + // Compute gammas sums + compute_subset_sums(gammas_sums, gammas, m - 1); + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + + k = 1; + k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. + if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant + w[0] = u[0]; + w[k] = u[0] ^ f1[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], f1[0]); + w[k + i] = w[i] ^ f1[0]; + } + } else { + fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas); + + // Step 6 (the memcpy length is in bytes: k values of type uint16_t) + memcpy(w + k, v, 2 * k); + w[0] = u[0]; + w[k] ^= u[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], v[i]); + w[k + i] ^= w[i]; + } + } +} + + + +/** + * @brief Evaluates f on all field elements using an additive FFT algorithm + * + * f_coeffs is the number of coefficients of f (one more than its degree).
+ * The FFT proceeds recursively to evaluate f at all subset sums of a basis B.
+ * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf
+ * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas, + * meaning the first gammas subset sums are actually the subset sums of betas (except 1).
+ * Also note that f is altered during computation (twisted at each level). + * + * @param[out] w Array + * @param[in] f Array of 2^PARAM_FFT elements + * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) + */ +void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; + + // Follows Gao and Mateer algorithm + compute_fft_betas(betas); + + // Step 1: PARAM_FFT > 1, nothing to do + + // Compute gammas sums + compute_subset_sums(betas_sums, betas, PARAM_M - 1); + + // Step 2: beta_m = 1, nothing to do + + // Step 3 + radix(f0, f1, f, PARAM_FFT); + + // Step 4: Compute deltas + for (i = 0; i < PARAM_M - 1; ++i) { + deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(betas[i]) ^ betas[i]; + } + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + + k = 1 << (PARAM_M - 1); + // Step 6, 7 and error polynomial computation + memcpy(w + k, v, 2 * k); + + // Check if 0 is root + w[0] = u[0]; + + // Check if 1 is root + w[k] ^= u[0]; + + // Find other roots + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas_sums[i], v[i]); + w[k + i] ^= w[i]; + } +} + + + +/** + * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements. 
+ * + * @param[in,out] error Array with the error; an entry is XOR-ed with 1 when the matching entry of w is zero (w entries are expected to be small field elements) + * w[0] and w[k], with k = 2^(PARAM_M-1), both map to error[0]; the other entries are indexed by PARAM_GF_MUL_ORDER minus gf_log of the matching subset sum + * @param[in] w Array of size 2^PARAM_M + */ +void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t k; + size_t i, index; + + compute_fft_betas(gammas); + compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + + k = 1 << (PARAM_M - 1); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); + + for (i = 1; i < k; ++i) { + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]]; + error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); + + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1]; + error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15); + } +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/fft.h b/src/kem/hqc/hqc-rmrs-192/clean/fft.h new file mode 100644 index 00000000..7e8f8bc1 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/fft.h @@ -0,0 +1,18 @@ +#ifndef FFT_H +#define FFT_H + + +/** + * @file fft.h + * Header file of fft.c + */ + +#include +#include + +void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs); + +void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/gf.c b/src/kem/hqc/hqc-rmrs-192/clean/gf.c new file mode 100644 index 00000000..b209032e --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/gf.c @@ -0,0 +1,63 @@ +#include "gf.h" +#include "parameters.h" +#include +/** + * @file gf.c + * Galois field implementation with multiplication using lookup tables + */ + + +/** + * @brief Multiplies element a by element b using the log/exp tables + * @returns the product a*b (0 when a or b is zero, enforced by a mask rather than a branch) + * @param[in] a First element of GF(2^PARAM_M) to multiply (may be zero) + * @param[in] b Second element of GF(2^PARAM_M) to multiply (may be zero) + */ +uint16_t
PQCLEAN_HQCRMRS192_CLEAN_gf_mul(uint16_t a, uint16_t b) { + uint16_t mask; + mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + mask &= (uint16_t) (-((int32_t) b) >> 31); // b != 0 + return mask & gf_exp[PQCLEAN_HQCRMRS192_CLEAN_gf_mod(gf_log[a] + gf_log[b])]; +} + + + +/** + * @brief Squares an element of GF(2^PARAM_M) + * @returns a^2 + * @param[in] a Element of GF(2^PARAM_M) + */ +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_square(uint16_t a) { + int16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + return mask & gf_exp[PQCLEAN_HQCRMRS192_CLEAN_gf_mod(2 * gf_log[a])]; +} + + + +/** + * @brief Computes the inverse of an element of GF(2^PARAM_M) + * @returns the inverse of a + * @param[in] a Element of GF(2^PARAM_M) + */ +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(uint16_t a) { + int16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]]; +} + + + +/** + * @brief Returns i modulo 2^PARAM_M-1 + * i must be less than 2*(2^PARAM_M-1). + * Therefore, the return value is either i or i-2^PARAM_M+1. + * @returns i mod (2^PARAM_M-1) + * @param[in] i The integer whose modulo is taken + */ +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mod(uint16_t i) { + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); + + // mask = 0xffff if(i < PARAM_GF_MUL_ORDER) + uint16_t mask = -(tmp >> 15); + + return tmp + (mask & PARAM_GF_MUL_ORDER); +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/gf.h b/src/kem/hqc/hqc-rmrs-192/clean/gf.h new file mode 100644 index 00000000..427198db --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/gf.h @@ -0,0 +1,39 @@ +#ifndef GF_H +#define GF_H + + +/** + * @file gf.h + * Header file of gf.c + */ + +#include +#include + + +/** + * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8. + * The last two elements are needed by the PQCLEAN_HQCRMRS192_CLEAN_gf_mul function + * (for example if both elements to multiply are zero). 
+ */ +static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 }; + + + +/** + * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8). + * The logarithm of 0 is set to 0 by convention. 
+ */ +static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 }; + + +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mul(uint16_t a, uint16_t b); + +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_square(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mod(uint16_t i); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/gf2x.c b/src/kem/hqc/hqc-rmrs-192/clean/gf2x.c new file mode 100644 index 00000000..89374761 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/gf2x.c @@ -0,0 +1,154 @@ +#include "gf2x.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include +/** + * \file gf2x.c + * \brief Implementation of multiplication of two polynomials + */ + + +static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2); +static void 
reduce(uint64_t *o, const uint64_t *a); +static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx); + +/** + * @brief swap two elements in a table + * + * This function exchanges tab[elt1] with tab[elt2] + * + * @param[in,out] tab Pointer to the table + * @param[in] elt1 Index of the first element + * @param[in] elt2 Index of the second element + */ +static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) { + uint16_t tmp = tab[elt1]; + + tab[elt1] = tab[elt2]; + tab[elt2] = tmp; +} + + + +/** + * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$ + * + * This function computes the modular reduction of the polynomial a(x) and masks the last word with RED_MASK + * + * @param[in] a Pointer to the polynomial a(x) (words up to index 2 * VEC_N_SIZE_64 - 1 are read) + * @param[out] o Pointer to the result (VEC_N_SIZE_64 words) + */ +static void reduce(uint64_t *o, const uint64_t *a) { + size_t i; + uint64_t r; + uint64_t carry; + + for (i = 0; i < VEC_N_SIZE_64; i++) { + r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); + carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); + o[i] = a[i] ^ r ^ carry; + } + + o[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief computes product of the sparse polynomial a1 with the dense polynomial a2(x) + * + * o(x) = a1(x)a2(x) + * + * @param[in,out] o Pointer to the result, accessed as bytes; the product is XOR-ed into its current contents + * @param[in] a1 Pointer to the sparse polynomial a1 (list of degrees of the monomials which appear in a1) + * @param[in] a2 Pointer to the dense polynomial a2(x) + * @param[in] weight Hamming weight of the sparse polynomial a1 + * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process + */ +static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { +//static uint32_t fast_convolution_mult(const uint64_t *A, const uint32_t *vB, uint64_t *C, const uint16_t w, AES_XOF_struct *ctx) + uint64_t carry; + uint32_t dec, s; + uint64_t table[16 * (VEC_N_SIZE_64 + 1)]; + uint16_t permuted_table[16]; + uint16_t
permutation_table[16]; + uint16_t permuted_sparse_vect[PARAM_OMEGA_E]; + uint16_t permutation_sparse_vect[PARAM_OMEGA_E]; + uint64_t tmp; + uint64_t *pt; + uint8_t *res; + size_t i, j; + + for (i = 0; i < 16; i++) { + permuted_table[i] = (uint16_t) i; + } + + seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); + + for (i = 0; i < 15; i++) { + swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); + } + + pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); + for (j = 0; j < VEC_N_SIZE_64; j++) { + pt[j] = a2[j]; + } + pt[VEC_N_SIZE_64] = 0x0; + + for (i = 1; i < 16; i++) { + carry = 0; + pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); + for (j = 0; j < VEC_N_SIZE_64; j++) { + pt[j] = (a2[j] << i) ^ carry; + carry = (a2[j] >> ((64 - i))); + } + pt[VEC_N_SIZE_64] = carry; + } + + for (i = 0; i < weight; i++) { + permuted_sparse_vect[i] = (uint16_t) i; + } + + seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); + + for (i = 0; i + 1 < weight; i++) { + swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i))); + } + + for (i = 0; i < weight; i++) { + dec = a1[permuted_sparse_vect[i]] & 0xf; + s = a1[permuted_sparse_vect[i]] >> 4; + res = o + 2 * s; + pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); + + for (j = 0; j < VEC_N_SIZE_64 + 1; j++) { + tmp = PQCLEAN_HQCRMRS192_CLEAN_load8(res); + PQCLEAN_HQCRMRS192_CLEAN_store8(res, tmp ^ pt[j]); + res += 8; + } + } +} + + + +/** + * @brief Multiply two polynomials modulo \f$ X^n - 1\f$. + * + * This function multiplies a sparse polynomial a1 (of Hamming weight equal to weight) + * and a dense polynomial a2. The multiplication is done modulo \f$ X^n - 1\f$.
+ * + * @param[out] o Pointer to the result + * @param[in] a1 Pointer to the sparse polynomial + * @param[in] a2 Pointer to the dense polynomial + * @param[in] weight Integer that is the weight of the sparse polynomial + * @param[in] ctx Pointer to the randomness context + */ +void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { + uint64_t tmp[2 * VEC_N_SIZE_64 + 1] = {0}; + + fast_convolution_mult((uint8_t *) tmp, a1, a2, weight, ctx); + PQCLEAN_HQCRMRS192_CLEAN_load8_arr(tmp, 2 * VEC_N_SIZE_64 + 1, (uint8_t *) tmp, sizeof(tmp)); + reduce(o, tmp); +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/gf2x.h b/src/kem/hqc/hqc-rmrs-192/clean/gf2x.h new file mode 100644 index 00000000..fc4fdd4b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/gf2x.h @@ -0,0 +1,16 @@ +#ifndef GF2X_H +#define GF2X_H + + +/** + * @file gf2x.h + * @brief Header file for gf2x.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/hqc.c b/src/kem/hqc/hqc-rmrs-192/clean/hqc.c new file mode 100644 index 00000000..183cd378 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/hqc.c @@ -0,0 +1,144 @@ +#include "code.h" +#include "gf2x.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +/** + * @file hqc.c + * @brief Implementation of hqc.h + */ + + + +/** + * @brief Keygen of the HQC_PKE IND_CPA scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API.
+ * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) { + AES_XOF_struct sk_seedexpander; + AES_XOF_struct pk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + uint8_t pk_seed[SEED_BYTES] = {0}; + uint64_t x[VEC_N_SIZE_64] = {0}; + uint32_t y[PARAM_OMEGA] = {0}; + uint64_t h[VEC_N_SIZE_64] = {0}; + uint64_t s[VEC_N_SIZE_64] = {0}; + + // Create seed_expanders for public key and secret key + randombytes(sk_seed, SEED_BYTES); + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + randombytes(pk_seed, SEED_BYTES); + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + // Compute secret key + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA); + + // Compute public key + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(&pk_seedexpander, h); + PQCLEAN_HQCRMRS192_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &sk_seedexpander); + PQCLEAN_HQCRMRS192_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64); + + // Parse keys to string + PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(pk, pk_seed, s); + PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(sk, sk_seed, pk); + +} + + + +/** + * @brief Encryption of the HQC_PKE IND_CPA scheme + * + * The ciphertext is composed of vectors u and v.
+ * + * @param[out] u Vector u (first part of the ciphertext) + * @param[out] v Vector v (second part of the ciphertext) + * @param[in] m Vector representing the message to encrypt + * @param[in] theta Seed used to derive randomness required for encryption + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) { + AES_XOF_struct seedexpander; + uint64_t h[VEC_N_SIZE_64] = {0}; + uint64_t s[VEC_N_SIZE_64] = {0}; + uint64_t r1[VEC_N_SIZE_64] = {0}; + uint32_t r2[PARAM_OMEGA_R] = {0}; + uint64_t e[VEC_N_SIZE_64] = {0}; + uint64_t tmp1[VEC_N_SIZE_64] = {0}; + uint64_t tmp2[VEC_N_SIZE_64] = {0}; + + // Create seed_expander from theta + seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH); + + // Retrieve h and s from public key + PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(h, s, pk); + + // Generate r1, r2 and e + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&seedexpander, r2, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E); + + // Compute u = r1 + r2.h + PQCLEAN_HQCRMRS192_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &seedexpander); + PQCLEAN_HQCRMRS192_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64); + + // Compute v = m.G by encoding the message + PQCLEAN_HQCRMRS192_CLEAN_code_encode((uint8_t *)v, m); + PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES); + PQCLEAN_HQCRMRS192_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + + // Compute v = m.G + s.r2 + e + PQCLEAN_HQCRMRS192_CLEAN_vect_mul(tmp2, r2, s, PARAM_OMEGA_R, &seedexpander); + PQCLEAN_HQCRMRS192_CLEAN_vect_add(tmp2, e, tmp2, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS192_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS192_CLEAN_vect_resize(v,
PARAM_N1N2, tmp2, PARAM_N); + +} + + + +/** + * @brief Decryption of the HQC_PKE IND_CPA scheme + * + * @param[out] m Vector representing the decrypted message + * @param[in] u Vector u (first part of the ciphertext) + * @param[in] v Vector v (second part of the ciphertext) + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) { + uint8_t pk[PUBLIC_KEY_BYTES] = {0}; + uint64_t tmp1[VEC_N_SIZE_64] = {0}; + uint64_t tmp2[VEC_N_SIZE_64] = {0}; + uint32_t y[PARAM_OMEGA] = {0}; + AES_XOF_struct perm_seedexpander; + uint8_t perm_seed[SEED_BYTES] = {0}; + + // Retrieve x, y, pk from secret key (x lands in tmp1, which is reused as scratch below; only y is needed here) + PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(tmp1, y, pk, sk); + + randombytes(perm_seed, SEED_BYTES); + seedexpander_init(&perm_seedexpander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + // Compute v - u.y + PQCLEAN_HQCRMRS192_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + PQCLEAN_HQCRMRS192_CLEAN_vect_mul(tmp2, y, u, PARAM_OMEGA, &perm_seedexpander); + PQCLEAN_HQCRMRS192_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64); + + + // Compute m by decoding v - u.y + PQCLEAN_HQCRMRS192_CLEAN_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS192_CLEAN_code_decode(m, (uint8_t *)tmp1); +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/hqc.h b/src/kem/hqc/hqc-rmrs-192/clean/hqc.h new file mode 100644 index 00000000..4aa01aae --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/hqc.h @@ -0,0 +1,19 @@ +#ifndef HQC_H +#define HQC_H + + +/** + * @file hqc.h + * @brief Functions of the HQC_PKE IND_CPA scheme + */ + +#include + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk); + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk); + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u,
const uint64_t *v, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/kem.c b/src/kem/hqc/hqc-rmrs-192/clean/kem.c new file mode 100644 index 00000000..10f2f9a0 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/kem.c @@ -0,0 +1,140 @@ +#include "api.h" +#include "fips202.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "sha2.h" +#include "vector.h" +#include +#include +/** + * @file kem.c + * @brief Implementation of api.h + */ + + + +/** + * @brief Keygen of the HQC_KEM IND_CCA2 scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + * @returns 0 if keygen is successful + */ +int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + + PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(pk, sk); + return 0; +} + + + +/** + * @brief Encapsulation of the HQC_KEM IND_CCA2 scheme + * + * @param[out] ct String containing the ciphertext + * @param[out] ss String containing the shared secret + * @param[in] pk String containing the public key + * @returns 0 if encapsulation is successful + */ +int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + + uint8_t theta[SHA512_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint64_t u[VEC_N_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Computing m + randombytes(m, VEC_K_SIZE_BYTES); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + //
Encrypting m + PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk); + + // Computing d + sha512(d, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Computing ciphertext + PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(ct, u, v, d); + + + return 0; +} + + + +/** + * @brief Decapsulation of the HQC_KEM IND_CCA2 scheme + * + * @param[out] ss String containing the shared secret + * @param[in] ct String containing the ciphertext + * @param[in] sk String containing the secret key + * @returns 0 if decapsulation is successful, -1 otherwise + */ +int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + + uint8_t result; + uint64_t u[VEC_N_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char pk[PUBLIC_KEY_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint8_t theta[SHA512_BYTES] = {0}; + uint64_t u2[VEC_N_SIZE_64] = {0}; + uint64_t v2[VEC_N1N2_SIZE_64] = {0}; + unsigned char d2[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Retrieving u, v and d from ciphertext + PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(u, v, d, ct); + + // Retrieving pk from sk + memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES); + + // Decrypting + PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(m, u, v, sk); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m' + PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk); + + // Computing d' + sha512(d2, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m,
VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Reject if c != c' or d != d': the shared secret is zeroed and -1 is returned + result = PQCLEAN_HQCRMRS192_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS192_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS192_CLEAN_vect_compare(d, d2, SHA512_BYTES); + result = (uint8_t) (-((int16_t) result) >> 15); + for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { + ss[i] &= ~result; + } + + + return -(result & 1); +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/parameters.h b/src/kem/hqc/hqc-rmrs-192/clean/parameters.h new file mode 100644 index 00000000..e47f86eb --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/parameters.h @@ -0,0 +1,98 @@ +#ifndef HQC_PARAMETERS_H +#define HQC_PARAMETERS_H + + +/** + * @file parameters.h + * @brief Parameters of the HQC_KEM IND-CCA2 scheme + */ +#include "api.h" + + +#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/ + +/* + #define PARAM_N Define the parameter n of the scheme + #define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code) + #define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code) + #define PARAM_N1N2 Define the length in bits of the Concatenated code + #define PARAM_OMEGA Define the parameter omega of the scheme + #define PARAM_OMEGA_E Define the parameter omega_e of the scheme + #define PARAM_OMEGA_R Define the parameter omega_r of the scheme + #define PARAM_SECURITY Define the security level corresponding to the chosen parameters + #define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters + + #define SECRET_KEY_BYTES Define
the size of the secret key in bytes + #define PUBLIC_KEY_BYTES Define the size of the public key in bytes + #define SHARED_SECRET_BYTES Define the size of the shared secret in bytes + #define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes + + #define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function) + #define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes + #define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes + #define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes + #define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes + + #define VEC_N_SIZE_64 Define the size of the array used to store a PARAM_N sized vector in 64 bits + #define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits + #define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits + #define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits + + #define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code) + #define PARAM_M Define a positive integer + #define PARAM_GF_POLY Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form + #define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1 + #define PARAM_K Define the size of the information bits of the Reed-Solomon code + #define PARAM_G Define the size of the generator polynomial of Reed-Solomon code + #define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input + We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=16 + The smallest power of 2 greater than 16+1 is 32=2^5 + #define
RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code + + #define RED_MASK A mask fot the higher bits of a vector + #define SHA512_BYTES Define the size of SHA512 output in bytes + #define SEED_BYTES Define the size of the seed in bytes + #define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length +*/ + +#define PARAM_N 35851 +#define PARAM_N1 56 +#define PARAM_N2 640 +#define PARAM_N1N2 35840 +#define PARAM_OMEGA 100 +#define PARAM_OMEGA_E 114 +#define PARAM_OMEGA_R 114 +#define PARAM_SECURITY 192 +#define PARAM_DFR_EXP 192 + +#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES +#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_PUBLICKEYBYTES +#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_BYTES +#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_CIPHERTEXTBYTES + +#define UTILS_REJECTION_THRESHOLD 16742417 +#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8) +#define VEC_K_SIZE_BYTES PARAM_K +#define VEC_N1_SIZE_BYTES PARAM_N1 +#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8) + +#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64) +#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8) +#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8) +#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64) + +#define PARAM_DELTA 16 +#define PARAM_M 8 +#define PARAM_GF_POLY 0x11D +#define PARAM_GF_MUL_ORDER 255 +#define PARAM_K 24 +#define PARAM_G 33 +#define PARAM_FFT 5 +#define RS_POLY_COEFS 45,216,239,24,253,104,27,40,107,50,163,210,227,134,224,158,119,13,158,1,238,164,82,43,15,232,246,142,50,189,29,232,1 + +#define RED_MASK 0x7ff +#define SHA512_BYTES 64 +#define SEED_BYTES 40 +#define SEEDEXPANDER_MAX_LENGTH 4294967295 + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/parsing.c b/src/kem/hqc/hqc-rmrs-192/clean/parsing.c new file mode 100644 index 00000000..4677511c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/parsing.c @@ -0,0 +1,186 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include
"parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file parsing.c + * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme + */ + + +void PQCLEAN_HQCRMRS192_CLEAN_store8(unsigned char *out, uint64_t in) { + out[0] = (in >> 0x00) & 0xFF; + out[1] = (in >> 0x08) & 0xFF; + out[2] = (in >> 0x10) & 0xFF; + out[3] = (in >> 0x18) & 0xFF; + out[4] = (in >> 0x20) & 0xFF; + out[5] = (in >> 0x28) & 0xFF; + out[6] = (in >> 0x30) & 0xFF; + out[7] = (in >> 0x38) & 0xFF; +} + + +uint64_t PQCLEAN_HQCRMRS192_CLEAN_load8(const unsigned char *in) { + uint64_t ret = in[7]; + + for (int8_t i = 6; i >= 0; i--) { + ret <<= 8; + ret |= in[i]; + } + + return ret; +} + +void PQCLEAN_HQCRMRS192_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) { + size_t index_in = 0; + size_t index_out = 0; + + // first copy by 8 bytes + if (inlen >= 8 && outlen >= 1) { + while (index_out < outlen && index_in + 8 <= inlen) { + out64[index_out] = PQCLEAN_HQCRMRS192_CLEAN_load8(in8 + index_in); + + index_in += 8; + index_out += 1; + } + } + + // we now need to do the last (up to 7) bytes if necessary + if (index_in >= inlen || index_out >= outlen) { + return; + } + out64[index_out] = in8[inlen - 1]; + for (int8_t i = (int8_t)(inlen - index_in) - 2; i >= 0; i--) { + out64[index_out] <<= 8; + out64[index_out] |= in8[index_in + i]; + } +} + +void PQCLEAN_HQCRMRS192_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) { + for (size_t index_out = 0, index_in = 0; index_out < outlen && index_in < inlen;) { + out8[index_out] = (in64[index_in] >> ((index_out % 8) * 8)) & 0xFF; + index_out++; + if (index_out % 8 == 0) { + index_in++; + } + } +} + + +/** + * @brief Parse a secret key into a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API.
+ * + * @param[out] sk String containing the secret key + * @param[in] sk_seed Seed used to generate the secret key + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) { + memcpy(sk, sk_seed, SEED_BYTES); + sk += SEED_BYTES; + memcpy(sk, pk, PUBLIC_KEY_BYTES); +} + +/** + * @brief Parse a secret key from a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] x uint64_t representation of vector x + * @param[out] y uint32_t representation of vector y + * @param[out] pk String containing the public key + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) { + AES_XOF_struct sk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + + memcpy(sk_seed, sk, SEED_BYTES); + sk += SEED_BYTES; + memcpy(pk, sk, PUBLIC_KEY_BYTES); + + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA); +} + +/** + * @brief Parse a public key into a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] pk String containing the public key + * @param[in] pk_seed Seed used to generate the public key + * @param[in] s uint64_t representation of vector s + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) { + memcpy(pk, pk_seed, SEED_BYTES); + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64); +} + + + +/** + * @brief Parse a
public key from a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] h uint64_t representation of vector h + * @param[out] s uint64_t representation of vector s + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) { + AES_XOF_struct pk_seedexpander; + uint8_t pk_seed[SEED_BYTES] = {0}; + + memcpy(pk_seed, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_HQCRMRS192_CLEAN_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES); + + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(&pk_seedexpander, h); +} + + +/** + * @brief Parse a ciphertext into a string + * + * The ciphertext is composed of vectors u, v and hash d. + * + * @param[out] ct String containing the ciphertext + * @param[in] u uint64_t representation of vector u + * @param[in] v uint64_t representation of vector v + * @param[in] d String containing the hash d + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) { + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS192_CLEAN_store8_arr(ct, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(ct, d, SHA512_BYTES); +} + + +/** + * @brief Parse a ciphertext from a string + * + * The ciphertext is composed of vectors u, v and hash d.
+ * + * @param[out] u uint64_t representation of vector u + * @param[out] v uint64_t representation of vector v + * @param[out] d String containing the hash d + * @param[in] ct String containing the ciphertext + */ +void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) { + PQCLEAN_HQCRMRS192_CLEAN_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, ct, VEC_N1N2_SIZE_BYTES); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(d, ct, SHA512_BYTES); +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/parsing.h b/src/kem/hqc/hqc-rmrs-192/clean/parsing.h new file mode 100644 index 00000000..c8044ab8 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/parsing.h @@ -0,0 +1,36 @@ +#ifndef PARSING_H +#define PARSING_H + + +/** + * @file parsing.h + * @brief Header file for parsing.c + */ + +#include + +void PQCLEAN_HQCRMRS192_CLEAN_store8(unsigned char *out, uint64_t in); + +uint64_t PQCLEAN_HQCRMRS192_CLEAN_load8(const unsigned char *in); + +void PQCLEAN_HQCRMRS192_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen); + +void PQCLEAN_HQCRMRS192_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen); + + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk); + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk); + + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s); + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk); + + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d); + +void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct); + +
+#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c b/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c new file mode 100644 index 00000000..05762a0b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c @@ -0,0 +1,237 @@ +#include "parameters.h" +#include "reed_muller.h" +#include +#include +/** + * @file reed_muller.c + * Constant time implementation of Reed-Muller code RM(1,7) + */ + + + +// number of repeated code words +#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) + +// copy bit 0 into all bits of a 32 bit value +#define BIT0MASK(x) (-((x) & 1)) + + +static void encode(uint8_t *word, uint8_t message); +static void hadamard(uint16_t src[128], uint16_t dst[128]); +static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]); +static uint8_t find_peaks(const uint16_t transform[128]); + + + +/** + * @brief Encode a single byte into a single codeword using RM(1,7) + * + * Encoding matrix of this code: + * bit pattern (note that bits are numbered big endian) + * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa + * 1 cccccccc cccccccc cccccccc cccccccc + * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0 + * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00 + * 4 ffff0000 ffff0000 ffff0000 ffff0000 + * 5 ffffffff 00000000 ffffffff 00000000 + * 6 ffffffff ffffffff 00000000 00000000 + * 7 ffffffff ffffffff ffffffff ffffffff + * + * @param[out] word An RM(1,7) codeword + * @param[in] message A message + */ +static void encode(uint8_t *word, uint8_t message) { + uint32_t e; + // bit 7 flips all the bits, do that first to save work + e = BIT0MASK(message >> 7); + // bits 0, 1, 2, 3, 4 are the same for all four longs + // (Warning: in the bit matrix above, low bits are at the left!)
+ e ^= BIT0MASK(message >> 0) & 0xaaaaaaaa; + e ^= BIT0MASK(message >> 1) & 0xcccccccc; + e ^= BIT0MASK(message >> 2) & 0xf0f0f0f0; + e ^= BIT0MASK(message >> 3) & 0xff00ff00; + e ^= BIT0MASK(message >> 4) & 0xffff0000; + // we can store this in the first quarter + word[0 + 0] = (e >> 0x00) & 0xff; + word[0 + 1] = (e >> 0x08) & 0xff; + word[0 + 2] = (e >> 0x10) & 0xff; + word[0 + 3] = (e >> 0x18) & 0xff; + // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 + e ^= BIT0MASK(message >> 5); + word[4 + 0] = (e >> 0x00) & 0xff; + word[4 + 1] = (e >> 0x08) & 0xff; + word[4 + 2] = (e >> 0x10) & 0xff; + word[4 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 6); + word[12 + 0] = (e >> 0x00) & 0xff; + word[12 + 1] = (e >> 0x08) & 0xff; + word[12 + 2] = (e >> 0x10) & 0xff; + word[12 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 5); + word[8 + 0] = (e >> 0x00) & 0xff; + word[8 + 1] = (e >> 0x08) & 0xff; + word[8 + 2] = (e >> 0x10) & 0xff; + word[8 + 3] = (e >> 0x18) & 0xff; +} + + + +/** + * @brief Hadamard transform + * + * Perform hadamard transform of src and store result in dst + * src is overwritten: it is also used as intermediate buffer + * Method is best explained if we use H(3) instead of H(7): + * + * The routine multiplies by the matrix H(3): + * [1 1 1 1 1 1 1 1] + * [1 -1 1 -1 1 -1 1 -1] + * [1 1 -1 -1 1 1 -1 -1] + * [a b c d e f g h] * [1 -1 -1 1 1 -1 -1 1] = result of routine + * [1 1 1 1 -1 -1 -1 -1] + * [1 -1 1 -1 -1 1 -1 1] + * [1 1 -1 -1 -1 -1 1 1] + * [1 -1 -1 1 -1 1 1 -1] + * You can do this in three passes, where each pass does this: + * set lower half of buffer to pairwise sums, + * and upper half to differences + * index 0 1 2 3 4 5 6 7 + * input: a, b, c, d, e, f, g, h + * pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h + * pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h + * pass 3: a+b+c+d+e+f+g+h a+b-c-d+e+f-g-h a+b+c+d-e-f-g-h a+b-c-d-e-f+g+h + * a-b+c-d+e-f+g-h a-b-c+d+e-f-g+h a-b+c-d-e+f-g+h
a-b-c+d-e+f+g-h + * This order of computation is chosen because it vectorises well. + * Likewise, this routine multiplies by H(7) in seven passes. + * + * @param[out] src Structure that contain the expanded codeword + * @param[out] dst Structure that contain the expanded codeword + */ +static void hadamard(uint16_t src[128], uint16_t dst[128]) { + // the passes move data: + // src -> dst -> src -> dst -> src -> dst -> src -> dst + // using p1 and p2 alternately + uint16_t *p1 = src; + uint16_t *p2 = dst; + uint16_t *p3; + for (uint32_t pass = 0; pass < 7; pass++) { + for (uint32_t i = 0; i < 64; i++) { + p2[i] = p1[2 * i] + p1[2 * i + 1]; + p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; + } + // swap p1, p2 for next round + p3 = p1; + p1 = p2; + p2 = p3; + } +} + + + +/** + * @brief Add multiple codewords into expanded codeword + * + * Accesses memory in order + * Note: this does not write the codewords as -1 or +1 as the green machine does + * instead, just 0 and 1 is used. + * The resulting hadamard transform has: + * all values are halved + * the first entry is 64 * MULTIPLICITY too high (MULTIPLICITY copies are summed) + * + * @param[out] dest Structure that contain the expanded codeword + * @param[in] src Structure that contain the codeword + */ +static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) { + size_t part, bit, copy; + // start with the first copy + for (part = 0; part < 16; part++) { + for (bit = 0; bit < 8; bit++) { + dest[part * 8 + bit] = (uint16_t) ((src[part] >> bit) & 1); + } + } + // sum the rest of the copies + for (copy = 1; copy < MULTIPLICITY; copy++) { + for (part = 0; part < 16; part++) { + for (bit = 0; bit < 8; bit++) { + dest[part * 8 + bit] += (uint16_t) ((src[16 * copy + part] >> bit) & 1); + } + } + } +} + + + +/** + * @brief Finding the location of the highest value + * + * This is the final step of the green machine: find the location of the highest value, + * and add 128 if the peak is positive + * if there are two identical peaks, the peak with
smallest value + * in the lowest 7 bits it taken + * @param[in] transform Structure that contain the expanded codeword + */ +static uint8_t find_peaks(const uint16_t transform[128]) { + uint16_t peak_abs = 0; + uint16_t peak = 0; + uint16_t pos = 0; + uint16_t t, abs, mask; + for (uint16_t i = 0; i < 128; i++) { + t = transform[i]; + abs = t ^ ((-(t >> 15)) & (t ^ -t)); // abs = |t| (t read as a signed 16-bit value) + mask = -(((uint16_t)(peak_abs - abs)) >> 15); + peak ^= mask & (peak ^ t); + pos ^= mask & (pos ^ i); + peak_abs ^= mask & (peak_abs ^ abs); + } + pos |= 128 & ((peak >> 15) - 1); + return (uint8_t) pos; +} + + + + +/** + * @brief Encodes the message + * + * The message consists of N1 bytes each byte is encoded into PARAM_N2 bits, + * or MULTIPLICITY repeats of 128 bits + * + * @param[out] cdw Array of size VEC_N1N2_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_N1_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) { + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // encode first word + encode(&cdw[16 * i * MULTIPLICITY], msg[i]); + // copy to other identical codewords + for (size_t copy = 1; copy < MULTIPLICITY; copy++) { + memcpy(&cdw[16 * i * MULTIPLICITY + 16 * copy], &cdw[16 * i * MULTIPLICITY], 16); + } + } +} + + + +/** + * @brief Decodes the received word + * + * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
+ * The theory of error-correcting codes @cite macwilliams1977theory + * + * @param[out] msg Array of size VEC_N1_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1N2_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) { + uint16_t expanded[128]; + uint16_t transform[128]; + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // collect the codewords + expand_and_sum(expanded, &cdw[16 * i * MULTIPLICITY]); + // apply hadamard transform + hadamard(expanded, transform); + // fix the first entry to get the half Hadamard transform + transform[0] -= 64 * MULTIPLICITY; + // finish the decoding + msg[i] = find_peaks(transform); + } +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h b/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h new file mode 100644 index 00000000..46d2fb53 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h @@ -0,0 +1,18 @@ +#ifndef REED_MULLER_H +#define REED_MULLER_H + + +/** + * @file reed_muller.h + * Header file of reed_muller.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c b/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c new file mode 100644 index 00000000..6f30c1de --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c @@ -0,0 +1,349 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include "parsing.h" +#include "reed_solomon.h" +#include +#include +#include +/** + * @file reed_solomon.c + * Constant time implementation of Reed-Solomon codes + */ + + +static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); +static void compute_roots(uint8_t *error, uint16_t *sigma); +static
void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); +static void correct_errors(uint8_t *cdw, const uint16_t *error_values); + +/** + * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes + * + * Following @cite lin1983error (Chapter 4 - Cyclic Codes), + * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register + * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code. + * + * @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_K_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) { + size_t i, j, k; + uint8_t gate_value = 0; + + uint16_t tmp[PARAM_G] = {0}; + uint16_t PARAM_RS_POLY [] = {RS_POLY_COEFS}; + uint8_t prev, x; + + for (i = 0; i < PARAM_N1; ++i) { + cdw[i] = 0; + } + + for (i = 0; i < PARAM_K; ++i) { + gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]); + + for (j = 0; j < PARAM_G; ++j) { + tmp[j] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gate_value, PARAM_RS_POLY[j]); + } + + prev = 0; + for (k = 0; k < PARAM_N1 - PARAM_K; k++) { + x = cdw[k]; + cdw[k] = (uint8_t) (prev ^ tmp[k]); + prev = x; + } + } + + memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K); +} + + + +/** + * @brief Computes 2 * PARAM_DELTA syndromes + * + * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes + * @param[in] cdw Array of size PARAM_N1 storing the received vector + */ +void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { + for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { + for (size_t j = 1; j < PARAM_N1; ++j) { + syndromes[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); + } + syndromes[i] ^= cdw[0]; + } 
+} + + + +/** + * @brief Computes the error locator polynomial (ELP) sigma + * + * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes).
+ * We use the letter p for rho, which is initialized to -1.
+ * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
+ * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
+ * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated.
+ * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA. + * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value + * and we only need to save its first PARAM_DELTA - 1 coefficients. + * + * @returns the degree of the ELP sigma + * @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP + * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes + */ +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { + uint16_t deg_sigma = 0; + uint16_t deg_sigma_p = 0; + uint16_t deg_sigma_copy = 0; + uint16_t sigma_copy[PARAM_DELTA + 1] = {0}; + uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; + uint16_t pp = (uint16_t) -1; // 2*rho + uint16_t d_p = 1; + uint16_t d = syndromes[0]; + + uint16_t mask1, mask2, mask12; + uint16_t deg_X, deg_X_sigma_p; + uint16_t dd; + uint16_t mu; + + uint16_t i; + + sigma[0] = 1; + for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { + // Save sigma in case we need it to update X_sigma_p + memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); + deg_sigma_copy = deg_sigma; + + dd = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(d_p)); + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + sigma[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(dd, X_sigma_p[i]); + } + + deg_X = mu - pp; + deg_X_sigma_p = deg_X + deg_sigma_p; + + // mask1 = 0xffff if(d != 0) and 0 otherwise + mask1 = -((uint16_t) - d >> 15); + + // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise + mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15); + + // mask12 = 0xffff if the deg_sigma increased and 0 otherwise + mask12 = mask1 & mask2; + deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma); + + if (mu == (2 * PARAM_DELTA - 1)) { + break; + } + + pp ^= mask12 & (mu ^ pp); + d_p ^= mask12 & (d ^ d_p); + for (i = PARAM_DELTA; i; --i) { + X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); + } + + deg_sigma_p ^= mask12 & 
(deg_sigma_copy ^ deg_sigma_p); + d = syndromes[mu + 1]; + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + d ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]); + } + } + + return deg_sigma; +} + + + +/** + * @brief Computes the error polynomial error from the error locator polynomial sigma + * + * See function PQCLEAN_HQCRMRS192_CLEAN_fft for more details. + * + * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial + * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + */ +static void compute_roots(uint8_t *error, uint16_t *sigma) { + uint16_t w[1 << PARAM_M] = {0}; + + PQCLEAN_HQCRMRS192_CLEAN_fft(w, sigma, PARAM_DELTA + 1); + PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(error, w); +} + + + +/** + * @brief Computes the polynomial z(x) + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. 
+ * + * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x) + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + * @param[in] degree Integer that is the degree of polynomial sigma + * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes + */ +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) { + size_t i, j; + uint16_t mask; + + z[0] = 1; + + for (i = 1; i < PARAM_DELTA + 1; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] = mask & sigma[i]; + } + + z[1] ^= syndromes[0]; + + for (i = 2; i <= PARAM_DELTA; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] ^= mask & syndromes[i - 1]; + + for (j = 1; j < i; ++j) { + z[i] ^= mask & PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]); + } + } +} + + + +/** + * @brief Computes the error values + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. + * + * @param[out] error_values Array of PARAM_DELTA elements receiving the error values + * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x) + * @param[in] z_degree Integer that is the degree of polynomial z(x) + * @param[in] error_compact Array of PARAM_DELTA + PARAM_N1 storing compact representation of the error + */ +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) { + uint16_t beta_j[PARAM_DELTA] = {0}; + uint16_t e_j[PARAM_DELTA] = {0}; + + uint16_t delta_counter; + uint16_t delta_real_value; + uint16_t found; + uint16_t mask1; + uint16_t mask2; + uint16_t tmp1; + uint16_t tmp2; + uint16_t inverse; + uint16_t inverse_power_j; + + // Compute the beta_{j_i} page 31 of the documentation + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; i++) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ 
delta_counter) >> 31)); // j == delta_counter + beta_j[j] += mask1 & mask2 & gf_exp[i]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } + delta_real_value = delta_counter; + + // Compute the e_{j_i} page 31 of the documentation + for (size_t i = 0; i < PARAM_DELTA; ++i) { + tmp1 = 1; + tmp2 = 1; + inverse = PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(beta_j[i]); + inverse_power_j = 1; + + for (size_t j = 1; j <= PARAM_DELTA; ++j) { + inverse_power_j = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, inverse); + tmp1 ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, z[j]); + } + for (size_t k = 1; k < PARAM_DELTA; ++k) { + tmp2 = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); + } + mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value + e_j[i] = mask1 & PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(tmp2)); + } + + // Place the delta e_{j_i} values at the right coordinates of the output vector + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; ++i) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter + error_values[i] += mask1 & mask2 & e_j[j]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } +} + + + +/** + * @brief Correct the errors + * + * @param[out] cdw Array of PARAM_N1 elements receiving the corrected vector + * @param[in] error Array of the error vector + * @param[in] error_values Array of PARAM_DELTA elements storing the error values + */ +static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { + for (size_t i = 0; i < PARAM_N1; ++i) { + cdw[i] ^= error_values[i]; + } +} + + + +/** + * @brief Decodes the received word + * + * This function relies on six steps: + *
    + *
  1. The first step is the computation of the 2*PARAM_DELTA syndromes. + *
  2. The second step is the computation of the error-locator polynomial sigma. + *
  3. The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses. + *
  4. The fourth step is the computation of the polynomial z(x). + *
  5. The fifth step is the computation of the error values. + *
  6. The sixth step is the correction of the errors in the received polynomial. + *
+ * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error + * + * @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) { + uint16_t syndromes[2 * PARAM_DELTA] = {0}; + uint16_t sigma[1 << PARAM_FFT] = {0}; + uint8_t error[1 << PARAM_M] = {0}; + uint16_t z[PARAM_N1] = {0}; + uint16_t error_values[PARAM_N1] = {0}; + uint16_t deg; + + // Calculate the 2*PARAM_DELTA syndromes + compute_syndromes(syndromes, cdw); + + // Compute the error locator polynomial sigma + // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room + deg = compute_elp(sigma, syndromes); + + // Compute the error polynomial error + compute_roots(error, sigma); + + // Compute the polynomial z(x) + compute_z_poly(z, sigma, deg, syndromes); + + // Compute the error values + compute_error_values(error_values, z, error); + + // Correct the errors + correct_errors(cdw, error_values); + + // Retrieve the message from the decoded codeword + memcpy(msg, cdw + (PARAM_G - 1), PARAM_K); + +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h b/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h new file mode 100644 index 00000000..069c599f --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h @@ -0,0 +1,20 @@ +#ifndef REED_SOLOMON_H +#define REED_SOLOMON_H + + +/** + * @file reed_solomon.h + * Header file of reed_solomon.c + */ +#include "parameters.h" +#include +#include + +static const uint16_t alpha_ij_pow [32][55] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 
180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223, 91, 113, 217, 67, 17, 68, 13, 52, 208, 103}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169, 33, 21, 168, 41, 85, 146, 228, 115, 191, 145}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150, 149, 165, 130, 200, 28, 221, 81, 121, 195, 172}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36, 244, 235, 44, 233, 108, 1, 32, 116, 38, 180}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38, 117, 12, 39, 53, 193, 10, 186, 161, 47, 15}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 3, 157, 53, 159, 40, 185, 194, 137, 231, 254, 226, 68, 189, 248, 197, 46}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26, 31, 118, 23, 158, 77, 146, 209, 229, 219, 55}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85, 115, 252, 219, 110, 100, 221, 242, 138, 245, 44}, {116, 180, 96, 
106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100, 167, 239, 36, 235, 233, 1, 116, 180, 96, 106}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44, 216, 128, 45, 48, 106, 10, 222, 202, 107, 226}, {205, 143, 37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221, 195, 61, 44, 173, 58, 117, 39, 193, 186, 47, 231, 182, 26, 237, 23, 21, 146, 145, 219, 87, 56, 242, 36, 139, 54, 64, 45, 96, 181, 80, 97, 120, 223, 68, 62, 102, 33, 85}, {135, 6, 53, 20, 190, 120, 163, 13, 237, 46, 84, 228, 229, 98, 100, 81, 69, 251, 131, 32, 45, 192, 238, 186, 94, 187, 217, 189, 236, 169, 82, 209, 241, 220, 28, 242, 72, 22, 173, 116, 201, 37, 140, 222, 15, 254, 34, 62, 204, 132, 146, 63, 75, 130, 167}, {19, 24, 181, 93, 94, 107, 67, 129, 102, 132, 57, 252, 98, 200, 89, 18, 11, 173, 232, 3, 53, 40, 194, 231, 226, 189, 197, 158, 170, 145, 75, 25, 166, 69, 235, 54, 29, 234, 37, 5, 95, 120, 91, 52, 59, 218, 82, 191, 227, 174, 221, 43, 247, 207, 32}, {38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185}, {76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, 2, 152, 39, 140, 190, 231, 175, 31, 23, 77, 209, 219, 25, 162, 36, 88, 4, 45, 78, 5, 97, 211, 67, 62, 46, 154, 191, 171, 50, 89, 72, 176, 8, 90, 156, 10, 194, 187, 134, 124}, {152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215}, {45, 37, 80, 
101, 223, 208, 102, 168, 191, 150, 7, 195, 251, 173, 38, 39, 10, 47, 127, 26, 197, 21, 115, 219, 100, 242, 245, 54, 205, 96, 70, 97, 107, 68, 59, 33, 228, 241, 130, 89, 61, 207, 58, 12, 193, 161, 231, 134, 237, 169, 146, 179, 87, 166, 36}, {90, 148, 186, 30, 226, 62, 109, 73, 179, 174, 162, 61, 131, 232, 96, 140, 153, 127, 52, 51, 168, 99, 98, 56, 172, 22, 8, 234, 212, 185, 240, 67, 237, 79, 114, 241, 25, 121, 245, 108, 19, 39, 20, 188, 223, 189, 133, 41, 63, 55, 221, 9, 176, 64, 3}, {180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108, 38, 156, 160, 15, 226, 124, 169, 114, 255, 100, 239, 235, 1, 180, 106, 185, 253}, {117, 181, 161, 107, 26, 102, 41, 252, 87, 89, 245, 173, 45, 53, 185, 231, 68, 197, 168, 145, 110, 166, 61, 54, 38, 37, 186, 120, 134, 59, 21, 191, 196, 221, 36, 207, 205, 39, 80, 15, 217, 237, 33, 115, 150, 56, 138, 125, 58, 96, 10, 101, 182, 62, 169}, {234, 238, 97, 254, 103, 184, 57, 227, 7, 172, 176, 58, 192, 40, 15, 175, 147, 21, 99, 55, 166, 122, 216, 45, 106, 222, 107, 52, 133, 85, 123, 50, 195, 11, 32, 12, 140, 188, 182, 124, 158, 115, 49, 224, 36, 131, 19, 37, 105, 253, 68, 151, 154, 252, 174}, {201, 159, 47, 91, 124, 33, 209, 149, 166, 244, 71, 117, 238, 194, 223, 31, 79, 115, 98, 167, 61, 216, 90, 181, 190, 254, 206, 218, 213, 150, 224, 72, 54, 152, 106, 161, 177, 189, 184, 114, 171, 56, 18, 131, 38, 148, 111, 107, 104, 46, 146, 227, 14, 138, 233}, {143, 70, 101, 217, 59, 168, 252, 130, 195, 44, 58, 39, 186, 231, 26, 23, 146, 219, 56, 36, 54, 45, 181, 97, 223, 62, 33, 191, 110, 89, 251, 8, 12, 10, 15, 134, 197, 41, 179, 100, 86, 125, 205, 37, 185, 107, 208, 184, 228, 150, 221, 61, 173, 117, 193}, {3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55, 89, 235, 32, 96, 160, 253, 26, 46, 114, 150, 167, 244, 1, 
3, 5, 15, 17}, {6, 20, 120, 13, 46, 228, 98, 81, 251, 32, 192, 186, 187, 189, 169, 209, 220, 242, 22, 116, 37, 222, 254, 62, 132, 63, 130, 43, 250, 38, 212, 194, 182, 147, 77, 179, 141, 9, 54, 180, 159, 101, 67, 151, 85, 227, 112, 61, 142, 3, 10, 60, 136, 23, 114}, {12, 80, 231, 208, 169, 191, 87, 195, 125, 38, 181, 47, 217, 197, 85, 219, 221, 245, 8, 96, 186, 107, 206, 33, 145, 130, 86, 207, 45, 193, 101, 134, 102, 146, 150, 166, 251, 64, 39, 185, 127, 62, 21, 252, 100, 138, 54, 117, 70, 15, 68, 23, 228, 196, 89}, {24, 93, 107, 129, 132, 252, 200, 18, 173, 3, 40, 231, 189, 158, 145, 25, 69, 54, 234, 5, 120, 52, 218, 191, 174, 43, 207, 90, 35, 15, 136, 92, 115, 220, 239, 125, 76, 238, 101, 17, 133, 228, 149, 121, 44, 135, 212, 47, 175, 51, 146, 49, 162, 139, 116}, {48, 105, 127, 248, 77, 241, 224, 247, 64, 156, 95, 182, 236, 170, 150, 162, 11, 205, 212, 94, 134, 133, 213, 110, 239, 250, 45, 35, 30, 26, 218, 99, 130, 69, 108, 143, 40, 211, 206, 132, 229, 7, 144, 2, 96, 210, 254, 237, 154, 255, 221, 243, 128, 37, 190}, {96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59}, {192, 222, 182, 151, 114, 110, 155, 27, 143, 160, 177, 237, 82, 75, 89, 88, 152, 70, 240, 103, 21, 123, 224, 251, 116, 212, 101, 136, 218, 145, 200, 144, 8, 78, 190, 217, 204, 183, 87, 172, 216, 12, 105, 225, 59, 170, 98, 242, 250, 180, 10, 211, 31, 168, 255}, {157, 95, 217, 133, 230, 130, 18, 2, 39, 190, 175, 23, 209, 25, 36, 4, 78, 97, 67, 46, 191, 50, 72, 8, 156, 194, 134, 92, 99, 100, 144, 16, 37, 153, 17, 184, 198, 200, 61, 32, 74, 47, 34, 109, 145, 141, 122, 64, 148, 94, 68, 218, 63, 7, 244}}; + +void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw); + + +#endif diff --git 
a/src/kem/hqc/hqc-rmrs-192/clean/vector.c b/src/kem/hqc/hqc-rmrs-192/clean/vector.c new file mode 100644 index 00000000..6f9949c7 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/vector.c @@ -0,0 +1,176 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file vector.c + * @brief Implementation of vectors sampling and some utilities for the HQC scheme + */ + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. The vector + * is stored by position. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It return \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). 
+ * + * @param[in] v Pointer to an array + * @param[in] weight Integer that is the Hamming weight + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) { + size_t random_bytes_size = 3 * weight; + uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R + uint8_t inc; + size_t i, j; + + i = 0; + j = random_bytes_size; + while (i < weight) { + do { + if (j == random_bytes_size) { + seedexpander(ctx, rand_bytes, random_bytes_size); + j = 0; + } + + v[i] = ((uint32_t) rand_bytes[j++]) << 16; + v[i] |= ((uint32_t) rand_bytes[j++]) << 8; + v[i] |= rand_bytes[j++]; + + } while (v[i] >= UTILS_REJECTION_THRESHOLD); + + v[i] = v[i] % PARAM_N; + + inc = 1; + for (size_t k = 0; k < i; k++) { + if (v[k] == v[i]) { + inc = 0; + } + } + i += inc; + } +} + + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It return \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). 
+ * + * @param[in] v Pointer to an array + * @param[in] weight Integer that is the Hamming weight + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) { + uint32_t tmp[PARAM_OMEGA_R] = {0}; + + PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, tmp, weight); + + for (size_t i = 0; i < weight; ++i) { + int32_t index = tmp[i] / 64; + int32_t pos = tmp[i] % 64; + v[index] |= ((uint64_t) 1) << pos; + } +} + + + +/** + * @brief Generates a random vector of dimension PARAM_N + * + * This function generates a random binary vector of dimension PARAM_N. It generates a random + * array of bytes using the seedexpander function, and drop the extra bits using a mask. + * + * @param[in] v Pointer to an array + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) { + uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0}; + + seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES); + + PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES); + v[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief Adds two vectors + * + * @param[out] o Pointer to an array that is the result + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors + */ +void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { + for (uint32_t i = 0; i < size; ++i) { + o[i] = v1[i] ^ v2[i]; + } +} + + + +/** + * @brief Compares two vectors + * + * @param[in] v1 Pointer to an array that is first vector + * @param[in] v2 Pointer to an array that is second vector + * @param[in] size Integer that is the size of the vectors + * @returns 0 if the vectors are equals and a negative/psotive value 
otherwise + */ +uint8_t PQCLEAN_HQCRMRS192_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) { + uint64_t r = 0; + for (size_t i = 0; i < size; i++) { + r |= v1[i] ^ v2[i]; + } + r = (~r + 1) >> 63; + return (uint8_t) r; +} + + + +/** + * @brief Resize a vector so that it contains size_o bits + * + * @param[out] o Pointer to the output vector + * @param[in] size_o Integer that is the size of the output vector in bits + * @param[in] v Pointer to the input vector + * @param[in] size_v Integer that is the size of the input vector in bits + */ +void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) { + if (size_o < size_v) { + uint64_t mask = 0x7FFFFFFFFFFFFFFF; + int8_t val = 0; + + if (size_o % 64) { + val = 64 - (size_o % 64); + } + + memcpy(o, v, 8 * VEC_N1N2_SIZE_64); + + for (int8_t i = 0; i < val; ++i) { + o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); + } + } else { + memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64)); + } +} diff --git a/src/kem/hqc/hqc-rmrs-192/clean/vector.h b/src/kem/hqc/hqc-rmrs-192/clean/vector.h new file mode 100644 index 00000000..1b06a68b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-192/clean/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + + +/** + * @file vector.h + * @brief Header file for vector.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v); + + +void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size); + +uint8_t PQCLEAN_HQCRMRS192_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size); + +void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, 
uint32_t size_o, const uint64_t *v, uint32_t size_v); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt b/src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt new file mode 100644 index 00000000..535c8982 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt @@ -0,0 +1,16 @@ +set( + SRC_AVX2_HQCRMRS256 + code.c + fft.c + gf2x.c + gf.c + hqc.c + kem.c + parsing.c + reed_muller.c + reed_solomon.c + vector.c +) + +define_kem_alg(hqcrmrs256_avx2 + PQCLEAN_HQCRMRS256_CLEAN "${SRC_AVX2_HQCRMRS256}" "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/api.h b/src/kem/hqc/hqc-rmrs-256/avx2/api.h new file mode 100644 index 00000000..6b5c9188 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/api.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_HQCRMRS256_AVX2_API_H +#define PQCLEAN_HQCRMRS256_AVX2_API_H +/** + * @file api.h + * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme + */ + +#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_ALGNAME "HQC-RMRS-256" + +#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES 7285 +#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_PUBLICKEYBYTES 7245 +#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_BYTES 64 +#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_CIPHERTEXTBYTES 14469 + +// As a technicality, the public key is appended to the secret key in order to respect the NIST API. 
+// Without this constraint, PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32 + +int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk); + +int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/code.c b/src/kem/hqc/hqc-rmrs-256/avx2/code.c new file mode 100644 index 00000000..c1af29d2 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/code.c @@ -0,0 +1,47 @@ +#include "code.h" +#include "parameters.h" +#include "reed_muller.h" +#include "reed_solomon.h" +#include +#include +/** + * @file code.c + * @brief Implementation of concatenated code + */ + + + +/** + * + * @brief Encoding the message m to a code word em using the concatenated code + * + * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain + * a concatenated code word. 
+ * + * @param[out] em Pointer to an array that is the tensor code word + * @param[in] m Pointer to an array that is the message + */ +void PQCLEAN_HQCRMRS256_AVX2_code_encode(uint8_t *em, const uint8_t *m) { + uint8_t tmp[8 * VEC_N1_SIZE_64] = {0}; + + PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(tmp, m); + PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(em, tmp); + +} + + + +/** + * @brief Decoding the code word em to a message m using the concatenated code + * + * @param[out] m Pointer to an array that is the message + * @param[in] em Pointer to an array that is the code word + */ +void PQCLEAN_HQCRMRS256_AVX2_code_decode(uint8_t *m, const uint8_t *em) { + uint8_t tmp[8 * VEC_N1_SIZE_64] = {0}; + + PQCLEAN_HQCRMRS256_AVX2_reed_muller_decode(tmp, em); + PQCLEAN_HQCRMRS256_AVX2_reed_solomon_decode(m, tmp); + + +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/code.h b/src/kem/hqc/hqc-rmrs-256/avx2/code.h new file mode 100644 index 00000000..cacce116 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/code.h @@ -0,0 +1,18 @@ +#ifndef CODE_H +#define CODE_H + + +/** + * @file code.h + * Header file of code.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS256_AVX2_code_encode(uint8_t *em, const uint8_t *message); + +void PQCLEAN_HQCRMRS256_AVX2_code_decode(uint8_t *m, const uint8_t *em); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/fft.c b/src/kem/hqc/hqc-rmrs-256/avx2/fft.c new file mode 100644 index 00000000..d49a05f7 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/fft.c @@ -0,0 +1,351 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include +#include +/** + * @file fft.c + * Implementation of the additive FFT and its transpose. + * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf + */ + + +static void compute_fft_betas(uint16_t *betas); +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size); +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); + + +/** + * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose + * + * @param[out] betas Array of size PARAM_M-1 + */ +static void compute_fft_betas(uint16_t *betas) { + size_t i; + for (i = 0; i < PARAM_M - 1; ++i) { + betas[i] = 1 << (PARAM_M - 1 - i); + } +} + + + +/** + * @brief Computes the subset sums of the given set + * + * The array subset_sums is such that its ith element is + * the subset sum of the set elements given by the binary form of i. 
+ * + * @param[out] subset_sums Array of size 2^set_size receiving the subset sums + * @param[in] set Array of set_size elements + * @param[in] set_size Size of the array set + */ +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) { + uint16_t i, j; + subset_sums[0] = 0; + + for (i = 0; i < set_size; ++i) { + for (j = 0; j < (1 << i); ++j) { + subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; + } + } +} + + + +/** + * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x] + * + * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x) + * as proposed by Bernstein, Chou and Schwabe: + * https://binary.cr.yp.to/mcbits-20130616.pdf + * + * @param[out] f0 Array half the size of f + * @param[out] f1 Array half the size of f + * @param[in] f Array of size a power of 2 + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f + */ +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + switch (m_f) { + case 4: + f0[4] = f[8] ^ f[12]; + f0[6] = f[12] ^ f[14]; + f0[7] = f[14] ^ f[15]; + f1[5] = f[11] ^ f[13]; + f1[6] = f[13] ^ f[14]; + f1[7] = f[15]; + f0[5] = f[10] ^ f[12] ^ f1[5]; + f1[4] = f[9] ^ f[13] ^ f0[5]; + + f0[0] = f[0]; + f1[3] = f[7] ^ f[11] ^ f[15]; + f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3]; + f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3]; + f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3]; + f1[2] = f[3] ^ f1[1] ^ f0[3]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 3: + f0[0] = f[0]; + f0[2] = f[4] ^ f[6]; + f0[3] = f[6] ^ f[7]; + f1[1] = f[3] ^ f[5] ^ f[7]; + f1[2] = f[5] ^ f[6]; + f1[3] = f[7]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 2: + f0[0] = f[0]; + f0[1] = f[2] ^ f[3]; + f1[0] = f[1] ^ f0[1]; + f1[1] = f[3]; + break; + + case 1: + f0[0] = f[0]; + f1[0] = f[1]; + break; + + default: + radix_big(f0, f1, f, m_f); + break; + } +} + +static void radix_big(uint16_t *f0, 
uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1; + n <<= (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0; i < n; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + + + +/** + * @brief Evaluates f at all subset sums of a given set + * + * This function is a subroutine of the function PQCLEAN_HQCRMRS256_AVX2_fft. + * + * @param[out] w Array of 2^m elements receiving the evaluations + * @param[in,out] f Array of 2^m_f coefficients; twisted in place when betas[m-1] != 1 + * @param[in] f_coeffs Number of coefficients of f + * @param[in] m Number of betas + * @param[in] m_f log2 of the length of array f (2^m_f entries are read) + * @param[in] betas FFT constants + */ +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; + uint16_t u[1 << (PARAM_M - 2)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; + size_t x; + + // Step 1 + if (m_f == 1) { + for (i = 0; i < m; ++i) { + tmp[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas[i], f[1]); + } + + w[0] = f[0]; + x = 1; + for (j = 0; j < m; ++j) { + for (k = 0; k < x; ++k) { + w[x + k] = w[k] ^ tmp[j]; + } + x <<= 1; + } + + return; + } + + // Step 2: compute g + if (betas[m - 1] != 1) { + beta_m_pow = 1; + x = 1; + x <<= m_f; + for (i = 1; i < x; 
++i) { + beta_m_pow = PQCLEAN_HQCRMRS256_AVX2_gf_mul(beta_m_pow, betas[m - 1]); + f[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(beta_m_pow, f[i]); + } + } + + // Step 3 + radix(f0, f1, f, m_f); + + // Step 4: compute gammas and deltas + for (i = 0; i + 1 < m; ++i) { + gammas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS256_AVX2_gf_inverse(betas[m - 1])); + deltas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_square(gammas[i]) ^ gammas[i]; + } + + // Compute gammas sums + compute_subset_sums(gammas_sums, gammas, m - 1); + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + + k = 1; + k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. + if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant + w[0] = u[0]; + w[k] = u[0] ^ f1[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(gammas_sums[i], f1[0]); + w[k + i] = w[i] ^ f1[0]; + } + } else { + fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas); + + // Step 6 + memcpy(w + k, v, 2 * k); + w[0] = u[0]; + w[k] ^= u[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(gammas_sums[i], v[i]); + w[k + i] ^= w[i]; + } + } +} + + + +/** + * @brief Evaluates f on all field elements using an additive FFT algorithm + * + * f_coeffs is the number of coefficients of f (one more than its degree).
+ * The FFT proceeds recursively to evaluate f at all subset sums of a basis B.
+ * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf
+ * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas, + * meaning the first gammas subset sums are actually the subset sums of betas (except 1).
+ * Also note that f itself is const here; its radix-converted halves f0 and f1 are altered during computation (twisted at each level). + * + * @param[out] w Array of 2^PARAM_M elements receiving the evaluations + * @param[in] f Array of 2^PARAM_FFT elements + * @param[in] f_coeffs Number of coefficients of f (i.e. deg(f)+1) + */ +void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; + + // Follows Gao and Mateer algorithm + compute_fft_betas(betas); + + // Step 1: PARAM_FFT > 1, nothing to do + + // Compute betas sums (gammas are equal to betas on this first call) + compute_subset_sums(betas_sums, betas, PARAM_M - 1); + + // Step 2: beta_m = 1, nothing to do + + // Step 3 + radix(f0, f1, f, PARAM_FFT); + + // Step 4: Compute deltas + for (i = 0; i < PARAM_M - 1; ++i) { + deltas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_square(betas[i]) ^ betas[i]; + } + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + + k = 1 << (PARAM_M - 1); + // Step 6, 7 and error polynomial computation + memcpy(w + k, v, 2 * k); + + // Check if 0 is root + w[0] = u[0]; + + // Check if 1 is root + w[k] ^= u[0]; + + // Find other roots + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas_sums[i], v[i]); + w[k + i] ^= w[i]; + } +} + + + +/** + * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements. 
+ * + * @param[in,out] error Array with the error; error bits are XORed into it + * @note A bit of error is flipped when the matching evaluation in w is zero + * @param[in] w Array of size 2^PARAM_M + */ +void PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t k; + size_t i, index; + + compute_fft_betas(gammas); + compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + + k = 1 << (PARAM_M - 1); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); + + for (i = 1; i < k; ++i) { + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]]; + error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); + + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1]; + error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15); + } +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/fft.h b/src/kem/hqc/hqc-rmrs-256/avx2/fft.h new file mode 100644 index 00000000..2428b88c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/fft.h @@ -0,0 +1,18 @@ +#ifndef FFT_H +#define FFT_H + + +/** + * @file fft.h + * Header file of fft.c + */ + +#include +#include + +void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs); + +void PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/gf.c b/src/kem/hqc/hqc-rmrs-256/avx2/gf.c new file mode 100644 index 00000000..3b5dcdc9 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/gf.c @@ -0,0 +1,176 @@ +#include "gf.h" +#include "parameters.h" +#include +/** + * @file gf.c + * Galois field implementation with multiplication using the pclmulqdq instruction + */ + + +static uint16_t gf_reduce(uint64_t x, size_t deg_x); + + + +/** + * Reduces polynomial x modulo primitive polynomial GF_POLY. 
+ * @returns x mod GF_POLY + * @param[in] x Polynomial of degree less than 64 + * @param[in] deg_x The degree of polynomial x + */ +static uint16_t gf_reduce(uint64_t x, size_t deg_x) { + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; + + // Deduce the number of steps of reduction + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); + + // Reduce + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; + x &= (1 << PARAM_M) - 1; + x ^= mod; + + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; + x ^= mod; + rmdr ^= 1 << z2; + z1 = z2; + } + } + + return x; +} + + + +/** + * Multiplies two elements of GF(2^GF_M). + * @returns the product a*b + * @param[in] a Element of GF(2^GF_M) + * @param[in] b Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_mul(uint16_t a, uint16_t b) { + __m128i va = _mm_cvtsi32_si128(a); + __m128i vb = _mm_cvtsi32_si128(b); + __m128i vab = _mm_clmulepi64_si128(va, vb, 0); + uint32_t ab = _mm_cvtsi128_si32(vab); + + return gf_reduce(ab, 2 * (PARAM_M - 1)); +} + + + +/** + * Compute 16 products in GF(2^GF_M). 
+ * @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M) + * @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers + * @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer + * + */ +__m256i PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(__m256i a, __m256i b) { + __m128i al = _mm256_extractf128_si256(a, 0); + __m128i ah = _mm256_extractf128_si256(a, 1); + __m128i bl = _mm256_extractf128_si256(b, 0); + __m128i bh = _mm256_extractf128_si256(b, 1); + + __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0); + abl0 &= CONST128_MIDDLEMASKL; + abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); + + __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11); + abh0 &= CONST128_MIDDLEMASKL; + abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); + + abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL); + abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH); + + __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0); + abl1 &= CONST128_MIDDLEMASKL; + abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH); + + __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11); + abh1 &= CONST128_MIDDLEMASKL; + abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH); + + abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL); + abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH); + + __m256i ret = _mm256_set_m128i(abl1, abl0); + + __m256i aux = CONST256_MR0; + + for (int32_t i = 0; i < 7; i++) { + ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux); + aux = aux << 1; + } + + ret &= CONST256_LASTMASK; + return ret; +} + + + +/** + * Squares an element of GF(2^GF_M). 
+ * @returns a^2 + * @param[in] a Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_square(uint16_t a) { + uint32_t b = a; + uint32_t s = b & 1; + for (size_t i = 1; i < PARAM_M; ++i) { + b <<= 1; + s ^= b & (1 << 2 * i); + } + + return gf_reduce(s, 2 * (PARAM_M - 1)); +} + + + +/** + * Computes the inverse of an element of GF(2^8), + * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254 + * @returns the inverse of a + * @param[in] a Element of GF(2^GF_M) + */ +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_inverse(uint16_t a) { + uint16_t inv = a; + uint16_t tmp1, tmp2; + + inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(a); /* a^2 */ + tmp1 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, a); /* a^3 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^4 */ + tmp2 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, tmp1); /* a^7 */ + tmp1 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, tmp2); /* a^11 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp1, inv); /* a^15 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^30 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^60 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^120 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inv, tmp2); /* a^127 */ + inv = PQCLEAN_HQCRMRS256_AVX2_gf_square(inv); /* a^254 */ + return inv; +} + + + +/** + * Returns i modulo 2^GF_M-1. + * i must be less than 2*(2^GF_M-1). + * Therefore, the return value is either i or i-2^GF_M+1. 
+ * @returns i mod (2^GF_M-1) + * @param[in] i The integer whose modulo is taken + */ +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_mod(uint16_t i) { + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); + + // mask = 0xffff if (i < GF_MUL_ORDER) + uint16_t mask = -(tmp >> 15); + + return tmp + (mask & PARAM_GF_MUL_ORDER); +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/gf.h b/src/kem/hqc/hqc-rmrs-256/avx2/gf.h new file mode 100644 index 00000000..5086900e --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/gf.h @@ -0,0 +1,69 @@ +#ifndef GF_H +#define GF_H + + +/** + * @file gf.h + * Header file of gf.c + */ + +#include +#include +#include + +#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + +/** + * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8. + * The last two elements are needed by the PQCLEAN_HQCRMRS256_AVX2_gf_mul function + * (for example if both elements to multiply are zero). + */ +static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 
138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 }; + + + +/** + * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8). + * The logarithm of 0 is set to 0 by convention. + */ +static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 }; + +/** + * Masks needed for the computation of 16 mult in GF(2^M) + */ +#define CONST256_MR0 _mm256_set1_epi64x((long long) 0x0100010001000100) +#define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff) +#define CONST128_MASKL _mm_set1_epi64x((long long) 0x0000ffff0000ffff) +#define CONST128_MASKH _mm_set1_epi64x((long long) 0xffff0000ffff0000) +#define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff) +#define 
CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000) +#define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff) +#define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100) + +/** + * x^i modulo x^8+x^4+x^3+x^2+1 duplicate 4 times to fit a 256-bit register + */ +static const __m256i red[7] = { + {0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL}, + {0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL}, + {0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL}, + {0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL}, + {0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL}, + {0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL}, + {0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL}, + +}; + + +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_mul(uint16_t a, uint16_t b); + +__m256i PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(__m256i a, __m256i b); + +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_square(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_inverse(uint16_t a); + +uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_mod(uint16_t i); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/gf2x.c b/src/kem/hqc/hqc-rmrs-256/avx2/gf2x.c new file mode 100644 index 00000000..2d3ac6d6 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/gf2x.c @@ -0,0 +1,603 @@ +#include "gf2x.h" +#include "parameters.h" +#include +#include +#include +/** + * \file gf2x.c + * \brief AVX2 implementation of multiplication of two polynomials + */ + + + +//Parameters for Toom-Cook and UB_Karatsuba +#define T_TM3R_3W (PARAM_N_MULT / 3) +#define T_TM3R (PARAM_N_MULT + 384) +#define tTM3R ((T_TM3R) / 64) +#define T_TM3R_3W_256 ((T_TM3R_3W + 128) / (256)) +#define T_TM3R_3W_64 (T_TM3R_3W_256 
<< 2) + +#define T_5W 4096 +#define T_5W_256 (T_5W >> 8) + +#define T2_5W_256 (2 * T_5W_256) +#define t5 (5 * T_5W / 64) + +static inline void reduce(uint64_t *o, const __m256i *a); +static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B); +static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B); +static inline void karat_mult5(__m256i *C, const __m256i *A, const __m256i *B); +static inline void divide_by_x_plus_one_256(__m256i *in, __m256i *out, int32_t size); +static void toom_3_mult(uint64_t *Out, const aligned_vec_t *A, const aligned_vec_t *B); + + +/** + * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$ + * + * This function computes the modular reduction of the polynomial a(x) + * + * @param[out] o Pointer to the result + * @param[in] a Pointer to the polynomial a(x) + */ +static inline void reduce(uint64_t *o, const __m256i *a256) { + size_t i, i2; + __m256i r256, carry256; + __m256i *o256 = (__m256i *)o; + const uint64_t *a64 = (const uint64_t *)a256; + uint64_t r, carry; + + i2 = 0; + for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) { + r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i])); + r256 = _mm256_srli_epi64(r256, PARAM_N & 63); + carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1])); + carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63); + r256 ^= carry256; + _mm256_storeu_si256(&o256[i2], a256[i2] ^ r256); + i2 += 1; + } + + i = i - (PARAM_N >> 6); + for (; i < (PARAM_N >> 6) + 1; i++) { + r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63); + carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63); + r ^= carry; + o[i] = a64[i] ^ r; + } + + o[PARAM_N >> 6] &= RED_MASK; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * A(x) and B(x) 
are stored in 128-bit registers + * This function computes A(x)*B(x) using Karatsuba + * + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) { + __m128i D1[2]; + __m128i D0[2], D2[2]; + __m128i Al = _mm_loadu_si128(A); + __m128i Ah = _mm_loadu_si128(A + 1); + __m128i Bl = _mm_loadu_si128(B); + __m128i Bh = _mm_loadu_si128(B + 1); + + // Compute Al.Bl=D0 + __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0); + __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11); + __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e)); + __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e)); + __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Compute Ah.Bh=D2 + DD0 = _mm_clmulepi64_si128(Ah, Bh, 0); + DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11); + AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e)); + BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e)); + DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Compute AlpAh.BlpBh=D1 + // Initialisation of AlpAh and BlpBh + __m128i AlpAh = _mm_xor_si128(Al, Ah); + __m128i BlpBh = _mm_xor_si128(Bl, Bh); + DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0); + DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11); + AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e)); + BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e)); + DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0)); + D1[0] = _mm_xor_si128(DD0, 
_mm_unpacklo_epi64(_mm_setzero_si128(), DD1)); + D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128())); + + // Final computation of C + __m128i middle = _mm_xor_si128(D0[1], D2[0]); + C[0] = D0[0]; + C[1] = middle ^ D0[0] ^ D1[0]; + C[2] = middle ^ D1[1] ^ D2[1]; + C[3] = D2[1]; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers (two each) + * @param[out] C Pointer to the result (four 256-bit words) + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) { + __m256i D0[2], D1[2], D2[2], SAA, SBB; + const __m128i *A128 = (const __m128i *)A; + const __m128i *B128 = (const __m128i *)B; + __m256i middle; + + karat_mult_1((__m128i *) D0, A128, B128); + karat_mult_1((__m128i *) D2, A128 + 2, B128 + 2); + + SAA = A[0] ^ A[1]; + SBB = B[0] ^ B[1]; + karat_mult_1((__m128i *) D1, (__m128i *) &SAA, (__m128i *) &SBB); + middle = _mm256_xor_si256(D0[1], D2[0]); + + C[0] = D0[0]; + C[1] = middle ^ D0[0] ^ D1[0]; + C[2] = middle ^ D1[1] ^ D2[1]; + C[3] = D2[1]; +} + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers (four each) + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) { + __m256i D0[4], D1[4], D2[4], SAA[2], SBB[2]; + __m256i middle0; + __m256i middle1; + + karat_mult_2(D0, A, B); + karat_mult_2(D2, A + 2, B + 2); + + SAA[0] = A[0] ^ A[2]; + SBB[0] = B[0] ^ B[2]; + SAA[1] = A[1] ^ A[3]; + SBB[1] = B[1] ^ B[3]; + + karat_mult_2(D1, SAA, SBB); + + middle0 = _mm256_xor_si256(D0[2], D2[0]); + middle1 = _mm256_xor_si256(D0[3], D2[1]); + + C[0] = D0[0]; + C[1] = D0[1]; + C[2] = middle0 ^ D0[0] ^ D1[0]; + C[3] 
= middle1 ^ D0[1] ^ D1[1]; + C[4] = middle0 ^ D1[2] ^ D2[2]; + C[5] = middle1 ^ D1[3] ^ D2[3]; + C[6] = D2[2]; + C[7] = D2[3]; +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) { + size_t i, is, is2, is3; + __m256i D0[8], D1[8], D2[8], SAA[4], SBB[4]; + __m256i middle; + + karat_mult_4(D0, A, B); + karat_mult_4(D2, A + 4, B + 4); + + for (i = 0; i < 4; i++) { + is = i + 4; + SAA[i] = A[i] ^ A[is]; + SBB[i] = B[i] ^ B[is]; + } + + karat_mult_4(D1, SAA, SBB); + + for (i = 0; i < 4; i++) { + is = i + 4; + is2 = is + 4; + is3 = is2 + 4; + + middle = _mm256_xor_si256(D0[is], D2[i]); + + C[i] = D0[i]; + C[is] = middle ^ D0[i] ^ D1[i]; + C[is2] = middle ^ D1[is] ^ D2[is]; + C[is3] = D2[is]; + } +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +inline static void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B) { + size_t i, is, is2, is3; + __m256i middle; + __m256i D0[16], D1[16], D2[16], SAA[8], SBB[8]; + + karat_mult_8(D0, A, B); + karat_mult_8(D2, A + 8, B + 8); + + for (i = 0; i < 8; i++) { + is = i + 8; + SAA[i] = A[i] ^ A[is]; + SBB[i] = B[i] ^ B[is]; + } + + karat_mult_8(D1, SAA, SBB); + + for (i = 0; i < 8; i++) { + is = i + 8; + is2 = is + 8; + is3 = is2 + 8; + + middle = D0[is] ^ D2[i]; + + C[i] = D0[i]; + C[is] = middle ^ D0[i] ^ D1[i]; + C[is2] = middle ^ D1[is] ^ D2[is]; + C[is3] = D2[is]; + } +} + + +/** + * @brief Compute C(x) = A(x)*B(x) + * + * This function computes 
A(x)*B(x) using Karatsuba + * A(x) and B(x) are stored in 256-bit registers + * @param[out] C Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static inline void karat_mult5(__m256i *C, const __m256i *A, const __m256i *B) { + const __m256i *a0, *b0, *a1, *b1, *a2, *b2, * a3, * b3, *a4, *b4; + + __m256i aa01[T_5W_256], bb01[T_5W_256], aa02[T_5W_256], bb02[T_5W_256], aa03[T_5W_256], bb03[T_5W_256], aa04[T_5W_256], bb04[T_5W_256], + aa12[T_5W_256], bb12[T_5W_256], aa13[T_5W_256], bb13[T_5W_256], aa14[T_5W_256], bb14[T_5W_256], + aa23[T_5W_256], bb23[T_5W_256], aa24[T_5W_256], bb24[T_5W_256], + aa34[T_5W_256], bb34[T_5W_256]; + + __m256i D0[T2_5W_256], D1[T2_5W_256], D2[T2_5W_256], D3[T2_5W_256], D4[T2_5W_256], + D01[T2_5W_256], D02[T2_5W_256], D03[T2_5W_256], D04[T2_5W_256], + D12[T2_5W_256], D13[T2_5W_256], D14[T2_5W_256], + D23[T2_5W_256], D24[T2_5W_256], + D34[T2_5W_256]; + + __m256i ro256[t5 >> 1]; + + a0 = A; + a1 = a0 + T_5W_256; + a2 = a1 + T_5W_256; + a3 = a2 + T_5W_256; + a4 = a3 + T_5W_256; + b0 = B; + b1 = b0 + T_5W_256; + b2 = b1 + T_5W_256; + b3 = b2 + T_5W_256; + b4 = b3 + T_5W_256; + + for (int32_t i = 0; i < T_5W_256; i++) { + aa01[i] = a0[i] ^ a1[i]; + bb01[i] = b0[i] ^ b1[i]; + + aa02[i] = a0[i] ^ a2[i]; + bb02[i] = b0[i] ^ b2[i]; + + aa03[i] = a0[i] ^ a3[i]; + bb03[i] = b0[i] ^ b3[i]; + + aa04[i] = a0[i] ^ a4[i]; + bb04[i] = b0[i] ^ b4[i]; + + aa12[i] = a2[i] ^ a1[i]; + bb12[i] = b2[i] ^ b1[i]; + + aa13[i] = a3[i] ^ a1[i]; + bb13[i] = b3[i] ^ b1[i]; + + aa14[i] = a4[i] ^ a1[i]; + bb14[i] = b4[i] ^ b1[i]; + + aa23[i] = a2[i] ^ a3[i]; + bb23[i] = b2[i] ^ b3[i]; + + aa24[i] = a2[i] ^ a4[i]; + bb24[i] = b2[i] ^ b4[i]; + + aa34[i] = a3[i] ^ a4[i]; + bb34[i] = b3[i] ^ b4[i]; + } + + karat_mult_16(D0, a0, b0); + karat_mult_16(D1, a1, b1); + karat_mult_16(D2, a2, b2); + karat_mult_16(D3, a3, b3); + karat_mult_16(D4, a4, b4); + + karat_mult_16(D01, aa01, bb01); + karat_mult_16(D02, 
aa02, bb02); + karat_mult_16(D03, aa03, bb03); + karat_mult_16(D04, aa04, bb04); + + karat_mult_16(D12, aa12, bb12); + karat_mult_16(D13, aa13, bb13); + karat_mult_16(D14, aa14, bb14); + + karat_mult_16(D23, aa23, bb23); + karat_mult_16(D24, aa24, bb24); + + karat_mult_16(D34, aa34, bb34); + + for (int32_t i = 0; i < T_5W_256; i++) { + ro256[i] = D0[i]; + ro256[i + T_5W_256] = D0[i + T_5W_256] ^ D01[i] ^ D0[i] ^ D1[i]; + ro256[i + 2 * T_5W_256] = D1[i] ^ D02[i] ^ D0[i] ^ D2[i] ^ D01[i + T_5W_256] ^ D0[i + T_5W_256] ^ D1[i + T_5W_256]; + ro256[i + 3 * T_5W_256] = D1[i + T_5W_256] ^ D03[i] ^ D0[i] ^ D3[i] ^ D12[i] ^ D1[i] ^ D2[i] ^ D02[i + T_5W_256] ^ D0[i + T_5W_256] ^ D2[i + T_5W_256]; + ro256[i + 4 * T_5W_256] = D2[i] ^ D04[i] ^ D0[i] ^ D4[i] ^ D13[i] ^ D1[i] ^ D3[i] ^ D03[i + T_5W_256] ^ D0[i + T_5W_256] ^ D3[i + T_5W_256] ^ D12[i + T_5W_256] ^ D1[i + T_5W_256] ^ D2[i + T_5W_256]; + ro256[i + 5 * T_5W_256] = D2[i + T_5W_256] ^ D14[i] ^ D1[i] ^ D4[i] ^ D23[i] ^ D2[i] ^ D3[i] ^ D04[i + T_5W_256] ^ D0[i + T_5W_256] ^ D4[i + T_5W_256] ^ D13[i + T_5W_256] ^ D1[i + T_5W_256] ^ D3[i + T_5W_256]; + ro256[i + 6 * T_5W_256] = D3[i] ^ D24[i] ^ D2[i] ^ D4[i] ^ D14[i + T_5W_256] ^ D1[i + T_5W_256] ^ D4[i + T_5W_256] ^ D23[i + T_5W_256] ^ D2[i + T_5W_256] ^ D3[i + T_5W_256]; + ro256[i + 7 * T_5W_256] = D3[i + T_5W_256] ^ D34[i] ^ D3[i] ^ D4[i] ^ D24[i + T_5W_256] ^ D2[i + T_5W_256] ^ D4[i + T_5W_256]; + ro256[i + 8 * T_5W_256] = D4[i] ^ D34[i + T_5W_256] ^ D3[i + T_5W_256] ^ D4[i + T_5W_256]; + ro256[i + 9 * T_5W_256] = D4[i + T_5W_256]; + } + + for (int32_t i = 0; i < T_5W_256 * 10; i++) { + C[i] = ro256[i]; + } +} + + + +/** + * @brief Compute B(x) = A(x)/(x+1) + * + * This function computes A(x)/(x+1) using a Quercia like algorithm + * @param[out] out Pointer to the result + * @param[in] in Pointer to the polynomial A(x) + * @param[in] size used to define the number of coeeficients of A + */ +inline static void divide_by_x_plus_one_256(__m256i *in, __m256i *out, int32_t 
size) { + out[0] = in[0]; + for (int32_t i = 1; i < 2 * (size + 2); i++) { + out[i] = out[i - 1] ^ in[i]; + } +} + + + +/** + * @brief Compute C(x) = A(x)*B(x) using TOOM3Mult with recursive call + * + * This function computes A(x)*B(x) using recursive TOOM-COOK3 Multiplication + * @param[out] Out Pointer to the result + * @param[in] A Pointer to the polynomial A(x) + * @param[in] B Pointer to the polynomial B(x) + */ +static void toom_3_mult(uint64_t *Out, const aligned_vec_t *A, const aligned_vec_t *B) { + __m256i U0[T_TM3R_3W_256 + 2], V0[T_TM3R_3W_256 + 2], U1[T_TM3R_3W_256 + 2], V1[T_TM3R_3W_256 + 2], U2[T_TM3R_3W_256 + 2], V2[T_TM3R_3W_256 + 2]; + __m256i W0[2 * (T_TM3R_3W_256 + 2)], W1[2 * (T_TM3R_3W_256 + 2)], W2[2 * (T_TM3R_3W_256 + 2)], W3[2 * (T_TM3R_3W_256 + 2)], W4[2 * (T_TM3R_3W_256 + 2)]; + __m256i tmp[2 * (T_TM3R_3W_256 + 2) + 3]; + __m256i ro256[tTM3R / 2]; + const __m256i zero = {0ul, 0ul, 0ul, 0ul}; + int32_t T2 = T_TM3R_3W_64 << 1; + + for (int32_t i = 0; i < T_TM3R_3W_256; i++) { + int32_t i4 = i << 2; + U0[i] = _mm256_lddqu_si256((__m256i const *)(&A->arr64[i4])); + V0[i] = _mm256_lddqu_si256((__m256i const *)(&B->arr64[i4])); + U1[i] = _mm256_lddqu_si256((__m256i const *)(&A->arr64[i4 + T_TM3R_3W_64])); + V1[i] = _mm256_lddqu_si256((__m256i const *)(&B->arr64[i4 + T_TM3R_3W_64])); + U2[i] = _mm256_lddqu_si256((__m256i const *)(&A->arr64[i4 + T2])); + V2[i] = _mm256_lddqu_si256((__m256i const *)(&B->arr64[i4 + T2])); + } + + for (int32_t i = T_TM3R_3W_256; i < T_TM3R_3W_256 + 2; i++) { + U0[i] = zero; + V0[i] = zero; + U1[i] = zero; + V1[i] = zero; + U2[i] = zero; + V2[i] = zero; + } + + // EVALUATION PHASE : x= X^256 + // P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) + // Evaluation: 5*2 add, 2*2 shift; 5 mul (n) + //W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 + + for (int32_t i = 0; i < T_TM3R_3W_256; i++) { + W3[i] = U0[i] ^ U1[i] ^ U2[i]; + W2[i] = V0[i] ^ V1[i] ^ V2[i]; + } + + for (int32_t i = T_TM3R_3W_256; i < T_TM3R_3W_256 + 2; i++) { 
+ W2[i] = zero; + W3[i] = zero; + } + + //W1 = W2 * W3 + karat_mult5(W1, W2, W3); + + //W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 + 2 !) + W0[0] = zero; + W4[0] = zero; + + W0[1] = U1[0]; + W4[1] = V1[0]; + + for (int32_t i = 1; i < T_TM3R_3W_256 + 1; i++) { + W0[i + 1] = U1[i] ^ U2[i - 1]; + W4[i + 1] = V1[i] ^ V2[i - 1]; + } + + W0[T_TM3R_3W_256 + 1] = U2[T_TM3R_3W_256 - 1]; + W4[T_TM3R_3W_256 + 1] = V2[T_TM3R_3W_256 - 1]; + + //W3 = W3 + W0 ; W2 = W2 + W4 + for (int32_t i = 0; i < T_TM3R_3W_256 + 2; i++) { + W3[i] ^= W0[i]; + W2[i] ^= W4[i]; + } + + //W0 = W0 + U0 ; W4 = W4 + V0 + for (int32_t i = 0; i < T_TM3R_3W_256 + 2; i++) { + W0[i] ^= U0[i]; + W4[i] ^= V0[i]; + } + + //W3 = W3 * W2 ; W2 = W0 * W4 + karat_mult5(tmp, W3, W2); + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { + W3[i] = tmp[i]; + } + + karat_mult5(W2, W0, W4); + + //W4 = U2 * V2 ; W0 = U0 * V0 + karat_mult5(W4, U2, V2); + karat_mult5(W0, U0, V0); + + //INTERPOLATION PHASE + //9 add, 1 shift, 1 Smul, 2 Sdiv (2n) + //W3 = W3 + W2 + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { + W3[i] ^= W2[i]; + } + + //W1 = W1 + W0 + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256); i++) { + W1[i] ^= W0[i]; + } + + //W2 =(W2 + W0)/x + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2) - 1; i++) { + int32_t i1 = i + 1; + W2[i] = W2[i1] ^ W0[i1]; + } + + W2[2 * (T_TM3R_3W_256 + 2) - 1] = zero; + + //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { + tmp[i] = W2[i] ^ W3[i] ^ W4[i]; + } + + tmp[2 * (T_TM3R_3W_256 + 2)] = zero; + tmp[2 * (T_TM3R_3W_256 + 2) + 1] = zero; + tmp[2 * (T_TM3R_3W_256 + 2) + 2] = zero; + + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256); i++) { + tmp[i + 3] ^= W4[i]; + } + + divide_by_x_plus_one_256(tmp, W2, T_TM3R_3W_256); + + //W3 =(W3 + W1)/(x*(x+1)) + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2) - 1; i++) { + int32_t i1 = i + 1; + tmp[i] = W3[i1] ^ W1[i1]; + } + + tmp[2 * (T_TM3R_3W_256 + 2) - 1] = (__m256i) { + 0ul, 
0ul, 0ul, 0ul + }; + + divide_by_x_plus_one_256(tmp, W3, T_TM3R_3W_256); + + //W1 = W1 + W4 + W2 + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { + W1[i] ^= W2[i] ^ W4[i]; + } + + //W2 = W2 + W3 + for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { + W2[i] ^= W3[i]; + } + + //Recomposition + //W = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4 + //Note that : W0, W1, W4 of size 2*T_TM3R_3W_256, W2 and W3 of size 2*(T_TM3R_3W_256+2) + for (int32_t i = 0; i < T_TM3R_3W_256; i++) { + ro256[i] = W0[i]; + ro256[i + T_TM3R_3W_256] = W0[i + T_TM3R_3W_256] ^ W1[i]; + ro256[i + 2 * T_TM3R_3W_256] = W1[i + T_TM3R_3W_256] ^ W2[i]; + ro256[i + 3 * T_TM3R_3W_256] = W2[i + T_TM3R_3W_256] ^ W3[i]; + ro256[i + 4 * T_TM3R_3W_256] = W3[i + T_TM3R_3W_256] ^ W4[i]; + ro256[i + 5 * T_TM3R_3W_256] = W4[i + T_TM3R_3W_256]; + } + + ro256[4 * T_TM3R_3W_256] ^= W2[2 * T_TM3R_3W_256]; + ro256[5 * T_TM3R_3W_256] ^= W3[2 * T_TM3R_3W_256]; + + ro256[1 + 4 * T_TM3R_3W_256] ^= W2[1 + 2 * T_TM3R_3W_256]; + ro256[1 + 5 * T_TM3R_3W_256] ^= W3[1 + 2 * T_TM3R_3W_256]; + + ro256[2 + 4 * T_TM3R_3W_256] ^= W2[2 + 2 * T_TM3R_3W_256]; + ro256[2 + 5 * T_TM3R_3W_256] ^= W3[2 + 2 * T_TM3R_3W_256]; + + ro256[3 + 4 * T_TM3R_3W_256] ^= W2[3 + 2 * T_TM3R_3W_256]; + ro256[3 + 5 * T_TM3R_3W_256] ^= W3[3 + 2 * T_TM3R_3W_256]; + + uint64_t *ro64 = (uint64_t *) ro256; + for (int32_t i = 0; i < VEC_N_256_SIZE_64 << 1; i++) { + Out[i] = ro64[i]; + } +} + + + +/** + * @brief Multiply two polynomials modulo \f$ X^n - 1\f$. + * + * This function multiplies the dense polynomial a1 + * by the dense polynomial a2. The multiplication is done modulo \f$ X^n - 1\f$.
+ * + * @param[out] o Pointer to the result + * @param[in] a1 Pointer to a polynomial + * @param[in] a2 Pointer to a polynomial + */ +void PQCLEAN_HQCRMRS256_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) { + __m256i a1_times_a2[VEC_N_256_SIZE_64 << 1] = {0}; + toom_3_mult((uint64_t *)a1_times_a2, a1, a2); + reduce(o, a1_times_a2); +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/gf2x.h b/src/kem/hqc/hqc-rmrs-256/avx2/gf2x.h new file mode 100644 index 00000000..646f0778 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/gf2x.h @@ -0,0 +1,21 @@ +#ifndef GF2X_H +#define GF2X_H + + +/** + * @file gf2x.h + * @brief Header file for gf2x.c + */ +#include "parameters.h" +#include +#include + +typedef union { + uint64_t arr64[VEC_N_256_SIZE_64]; + __m256i dummy; +} aligned_vec_t; + +void PQCLEAN_HQCRMRS256_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/hqc.c b/src/kem/hqc/hqc-rmrs-256/avx2/hqc.c new file mode 100644 index 00000000..f096841c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/hqc.c @@ -0,0 +1,168 @@ +#include "code.h" +#include "gf2x.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +#include +/** + * @file hqc.c + * @brief Implementation of hqc.h + */ + + + +/** + * @brief Keygen of the HQC_PKE IND_CPA scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. 
+ * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) { + AES_XOF_struct sk_seedexpander; + AES_XOF_struct pk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + uint8_t pk_seed[SEED_BYTES] = {0}; + aligned_vec_t vx = {0}; + uint64_t *x = vx.arr64; + aligned_vec_t vy = {0}; + uint64_t *y = vy.arr64; + aligned_vec_t vh = {0}; + uint64_t *h = vh.arr64; + aligned_vec_t vs = {0}; + uint64_t *s = vs.arr64; + aligned_vec_t vtmp = {0}; + uint64_t *tmp = vtmp.arr64; + + // Create seed_expanders for public key and secret key + randombytes(sk_seed, SEED_BYTES); + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + randombytes(pk_seed, SEED_BYTES); + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + + // Compute secret key: sample x and y with fixed weight PARAM_OMEGA + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA); + + // Compute public key: s = x + y.h + PQCLEAN_HQCRMRS256_AVX2_vect_set_random(&pk_seedexpander, h); + PQCLEAN_HQCRMRS256_AVX2_vect_mul(tmp, &vy, &vh); + PQCLEAN_HQCRMRS256_AVX2_vect_add(s, x, tmp, VEC_N_256_SIZE_64); + + // Parse keys to string (the secret key string embeds the public key string) + PQCLEAN_HQCRMRS256_AVX2_hqc_public_key_to_string(pk, pk_seed, s); + PQCLEAN_HQCRMRS256_AVX2_hqc_secret_key_to_string(sk, sk_seed, pk); + +} + + + +/** + * @brief Encryption of the HQC_PKE IND_CPA scheme + * + * The ciphertext is composed of vectors u and v.
+ * + * @param[out] u Vector u (first part of the ciphertext) + * @param[out] v Vector v (second part of the ciphertext) + * @param[in] m Vector representing the message to encrypt + * @param[in] theta Seed used to derive randomness required for encryption + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) { + AES_XOF_struct seedexpander; + aligned_vec_t vh = {0}; + uint64_t *h = vh.arr64; + aligned_vec_t vs = {0}; + uint64_t *s = vs.arr64; + aligned_vec_t vr1 = {0}; + uint64_t *r1 = vr1.arr64; + aligned_vec_t vr2 = {0}; + uint64_t *r2 = vr2.arr64; + aligned_vec_t ve = {0}; + uint64_t *e = ve.arr64; + aligned_vec_t vtmp1 = {0}; + uint64_t *tmp1 = vtmp1.arr64; + aligned_vec_t vtmp2 = {0}; + uint64_t *tmp2 = vtmp2.arr64; + aligned_vec_t vtmp3 = {0}; + uint64_t *tmp3 = vtmp3.arr64; + + // Create seed_expander from theta + seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH); + + // Retrieve h and s from public key + PQCLEAN_HQCRMRS256_AVX2_hqc_public_key_from_string(h, s, pk); + + // Generate r1, r2 and e + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&seedexpander, r2, PARAM_OMEGA_R); + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E); + + + + // Compute u = r1 + r2.h + PQCLEAN_HQCRMRS256_AVX2_vect_mul(tmp1, &vr2, &vh); + PQCLEAN_HQCRMRS256_AVX2_vect_add(u, r1, tmp1, VEC_N_256_SIZE_64); + + // Compute v = m.G by encoding the message + PQCLEAN_HQCRMRS256_AVX2_code_encode((uint8_t *)v, m); + PQCLEAN_HQCRMRS256_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES); + PQCLEAN_HQCRMRS256_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + + // Compute v = m.G + s.r2 + e + PQCLEAN_HQCRMRS256_AVX2_vect_mul(tmp2, &vr2, &vs); + PQCLEAN_HQCRMRS256_AVX2_vect_add(tmp3, 
e, tmp2, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS256_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS256_AVX2_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N); + +} + + + +/** + * @brief Decryption of the HQC_PKE IND_CPA scheme + * + * @param[out] m Vector representing the decrypted message + * @param[in] u Vector u (first part of the ciphertext) + * @param[in] v Vector v (second part of the ciphertext) + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) { + uint8_t pk[PUBLIC_KEY_BYTES] = {0}; + aligned_vec_t vx = {0}; + uint64_t *x = vx.arr64; + aligned_vec_t vy = {0}; + uint64_t *y = vy.arr64; + aligned_vec_t vtmp1 = {0}; + uint64_t *tmp1 = vtmp1.arr64; + aligned_vec_t vtmp2 = {0}; + uint64_t *tmp2 = vtmp2.arr64; + aligned_vec_t vtmp3 = {0}; + uint64_t *tmp3 = vtmp3.arr64; + + // Retrieve x, y, pk from secret key + PQCLEAN_HQCRMRS256_AVX2_hqc_secret_key_from_string(x, y, pk, sk); + + // Compute v - u.y + PQCLEAN_HQCRMRS256_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2); + for (size_t i = 0; i < VEC_N_256_SIZE_64; i++) { + tmp2[i] = u[i]; + } + PQCLEAN_HQCRMRS256_AVX2_vect_mul(tmp3, &vy, &vtmp2); + PQCLEAN_HQCRMRS256_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64); + + + // Compute m by decoding v - u.y + PQCLEAN_HQCRMRS256_AVX2_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS256_AVX2_code_decode(m, (uint8_t *)tmp1); +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/hqc.h b/src/kem/hqc/hqc-rmrs-256/avx2/hqc.h new file mode 100644 index 00000000..e6466a37 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/hqc.h @@ -0,0 +1,19 @@ +#ifndef HQC_H +#define HQC_H + + +/** + * @file hqc.h + * @brief Functions of the HQC_PKE IND_CPA scheme + */ + +#include + +void PQCLEAN_HQCRMRS256_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk); + +void PQCLEAN_HQCRMRS256_AVX2_hqc_pke_encrypt(uint64_t *u, 
uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk); + +void PQCLEAN_HQCRMRS256_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/kem.c b/src/kem/hqc/hqc-rmrs-256/avx2/kem.c new file mode 100644 index 00000000..459a7374 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/kem.c @@ -0,0 +1,140 @@ +#include "api.h" +#include "fips202.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "sha2.h" +#include "vector.h" +#include +#include +/** + * @file kem.c + * @brief Implementation of api.h + */ + + + +/** + * @brief Keygen of the HQC_KEM IND-CCA2 scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API.
+ * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + * @returns 0 if keygen is successful + */ +int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + + PQCLEAN_HQCRMRS256_AVX2_hqc_pke_keygen(pk, sk); + return 0; +} + + + +/** + * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme + * + * @param[out] ct String containing the ciphertext + * @param[out] ss String containing the shared secret + * @param[in] pk String containing the public key + * @returns 0 if encapsulation is successful + */ +int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + + uint8_t theta[SHA512_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint64_t u[VEC_N_256_SIZE_64] = {0}; /* automatic storage, like v: no buffer shared between calls */ + uint64_t v[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Computing m + randombytes(m, VEC_K_SIZE_BYTES); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m + PQCLEAN_HQCRMRS256_AVX2_hqc_pke_encrypt(u, v, m, theta, pk); + + // Computing d + sha512(d, m, VEC_K_SIZE_BYTES); + + // Computing shared secret: ss = SHA512(m || u || v) + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS256_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS256_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Computing ciphertext + PQCLEAN_HQCRMRS256_AVX2_hqc_ciphertext_to_string(ct, u, v, d); + + + return 0; +} + + + +/** + * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme + * + * @param[out] ss String containing the shared secret + * @param[in] ct String containing the ciphertext + * @param[in] sk String containing the secret key + * @returns 0 if decapsulation is successful, -1 otherwise
+ */ +int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + + uint8_t result; + uint64_t u[VEC_N_256_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char pk[PUBLIC_KEY_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint8_t theta[SHA512_BYTES] = {0}; + uint64_t u2[VEC_N_256_SIZE_64] = {0}; + uint64_t v2[VEC_N1N2_256_SIZE_64] = {0}; + unsigned char d2[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Retrieving u, v and d from ciphertext + PQCLEAN_HQCRMRS256_AVX2_hqc_ciphertext_from_string(u, v, d, ct); + + // Retrieving pk from sk + memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES); + + // Decrypting + PQCLEAN_HQCRMRS256_AVX2_hqc_pke_decrypt(m, u, v, sk); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m' + PQCLEAN_HQCRMRS256_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk); + + // Computing d' + sha512(d2, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS256_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_256_SIZE_64); + PQCLEAN_HQCRMRS256_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Reject if (u, v) != (u', v') or d != d': ss is cleared through a mask and -1 is returned + result = PQCLEAN_HQCRMRS256_AVX2_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS256_AVX2_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS256_AVX2_vect_compare(d, d2, SHA512_BYTES); + result = (uint8_t) (-((int16_t) result) >> 15); + for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { + ss[i] &= ~result; + } + + + return -(result & 1); +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/parameters.h b/src/kem/hqc/hqc-rmrs-256/avx2/parameters.h new file mode
100644 index 00000000..5c61888b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/parameters.h @@ -0,0 +1,109 @@ +#ifndef HQC_PARAMETERS_H +#define HQC_PARAMETERS_H + + +/** + * @file parameters.h + * @brief Parameters of the HQC_KEM IND-CCA2 scheme + */ +#include "api.h" + + +#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/ + +/* + #define PARAM_N Define the parameter n of the scheme + #define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code) + #define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code) + #define PARAM_N1N2 Define the length in bits of the Concatenated code + #define PARAM_OMEGA Define the parameter omega of the scheme + #define PARAM_OMEGA_E Define the parameter omega_e of the scheme + #define PARAM_OMEGA_R Define the parameter omega_r of the scheme + #define PARAM_SECURITY Define the security level corresponding to the chosen parameters + #define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters + + #define SECRET_KEY_BYTES Define the size of the secret key in bytes + #define PUBLIC_KEY_BYTES Define the size of the public key in bytes + #define SHARED_SECRET_BYTES Define the size of the shared secret in bytes + #define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes + + #define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vect_set_random_fixed_weight function) + #define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes + #define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes + #define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes + #define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes + + #define VEC_N_SIZE_64 Define the size of the array used to store a
PARAM_N sized vector in 64 bits + #define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits + #define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits + #define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits + + #define VEC_N_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits + #define VEC_N1N2_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits + + #define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code) + #define PARAM_M Define a positive integer + #define PARAM_GF_POLY Generator polynomial of Galois field GF(2^PARAM_M), represented in hexadecimal form + #define PARAM_GF_POLY_WT Hamming weight of PARAM_GF_POLY + #define PARAM_GF_POLY_M2 Distance between the primitive polynomial first two set bits + #define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e. 2^PARAM_M - 1 + #define PARAM_K Define the size of the information bits of the Reed-Solomon code + #define PARAM_G Define the size of the generator polynomial of Reed-Solomon code + #define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input + We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=29 + The smallest power of 2 greater than 29+1 is 32=2^5 + #define RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code + + #define RED_MASK A mask for the higher bits of a vector + #define SHA512_BYTES Define the size of SHA512 output in bytes + #define SEED_BYTES Define the size of the seed in bytes + #define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length +*/ + +#define PARAM_N 57637 +#define PARAM_N1 90 +#define PARAM_N2 640 +#define PARAM_N1N2 57600 +#define
PARAM_OMEGA 131 +#define PARAM_OMEGA_E 149 +#define PARAM_OMEGA_R 149 +#define PARAM_SECURITY 256 +#define PARAM_DFR_EXP 256 + +#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES +#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS256_AVX2_CRYPTO_PUBLICKEYBYTES +#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS256_AVX2_CRYPTO_BYTES +#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS256_AVX2_CRYPTO_CIPHERTEXTBYTES + +#define UTILS_REJECTION_THRESHOLD 16772367 +#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8) +#define VEC_K_SIZE_BYTES PARAM_K +#define VEC_N1_SIZE_BYTES PARAM_N1 +#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8) + +#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64) +#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8) +#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8) +#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64) + +#define PARAM_N_MULT (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256)) +#define VEC_N_256_SIZE_64 (PARAM_N_MULT / 64) +#define VEC_N1N2_256_SIZE_64 (CEIL_DIVIDE(PARAM_N1N2, 256) << 2) + +#define PARAM_DELTA 29 +#define PARAM_M 8 +#define PARAM_GF_POLY 0x11D +#define PARAM_GF_POLY_WT 5 +#define PARAM_GF_POLY_M2 4 +#define PARAM_GF_MUL_ORDER 255 +#define PARAM_K 32 +#define PARAM_G 59 +#define PARAM_FFT 5 +#define RS_POLY_COEFS 49,167,49,39,200,121,124,91,240,63,148,71,150,123,87,101,32,215,159,71,201,115,97,210,186,183,141,217,123,12,31,243,180,219,152,239,99,141,4,246,191,144,8,232,47,27,141,178,130,64,124,47,39,188,216,48,199,187,1 + +#define RED_MASK 0x1fffffffff +#define SHA512_BYTES 64 +#define SEED_BYTES 40 +#define SEEDEXPANDER_MAX_LENGTH 4294967295 + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/parsing.c b/src/kem/hqc/hqc-rmrs-256/avx2/parsing.c new file mode 100644 index 00000000..cf786d2a --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/parsing.c @@ -0,0 +1,186 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file parsing.c 
+ * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme + */ + + +void PQCLEAN_HQCRMRS256_AVX2_store8(unsigned char *out, uint64_t in) { + out[0] = (in >> 0x00) & 0xFF; + out[1] = (in >> 0x08) & 0xFF; + out[2] = (in >> 0x10) & 0xFF; + out[3] = (in >> 0x18) & 0xFF; + out[4] = (in >> 0x20) & 0xFF; + out[5] = (in >> 0x28) & 0xFF; + out[6] = (in >> 0x30) & 0xFF; + out[7] = (in >> 0x38) & 0xFF; +} + + +uint64_t PQCLEAN_HQCRMRS256_AVX2_load8(const unsigned char *in) { + uint64_t ret = in[7]; + + for (int8_t i = 6; i >= 0; i--) { + ret <<= 8; + ret |= in[i]; + } + + return ret; +} + +void PQCLEAN_HQCRMRS256_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) { + size_t index_in = 0; + size_t index_out = 0; + + // first copy by 8 bytes + if (inlen >= 8 && outlen >= 1) { + while (index_out < outlen && index_in + 8 <= inlen) { + out64[index_out] = PQCLEAN_HQCRMRS256_AVX2_load8(in8 + index_in); + + index_in += 8; + index_out += 1; + } + } + + // we now need to do the last 7 bytes if necessary + if (index_in >= inlen || index_out >= outlen) { + return; + } + out64[index_out] = in8[inlen - 1]; + for (int8_t i = (int8_t)(inlen - index_in) - 2; i >= 0; i--) { + out64[index_out] <<= 8; + out64[index_out] |= in8[index_in + i]; + } +} + +void PQCLEAN_HQCRMRS256_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) { + for (size_t index_out = 0, index_in = 0; index_out < outlen && index_in < inlen;) { + out8[index_out] = (in64[index_in] >> ((index_out % 8) * 8)) & 0xFF; + index_out++; + if (index_out % 8 == 0) { + index_in++; + } + } +} + + +/** + * @brief Parse a secret key into a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. 
+ * + * @param[out] sk String containing the secret key + * @param[in] sk_seed Seed used to generate the secret key + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) { + memcpy(sk, sk_seed, SEED_BYTES); + sk += SEED_BYTES; + memcpy(sk, pk, PUBLIC_KEY_BYTES); +} + +/** + * @brief Parse a secret key from a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] x uint64_t representation of vector x + * @param[out] y uint64_t representation of vector y + * @param[out] pk String containing the public key + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) { + AES_XOF_struct sk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + + memcpy(sk_seed, sk, SEED_BYTES); + sk += SEED_BYTES; + memcpy(pk, sk, PUBLIC_KEY_BYTES); + + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA); +} + +/** + * @brief Parse a public key into a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] pk String containing the public key + * @param[in] pk_seed Seed used to generate the public key + * @param[in] s uint64_t representation of vector s + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) { + memcpy(pk, pk_seed, SEED_BYTES); + PQCLEAN_HQCRMRS256_AVX2_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64); +} + + + +/** + * @brief Parse a public key from a
string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] h uint64_t representation of vector h + * @param[out] s uint64_t representation of vector s + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) { + AES_XOF_struct pk_seedexpander; + uint8_t pk_seed[SEED_BYTES] = {0}; + + memcpy(pk_seed, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_HQCRMRS256_AVX2_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES); + + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS256_AVX2_vect_set_random(&pk_seedexpander, h); +} + + +/** + * @brief Parse a ciphertext into a string + * + * The ciphertext is composed of vectors u, v and hash d. + * + * @param[out] ct String containing the ciphertext + * @param[in] u uint64_t representation of vector u + * @param[in] v uint64_t representation of vector v + * @param[in] d String containing the hash d + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) { + PQCLEAN_HQCRMRS256_AVX2_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS256_AVX2_store8_arr(ct, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(ct, d, SHA512_BYTES); +} + + +/** + * @brief Parse a ciphertext from a string + * + * The ciphertext is composed of vectors u, v and hash d.
+ * + * @param[out] u uint64_t representation of vector u + * @param[out] v uint64_t representation of vector v + * @param[out] d String containing the hash d + * @param[in] ct String containing the ciphertext + */ +void PQCLEAN_HQCRMRS256_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) { + PQCLEAN_HQCRMRS256_AVX2_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS256_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, ct, VEC_N1N2_SIZE_BYTES); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(d, ct, SHA512_BYTES); +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/parsing.h b/src/kem/hqc/hqc-rmrs-256/avx2/parsing.h new file mode 100644 index 00000000..b854fc80 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/parsing.h @@ -0,0 +1,36 @@ +#ifndef PARSING_H +#define PARSING_H + + +/** + * @file parsing.h + * @brief Header file for parsing.c + */ + +#include + +void PQCLEAN_HQCRMRS256_AVX2_store8(unsigned char *out, uint64_t in); + +uint64_t PQCLEAN_HQCRMRS256_AVX2_load8(const unsigned char *in); + +void PQCLEAN_HQCRMRS256_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen); + +void PQCLEAN_HQCRMRS256_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen); + + +void PQCLEAN_HQCRMRS256_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk); + +void PQCLEAN_HQCRMRS256_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk); + + +void PQCLEAN_HQCRMRS256_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s); + +void PQCLEAN_HQCRMRS256_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk); + + +void PQCLEAN_HQCRMRS256_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d); + +void PQCLEAN_HQCRMRS256_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct); + + +#endif diff
--git a/src/kem/hqc/hqc-rmrs-256/avx2/reed_muller.c b/src/kem/hqc/hqc-rmrs-256/avx2/reed_muller.c new file mode 100644 index 00000000..22527b8a --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/reed_muller.c @@ -0,0 +1,389 @@ +#include "parameters.h" +#include "reed_muller.h" +#include +#include +#include +/** + * @file reed_muller.c + * Constant time implementation of Reed-Muller code RM(1,7) + */ + + +// number of repeated code words +#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) + +// copy bit 0 into all bits of a 64 bit value +#define BIT0MASK(x) (int64_t)(-((x) & 1)) + +static void encode(uint8_t *word, uint8_t message); +static void expand_and_sum(__m256i *dst, const uint64_t *src); +static void hadamard(__m256i *src, __m256i *dst); +static uint32_t find_peaks(__m256i *transform); + + + +/** + * @brief Encode a single byte into a single codeword using RM(1,7) + * + * Encoding matrix of this code: + * bit pattern (note that bits are numbered big endian) + * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa + * 1 cccccccc cccccccc cccccccc cccccccc + * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0 + * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00 + * 4 ffff0000 ffff0000 ffff0000 ffff0000 + * 5 00000000 ffffffff 00000000 ffffffff + * 6 00000000 00000000 ffffffff ffffffff + * 7 ffffffff ffffffff ffffffff ffffffff + * + * @param[out] word An RM(1,7) codeword + * @param[in] message A message to encode + */ +static void encode(uint8_t *word, uint8_t message) { + uint32_t e; + // bit 7 flips all the bits, do that first to save work + e = BIT0MASK(message >> 7); + // bits 0, 1, 2, 3, 4 are the same for all four longs + // (Warning: in the bit matrix above, low bits are at the left!) 
+ e ^= BIT0MASK(message >> 0) & 0xaaaaaaaa; + e ^= BIT0MASK(message >> 1) & 0xcccccccc; + e ^= BIT0MASK(message >> 2) & 0xf0f0f0f0; + e ^= BIT0MASK(message >> 3) & 0xff00ff00; + e ^= BIT0MASK(message >> 4) & 0xffff0000; + // we can store this in the first quarter + word[0 + 0] = (e >> 0x00) & 0xff; + word[0 + 1] = (e >> 0x08) & 0xff; + word[0 + 2] = (e >> 0x10) & 0xff; + word[0 + 3] = (e >> 0x18) & 0xff; + // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 + e ^= BIT0MASK(message >> 5); + word[4 + 0] = (e >> 0x00) & 0xff; + word[4 + 1] = (e >> 0x08) & 0xff; + word[4 + 2] = (e >> 0x10) & 0xff; + word[4 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 6); + word[12 + 0] = (e >> 0x00) & 0xff; + word[12 + 1] = (e >> 0x08) & 0xff; + word[12 + 2] = (e >> 0x10) & 0xff; + word[12 + 3] = (e >> 0x18) & 0xff; + e ^= BIT0MASK(message >> 5); + word[8 + 0] = (e >> 0x00) & 0xff; + word[8 + 1] = (e >> 0x08) & 0xff; + word[8 + 2] = (e >> 0x10) & 0xff; + word[8 + 3] = (e >> 0x18) & 0xff; +} + + + +/** + * @brief Add multiple codewords into expanded codeword + * + * Note: this does not write the codewords as -1 or +1 as the green machine does + * instead, just 0 and 1 is used. 
+ * The resulting hadamard transform has: + * all values are halved + * the first entry is 64 too high + * + * @param[out] dst Structure that contain the expanded codeword + * @param[in] src Structure that contain the codeword + */ +inline void expand_and_sum(__m256i *dst, const uint64_t *src) { + uint16_t v[16]; + for (size_t part = 0; part < 8; part++) { + dst[part] = _mm256_setzero_si256(); + } + for (size_t copy = 0; copy < MULTIPLICITY; copy++) { + for (size_t part = 0; part < 8; part++) { + for (size_t bit = 0; bit < 16; bit++) { + v[bit] = (((uint16_t *)(&src[2 * copy]))[part] >> bit) & 1; + } + dst[part] += _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8], + v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); + } + } +} + + + +/** + * @brief Hadamard transform + * + * Perform hadamard transform of src and store result in dst + * src is overwritten: it is also used as intermediate buffer + * Method is best explained if we use H(3) instead of H(7): + * + * The routine multiplies by the matrix H(3): + * [1 1 1 1 1 1 1 1] + * [1 -1 1 -1 1 -1 1 -1] + * [1 1 -1 -1 1 1 -1 -1] + * [a b c d e f g h] * [1 -1 -1 1 1 -1 -1 1] = result of routine + * [1 1 1 1 -1 -1 -1 -1] + * [1 -1 1 -1 -1 1 -1 1] + * [1 1 -1 -1 -1 -1 1 1] + * [1 -1 -1 1 -1 1 1 -1] + * You can do this in three passes, where each pass does this: + * set lower half of buffer to pairwise sums, + * and upper half to differences + * index 0 1 2 3 4 5 6 7 + * input: a, b, c, d, e, f, g, h + * pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h + * pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h + * pass 3: a+b+c+d+e+f+g+h a+b-c-d+e+f-g-h a+b+c+d-e-f-g-h a+b-c-d-e+-f+g+h + * a-b+c-d+e-f+g-h a-b-c+d+e-f-g+h a-b+c-d-e+f-g+h a-b-c+d-e+f+g-h + * This order of computation is chosen because it vectorises well. + * Likewise, this routine multiplies by H(7) in seven passes. 
/**
 * @brief Hadamard transform
 *
 * Multiplies the 128 expanded entries (8 vectors of 16 x 16-bit lanes) by
 * the matrix H(7) in seven identical passes. Each pass stores the pairwise
 * sums of its input in the lower half of the output and the pairwise
 * differences in the upper half. Illustrated with H(3) on 8 entries:
 *
 *   input:  a, b, c, d, e, f, g, h
 *   pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h
 *   pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h,
 *           a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 *   pass 3: a+b+c+d+e+f+g+h, a-b+c-d+e-f+g-h,
 *           a+b-c-d+e+f-g-h, a-b-c+d+e-f-g+h,
 *           a+b+c+d-e-f-g-h, a-b+c-d-e+f-g+h,
 *           a+b-c-d-e-f+g+h, a-b-c+d-e+f+g-h
 *
 * This order of computation is chosen because it vectorises well.
 *
 * The passes alternate between the two buffers, starting by reading src
 * and writing dst. Seven is odd, so the final result is in dst, while src
 * is overwritten with intermediate data.
 *
 * @param[in,out] src Expanded codeword (8 vectors); used as scratch space and overwritten
 * @param[out] dst Receives the transform (8 vectors)
 */
inline void hadamard(__m256i *src, __m256i *dst) {
    // even passes read buffer[0] and write buffer[1]; odd passes go the other way
    __m256i *buffer[2] = { src, dst };
    for (size_t pass = 0; pass < 7; pass++) {
        __m256i *in = buffer[pass & 1];
        __m256i *out = buffer[(pass & 1) ^ 1];
        for (size_t pair = 0; pair < 4; pair++) {
            // hadd/hsub work within 128-bit lanes, so the middle two 64-bit
            // blocks of the result are swapped (0xd8) to restore linear order
            out[pair] = _mm256_permute4x64_epi64(
                            _mm256_hadd_epi16(in[2 * pair], in[2 * pair + 1]), 0xd8);
            out[pair + 4] = _mm256_permute4x64_epi64(
                                _mm256_hsub_epi16(in[2 * pair], in[2 * pair + 1]), 0xd8);
        }
    }
}
+ * If there are fewer than 32M errors, there is always a unique codeword + * which an entry with absolute value > 32M; + * this is because an error changes an entry by 1. + * The highest number that seem to be decodable is 50 errors, so that the + * highest entries in the hadamard transform can be as low as 12. + * But this is different for the repeated code. + * Because multiple codewords are added, this changes: the lowest value of the + * hadamard transform of the sum of six words is seen to be as low as 43 (!), + * which is way less than 12*6. + * + * It is possible that there are more errors, but the word is still uniquely + * decodable: we found a word with distance of 50 from the nearest codeword. + * That means that the highest entry can be as low as 14M. + * Since we have to do binary search, we search for the range 1-64M + * which can be done in 6+l2g(M) steps. + * The binary search is based on (values>32M are unique): + * M 32M min> max> firstStep #steps + * 2 64 1 64 33 +- 16 6 + * 4 128 1 128 65 +- 32 7 + * 6 192 1 192 129 +- 64 8 + * + * As a check, we run a sample for M=6 to see the peak value; it ranged + * from 43 to 147, so my analysis looks right. Also, it shows that decoding + * far beyond the bound of 32M is needed. + * + * For the vectors, it would be tempting to use 8 bit ints, + * because the values "almost" fit in there. + * We could use some trickery to fit it in 8 bits, like saturated add or + * division by 2 in a late step. + * Unfortunately, these instructions do not exist. + * the adds _mm512_adds_epi8 is available only on the latest processors, + * and division, shift, mulhi are not available at all for 8 bits. + * So, we use 16 bit ints. + * + * For the search of the optimal comparison value, + * remember the transform contains 64M-d, + * where d are the distances to the codewords. + * The highest value gives the most likely codeword. 
+ * There is not fast vectorized way to find this value, so we search for the + * maximum value itself. + * In each pass, we collect a bit map of the transform values that are, + * say >bound. There are three cases: + * bit map = 0: all code words are further away than 64M-bound (decrease bound) + * bit map has one bit: one unique code word has distance < 64M-bound + * bit map has multiple bits: multiple words (increase bound) + * We will search for the lowest value of bound that gives a nonzero bit map. + * + * @param[in] transform Structure that contain the expanded codeword + */ +inline uint32_t find_peaks(__m256i *transform) { + // a whole lot of vector variables + __m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows; + __m256i tmp = _mm256_setzero_si256(); + __m256i vect_mask; + __m256i res; + int32_t lower; + int32_t width; + uint32_t message; + uint32_t mask; + int8_t index; + int8_t abs_value; + int8_t mask1; + int8_t mask2; + uint16_t result; + + // compute absolute value of transform + for (size_t i = 0; i < 8; i++) { + abs_rows[i] = _mm256_abs_epi16(transform[i]); + } + // compute a vector of 16 elements which contains the maximum somewhere + // (later used to compute bits 0 through 3 of message) + max_abs_rows = abs_rows[0]; + for (size_t i = 1; i < 8; i++) { + max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]); + } + + // do binary search for the highest value that is lower than the maximum + // loop invariant: lower gives bit map = 0, lower + width gives bit map > 0 + lower = 1; + // this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6 + width = 1 << (5 + MULTIPLICITY / 2); + // if you don't unroll this loop, it fits in the loop cache + // uncomment the line below to speeding up the program by a few percent + // #pragma GCC unroll 0 + while (width > 1) { + width >>= 1; + // compare with lower + width; put result in bitmap + // make vector from value of new bound + bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width)); + 
bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound); + // step up if there are any matches + // rely on compiler to use conditional move here + mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap); + mask = ~(uint32_t) ((-(int64_t) mask) >> 63); + lower += mask & width; + } + // lower+width contains the maximum value of the vector + // or less, if the maximum is very high (which is OK) + // normally, there is one maximum, but sometimes there are more + // find where the maxima occur in the maximum vector + // (each determines lower 4 bits of peak position) + // construct vector filled with bound-1 + bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1)); + + // find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message + // find lowest value by searching backwards skip first check to save time + message = 0x70; + for (size_t i = 0; i < 8; i++) { + bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound); + mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap); + mask = ~(uint32_t) ((-(int64_t) mask) >> 63); + message ^= mask & (message ^ ((7 - i) << 4)); + } + // we decided which row of the matrix contains the lowest match + // select proper row + index = message >> 4; + + tmp = _mm256_setzero_si256(); + for (size_t i = 0; i < 8; i++) { + abs_value = (int8_t)(index - i); + mask1 = abs_value >> 7; + abs_value ^= mask1; + abs_value -= mask1; + mask2 = ((uint8_t) - abs_value >> 7); + mask = (-1ULL) + mask2; + vect_mask = _mm256_set1_epi32(mask); + res = _mm256_and_si256(abs_rows[i], vect_mask); + tmp = _mm256_or_si256(tmp, res); + } + + active_row = tmp; + + // get the column number of the vector element + // by setting the bits corresponding to the columns + // and then adding elements within two groups of 8 + vect_mask = _mm256_cmpgt_epi16(active_row, bound); + vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1); + for (size_t i = 0; i < 3; i++) { + vect_mask = 
_mm256_hadd_epi16(vect_mask, vect_mask); + } + // add low 4 bits of message + message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8)); + + // set bit 7 if sign of biggest value is positive + // make sure a jump isn't generated by the compiler + tmp = _mm256_setzero_si256(); + for (size_t i = 0; i < 8; i++) { + mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63); + vect_mask = _mm256_set1_epi32(mask); + tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i])); + } + result = 0; + for (size_t i = 0; i < 16; i++) { + mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63); + result |= mask & ((uint16_t *)&tmp)[i]; + } + message |= (0x8000 & ~result) >> 8; + return message; +} + + + +/** + * @brief Encodes the received word + * + * The message consists of N1 bytes each byte is encoded into PARAM_N2 bits, + * or MULTIPLICITY repeats of 128 bits + * + * @param[out] cdw Array of size VEC_N1N2_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_N1_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) { + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // encode first word + encode(&cdw[16 * i * MULTIPLICITY], msg[i]); + // copy to other identical codewords + for (size_t copy = 1; copy < MULTIPLICITY; copy++) { + memcpy(&cdw[16 * i * MULTIPLICITY + 16 * copy], &cdw[16 * i * MULTIPLICITY], 16); + } + } +} + + + +/** + * @brief Decodes the received word + * + * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane. 
+ * The theory of error-correcting codes codes @cite macwilliams1977theory + * + * @param[out] msg Array of size VEC_N1_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1N2_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS256_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) { + __m256i expanded[8]; + __m256i transform[8]; + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // collect the codewords + expand_and_sum(expanded, (uint64_t *)&cdw[16 * i * MULTIPLICITY]); + // apply hadamard transform + hadamard(expanded, transform); + // fix the first entry to get the half Hadamard transform + transform[0] -= _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY); + // finish the decoding + msg[i] = find_peaks(transform); + } +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/reed_muller.h b/src/kem/hqc/hqc-rmrs-256/avx2/reed_muller.h new file mode 100644 index 00000000..bcfbfa56 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/reed_muller.h @@ -0,0 +1,18 @@ +#ifndef REED_MULLER_H +#define REED_MULLER_H + + +/** + * @file reed_muller.h + * Header file of reed_muller.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS256_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/reed_solomon.c b/src/kem/hqc/hqc-rmrs-256/avx2/reed_solomon.c new file mode 100644 index 00000000..2412c140 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/reed_solomon.c @@ -0,0 +1,744 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include "parsing.h" +#include "reed_solomon.h" +#include +#include +#include +/** + * @file reed_solomon.c + * Constant time implementation of Reed-Solomon codes + */ + + +static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); 
+static void compute_roots(uint8_t *error, uint16_t *sigma); +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); +static void correct_errors(uint8_t *cdw, const uint16_t *error_values); + +static const __m256i alpha_ij256_1[89] = { + {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087}, + {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006}, + {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035}, + {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014}, + {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be}, + {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078}, + {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3}, + {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d}, + {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed}, + {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e}, + {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054}, + {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4}, + {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5}, + {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062}, + {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064}, + {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051}, + {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045}, + {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb}, + {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083}, + 
{0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020}, + {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d}, + {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0}, + {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee}, + {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba}, + {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e}, + {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb}, + {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9}, + {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd}, + {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec}, + {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9}, + {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052}, + {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1}, + {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1}, + {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc}, + {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c}, + {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2}, + {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048}, + {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016}, + {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad}, + {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074}, + {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9}, + {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025}, + {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 
0x00ab00a9005b008c}, + {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de}, + {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f}, + {0x00950021005b009f, 0x001f00c2007500f4, 0x00b500d800a70073, 0x0048009600da00fe}, + {0x00a5001500710023, 0x00760089000c00eb, 0x0050008000ef00fc, 0x00b0006400520022}, + {0x008200a800d90046, 0x001700e70027002c, 0x0061002d002400db, 0x0008005900bf003e}, + {0x00c800290043008c, 0x009e00fe003500e9, 0x0078003000eb006e, 0x005a002400e300cc}, + {0x001c005500110005, 0x004d00e200c1006c, 0x00df006a00e90064, 0x009c002c00ae0084}, + {0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd, 0x000a000100dd0092}, + {0x005100e4000d0014, 0x00d100bd00ba0020, 0x003e00de007400f2, 0x00c20026002b003f}, + {0x0079007300340028, 0x00e500f800a10074, 0x006600ca00b4008a, 0x00bb006000f7004b}, + {0x00c300bf00d00050, 0x00db00c5002f0026, 0x0021006b006000f5, 0x008600c100cf0082}, + {0x00ac0091006700a0, 0x0037002e000f00b4, 0x005500e2006a002c, 0x007c00b9002000a7}, + {0x001200fc0081005d, 0x0019009e00e70003, 0x00bf003400050036, 0x005c000f005a002b}, + {0x003d00b3003e00ba, 0x003800a8007f0060, 0x00f100ed00b90008, 0x002900df002700f5}, + {0x00f700f100f80069, 0x00a200aa00b6009c, 0x006e0085005e00cd, 0x0063001a002300fa}, + {0x00cb00db00c700d2, 0x009b00b70086006a, 0x0007004200fd0075, 0x004b003b006f0004}, + {0x002c0096003b00b9, 0x00240091001a00c1, 0x0059005500df0060, 0x006400a9000f0026}, + {0x00fa00c400ec006f, 0x00f3007b00ce0005, 0x008a00d100110035, 0x00b2005500e10018}, + {0x001b006e009700de, 0x0058004b00ed00a0, 0x00fb007b00670046, 0x00900091008800d4}, + {0x00ad0057006600a1, 0x0036006e00c500b9, 0x00cf00c4003b00ba, 0x007d009600ed0050}, + {0x000200820085005f, 0x00040019001700be, 0x00080032002e0061, 0x00100064005c00c2}, + {0x00200064002e00be, 0x0074001c00a9005e, 0x002600a70084000f, 0x00b40059004d00fd}, + {0x003a000700b80061, 0x002d00a60015000f, 0x000c00c30055006b, 0x00250024007300b6}, + {0x0087003800da00c2, 0x000600f9002900fd, 
0x0035009000e600b6, 0x0014002c00f60034}, + {0x009800dd004f0099, 0x004e0045009200d6, 0x000a000b00d70044, 0x0099000100dc0093}, + {0x007500a60021002f, 0x00b5003d007300df, 0x00a10036009600ce, 0x006b0026003800b8}, + {0x000300590084005e, 0x000500eb009100e2, 0x000f002000ae003b, 0x0011006000ef004d}, + {0x003000f2002a00bc, 0x006900b000b30011, 0x007f004c001c0017, 0x00f800c1007a00b7}, + {0x002700c300a80065, 0x0061003600db001a, 0x0086000c00590021, 0x00b800b9007d00b3}, + {0x004a0056009a00ca, 0x0089000200c40067, 0x00ce009400ac0029, 0x0052000f00040095}, + {0x00d4008a00520089, 0x00d3001d0057007c, 0x00c5008c00f400e4, 0x00c600df004c008d}, + {0x00c100240055000f, 0x00df00260064003b, 0x00a900b9002c0091, 0x0096001a00600059}, + {0x008c003d0049001e, 0x004300ea00380033, 0x002900bc006c00f1, 0x00c8003b00ee0009}, + {0x002800f50039003c, 0x0034003000a6002e, 0x007300d3002000c4, 0x007900a9006900cb}, + {0x00ba00fb00e40078, 0x003e002500f200a9, 0x00b300b600260082, 0x003d005500650036}, + {0x006f008b00b700f0, 0x00ec007700560084, 0x00c4000d00030038, 0x00fa009100b10080}, + {0x00be002c00e600fd, 0x002e00050024004d, 0x0064007c009c0059, 0x00200096001100b4}, + {0x002f007d00bf00e7, 0x002100ba00f50055, 0x00a6006600c10056, 0x00750064003e0027}, + {0x00ca00cf00c600d3, 0x009a005f008b0072, 0x0056009e00a0003d, 0x004a00590085009f}, + {0x003c0036003f00bb, 0x003900bc007d00e6, 0x00f5005200be008b, 0x00280024002a00d2}, + {0x00e700ad00fc006b, 0x00bf007800360091, 0x007d0073000f00cf, 0x002f002c00e40065}, + {0x00d6000100d700d6, 0x00d700d6000100d7, 0x000100d700d60001, 0x00d6000100d700d6}, + {0x00e10008007b00b1, 0x00ab005b004000ff, 0x00cd003100e2003a, 0x0022002600950043}, + {0x00b6004000f1007f, 0x006e008600cd0096, 0x008f0082001a002d, 0x00ed0060000700ce}, + {0x00af003a00e300fe, 0x00320034002d0037, 0x002500e0007c000c, 0x006d00c100790097}, + {0x002200cd00ab00e1, 0x0070001f008f00ae, 0x004600f900330025, 0x00a400b90048009e} +}; +static const __m256i alpha_ij256_2[89] = { + {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 
0x009d00c000600030}, + {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x005f00de00b90069}, + {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x00d900b600df007f}, + {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00850097003b00f8}, + {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x00e600720055004d}, + {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x0082006e009600f1}, + {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x0012009b005900e0}, + {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x0002001b002c00f7}, + {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0027008f00260040}, + {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x00be00a000c1009c}, + {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00af00b1000f005f}, + {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x001700ed001a00b6}, + {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x00d1005200a900ec}, + {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x0019004b009100aa}, + {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0024005900640096}, + {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00040058002400a2}, + {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x004e00980001000b}, + {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00610046006000cd}, + {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x004300f000b900d4}, + {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x002e006700df005e}, + {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00bf0015003b0086}, + {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0032007b00550085}, + {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x004800e0009600d5}, + {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000800fb0059006e}, + {0x00e900ac006400d7, 0x00df00be006a0026, 
0x00ae00910084007c, 0x009c0074002c00ef}, + {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00c200d4002600fa}, + {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0086006500c1002d}, + {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x005c0088000f0023}, + {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x006300da001a001e}, + {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0064009100a9001a}, + {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x009000c8009100da}, + {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0010009000640063}, + {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0025000800240082}, + {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0099004e00010045}, + {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x001100be0060006c}, + {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x00b800d900b9008f}, + {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x00c600cc00df0028}, + {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00c800b7003b00d3}, + {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x003d0057005500ce}, + {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x002000ac00960084}, + {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x004a00d8005900e5}, + {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x002f000c002c0007}, + {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0022006900260090}, + {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x006d00e100c10002}, + {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x0091003b000f0060}, + {0x007200bd00a10098, 0x006b009400830038, 0x0087008a00e3002e, 0x008d00aa001a00d2}, + {0x00ff008500e7004e, 0x00d0006f0013008a, 0x00d4003600700072, 0x007a006200a900fe}, + {0x006400290086000a, 0x00b8006b0025007d, 0x002f0075003d0096, 0x004000f2009100ed}, + {0x00ef003f00ed0099, 
0x00e400680069003a, 0x00af0046008e00a7, 0x009400fa0064009a}, + {0x00eb003700a900d6, 0x0096002e00fd0060, 0x0033000f000300f4, 0x005e00b4002400ff}, + {0x000100dd00920044, 0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd}, + {0x00b4000900b30093, 0x003d00e300970065, 0x00310017003c0003, 0x00da00d3006000f3}, + {0x006a00b00057004f, 0x00ad000e009a00b6, 0x00a200e400880005, 0x003f001f00b90080}, + {0x00b9004000a60092, 0x0075008a00fc003e, 0x008b00c40017000f, 0x000700a800df0025}, + {0x00fd0003002400d7, 0x00c100e900ae00a9, 0x0074005900720011, 0x00f400ff003b00be}, + {0x001100ee007d00dc, 0x002f0087007900e4, 0x0094008b00310033, 0x0080005300550071}, + {0x003b00a1004000dd, 0x00b6002500fb00db, 0x0061003a00a60055, 0x0035008b009600c5}, + {0x008400d3008f0045, 0x00ed00d200020007, 0x0071002500f300ff, 0x00bc008700590049}, + {0x00e6002200b5000b, 0x001500d300c90056, 0x00ec00a10010001c, 0x008800ee002c0031}, + {0x0096003b00b90001, 0x0091001a00c1002c, 0x005500df00600024, 0x00a9000f00260059}, + {0x001c004200780098, 0x0057008500c20040, 0x00ab00ed005d006c, 0x007e003400c10016}, + {0x00ac00b700d9004e, 0x00f200aa00e1000c, 0x005300a800d300b4, 0x000e009e000f0087}, + {0x002c00db003e000a, 0x008b00f100ce0046, 0x00fb00b300d000c1, 0x00f500fc001a00b5}, + {0x002000c800b80099, 0x0040008d006d002f, 0x0080000700da005e, 0x001d000e00a900bc}, + {0x000300ef005500d6, 0x006000ac007200df, 0x009c002400e600e2, 0x006a00f400910011}, + {0x00c100fb00fc0044, 0x0050007d00db00ce, 0x00a100ad006e003b, 0x0065004000640017}, + {0x00be0047006e0093, 0x007800e8000e00b8, 0x00a3008f0079004d, 0x000d004a002400b7}, + {0x00d6009800dd004f, 0x0044004e00450092, 0x0093000a000b00d7, 0x004f0099000100dc}, + {0x001a0025008a0092, 0x006600ba007d00f1, 0x00290078003a0064, 0x00fc0086006000c3}, + {0x003300a0002c00d7, 0x005500fd00740064, 0x00ff001a009c00ac, 0x001c002e00b900e9}, + {0x004d0089000800dc, 0x00f10088009d00c3, 0x00e000b8006f00e9, 0x00f700d100df005a}, + {0x009100b6007500dd, 0x000700660050008b, 0x00f50073007f0026, 0x003a0082003b0046}, + 
{0x0037008100350045, 0x008a00a4001e0008, 0x0010006e001f006a, 0x00d400090055003c}, + {0x00a7005c00ba000b, 0x00cf007b0043008f, 0x009d00f2004200be, 0x00ca008e00960034}, + {0x00240055000f0001, 0x00260064003b00c1, 0x00b9002c009100df, 0x001a0060005900a9}, + {0x00e900e500b60098, 0x0035002b002a0061, 0x00fe00cd0041007c, 0x009e006f002c00c6}, + {0x007400a500ce004e, 0x00a1005800c6007f, 0x00f80035009b0084, 0x00e5005b00260019}, + {0x006000a60017000a, 0x007f003a006e00d0, 0x00a80061007d0091, 0x003800c500c1008a}, + {0x0005004800290099, 0x00ce009d00510017, 0x007b00b6001300ae, 0x00f30039000f00d8}, + {0x005e00e9009100d6, 0x00a900a000f40055, 0x001c003b006a00ef, 0x00740037001a0003}, + {0x00df003a00c40044, 0x0073007800ad00b3, 0x003d00290061002c, 0x00b500c300a90050}, + {0x0067001800380093, 0x00c40022005a0082, 0x000200f1005b0074, 0x00890083009100bb}, + {0x002e00230056004f, 0x00a6009700d400f2, 0x0030003800c7009c, 0x003400c900640081}, + {0x00550061008b0092, 0x00f5002900a100fb, 0x00ba003d00a800b9, 0x0021005000240015}, + {0x00d700d6000100d7, 0x000100d700d60001, 0x00d6000100d700d6, 0x00d700d6000100d7}, + {0x00ae000d002d00dc, 0x008f001900680075, 0x001f000c00c80067, 0x007000f80060000e}, + {0x005900c5002500dd, 0x004600c3001700b5, 0x00150050008a00a9, 0x00fb002900b9003d}, + {0x00f4002a00500045, 0x0065001600aa00a1, 0x00e500e7001b00e6, 0x00e800ab00df0004}, + {0x006c00d10065000b, 0x00d9008000f6006b, 0x008d00d0005a0037, 0x007700a2003b00c0} +}; +static const __m256i alpha_ij256_3[89] = { + {0x0025009c004e0027, 0x006a00350094004a, 0x00ee007700b500d4, 0x00460023009f00c1}, + {0x0065005e00990061, 0x00fd0078001e0089, 0x00fe00b1006b00d3, 0x00d90071005b00df}, + {0x00d0001a00440086, 0x003b00ed003e00ce, 0x00b80017006600c5, 0x00a80015002100a9}, + {0x00a80084004f00b8, 0x00e600e400490052, 0x00e3007b00fc00c6, 0x008200a500950096}, + {0x009600ff00d70091, 0x001c006400ae0037, 0x00ac00ef005900a7, 0x002c00eb00f40024}, + {0x00c3005900dd0007, 0x002c00fb003d008a, 0x003a000800ad00cf, 0x0027000c00750026}, + {0x00ad00e9000b00f5, 
0x0003002d00e80010, 0x0028009f0035009d, 0x00e7008900c200b9}, + {0x002700030098003a, 0x00be00ba008c00d4, 0x00af00e100e700ca, 0x00170076001f001a}, + {0x002f00b9000a00b5, 0x001a00d9007f0078, 0x001500b800c5003e, 0x00db00fc00730055}, + {0x001a00e200d6000f, 0x004d00a90033007c, 0x003700ff00910072, 0x002400ef00a70064}, + {0x0015002e009300d0, 0x003700f100630049, 0x007a002b00a600c8, 0x002d008000d8002c}, + {0x00db009100920015, 0x002400f200380057, 0x002d00400036008b, 0x0061005000b50060}, + {0x00f2001c00dc00f1, 0x007400ad00160048, 0x00de008c002500c9, 0x003e002200fe000f}, + {0x003600eb004500a6, 0x0005002500ea001d, 0x0034005b0078005f, 0x00bf005200da003b}, + {0x006000260001002c, 0x00df000f00b900c1, 0x005500a9003b001a, 0x0059006400960091}, + {0x00610005004e002d, 0x002e003e004300d3, 0x003200ab00bf009a, 0x000800b000480059}, + {0x004400d60099000a, 0x00d70092004f0093, 0x000b004500dd00dc, 0x000a004e00980001}, + {0x0021003b0044006b, 0x0059008200f100e4, 0x000c003a00cf003d, 0x008600e700a100c1}, + {0x00f10072004f00ed, 0x006c00f500790019, 0x00bc001400270013, 0x0029008500bd00df}, + {0x005900ae00d70055, 0x009c0026006c00f4, 0x007c00e2000f00a0, 0x006400ff007200a9}, + {0x00cf002400dd00c4, 0x000f0050002700cd, 0x0073002100ed00d9, 0x007d008a00380096}, + {0x000c0020000b00c3, 0x007c00b600bc008c, 0x00e000310073009e, 0x0025001300830024}, + {0x00a1006a00980036, 0x007200b800bd00b1, 0x00830012003800ab, 0x006b006f00940026}, + {0x0086000f000a000c, 0x006400b3002900c5, 0x002500cd007d0056, 0x00b800d0006b00b9}, + {0x00a9006700d600b9, 0x00eb0059003700e6, 0x00fd00a000600020, 0x00960072002e001a}, + {0x00b3004d009300b6, 0x00b400360009008d, 0x009700430065009f, 0x003d007000e30055}, + {0x00a6009600920066, 0x00b90027004000fb, 0x00fc0015003e007f, 0x00750036008a0064}, + {0x007d00ef00dc0073, 0x0011006500ee004c, 0x0079009500e40085, 0x002f00d40087002c}, + {0x008f006c00450082, 0x008400ce00d30028, 0x00020090000700e5, 0x00ed00fe00d20060}, + {0x00b9006000010024, 0x00960055003b00df, 0x00c10026002c0059, 0x009100a9001a000f}, + 
{0x00d900be004e0008, 0x00ac005700b700cc, 0x00e10069000c00d8, 0x00f2006200aa003b}, + {0x00b8001100990025, 0x0020003d00c800c6, 0x006d0022002f004a, 0x0040007a008d0091}, + {0x00fc00a90044002f, 0x00c100cd00fb0038, 0x00db00a800ce0078, 0x00500075007d0059}, + {0x00dd00d7004f0044, 0x00d6000a0098000b, 0x004500dc00920093, 0x00440099004e0001}, + {0x002c00a700d700a9, 0x003300df00a000b4, 0x007400f4006400e6, 0x0055007c00fd00c1}, + {0x0075002c00dd00fc, 0x0091001700b600ba, 0x0050002d008b0038, 0x000700bf006600df}, + {0x00ba00b4000b0038, 0x00a700fc005c0071, 0x0043006f008f00b0, 0x00cf00a2007b00a9}, + {0x00b600a0009800fb, 0x00e900a600e5005c, 0x002a000d00610018, 0x00350002002b0096}, + {0x001700df000a00cd, 0x006000cf00a600fc, 0x006e002900d0002f, 0x007f0046003a0024}, + {0x0091003300d600c1, 0x005e006000e900a7, 0x00f400ae00550067, 0x00a900e200a00026}, + {0x003800e600930078, 0x0067002f001800b0, 0x005a00f3008200aa, 0x00c4002a002200b9}, + {0x008b0064009200ce, 0x005500d00061008f, 0x00a1007500fb0082, 0x00f500570029001a}, + {0x002d00f400dc00a8, 0x00ae0029000d006f, 0x0068005f007500f3, 0x008f00cb00190055}, + {0x00500074004500db, 0x00f4006e002a0043, 0x00aa006800a1005a, 0x0065001800160064}, + {0x00df00c100010059, 0x00260024009600a9, 0x00640055001a00b9, 0x003b000f0060002c}, + {0x006600fd004e007d, 0x00a0003a002b007b, 0x0016001900290022, 0x00fc00ec001e0060}, + {0x00bf007c00990075, 0x00e20046000200a2, 0x001800cb0057002a, 0x00c300e500ec000f}, + {0x0007005500440050, 0x00a9007f003500cf, 0x0065008f00f500c4, 0x003a00c300fc003b}, + {0x00fb0037004f007f, 0x00ff0066003c0018, 0x00f800c2002d0012, 0x00ba001d00f90091}, + {0x002600ac00d7003b, 0x00ef0091007c00be, 0x00e6006700b90074, 0x001a00a000200059}, + {0x000a000100dd0092, 0x000100dd00920044, 0x00dd00920044000a, 0x00920044000a0001}, + {0x007f009c000b006e, 0x006a007d00410042, 0x001b00c800a8005b, 0x003800a400af00c1}, + {0x00c5005e00980056, 0x00fd000c007a00e3, 0x004a0016006e006d, 0x003600c8005400df}, + {0x0073001a000a00ad, 0x003b006100cd00f2, 0x00e7000c003d00f1, 
0x00b5007d005700a9}, + {0x0064008400d60060, 0x00e6001a0005006c, 0x0033005e002600ef, 0x00df009c00eb0096}, + {0x00f500ff009300a1, 0x001c00a800fe009d, 0x00e5001f00ba0002, 0x002100d300060024}, + {0x00cd0059009200d9, 0x002c00c40066002f, 0x00f200e4008600b5, 0x006e001700650026}, + {0x004600e900dc0017, 0x0003008a00c60034, 0x0004000e001500bb, 0x00fb00e300c700b9}, + {0x006b0003004500bf, 0x00be004000700054, 0x009f00b000c40097, 0x000c000900c6001a}, + {0x003b00b900010064, 0x001a00c1002c0096, 0x00df006000240091, 0x000f002600590055}, + {0x00e400e2004e003d, 0x004d006b00c9009b, 0x00da00ca00cd0053, 0x00c500de00020064}, + {0x0082002e00990040, 0x003700c5006f008e, 0x00ab00f800500083, 0x00b300bd0023002c}, + {0x003d009100440035, 0x002400bf00860025, 0x008a007300d90027, 0x005600e400b60060}, + {0x003a001c004f0065, 0x00740038009e00ca, 0x00e8007000210089, 0x00cd00e00042000f}, + {0x00c100eb00d7001a, 0x0005002c00ff0067, 0x00a000e90096007c, 0x00b9006c0037003b}, + {0x00e7002600dd0021, 0x00df008f00f20029, 0x00860027008a00e4, 0x00d000b500f50091}, + {0x00ed0005000b00b3, 0x002e00a100d80095, 0x0054001e003a008d, 0x00e400e100ea0059}, + {0x009200d6009800dd, 0x00d70044004e0045, 0x00dc0093000a000b, 0x00dd004f00990001}, + {0x0057003b000a008b, 0x0059001500650008, 0x00f500bf00b6008f, 0x00ad00c4003e00c1}, + {0x0024007200d60026, 0x006c00960067006a, 0x00b400a700a900be, 0x00c100f400e600df}, + {0x004000ae00930046, 0x009c00560052003c, 0x005f001b00db0034, 0x00b600ea005300a9}, + {0x00b50024009200e7, 0x000f0008006e003e, 0x00d0002500560029, 0x0015002f00ad0096}, + {0x0078002000dc003e, 0x007c00b5001200aa, 0x004900f0004000a5, 0x005700f800770024}, + {0x003e006a00450029, 0x007200e7008000a5, 0x00c800ec0046007a, 0x008b006300e10026}, + {0x0055000f00010096, 0x0064003b00c10024, 0x002c009100df0026, 0x0060005900a900b9}, + {0x006e0067004e00f2, 0x00eb007300bb0080, 0x0030005100b8005d, 0x007800040031001a}, + {0x008a004d009900cf, 0x00b40007007600ee, 0x00ca00d800f100af, 0x0066008c00900055}, + {0x000800960044008f, 0x00b9008b007300e7, 
0x00ed003500c30021, 0x00f100d9002d0064}, + {0x003500ef004f00ba, 0x00110075008d00c7, 0x00d100d30008004b, 0x008a0054005f002c}, + {0x000f006c00d700df, 0x008400b900eb0072, 0x00a7003300c100ac, 0x002600ae00670060}, + {0x00ce006000dd00c5, 0x00960086002d0082, 0x003600fc007f0040, 0x00a1008b00e4000f}, + {0x002900be000b00e4, 0x00ac0021005d007a, 0x009400b200170023, 0x00ce00300070003b}, + {0x00c4001100980057, 0x002000db007100e8, 0x00d3008e00b300fe, 0x0073001e001b0091}, + {0x005600a9000a008a, 0x00c100c300b80046, 0x006600b500f20017, 0x00a600c500350059}, + {0x000100d700d60001, 0x00d6000100d700d6, 0x00d700d6000100d7, 0x000100d700d60001}, + {0x002500a700930027, 0x00330035005100ec, 0x00f9008500b500b2, 0x0046009b005c00c1}, + {0x0065002c00920061, 0x0091007800cf0073, 0x000800b3006b00ad, 0x00d9003a00db00df}, + {0x00d000b400dc0086, 0x00a700ed003000c8, 0x002300f900660094, 0x00a8005d000900a9}, + {0x00a800a0004500b8, 0x00e900e400c200f3, 0x00a3000400fc00f0, 0x0082008800130096} +}; +static const __m256i alpha_ij256_4[89] = { + {0x0014000a0005008c, 0x005d00a000500028, 0x00000000006900ba, 0x0000000000000000}, + {0x000d004400110043, 0x0081006700d00034, 0x0000000000f8003e, 0x0000000000000000}, + {0x00e4009200550029, 0x00fc009100bf0073, 0x0000000000f100b3, 0x0000000000000000}, + {0x005100dd001c00c8, 0x001200ac00c30079, 0x0000000000f7003d, 0x0000000000000000}, + {0x00200001006c00e9, 0x000300b400260074, 0x00000000009c0060, 0x0000000000000000}, + {0x00ba000a00c10035, 0x00e7000f002f00a1, 0x0000000000b6007f, 0x0000000000000000}, + {0x00bd004400e200fe, 0x009e002e00c500f8, 0x0000000000aa00a8, 0x0000000000000000}, + {0x00d10092004d009e, 0x0019003700db00e5, 0x0000000000a20038, 0x0000000000000000}, + {0x00f200dd0064006e, 0x0036002c00f5008a, 0x0000000000cd0008, 0x0000000000000000}, + {0x0074000100e900eb, 0x0005006a006000b4, 0x00000000005e00b9, 0x0000000000000000}, + {0x00de000a006a0030, 0x003400e2006b00ca, 0x00000000008500ed, 0x0000000000000000}, + {0x003e004400df0078, 0x00bf005500210066, 0x00000000006e00f1, 
0x0000000000000000}, + {0x003f0092008400cc, 0x002b00a70082004b, 0x0000000000fa00f5, 0x0000000000000000}, + {0x002b00dd00ae00e3, 0x005a002000cf00f7, 0x0000000000230027, 0x0000000000000000}, + {0x00260001002c0024, 0x000f00b900c10060, 0x00000000001a00df, 0x0000000000000000}, + {0x00c2000a009c005a, 0x005c007c008600bb, 0x0000000000630029, 0x0000000000000000}, + {0x0093004400d60099, 0x00dc00d70092004f, 0x00000000004500dd, 0x0000000000000000}, + {0x00b3009200a900ed, 0x007d002400a60057, 0x00000000008f0040, 0x0000000000000000}, + {0x000900dd0037003f, 0x00ee0003004000b0, 0x0000000000d300a1, 0x0000000000000000}, + {0x00b4000100eb00ef, 0x001100fd00b9006a, 0x000000000084003b, 0x0000000000000000}, + {0x0065000a0060003a, 0x00e400a9003e00b6, 0x00000000000700db, 0x0000000000000000}, + {0x0097004400fd0069, 0x007900ae00fc009a, 0x00000000000200fb, 0x0000000000000000}, + {0x00e30092002e0068, 0x008700e9008a000e, 0x0000000000d20025, 0x0000000000000000}, + {0x003d00dd009600e4, 0x002f00c1007500ad, 0x0000000000ed00b6, 0x0000000000000000}, + {0x0003000100f400a7, 0x00330011000f0005, 0x0000000000ff0055, 0x0000000000000000}, + {0x003c000a0003008e, 0x0031007200170088, 0x0000000000f300a6, 0x0000000000000000}, + {0x00170044000f0046, 0x008b005900c400e4, 0x000000000025003a, 0x0000000000000000}, + {0x00310092003300af, 0x00940074008b00a2, 0x0000000000710061, 0x0000000000000000}, + {0x00f300dd00ff009a, 0x007100be00250080, 0x00000000004900c5, 0x0000000000000000}, + {0x0060000100240064, 0x0055003b00df00b9, 0x0000000000590096, 0x0000000000000000}, + {0x00d3000a00b400fa, 0x005300ff00a8001f, 0x000000000087008b, 0x0000000000000000}, + {0x00da0044005e0094, 0x008000f40007003f, 0x0000000000bc0035, 0x0000000000000000}, + {0x006e0092003b007f, 0x00a1006000ad0056, 0x00000000001700d9, 0x0000000000000000}, + {0x000b00dd00d7004f, 0x009300d6000a0098, 0x0000000000dc0092, 0x0000000000000000}, + {0x009c000100ac0037, 0x00ff0084001a005e, 0x0000000000e90059, 0x0000000000000000}, + {0x007f000a002600fb, 0x00f50064007300c5, 
0x00000000004600cd, 0x0000000000000000}, + {0x0042004400be0018, 0x009d006c00f200e3, 0x000000000034002f, 0x0000000000000000}, + {0x00410092007c003c, 0x00fe000500cd007a, 0x0000000000c60066, 0x0000000000000000}, + {0x007d00dd00910066, 0x00a8001a0061000c, 0x00000000008a00c4, 0x0000000000000000}, + {0x006a000100ef00ff, 0x001c00e6003b00fd, 0x000000000003002c, 0x0000000000000000}, + {0x005b000a00740012, 0x000200ef00f1006d, 0x0000000000bb00b5, 0x0000000000000000}, + {0x00a8004400b9002d, 0x00ba0026003d006e, 0x0000000000150086, 0x0000000000000000}, + {0x00c80092006700c2, 0x001f005e000c0016, 0x00000000000e00e4, 0x0000000000000000}, + {0x001b00dd00e600f8, 0x00e5003300e7004a, 0x00000000000400f2, 0x0000000000000000}, + {0x00c1000100590091, 0x0024009600a900df, 0x0000000000b90026, 0x0000000000000000}, + {0x00af000a002000f9, 0x000600eb00570054, 0x0000000000c70065, 0x0000000000000000}, + {0x00a4004400a0001d, 0x00d3009c007d00c8, 0x0000000000e30017, 0x0000000000000000}, + {0x00380092001a00ba, 0x002100df00b50036, 0x0000000000fb006e, 0x0000000000000000}, + {0x004700dd00720034, 0x0032004d00d90023, 0x00000000004a007d, 0x0000000000000000}, + {0x0005000100a70072, 0x006c001c00550011, 0x0000000000e200c1, 0x0000000000000000}, + {0x0044000a000100dd, 0x000a000100dd0092, 0x0000000000920044, 0x0000000000000000}, + {0x0039004400050047, 0x006800a000080053, 0x0000000000b20073, 0x0000000000000000}, + {0x0053009200110023, 0x0063006700ba0010, 0x00000000001300c3, 0x0000000000000000}, + {0x000800dd005500d9, 0x0056009100ce00ba, 0x000000000065002d, 0x0000000000000000}, + {0x00a00001001c004d, 0x00b400ac00910067, 0x00000000002e000f, 0x0000000000000000}, + {0x0068000a006c0032, 0x001e00b400560063, 0x0000000000a500b8, 0x0000000000000000}, + {0x0073004400c1007d, 0x00b8000f002d00c3, 0x0000000000cf0057, 0x0000000000000000}, + {0x00b2009200e2004a, 0x00a5002e00650013, 0x00000000008c00cf, 0x0000000000000000}, + {0x001d00dd004d00b1, 0x00fa0037006600c2, 0x0000000000680046, 0x0000000000000000}, + {0x00b90001006400a9, 
0x00c1002c0096003b, 0x000000000091001a, 0x0000000000000000}, + {0x0081000a00e90095, 0x0022006a00fb00f6, 0x00000000000900bf, 0x0000000000000000}, + {0x00c60044006a00f3, 0x00d500e200270048, 0x0000000000060056, 0x0000000000000000}, + {0x00c3009200df000c, 0x00f20055007f008f, 0x00000000006b0075, 0x0000000000000000}, + {0x008700dd0084001e, 0x001300a70015003c, 0x00000000002a0078, 0x0000000000000000}, + {0x00be000100ae0033, 0x005e00200064002e, 0x00000000001c00a9, 0x0000000000000000}, + {0x00ed000a002c00f1, 0x006600b9003600c4, 0x0000000000080082, 0x0000000000000000}, + {0x00e50044009c0009, 0x0062007c004600cb, 0x00000000006f0036, 0x0000000000000000}, + {0x0045009200d60098, 0x000b00d70044004e, 0x000000000093000a, 0x0000000000000000}, + {0x002d00dd00a90061, 0x0035002400e4007f, 0x0000000000db00d0, 0x0000000000000000}, + {0x005e00010037007c, 0x00e2000300590084, 0x0000000000eb0091, 0x0000000000000000}, + {0x00ec000a00eb00c6, 0x00aa00fd003a0019, 0x000000000094008a, 0x0000000000000000}, + {0x00f10044006000f2, 0x00a600a900a100cf, 0x0000000000d9008f, 0x0000000000000000}, + {0x0048009200fd0080, 0x001d00ae00ed00ee, 0x00000000003900e7, 0x0000000000000000}, + {0x00c900dd002e005d, 0x005f00e900b300af, 0x0000000000790021, 0x0000000000000000}, + {0x000f00010096001a, 0x003b00c100240055, 0x0000000000260064, 0x0000000000000000}, + {0x00cc000a00f40039, 0x00e30011008f00e0, 0x0000000000ca00ad, 0x0000000000000000}, + {0x004b0044000300e0, 0x00f7007200780002, 0x00000000005c0050, 0x0000000000000000}, + {0x00f50092000f00ad, 0x0027005900b80050, 0x00000000005700ce, 0x0000000000000000}, + {0x001800dd0033009f, 0x00e10074006e0068, 0x00000000008300fc, 0x0000000000000000}, + {0x00fd000100ff00e2, 0x004d00be002c00e6, 0x0000000000050024, 0x0000000000000000}, + {0x00b8000a002400a8, 0x0038003b003500f2, 0x0000000000d0000c, 0x0000000000000000}, + {0x0095004400b40019, 0x000400ff00b600e8, 0x00000000003f006b, 0x0000000000000000}, + {0x00cb0092005e00b0, 0x006900f40029005f, 0x0000000000120015, 0x0000000000000000}, + 
{0x002700dd003b0025, 0x003e0060003800ed, 0x00000000000c0007, 0x0000000000000000}, + {0x00d6000100d700d6, 0x00d700d6000100d7, 0x0000000000d60001, 0x0000000000000000}, + {0x009e000a00ac00da, 0x0048008400500009, 0x00000000005400ba, 0x0000000000000000}, + {0x00570044002600c4, 0x000c006400d00075, 0x000000000038003e, 0x0000000000000000}, + {0x0058009200be00f7, 0x00bb006c00bf0089, 0x00000000001000b3, 0x0000000000000000}, + {0x009400dd007c0006, 0x0042000500c300cc, 0x0000000000de003d, 0x0000000000000000} +}; + + +/** + * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes + * + * Following @cite lin1983error (Chapter 4 - Cyclic Codes), + * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register + * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code. + * + * @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message + * @param[in] msg Array of size VEC_K_SIZE_64 storing the message + */ +void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) { + size_t i, k; + uint8_t gate_value = 0; + uint8_t prev, x; + + union { + uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)]; + __m256i dummy; + } tmp = {0}; + + union { + uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)]; + __m256i dummy; + } PARAM_RS_POLY = {{ RS_POLY_COEFS }}; + + __m256i *tmp256 = (__m256i *)tmp.arr16; + __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16; + + for (i = 0; i < PARAM_K; ++i) { + gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]); + _mm256_storeu_si256(&tmp256[0], PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[0])); + _mm256_storeu_si256(&tmp256[1], PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[1])); + _mm256_storeu_si256(&tmp256[2], PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[2])); + _mm256_storeu_si256(&tmp256[3], 
PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[3])); + + prev = 0; + for (k = 0; k < PARAM_N1 - PARAM_K; k++) { + x = cdw[k]; + cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]); + prev = x; + } + } + + memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K); +} + + + +/** + * @brief Computes 2 * PARAM_DELTA syndromes + * + * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes + * @param[in] cdw Array of size PARAM_N1 storing the received vector + */ +void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { + __m256i *syndromes256 = (__m256i *) syndromes; + __m256i last_syndromes256; + syndromes256[0] = _mm256_set1_epi16(cdw[0]); + + for (size_t i = 0; i < PARAM_N1 - 1; ++i) { + syndromes256[0] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_1[i]); + } + + syndromes256[1] = _mm256_set1_epi16(cdw[0]); + for (size_t i = 0; i < PARAM_N1 - 1; ++i) { + syndromes256[1] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_2[i]); + } + + syndromes256[2] = _mm256_set1_epi16(cdw[0]); + for (size_t i = 0; i < PARAM_N1 - 1; ++i) { + syndromes256[2] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_3[i]); + } + + last_syndromes256 = _mm256_set1_epi16(cdw[0]); + for (size_t i = 0; i < PARAM_N1 - 1; ++i) { + last_syndromes256 ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_4[i]); + } + + __m128i *s128 = (__m128i *) &last_syndromes256; + _mm_store_si128((__m128i *) (syndromes + 48), *s128); + + uint32_t *s12 = (uint32_t *) (syndromes + 56); + uint32_t *s32 = ((uint32_t *) &last_syndromes256) + 4; + s12[0] = *s32; +} + + + +/** + * @brief Computes the error locator polynomial (ELP) sigma + * + * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes).
+ * We use the letter p for rho which is initialized at -1.
+ * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
+ * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
+ * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated.
+ * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA. + * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value + * and we only need to save its first PARAM_DELTA - 1 coefficients. + * + * @returns the degree of the ELP sigma + * @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP + * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes + */ +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { + uint16_t deg_sigma = 0; + uint16_t deg_sigma_p = 0; + uint16_t deg_sigma_copy = 0; + uint16_t sigma_copy[PARAM_DELTA + 1] = {0}; + uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; + uint16_t pp = (uint16_t) -1; // 2*rho + uint16_t d_p = 1; + uint16_t d = syndromes[0]; + + uint16_t mask1, mask2, mask12; + uint16_t deg_X, deg_X_sigma_p; + uint16_t dd; + uint16_t mu; + + uint16_t i; + + sigma[0] = 1; + for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { + // Save sigma in case we need it to update X_sigma_p + memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); + deg_sigma_copy = deg_sigma; + + dd = PQCLEAN_HQCRMRS256_AVX2_gf_mul(d, PQCLEAN_HQCRMRS256_AVX2_gf_inverse(d_p)); + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + sigma[i] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(dd, X_sigma_p[i]); + } + + deg_X = mu - pp; + deg_X_sigma_p = deg_X + deg_sigma_p; + + // mask1 = 0xffff if(d != 0) and 0 otherwise + mask1 = -((uint16_t) - d >> 15); + + // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise + mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15); + + // mask12 = 0xffff if the deg_sigma increased and 0 otherwise + mask12 = mask1 & mask2; + deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma); + + if (mu == (2 * PARAM_DELTA - 1)) { + break; + } + + pp ^= mask12 & (mu ^ pp); + d_p ^= mask12 & (d ^ d_p); + for (i = PARAM_DELTA; i; --i) { + X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); + } + + deg_sigma_p ^= mask12 & 
(deg_sigma_copy ^ deg_sigma_p); + d = syndromes[mu + 1]; + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + d ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]); + } + } + + return deg_sigma; +} + + + +/** + * @brief Computes the error polynomial error from the error locator polynomial sigma + * + * See function PQCLEAN_HQCRMRS256_AVX2_fft for more details. + * + * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial + * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + */ +static void compute_roots(uint8_t *error, uint16_t *sigma) { + uint16_t w[1 << PARAM_M] = {0}; + + PQCLEAN_HQCRMRS256_AVX2_fft(w, sigma, PARAM_DELTA + 1); + PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(error, w); +} + + + +/** + * @brief Computes the polynomial z(x) + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. 
+ * + * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x) + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + * @param[in] degree Integer that is the degree of polynomial sigma + * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes + */ +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) { + size_t i, j; + uint16_t mask; + + z[0] = 1; + + for (i = 1; i < PARAM_DELTA + 1; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] = mask & sigma[i]; + } + + z[1] ^= syndromes[0]; + + for (i = 2; i <= PARAM_DELTA; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] ^= mask & syndromes[i - 1]; + + for (j = 1; j < i; ++j) { + z[i] ^= mask & PQCLEAN_HQCRMRS256_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]); + } + } +} + + + +/** + * @brief Computes the error values + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. + * + * @param[out] error_values Array of PARAM_DELTA elements receiving the error values + * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x) + * @param[in] z_degree Integer that is the degree of polynomial z(x) + * @param[in] error_compact Array of PARAM_DELTA + PARAM_N1 storing compact representation of the error + */ +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) { + uint16_t beta_j[PARAM_DELTA] = {0}; + uint16_t e_j[PARAM_DELTA] = {0}; + + uint16_t delta_counter; + uint16_t delta_real_value; + uint16_t found; + uint16_t mask1; + uint16_t mask2; + uint16_t tmp1; + uint16_t tmp2; + uint16_t inverse; + uint16_t inverse_power_j; + + // Compute the beta_{j_i} page 31 of the documentation + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; i++) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ 
delta_counter) >> 31)); // j == delta_counter + beta_j[j] += mask1 & mask2 & gf_exp[i]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } + delta_real_value = delta_counter; + + // Compute the e_{j_i} page 31 of the documentation + for (size_t i = 0; i < PARAM_DELTA; ++i) { + tmp1 = 1; + tmp2 = 1; + inverse = PQCLEAN_HQCRMRS256_AVX2_gf_inverse(beta_j[i]); + inverse_power_j = 1; + + for (size_t j = 1; j <= PARAM_DELTA; ++j) { + inverse_power_j = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse_power_j, inverse); + tmp1 ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse_power_j, z[j]); + } + for (size_t k = 1; k < PARAM_DELTA; ++k) { + tmp2 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); + } + mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value + e_j[i] = mask1 & PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS256_AVX2_gf_inverse(tmp2)); + } + + // Place the delta e_{j_i} values at the right coordinates of the output vector + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; ++i) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter + error_values[i] += mask1 & mask2 & e_j[j]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } +} + + + +/** + * @brief Correct the errors + * + * @param[out] cdw Array of PARAM_N1 elements receiving the corrected vector + * @param[in] error Array of the error vector + * @param[in] error_values Array of PARAM_DELTA elements storing the error values + */ +static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { + for (size_t i = 0; i < PARAM_N1; ++i) { + cdw[i] ^= error_values[i]; + } +} + + + +/** + * @brief Decodes the received word + * + * This function relies on six steps: + *
    + *
  1. The first step, is the computation of the 2*PARAM_DELTA syndromes. + *
  2. The second step is the computation of the error-locator polynomial sigma. + *
  3. The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and takings their inverses. + *
  4. The fourth step, is the polynomial z(x). + *
  5. The fifth step, is the computation of the error values. + *
  6. The sixth step is the correction of the errors in the received polynomial. + *
+ * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error + * + * @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message + * @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word + */ +void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) { + uint16_t syndromes[2 * PARAM_DELTA] = {0}; + uint16_t sigma[1 << PARAM_FFT] = {0}; + uint8_t error[1 << PARAM_M] = {0}; + uint16_t z[PARAM_N1] = {0}; + uint16_t error_values[PARAM_N1] = {0}; + uint16_t deg; + + // Calculate the 2*PARAM_DELTA syndromes + compute_syndromes(syndromes, cdw); + + // Compute the error locator polynomial sigma + // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room + deg = compute_elp(sigma, syndromes); + + // Compute the error polynomial error + compute_roots(error, sigma); + + // Compute the polynomial z(x) + compute_z_poly(z, sigma, deg, syndromes); + + // Compute the error values + compute_error_values(error_values, z, error); + + // Correct the errors + correct_errors(cdw, error_values); + + // Retrieve the message from the decoded codeword + memcpy(msg, cdw + (PARAM_G - 1), PARAM_K); + +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/reed_solomon.h b/src/kem/hqc/hqc-rmrs-256/avx2/reed_solomon.h new file mode 100644 index 00000000..bf9994d5 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/reed_solomon.h @@ -0,0 +1,20 @@ +#ifndef REED_SOLOMON_H +#define REED_SOLOMON_H + + +/** + * @file reed_solomon.h + * Header file of reed_solomon.c + */ +#include "parameters.h" +#include +#include + +static const uint16_t alpha_ij_pow [46][77] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 
190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223, 91, 113, 217, 67, 17, 68, 13, 52, 208, 103, 129, 62, 248, 199, 59, 236, 151, 102, 133, 46, 184, 218, 79, 33, 132, 42, 168, 154, 82, 85, 73, 57}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169, 33, 21, 168, 41, 85, 146, 228, 115, 191, 145, 252, 179, 241, 219, 150, 196, 110, 87, 130, 100, 7, 56, 221, 166, 89, 242, 195, 86, 138, 36, 61, 245}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150, 149, 165, 130, 200, 28, 221, 81, 121, 195, 172, 18, 61, 247, 203, 44, 250, 27, 173, 2, 32, 58, 135, 152, 117, 3, 48, 39, 74, 212, 193, 140, 40}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36, 244, 235, 44, 233, 108, 1, 32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38, 117, 12, 39, 53, 193, 10, 186, 161, 47, 15, 231, 127, 182, 134, 26, 206, 237, 197, 23, 169, 21, 41, 146, 115, 145, 179, 219, 196, 87, 100, 56, 166}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 
3, 157, 53, 159, 40, 185, 194, 137, 231, 254, 226, 68, 189, 248, 197, 46, 158, 168, 170, 183, 145, 123, 75, 110, 25, 28, 166, 249, 69, 61, 235, 176, 54, 2, 29, 38, 234, 48}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26, 31, 118, 23, 158, 77, 146, 209, 229, 219, 55, 25, 56, 162, 155, 36, 243, 88, 54, 4, 116, 45, 6, 78, 181, 5, 105, 97, 137, 211, 223, 67, 52}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85, 115, 252, 219, 110, 100, 221, 242, 138, 245, 44, 54, 8, 205, 117, 96, 53, 70, 186, 97, 15, 107, 182, 68, 206, 59, 23, 33, 41, 228, 145, 241, 196}, {116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100, 167, 239, 36, 235, 233, 1, 116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44, 216, 128, 45, 48, 106, 10, 222, 202, 107, 226, 52, 237, 133, 66, 85, 209, 123, 196, 50, 167, 195, 144, 11, 54, 32, 76, 12, 148, 140, 185, 188, 211}, {205, 143, 37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221, 195, 61, 44, 173, 58, 117, 39, 193, 186, 47, 231, 182, 26, 237, 23, 21, 146, 145, 219, 87, 56, 242, 36, 139, 54, 64, 45, 96, 181, 80, 97, 120, 223, 68, 62, 102, 33, 85, 191, 241, 110, 7, 89, 138, 251, 207, 8, 38, 12, 53, 10, 161, 15, 127, 134, 206, 197, 169, 41, 115}, {135, 6, 53, 20, 190, 120, 163, 13, 237, 46, 84, 228, 229, 98, 
100, 81, 69, 251, 131, 32, 45, 192, 238, 186, 94, 187, 217, 189, 236, 169, 82, 209, 241, 220, 28, 242, 72, 22, 173, 116, 201, 37, 140, 222, 15, 254, 34, 62, 204, 132, 146, 63, 75, 130, 167, 43, 245, 250, 4, 38, 24, 212, 80, 194, 253, 182, 52, 147, 184, 77, 183, 179, 149, 141, 89, 9, 203}, {19, 24, 181, 93, 94, 107, 67, 129, 102, 132, 57, 252, 98, 200, 89, 18, 11, 173, 232, 3, 53, 40, 194, 231, 226, 189, 197, 158, 170, 145, 75, 25, 166, 69, 235, 54, 29, 234, 37, 5, 95, 120, 91, 52, 59, 218, 82, 191, 227, 174, 221, 43, 247, 207, 32, 90, 39, 35, 111, 15, 225, 136, 237, 92, 77, 115, 246, 220, 56, 239, 122, 125, 4, 76, 96, 238, 105}, {38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169}, {76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, 2, 152, 39, 140, 190, 231, 175, 31, 23, 77, 209, 219, 25, 162, 36, 88, 4, 45, 78, 5, 97, 211, 67, 62, 46, 154, 191, 171, 50, 89, 72, 176, 8, 90, 156, 10, 194, 187, 134, 124, 92, 41, 99, 75, 100, 178, 144, 125, 16, 180, 37, 20, 153, 107, 17, 248, 184, 82, 198, 150, 200, 121}, {152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78}, {45, 37, 80, 101, 223, 208, 102, 168, 191, 150, 7, 195, 251, 173, 38, 39, 10, 47, 127, 26, 197, 21, 115, 219, 100, 242, 245, 54, 205, 96, 70, 97, 107, 68, 59, 33, 228, 241, 130, 89, 61, 207, 58, 12, 193, 161, 231, 134, 237, 169, 146, 179, 87, 166, 36, 125, 64, 143, 181, 185, 120, 217, 62, 184, 85, 252, 110, 221, 138, 44, 8, 117, 
53, 186, 15, 182, 206}, {90, 148, 186, 30, 226, 62, 109, 73, 179, 174, 162, 61, 131, 232, 96, 140, 153, 127, 52, 51, 168, 99, 98, 56, 172, 22, 8, 234, 212, 185, 240, 67, 237, 79, 114, 241, 25, 121, 245, 108, 19, 39, 20, 188, 223, 189, 133, 41, 63, 55, 221, 9, 176, 64, 3, 238, 161, 211, 34, 59, 66, 183, 219, 200, 239, 251, 71, 152, 37, 160, 137, 182, 129, 92, 85, 229, 165}, {180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108, 38, 156, 160, 15, 226, 124, 169, 114, 255, 100, 239, 235, 1, 180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116}, {117, 181, 161, 107, 26, 102, 41, 252, 87, 89, 245, 173, 45, 53, 185, 231, 68, 197, 168, 145, 110, 166, 61, 54, 38, 37, 186, 120, 134, 59, 21, 191, 196, 221, 36, 207, 205, 39, 80, 15, 217, 237, 33, 115, 150, 56, 138, 125, 58, 96, 10, 101, 182, 62, 169, 228, 219, 7, 86, 44, 64, 12, 70, 47, 223, 206, 184, 146, 241, 100, 195, 139, 8, 143, 193, 97, 127}, {234, 238, 97, 254, 103, 184, 57, 227, 7, 172, 176, 58, 192, 40, 15, 175, 147, 21, 99, 55, 166, 122, 216, 45, 106, 222, 107, 52, 133, 85, 123, 50, 195, 11, 32, 12, 140, 188, 182, 124, 158, 115, 49, 224, 36, 131, 19, 37, 105, 253, 68, 151, 154, 252, 174, 121, 251, 2, 201, 193, 194, 225, 206, 109, 114, 219, 14, 69, 125, 116, 157, 80, 30, 67, 59, 42, 198}, {201, 159, 47, 91, 124, 33, 209, 149, 166, 244, 71, 117, 238, 194, 223, 31, 79, 115, 98, 167, 61, 216, 90, 181, 190, 254, 206, 218, 213, 150, 224, 72, 54, 152, 106, 161, 177, 189, 184, 114, 171, 56, 18, 131, 38, 148, 111, 107, 104, 46, 146, 227, 14, 138, 233, 135, 37, 210, 211, 26, 133, 170, 241, 141, 172, 125, 232, 78, 186, 253, 136, 102, 164, 123, 100, 43, 88}, {143, 70, 101, 217, 59, 168, 252, 130, 195, 44, 58, 39, 186, 231, 26, 23, 146, 219, 56, 36, 54, 45, 181, 97, 223, 62, 33, 191, 110, 89, 251, 8, 12, 10, 15, 134, 197, 41, 179, 100, 86, 125, 205, 
37, 185, 107, 208, 184, 228, 150, 221, 61, 173, 117, 193, 47, 182, 237, 21, 145, 87, 242, 139, 64, 96, 80, 120, 68, 102, 85, 241, 7, 138, 207, 38, 53, 161}, {3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55, 89, 235, 32, 96, 160, 253, 26, 46, 114, 150, 167, 244, 1, 3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132}, {6, 20, 120, 13, 46, 228, 98, 81, 251, 32, 192, 186, 187, 189, 169, 209, 220, 242, 22, 116, 37, 222, 254, 62, 132, 63, 130, 43, 250, 38, 212, 194, 182, 147, 77, 179, 141, 9, 54, 180, 159, 101, 67, 151, 85, 227, 112, 61, 142, 3, 10, 60, 136, 23, 114, 49, 166, 243, 16, 96, 93, 211, 208, 218, 230, 110, 121, 11, 58, 156, 111, 127, 31, 66, 145, 65, 155}, {12, 80, 231, 208, 169, 191, 87, 195, 125, 38, 181, 47, 217, 197, 85, 219, 221, 245, 8, 96, 186, 107, 206, 33, 145, 130, 86, 207, 45, 193, 101, 134, 102, 146, 150, 166, 251, 64, 39, 185, 127, 62, 21, 252, 100, 138, 54, 117, 70, 15, 68, 23, 228, 196, 89, 139, 58, 37, 161, 223, 237, 168, 179, 7, 36, 173, 143, 10, 120, 26, 184, 115, 110, 242, 44, 205, 53}, {24, 93, 107, 129, 132, 252, 200, 18, 173, 3, 40, 231, 189, 158, 145, 25, 69, 54, 234, 5, 120, 52, 218, 191, 174, 43, 207, 90, 35, 15, 136, 92, 115, 220, 239, 125, 76, 238, 101, 17, 133, 228, 149, 121, 44, 135, 212, 47, 175, 51, 146, 49, 162, 139, 116, 148, 97, 113, 236, 85, 171, 83, 251, 128, 156, 161, 163, 147, 41, 255, 224, 245, 16, 157, 185, 254, 248}, {48, 105, 127, 248, 77, 241, 224, 247, 64, 156, 95, 182, 236, 170, 150, 162, 11, 205, 212, 94, 134, 133, 213, 110, 239, 250, 45, 35, 30, 26, 218, 99, 130, 69, 108, 143, 40, 211, 206, 132, 229, 7, 144, 2, 96, 210, 254, 237, 154, 255, 221, 243, 128, 37, 190, 113, 197, 73, 49, 89, 22, 135, 181, 188, 17, 23, 183, 220, 195, 233, 90, 70, 60, 52, 169, 198, 25}, {96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 
36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38}, {192, 222, 182, 151, 114, 110, 155, 27, 143, 160, 177, 237, 82, 75, 89, 88, 152, 70, 240, 103, 21, 123, 224, 251, 116, 212, 101, 136, 218, 145, 200, 144, 8, 78, 190, 217, 204, 183, 87, 172, 216, 12, 105, 225, 59, 170, 98, 242, 250, 180, 10, 211, 31, 168, 255, 83, 139, 135, 238, 15, 52, 158, 252, 14, 244, 64, 74, 153, 134, 46, 209, 130, 9, 142, 96, 111, 91}, {157, 95, 217, 133, 230, 130, 18, 2, 39, 190, 175, 23, 209, 25, 36, 4, 78, 97, 67, 46, 191, 50, 72, 8, 156, 194, 134, 92, 99, 100, 144, 16, 37, 153, 17, 184, 198, 200, 61, 32, 74, 47, 34, 109, 145, 141, 122, 64, 148, 94, 68, 218, 63, 7, 244, 128, 53, 188, 136, 169, 126, 14, 245, 29, 106, 101, 13, 79, 252, 28, 247, 58, 212, 202, 26, 158, 229}, {39, 97, 134, 184, 145, 7, 245, 58, 181, 15, 208, 21, 241, 166, 44, 45, 10, 107, 237, 85, 196, 195, 54, 12, 185, 182, 102, 115, 130, 36, 8, 37, 47, 68, 169, 252, 56, 251, 205, 193, 120, 206, 168, 219, 89, 125, 117, 80, 127, 59, 146, 110, 86, 173, 96, 161, 217, 23, 191, 100, 61, 64, 53, 101, 26, 33, 179, 221, 139, 38, 70, 231, 62, 41, 150, 242, 207}, {78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153}, {156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15, 103, 77, 150, 239, 108, 96, 190, 17, 169, 215, 167, 44, 180, 160, 223, 51, 230, 100, 244, 116, 193, 253, 124, 85, 55, 172, 1, 156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 
32, 106, 15, 103, 77}, {37, 101, 208, 168, 150, 195, 173, 39, 47, 26, 21, 219, 242, 54, 96, 97, 68, 33, 241, 89, 207, 12, 161, 134, 169, 179, 166, 125, 143, 185, 217, 184, 252, 221, 44, 117, 186, 182, 23, 145, 56, 139, 45, 80, 223, 102, 191, 7, 251, 38, 10, 127, 197, 115, 100, 245, 205, 70, 107, 59, 228, 130, 61, 58, 193, 231, 237, 146, 87, 36, 64, 181, 120, 62, 85, 110, 138}, {74, 137, 206, 82, 55, 138, 16, 212, 120, 124, 73, 87, 72, 29, 193, 211, 147, 228, 25, 244, 205, 140, 177, 197, 230, 141, 251, 76, 40, 223, 204, 198, 56, 11, 180, 186, 113, 92, 252, 167, 176, 143, 111, 67, 169, 123, 162, 207, 24, 190, 68, 66, 227, 242, 108, 157, 47, 52, 84, 150, 155, 142, 37, 202, 103, 41, 149, 69, 8, 106, 60, 62, 170, 165, 36, 128, 238}, {148, 30, 62, 73, 174, 61, 232, 140, 127, 51, 99, 56, 22, 234, 185, 67, 79, 241, 121, 108, 39, 188, 189, 41, 55, 9, 64, 238, 211, 59, 183, 200, 251, 152, 160, 182, 92, 229, 166, 233, 24, 97, 13, 42, 150, 43, 2, 53, 60, 124, 146, 65, 122, 205, 5, 254, 102, 198, 112, 44, 201, 111, 134, 158, 255, 242, 216, 78, 101, 103, 82, 110, 18, 128, 193, 187, 118}, {53, 120, 237, 228, 100, 251, 45, 186, 217, 169, 241, 242, 173, 37, 15, 62, 146, 130, 245, 38, 80, 182, 184, 179, 89, 54, 39, 101, 206, 85, 87, 61, 205, 10, 223, 23, 252, 166, 207, 96, 47, 208, 41, 110, 36, 58, 70, 127, 102, 145, 221, 125, 12, 97, 26, 168, 196, 138, 64, 193, 107, 197, 191, 56, 44, 143, 161, 68, 21, 150, 86, 8, 181, 231, 59, 115, 7}, {106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180, 185, 17, 132, 150, 172, 32, 193, 214, 51, 145, 167, 233, 96, 94, 103, 85, 174, 244, 38, 160, 226, 169, 255, 239, 1, 106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180}, {212, 211, 197, 198, 167, 207, 157, 202, 62, 114, 200, 139, 201, 95, 26, 154, 220, 61, 19, 160, 217, 158, 171, 86, 32, 159, 127, 133, 229, 89, 216, 74, 120, 147, 230, 56, 176, 24, 47, 103, 170, 130, 243, 
90, 185, 34, 42, 196, 18, 116, 10, 91, 109, 241, 239, 2, 181, 187, 151, 145, 83, 131, 39, 137, 124, 228, 141, 11, 143, 190, 52, 41, 165, 122, 38, 93, 175}, {181, 107, 102, 252, 89, 173, 53, 231, 197, 145, 166, 54, 37, 120, 59, 191, 221, 207, 39, 15, 237, 115, 56, 125, 96, 101, 62, 228, 7, 44, 12, 47, 206, 146, 100, 139, 143, 97, 208, 85, 130, 251, 117, 161, 26, 41, 87, 245, 45, 185, 68, 168, 110, 61, 38, 186, 134, 21, 196, 36, 205, 80, 217, 33, 150, 138, 58, 10, 182, 169, 219, 86, 64, 70, 223, 184, 241}, {119, 177, 23, 123, 239, 8, 159, 225, 184, 255, 43, 64, 140, 91, 169, 171, 69, 58, 20, 226, 33, 49, 18, 205, 160, 67, 21, 149, 144, 38, 105, 34, 168, 220, 244, 45, 111, 13, 41, 174, 243, 117, 95, 104, 85, 25, 203, 143, 194, 103, 146, 200, 22, 12, 94, 31, 228, 14, 176, 96, 202, 248, 115, 112, 233, 39, 30, 147, 191, 167, 27, 37, 240, 236, 145, 81, 216}, {238, 254, 184, 227, 172, 58, 40, 175, 21, 55, 122, 45, 222, 52, 85, 50, 11, 12, 188, 124, 115, 224, 131, 37, 253, 151, 252, 121, 2, 193, 225, 109, 219, 69, 116, 80, 67, 42, 110, 244, 90, 161, 104, 170, 100, 22, 24, 101, 248, 230, 221, 27, 74, 231, 51, 229, 242, 4, 159, 223, 218, 171, 138, 232, 160, 134, 84, 220, 245, 180, 95, 208, 73, 200, 44, 48, 202}, {193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85}, {159, 91, 33, 149, 244, 117, 194, 31, 115, 167, 216, 181, 254, 218, 150, 72, 152, 161, 189, 114, 56, 131, 148, 107, 46, 227, 138, 135, 210, 26, 170, 141, 125, 78, 253, 102, 123, 43, 58, 160, 34, 41, 25, 22, 96, 30, 236, 252, 249, 32, 10, 175, 84, 87, 235, 6, 101, 199, 198, 89, 2, 35, 182, 66, 55, 245, 234, 153, 62, 230, 83, 173, 119, 225, 169, 49, 144}}; + +void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t 
*msg); + +void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/vector.c b/src/kem/hqc/hqc-rmrs-256/avx2/vector.c new file mode 100644 index 00000000..83f0e06b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/vector.c @@ -0,0 +1,178 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +#include +/** + * @file vector.c + * @brief Implementation of vectors sampling and some utilities for the HQC scheme + */ + + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It return \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). 
+ * + * @param[in] v Pointer to an array + * @param[in] weight Integer that is the Hamming weight + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) { + size_t random_bytes_size = 3 * weight; + uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; + uint32_t tmp[PARAM_OMEGA_R] = {0}; + __m256i bit256[PARAM_OMEGA_R]; + __m256i bloc256[PARAM_OMEGA_R]; + __m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0); + __m256i pos256; + __m256i mask256; + __m256i aux; + __m256i i256; + uint64_t bloc, pos, bit64; + uint8_t inc; + size_t i, j, k; + + i = 0; + j = random_bytes_size; + while (i < weight) { + do { + if (j == random_bytes_size) { + seedexpander(ctx, rand_bytes, random_bytes_size); + j = 0; + } + + tmp[i] = ((uint32_t) rand_bytes[j++]) << 16; + tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8; + tmp[i] |= rand_bytes[j++]; + + } while (tmp[i] >= UTILS_REJECTION_THRESHOLD); + + tmp[i] = tmp[i] % PARAM_N; + + inc = 1; + for (k = 0; k < i; k++) { + if (tmp[k] == tmp[i]) { + inc = 0; + } + } + i += inc; + } + + for (i = 0; i < weight; i++) { + // we store the bloc number and bit position of each vb[i] + bloc = tmp[i] >> 6; + bloc256[i] = _mm256_set1_epi64x(bloc >> 2); + pos = (bloc & 0x3UL); + pos256 = _mm256_set1_epi64x(pos); + mask256 = _mm256_cmpeq_epi64(pos256, posCmp256); + bit64 = 1ULL << (tmp[i] & 0x3f); + bit256[i] = _mm256_set1_epi64x(bit64)&mask256; + } + + for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) { + aux = _mm256_loadu_si256(((__m256i *)v) + i); + i256 = _mm256_set1_epi64x(i); + + for (j = 0; j < weight; j++) { + mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); + aux ^= bit256[j] & mask256; + } + _mm256_storeu_si256(((__m256i *)v) + i, aux); + } + +} + + + +/** + * @brief Generates a random vector of dimension PARAM_N + * + * This function generates a random binary vector of dimension PARAM_N. 
It generates a random + * array of bytes using the seedexpander function, and drop the extra bits using a mask. + * + * @param[in] v Pointer to an array + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS256_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) { + uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0}; + + seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES); + + PQCLEAN_HQCRMRS256_AVX2_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES); + v[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief Adds two vectors (bitwise XOR, i.e. addition over GF(2)) + * + * @param[out] o Pointer to an array that is the result + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors in 64-bit words + */ +void PQCLEAN_HQCRMRS256_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { + for (uint32_t i = 0; i < size; ++i) { + o[i] = v1[i] ^ v2[i]; + } +} + + + +/** + * @brief Compares two vectors without exiting early on the first difference + * + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors in bytes + * @returns 0 if the vectors are equal and 1 otherwise + */ +uint8_t PQCLEAN_HQCRMRS256_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) { + uint64_t r = 0; + for (size_t i = 0; i < size; i++) { + r |= v1[i] ^ v2[i]; + } + r = (~r + 1) >> 63; + return (uint8_t) r; +} + + + +/** + * @brief Resize a vector so that it contains size_o bits + * + * @param[out] o Pointer to the output vector + * @param[in] size_o Integer that is the size of the output vector in bits + * @param[in] v Pointer to the input vector + * @param[in] size_v Integer that is the size of the input vector in bits + */ +void PQCLEAN_HQCRMRS256_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) { + uint64_t mask = 
0x7FFFFFFFFFFFFFFF; + int8_t val = 0; + if (size_o < size_v) { + if (size_o % 64) { + val = 64 - (size_o % 64); + } + + memcpy(o, v, VEC_N1N2_SIZE_BYTES); + + for (int8_t i = 0; i < val; ++i) { + o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); + } + } else { + memcpy(o, v, CEIL_DIVIDE(size_v, 8)); + } +} diff --git a/src/kem/hqc/hqc-rmrs-256/avx2/vector.h b/src/kem/hqc/hqc-rmrs-256/avx2/vector.h new file mode 100644 index 00000000..1508d0b7 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/avx2/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + + +/** + * @file vector.h + * @brief Header file for vector.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS256_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v); + +void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_from_randombytes(uint64_t *v); + + +void PQCLEAN_HQCRMRS256_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size); + +uint8_t PQCLEAN_HQCRMRS256_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size); + +void PQCLEAN_HQCRMRS256_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/CMakeLists.txt b/src/kem/hqc/hqc-rmrs-256/clean/CMakeLists.txt new file mode 100644 index 00000000..6c1a2e69 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/CMakeLists.txt @@ -0,0 +1,16 @@ +set( + SRC_CLEAN_HQCRMRS256 + code.c + fft.c + gf2x.c + gf.c + hqc.c + kem.c + parsing.c + reed_muller.c + reed_solomon.c + vector.c +) + +define_kem_alg(hqcrmrs256_clean + PQCLEAN_HQCRMRS256_CLEAN "${SRC_CLEAN_HQCRMRS256}" "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/src/kem/hqc/hqc-rmrs-256/clean/api.h b/src/kem/hqc/hqc-rmrs-256/clean/api.h new file mode 100644 index 00000000..d8bd7d2e --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/api.h @@ -0,0 +1,25 @@ 
+#ifndef PQCLEAN_HQCRMRS256_CLEAN_API_H +#define PQCLEAN_HQCRMRS256_CLEAN_API_H +/** + * @file api.h + * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme + */ + +#define PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_ALGNAME "HQC-RMRS-256" + +#define PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_SECRETKEYBYTES 7285 +#define PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_PUBLICKEYBYTES 7245 +#define PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_BYTES 64 +#define PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_CIPHERTEXTBYTES 14469 + +// As a technicality, the public key is appended to the secret key in order to respect the NIST API. +// Without this constraint, PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32 + +int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk); + +int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk); + +int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/code.c b/src/kem/hqc/hqc-rmrs-256/clean/code.c new file mode 100644 index 00000000..1178b56e --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/code.c @@ -0,0 +1,46 @@ +#include "code.h" +#include "parameters.h" +#include "reed_muller.h" +#include "reed_solomon.h" +#include +#include +/** + * @file code.c + * @brief Implementation of concatenated code + */ + + + +/** + * + * @brief Encoding the message m to a code word em using the concatenated code + * + * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain + * a concatenated code word. 
+ * + * @param[out] em Pointer to an array that is the tensor code word + * @param[in] m Pointer to an array that is the message + */ +void PQCLEAN_HQCRMRS256_CLEAN_code_encode(uint8_t *em, const uint8_t *m) { + uint8_t tmp[VEC_N1_SIZE_BYTES] = {0}; + + PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_encode(tmp, m); + PQCLEAN_HQCRMRS256_CLEAN_reed_muller_encode(em, tmp); + +} + + + +/** + * @brief Decoding the code word em to a message m using the concatenated code + * + * @param[out] m Pointer to an array that is the message + * @param[in] em Pointer to an array that is the code word + */ +void PQCLEAN_HQCRMRS256_CLEAN_code_decode(uint8_t *m, const uint8_t *em) { + uint8_t tmp[VEC_N1_SIZE_BYTES] = {0}; + + PQCLEAN_HQCRMRS256_CLEAN_reed_muller_decode(tmp, em); + PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_decode(m, tmp); + +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/code.h b/src/kem/hqc/hqc-rmrs-256/clean/code.h new file mode 100644 index 00000000..6de64ab9 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/code.h @@ -0,0 +1,18 @@ +#ifndef CODE_H +#define CODE_H + + +/** + * @file code.h + * Header file of code.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS256_CLEAN_code_encode(uint8_t *em, const uint8_t *message); + +void PQCLEAN_HQCRMRS256_CLEAN_code_decode(uint8_t *m, const uint8_t *em); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/fft.c b/src/kem/hqc/hqc-rmrs-256/clean/fft.c new file mode 100644 index 00000000..6a680972 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/fft.c @@ -0,0 +1,351 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include +#include +/** + * @file fft.c + * Implementation of the additive FFT and its transpose. + * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf + */ + + +static void compute_fft_betas(uint16_t *betas); +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size); +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); + + +/** + * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose + * + * @param[out] betas Array of size PARAM_M-1 + */ +static void compute_fft_betas(uint16_t *betas) { + size_t i; + for (i = 0; i < PARAM_M - 1; ++i) { + betas[i] = 1 << (PARAM_M - 1 - i); + } +} + + + +/** + * @brief Computes the subset sums of the given set + * + * The array subset_sums is such that its ith element is + * the subset sum of the set elements given by the binary form of i. 
+ * + * @param[out] subset_sums Array of size 2^set_size receiving the subset sums + * @param[in] set Array of set_size elements + * @param[in] set_size Size of the array set + */ +static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) { + uint16_t i, j; + subset_sums[0] = 0; + + for (i = 0; i < set_size; ++i) { + for (j = 0; j < (1 << i); ++j) { + subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; + } + } +} + + + +/** + * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x] + * + * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x) + * as proposed by Bernstein, Chou and Schwabe: + * https://binary.cr.yp.to/mcbits-20130616.pdf + * + * @param[out] f0 Array half the size of f + * @param[out] f1 Array half the size of f + * @param[in] f Array of size a power of 2 + * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f + */ +static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + switch (m_f) { + case 4: + f0[4] = f[8] ^ f[12]; + f0[6] = f[12] ^ f[14]; + f0[7] = f[14] ^ f[15]; + f1[5] = f[11] ^ f[13]; + f1[6] = f[13] ^ f[14]; + f1[7] = f[15]; + f0[5] = f[10] ^ f[12] ^ f1[5]; + f1[4] = f[9] ^ f[13] ^ f0[5]; + + f0[0] = f[0]; + f1[3] = f[7] ^ f[11] ^ f[15]; + f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3]; + f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3]; + f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3]; + f1[2] = f[3] ^ f1[1] ^ f0[3]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 3: + f0[0] = f[0]; + f0[2] = f[4] ^ f[6]; + f0[3] = f[6] ^ f[7]; + f1[1] = f[3] ^ f[5] ^ f[7]; + f1[2] = f[5] ^ f[6]; + f1[3] = f[7]; + f0[1] = f[2] ^ f0[2] ^ f1[1]; + f1[0] = f[1] ^ f0[1]; + break; + + case 2: + f0[0] = f[0]; + f0[1] = f[2] ^ f[3]; + f1[0] = f[1] ^ f0[1]; + f1[1] = f[3]; + break; + + case 1: + f0[0] = f[0]; + f1[0] = f[1]; + break; + + default: + radix_big(f0, f1, f, m_f); + break; + } +} + +static void radix_big(uint16_t *f0, 
uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1; + n <<= (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0; i < n; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + + + +/** + * @brief Evaluates f at all subset sums of a given set + * + * This function is a subroutine of the function PQCLEAN_HQCRMRS256_CLEAN_fft. + * + * @param[out] w Array + * @param[in,out] f Array of 2^m_f coefficients (twisted in place when the last beta is not 1) + * @param[in] f_coeffs Number of coefficients of f + * @param[in] m Number of betas + * @param[in] m_f Base-2 logarithm of the size of f (f holds 2^m_f coefficient slots) + * @param[in] betas FFT constants + */ +static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; + uint16_t u[1 << (PARAM_M - 2)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; + size_t x; + + // Step 1 + if (m_f == 1) { + for (i = 0; i < m; ++i) { + tmp[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas[i], f[1]); + } + + w[0] = f[0]; + x = 1; + for (j = 0; j < m; ++j) { + for (k = 0; k < x; ++k) { + w[x + k] = w[k] ^ tmp[j]; + } + x <<= 1; + } + + return; + } + + // Step 2: compute g + if (betas[m - 1] != 1) { + beta_m_pow = 1; + x = 1; + x <<= m_f; + for (i = 1; i < x; 
++i) { + beta_m_pow = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); + f[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(beta_m_pow, f[i]); + } + } + + // Step 3 + radix(f0, f1, f, m_f); + + // Step 4: compute gammas and deltas + for (i = 0; i + 1 < m; ++i) { + gammas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(betas[m - 1])); + deltas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; + } + + // Compute gammas sums + compute_subset_sums(gammas_sums, gammas, m - 1); + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + + k = 1; + k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. + if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant + w[0] = u[0]; + w[k] = u[0] ^ f1[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gammas_sums[i], f1[0]); + w[k + i] = w[i] ^ f1[0]; + } + } else { + fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas); + + // Step 6 + memcpy(w + k, v, 2 * k); + w[0] = u[0]; + w[k] ^= u[0]; + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gammas_sums[i], v[i]); + w[k + i] ^= w[i]; + } + } +} + + + +/** + * @brief Evaluates f on all field elements using an additive FFT algorithm + * + * f_coeffs is the number of coefficients of f (one more than its degree).
+ * The FFT proceeds recursively to evaluate f at all subset sums of a basis B.
+ * This implementation is based on the paper from Gao and Mateer:
+ * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields, + * IEEE Transactions on Information Theory 56 (2010), 6265--6272. + * http://www.math.clemson.edu/~sgao/papers/GM10.pdf
+ * and includes improvements proposed by Bernstein, Chou and Schwabe here: + * https://binary.cr.yp.to/mcbits-20130616.pdf
+ * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas, + * meaning the first gammas subset sums are actually the subset sums of betas (except 1).
+ * Also note that f is altered during computation (twisted at each level). + * + * @param[out] w Array + * @param[in] f Array of 2^PARAM_FFT elements + * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) + */ +void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; + + // Follows Gao and Mateer algorithm + compute_fft_betas(betas); + + // Step 1: PARAM_FFT > 1, nothing to do + + // Compute gammas sums + compute_subset_sums(betas_sums, betas, PARAM_M - 1); + + // Step 2: beta_m = 1, nothing to do + + // Step 3 + radix(f0, f1, f, PARAM_FFT); + + // Step 4: Compute deltas + for (i = 0; i < PARAM_M - 1; ++i) { + deltas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_square(betas[i]) ^ betas[i]; + } + + // Step 5 + fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + + k = 1 << (PARAM_M - 1); + // Step 6, 7 and error polynomial computation + memcpy(w + k, v, 2 * k); + + // Check if 0 is root + w[0] = u[0]; + + // Check if 1 is root + w[k] ^= u[0]; + + // Find other roots + for (i = 1; i < k; ++i) { + w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas_sums[i], v[i]); + w[k + i] ^= w[i]; + } +} + + + +/** + * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements. 
+ * + * @param[out] error Array with the error + * @param[out] error_compact Array with the error in a compact form + * @param[in] w Array of size 2^PARAM_M + */ +void PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t k; + size_t i, index; + + compute_fft_betas(gammas); + compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + + k = 1 << (PARAM_M - 1); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); + + for (i = 1; i < k; ++i) { + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]]; + error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); + + index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1]; + error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15); + } +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/fft.h b/src/kem/hqc/hqc-rmrs-256/clean/fft.h new file mode 100644 index 00000000..e53d9daa --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/fft.h @@ -0,0 +1,18 @@ +#ifndef FFT_H +#define FFT_H + + +/** + * @file fft.h + * Header file of fft.c + */ + +#include +#include + +void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs); + +void PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/gf.c b/src/kem/hqc/hqc-rmrs-256/clean/gf.c new file mode 100644 index 00000000..1f10ccdf --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/gf.c @@ -0,0 +1,63 @@ +#include "gf.h" +#include "parameters.h" +#include +/** + * @file gf.c + * Galois field implementation with multiplication using lookup tables + */ + + +/** + * @brief Multiplies element a by element b in GF(2^PARAM_M) using the log/exp tables + * @returns the product a*b; the result is masked to 0 when a or b is 0 + * @param[in] a First element of GF(2^PARAM_M) to multiply (may be zero) + * @param[in] b Second element of GF(2^PARAM_M) to multiply (may be zero) + */ +uint16_t 
PQCLEAN_HQCRMRS256_CLEAN_gf_mul(uint16_t a, uint16_t b) { + uint16_t mask; + mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + mask &= (uint16_t) (-((int32_t) b) >> 31); // b != 0 + return mask & gf_exp[PQCLEAN_HQCRMRS256_CLEAN_gf_mod(gf_log[a] + gf_log[b])]; +} + + + +/** + * @brief Squares an element of GF(2^PARAM_M) + * @returns a^2 + * @param[in] a Element of GF(2^PARAM_M) + */ +uint16_t PQCLEAN_HQCRMRS256_CLEAN_gf_square(uint16_t a) { + int16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + return mask & gf_exp[PQCLEAN_HQCRMRS256_CLEAN_gf_mod(2 * gf_log[a])]; +} + + + +/** + * @brief Computes the inverse of an element of GF(2^PARAM_M) + * @returns the inverse of a + * @param[in] a Element of GF(2^PARAM_M) + */ +uint16_t PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(uint16_t a) { + int16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0 + return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]]; +} + + + +/** + * @brief Returns i modulo 2^PARAM_M-1 + * i must be less than 2*(2^PARAM_M-1). + * Therefore, the return value is either i or i-2^PARAM_M+1. + * @returns i mod (2^PARAM_M-1) + * @param[in] i The integer whose modulo is taken + */ +uint16_t PQCLEAN_HQCRMRS256_CLEAN_gf_mod(uint16_t i) { + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); + + // mask = 0xffff if(i < PARAM_GF_MUL_ORDER) + uint16_t mask = -(tmp >> 15); + + return tmp + (mask & PARAM_GF_MUL_ORDER); +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/gf.h b/src/kem/hqc/hqc-rmrs-256/clean/gf.h new file mode 100644 index 00000000..0d94dd8d --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/gf.h @@ -0,0 +1,39 @@ +#ifndef GF_H +#define GF_H + + +/** + * @file gf.h + * Header file of gf.c + */ + +#include +#include + + +/** + * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8. + * The last two elements are needed by the PQCLEAN_HQCRMRS256_CLEAN_gf_mul function + * (for example if both elements to multiply are zero). 
+ */ +static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 }; + + + +/** + * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8). + * The logarithm of 0 is set to 0 by convention. 
static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
static void reduce(uint64_t *o, const uint64_t *a);
static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

/**
 * @brief Swap two elements of a table
 *
 * Exchanges tab[elt1] with tab[elt2].
 *
 * @param[in,out] tab Pointer to the table
 * @param[in] elt1 Index of the first element
 * @param[in] elt2 Index of the second element
 */
static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t tmp = tab[elt1];

    tab[elt1] = tab[elt2];
    tab[elt2] = tmp;
}



/**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * Folds the bits of a(x) located at positions PARAM_N and above back onto
 * the low PARAM_N bits.
 * The loop reads a[i + VEC_N_SIZE_64] for every i < VEC_N_SIZE_64, so a must
 * hold at least 2 * VEC_N_SIZE_64 words.
 *
 * @param[out] o Pointer to the result (VEC_N_SIZE_64 words)
 * @param[in] a Pointer to the polynomial a(x) to reduce
 */
static void reduce(uint64_t *o, const uint64_t *a) {
    size_t i;
    uint64_t r;
    uint64_t carry;

    for (i = 0; i < VEC_N_SIZE_64; i++) {
        // bits of the word straddling position PARAM_N that lie at or above it
        r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63);
        // low bits of the following word, moved up to complete the 64-bit word
        carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63)));
        o[i] = a[i] ^ r ^ carry;
    }

    // drop everything above bit PARAM_N - 1 in the last word
    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
}



/**
 * @brief Computes the product of a sparse polynomial a1 with a dense polynomial a2
 *
 * o(x) ^= a1(x)a2(x)
 *
 * The 16 shifts a2(x) * X^i, 0 <= i < 16, are precomputed into a table whose
 * row order is shuffled with bytes drawn from ctx. The monomials of a1 are
 * then processed in a second shuffled order: for each monomial, the table row
 * selected by the low 4 bits of its degree is XORed into o at the 16-bit
 * offset given by the remaining bits of the degree.
 *
 * The result is XOR-accumulated, so o has to be zero on entry to obtain the
 * plain product. The last write covers VEC_N_SIZE_64 + 1 words starting at
 * byte offset 2 * (degree >> 4).
 *
 * @param[in,out] o Pointer to the byte buffer receiving the (unreduced) product
 * @param[in] a1 Pointer to the sparse polynomial (list of the degrees of its monomials)
 * @param[in] a2 Pointer to the dense polynomial (VEC_N_SIZE_64 words)
 * @param[in] weight Hamming weight of the sparse polynomial a1; the local
 *                   scratch arrays hold PARAM_OMEGA_E entries, so weight must
 *                   not exceed PARAM_OMEGA_E
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t carry;
    uint32_t dec, s;
    uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
    uint16_t permuted_table[16];
    uint16_t permutation_table[16];
    uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t tmp;
    uint64_t *pt;
    uint8_t *res;
    size_t i, j;

    // start from the identity permutation of the 16 table rows
    for (i = 0; i < 16; i++) {
        permuted_table[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    // shuffle: exchange entry i with an entry picked in [i, 16)
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    // row for shift 0: plain copy of a2 with an empty overflow word
    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    // rows for shifts 1..15: a2 shifted left by i bits, overflow kept in the extra word
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    // identity permutation of the monomial indices of a1
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    // shuffle the order in which the monomials are processed
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i)));
    }

    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;  // shift inside a 16-bit unit -> table row
        s = a1[permuted_sparse_vect[i]] >> 4;     // number of whole 16-bit units
        res = o + 2 * s;                          // 16 bits = 2 bytes per unit
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            tmp = PQCLEAN_HQCRMRS256_CLEAN_load8(res);
            PQCLEAN_HQCRMRS256_CLEAN_store8(res, tmp ^ pt[j]);
            res += 8;
        }
    }
}
+ * + * @param[out] o Pointer to the result + * @param[in] a1 Pointer to the sparse polynomial + * @param[in] a2 Pointer to the dense polynomial + * @param[in] weight Integer that is the weigt of the sparse polynomial + * @param[in] ctx Pointer to the randomness context + */ +void PQCLEAN_HQCRMRS256_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { + uint64_t tmp[2 * VEC_N_SIZE_64 + 1] = {0}; + + fast_convolution_mult((uint8_t *) tmp, a1, a2, weight, ctx); + PQCLEAN_HQCRMRS256_CLEAN_load8_arr(tmp, 2 * VEC_N_SIZE_64 + 1, (uint8_t *) tmp, sizeof(tmp)); + reduce(o, tmp); +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/gf2x.h b/src/kem/hqc/hqc-rmrs-256/clean/gf2x.h new file mode 100644 index 00000000..0aa8cf5f --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/gf2x.h @@ -0,0 +1,16 @@ +#ifndef GF2X_H +#define GF2X_H + + +/** + * @file gf2x.h + * @brief Header file for gf2x.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS256_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/hqc.c b/src/kem/hqc/hqc-rmrs-256/clean/hqc.c new file mode 100644 index 00000000..fdf908c2 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/hqc.c @@ -0,0 +1,144 @@ +#include "code.h" +#include "gf2x.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +/** + * @file hqc.c + * @brief Implementation of hqc.h + */ + + + +/** + * @brief Keygen of the HQC_PKE IND_CPA scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. 
/**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key is composed of the syndrome s as well as the seed used to generate the vector h.
 *
 * The secret key is composed of the seed used to generate vectors x and y.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * The secret seed is drawn from randombytes() before the public seed, and
 * x is expanded before y; this order determines the generated key pair.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct sk_seedexpander;
    AES_XOF_struct pk_seedexpander;
    uint8_t sk_seed[SEED_BYTES] = {0};
    uint8_t pk_seed[SEED_BYTES] = {0};
    uint64_t x[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};

    // Create seed_expanders for public key and secret key
    // (each seed is handed over as its first 32 bytes plus the bytes that follow)
    randombytes(sk_seed, SEED_BYTES);
    seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(pk_seed, SEED_BYTES);
    seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute secret key: x as a dense bit vector, y as a list of coordinates
    PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA);

    // Compute public key: s = x + h.y
    // (the secret expander also feeds the randomization inside vect_mul)
    PQCLEAN_HQCRMRS256_CLEAN_vect_set_random(&pk_seedexpander, h);
    PQCLEAN_HQCRMRS256_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &sk_seedexpander);
    PQCLEAN_HQCRMRS256_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64);

    // Parse keys to string (sk embeds a copy of pk, so pk is serialized first)
    PQCLEAN_HQCRMRS256_CLEAN_hqc_public_key_to_string(pk, pk_seed, s);
    PQCLEAN_HQCRMRS256_CLEAN_hqc_secret_key_to_string(sk, sk_seed, pk);

}
/**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors u and v.
 *
 * All randomness (r1, r2, e and the randomization of both multiplications)
 * is drawn, in that order, from a single seed expander initialised with theta.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct seedexpander;
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};
    uint64_t r1[VEC_N_SIZE_64] = {0};
    uint32_t r2[PARAM_OMEGA_R] = {0};
    uint64_t e[VEC_N_SIZE_64] = {0};
    uint64_t tmp1[VEC_N_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_SIZE_64] = {0};

    // Create seed_expander from theta
    seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQCRMRS256_CLEAN_hqc_public_key_from_string(h, s, pk);

    // Generate r1, r2 and e
    PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(&seedexpander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E);

    // Compute u = r1 + r2.h
    PQCLEAN_HQCRMRS256_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &seedexpander);
    PQCLEAN_HQCRMRS256_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

    // Compute v = m.G by encoding the message
    // (the codeword is written as bytes into v, converted in place to 64-bit
    // words, then resized from PARAM_N1N2 to PARAM_N bits into tmp1)
    PQCLEAN_HQCRMRS256_CLEAN_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS256_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS256_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

    // Compute v = m.G + s.r2 + e
    PQCLEAN_HQCRMRS256_CLEAN_vect_mul(tmp2, r2, s, PARAM_OMEGA_R, &seedexpander);
    PQCLEAN_HQCRMRS256_CLEAN_vect_add(tmp2, e, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS256_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);
    // bring the sum back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS256_CLEAN_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

}



/**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * A fresh seed from randombytes() is used only to initialise the expander
 * that randomizes the multiplication u.y.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t tmp1[VEC_N_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    AES_XOF_struct perm_seedexpander;
    uint8_t perm_seed[SEED_BYTES] = {0};

    // Retrieve x, y, pk from secret key
    // (x lands in tmp1 and is overwritten by the resize below; only y is used)
    PQCLEAN_HQCRMRS256_CLEAN_hqc_secret_key_from_string(tmp1, y, pk, sk);

    randombytes(perm_seed, SEED_BYTES);
    seedexpander_init(&perm_seedexpander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute v - u.y
    PQCLEAN_HQCRMRS256_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQCRMRS256_CLEAN_vect_mul(tmp2, y, u, PARAM_OMEGA, &perm_seedexpander);
    PQCLEAN_HQCRMRS256_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);


    // Compute m by decoding v - u.y
    // (tmp1 is reused as the byte buffer handed to the decoder)
    PQCLEAN_HQCRMRS256_CLEAN_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS256_CLEAN_code_decode(m, (uint8_t *)tmp1);
}
const uint64_t *v, const unsigned char *sk); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/kem.c b/src/kem/hqc/hqc-rmrs-256/clean/kem.c new file mode 100644 index 00000000..3320be82 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/kem.c @@ -0,0 +1,140 @@ +#include "api.h" +#include "fips202.h" +#include "hqc.h" +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "sha2.h" +#include "vector.h" +#include +#include +/** + * @file kem.c + * @brief Implementation of api.h + */ + + + +/** + * @brief Keygen of the HQC_KEM IND_CAA2 scheme + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h. + * + * The secret key is composed of the seed used to generate vectors x and y. + * As a technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] pk String containing the public key + * @param[out] sk String containing the secret key + * @returns 0 if keygen is successful + */ +int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + + PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_keygen(pk, sk); + return 0; +} + + + +/** + * @brief Encapsulation of the HQC_KEM IND_CAA2 scheme + * + * @param[out] ct String containing the ciphertext + * @param[out] ss String containing the shared secret + * @param[in] pk String containing the public key + * @returns 0 if encapsulation is successful + */ +int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + + uint8_t theta[SHA512_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint64_t u[VEC_N_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Computing m + randombytes(m, VEC_K_SIZE_BYTES); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // 
Encrypting m + PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk); + + // Computing d + sha512(d, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS256_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS256_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Computing ciphertext + PQCLEAN_HQCRMRS256_CLEAN_hqc_ciphertext_to_string(ct, u, v, d); + + + return 0; +} + + + +/** + * @brief Decapsulation of the HQC_KEM IND_CAA2 scheme + * + * @param[out] ss String containing the shared secret + * @param[in] ct String containing the cipĥertext + * @param[in] sk String containing the secret key + * @returns 0 if decapsulation is successful, -1 otherwise + */ +int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + + uint8_t result; + uint64_t u[VEC_N_SIZE_64] = {0}; + uint64_t v[VEC_N1N2_SIZE_64] = {0}; + unsigned char d[SHA512_BYTES] = {0}; + unsigned char pk[PUBLIC_KEY_BYTES] = {0}; + uint8_t m[VEC_K_SIZE_BYTES] = {0}; + uint8_t theta[SHA512_BYTES] = {0}; + uint64_t u2[VEC_N_SIZE_64] = {0}; + uint64_t v2[VEC_N1N2_SIZE_64] = {0}; + unsigned char d2[SHA512_BYTES] = {0}; + unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0}; + + // Retrieving u, v and d from ciphertext + PQCLEAN_HQCRMRS256_CLEAN_hqc_ciphertext_from_string(u, v, d, ct); + + // Retrieving pk from sk + memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES); + + // Decryting + PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_decrypt(m, u, v, sk); + + // Computing theta + sha3_512(theta, m, VEC_K_SIZE_BYTES); + + // Encrypting m' + PQCLEAN_HQCRMRS256_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk); + + // Computing d' + sha512(d2, m, VEC_K_SIZE_BYTES); + + // Computing shared secret + memcpy(mc, m, 
VEC_K_SIZE_BYTES); + PQCLEAN_HQCRMRS256_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + PQCLEAN_HQCRMRS256_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES); + + // Abort if c != c' or d != d' + result = PQCLEAN_HQCRMRS256_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS256_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES); + result |= PQCLEAN_HQCRMRS256_CLEAN_vect_compare(d, d2, SHA512_BYTES); + result = (uint8_t) (-((int16_t) result) >> 15); + for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { + ss[i] &= ~result; + } + + + return -(result & 1); +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/parameters.h b/src/kem/hqc/hqc-rmrs-256/clean/parameters.h new file mode 100644 index 00000000..69d0f17c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/parameters.h @@ -0,0 +1,98 @@ +#ifndef HQC_PARAMETERS_H +#define HQC_PARAMETERS_H + + +/** + * @file parameters.h + * @brief Parameters of the HQC_KEM IND-CCA2 scheme + */ +#include "api.h" + + +#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/ + +/* + #define PARAM_N Define the parameter n of the scheme + #define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code) + #define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code) + #define PARAM_N1N2 Define the length in bits of the Concatenated code + #define PARAM_OMEGA Define the parameter omega of the scheme + #define PARAM_OMEGA_E Define the parameter omega_e of the scheme + #define PARAM_OMEGA_R Define the parameter omega_r of the scheme + #define PARAM_SECURITY Define the security level corresponding to the chosen parameters + #define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters + + #define SECRET_KEY_BYTES Define 
/*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=29
                                                The smallest power of 2 greater than 29+1 is 32=2^5
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
*/

#define PARAM_N                                 57637
#define PARAM_N1                                90
#define PARAM_N2                                640
#define PARAM_N1N2                              57600
#define PARAM_OMEGA                             131
#define PARAM_OMEGA_E                           149
#define PARAM_OMEGA_R                           149
#define PARAM_SECURITY                          256
#define PARAM_DFR_EXP                           256

#define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_SECRETKEYBYTES
#define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_PUBLICKEYBYTES
#define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_BYTES
#define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS256_CLEAN_CRYPTO_CIPHERTEXTBYTES

#define UTILS_REJECTION_THRESHOLD               16772367
#define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
#define VEC_K_SIZE_BYTES                        PARAM_K
#define VEC_N1_SIZE_BYTES                       PARAM_N1
#define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

#define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
#define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
#define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
#define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

#define PARAM_DELTA                             29
#define PARAM_M                                 8
#define PARAM_GF_POLY                           0x11D
#define PARAM_GF_MUL_ORDER                      255
#define PARAM_K                                 32
#define PARAM_G                                 59
#define PARAM_FFT                               5
#define RS_POLY_COEFS                           49,167,49,39,200,121,124,91,240,63,148,71,150,123,87,101,32,215,159,71,201,115,97,210,186,183,141,217,123,12,31,243,180,219,152,239,99,141,4,246,191,144,8,232,47,27,141,178,130,64,124,47,39,188,216,48,199,187,1

#define RED_MASK                                0x1fffffffff
#define SHA512_BYTES                            64
#define SEED_BYTES                              40
#define SEEDEXPANDER_MAX_LENGTH                 4294967295
/**
 * @brief Store a 64-bit value as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer (8 bytes)
 * @param[in] in Value to serialize
 */
void PQCLEAN_HQCRMRS256_CLEAN_store8(unsigned char *out, uint64_t in) {
    for (unsigned int byte = 0; byte < 8; byte++) {
        out[byte] = (unsigned char) ((in >> (8 * byte)) & 0xFF);
    }
}


/**
 * @brief Load a 64-bit value from 8 bytes, least significant byte first
 *
 * @param[in] in Source buffer (8 bytes)
 * @returns The assembled 64-bit value
 */
uint64_t PQCLEAN_HQCRMRS256_CLEAN_load8(const unsigned char *in) {
    uint64_t value = 0;

    // walk from the most significant byte down to the least significant one
    for (unsigned int byte = 8; byte-- > 0;) {
        value = (value << 8) | in[byte];
    }

    return value;
}

/**
 * @brief Convert a byte string into an array of 64-bit words
 *
 * Complete groups of 8 input bytes are converted with load8 until either the
 * input or the output is exhausted. If fewer than 8 bytes remain and an output
 * word is still available, they are packed into that word (least significant
 * byte first). Output words beyond that are left untouched.
 *
 * @param[out] out64 Destination array
 * @param[in] outlen Number of 64-bit words available in out64
 * @param[in] in8 Source byte string
 * @param[in] inlen Number of bytes in in8
 */
void PQCLEAN_HQCRMRS256_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t full_words = inlen / 8;
    size_t consumed;
    size_t remaining;
    uint64_t tail = 0;

    if (full_words > outlen) {
        full_words = outlen;
    }

    // whole 8-byte groups
    for (size_t w = 0; w < full_words; w++) {
        out64[w] = PQCLEAN_HQCRMRS256_CLEAN_load8(in8 + 8 * w);
    }

    consumed = 8 * full_words;
    if (consumed >= inlen || full_words >= outlen) {
        return;
    }

    // at most 7 trailing bytes go into one last word
    remaining = inlen - consumed;
    for (size_t k = remaining; k-- > 0;) {
        tail = (tail << 8) | in8[consumed + k];
    }
    out64[full_words] = tail;
}

/**
 * @brief Convert an array of 64-bit words into a byte string
 *
 * Bytes are emitted least significant first for each word; conversion stops
 * as soon as either outlen bytes were written or inlen words were consumed.
 *
 * @param[out] out8 Destination byte string
 * @param[in] outlen Number of bytes available in out8
 * @param[in] in64 Source array
 * @param[in] inlen Number of 64-bit words in in64
 */
void PQCLEAN_HQCRMRS256_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen && (pos / 8) < inlen; pos++) {
        const uint64_t word = in64[pos / 8];

        out8[pos] = (uint8_t) ((word >> ((pos % 8) * 8)) & 0xFF);
    }
}
y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] sk String containing the secret key + * @param[in] sk_seed Seed used to generate the secret key + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS256_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) { + memcpy(sk, sk_seed, SEED_BYTES); + sk += SEED_BYTES; + memcpy(sk, pk, PUBLIC_KEY_BYTES); +} + +/** + * @brief Parse a secret key from a string + * + * The secret key is composed of the seed used to generate vectors x and y. + * As technicality, the public key is appended to the secret key in order to respect NIST API. + * + * @param[out] x uint64_t representation of vector x + * @param[out] y uint32_t representation of vector y + * @param[out] pk String containing the public key + * @param[in] sk String containing the secret key + */ +void PQCLEAN_HQCRMRS256_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) { + AES_XOF_struct sk_seedexpander; + uint8_t sk_seed[SEED_BYTES] = {0}; + + memcpy(sk_seed, sk, SEED_BYTES); + sk += SEED_BYTES; + memcpy(pk, sk, PUBLIC_KEY_BYTES); + + seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA); + PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA); +} + +/** + * @brief Parse a public key into a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] pk String containing the public key + * @param[in] pk_seed Seed used to generate the public key + * @param[in] s uint8_t representation of vector s + */ +void PQCLEAN_HQCRMRS256_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) { + memcpy(pk, pk_seed, SEED_BYTES); + 
PQCLEAN_HQCRMRS256_CLEAN_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64); +} + + + +/** + * @brief Parse a public key from a string + * + * The public key is composed of the syndrome s as well as the seed used to generate the vector h + * + * @param[out] h uint8_t representation of vector h + * @param[out] s uint8_t representation of vector s + * @param[in] pk String containing the public key + */ +void PQCLEAN_HQCRMRS256_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) { + AES_XOF_struct pk_seedexpander; + uint8_t pk_seed[SEED_BYTES] = {0}; + + memcpy(pk_seed, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_HQCRMRS256_CLEAN_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES); + + seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH); + PQCLEAN_HQCRMRS256_CLEAN_vect_set_random(&pk_seedexpander, h); +} + + +/** + * @brief Parse a ciphertext into a string + * + * The ciphertext is composed of vectors u, v and hash d. + * + * @param[out] ct String containing the ciphertext + * @param[in] u uint8_t representation of vector u + * @param[in] v uint8_t representation of vector v + * @param[in] d String containing the hash d + */ +void PQCLEAN_HQCRMRS256_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) { + PQCLEAN_HQCRMRS256_CLEAN_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS256_CLEAN_store8_arr(ct, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(ct, d, SHA512_BYTES); +} + + +/** + * @brief Parse a ciphertext from a string + * + * The ciphertext is composed of vectors u, v and hash d. 
+ * + * @param[out] u uint8_t representation of vector u + * @param[out] v uint8_t representation of vector v + * @param[out] d String containing the hash d + * @param[in] ct String containing the ciphertext + */ +void PQCLEAN_HQCRMRS256_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) { + PQCLEAN_HQCRMRS256_CLEAN_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES); + ct += VEC_N_SIZE_BYTES; + PQCLEAN_HQCRMRS256_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, ct, VEC_N1N2_SIZE_BYTES); + ct += VEC_N1N2_SIZE_BYTES; + memcpy(d, ct, SHA512_BYTES); +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/parsing.h b/src/kem/hqc/hqc-rmrs-256/clean/parsing.h new file mode 100644 index 00000000..26eb332b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/parsing.h @@ -0,0 +1,36 @@ +#ifndef PARSING_H +#define PARSING_H + + +/** + * @file parsing.h + * @brief Header file for parsing.c + */ + +#include + +void PQCLEAN_HQCRMRS256_CLEAN_store8(unsigned char *out, uint64_t in); + +uint64_t PQCLEAN_HQCRMRS256_CLEAN_load8(const unsigned char *in); + +void PQCLEAN_HQCRMRS256_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen); + +void PQCLEAN_HQCRMRS256_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen); + + +void PQCLEAN_HQCRMRS256_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk); + +void PQCLEAN_HQCRMRS256_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk); + + +void PQCLEAN_HQCRMRS256_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s); + +void PQCLEAN_HQCRMRS256_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk); + + +void PQCLEAN_HQCRMRS256_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d); + +void PQCLEAN_HQCRMRS256_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct); + + 
// number of repeated code words
#define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

// copy bit 0 into all bits of a 32 bit value
#define BIT0MASK(x) (-((x) & 1))


/**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * The 128-bit codeword is written as four 32-bit quarters, each stored least
 * significant byte first:
 *   - message bits 0..4 select the patterns 0xaaaaaaaa, 0xcccccccc,
 *     0xf0f0f0f0, 0xff00ff00 and 0xffff0000, identical in all four quarters;
 *   - message bit 5 inverts quarters 1 and 3;
 *   - message bit 6 inverts quarters 2 and 3;
 *   - message bit 7 inverts the whole codeword.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message
 */
static void encode(uint8_t *word, uint8_t message) {
    static const uint32_t patterns[5] = {
        0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
    };
    uint32_t quarters[4];
    uint32_t base;
    uint32_t flip5;
    uint32_t flip6;

    // part shared by all four quarters: global inversion plus bits 0..4
    base = BIT0MASK(message >> 7);
    for (unsigned int bit = 0; bit < 5; bit++) {
        base ^= BIT0MASK(message >> bit) & patterns[bit];
    }

    flip5 = BIT0MASK(message >> 5);
    flip6 = BIT0MASK(message >> 6);

    quarters[0] = base;
    quarters[1] = base ^ flip5;
    quarters[2] = base ^ flip6;
    quarters[3] = base ^ flip5 ^ flip6;

    for (unsigned int q = 0; q < 4; q++) {
        for (unsigned int byte = 0; byte < 4; byte++) {
            word[4 * q + byte] = (uint8_t) ((quarters[q] >> (8 * byte)) & 0xff);
        }
    }
}
a-b-c+d-e+f+g-h + * This order of computation is chosen because it vectorises well. + * Likewise, this routine multiplies by H(7) in seven passes. + * + * @param[in,out] src Expanded codeword to transform (128 entries); overwritten, as it doubles as the intermediate buffer + * @param[out] dst Buffer of 128 entries that holds the transform after the seventh pass + */ +static void hadamard(uint16_t src[128], uint16_t dst[128]) { + // the passes move data: + // src -> dst -> src -> dst -> src -> dst -> src -> dst + // using p1 and p2 alternately + uint16_t *p1 = src; + uint16_t *p2 = dst; + uint16_t *p3; + for (uint32_t pass = 0; pass < 7; pass++) { + for (uint32_t i = 0; i < 64; i++) { + p2[i] = p1[2 * i] + p1[2 * i + 1]; + p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; + } + // swap p1, p2 for next round + p3 = p1; + p1 = p2; + p2 = p3; + } +} + + + +/** + * @brief Add multiple codewords into expanded codeword + * + * Accesses memory in order + * Note: this does not write the codewords as -1 or +1 as the green machine does + * instead, just 0 and 1 are used. + * The resulting hadamard transform has: + * all values are halved + * the first entry is 64 too high + * + * @param[out] dest Array of 128 entries receiving, for each codeword bit position, the sum of that bit over the MULTIPLICITY copies + * @param[in] src Array of 16 * MULTIPLICITY bytes holding the MULTIPLICITY repeated codewords + */ +static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) { + size_t part, bit, copy; + // start with the first copy + for (part = 0; part < 16; part++) { + for (bit = 0; bit < 8; bit++) { + dest[part * 8 + bit] = (uint16_t) ((src[part] >> bit) & 1); + } + } + // sum the rest of the copies + for (copy = 1; copy < MULTIPLICITY; copy++) { + for (part = 0; part < 16; part++) { + for (bit = 0; bit < 8; bit++) { + dest[part * 8 + bit] += (uint16_t) ((src[16 * copy + part] >> bit) & 1); + } + } + } +} + + + +/** + * @brief Finding the location of the highest value + * + * This is the final step of the green machine: find the location of the highest value, + * and add 128 if the peak is positive + * if there are two identical peaks, the peak with 
smallest value + * in the lowest 7 bits is taken + * @param[in] transform Array of 128 transform values, interpreted as signed 16-bit quantities + */ +static uint8_t find_peaks(const uint16_t transform[128]) { + uint16_t peak_abs = 0; + uint16_t peak = 0; + uint16_t pos = 0; + uint16_t t, abs, mask; + for (uint16_t i = 0; i < 128; i++) { + t = transform[i]; + abs = t ^ ((-(t >> 15)) & (t ^ -t)); // abs = |t|, reading t as a signed 16-bit value + mask = -(((uint16_t)(peak_abs - abs)) >> 15); + peak ^= mask & (peak ^ t); + pos ^= mask & (pos ^ i); + peak_abs ^= mask & (peak_abs ^ abs); + } + pos |= 128 & ((peak >> 15) - 1); + return (uint8_t) pos; +} + + + + +/** + * @brief Encodes a message into repeated RM(1,7) codewords + * + * The message consists of N1 bytes; each byte is encoded into PARAM_N2 bits, + * or MULTIPLICITY repeats of 128 bits + * + * @param[out] cdw Array of 16 * MULTIPLICITY * VEC_N1_SIZE_BYTES bytes receiving the encoded message + * @param[in] msg Array of VEC_N1_SIZE_BYTES bytes storing the message + */ +void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) { + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // encode first word + encode(&cdw[16 * i * MULTIPLICITY], msg[i]); + // copy to other identical codewords + for (size_t copy = 1; copy < MULTIPLICITY; copy++) { + memcpy(&cdw[16 * i * MULTIPLICITY + 16 * copy], &cdw[16 * i * MULTIPLICITY], 16); + } + } +} + + + +/** + * @brief Decodes the received word + * + * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane. 
+ * The theory of error-correcting codes @cite macwilliams1977theory + * + * @param[out] msg Array of VEC_N1_SIZE_BYTES bytes receiving the decoded message + * @param[in] cdw Array of 16 * MULTIPLICITY * VEC_N1_SIZE_BYTES bytes storing the received word + */ +void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) { + uint16_t expanded[128]; + uint16_t transform[128]; + for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { + // collect the codewords + expand_and_sum(expanded, &cdw[16 * i * MULTIPLICITY]); + // apply hadamard transform + hadamard(expanded, transform); + // fix the first entry to get the half Hadamard transform + transform[0] -= 64 * MULTIPLICITY; + // finish the decoding + msg[i] = find_peaks(transform); + } +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/reed_muller.h b/src/kem/hqc/hqc-rmrs-256/clean/reed_muller.h new file mode 100644 index 00000000..c221705c --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/reed_muller.h @@ -0,0 +1,18 @@ +#ifndef REED_MULLER_H +#define REED_MULLER_H + + +/** + * @file reed_muller.h + * Header file of reed_muller.c + */ +#include "parameters.h" +#include +#include + +void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/reed_solomon.c b/src/kem/hqc/hqc-rmrs-256/clean/reed_solomon.c new file mode 100644 index 00000000..9a8b393b --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/reed_solomon.c @@ -0,0 +1,349 @@ +#include "fft.h" +#include "gf.h" +#include "parameters.h" +#include "parsing.h" +#include "reed_solomon.h" +#include +#include +#include +/** + * @file reed_solomon.c + * Constant time implementation of Reed-Solomon codes + */ + + +static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); +static void compute_roots(uint8_t *error, uint16_t *sigma); +static 
void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); +static void correct_errors(uint8_t *cdw, const uint16_t *error_values); + +/** + * @brief Encodes a message of PARAM_K bytes into a Reed-Solomon codeword of PARAM_N1 bytes + * + * Following @cite lin1983error (Chapter 4 - Cyclic Codes), + * we perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register + * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code. + * + * @param[out] cdw Array of PARAM_N1 bytes receiving the encoded message + * @param[in] msg Array of PARAM_K bytes storing the message + */ +void PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) { + size_t i, j, k; + uint8_t gate_value = 0; + + uint16_t tmp[PARAM_G] = {0}; + uint16_t PARAM_RS_POLY [] = {RS_POLY_COEFS}; + uint8_t prev, x; + + for (i = 0; i < PARAM_N1; ++i) { + cdw[i] = 0; + } + + for (i = 0; i < PARAM_K; ++i) { + gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]); + + for (j = 0; j < PARAM_G; ++j) { + tmp[j] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gate_value, PARAM_RS_POLY[j]); + } + + prev = 0; + for (k = 0; k < PARAM_N1 - PARAM_K; k++) { + x = cdw[k]; + cdw[k] = (uint8_t) (prev ^ tmp[k]); + prev = x; + } + } + + memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K); +} + + + +/** + * @brief Computes 2 * PARAM_DELTA syndromes + * + * @param[in,out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes; must be zeroed by the caller because the results are XOR-accumulated into it + * @param[in] cdw Array of size PARAM_N1 storing the received vector + */ +void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { + for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { + for (size_t j = 1; j < PARAM_N1; ++j) { + syndromes[i] ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); + } + syndromes[i] ^= cdw[0]; + } 
+} + + + +/** + * @brief Computes the error locator polynomial (ELP) sigma + * + * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes)).
+ * We use the letter p for rho which is initialized at -1.
+ * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
+ * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
+ * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated.
+ * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA. + * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value + * and we only need to save its first PARAM_DELTA coefficients (sigma_copy[0 .. PARAM_DELTA - 1]). + * + * @returns the degree of the ELP sigma + * @param[in,out] sigma Zero-initialized array of size (at least) PARAM_DELTA + 1 receiving the ELP + * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes + */ +static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { + uint16_t deg_sigma = 0; + uint16_t deg_sigma_p = 0; + uint16_t deg_sigma_copy = 0; + uint16_t sigma_copy[PARAM_DELTA + 1] = {0}; + uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; + uint16_t pp = (uint16_t) -1; // 2*rho + uint16_t d_p = 1; + uint16_t d = syndromes[0]; + + uint16_t mask1, mask2, mask12; + uint16_t deg_X, deg_X_sigma_p; + uint16_t dd; + uint16_t mu; + + uint16_t i; + + sigma[0] = 1; + for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { + // Save sigma in case we need it to update X_sigma_p + memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); + deg_sigma_copy = deg_sigma; + + dd = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(d_p)); + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + sigma[i] ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(dd, X_sigma_p[i]); + } + + deg_X = mu - pp; + deg_X_sigma_p = deg_X + deg_sigma_p; + + // mask1 = 0xffff if(d != 0) and 0 otherwise + mask1 = -((uint16_t) - d >> 15); + + // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise + mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15); + + // mask12 = 0xffff if the deg_sigma increased and 0 otherwise + mask12 = mask1 & mask2; + deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma); + + if (mu == (2 * PARAM_DELTA - 1)) { + break; + } + + pp ^= mask12 & (mu ^ pp); + d_p ^= mask12 & (d ^ d_p); + for (i = PARAM_DELTA; i; --i) { + X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); + } + + deg_sigma_p ^= mask12 & 
(deg_sigma_copy ^ deg_sigma_p); + d = syndromes[mu + 1]; + + for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { + d ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]); + } + } + + return deg_sigma; +} + + + +/** + * @brief Computes the error polynomial error from the error locator polynomial sigma + * + * See function PQCLEAN_HQCRMRS256_CLEAN_fft for more details. + * + * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + * @note The FFT output is held in a local buffer w of 2^PARAM_M elements; no compact error representation is produced here + */ +static void compute_roots(uint8_t *error, uint16_t *sigma) { + uint16_t w[1 << PARAM_M] = {0}; + + PQCLEAN_HQCRMRS256_CLEAN_fft(w, sigma, PARAM_DELTA + 1); + PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(error, w); +} + + + +/** + * @brief Computes the polynomial z(x) + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. 
+ * + * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x) + * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial + * @param[in] degree Integer that is the degree of polynomial sigma + * @param[in] syndromes Array of 2 * PARAM_DELTA elements storing the syndromes + */ +static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) { + size_t i, j; + uint16_t mask; + + z[0] = 1; + + for (i = 1; i < PARAM_DELTA + 1; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] = mask & sigma[i]; + } + + z[1] ^= syndromes[0]; + + for (i = 2; i <= PARAM_DELTA; ++i) { + mask = -((uint16_t) (i - degree - 1) >> 15); + z[i] ^= mask & syndromes[i - 1]; + + for (j = 1; j < i; ++j) { + z[i] ^= mask & PQCLEAN_HQCRMRS256_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]); + } + } +} + + + +/** + * @brief Computes the error values + * + * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. + * + * @param[in,out] error_values Zero-initialized array of PARAM_N1 elements receiving the error values (accumulated with +=) + * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x) + * @param[in] error Array of which the first PARAM_N1 entries are read; a nonzero entry marks an error position + * @note At most PARAM_DELTA error positions are handled (size of beta_j and e_j) + */ +static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) { + uint16_t beta_j[PARAM_DELTA] = {0}; + uint16_t e_j[PARAM_DELTA] = {0}; + + uint16_t delta_counter; + uint16_t delta_real_value; + uint16_t found; + uint16_t mask1; + uint16_t mask2; + uint16_t tmp1; + uint16_t tmp2; + uint16_t inverse; + uint16_t inverse_power_j; + + // Compute the beta_{j_i} page 31 of the documentation + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; i++) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ 
delta_counter) >> 31)); // j == delta_counter + beta_j[j] += mask1 & mask2 & gf_exp[i]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } + delta_real_value = delta_counter; + + // Compute the e_{j_i} page 31 of the documentation + for (size_t i = 0; i < PARAM_DELTA; ++i) { + tmp1 = 1; + tmp2 = 1; + inverse = PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(beta_j[i]); + inverse_power_j = 1; + + for (size_t j = 1; j <= PARAM_DELTA; ++j) { + inverse_power_j = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(inverse_power_j, inverse); + tmp1 ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(inverse_power_j, z[j]); + } + for (size_t k = 1; k < PARAM_DELTA; ++k) { + tmp2 = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); + } + mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value + e_j[i] = mask1 & PQCLEAN_HQCRMRS256_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(tmp2)); + } + + // Place the delta e_{j_i} values at the right coordinates of the output vector + delta_counter = 0; + for (size_t i = 0; i < PARAM_N1; ++i) { + found = 0; + mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 + for (size_t j = 0; j < PARAM_DELTA; j++) { + mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter + error_values[i] += mask1 & mask2 & e_j[j]; + found += mask1 & mask2 & 1; + } + delta_counter += found; + } +} + + + +/** + * @brief Correct the errors + * + * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place + * @param[in] error_values Array of PARAM_N1 elements storing the error values + * @note Each cdw[i] is XORed with error_values[i] + */ +static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { + for (size_t i = 0; i < PARAM_N1; ++i) { + cdw[i] ^= error_values[i]; + } +} + + + +/** + * @brief Decodes the received word + * + * This function relies on six steps: + *
    + *
  1. The first step is the computation of the 2*PARAM_DELTA syndromes. + *
  2. The second step is the computation of the error-locator polynomial sigma. + *
  3. The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses. + *
  4. The fourth step is the computation of the polynomial z(x). + *
  5. The fifth step is the computation of the error values. + *
  6. The sixth step is the correction of the errors in the received polynomial. + *
+ * For a more complete picture on Reed-Solomon decoding, see Shu Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error + * + * @param[out] msg Array of PARAM_K bytes receiving the decoded message + * @param[in,out] cdw Array of PARAM_N1 bytes storing the received word; its errors are corrected in place + */ +void PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) { + uint16_t syndromes[2 * PARAM_DELTA] = {0}; + uint16_t sigma[1 << PARAM_FFT] = {0}; + uint8_t error[1 << PARAM_M] = {0}; + uint16_t z[PARAM_N1] = {0}; + uint16_t error_values[PARAM_N1] = {0}; + uint16_t deg; + + // Calculate the 2*PARAM_DELTA syndromes + compute_syndromes(syndromes, cdw); + + // Compute the error locator polynomial sigma + // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room + deg = compute_elp(sigma, syndromes); + + // Compute the error polynomial error + compute_roots(error, sigma); + + // Compute the polynomial z(x) + compute_z_poly(z, sigma, deg, syndromes); + + // Compute the error values + compute_error_values(error_values, z, error); + + // Correct the errors + correct_errors(cdw, error_values); + + // Retrieve the message from the decoded codeword + memcpy(msg, cdw + (PARAM_G - 1), PARAM_K); + +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/reed_solomon.h b/src/kem/hqc/hqc-rmrs-256/clean/reed_solomon.h new file mode 100644 index 00000000..dd46fde9 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/reed_solomon.h @@ -0,0 +1,20 @@ +#ifndef REED_SOLOMON_H +#define REED_SOLOMON_H + + +/** + * @file reed_solomon.h + * Header file of reed_solomon.c + */ +#include "parameters.h" +#include +#include + +static const uint16_t alpha_ij_pow [58][89] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 
95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223, 91, 113, 217, 67, 17, 68, 13, 52, 208, 103, 129, 62, 248, 199, 59, 236, 151, 102, 133, 46, 184, 218, 79, 33, 132, 42, 168, 154, 82, 85, 73, 57, 228, 183, 230, 191, 198, 63, 252, 215, 123, 241, 227, 171}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169, 33, 21, 168, 41, 85, 146, 228, 115, 191, 145, 252, 179, 241, 219, 150, 196, 110, 87, 130, 100, 7, 56, 221, 166, 89, 242, 195, 86, 138, 36, 61, 245, 251, 139, 44, 125, 207, 54, 173, 1, 8, 64, 58, 205}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150, 149, 165, 130, 200, 28, 221, 81, 121, 195, 172, 18, 61, 247, 203, 44, 250, 27, 173, 2, 32, 58, 135, 152, 117, 3, 48, 39, 74, 212, 193, 140, 40, 186, 111, 190, 47, 202, 60, 231, 214, 225, 182, 175, 34}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36, 244, 235, 44, 233, 108, 1, 32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38, 117, 12, 39, 53, 193, 10, 186, 161, 47, 
15, 231, 127, 182, 134, 26, 206, 237, 197, 23, 169, 21, 41, 146, 115, 145, 179, 219, 196, 87, 100, 56, 166, 242, 86, 36, 245, 139, 125, 54, 1, 64, 205, 45, 143}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 3, 157, 53, 159, 40, 185, 194, 137, 231, 254, 226, 68, 189, 248, 197, 46, 158, 168, 170, 183, 145, 123, 75, 110, 25, 28, 166, 249, 69, 61, 235, 176, 54, 2, 29, 38, 234, 48, 37, 119, 5, 186, 95, 188, 120, 214, 91, 134, 52, 31}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26, 31, 118, 23, 158, 77, 146, 209, 229, 219, 55, 25, 56, 162, 155, 36, 243, 88, 54, 4, 116, 45, 6, 78, 181, 5, 105, 97, 137, 211, 223, 67, 52, 62, 236, 46, 33, 154, 57, 191, 215, 171, 110, 50, 112}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85, 115, 252, 219, 110, 100, 221, 242, 138, 245, 44, 54, 8, 205, 117, 96, 53, 70, 186, 97, 15, 107, 182, 68, 206, 59, 23, 33, 41, 228, 145, 241, 196, 130, 56, 89, 86, 61, 139, 207, 1, 58, 45, 12, 37}, {116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100, 167, 239, 36, 235, 233, 1, 116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44, 216, 128, 
45, 48, 106, 10, 222, 202, 107, 226, 52, 237, 133, 66, 85, 209, 123, 196, 50, 167, 195, 144, 11, 54, 32, 76, 12, 148, 140, 185, 188, 211, 182, 13, 124, 102, 158, 82, 115, 215, 49, 130, 224, 249}, {205, 143, 37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221, 195, 61, 44, 173, 58, 117, 39, 193, 186, 47, 231, 182, 26, 237, 23, 21, 146, 145, 219, 87, 56, 242, 36, 139, 54, 64, 45, 96, 181, 80, 97, 120, 223, 68, 62, 102, 33, 85, 191, 241, 110, 7, 89, 138, 251, 207, 8, 38, 12, 53, 10, 161, 15, 127, 134, 206, 197, 169, 41, 115, 179, 196, 100, 166, 86, 245, 125, 1, 205, 143, 37, 70}, {135, 6, 53, 20, 190, 120, 163, 13, 237, 46, 84, 228, 229, 98, 100, 81, 69, 251, 131, 32, 45, 192, 238, 186, 94, 187, 217, 189, 236, 169, 82, 209, 241, 220, 28, 242, 72, 22, 173, 116, 201, 37, 140, 222, 15, 254, 34, 62, 204, 132, 146, 63, 75, 130, 167, 43, 245, 250, 4, 38, 24, 212, 80, 194, 253, 182, 52, 147, 184, 77, 183, 179, 149, 141, 89, 9, 203, 54, 128, 180, 39, 159, 210, 101, 214, 67, 206, 151, 158}, {19, 24, 181, 93, 94, 107, 67, 129, 102, 132, 57, 252, 98, 200, 89, 18, 11, 173, 232, 3, 53, 40, 194, 231, 226, 189, 197, 158, 170, 145, 75, 25, 166, 69, 235, 54, 29, 234, 37, 5, 95, 120, 91, 52, 59, 218, 82, 191, 227, 174, 221, 43, 247, 207, 32, 90, 39, 35, 111, 15, 225, 136, 237, 92, 77, 115, 246, 220, 56, 239, 122, 125, 4, 76, 96, 238, 105, 101, 177, 17, 62, 133, 42, 228, 215, 149, 7, 121, 72}, {38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185, 15, 223, 26, 59, 169, 85, 145, 150, 100, 89, 36, 44, 1, 38, 96, 193, 185}, {76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, 2, 152, 39, 140, 190, 231, 175, 31, 23, 77, 209, 219, 25, 162, 36, 88, 4, 45, 78, 5, 97, 211, 67, 62, 46, 154, 191, 171, 50, 
89, 72, 176, 8, 90, 156, 10, 194, 187, 134, 124, 92, 41, 99, 75, 100, 178, 144, 125, 16, 180, 37, 20, 153, 107, 17, 248, 184, 82, 198, 150, 200, 121, 61, 250, 32, 117, 74, 40, 47, 214, 34, 237, 109, 164}, {152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11, 1, 152, 78, 10, 153, 214, 68, 147, 79, 146, 215, 220, 221, 69, 11}, {45, 37, 80, 101, 223, 208, 102, 168, 191, 150, 7, 195, 251, 173, 38, 39, 10, 47, 127, 26, 197, 21, 115, 219, 100, 242, 245, 54, 205, 96, 70, 97, 107, 68, 59, 33, 228, 241, 130, 89, 61, 207, 58, 12, 193, 161, 231, 134, 237, 169, 146, 179, 87, 166, 36, 125, 64, 143, 181, 185, 120, 217, 62, 184, 85, 252, 110, 221, 138, 44, 8, 117, 53, 186, 15, 182, 206, 23, 41, 145, 196, 56, 86, 139, 1, 45, 37, 80, 101}, {90, 148, 186, 30, 226, 62, 109, 73, 179, 174, 162, 61, 131, 232, 96, 140, 153, 127, 52, 51, 168, 99, 98, 56, 172, 22, 8, 234, 212, 185, 240, 67, 237, 79, 114, 241, 25, 121, 245, 108, 19, 39, 20, 188, 223, 189, 133, 41, 63, 55, 221, 9, 176, 64, 3, 238, 161, 211, 34, 59, 66, 183, 219, 200, 239, 251, 71, 152, 37, 160, 137, 182, 129, 92, 85, 229, 165, 166, 72, 233, 58, 24, 35, 97, 214, 13, 197, 42, 209}, {180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108, 38, 156, 160, 15, 226, 124, 169, 114, 255, 100, 239, 235, 1, 180, 106, 185, 253, 17, 59, 132, 230, 150, 28, 172, 44, 32, 3, 193, 190, 214, 26, 51, 77, 145, 55, 167, 36, 233, 116, 96, 5, 94, 223, 103, 46, 85, 215, 174, 89, 244, 108}, {117, 181, 161, 107, 26, 102, 41, 252, 87, 89, 245, 173, 45, 53, 185, 231, 68, 197, 168, 145, 110, 166, 61, 54, 38, 37, 186, 120, 134, 59, 21, 191, 196, 221, 36, 207, 205, 39, 
80, 15, 217, 237, 33, 115, 150, 56, 138, 125, 58, 96, 10, 101, 182, 62, 169, 228, 219, 7, 86, 44, 64, 12, 70, 47, 223, 206, 184, 146, 241, 100, 195, 139, 8, 143, 193, 97, 127, 208, 23, 85, 179, 130, 242, 251, 1, 117, 181, 161, 107}, {234, 238, 97, 254, 103, 184, 57, 227, 7, 172, 176, 58, 192, 40, 15, 175, 147, 21, 99, 55, 166, 122, 216, 45, 106, 222, 107, 52, 133, 85, 123, 50, 195, 11, 32, 12, 140, 188, 182, 124, 158, 115, 49, 224, 36, 131, 19, 37, 105, 253, 68, 151, 154, 252, 174, 121, 251, 2, 201, 193, 194, 225, 206, 109, 114, 219, 14, 69, 125, 116, 157, 80, 30, 67, 59, 42, 198, 110, 81, 244, 173, 90, 212, 161, 214, 104, 23, 170, 246}, {201, 159, 47, 91, 124, 33, 209, 149, 166, 244, 71, 117, 238, 194, 223, 31, 79, 115, 98, 167, 61, 216, 90, 181, 190, 254, 206, 218, 213, 150, 224, 72, 54, 152, 106, 161, 177, 189, 184, 114, 171, 56, 18, 131, 38, 148, 111, 107, 104, 46, 146, 227, 14, 138, 233, 135, 37, 210, 211, 26, 133, 170, 241, 141, 172, 125, 232, 78, 186, 253, 136, 102, 164, 123, 100, 43, 88, 58, 157, 160, 120, 34, 151, 41, 215, 25, 195, 22, 128}, {143, 70, 101, 217, 59, 168, 252, 130, 195, 44, 58, 39, 186, 231, 26, 23, 146, 219, 56, 36, 54, 45, 181, 97, 223, 62, 33, 191, 110, 89, 251, 8, 12, 10, 15, 134, 197, 41, 179, 100, 86, 125, 205, 37, 185, 107, 208, 184, 228, 150, 221, 61, 173, 117, 193, 47, 182, 237, 21, 145, 87, 242, 139, 64, 96, 80, 120, 68, 102, 85, 241, 7, 138, 207, 38, 53, 161, 127, 206, 169, 115, 196, 166, 245, 1, 143, 70, 101, 217}, {3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55, 89, 235, 32, 96, 160, 253, 26, 46, 114, 150, 167, 244, 1, 3, 5, 15, 17, 51, 85, 255, 28, 36, 108, 180, 193, 94, 226, 59, 77, 215, 100, 172, 233, 38, 106, 190, 223, 124, 132, 145, 174, 239, 44, 116, 156, 185, 214, 103, 169, 230, 55}, {6, 20, 120, 13, 46, 228, 98, 81, 251, 32, 192, 186, 187, 189, 169, 209, 220, 242, 22, 116, 37, 222, 254, 62, 132, 
63, 130, 43, 250, 38, 212, 194, 182, 147, 77, 179, 141, 9, 54, 180, 159, 101, 67, 151, 85, 227, 112, 61, 142, 3, 10, 60, 136, 23, 114, 49, 166, 243, 16, 96, 93, 211, 208, 218, 230, 110, 121, 11, 58, 156, 111, 127, 31, 66, 145, 65, 155, 125, 19, 106, 97, 91, 199, 168, 215, 200, 138, 27, 90}, {12, 80, 231, 208, 169, 191, 87, 195, 125, 38, 181, 47, 217, 197, 85, 219, 221, 245, 8, 96, 186, 107, 206, 33, 145, 130, 86, 207, 45, 193, 101, 134, 102, 146, 150, 166, 251, 64, 39, 185, 127, 62, 21, 252, 100, 138, 54, 117, 70, 15, 68, 23, 228, 196, 89, 139, 58, 37, 161, 223, 237, 168, 179, 7, 36, 173, 143, 10, 120, 26, 184, 115, 110, 242, 44, 205, 53, 97, 182, 59, 41, 241, 56, 61, 1, 12, 80, 231, 208}, {24, 93, 107, 129, 132, 252, 200, 18, 173, 3, 40, 231, 189, 158, 145, 25, 69, 54, 234, 5, 120, 52, 218, 191, 174, 43, 207, 90, 35, 15, 136, 92, 115, 220, 239, 125, 76, 238, 101, 17, 133, 228, 149, 121, 44, 135, 212, 47, 175, 51, 146, 49, 162, 139, 116, 148, 97, 113, 236, 85, 171, 83, 251, 128, 156, 161, 163, 147, 41, 255, 224, 245, 16, 157, 185, 254, 248, 168, 123, 28, 61, 2, 48, 186, 214, 31, 21, 229, 141}, {48, 105, 127, 248, 77, 241, 224, 247, 64, 156, 95, 182, 236, 170, 150, 162, 11, 205, 212, 94, 134, 133, 213, 110, 239, 250, 45, 35, 30, 26, 218, 99, 130, 69, 108, 143, 40, 211, 206, 132, 229, 7, 144, 2, 96, 210, 254, 237, 154, 255, 221, 243, 128, 37, 190, 113, 197, 73, 49, 89, 22, 135, 181, 188, 17, 23, 183, 220, 195, 233, 90, 70, 60, 52, 169, 198, 25, 138, 216, 3, 80, 187, 129, 21, 215, 14, 61, 4, 192}, {96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59, 85, 150, 89, 44, 38, 193, 15, 26, 169, 145, 100, 36, 1, 96, 185, 223, 59}, {192, 222, 182, 151, 114, 110, 155, 27, 143, 160, 177, 237, 82, 75, 89, 88, 152, 70, 240, 
103, 21, 123, 224, 251, 116, 212, 101, 136, 218, 145, 200, 144, 8, 78, 190, 217, 204, 183, 87, 172, 216, 12, 105, 225, 59, 170, 98, 242, 250, 180, 10, 211, 31, 168, 255, 83, 139, 135, 238, 15, 52, 158, 252, 14, 244, 64, 74, 153, 134, 46, 209, 130, 9, 142, 96, 111, 91, 197, 57, 55, 195, 131, 201, 80, 214, 248, 41, 171, 162}, {157, 95, 217, 133, 230, 130, 18, 2, 39, 190, 175, 23, 209, 25, 36, 4, 78, 97, 67, 46, 191, 50, 72, 8, 156, 194, 134, 92, 99, 100, 144, 16, 37, 153, 17, 184, 198, 200, 61, 32, 74, 47, 34, 109, 145, 141, 122, 64, 148, 94, 68, 218, 63, 7, 244, 128, 53, 188, 136, 169, 126, 14, 245, 29, 106, 101, 13, 79, 252, 28, 247, 58, 212, 202, 26, 158, 229, 56, 243, 116, 181, 137, 52, 33, 215, 112, 251, 232, 119}, {39, 97, 134, 184, 145, 7, 245, 58, 181, 15, 208, 21, 241, 166, 44, 45, 10, 107, 237, 85, 196, 195, 54, 12, 185, 182, 102, 115, 130, 36, 8, 37, 47, 68, 169, 252, 56, 251, 205, 193, 120, 206, 168, 219, 89, 125, 117, 80, 127, 59, 146, 110, 86, 173, 96, 161, 217, 23, 191, 100, 61, 64, 53, 101, 26, 33, 179, 221, 139, 38, 70, 231, 62, 41, 150, 242, 207, 143, 186, 223, 197, 228, 87, 138, 1, 39, 97, 134, 184}, {78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69, 1, 78, 153, 68, 79, 215, 221, 11, 152, 10, 214, 147, 146, 220, 69}, {156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15, 103, 77, 150, 239, 108, 96, 190, 17, 169, 215, 167, 44, 180, 160, 223, 51, 230, 100, 244, 116, 193, 253, 124, 85, 55, 172, 1, 156, 94, 26, 132, 255, 89, 233, 3, 185, 226, 46, 145, 28, 235, 38, 5, 214, 59, 114, 174, 36, 32, 106, 15, 103, 77, 150, 239, 108, 96, 190, 17, 169, 215, 167, 44, 180, 160}, {37, 101, 208, 168, 150, 195, 173, 39, 47, 26, 21, 219, 
242, 54, 96, 97, 68, 33, 241, 89, 207, 12, 161, 134, 169, 179, 166, 125, 143, 185, 217, 184, 252, 221, 44, 117, 186, 182, 23, 145, 56, 139, 45, 80, 223, 102, 191, 7, 251, 38, 10, 127, 197, 115, 100, 245, 205, 70, 107, 59, 228, 130, 61, 58, 193, 231, 237, 146, 87, 36, 64, 181, 120, 62, 85, 110, 138, 8, 53, 15, 206, 41, 196, 86, 1, 37, 101, 208, 168}, {74, 137, 206, 82, 55, 138, 16, 212, 120, 124, 73, 87, 72, 29, 193, 211, 147, 228, 25, 244, 205, 140, 177, 197, 230, 141, 251, 76, 40, 223, 204, 198, 56, 11, 180, 186, 113, 92, 252, 167, 176, 143, 111, 67, 169, 123, 162, 207, 24, 190, 68, 66, 227, 242, 108, 157, 47, 52, 84, 150, 155, 142, 37, 202, 103, 41, 149, 69, 8, 106, 60, 62, 170, 165, 36, 128, 238, 231, 199, 114, 130, 122, 232, 70, 214, 236, 115, 200, 243}, {148, 30, 62, 73, 174, 61, 232, 140, 127, 51, 99, 56, 22, 234, 185, 67, 79, 241, 121, 108, 39, 188, 189, 41, 55, 9, 64, 238, 211, 59, 183, 200, 251, 152, 160, 182, 92, 229, 166, 233, 24, 97, 13, 42, 150, 43, 2, 53, 60, 124, 146, 65, 122, 205, 5, 254, 102, 198, 112, 44, 201, 111, 134, 158, 255, 242, 216, 78, 101, 103, 82, 110, 18, 128, 193, 187, 118, 115, 141, 235, 45, 93, 113, 184, 215, 81, 207, 48, 194}, {53, 120, 237, 228, 100, 251, 45, 186, 217, 169, 241, 242, 173, 37, 15, 62, 146, 130, 245, 38, 80, 182, 184, 179, 89, 54, 39, 101, 206, 85, 87, 61, 205, 10, 223, 23, 252, 166, 207, 96, 47, 208, 41, 110, 36, 58, 70, 127, 102, 145, 221, 125, 12, 97, 26, 168, 196, 138, 64, 193, 107, 197, 191, 56, 44, 143, 161, 68, 21, 150, 86, 8, 181, 231, 59, 115, 7, 139, 117, 185, 134, 33, 219, 195, 1, 53, 120, 237, 228}, {106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180, 185, 17, 132, 150, 172, 32, 193, 214, 51, 145, 167, 233, 96, 94, 103, 85, 174, 244, 38, 160, 226, 169, 255, 239, 1, 106, 253, 59, 230, 28, 44, 3, 190, 26, 77, 55, 36, 116, 5, 223, 46, 215, 89, 108, 156, 15, 124, 114, 100, 235, 180, 185, 17, 132, 150, 172, 32, 193, 214, 51, 145, 167, 233}, {212, 
211, 197, 198, 167, 207, 157, 202, 62, 114, 200, 139, 201, 95, 26, 154, 220, 61, 19, 160, 217, 158, 171, 86, 32, 159, 127, 133, 229, 89, 216, 74, 120, 147, 230, 56, 176, 24, 47, 103, 170, 130, 243, 90, 185, 34, 42, 196, 18, 116, 10, 91, 109, 241, 239, 2, 181, 187, 151, 145, 83, 131, 39, 137, 124, 228, 141, 11, 143, 190, 52, 41, 165, 122, 38, 93, 175, 33, 75, 172, 64, 35, 254, 23, 215, 178, 173, 148, 240}, {181, 107, 102, 252, 89, 173, 53, 231, 197, 145, 166, 54, 37, 120, 59, 191, 221, 207, 39, 15, 237, 115, 56, 125, 96, 101, 62, 228, 7, 44, 12, 47, 206, 146, 100, 139, 143, 97, 208, 85, 130, 251, 117, 161, 26, 41, 87, 245, 45, 185, 68, 168, 110, 61, 38, 186, 134, 21, 196, 36, 205, 80, 217, 33, 150, 138, 58, 10, 182, 169, 219, 86, 64, 70, 223, 184, 241, 195, 8, 193, 127, 23, 179, 242, 1, 181, 107, 102, 252}, {119, 177, 23, 123, 239, 8, 159, 225, 184, 255, 43, 64, 140, 91, 169, 171, 69, 58, 20, 226, 33, 49, 18, 205, 160, 67, 21, 149, 144, 38, 105, 34, 168, 220, 244, 45, 111, 13, 41, 174, 243, 117, 95, 104, 85, 25, 203, 143, 194, 103, 146, 200, 22, 12, 94, 31, 228, 14, 176, 96, 202, 248, 115, 112, 233, 39, 30, 147, 191, 167, 27, 37, 240, 236, 145, 81, 216, 53, 211, 51, 252, 178, 142, 181, 214, 133, 179, 249, 4}, {238, 254, 184, 227, 172, 58, 40, 175, 21, 55, 122, 45, 222, 52, 85, 50, 11, 12, 188, 124, 115, 224, 131, 37, 253, 151, 252, 121, 2, 193, 225, 109, 219, 69, 116, 80, 67, 42, 110, 244, 90, 161, 104, 170, 100, 22, 24, 101, 248, 230, 221, 27, 74, 231, 51, 229, 242, 4, 159, 223, 218, 171, 138, 232, 160, 134, 84, 220, 245, 180, 95, 208, 73, 200, 44, 48, 202, 237, 209, 167, 54, 148, 211, 102, 215, 249, 8, 35, 163}, {193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 145, 89, 1, 193, 223, 169, 150, 36, 38, 185, 26, 85, 100, 44, 96, 15, 59, 
145, 89, 1, 193, 223, 169, 150}, {159, 91, 33, 149, 244, 117, 194, 31, 115, 167, 216, 181, 254, 218, 150, 72, 152, 161, 189, 114, 56, 131, 148, 107, 46, 227, 138, 135, 210, 26, 170, 141, 125, 78, 253, 102, 123, 43, 58, 160, 34, 41, 25, 22, 96, 30, 236, 252, 249, 32, 10, 175, 84, 87, 235, 6, 101, 199, 198, 89, 2, 35, 182, 66, 55, 245, 234, 153, 62, 230, 83, 173, 119, 225, 169, 49, 144, 45, 95, 103, 228, 112, 27, 53, 214, 92, 219, 9, 19}, {35, 113, 21, 165, 235, 12, 137, 118, 252, 239, 128, 80, 34, 82, 100, 176, 78, 231, 133, 255, 138, 19, 111, 208, 114, 112, 54, 212, 254, 169, 98, 122, 117, 153, 124, 191, 162, 2, 70, 226, 42, 87, 203, 24, 15, 236, 229, 195, 29, 160, 68, 164, 200, 125, 156, 211, 23, 227, 9, 38, 222, 189, 228, 224, 108, 181, 225, 79, 196, 244, 234, 47, 248, 99, 89, 4, 140, 217, 84, 174, 139, 48, 30, 197, 215, 155, 58, 93, 136}, {70, 217, 168, 130, 44, 39, 231, 23, 219, 36, 45, 97, 62, 191, 89, 8, 10, 134, 41, 100, 125, 37, 107, 184, 150, 61, 117, 47, 237, 145, 242, 64, 80, 68, 85, 7, 207, 53, 127, 169, 196, 245, 143, 101, 59, 252, 195, 58, 186, 26, 146, 56, 54, 181, 223, 33, 110, 251, 12, 15, 197, 179, 86, 205, 185, 208, 228, 221, 173, 193, 182, 21, 87, 139, 96, 120, 102, 241, 138, 38, 161, 206, 115, 166, 1, 70, 217, 168, 130}, {140, 67, 41, 200, 233, 53, 254, 158, 110, 235, 48, 120, 204, 227, 36, 90, 153, 237, 63, 239, 58, 105, 104, 228, 167, 142, 70, 175, 154, 100, 250, 148, 127, 79, 55, 251, 24, 60, 102, 255, 18, 45, 194, 248, 145, 249, 29, 186, 52, 114, 221, 71, 35, 217, 77, 50, 125, 74, 177, 169, 149, 243, 12, 30, 51, 241, 9, 152, 97, 124, 198, 242, 128, 93, 26, 57, 224, 173, 159, 226, 168, 25, 176, 37, 214, 218, 196, 247, 6}, {5, 17, 85, 28, 108, 193, 226, 77, 100, 233, 106, 223, 132, 174, 44, 156, 214, 169, 55, 235, 96, 253, 46, 150, 244, 3, 15, 51, 255, 36, 180, 94, 59, 215, 172, 38, 190, 124, 145, 239, 116, 185, 103, 230, 89, 32, 160, 26, 114, 167, 1, 5, 17, 85, 28, 108, 193, 226, 77, 100, 233, 106, 223, 132, 174, 44, 156, 214, 169, 55, 235, 
96, 253, 46, 150, 244, 3, 15, 51, 255, 36, 180, 94, 59, 215, 172, 38, 190, 124}, {10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221, 1, 10, 68, 146, 221}, {20, 13, 228, 81, 32, 186, 189, 209, 242, 116, 222, 62, 63, 43, 38, 194, 147, 179, 9, 180, 101, 151, 227, 61, 3, 60, 23, 49, 243, 96, 211, 218, 110, 11, 156, 127, 66, 65, 125, 106, 91, 168, 200, 27, 193, 175, 164, 56, 71, 5, 68, 57, 83, 8, 160, 104, 115, 178, 29, 185, 129, 198, 195, 135, 190, 237, 229, 69, 45, 94, 236, 241, 72, 201, 15, 204, 75, 245, 24, 253, 184, 149, 203, 39, 214, 158, 87, 88, 148}, {40, 52, 115, 121, 116, 161, 248, 229, 138, 180, 202, 102, 75, 247, 96, 187, 79, 87, 176, 106, 182, 154, 14, 173, 5, 136, 228, 162, 128, 185, 31, 63, 86, 152, 94, 197, 227, 122, 12, 253, 109, 110, 22, 74, 223, 84, 200, 54, 35, 17, 146, 83, 16, 186, 103, 99, 195, 19, 194, 59, 246, 72, 143, 60, 46, 196, 203, 78, 127, 132, 25, 207, 238, 175, 85, 224, 2, 80, 104, 230, 242, 232, 95, 237, 215, 9, 117, 137, 204}, {80, 208, 191, 195, 38, 47, 197, 219, 245, 96, 107, 33, 130, 207, 193, 134, 146, 166, 64, 185, 62, 252, 138, 117, 15, 23, 196, 139, 37, 223, 168, 7, 173, 10, 26, 115, 242, 205, 97, 59, 241, 61, 12, 231, 169, 87, 125, 181, 217, 85, 221, 8, 186, 206, 145, 86, 45, 101, 102, 150, 251, 39, 127, 21, 100, 54, 70, 68, 228, 89, 58, 161, 237, 179, 36, 143, 120, 184, 110, 44, 53, 182, 41, 56, 1, 80, 208, 191, 195}, {160, 103, 145, 172, 180, 15, 46, 55, 44, 106, 226, 85, 167, 32, 185, 124, 215, 36, 3, 253, 169, 174, 233, 193, 17, 114, 89, 116, 190, 59, 255, 244, 96, 214, 132, 100, 108, 5, 26, 230, 239, 38, 94, 51, 150, 235, 156, 223, 77, 28, 1, 160, 103, 145, 172, 180, 15, 46, 55, 44, 106, 226, 85, 167, 32, 185, 124, 215, 36, 3, 
253, 169, 174, 233, 193, 17, 114, 89, 116, 190, 59, 255, 244, 96, 214, 132, 100, 108, 5}, {93, 129, 252, 18, 3, 231, 158, 25, 54, 5, 52, 191, 43, 90, 15, 92, 220, 125, 238, 17, 228, 121, 135, 47, 51, 49, 139, 148, 113, 85, 83, 128, 161, 147, 255, 245, 157, 254, 168, 28, 2, 186, 31, 229, 36, 6, 211, 33, 50, 108, 10, 104, 99, 86, 180, 30, 184, 165, 250, 193, 34, 213, 242, 19, 94, 102, 98, 11, 53, 226, 170, 166, 29, 95, 59, 227, 247, 39, 225, 77, 56, 4, 105, 62, 215, 72, 12, 187, 66}, {186, 62, 179, 61, 96, 127, 168, 56, 8, 185, 237, 241, 245, 39, 223, 41, 221, 64, 161, 59, 219, 251, 37, 182, 85, 166, 58, 97, 197, 150, 139, 53, 217, 146, 89, 205, 47, 102, 196, 44, 181, 134, 228, 242, 38, 101, 23, 110, 125, 193, 68, 115, 195, 45, 15, 184, 87, 207, 70, 26, 191, 86, 117, 120, 169, 130, 54, 10, 208, 145, 138, 143, 231, 33, 100, 173, 80, 206, 252, 36, 12, 107, 21, 7, 1, 186, 62, 179, 61}, {105, 248, 241, 247, 156, 182, 170, 162, 205, 94, 133, 110, 250, 35, 26, 99, 69, 143, 211, 132, 7, 2, 210, 237, 255, 243, 37, 113, 73, 89, 135, 188, 23, 220, 233, 70, 52, 198, 138, 3, 187, 21, 14, 4, 185, 199, 227, 251, 74, 226, 146, 178, 19, 101, 46, 165, 207, 140, 104, 145, 9, 6, 107, 42, 28, 8, 111, 147, 219, 235, 148, 217, 57, 121, 38, 202, 92, 87, 131, 5, 208, 63, 18, 12, 214, 84, 56, 16, 222}}; + +void PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg); + +void PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw); + + +#endif diff --git a/src/kem/hqc/hqc-rmrs-256/clean/vector.c b/src/kem/hqc/hqc-rmrs-256/clean/vector.c new file mode 100644 index 00000000..139e5bc3 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/vector.c @@ -0,0 +1,176 @@ +#include "nistseedexpander.h" +#include "parameters.h" +#include "parsing.h" +#include "randombytes.h" +#include "vector.h" +#include +#include +/** + * @file vector.c + * @brief Implementation of vectors sampling and some utilities for the HQC scheme + */ + + +/** + * @brief Generates a vector of 
a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. The vector + * is stored by position. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follows: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It returns \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). + * + * @param[out] v Pointer to an array + * @param[in] weight Integer that is the Hamming weight + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) { + size_t random_bytes_size = 3 * weight; + uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R + uint8_t inc; + size_t i, j; + + i = 0; + j = random_bytes_size; + while (i < weight) { + do { + if (j == random_bytes_size) { + seedexpander(ctx, rand_bytes, random_bytes_size); + j = 0; + } + + v[i] = ((uint32_t) rand_bytes[j++]) << 16; + v[i] |= ((uint32_t) rand_bytes[j++]) << 8; + v[i] |= rand_bytes[j++]; + + } while (v[i] >= UTILS_REJECTION_THRESHOLD); + + v[i] = v[i] % PARAM_N; + + inc = 1; + for (size_t k = 0; k < i; k++) { + if (v[k] == v[i]) { + inc = 0; + } + } + i += inc; + } +} + + + +/** + * @brief Generates a vector of a given Hamming weight + * + * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter weight. + * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1].
Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follows: + * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$. + * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$ + * 3. If \f$ x \geq t\f$, go to 1 + * 4. It returns \f$ r = x \mod 70853\f$ + * + * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h). + * + * @param[in,out] v Pointer to an array + * @param[in] weight Integer that is the Hamming weight + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) { + uint32_t tmp[PARAM_OMEGA_R] = {0}; + + PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, tmp, weight); + + for (size_t i = 0; i < weight; ++i) { + int32_t index = tmp[i] / 64; + int32_t pos = tmp[i] % 64; + v[index] |= ((uint64_t) 1) << pos; + } +} + + + +/** + * @brief Generates a random vector of dimension PARAM_N + * + * This function generates a random binary vector of dimension PARAM_N. It generates a random + * array of bytes using the seedexpander function, and drops the extra bits using a mask.
+ * + * @param[out] v Pointer to an array + * @param[in] ctx Pointer to the context of the seed expander + */ +void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) { + uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0}; + + seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES); + + PQCLEAN_HQCRMRS256_CLEAN_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES); + v[VEC_N_SIZE_64 - 1] &= RED_MASK; +} + + + +/** + * @brief Adds two vectors + * + * @param[out] o Pointer to an array that is the result + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors + */ +void PQCLEAN_HQCRMRS256_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { + for (uint32_t i = 0; i < size; ++i) { + o[i] = v1[i] ^ v2[i]; + } +} + + + +/** + * @brief Compares two vectors + * + * @param[in] v1 Pointer to an array that is the first vector + * @param[in] v2 Pointer to an array that is the second vector + * @param[in] size Integer that is the size of the vectors + * @returns 0 if the vectors are equal and 1 otherwise + */ +uint8_t PQCLEAN_HQCRMRS256_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) { + uint64_t r = 0; + for (size_t i = 0; i < size; i++) { + r |= v1[i] ^ v2[i]; + } + r = (~r + 1) >> 63; + return (uint8_t) r; +} + + + +/** + * @brief Resize a vector so that it contains size_o bits + * + * @param[out] o Pointer to the output vector + * @param[in] size_o Integer that is the size of the output vector in bits + * @param[in] v Pointer to the input vector + * @param[in] size_v Integer that is the size of the input vector in bits + */ +void PQCLEAN_HQCRMRS256_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) { + if (size_o < size_v) { + uint64_t mask = 0x7FFFFFFFFFFFFFFF; + int8_t val = 0; + + if (size_o % 64) { + val = 64 - (size_o % 64); +
} + + memcpy(o, v, 8 * VEC_N1N2_SIZE_64); + + for (int8_t i = 0; i < val; ++i) { + o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); + } + } else { + memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64)); + } +} diff --git a/src/kem/hqc/hqc-rmrs-256/clean/vector.h b/src/kem/hqc/hqc-rmrs-256/clean/vector.h new file mode 100644 index 00000000..439cfd67 --- /dev/null +++ b/src/kem/hqc/hqc-rmrs-256/clean/vector.h @@ -0,0 +1,27 @@ +#ifndef VECTOR_H +#define VECTOR_H + + +/** + * @file vector.h + * @brief Header file for vector.c + */ +#include "nistseedexpander.h" +#include "randombytes.h" +#include + +void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight); + +void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v); + + +void PQCLEAN_HQCRMRS256_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size); + +uint8_t PQCLEAN_HQCRMRS256_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size); + +void PQCLEAN_HQCRMRS256_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v); + + +#endif diff --git a/src/rustapi/pqc-sys/src/bindings.rs b/src/rustapi/pqc-sys/src/bindings.rs index 416c8fbf..7b340a4d 100644 --- a/src/rustapi/pqc-sys/src/bindings.rs +++ b/src/rustapi/pqc-sys/src/bindings.rs @@ -253,7 +253,10 @@ pub const NTRULPR857: ::std::os::raw::c_uint = 12; pub const LIGHTSABER: ::std::os::raw::c_uint = 13; pub const FIRESABER: ::std::os::raw::c_uint = 14; pub const SABER: ::std::os::raw::c_uint = 15; -pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 16; +pub const HQCRMRS128: ::std::os::raw::c_uint = 16; +pub const HQCRMRS192: ::std::os::raw::c_uint = 17; +pub const HQCRMRS256: ::std::os::raw::c_uint = 18; +pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 19; pub type _bindgen_ty_2 = ::std::os::raw::c_uint; #[repr(C)] 
#[derive(Debug, Copy, Clone)] diff --git a/test/katrunner/src/main.rs b/test/katrunner/src/main.rs index c795b820..1ea6ddbf 100644 --- a/test/katrunner/src/main.rs +++ b/test/katrunner/src/main.rs @@ -117,6 +117,10 @@ const KATS: &'static[Register] = &[ REG_KEM!(LIGHTSABER, "round3/saber/LightSaber/PQCkemKAT_1568.rsp"), REG_KEM!(FIRESABER, "round3/saber/FireSaber/PQCkemKAT_3040.rsp"), REG_KEM!(SABER, "round3/saber/Saber/PQCkemKAT_2304.rsp"), + REG_KEM!(HQCRMRS128, "round3/hqc/hqc-128/hqc-128_kat.rsp"), + REG_KEM!(HQCRMRS192, "round3/hqc/hqc-192/hqc-192_kat.rsp"), + REG_KEM!(HQCRMRS256, "round3/hqc/hqc-256/hqc-256_kat.rsp"), + // Those are Round2. KATs are very big, so skip testing until it makes sense to do so. //REG_SIGN!(RAINBOWVCLASSIC), //REG_SIGN!(RAINBOWICLASSIC),