adds HQC-RMRS-128/192/256

3 lat temu · fddd697fc4
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,6 +141,9 @@ add_subdirectory(src/kem/ntru/ntruhps2048677/clean)
 add_subdirectory(src/kem/ntru_prime/ntrulpr761/clean)
 add_subdirectory(src/kem/ntru_prime/ntrulpr653/clean)
 add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean)
 add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean)
 add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean)
 add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean)

 # Hardware optimized targets
 if(${ARCH} STREQUAL "ARCH_x86_64")
@@ -196,8 +199,13 @@ add_subdirectory(src/kem/ntru/ntruhps2048677/avx2)
 add_subdirectory(src/kem/ntru_prime/ntrulpr761/avx2)
 add_subdirectory(src/kem/ntru_prime/ntrulpr653/avx2)
 add_subdirectory(src/kem/ntru_prime/ntrulpr857/avx2)
 add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2)
 add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2)
 add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2)
 endif()



 # The rest of the library
 set(SRC_COMMON_GENERIC
  src/common/aes.c
@@ -205,6 +213,7 @@ set(SRC_COMMON_GENERIC
  src/common/sp800-185.c
  src/common/randombytes.c
  src/common/sha2.c
  src/common/nistseedexpander.c
  src/capi/pqapi.c
 )

--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ Users shouldn't expect any level of security provided by this code. The library
 | SABER                    | 3          |  x |
 | FrodoKEM                 | 3          |    |
 | NTRU Prime               | 3          |  x |
 | HQC-RMRS                 | 3          |  x |
 | Dilithium                | 3          |  x |
 | Falcon                   | 2          |    |
 | Rainbow                  | 3          |    |
--- a/public/pqc/pqc.h
+++ b/public/pqc/pqc.h
@@ -60,7 +60,10 @@ extern "C" {
    _(NTRULPR857)        \
    _(LIGHTSABER)        \
    _(FIRESABER)         \
    _(SABER)
    _(SABER)             \
    _(HQCRMRS128)        \
    _(HQCRMRS192)        \
    _(HQCRMRS256)

 // Defines IDs for each algorithm. The
 // PQC_ALG_SIG/KEM_MAX indicates number
--- a/src/capi/pqapi.c
+++ b/src/capi/pqapi.c
@@ -113,6 +113,12 @@
 #include "kem/saber/firesaber/avx2/api.h"
 #include "kem/saber/saber/clean/api.h"
 #include "kem/saber/saber/avx2/api.h"
 #include "kem/hqc/hqc-rmrs-128/clean/api.h"
 #include "kem/hqc/hqc-rmrs-192/clean/api.h"
 #include "kem/hqc/hqc-rmrs-256/clean/api.h"
 #include "kem/hqc/hqc-rmrs-128/avx2/api.h"
 #include "kem/hqc/hqc-rmrs-192/avx2/api.h"
 #include "kem/hqc/hqc-rmrs-256/avx2/api.h"

 // not proud of this thingy
 #define OPT_VERSION _CLEAN_
--- a/src/common/cpucycles.c
+++ b/src/common/cpucycles.c
@@ -1,17 +0,0 @@
 #include <stdint.h>
 #include "cpucycles.h"

 /* Returns the smallest difference observed between two back-to-back
  * cpucycles() readings over 100000 attempts. */
 uint64_t cpucycles_overhead(void) {
   uint64_t best = UINT64_MAX;

   for (unsigned int round = 0; round < 100000; round++) {
     const uint64_t start = cpucycles();
     __asm__ volatile ("");
     const uint64_t elapsed = cpucycles() - start;

     if (elapsed < best) {
       best = elapsed;
     }
   }

   return best;
 }
--- a/src/common/cpucycles.h
+++ b/src/common/cpucycles.h
@@ -1,33 +0,0 @@
 #ifndef CPUCYCLES_H
 #define CPUCYCLES_H

 #include <stdint.h>

 #ifdef USE_RDPMC  /* Needs echo 2 > /sys/devices/cpu/rdpmc */

 /* Reads a performance-monitoring counter with the rdpmc instruction and
  * returns it as one 64-bit value. Used when USE_RDPMC is defined. */
 static inline uint64_t cpucycles(void) {
   /* Counter selector handed to rdpmc in ECX: bit 30 set, index 1.
    * NOTE(review): presumably selects a fixed-function core-cycle counter;
    * confirm against the CPU vendor manual. */
   const uint32_t ecx = (1U << 30) + 1;
   uint64_t result;

   /* rdpmc leaves the value split across EDX:EAX; shift the high half up and
    * OR it into RAX so the whole counter is returned through "=a". */
   __asm__ volatile ("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax"
     : "=a" (result) : "c" (ecx) : "rdx");

   return result;
 }

 #else

 /* Default variant (USE_RDPMC not defined): reads the counter with the rdtsc
  * instruction and returns it as one 64-bit value. */
 static inline uint64_t cpucycles(void) {
   uint64_t result;

   /* The reading arrives split across EDX:EAX; shift the high half up and
    * OR it into RAX so the whole value is returned through "=a". */
   __asm__ volatile ("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax"
     : "=a" (result) : : "%rdx");

   return result;
 }

 #endif

 uint64_t cpucycles_overhead(void);

 #endif
--- a/src/common/speed_print.c
+++ b/src/common/speed_print.c
@@ -1,51 +0,0 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include "cpucycles.h"
 #include "speed_print.h"

 /* qsort-style comparator for uint64_t: -1, 0 or 1 for less, equal, greater. */
 static int cmp_uint64(const void *a, const void *b) {
   const uint64_t lhs = *(const uint64_t *)a;
   const uint64_t rhs = *(const uint64_t *)b;

   return (lhs > rhs) - (lhs < rhs);
 }

 /*
  * Sorts l in place (ascending) and returns its median.
  *
  * For an even count the result is the floor of the mean of the two middle
  * elements. It is computed as lo + (hi - lo) / 2 so the intermediate sum
  * cannot wrap around for large uint64_t values; the result equals
  * (lo + hi) / 2 whenever that sum does not overflow.
  *
  * l    : array of llen values, reordered by this call
  * llen : number of elements; 0 yields 0 instead of reading out of bounds
  */
 static uint64_t median(uint64_t *l, size_t llen) {
   uint64_t lo, hi;

   if (llen == 0) {
     return 0;
   }

   qsort(l, llen, sizeof(uint64_t), cmp_uint64);

   if (llen % 2) {
     return l[llen / 2];
   }

   /* After sorting, lo <= hi, so hi - lo cannot underflow. */
   lo = l[llen / 2 - 1];
   hi = l[llen / 2];
   return lo + (hi - lo) / 2;
 }

 /* Integer mean (truncating division) of the tlen values in t. */
 static uint64_t average(uint64_t *t, size_t tlen) {
   uint64_t total = 0;
   const uint64_t *cursor = t;
   const uint64_t *const end = t + tlen;

   while (cursor != end) {
     total += *cursor++;
   }

   return total / tlen;
 }

 /*
  * Turns the tlen timestamps in t into tlen-1 consecutive differences
  * (overwriting t), subtracts the measured counter overhead from each one,
  * and prints the label s followed by their median and average.
  */
 void print_results(const char *s, uint64_t *t, size_t tlen) {
   /* Measured once and cached; all-ones means "not measured yet". */
   static uint64_t overhead = UINT64_MAX;
   size_t ndiffs, k;

   if (tlen < 2) {
     fprintf(stderr, "ERROR: Need a least two cycle counts!\n");
     return;
   }

   if (overhead == UINT64_MAX) {
     overhead = cpucycles_overhead();
   }

   ndiffs = tlen - 1;
   for (k = 0; k < ndiffs; ++k) {
     t[k] = t[k + 1] - t[k] - overhead;
   }

   printf("%s\n", s);
   printf("median: %llu cycles/ticks\n", (unsigned long long)median(t, ndiffs));
   printf("average: %llu cycles/ticks\n", (unsigned long long)average(t, ndiffs));
   printf("\n");
 }
--- a/src/common/speed_print.h
+++ b/src/common/speed_print.h
@@ -1,9 +0,0 @@
 #ifndef PRINT_SPEED_H
 #define PRINT_SPEED_H

 #include <stddef.h>
 #include <stdint.h>

 void print_results(const char *s, uint64_t *t, size_t tlen);

 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt
@@ -0,0 +1,16 @@
 # Sources of the AVX2 implementation of HQC-RMRS-128.
 set(
   SRC_AVX2_HQCRMRS128
   code.c
   fft.c
   gf2x.c
   gf.c
   hqc.c
   kem.c
   parsing.c
   reed_muller.c
   reed_solomon.c
   vector.c
 )

 # Use the AVX2 namespace: the sources visible in this directory export
 # PQCLEAN_HQCRMRS128_AVX2_* symbols, not the _CLEAN_ ones.
 define_kem_alg(hqcrmrs128_avx2
   PQCLEAN_HQCRMRS128_AVX2 "${SRC_AVX2_HQCRMRS128}" "${CMAKE_CURRENT_SOURCE_DIR}")
--- a/src/kem/hqc/hqc-rmrs-128/avx2/api.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQCRMRS128_AVX2_API_H
 #define PQCLEAN_HQCRMRS128_AVX2_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_ALGNAME                      "HQC-RMRS-128"

 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES               2289
 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES               2249
 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES              4481

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32

 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/code.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/code.c
@@ -0,0 +1,47 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 *
 * @brief Encoding the message m to a code word em using the concatenated code
 *
 * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain
 * a concatenated code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
     // Zero-initialised scratch buffer passed from the Reed-Solomon stage
     // to the Reed-Muller stage.
     uint8_t rs_stage[8 * VEC_N1_SIZE_64] = {0};

     PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(rs_stage, m);
     PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(em, rs_stage);
 }



 /**
 * @brief Decoding the code word em to a message m using the concatenated code
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
     // Zero-initialised scratch buffer passed from the Reed-Muller stage
     // to the Reed-Solomon stage.
     uint8_t rm_stage[8 * VEC_N1_SIZE_64] = {0};

     PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(rm_stage, em);
     PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(m, rm_stage);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/code.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/code.h
@@ -0,0 +1,18 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *message);

 void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/fft.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/fft.c
@@ -0,0 +1,351 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
     // betas[0] = 2^(PARAM_M-1), ..., betas[PARAM_M-2] = 2^1; the value 1 is omitted.
     uint16_t *out = betas;
     size_t shift;

     for (shift = PARAM_M - 1; shift > 0; --shift) {
         *out++ = 1 << shift;
     }
 }



 /**
 * @brief Computes the subset sums of the given set
 *
 * The array subset_sums is such that its ith element is
 * the subset sum of the set elements given by the binary form of i.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
     uint16_t elem, low;

     // The empty subset sums to zero.
     subset_sums[0] = 0;

     // Adding element `elem` doubles the table: entries [span, 2*span) are the
     // already computed entries [0, span) XORed with set[elem].
     for (elem = 0; elem < set_size; ++elem) {
         const int span = 1 << elem;
         uint16_t *upper = subset_sums + span;

         for (low = 0; low < span; ++low) {
             upper[low] = set[elem] ^ subset_sums[low];
         }
     }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * The cases m_f = 1..4 are fully unrolled; any other value is delegated to
 * radix_big().
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
     switch (m_f) {
     case 4:
         // 16 input coefficients -> 8 + 8 outputs.
         // Statement order matters: later lines reuse outputs written earlier
         // (e.g. f0[5] reads f1[5], f1[4] reads f0[5]).
         f0[4] = f[8] ^ f[12];
         f0[6] = f[12] ^ f[14];
         f0[7] = f[14] ^ f[15];
         f1[5] = f[11] ^ f[13];
         f1[6] = f[13] ^ f[14];
         f1[7] = f[15];
         f0[5] = f[10] ^ f[12] ^ f1[5];
         f1[4] = f[9] ^ f[13] ^ f0[5];

         f0[0] = f[0];
         f1[3] = f[7] ^ f[11] ^ f[15];
         f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
         f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
         f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
         f1[2] = f[3] ^ f1[1] ^ f0[3];
         f0[1] = f[2] ^ f0[2] ^ f1[1];
         f1[0] = f[1] ^ f0[1];
         break;

     case 3:
         // 8 input coefficients -> 4 + 4 outputs (f0[1] reads f0[2] and f1[1]).
         f0[0] = f[0];
         f0[2] = f[4] ^ f[6];
         f0[3] = f[6] ^ f[7];
         f1[1] = f[3] ^ f[5] ^ f[7];
         f1[2] = f[5] ^ f[6];
         f1[3] = f[7];
         f0[1] = f[2] ^ f0[2] ^ f1[1];
         f1[0] = f[1] ^ f0[1];
         break;

     case 2:
         // 4 input coefficients -> 2 + 2 outputs.
         f0[0] = f[0];
         f0[1] = f[2] ^ f[3];
         f1[0] = f[1] ^ f0[1];
         f1[1] = f[3];
         break;

     case 1:
         // 2 input coefficients: f0 and f1 are the two coefficients themselves.
         f0[0] = f[0];
         f1[0] = f[1];
         break;

     default:
         // Larger inputs are split and handled recursively by radix_big().
         radix_big(f0, f1, f, m_f);
         break;
     }
 }

 /*
  * Radix conversion for sizes that radix() does not unroll: builds two
  * half-size polynomials Q and R from f, converts each with radix(m_f - 1),
  * and interleaves the results into f0 and f1.
  */
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
     uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
     uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

     uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
     uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
     uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
     uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

     // One quarter of the 2^m_f input coefficients, in elements and in bytes.
     const size_t quarter = (size_t) 1 << (m_f - 2);
     const size_t quarter_bytes = quarter * sizeof(uint16_t);
     size_t idx;

     // Q = (top quarter | top quarter), R = lower half of f.
     memcpy(Q, f + 3 * quarter, quarter_bytes);
     memcpy(Q + quarter, f + 3 * quarter, quarter_bytes);
     memcpy(R, f, 2 * quarter_bytes);

     // Fold the third quarter into Q, then Q into the upper part of R.
     for (idx = 0; idx < quarter; ++idx) {
         Q[idx] ^= f[2 * quarter + idx];
         R[quarter + idx] ^= Q[idx];
     }

     radix(Q0, Q1, Q, m_f - 1);
     radix(R0, R1, R, m_f - 1);

     // Low part of each output comes from R, high part from Q.
     memcpy(f0, R0, quarter_bytes);
     memcpy(f0 + quarter, Q0, quarter_bytes);
     memcpy(f1, R1, quarter_bytes);
     memcpy(f1 + quarter, Q1, quarter_bytes);
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS128_AVX2_fft.
 * It splits f into f0 and f1 with radix(), recurses on both with one beta
 * fewer, and recombines the two result vectors into w.
 *
 * @param[out] w Array receiving 2^m values
 * @param[in,out] f Array of coefficients; scaled in place in Step 2 when betas[m-1] != 1
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f f is processed as an array of 2^{m_f} coefficients
 * @param[in] betas FFT constants (m elements are read)
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
     uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
     uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
     uint16_t gammas[PARAM_M - 2] = {0};
     uint16_t deltas[PARAM_M - 2] = {0};
     uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
     uint16_t u[1 << (PARAM_M - 2)] = {0};
     uint16_t v[1 << (PARAM_M - 2)] = {0};
     uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

     uint16_t beta_m_pow;
     size_t i, j, k;
     size_t x;

     // Step 1: base case, f has two coefficients (f[0] + f[1].x).
     // w[s] = f[0] ^ (subset sum s of the products betas[i]*f[1]).
     if (m_f == 1) {
         for (i = 0; i < m; ++i) {
             tmp[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], f[1]);
         }

         w[0] = f[0];
         x = 1;
         // Doubling pass: each tmp[j] extends the table from x to 2x entries.
         for (j = 0; j < m; ++j) {
             for (k = 0; k < x; ++k) {
                 w[x + k] = w[k] ^ tmp[j];
             }
             x <<= 1;
         }

         return;
     }

     // Step 2: compute g
     // Multiply coefficient i of f by betas[m-1]^i (skipped when the last beta is 1).
     if (betas[m - 1] != 1) {
         beta_m_pow = 1;
         x = 1;
         x <<= m_f;
         for (i = 1; i < x; ++i) {
             beta_m_pow = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
             f[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, f[i]);
         }
     }

     // Step 3: split f into f0 and f1
     radix(f0, f1, f, m_f);

     // Step 4: compute gammas and deltas
     // gammas[i] = betas[i] / betas[m-1], deltas[i] = gammas[i]^2 + gammas[i]
     for (i = 0; i + 1 < m; ++i) {
         gammas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS128_AVX2_gf_inverse(betas[m - 1]));
         deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(gammas[i]) ^ gammas[i];
     }

     // Compute gammas sums
     compute_subset_sums(gammas_sums, gammas, m - 1);

     // Step 5: recurse on f0 with the deltas as the new betas
     fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

     k = 1;
     k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
     if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
         // No recursion on f1: its single coefficient f1[0] is used directly.
         w[0] = u[0];
         w[k] = u[0] ^ f1[0];
         for (i = 1; i < k; ++i) {
             w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], f1[0]);
             w[k + i] = w[i] ^ f1[0];
         }
     } else {
         fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

         // Step 6
         // Upper half of w starts as a copy of v (k 16-bit elements = 2*k bytes),
         // then the lower-half value is XORed in.
         memcpy(w + k, v, 2 * k);
         w[0] = u[0];
         w[k] ^= u[0];
         for (i = 1; i < k; ++i) {
             w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], v[i]);
             w[k + i] ^= w[i];
         }
     }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * Also note that only the local halves f0 and f1 are altered during computation
 * (twisted at each level by fft_rec); the input f itself is read-only.
 *
 * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
     uint16_t betas[PARAM_M - 1] = {0};
     uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
     uint16_t f0[1 << (PARAM_FFT - 1)] = {0};
     uint16_t f1[1 << (PARAM_FFT - 1)] = {0};
     uint16_t deltas[PARAM_M - 1] = {0};
     uint16_t u[1 << (PARAM_M - 1)] = {0};
     uint16_t v[1 << (PARAM_M - 1)] = {0};

     size_t i, k;

     // Follows Gao and Mateer algorithm
     compute_fft_betas(betas);

     // Step 1: PARAM_FFT > 1, nothing to do

     // Compute gammas sums
     compute_subset_sums(betas_sums, betas, PARAM_M - 1);

     // Step 2: beta_m = 1, nothing to do

     // Step 3
     radix(f0, f1, f, PARAM_FFT);

     // Step 4: Compute deltas
     for (i = 0; i < PARAM_M - 1; ++i) {
         deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(betas[i]) ^ betas[i];
     }

     // Step 5
     fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
     fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

     k = 1 << (PARAM_M - 1);
     // Step 6, 7 and error polynomial computation
     // Upper half of w starts as a copy of v (k 16-bit elements = 2*k bytes).
     memcpy(w + k, v, 2 * k);

     // Check if 0 is root
     w[0] = u[0];

     // Check if 1 is root
     w[k] ^= u[0];

     // Find other roots
     for (i = 1; i < k; ++i) {
         w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas_sums[i], v[i]);
         w[k + i] ^= w[i];
     }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every evaluation that is zero, the byte of error at the matching
 * position is toggled (XORed with 1); bytes for non-zero evaluations are left
 * unchanged. The positions are derived from gf_log of the evaluation point.
 *
 * @param[in,out] error Array with the error; updated by XOR, so the caller's initial content matters
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
     uint16_t gammas[PARAM_M - 1] = {0};
     uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
     uint16_t k;
     size_t i, index;

     // Rebuild the evaluation points in the same order as the FFT output.
     compute_fft_betas(gammas);
     compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

     k = 1 << (PARAM_M - 1);
     // Branch-free zero test: (uint16_t)(-x) >> 15 is 1 for the non-zero field
     // values handled here and 0 for x == 0, so "1 ^ ..." is 1 only when x == 0.
     // Both w[0] and w[k] are folded into error[0].
     error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15);
     error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15);

     for (i = 1; i < k; ++i) {
         // w[i] is tied to the point gammas_sums[i] ...
         index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]];
         error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15);

         // ... and w[k + i] to the point gammas_sums[i] ^ 1.
         index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1];
         error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15);
     }
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/fft.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/fft.h
@@ -0,0 +1,18 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/gf.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf.c
@@ -0,0 +1,176 @@
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 /**
 * @file gf.c
 * Galois field implementation with multiplication using the pclmulqdq instruction
 */


 static uint16_t gf_reduce(uint64_t x, size_t deg_x);



 /**
 * Reduces polynomial x modulo primitive polynomial GF_POLY.
 *
 * Each step folds the bits at positions >= PARAM_M back into the low part:
 * once unshifted, then shifted to the position of every further term of
 * PARAM_GF_POLY that the inner loop visits.
 *
 * @returns x mod GF_POLY (the 64-bit working value truncated to uint16_t)
 * @param[in] x Polynomial of degree less than 64
 * @param[in] deg_x The degree of polynomial x
 */
 static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
     uint16_t z1, z2, rmdr, dist;
     uint64_t mod;
     size_t steps, i, j;

     // Deduce the number of steps of reduction
     steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2);

     // Reduce
     for (i = 0; i < steps; ++i) {
         // mod = overflow part (bits >= PARAM_M); x keeps the low PARAM_M bits.
         mod = x >> PARAM_M;
         x &= (1 << PARAM_M) - 1;
         // Contribution of the constant term of the polynomial.
         x ^= mod;

         // Walk the remaining set bits of the polynomial from the lowest up.
         // rmdr is the polynomial without its constant term; z1/z2 are the
         // previous/current bit positions, so mod is shifted incrementally.
         // NOTE(review): PARAM_GF_POLY_WT is presumably the number of non-zero
         // terms, so WT - 2 skips the constant and leading terms - confirm in
         // parameters.h.
         z1 = 0;
         rmdr = PARAM_GF_POLY ^ 1;
         for (j = PARAM_GF_POLY_WT - 2; j; --j) {
             z2 = __tzcnt_u16(rmdr);
             dist = (uint16_t) (z2 - z1);
             mod <<= dist;
             x ^= mod;
             rmdr ^= 1 << z2;
             z1 = z2;
         }
     }

     return x;
 }



 /**
 * Multiplies two elements of GF(2^GF_M).
 * @returns the product a*b
 * @param[in] a Element of GF(2^GF_M)
 * @param[in] b Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b) {
     // Carry-less product of the two operands, taken from the low 32 bits of
     // the pclmulqdq result, then reduced modulo the field polynomial.
     const __m128i product = _mm_clmulepi64_si128(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b), 0);
     const uint32_t unreduced = _mm_cvtsi128_si32(product);

     return gf_reduce(unreduced, 2 * (PARAM_M - 1));
 }



 /**
 *  Compute 16 products in GF(2^GF_M).
 *
 *  The input registers are split into 128-bit halves. Within each half the
 *  16-bit elements are multiplied with carry-less multiplications on masked
 *  copies, the wanted 16-bit product fields are selected with the MIDDLEMASK
 *  constants and packed back together with byte shuffles. The unreduced
 *  products are then reduced with the precomputed table red[].
 *
 *  @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M)
 *  @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers
 *  @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer
 *
 */
 __m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b) {
     // Low and high 128-bit halves of both operands.
     __m128i al = _mm256_extractf128_si256(a, 0);
     __m128i ah = _mm256_extractf128_si256(a, 1);
     __m128i bl = _mm256_extractf128_si256(b, 0);
     __m128i bh = _mm256_extractf128_si256(b, 1);

     // Low half, lower 64-bit lane (selector 0x0): products of the MASKL and
     // MASKH element groups, each filtered to its own 16-bit field.
     __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0);
     abl0 &= CONST128_MIDDLEMASKL;
     abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

     // Low half, upper 64-bit lane (selector 0x11).
     __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11);
     abh0 &= CONST128_MIDDLEMASKL;
     abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

     // Pack the selected 16-bit products into the low (INDEXL) and high
     // (INDEXH) 64 bits of one 128-bit register.
     abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL);
     abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH);

     // Same sequence for the high 128-bit half of the operands.
     __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0);
     abl1 &= CONST128_MIDDLEMASKL;
     abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

     __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11);
     abh1 &= CONST128_MIDDLEMASKL;
     abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

     abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL);
     abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH);

     // Reassemble the 256-bit vector: abl1 is the upper half, abl0 the lower.
     __m256i ret = _mm256_set_m128i(abl1, abl0);

     // aux starts with bit 8 set in every 16-bit element and moves up one bit
     // per iteration (bits 8..14).
     __m256i aux = CONST256_MR0;

     // Reduction: wherever the tested bit is set in an element, XOR in red[i].
     for (int32_t i = 0; i < 7; i++) {
         ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux);
         aux = aux << 1;
     }

     // Keep only the low 8 bits of every 16-bit element.
     ret &= CONST256_LASTMASK;
     return ret;
 }



 /**
 * Squares an element of GF(2^GF_M).
 * @returns a^2
 * @param[in] a Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a) {
     // Squaring over GF(2) spreads the bits: bit i of a moves to bit 2*i.
     const uint32_t wide = a;
     uint32_t spread = wide & 1;

     for (size_t bit = 1; bit < PARAM_M; ++bit) {
         spread |= (wide << bit) & ((uint32_t) 1 << (2 * bit));
     }

     return gf_reduce(spread, 2 * (PARAM_M - 1));
 }



 /**
 * Computes the inverse of an element of GF(2^8),
 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254
 * @returns the inverse of a
 * @param[in] a Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a) {
     // Raise a to the power 254 along the chain 2, 3, 4, 7, 11, 15, 30, 60, 120, 127, 254.
     const uint16_t a2 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a);
     const uint16_t a3 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a2, a);
     const uint16_t a4 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a2);
     const uint16_t a7 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a4, a3);
     const uint16_t a11 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a4, a7);
     uint16_t acc = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a11, a4); /* a^15 */

     // Three squarings: a^30, a^60, a^120.
     for (int round = 0; round < 3; ++round) {
         acc = PQCLEAN_HQCRMRS128_AVX2_gf_square(acc);
     }

     acc = PQCLEAN_HQCRMRS128_AVX2_gf_mul(acc, a7); /* a^127 */
     return PQCLEAN_HQCRMRS128_AVX2_gf_square(acc); /* a^254 */
 }



 /**
 * Returns i modulo 2^GF_M-1.
 * i must be less than 2*(2^GF_M-1).
 * Therefore, the return value is either i or i-2^GF_M+1.
 * @returns i mod (2^GF_M-1)
 * @param[in] i The integer whose modulo is taken
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i) {
     // Subtract the order unconditionally; the top bit is set when this wrapped.
     const uint16_t shifted = (uint16_t) (i - PARAM_GF_MUL_ORDER);

     // 0xffff if the subtraction wrapped (i < PARAM_GF_MUL_ORDER), else 0.
     const uint16_t undo = (uint16_t) (0 - (shifted >> 15));

     // Branch-free: add the order back only when the mask is set.
     return shifted + (undo & PARAM_GF_MUL_ORDER);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/gf.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf.h
@@ -0,0 +1,69 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <immintrin.h>
 #include <stddef.h>
 #include <stdint.h>

 #define _mm256_set_m128i(v0, v1)  _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

 /**
 * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
 * The last two elements are needed by the PQCLEAN_HQCRMRS128_AVX2_gf_mul function
 * (for example if both elements to multiply are zero).
 */
 static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



 /**
 * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
 * The logarithm of 0 is set to 0 by convention.
 */
 static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };

 /**
 * Masks needed for the computation of 16 mult in GF(2^M)
 */
 #define CONST256_MR0      _mm256_set1_epi64x((long long) 0x0100010001000100)
 #define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff)
 #define CONST128_MASKL       _mm_set1_epi64x((long long) 0x0000ffff0000ffff)
 #define CONST128_MASKH       _mm_set1_epi64x((long long) 0xffff0000ffff0000)
 #define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff)
 #define CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000)
 #define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff)
 #define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100)

 /**
 * Reduction constants: red[i] holds x^(8+i) modulo x^8+x^4+x^3+x^2+1 for
 * i = 0..6 (0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13).
 * Each value is repeated in every 16-bit element: 4 times per 64-bit literal,
 * and 4 literals per entry to fill a 256-bit register.
 */
 static const __m256i red[7] = {
     {0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL},
     {0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL},
     {0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL},
     {0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL},
     {0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL},
     {0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL},
     {0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL},

 };


 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b);

 __m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b);

 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a);

 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a);

 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c
@@ -0,0 +1,369 @@
 #include "gf2x.h"
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * \file gf2x.c
 * \brief AVX2 implementation of multiplication of two polynomials
 */



 #define VEC_N_SPLIT_3x3 CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256)
 #define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3)

 static inline void reduce(uint64_t *o, const __m256i *a);
 static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B);
 static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B);


 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * Every output word is the matching low word of a(x) XORed with a(x)
 * shifted right by PARAM_N bits. Most words are handled four at a time
 * with 256-bit operations, the remaining ones one 64-bit word at a time,
 * and the top output word is finally masked with RED_MASK.
 *
 * @param[out] o Pointer to the result
 * @param[in] a256 Pointer to the unreduced polynomial a(x)
 */
 static inline void reduce(uint64_t *o, const __m256i *a256) {
    const uint64_t *words = (const uint64_t *) a256;
    __m256i *out256 = (__m256i *) o;
    size_t src = PARAM_N >> 6;
    size_t dst = 0;

    // Vector part: each iteration produces four 64-bit output words
    while (src < (PARAM_N >> 5) - 4) {
        __m256i shifted_down = _mm256_srli_epi64(_mm256_lddqu_si256((const __m256i *) &words[src]), PARAM_N & 63);
        __m256i shifted_up = _mm256_slli_epi64(_mm256_lddqu_si256((const __m256i *) &words[src + 1]), (-PARAM_N) & 63);
        __m256i folded = _mm256_xor_si256(shifted_down, shifted_up);

        _mm256_storeu_si256(&out256[dst], _mm256_xor_si256(a256[dst], folded));
        src += 4;
        dst++;
    }

    // Scalar part: finish the words the vector loop did not reach
    for (size_t k = src - (PARAM_N >> 6); k < (PARAM_N >> 6) + 1; k++) {
        uint64_t shifted_down = words[k + (PARAM_N >> 6)] >> (PARAM_N & 63);
        uint64_t shifted_up = words[k + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63);

        o[k] = words[k] ^ (shifted_down ^ shifted_up);
    }

    // Drop the bits at positions >= PARAM_N in the last word
    o[PARAM_N >> 6] &= RED_MASK;
 }



 /**
 * @brief Carry-less product of two 128-bit polynomials
 *
 * One Karatsuba step over the 64-bit halves of x and y, using three
 * carry-less multiplications.
 *
 * @param[out] prod Two 128-bit words receiving the product, low word first
 * @param[in] x First operand
 * @param[in] y Second operand
 */
 static inline void karat_mult_1_clmul128(__m128i *prod, __m128i x, __m128i y) {
    const __m128i zero = _mm_setzero_si128();
    __m128i lo = _mm_clmulepi64_si128(x, y, 0);
    __m128i hi = _mm_clmulepi64_si128(x, y, 0x11);
    // Swapping the 64-bit halves and XORing puts (low ^ high) in the low half
    __m128i x_sum = _mm_xor_si128(x, _mm_shuffle_epi32(x, 0x4e));
    __m128i y_sum = _mm_xor_si128(y, _mm_shuffle_epi32(y, 0x4e));
    __m128i mid = _mm_xor_si128(_mm_xor_si128(lo, hi), _mm_clmulepi64_si128(x_sum, y_sum, 0));

    // The middle term straddles the boundary between the two result words
    prod[0] = _mm_xor_si128(lo, _mm_unpacklo_epi64(zero, mid));
    prod[1] = _mm_xor_si128(hi, _mm_unpackhi_epi64(mid, zero));
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * A(x) and B(x) each span two 128-bit words; the four-word product is
 * obtained with Karatsuba from three 128-bit products.
 *
 * @param[out] C Pointer to the result (four 128-bit words)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) {
    __m128i low[2], mid[2], high[2];
    const __m128i a_lo = _mm_loadu_si128(A);
    const __m128i a_hi = _mm_loadu_si128(A + 1);
    const __m128i b_lo = _mm_loadu_si128(B);
    const __m128i b_hi = _mm_loadu_si128(B + 1);

    // low = Al.Bl, high = Ah.Bh, mid = (Al + Ah).(Bl + Bh)
    karat_mult_1_clmul128(low, a_lo, b_lo);
    karat_mult_1_clmul128(high, a_hi, b_hi);
    karat_mult_1_clmul128(mid, _mm_xor_si128(a_lo, a_hi), _mm_xor_si128(b_lo, b_hi));

    // Recombine the three partial products
    const __m128i overlap = _mm_xor_si128(low[1], high[0]);
    C[0] = low[0];
    C[1] = _mm_xor_si128(_mm_xor_si128(overlap, low[0]), mid[0]);
    C[2] = _mm_xor_si128(_mm_xor_si128(overlap, mid[1]), high[1]);
    C[3] = high[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Karatsuba product of two polynomials that each span two 256-bit words,
 * built from three calls to karat_mult_1.
 *
 * @param[out] C Pointer to the result (four 256-bit words)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[2], mid[2], high[2];
    __m256i a_sum, b_sum, overlap;
    const __m128i *a_words = (const __m128i *) A;
    const __m128i *b_words = (const __m128i *) B;

    // Operands of the middle product
    a_sum = _mm256_xor_si256(A[0], A[1]);
    b_sum = _mm256_xor_si256(B[0], B[1]);

    karat_mult_1((__m128i *) low, a_words, b_words);
    karat_mult_1((__m128i *) high, a_words + 2, b_words + 2);
    karat_mult_1((__m128i *) mid, (__m128i *) &a_sum, (__m128i *) &b_sum);

    // Recombine the three partial products
    overlap = _mm256_xor_si256(low[1], high[0]);
    C[0] = low[0];
    C[1] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[0]), mid[0]);
    C[2] = _mm256_xor_si256(_mm256_xor_si256(overlap, mid[1]), high[1]);
    C[3] = high[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Karatsuba product of two polynomials that each span four 256-bit words,
 * built from three calls to karat_mult_2.
 *
 * @param[out] C Pointer to the result (eight 256-bit words)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[4], mid[4], high[4];
    __m256i a_sum[2], b_sum[2];

    // Operands of the middle product
    for (size_t k = 0; k < 2; k++) {
        a_sum[k] = _mm256_xor_si256(A[k], A[k + 2]);
        b_sum[k] = _mm256_xor_si256(B[k], B[k + 2]);
    }

    karat_mult_2(low, A, B);
    karat_mult_2(high, A + 2, B + 2);
    karat_mult_2(mid, a_sum, b_sum);

    // Recombine the three partial products
    for (size_t k = 0; k < 2; k++) {
        const __m256i overlap = _mm256_xor_si256(low[k + 2], high[k]);

        C[k] = low[k];
        C[k + 2] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[k]), mid[k]);
        C[k + 4] = _mm256_xor_si256(_mm256_xor_si256(overlap, mid[k + 2]), high[k + 2]);
        C[k + 6] = high[k + 2];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Karatsuba product of two polynomials that each span eight 256-bit words,
 * built from three calls to karat_mult_4.
 *
 * @param[out] C Pointer to the result (sixteen 256-bit words)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[8], mid[8], high[8];
    __m256i a_sum[4], b_sum[4];

    karat_mult_4(low, A, B);
    karat_mult_4(high, A + 4, B + 4);

    // Operands of the middle product
    for (size_t k = 0; k < 4; k++) {
        a_sum[k] = _mm256_xor_si256(A[k], A[k + 4]);
        b_sum[k] = _mm256_xor_si256(B[k], B[k + 4]);
    }

    karat_mult_4(mid, a_sum, b_sum);

    // Recombine the three partial products
    for (size_t k = 0; k < 4; k++) {
        const __m256i overlap = _mm256_xor_si256(low[k + 4], high[k]);

        C[k] = low[k];
        C[k + 4] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[k]), mid[k]);
        C[k + 8] = _mm256_xor_si256(_mm256_xor_si256(overlap, mid[k + 4]), high[k + 4]);
        C[k + 12] = high[k + 4];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Three-way Karatsuba: A(x) and B(x) are each split into three parts of
 * VEC_N_SPLIT_3x3 256-bit words, six part products are computed with
 * karat_mult_8 and recombined. The result is assembled in a local buffer
 * and copied to C at the end.
 *
 * @param[out] C Pointer to the result (2 * VEC_N_SPLIT_3 256-bit words)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) {
    const __m256i *a0 = A;
    const __m256i *a1 = A + VEC_N_SPLIT_3x3;
    const __m256i *a2 = A + 2 * VEC_N_SPLIT_3x3;
    const __m256i *b0 = B;
    const __m256i *b1 = B + VEC_N_SPLIT_3x3;
    const __m256i *b2 = B + 2 * VEC_N_SPLIT_3x3;
    __m256i a01[VEC_N_SPLIT_3x3], a02[VEC_N_SPLIT_3x3], a12[VEC_N_SPLIT_3x3];
    __m256i b01[VEC_N_SPLIT_3x3], b02[VEC_N_SPLIT_3x3], b12[VEC_N_SPLIT_3x3];
    __m256i p00[2 * VEC_N_SPLIT_3x3], p11[2 * VEC_N_SPLIT_3x3], p22[2 * VEC_N_SPLIT_3x3];
    __m256i p01[2 * VEC_N_SPLIT_3x3], p02[2 * VEC_N_SPLIT_3x3], p12[2 * VEC_N_SPLIT_3x3];
    __m256i staged[6 * VEC_N_SPLIT_3x3];

    // Pairwise sums of the three parts of each operand
    for (size_t k = 0; k < VEC_N_SPLIT_3x3; k++) {
        a01[k] = a0[k] ^ a1[k];
        b01[k] = b0[k] ^ b1[k];

        a12[k] = a2[k] ^ a1[k];
        b12[k] = b2[k] ^ b1[k];

        a02[k] = a0[k] ^ a2[k];
        b02[k] = b0[k] ^ b2[k];
    }

    // Products of the parts ...
    karat_mult_8(p00, a0, b0);
    karat_mult_8(p11, a1, b1);
    karat_mult_8(p22, a2, b2);

    // ... and of the pairwise sums
    karat_mult_8(p01, a01, b01);
    karat_mult_8(p02, a02, b02);
    karat_mult_8(p12, a12, b12);

    // Recombination into the six output slices
    for (size_t k = 0; k < VEC_N_SPLIT_3x3; k++) {
        const size_t up = k + VEC_N_SPLIT_3x3;
        const __m256i mix_low = p00[k] ^ p11[k] ^ p00[up];
        const __m256i mix_high = p11[up] ^ p22[k] ^ p22[up];

        staged[k] = p00[k];
        staged[k + VEC_N_SPLIT_3x3] = p01[k] ^ mix_low;
        staged[k + 2 * VEC_N_SPLIT_3x3] = p02[k] ^ p22[k] ^ p01[up] ^ p11[up] ^ mix_low;
        staged[k + 3 * VEC_N_SPLIT_3x3] = p12[k] ^ p02[up] ^ p00[up] ^ p11[k] ^ mix_high;
        staged[k + 4 * VEC_N_SPLIT_3x3] = p12[up] ^ mix_high;
        staged[k + 5 * VEC_N_SPLIT_3x3] = p22[up];
    }

    for (size_t k = 0; k < 2 * VEC_N_SPLIT_3; k++) {
        C[k] = staged[k];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Top-level three-way Karatsuba: A(x) and B(x) are each split into three
 * parts of VEC_N_SPLIT_3 256-bit words, six part products are computed with
 * karat_three_way_mult and recombined directly into C.
 *
 * @param[out] C Pointer to the result (6 * VEC_N_SPLIT_3 256-bit words)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B) {
    const __m256i *a0 = (const __m256i *) (A->arr64);
    const __m256i *a1 = a0 + VEC_N_SPLIT_3;
    const __m256i *a2 = a0 + 2 * VEC_N_SPLIT_3;
    const __m256i *b0 = (const __m256i *) (B->arr64);
    const __m256i *b1 = b0 + VEC_N_SPLIT_3;
    const __m256i *b2 = b0 + 2 * VEC_N_SPLIT_3;
    __m256i a01[VEC_N_SPLIT_3], a02[VEC_N_SPLIT_3], a12[VEC_N_SPLIT_3];
    __m256i b01[VEC_N_SPLIT_3], b02[VEC_N_SPLIT_3], b12[VEC_N_SPLIT_3];
    __m256i p00[2 * VEC_N_SPLIT_3], p11[2 * VEC_N_SPLIT_3], p22[2 * VEC_N_SPLIT_3];
    __m256i p01[2 * VEC_N_SPLIT_3], p02[2 * VEC_N_SPLIT_3], p12[2 * VEC_N_SPLIT_3];

    // Pairwise sums of the three parts of each operand
    for (size_t k = 0; k < VEC_N_SPLIT_3; k++) {
        a01[k] = a0[k] ^ a1[k];
        b01[k] = b0[k] ^ b1[k];

        a12[k] = a2[k] ^ a1[k];
        b12[k] = b2[k] ^ b1[k];

        a02[k] = a0[k] ^ a2[k];
        b02[k] = b0[k] ^ b2[k];
    }

    // Products of the parts ...
    karat_three_way_mult(p00, a0, b0);
    karat_three_way_mult(p11, a1, b1);
    karat_three_way_mult(p22, a2, b2);

    // ... and of the pairwise sums
    karat_three_way_mult(p01, a01, b01);
    karat_three_way_mult(p02, a02, b02);
    karat_three_way_mult(p12, a12, b12);

    // Recombination into the six output slices
    for (size_t k = 0; k < VEC_N_SPLIT_3; k++) {
        const size_t up = k + VEC_N_SPLIT_3;
        const __m256i mix_low = p00[k] ^ p11[k] ^ p00[up];
        const __m256i mix_high = p11[up] ^ p22[k] ^ p22[up];

        C[k] = p00[k];
        C[k + VEC_N_SPLIT_3] = p01[k] ^ mix_low;
        C[k + 2 * VEC_N_SPLIT_3] = p02[k] ^ p22[k] ^ p01[up] ^ p11[up] ^ mix_low;
        C[k + 3 * VEC_N_SPLIT_3] = p12[k] ^ p02[up] ^ p00[up] ^ p11[k] ^ mix_high;
        C[k + 4 * VEC_N_SPLIT_3] = p12[up] ^ mix_high;
        C[k + 5 * VEC_N_SPLIT_3] = p22[up];
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * The full product of <b>a1</b> and <b>a2</b> is computed with karat_mult9
 * and then reduced modulo \f$ X^n - 1\f$ by reduce().
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to a polynomial
 * @param[in] a2 Pointer to a polynomial
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) {
    // 2 * PARAM_N_MULT + 1 is a number of BITS. Convert it to 256-bit words
    // instead of using it as the element count, which would put more than a
    // megabyte of zeroed scratch space on the stack for every multiplication.
    // karat_mult9 writes 6 * VEC_N_SPLIT_3 words and reduce reads no further.
    __m256i a1_times_a2[CEIL_DIVIDE(2 * PARAM_N_MULT + 1, 256)] = {0};
    karat_mult9(a1_times_a2, a1, a2);
    reduce(o, a1_times_a2);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h
@@ -0,0 +1,21 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>

 /**
 * Storage for one polynomial as VEC_N_256_SIZE_64 64-bit words.
 * The __m256i member is never accessed; being part of the union, it gives
 * the whole object the alignment of a 256-bit vector, so arr64 can be read
 * through __m256i pointers (as gf2x.c does).
 */
 typedef union {
    uint64_t arr64[VEC_N_256_SIZE_64];
    __m256i dummy;
 } aligned_vec_t;

 void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/hqc.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/hqc.c
@@ -0,0 +1,168 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * Two fresh seeds are drawn. The secret seed expands into the fixed-weight
 * vectors <b>x</b> and <b>y</b>, the public seed into the vector <b>h</b>,
 * and the syndrome is <b>s</b> = x + y.h.
 *
 * The public key is serialized from the public seed and <b>s</b>; the secret
 * key from the secret seed followed by the public key.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct secret_xof;
    AES_XOF_struct public_xof;
    uint8_t secret_seed[SEED_BYTES] = {0};
    uint8_t public_seed[SEED_BYTES] = {0};
    aligned_vec_t x = {0};
    aligned_vec_t y = {0};
    aligned_vec_t h = {0};
    aligned_vec_t s = {0};
    aligned_vec_t y_times_h = {0};

    // One seed expander per key; the bytes after the first 32 of each seed
    // are passed as the expander's second input
    randombytes(secret_seed, SEED_BYTES);
    seedexpander_init(&secret_xof, secret_seed, secret_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(public_seed, SEED_BYTES);
    seedexpander_init(&public_xof, public_seed, public_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors x and y
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&secret_xof, x.arr64, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&secret_xof, y.arr64, PARAM_OMEGA);

    // Public vector h and syndrome s = x + y.h
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&public_xof, h.arr64);
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(y_times_h.arr64, &y, &h);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(s.arr64, x.arr64, y_times_h.arr64, VEC_N_256_SIZE_64);

    // Serialize both keys
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(pk, public_seed, s.arr64);
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(sk, secret_seed, pk);
 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>, with
 * u = r1 + r2.h and v = m.G + s.r2 + e, where r1, r2 and e are
 * fixed-weight vectors expanded from <b>theta</b>.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_xof;
    aligned_vec_t h = {0};
    aligned_vec_t s = {0};
    aligned_vec_t r1 = {0};
    aligned_vec_t r2 = {0};
    aligned_vec_t e = {0};
    // Three scratch vectors, reused in the same order throughout
    aligned_vec_t buf_a = {0};
    aligned_vec_t buf_b = {0};
    aligned_vec_t buf_c = {0};

    // Randomness is expanded from theta
    seedexpander_init(&theta_xof, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // h and s come from the public key
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(h.arr64, s.arr64, pk);

    // r1, r2 and e
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&theta_xof, r1.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&theta_xof, r2.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&theta_xof, e.arr64, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(buf_a.arr64, &r2, &h);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(u, r1.arr64, buf_a.arr64, VEC_N_256_SIZE_64);

    // m.G: encode the message as bytes into v, repack those bytes as 64-bit
    // words in place, then resize to PARAM_N bits into buf_a
    PQCLEAN_HQCRMRS128_AVX2_code_encode((uint8_t *) v, m);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *) v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(buf_a.arr64, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, resized back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(buf_b.arr64, &r2, &s);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(buf_c.arr64, e.arr64, buf_b.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(buf_b.arr64, buf_a.arr64, buf_c.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(v, PARAM_N1N2, buf_b.arr64, PARAM_N);
 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y, converts it to bytes and decodes it into the message.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    aligned_vec_t x = {0};
    aligned_vec_t y = {0};
    // Three scratch vectors, reused in the same order throughout
    aligned_vec_t buf_a = {0};
    aligned_vec_t buf_b = {0};
    aligned_vec_t buf_c = {0};

    // x, y and pk come from the secret key
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(x.arr64, y.arr64, pk, sk);

    // buf_a = v resized to PARAM_N bits
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(buf_a.arr64, PARAM_N, v, PARAM_N1N2);

    // vect_mul needs an aligned_vec_t operand, so u is copied into buf_b
    memcpy(buf_b.arr64, u, VEC_N_256_SIZE_64 * sizeof(uint64_t));

    // buf_b = v - u.y
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(buf_c.arr64, &y, &buf_b);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(buf_b.arr64, buf_a.arr64, buf_c.arr64, VEC_N_256_SIZE_64);

    // Convert to bytes (reusing buf_a as the byte buffer) and decode
    PQCLEAN_HQCRMRS128_AVX2_store8_arr((uint8_t *) buf_a.arr64, VEC_N_SIZE_BYTES, buf_b.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_code_decode(m, (uint8_t *) buf_a.arr64);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/hqc.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/hqc.h
@@ -0,0 +1,19 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/kem.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/kem.c
@@ -0,0 +1,140 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper around the PKE key generation, which produces both keys.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (the underlying key generation reports no errors)
 */
 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(pk, sk);

    return 0;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn and encrypted under <b>pk</b> with randomness
 * theta = SHA3-512(m). The shared secret is SHA-512(m || u || v) and the
 * ciphertext is serialized from u, v and d = SHA-512(m).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    // Automatic storage (not static): a shared static buffer would make
    // concurrent encapsulations race on u. Every word of u is written by
    // hqc_pke_encrypt, so nothing relied on the buffer persisting.
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    randombytes(m, VEC_K_SIZE_BYTES);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Computing shared secret as the hash of m || u || v
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(ct, u, v, d);


    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted, the recovered message is re-encrypted, and
 * the result is compared with the received ciphertext. On any mismatch the
 * shared secret is zeroed.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    uint8_t mismatch;
    uint64_t u_recv[VEC_N_256_SIZE_64] = {0};
    uint64_t v_recv[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d_recv[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u_check[VEC_N_256_SIZE_64] = {0};
    uint64_t v_check[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d_check[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Split the ciphertext into u, v and d
    PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(u_recv, v_recv, d_recv, ct);

    // The public key is stored in sk right after the seed
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypt to recover m
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(m, u_recv, v_recv, sk);

    // Re-derive theta and re-encrypt m
    sha3_512(theta, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u_check, v_check, m, theta, pk);

    // Recompute d from m
    sha512(d_check, m, VEC_K_SIZE_BYTES);

    // Shared secret: hash of m || u || v, using the received u and v
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u_recv, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v_recv, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Reject if the received and recomputed (u, v, d) differ
    mismatch = PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *) u_recv, (uint8_t *) u_check, VEC_N_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *) v_recv, (uint8_t *) v_check, VEC_N1N2_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS128_AVX2_vect_compare(d_recv, d_check, SHA512_BYTES);

    // Stretch the flag to a full byte mask (0x00 or 0xFF) and clear ss with it
    mismatch = (uint8_t) (-((int16_t) mismatch) >> 15);
    for (size_t k = 0; k < SHARED_SECRET_BYTES; k++) {
        ss[k] &= ~mismatch;
    }


    return -(mismatch & 1);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/parameters.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/parameters.h
@@ -0,0 +1,111 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H


 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
 #include "api.h"


 #define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
   #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define VEC_N_256_SIZE_64                     Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
  #define VEC_N1N2_256_SIZE_64                  Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
   #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_POLY_WT                      Hamming weight of PARAM_GF_POLY
  #define PARAM_GF_POLY_M2                      Distance between the primitive polynomial first two set bits
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                 We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA
                                                 PARAM_FFT is chosen so that 2^PARAM_FFT exceeds PARAM_DELTA+1 (here 2^5=32 > 15+1)
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

   #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                 17669
 #define PARAM_N1                                46
 #define PARAM_N2                                384
 #define PARAM_N1N2                              17664
 #define PARAM_OMEGA                             66
 #define PARAM_OMEGA_E                           75
 #define PARAM_OMEGA_R                           75
 #define PARAM_SECURITY                          128
 #define PARAM_DFR_EXP                           128

 #define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES

 #define UTILS_REJECTION_THRESHOLD               16767881
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        PARAM_K
 #define VEC_N1_SIZE_BYTES                       PARAM_N1
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_256                          CEIL_DIVIDE(PARAM_N, 256)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_N_MULT                            (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256))
 #define VEC_N_256_SIZE_64                       (PARAM_N_MULT / 64)
 #define VEC_N1N2_256_SIZE_64                    (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

 #define PARAM_DELTA                             15
 #define PARAM_M                                 8
 #define PARAM_GF_POLY                           0x11D
 #define PARAM_GF_POLY_WT                        5
 #define PARAM_GF_POLY_M2                        4
 #define PARAM_GF_MUL_ORDER                      255
 #define PARAM_K                                 16
 #define PARAM_G                                 31
 #define PARAM_FFT                               5
 #define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

 #define RED_MASK                                0x1f
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/parsing.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/parsing.c
@@ -0,0 +1,186 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */


 /**
 * @brief Serialize a 64-bit value into 8 bytes, least significant byte first
 *
 * @param[out] out Buffer receiving exactly 8 bytes
 * @param[in] in Value to serialize
 */
 void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in) {
    // byte k of the output is bits [8k, 8k + 7] of the input
    for (unsigned shift = 0; shift < 64; shift += 8) {
        *out++ = (unsigned char) ((in >> shift) & 0xFF);
    }
 }


 /**
 * @brief Deserialize 8 bytes, least significant byte first, into a 64-bit value
 *
 * @param[in] in Buffer holding exactly 8 bytes
 * @return The value whose byte k (counted from the least significant end) is in[k]
 */
 uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in) {
    uint64_t value = 0;

    for (unsigned pos = 0; pos < 8; pos++) {
        value |= ((uint64_t) in[pos]) << (8 * pos);
    }

    return value;
 }

 /**
 * @brief Deserialize a byte string into an array of 64-bit words
 *
 * Bytes are consumed in groups of 8 (least significant byte first). A final group of
 * 1 to 7 bytes, if any, fills the low bytes of one more word. Nothing is written past
 * out64[outlen - 1], and words that receive no input bytes are left untouched.
 *
 * @param[out] out64 Destination array of 64-bit words
 * @param[in] outlen Number of words available in out64
 * @param[in] in8 Source bytes
 * @param[in] inlen Number of bytes in in8
 */
 void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t word = 0;
    size_t offset = 0;

    // complete 8-byte groups
    for (; word < outlen && offset + 8 <= inlen; word++, offset += 8) {
        out64[word] = PQCLEAN_HQCRMRS128_AVX2_load8(in8 + offset);
    }

    // stop when the input is exhausted or the output is full
    if (offset >= inlen || word >= outlen) {
        return;
    }

    // between 1 and 7 bytes are left: fold them in from the last byte down to in8[offset]
    uint64_t tail = in8[inlen - 1];
    for (size_t k = inlen - 1; k-- > offset;) {
        tail = (tail << 8) | in8[k];
    }
    out64[word] = tail;
 }

 /**
 * @brief Serialize an array of 64-bit words into bytes, least significant byte first
 *
 * Output byte p is taken from word p / 8. Writing stops after outlen bytes or as soon as
 * the word index reaches inlen, whichever comes first.
 *
 * @param[out] out8 Destination bytes
 * @param[in] outlen Number of bytes available in out8
 * @param[in] in64 Source array of 64-bit words
 * @param[in] inlen Number of words in in64
 */
 void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen; pos++) {
        const size_t word = pos / 8;
        if (word >= inlen) {
            break;
        }
        out8[pos] = (uint8_t) ((in64[word] >> (8 * (pos % 8))) & 0xFF);
    }
 }


 /**
 * @brief Serialize a secret key
 *
 * The serialized secret key is the SEED_BYTES-byte seed followed by the
 * PUBLIC_KEY_BYTES-byte serialized public key.
 *
 * @param[out] sk Buffer receiving SEED_BYTES + PUBLIC_KEY_BYTES bytes
 * @param[in] sk_seed Seed of the secret key (SEED_BYTES bytes)
 * @param[in] pk Serialized public key (PUBLIC_KEY_BYTES bytes)
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *const pk_part = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_part, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Deserialize a secret key
 *
 * The serialized secret key is a SEED_BYTES-byte seed followed by the serialized public key.
 * The public key part is copied out as is; the vectors <b>x</b> and <b>y</b> are regenerated
 * (in that order) from a seed expander initialised with the seed, each with weight PARAM_OMEGA.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint64_t representation of vector y
 * @param[out] pk Buffer receiving the PUBLIC_KEY_BYTES-byte public key
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
    AES_XOF_struct expander;
    uint8_t seed_copy[SEED_BYTES] = {0};
    const uint8_t *const pk_part = sk + SEED_BYTES;

    memcpy(seed_copy, sk, SEED_BYTES);
    memcpy(pk, pk_part, PUBLIC_KEY_BYTES);

    // the expander takes the first 32 seed bytes and the bytes after them as two separate inputs
    seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&expander, y, PARAM_OMEGA);
 }

 /**
 * @brief Serialize a public key
 *
 * The serialized public key is the SEED_BYTES-byte seed (used to generate vector <b>h</b>)
 * followed by VEC_N_SIZE_BYTES bytes of the syndrome <b>s</b>.
 *
 * @param[out] pk Buffer receiving the public key
 * @param[in] pk_seed Seed of the public key (SEED_BYTES bytes)
 * @param[in] s uint64_t representation of vector s (VEC_N_SIZE_64 words)
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *const s_part = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(s_part, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
 }



 /**
 * @brief Deserialize a public key
 *
 * The serialized public key is a SEED_BYTES-byte seed followed by VEC_N_SIZE_BYTES bytes of
 * the syndrome <b>s</b>. Vector <b>h</b> is regenerated from a seed expander initialised
 * with the seed.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s (VEC_N_SIZE_64 words)
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    AES_XOF_struct expander;
    uint8_t seed_copy[SEED_BYTES] = {0};
    const uint8_t *const s_part = pk + SEED_BYTES;

    memcpy(seed_copy, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(s, VEC_N_SIZE_64, s_part, VEC_N_SIZE_BYTES);

    // the expander takes the first 32 seed bytes and the bytes after them as two separate inputs
    seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&expander, h);
 }


 /**
 * @brief Serialize a ciphertext
 *
 * Layout of the serialized ciphertext: VEC_N_SIZE_BYTES bytes of <b>u</b>, then
 * VEC_N1N2_SIZE_BYTES bytes of <b>v</b>, then the SHA512_BYTES-byte hash <b>d</b>.
 *
 * @param[out] ct Buffer receiving the ciphertext
 * @param[in] u uint64_t representation of vector u (VEC_N_SIZE_64 words)
 * @param[in] v uint64_t representation of vector v (VEC_N1N2_SIZE_64 words)
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *const u_part = ct;
    uint8_t *const v_part = u_part + VEC_N_SIZE_BYTES;
    uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_AVX2_store8_arr(u_part, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(v_part, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_part, d, SHA512_BYTES);
 }


 /**
 * @brief Deserialize a ciphertext
 *
 * Layout of the serialized ciphertext: VEC_N_SIZE_BYTES bytes of <b>u</b>, then
 * VEC_N1N2_SIZE_BYTES bytes of <b>v</b>, then the SHA512_BYTES-byte hash <b>d</b>.
 *
 * @param[out] u uint64_t representation of vector u (VEC_N_SIZE_64 words)
 * @param[out] v uint64_t representation of vector v (VEC_N1N2_SIZE_64 words)
 * @param[out] d Buffer receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *const u_part = ct;
    const uint8_t *const v_part = u_part + VEC_N_SIZE_BYTES;
    const uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_AVX2_load8_arr(u, VEC_N_SIZE_64, u_part, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/parsing.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/parsing.h
@@ -0,0 +1,36 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 // <stddef.h> provides size_t, which the *_arr prototypes below use;
 // without it this header only compiles when another header happens to be included first.
 #include <stddef.h>
 #include <stdint.h>

 // Store a 64-bit value as 8 bytes, least significant byte first.
 void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in);

 // Load 8 bytes, least significant byte first, into a 64-bit value.
 uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in);

 // Convert inlen bytes into at most outlen 64-bit words.
 void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

 // Convert at most inlen 64-bit words into at most outlen bytes.
 void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


 // Secret key <-> byte string.
 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


 // Public key <-> byte string.
 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 // Ciphertext <-> byte string.
 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c
@@ -0,0 +1,389 @@
 #include "parameters.h"
 #include "reed_muller.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file reed_muller.c
 * Constant time implementation of Reed-Muller code RM(1,7)
 */


 // number of repeated code words
 #define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

 // copy bit 0 into all bits of a 64 bit value
 // The bit is widened to int64_t BEFORE it is negated, so the result is 0 or -1 (all ones)
 // for every integer argument type, including unsigned int. The whole expansion is
 // parenthesized so the macro can be used inside any expression.
 #define BIT0MASK(x) (-(int64_t)((x) & 1))

 // Forward declarations of the file-local helpers defined below.
 // encode: one message byte -> one 16-byte codeword
 static void encode(uint8_t *word, uint8_t message);
 // expand_and_sum: sums the MULTIPLICITY copies of a codeword bit by bit into 8 vectors of 16-bit counters
 static void expand_and_sum(__m256i *dst, const uint64_t *src);
 // hadamard: seven-pass Hadamard transform; src is used as scratch space
 static void hadamard(__m256i *src, __m256i *dst);
 // find_peaks: turns the transform into the decoded message byte
 static uint32_t find_peaks(__m256i *transform);



 /**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * The 128-bit codeword is built as four 32-bit quarters, each stored least significant
 * byte first. Encoding matrix of this code, one row per message bit
 * (note that bits are numbered big endian):
 * 0   aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1   cccccccc cccccccc cccccccc cccccccc
 * 2   f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3   ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4   ffff0000 ffff0000 ffff0000 ffff0000
 * 5   00000000 ffffffff 00000000 ffffffff
 * 6   00000000 00000000 ffffffff ffffffff
 * 7   ffffffff ffffffff ffffffff ffffffff
 *
 * No branch or table lookup depends on the message.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message to encode
 */
 static void encode(uint8_t *word, uint8_t message) {
    uint32_t quarter[4];

    // rows 0..4 and row 7 contribute the same 32 bits to every quarter
    uint32_t common = (uint32_t) BIT0MASK(message >> 7);
    common ^= (uint32_t) BIT0MASK(message >> 0) & 0xaaaaaaaa;
    common ^= (uint32_t) BIT0MASK(message >> 1) & 0xcccccccc;
    common ^= (uint32_t) BIT0MASK(message >> 2) & 0xf0f0f0f0;
    common ^= (uint32_t) BIT0MASK(message >> 3) & 0xff00ff00;
    common ^= (uint32_t) BIT0MASK(message >> 4) & 0xffff0000;

    // row 5 inverts quarters 1 and 3, row 6 inverts quarters 2 and 3
    const uint32_t flip5 = (uint32_t) BIT0MASK(message >> 5);
    const uint32_t flip6 = (uint32_t) BIT0MASK(message >> 6);
    quarter[0] = common;
    quarter[1] = common ^ flip5;
    quarter[2] = common ^ flip6;
    quarter[3] = common ^ flip5 ^ flip6;

    // write each quarter least significant byte first
    for (size_t q = 0; q < 4; q++) {
        for (size_t b = 0; b < 4; b++) {
            word[4 * q + b] = (uint8_t) ((quarter[q] >> (8 * b)) & 0xff);
        }
    }
 }



 /**
 * @brief Add multiple codewords into expanded codeword
 *
 * Every bit of every one of the MULTIPLICITY 128-bit copies is expanded to a 16-bit
 * counter lane, and the copies are summed lane by lane, so each lane ends up in
 * 0..MULTIPLICITY.
 *
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high
 *
 * The input is read with memcpy in 16-bit pieces (native byte order, as before), which
 * avoids reading the buffer through an incompatible or under-aligned pointer type, and
 * the accumulation uses _mm256_add_epi16 instead of the GCC-only `+=` on __m256i.
 *
 * @param[out] dst Structure that contain the expanded codeword (8 vectors of 16 lanes)
 * @param[in] src Structure that contain the codeword (MULTIPLICITY copies of 16 bytes)
 */
 static inline void expand_and_sum(__m256i *dst, const uint64_t *src) {
    const uint8_t *bytes = (const uint8_t *) src;
    uint16_t v[16];

    for (size_t part = 0; part < 8; part++) {
        dst[part] = _mm256_setzero_si256();
    }
    for (size_t copy = 0; copy < MULTIPLICITY; copy++) {
        for (size_t part = 0; part < 8; part++) {
            // 16 consecutive codeword bits of this copy
            uint16_t chunk;
            memcpy(&chunk, bytes + 16 * copy + 2 * part, sizeof chunk);
            for (size_t bit = 0; bit < 16; bit++) {
                v[bit] = (chunk >> bit) & 1;
            }
            // _mm256_set_epi16 takes the highest lane first
            dst[part] = _mm256_add_epi16(dst[part],
                                         _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
                                                          v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]));
        }
    }
 }



 /**
 * @brief Hadamard transform
 *
 * Perform hadamard transform of src and store result in dst
 * src is overwritten: it is also used as intermediate buffer
 * Method is best explained if we use H(3) instead of H(7):
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * You can do this in three passes, where each pass does this:
 * set lower half of buffer to pairwise sums,
 * and upper half to differences
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3:   a+b+c+d+e+f+g+h   a+b-c-d+e+f-g-h   a+b+c+d-e-f-g-h   a+b-c-d-e-f+g+h
 * a-b+c-d+e-f+g-h   a-b-c+d+e-f-g+h   a-b+c-d-e+f-g+h   a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src Structure that contain the expanded codeword; clobbered
 * @param[out] dst Structure that receives the transform
 */
 inline void hadamard(__m256i *src, __m256i *dst) {
    // the two buffers swap roles after every pass:
    // src -> dst -> src -> dst -> src -> dst -> src -> dst
    // so the seventh (last) pass writes into dst
    __m256i *from = src;
    __m256i *to = dst;

    for (size_t pass = 0; pass < 7; pass++) {
        for (size_t pair = 0; pair < 4; pair++) {
            // hadd/hsub work within each 128-bit lane, so the two middle
            // 64-bit blocks of the result are swapped (0xd8) to restore the order
            __m256i sums = _mm256_hadd_epi16(from[2 * pair], from[2 * pair + 1]);
            to[pair] = _mm256_permute4x64_epi64(sums, 0xd8);
            __m256i diffs = _mm256_hsub_epi16(from[2 * pair], from[2 * pair + 1]);
            to[pair + 4] = _mm256_permute4x64_epi64(diffs, 0xd8);
        }
        // exchange the buffers for the next pass
        __m256i *previous = from;
        from = to;
        to = previous;
    }
 }



 /**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the highest value,
 * and add 128 if the peak is positive
 * Notes on decoding
 * The standard "Green machine" decoder works as follows:
 * if the received codeword is W, compute (2 * W - 1) * H7
 * The entries of the resulting vector are always even and vary from
 * -128 (= the complement is a code word, add bit 7 to decode)
 * via 0 (this is a different codeword)
 * to 128 (this is the code word).
 *
 * Our decoding differs in two ways:
 * - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1)
 * - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY)
 * This implies that we have to subtract 64M (M=MULTIPLICITY)
 * from the first entry to make sure the first codewords is handled properly
 * and that the entries vary from -64M to 64M.
 * -64M or 64M stands for a perfect codeword.
 * If there are fewer than 32M errors, there is always a unique codeword
 * which an entry with absolute value > 32M;
 * this is because an error changes an entry by 1.
 * The highest number that seem to be decodable is 50 errors, so that the
 * highest entries in the hadamard transform can be as low as 12.
 * But this is different for the repeated code.
 * Because multiple codewords are added, this changes: the lowest value of the
 * hadamard transform of the sum of six words is seen to be as low as 43 (!),
 * which is way less than 12*6.
 *
 * It is possible that there are more errors, but the word is still uniquely
 * decodable: we found a word with distance of 50 from the nearest codeword.
 * That means that the highest entry can be as low as 14M.
 * Since we have to do binary search, we search for the range 1-64M
 * which can be done in 6+l2g(M) steps.
 * The binary search is based on (values>32M are unique):
 * M  32M     min>  max>  firstStep #steps
 * 2   64       1   64    33 +- 16    6
 * 4  128       1  128    65 +- 32    7
 * 6  192       1  192   129 +- 64    8
 *
 * As a check, we run a sample for M=6 to see the peak value; it ranged
 * from 43 to 147, so my analysis looks right. Also, it shows that decoding
 * far beyond the bound of 32M is needed.
 *
 * For the vectors, it would be tempting to use 8 bit ints,
 * because the values "almost" fit in there.
 * We could use some trickery to fit it in 8 bits, like saturated add or
 * division by 2 in a late step.
 * Unfortunately, these instructions do not exist.
 * the adds _mm512_adds_epi8 is available only on the latest processors,
 * and division, shift, mulhi are not available at all for 8 bits.
 * So, we use 16 bit ints.
 *
 * For the search of the optimal comparison value,
 * remember the transform contains 64M-d,
 * where d are the distances to the codewords.
 * The highest value gives the most likely codeword.
 * There is no fast vectorized way to find this value, so we search for the
 * maximum value itself.
 * In each pass, we collect a bit map of the transform values that are,
 * say >bound.  There are three cases:
 * bit map = 0: all code words are further away than 64M-bound (decrease bound)
 * bit map has one bit: one unique code word has distance < 64M-bound
 * bit map has multiple bits: multiple words (increase bound)
 * We will search for the lowest value of bound that gives a nonzero bit map.
 *
 * @param[in] transform Structure that contain the expanded codeword
 */
 inline uint32_t find_peaks(__m256i *transform) {
    // Overview: (1) binary-search a bound just below the largest absolute value,
    // (2) locate the row (message bits 4..6) and column (bits 0..3) holding a value above
    // that bound, (3) set bit 7 from the sign of the selected transform entry.
    // All selections are done with masks instead of branches.

    // a whole lot of vector variables
    __m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows;
    __m256i tmp = _mm256_setzero_si256();
    __m256i vect_mask;
    __m256i res;
    int32_t lower;
    int32_t width;
    uint32_t message;
    uint32_t mask;
    int8_t index;
    int8_t abs_value;
    int8_t mask1;
    int8_t mask2;
    uint16_t result;

    // compute absolute value of transform
    for (size_t i = 0; i < 8; i++) {
        abs_rows[i] = _mm256_abs_epi16(transform[i]);
    }
    // compute a vector of 16 elements which contains the maximum somewhere
    // (later used to compute bits 0 through 3 of message)
    max_abs_rows = abs_rows[0];
    for (size_t i = 1; i < 8; i++) {
        max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]);
    }

    // do binary search for the highest value that is lower than the maximum
    // loop invariant: lower gives bit map = 0, lower + width gives bit map > 0
    lower = 1;
    // this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6
    width = 1 << (5 + MULTIPLICITY / 2);
    // if you don't unroll this loop, it fits in the loop cache
    // uncomment the line below to speeding up the program by a few percent
    // #pragma GCC unroll 0
    while (width > 1) {
        width >>= 1;
        // compare with lower + width; put result in bitmap
        // make vector from value of new bound
        bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width));
        bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound);
        // step up if there are any matches
        // rely on compiler to use conditional move here
        // testz returns 1 when bitmap is all zero; the next line maps that to
        // mask = 0 (no match) and a nonzero bitmap to mask = 0xFFFFFFFF
        mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
        mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
        // adds width only when at least one element exceeded the bound
        lower += mask & width;
    }
    // lower+width contains the maximum value of the vector
    // or less, if the maximum is very high (which is OK)
    // normally, there is one maximum, but sometimes there are more
    // find where the maxima occur in the maximum vector
    // (each determines lower 4 bits of peak position)
    // construct vector filled with bound-1
    bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1));

    // find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message
    // find lowest value by searching backwards skip first check to save time
    message = 0x70;
    for (size_t i = 0; i < 8; i++) {
        // rows are visited from 7 down to 0; every row with a match overwrites
        // message, so the lowest matching row index is the one that remains
        bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound);
        mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
        mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
        // masked select: message = (7 - i) << 4 when mask is all ones, unchanged otherwise
        message ^= mask & (message ^ ((7 - i) << 4));
    }
    // we decided which row of the matrix contains the lowest match
    // select proper row
    index = message >> 4;

    // copy abs_rows[index] into tmp without indexing by the secret-dependent value
    tmp = _mm256_setzero_si256();
    for (size_t i = 0; i < 8; i++) {
        // abs_value = |index - i|, computed without a branch
        abs_value = (int8_t)(index - i);
        mask1 = abs_value >> 7;
        abs_value ^= mask1;
        abs_value -= mask1;
        // mask2 is 1 when abs_value is nonzero and 0 when i == index
        mask2 = ((uint8_t) - abs_value >> 7);
        // so mask is all ones only for i == index
        mask = (-1ULL) + mask2;
        vect_mask = _mm256_set1_epi32(mask);
        res = _mm256_and_si256(abs_rows[i], vect_mask);
        tmp = _mm256_or_si256(tmp, res);
    }

    active_row = tmp;

    // get the column number of the vector element
    // by setting the bits corresponding to the columns
    // and then adding elements within two groups of 8
    vect_mask = _mm256_cmpgt_epi16(active_row, bound);
    // lane k keeps only bit k, so the sums below cannot carry
    vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1);
    for (size_t i = 0; i < 3; i++) {
        vect_mask = _mm256_hadd_epi16(vect_mask, vect_mask);
    }
    // elements 0 and 8 now hold the combined bits of lanes 0..7 and 8..15
    // add low 4 bits of message
    // (the trailing-zero count gives the lowest matching column)
    message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8));

    // set bit 7 if sign of biggest value is positive
    // make sure a jump isn't generated by the compiler
    tmp = _mm256_setzero_si256();
    for (size_t i = 0; i < 8; i++) {
        // mask is all ones only for the row i == message / 16
        mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63);
        vect_mask = _mm256_set1_epi32(mask);
        tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i]));
    }
    result = 0;
    for (size_t i = 0; i < 16; i++) {
        // mask is all ones only for the column i == message % 16
        mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63);
        result |= mask & ((uint16_t *)&tmp)[i];
    }
    // bit 15 of result is the sign bit of the selected (signed) transform entry;
    // when it is clear, bit 7 of the message is set
    message |= (0x8000 & ~result) >> 8;
    return message;
 }



 /**
 * @brief Encodes the received word
 *
 * Each of the VEC_N1_SIZE_BYTES message bytes is encoded into one 16-byte RM(1,7)
 * codeword, which is then written MULTIPLICITY times in a row.
 *
 * @param[out] cdw Byte array receiving the encoded message (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Byte array of size VEC_N1_SIZE_BYTES storing the message
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    uint8_t *block = cdw;

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++, block += 16 * MULTIPLICITY) {
        // the first codeword of the block is computed ...
        encode(block, msg[i]);
        // ... and the remaining ones are plain copies of it
        for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
            memcpy(block + 16 * copy, block, 16);
        }
    }
 }



 /**
 * @brief Decodes the received word
 *
 * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes @cite macwilliams1977theory
 *
 * For every message byte the MULTIPLICITY received copies are summed, transformed,
 * corrected in the first entry and handed to find_peaks.
 *
 * @param[out] msg Byte array of size VEC_N1_SIZE_BYTES receiving the decoded message
 * @param[in] cdw Byte array storing the received word (16 * MULTIPLICITY bytes per message byte)
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    __m256i expanded[8];
    __m256i transform[8];
    // only the lowest 16-bit element is non-zero
    const __m256i first_entry_bias = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0,
                                     0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY);

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // collect the codewords
        expand_and_sum(expanded, (const uint64_t *)&cdw[16 * i * MULTIPLICITY]);
        // apply hadamard transform
        hadamard(expanded, transform);
        // fix the first entry to get the half Hadamard transform
        // The subtraction must be done on 16-bit elements: with the compiler's `-=` on
        // __m256i the operation is carried out on 64-bit elements, so a first entry
        // smaller than 64 * MULTIPLICITY would borrow from the three entries next to it.
        transform[0] = _mm256_sub_epi16(transform[0], first_entry_bias);
        // finish the decoding
        msg[i] = (uint8_t) find_peaks(transform);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h
@@ -0,0 +1,18 @@
 #ifndef REED_MULLER_H
 #define REED_MULLER_H


 /**
 * @file reed_muller.h
 * Header file of reed_muller.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Encode VEC_N1_SIZE_BYTES message bytes; every byte becomes MULTIPLICITY
 // consecutive copies of a 16-byte RM(1,7) codeword in cdw.
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

 // Decode a received word laid out as above back into VEC_N1_SIZE_BYTES message bytes.
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c
@@ -0,0 +1,466 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file reed_solomon.c
 * Constant time implementation of Reed-Solomon codes
 */


 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void compute_roots(uint8_t *error, uint16_t *sigma);
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

 static const __m256i alpha_ij256_1[45] = {
    {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087},
    {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006},
    {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035},
    {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014},
    {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be},
    {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078},
    {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3},
    {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d},
    {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed},
    {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e},
    {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054},
    {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4},
    {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5},
    {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062},
    {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064},
    {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051},
    {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045},
    {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb},
    {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083},
    {0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020},
    {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d},
    {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0},
    {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee},
    {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba},
    {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e},
    {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb},
    {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9},
    {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd},
    {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec},
    {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9},
    {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052},
    {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1},
    {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1},
    {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc},
    {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c},
    {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2},
    {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048},
    {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016},
    {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad},
    {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074},
    {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9},
    {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025},
    {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 0x00ab00a9005b008c},
    {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de},
    {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f}
 };
 static const __m256i alpha_ij256_2[45] = {
    {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x0000000000600030},
    {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x0000000000b90069},
    {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x0000000000df007f},
    {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00000000003b00f8},
    {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x000000000055004d},
    {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x00000000009600f1},
    {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x00000000005900e0},
    {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x00000000002c00f7},
    {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0000000000260040},
    {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x0000000000c1009c},
    {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00000000000f005f},
    {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x00000000001a00b6},
    {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x0000000000a900ec},
    {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x00000000009100aa},
    {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0000000000640096},
    {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00000000002400a2},
    {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x000000000001000b},
    {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00000000006000cd},
    {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x0000000000b900d4},
    {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x0000000000df005e},
    {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00000000003b0086},
    {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0000000000550085},
    {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x00000000009600d5},
    {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000000000059006e},
    {0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x00000000002c00ef},
    {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00000000002600fa},
    {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0000000000c1002d},
    {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x00000000000f0023},
    {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x00000000001a001e},
    {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0000000000a9001a},
    {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x00000000009100da},
    {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0000000000640063},
    {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0000000000240082},
    {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0000000000010045},
    {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x000000000060006c},
    {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x0000000000b9008f},
    {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x0000000000df0028},
    {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00000000003b00d3},
    {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x00000000005500ce},
    {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x0000000000960084},
    {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x00000000005900e5},
    {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x00000000002c0007},
    {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0000000000260090},
    {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x0000000000c10002},
    {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x00000000000f0060}
 };


 /**
 * @brief Systematic Reed-Solomon encoding of a PARAM_K byte message into a PARAM_N1 byte codeword
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes), the parity part is produced by a
 * (PARAM_N1 - PARAM_K)-stage feedback shift register driven by the generator polynomial
 * coefficients RS_POLY_COEFS. The register lives in cdw[0 .. PARAM_N1 - PARAM_K - 1] and is read
 * before it is written, so the caller is expected to pass those bytes zeroed.
 * The message is finally copied behind the parity bytes.
 *
 * @param[out] cdw Array of PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Array of PARAM_K bytes storing the message
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    // The __m256i member is only there to give both buffers vector alignment.
    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } products = {0};

    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } generator = {{ RS_POLY_COEFS }};

    __m256i *products256 = (__m256i *) products.arr16;
    __m256i *generator256 = (__m256i *) generator.arr16;
    const size_t parity_len = PARAM_N1 - PARAM_K;

    // Feed the message bytes into the register, highest index first.
    for (size_t m = PARAM_K; m-- > 0;) {
        const uint8_t feedback = (uint8_t) (msg[m] ^ cdw[parity_len - 1]);
        const __m256i feedback256 = _mm256_set1_epi16(feedback);

        // feedback * g(x), sixteen coefficients per vector
        _mm256_storeu_si256(&products256[0], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(feedback256, generator256[0]));
        _mm256_storeu_si256(&products256[1], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(feedback256, generator256[1]));

        // Shift the register up by one position and add the products.
        for (size_t k = parity_len - 1; k > 0; --k) {
            cdw[k] = (uint8_t) (cdw[k - 1] ^ products.arr16[k]);
        }
        cdw[0] = (uint8_t) products.arr16[0];
    }

    memcpy(&cdw[parity_len], msg, PARAM_K);
 }



 /**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Two 16-lane accumulators are evaluated: every lane starts at cdw[0] and receives
 * cdw[i + 1] times the matching entry of alpha_ij256_1[i] (first accumulator) or
 * alpha_ij256_2[i] (second accumulator). All 16 lanes of the first accumulator become
 * syndromes[0..15]; lanes 0..13 of the second become syndromes[16..29]. Exactly 30 values
 * are written, which the contract above equates with 2 * PARAM_DELTA.
 *
 * The output is written with unaligned stores and memcpy only: the caller's array is a plain
 * uint16_t array and carries no 16- or 32-byte alignment guarantee.
 *
 * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    __m256i low_syndromes = _mm256_set1_epi16(cdw[0]);
    __m256i high_syndromes = low_syndromes;
    uint16_t high_lanes[16];

    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
        const __m256i symbol = _mm256_set1_epi16(cdw[i + 1]);

        low_syndromes = _mm256_xor_si256(low_syndromes,
                                         PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(symbol, alpha_ij256_1[i]));
        high_syndromes = _mm256_xor_si256(high_syndromes,
                                          PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(symbol, alpha_ij256_2[i]));
    }

    // syndromes[0..15]
    _mm256_storeu_si256((__m256i *) syndromes, low_syndromes);

    // syndromes[16..29]: only the first 14 lanes of the second accumulator are used;
    // go through a local buffer so nothing is written past syndromes[29].
    _mm256_storeu_si256((__m256i *) high_lanes, high_syndromes);
    memcpy(syndromes + 16, high_lanes, 14 * sizeof high_lanes[0]);
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes)). <br>
 * The variable pp stands for rho and is initialized to -1 (0xFFFF as uint16_t). <br>
 * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, both sigma and X_sigma_p are updated in place. <br>
 * sigma_copy is a snapshot of sigma taken at the start of each iteration, used when X_sigma_p has to be refreshed. <br>
 * Every update that depends on the discrepancy or on the degrees is applied through
 * 0x0000 / 0xffff masks rather than branches. <br>
 * Only sigma[0 .. PARAM_DELTA] is ever written, and the snapshot keeps the first PARAM_DELTA coefficients.
 *
 * @returns the degree of the ELP sigma
 * @param[out] sigma Array of at least PARAM_DELTA + 1 elements receiving the ELP; entries above index 0 are only XOR-updated, so they are expected to be zero on entry
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    uint16_t deg_sigma = 0;
    uint16_t deg_sigma_p = 0;
    uint16_t deg_sigma_copy = 0;
    uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
    // X_sigma_p starts as the polynomial X
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    uint16_t pp = (uint16_t) -1; // 2*rho
    // d_p: discrepancy saved at the last step where the degree grew; d: current discrepancy
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    uint16_t mask1, mask2, mask12;
    uint16_t deg_X, deg_X_sigma_p;
    uint16_t dd;
    uint16_t mu;

    uint16_t i;

    sigma[0] = 1;
    for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (copies PARAM_DELTA coefficients of 2 bytes each)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
        deg_sigma_copy = deg_sigma;

        // dd = d / d_p
        dd = PQCLEAN_HQCRMRS128_AVX2_gf_mul(d, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(d_p));

        // sigma += dd * X_sigma_p (coefficient 0 is left untouched)
        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            sigma[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(dd, X_sigma_p[i]);
        }

        // uint16_t wrap-around makes mu - pp equal to mu + 1 while pp is still 0xFFFF
        deg_X = mu - pp;
        deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        mask12 = mask1 & mask2;
        // masked select: deg_sigma = mask12 ? deg_X_sigma_p : deg_sigma
        deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

        // Last iteration: sigma is final, skip the bookkeeping for a next step
        if (mu == (2 * PARAM_DELTA - 1)) {
            break;
        }

        // masked selects: remember mu and d when the degree increased
        pp ^= mask12 & (mu ^ pp);
        d_p ^= mask12 & (d ^ d_p);
        // X_sigma_p = X * (mask12 ? sigma_copy : X_sigma_p); index 0 is never rewritten
        for (i = PARAM_DELTA; i; --i) {
            X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }

        deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
        // Next discrepancy: syndromes[mu + 1] plus the sum of sigma[i] * syndromes[mu + 1 - i]
        d = syndromes[mu + 1];

        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            d ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQCRMRS128_AVX2_fft for more details.
 *
 * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
 * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint8_t *error, uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0};

    PQCLEAN_HQCRMRS128_AVX2_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(error, w);
 }



 /**
 * @brief Computes the polynomial z(x)
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 * Coefficients with index above degree are forced to zero through a mask (no branch on degree);
 * the only exception is z[1], to which syndromes[0] is always added.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    z[0] = 1;

    // Start from sigma, truncated to its degree.
    for (size_t idx = 1; idx <= PARAM_DELTA; ++idx) {
        // 0xffff when idx <= degree, 0 otherwise
        const uint16_t keep = (uint16_t) -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] = keep & sigma[idx];
    }

    z[1] ^= syndromes[0];

    // Add syndromes[idx - 1] and the cross terms sigma[j] * syndromes[idx - j - 1].
    for (size_t idx = 2; idx <= PARAM_DELTA; ++idx) {
        const uint16_t keep = (uint16_t) -((uint16_t) (idx - degree - 1) >> 15);
        uint16_t contribution = syndromes[idx - 1];

        for (size_t j = 1; j < idx; ++j) {
            contribution ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[j], syndromes[idx - j - 1]);
        }
        z[idx] ^= keep & contribution;
    }
 }



 /**
 * @brief Computes the error values
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * The function works in three passes, all of which use masks instead of data-dependent branches:
 *   1. scan error[0 .. PARAM_N1 - 1] and store gf_exp[i] of the k-th non-zero position i in beta_j[k];
 *   2. for each of the PARAM_DELTA slots compute e_j[k] from z and the beta_j values
 *      (slots at or beyond the number of positions found are forced to zero);
 *   3. scan error[] again and add e_j[k] into error_values at the k-th non-zero position.
 *
 * @param[out] error_values Array of PARAM_N1 elements receiving the error values; values are accumulated with +=, so the array is expected to be zero on entry
 * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
 * @param[in] error Array of at least PARAM_N1 elements; a non-zero entry marks an error position
 */
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
    uint16_t beta_j[PARAM_DELTA] = {0};
    uint16_t e_j[PARAM_DELTA] = {0};

    uint16_t delta_counter;
    uint16_t delta_real_value;
    uint16_t found;
    uint16_t mask1;
    uint16_t mask2;
    uint16_t tmp1;
    uint16_t tmp2;
    uint16_t inverse;
    uint16_t inverse_power_j;

    // Compute the beta_{j_i} page 31 of the documentation
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; i++) {
        found = 0;
        // NOTE(review): the mask relies on >> of a negative int32_t being an arithmetic shift
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        // Visit every slot so the access pattern does not depend on delta_counter
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            beta_j[j] += mask1 & mask2 & gf_exp[i];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
    // Number of error positions that were assigned a slot (at most PARAM_DELTA)
    delta_real_value = delta_counter;

    // Compute the e_{j_i} page 31 of the documentation
    for (size_t i = 0; i < PARAM_DELTA; ++i) {
        tmp1 = 1;
        tmp2 = 1;
        inverse = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(beta_j[i]);
        inverse_power_j = 1;

        // tmp1 = 1 + sum over j of z[j] * inverse^j, i.e. z evaluated at beta_j[i]^-1 (z[0] taken as 1)
        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
            inverse_power_j = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, inverse);
            tmp1 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, z[j]);
        }
        // tmp2 = product over the other slots of (1 + inverse * beta_j[other])
        for (size_t k = 1; k < PARAM_DELTA; ++k) {
            tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
        }
        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
        // e_j[i] = tmp1 / tmp2 for used slots, 0 for unused ones
        e_j[i] = mask1 & PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(tmp2));
    }

    // Place the delta e_{j_i} values at the right coordinates of the output vector
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; ++i) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            error_values[i] += mask1 & mask2 & e_j[j];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
 }



 /**
 * @brief Correct the errors
 *
 * XORs the low byte of each error value into the matching codeword byte.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector; corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error value of every position
 */
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    size_t pos = PARAM_N1;

    while (pos-- > 0) {
        cdw[pos] = (uint8_t) (cdw[pos] ^ error_values[pos]);
    }
 }



 /**
 * @brief Decodes the received word
 *
 * The decoder chains six steps:
 *    <ol>
 *    <li> compute the 2*PARAM_DELTA syndromes;
 *    <li> compute the error-locator polynomial sigma;
 *    <li> find the error-locator numbers with the additive FFT (roots of sigma, then their inverses);
 *    <li> compute the polynomial z(x);
 *    <li> compute the error values;
 *    <li> correct the errors in the received word.
 *    </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Array of PARAM_K bytes receiving the decoded message
 * @param[in,out] cdw Array of PARAM_N1 bytes storing the received word; it is corrected in place
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    uint16_t synd[2 * PARAM_DELTA] = {0};
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint8_t error_positions[1 << PARAM_M] = {0};
    uint16_t evaluator[PARAM_N1] = {0};
    uint16_t magnitudes[PARAM_N1] = {0};

    // Steps 1 and 2: syndromes, then the error locator polynomial and its degree
    compute_syndromes(synd, cdw);
    const uint16_t locator_degree = compute_elp(locator, synd);

    // Step 3: error positions
    compute_roots(error_positions, locator);

    // Steps 4 and 5: z(x), then the error values
    compute_z_poly(evaluator, locator, locator_degree, synd);
    compute_error_values(magnitudes, evaluator, error_positions);

    // Step 6: fix the received word in place
    correct_errors(cdw, magnitudes);

    // The message sits behind the PARAM_G - 1 parity bytes
    memcpy(msg, &cdw[PARAM_G - 1], PARAM_K);
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h
--- a/src/kem/hqc/hqc-rmrs-128/avx2/vector.c
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/vector.c
@@ -0,0 +1,178 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function samples <b>weight</b> distinct bit positions and XORs the corresponding bits into <b>v</b>.
 * Positions are drawn uniformly from the interval [0, PARAM_N - 1]. Suppose PARAM_N is equal to \f$ 70853 \f$; to select a position \f$ r\f$ the function works as follows:
 *  1. It takes three bytes from the seedexpander output to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It returns \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and is denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position that was already drawn is discarded and drawn again.
 *
 * The local buffers hold PARAM_OMEGA_R entries, so weight is assumed not to exceed PARAM_OMEGA_R (TODO confirm against callers).
 * The final pass loads and stores CEIL_DIVIDE(PARAM_N, 256) blocks of 256 bits through v, so v must be at least that large.
 *
 * @param[in,out] v Pointer to the array the sampled bits are XORed into
 * @param[in] weight Integer that is the Hamming weight
 * @param[in] ctx Pointer to the context of the seed expander
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    size_t random_bytes_size = 3 * weight;
    uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0};
    uint32_t tmp[PARAM_OMEGA_R] = {0};
    __m256i bit256[PARAM_OMEGA_R];
    __m256i bloc256[PARAM_OMEGA_R];
    // lane k of posCmp256 holds k: used to select one 64-bit word inside a 256-bit block
    __m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0);
    __m256i pos256;
    __m256i mask256;
    __m256i aux;
    __m256i i256;
    uint64_t bloc, pos, bit64;
    uint8_t inc;
    size_t i, j, k;

    i = 0;
    // j == random_bytes_size forces a refill on the first pass
    j = random_bytes_size;
    while (i < weight) {
        do {
            // refill the byte pool once all of it has been consumed
            if (j == random_bytes_size) {
                seedexpander(ctx, rand_bytes, random_bytes_size);
                j = 0;
            }

            // assemble a 24-bit big-endian candidate
            tmp[i] = ((uint32_t) rand_bytes[j++]) << 16;
            tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8;
            tmp[i] |= rand_bytes[j++];

        } while (tmp[i] >= UTILS_REJECTION_THRESHOLD);

        tmp[i] = tmp[i] % PARAM_N;

        // keep the candidate only if it differs from every position accepted so far
        inc = 1;
        for (k = 0; k < i; k++) {
            if (tmp[k] == tmp[i]) {
                inc = 0;
            }
        }
        i += inc;
    }

    for (i = 0; i < weight; i++) {
        // we store the bloc number and bit position of each vb[i]
        // bloc: index of the 64-bit word; bloc >> 2: index of the 256-bit block
        bloc = tmp[i] >> 6;
        bloc256[i] = _mm256_set1_epi64x(bloc >> 2);
        // pos: which of the four 64-bit lanes of that block holds the bit
        pos = (bloc & 0x3UL);
        pos256 = _mm256_set1_epi64x(pos);
        mask256 = _mm256_cmpeq_epi64(pos256, posCmp256);
        bit64 = 1ULL << (tmp[i] & 0x3f);
        // the bit, present only in the selected lane
        bit256[i] = _mm256_set1_epi64x(bit64)&mask256;
    }

    // Every block is visited for every sampled position, so the memory access
    // pattern does not depend on the positions themselves.
    for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) {
        aux = _mm256_loadu_si256(((__m256i *)v) + i);
        i256 = _mm256_set1_epi64x(i);

        for (j = 0; j < weight; j++) {
            // all-ones when position j falls into block i
            mask256 = _mm256_cmpeq_epi64(bloc256[j], i256);
            aux ^= bit256[j] & mask256;
        }
        _mm256_storeu_si256(((__m256i *)v) + i, aux);
    }

 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seedexpander, loaded into the VEC_N_SIZE_64 words of
 * <b>v</b> with PQCLEAN_HQCRMRS128_AVX2_load8_arr, and the last word is masked with RED_MASK to
 * drop the extra bits.
 *
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 * @param[in] ctx Pointer to the context of the seed expander
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *last_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N_SIZE_64, stream, VEC_N_SIZE_BYTES);

    // clear the bits above PARAM_N in the top word
    *last_word &= RED_MASK;
 }



 /**
 * @brief Adds two vectors
 *
 * The addition is a word-wise XOR of the two inputs.
 *
 * @param[out] o Pointer to an array of <b>size</b> words receiving the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t word = 0;

    while (word != size) {
        const uint64_t lhs = v1[word];
        const uint64_t rhs = v2[word];

        o[word] = lhs ^ rhs;
        word++;
    }
 }



 /**
 * @brief Compares two vectors
 *
 * All <b>size</b> bytes are always examined; there is no early exit on the first difference.
 *
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint64_t diff = 0;

    for (uint32_t idx = 0; idx != size; ++idx) {
        diff |= (uint64_t) (v1[idx] ^ v2[idx]);
    }

    // diff < 256, so (0 - diff) has its top bit set exactly when diff != 0
    return (uint8_t) ((0 - diff) >> 63);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When growing (size_o >= size_v) the CEIL_DIVIDE(size_v, 8) input bytes are copied unchanged.
 * When shrinking, VEC_N1N2_SIZE_BYTES bytes are copied and the bits of word VEC_N1N2_SIZE_64 - 1
 * above position size_o % 64 are cleared, so this branch is only meaningful for the
 * N1N2-sized output.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    // Number of meaningful bits in the last word; 0 means the word is fully used.
    const uint32_t used_bits = size_o % 64;
    if (used_bits != 0) {
        // keep the low used_bits bits, clear the 64 - used_bits bits above them
        o[VEC_N1N2_SIZE_64 - 1] &= UINT64_MAX >> (64 - used_bits);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-128/avx2/vector.h
+++ b/src/kem/hqc/hqc-rmrs-128/avx2/vector.h
@@ -0,0 +1,27 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_from_randombytes(uint64_t *v);


 void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

 void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt
+++ b/src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt
@@ -0,0 +1,16 @@
 set(
  	SRC_CLEAN_HQCRMRS128
 	code.c
 	fft.c
 	gf2x.c
 	gf.c
 	hqc.c
 	kem.c
 	parsing.c
 	reed_muller.c
 	reed_solomon.c
 	vector.c
 )

 define_kem_alg(hqcrmrs128_clean
  PQCLEAN_HQCRMRS128_CLEAN "${SRC_CLEAN_HQCRMRS128}" "${CMAKE_CURRENT_SOURCE_DIR}")
--- a/src/kem/hqc/hqc-rmrs-128/clean/api.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQCRMRS128_CLEAN_API_H
 #define PQCLEAN_HQCRMRS128_CLEAN_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_ALGNAME                      "HQC-RMRS-128"

 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES               2289
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES               2249
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES              4481

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32

 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/code.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/code.c
@@ -0,0 +1,46 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 *
 * @brief Encodes the message m into the code word em using the concatenated code
 *
 * The message is first encoded with the Reed-Solomon code into a zero-initialised intermediate
 * buffer of VEC_N1_SIZE_BYTES bytes; that buffer is then encoded with the duplicated Reed-Muller
 * code to obtain the concatenated code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *m) {
    uint8_t rs_codeword[VEC_N1_SIZE_BYTES];

    memset(rs_codeword, 0, sizeof rs_codeword);

    // outer code first, inner code second
    PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(rs_codeword, m);
    PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(em, rs_codeword);
 }



 /**
 * @brief Decodes the code word em into the message m using the concatenated code
 *
 * The Reed-Muller decoder writes into a zero-initialised intermediate buffer of
 * VEC_N1_SIZE_BYTES bytes, which the Reed-Solomon decoder then turns into the message.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em) {
    uint8_t rs_word[VEC_N1_SIZE_BYTES];

    memset(rs_word, 0, sizeof rs_word);

    // inner code first, outer code second
    PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(m, rs_word);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/code.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/code.h
@@ -0,0 +1,18 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *message);

 void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/fft.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/fft.c
@@ -0,0 +1,351 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Fills in the basis of betas (the element 1 is omitted) used by the additive FFT and its transpose
 *
 * Entry i receives 2^(PARAM_M-1-i), so the array runs from 2^(PARAM_M-1) down to 2.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    size_t pos = 0;

    while (pos < PARAM_M - 1) {
        betas[pos] = 1 << (PARAM_M - 1 - pos);
        ++pos;
    }
 }



 /**
 * @brief Computes every subset sum (XOR) of the given set
 *
 * On return, subset_sums[i] is the XOR of the elements of set selected by the
 * bits of i (bit b of i selects set[b]). The table is built by doubling: once
 * the first 2^b sums are known, the next 2^b are obtained by XORing set[b]
 * into each of them.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    uint32_t filled = 1; // number of sums already available, always a power of two
    uint16_t pos;

    subset_sums[0] = 0;

    for (pos = 0; pos < set_size; ++pos) {
        uint16_t *upper = subset_sums + filled;
        uint32_t t;

        for (t = 0; t < filled; ++t) {
            upper[t] = set[pos] ^ subset_sums[t];
        }
        filled <<= 1;
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * The cases m_f = 1, 2, 3 and 4 (2, 4, 8 and 16 input coefficients) are
 * fully unrolled below; every other value of m_f is delegated to radix_big.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    case 4:
        // 16 coefficients in, 8 + 8 out.
        // Statement order matters: f0[5], f1[4], f0[3], f0[2], f1[1], f1[2],
        // f0[1] and f1[0] are built from outputs written by earlier lines.
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    case 3:
        // 8 coefficients in, 4 + 4 out; f0[1] and f1[0] reuse earlier outputs.
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    case 2:
        // 4 coefficients in, 2 + 2 out; f1[0] reuses f0[1].
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        break;

    case 1:
        // 2 coefficients in: f0 and f1 are the two constants themselves.
        f0[0] = f[0];
        f1[0] = f[1];
        break;

    default:
        // Larger inputs are split and converted recursively.
        radix_big(f0, f1, f, m_f);
        break;
    }
 }

 /**
 * @brief Radix conversion for the values of m_f that radix does not unroll
 *
 * f is viewed as four quarters of n = 2^(m_f-2) coefficients each. Two
 * polynomials of 2n coefficients are derived from them, each is converted
 * with radix (at level m_f-1), and the two pairs of results are concatenated
 * into f0 and f1 (first the part coming from the lower polynomial, then the
 * part coming from the upper one).
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of 2^m_f coefficients
 * @param[in] m_f Base-2 logarithm of the number of coefficients of f
 */
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t upper[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t lower[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t upper_f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t upper_f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t lower_f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t lower_f1[1 << (PARAM_FFT - 2)] = {0};

    const size_t n = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = n * sizeof(uint16_t);
    size_t idx;

    // Both halves of "upper" start as the top quarter of f; "lower" starts as the bottom half of f.
    memcpy(upper, f + 3 * n, quarter_bytes);
    memcpy(upper + n, f + 3 * n, quarter_bytes);
    memcpy(lower, f, 2 * quarter_bytes);

    for (idx = 0; idx < n; ++idx) {
        upper[idx] ^= f[2 * n + idx];
        lower[n + idx] ^= upper[idx];
    }

    radix(upper_f0, upper_f1, upper, m_f - 1);
    radix(lower_f0, lower_f1, lower, m_f - 1);

    memcpy(f0, lower_f0, quarter_bytes);
    memcpy(f0 + n, upper_f0, quarter_bytes);
    memcpy(f1, lower_f1, quarter_bytes);
    memcpy(f1 + n, upper_f1, quarter_bytes);
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS128_CLEAN_fft.
 * It recurses on the two halves produced by radix, decreasing both m and m_f
 * by one at each level, and combines the two sub-results into w.
 * Note that f is modified in place (twisted) when betas[m-1] is not 1.
 *
 * The local buffers are sized from PARAM_FFT-2 and PARAM_M-2, so the function
 * is meant to be entered with m <= PARAM_M-1 and m_f <= PARAM_FFT-1, which is
 * how PQCLEAN_HQCRMRS128_CLEAN_fft calls it.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in] f Array of 2^m_f coefficients (altered during the computation)
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f Base-2 logarithm of the size of f (2^m_f coefficients are processed)
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t gammas[PARAM_M - 2] = {0};
    uint16_t deltas[PARAM_M - 2] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    // Only used by the m_f == 1 base case, where it holds one product per beta.
    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

    uint16_t beta_m_pow;
    size_t i, j, k;
    size_t x;

    // Step 1: base case, f has two coefficients (f[0] + f[1].x).
    // w is filled by doubling: each new beta adds tmp[j] = betas[j]*f[1]
    // to all evaluations computed so far.
    if (m_f == 1) {
        for (i = 0; i < m; ++i) {
            tmp[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], f[1]);
        }

        w[0] = f[0];
        x = 1;
        for (j = 0; j < m; ++j) {
            for (k = 0; k < x; ++k) {
                w[x + k] = w[k] ^ tmp[j];
            }
            x <<= 1;
        }

        return;
    }

    // Step 2: compute g
    // Coefficient i of f is multiplied by betas[m-1]^i (skipped when betas[m-1] is 1).
    if (betas[m - 1] != 1) {
        beta_m_pow = 1;
        x = 1;
        x <<= m_f;
        for (i = 1; i < x; ++i) {
            beta_m_pow = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3: split f into f0 and f1
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m-1], deltas[i] = gammas[i]^2 + gammas[i]
    for (i = 0; i + 1 < m; ++i) {
        gammas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5: recurse on f0 (and on f1 below, unless f1 is constant)
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    // k = 2^(m-1): w[0..k-1] and w[k..2k-1] are the two halves of the output
    k = 1;
    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6: combine u and v (2 * k bytes = k uint16_t elements are copied)
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all field elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * On this top-level call the last basis element is 1, so no twisting of f is
 * needed and the subset sums used for the final combination are those of the
 * betas themselves (1 excluded). The recursive calls to fft_rec work on copies
 * produced by radix.
 *
 * @param[out] w Array receiving the 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number of coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t radix_f0[1 << (PARAM_FFT - 1)] = {0};
    uint16_t radix_f1[1 << (PARAM_FFT - 1)] = {0};
    uint16_t deltas[PARAM_M - 1] = {0};
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};
    const size_t half = 1 << (PARAM_M - 1);
    size_t idx;

    // Basis (without 1) and all of its subset sums
    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // Steps 1 and 2 of Gao-Mateer need no work here (PARAM_FFT > 1, beta_m = 1).

    // Step 3: radix conversion of f
    radix(radix_f0, radix_f1, f, PARAM_FFT);

    // Step 4: deltas[i] = betas[i]^2 + betas[i]
    for (idx = 0; idx < PARAM_M - 1; ++idx) {
        deltas[idx] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(basis[idx]) ^ basis[idx];
    }

    // Step 5: evaluate both halves recursively
    fft_rec(u, radix_f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, radix_f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    // Steps 6 and 7: the upper half of w starts as a copy of v ...
    memcpy(w + half, v, half * sizeof(uint16_t));

    // ... index 0 is handled separately (evaluation at 0 and at 1) ...
    w[0] = u[0];
    w[half] ^= u[0];

    // ... and the remaining entries are combined pairwise.
    for (idx = 1; idx < half; ++idx) {
        w[idx] = u[idx] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(basis_sums[idx], v[idx]);
        w[half + idx] ^= w[idx];
    }
 }



 /**
 * @brief Retrieves the error polynomial from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every evaluation that is zero, the byte of error at the matching
 * position is toggled (XORed with 1). Because the result is XORed into
 * error, the previous contents of error are taken into account.
 *
 * @param[out] error Array with the error (one byte per position)
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    const uint16_t half = 1 << (PARAM_M - 1);
    size_t idx;

    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // 1 ^ ((uint16_t)(-x) >> 15) is 1 when x is zero and 0 for a nonzero field element
    error[0] ^= 1 ^ ((uint16_t) (-w[0]) >> 15);
    error[0] ^= 1 ^ ((uint16_t) (-w[half]) >> 15);

    for (idx = 1; idx < half; ++idx) {
        size_t position;

        // w[idx] is the evaluation at basis_sums[idx]
        position = PARAM_GF_MUL_ORDER - gf_log[basis_sums[idx]];
        error[position] ^= 1 ^ ((uint16_t) (-w[idx]) >> 15);

        // w[half + idx] is the evaluation at basis_sums[idx] + 1
        position = PARAM_GF_MUL_ORDER - gf_log[basis_sums[idx] ^ 1];
        error[position] ^= 1 ^ ((uint16_t) (-w[half + idx]) >> 15);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/fft.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/fft.h
@@ -0,0 +1,18 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/gf.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/gf.c
@@ -0,0 +1,63 @@
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 /**
 * @file gf.c
 * Galois field implementation with multiplication using lookup tables
 */


 /**
 * @brief Multiplies two elements of GF(2^PARAM_M)
 *
 * The product is looked up as exp(log(a) + log(b)). Zero operands are
 * supported: the table lookup is still performed, but its result is masked
 * out, so the function returns 0 without branching on a or b.
 *
 * @returns the product a*b
 * @param[in] a First element of GF(2^PARAM_M) to multiply (used as an index into gf_log)
 * @param[in] b Second element of GF(2^PARAM_M) to multiply (used as an index into gf_log)
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b) {
    // For a 16-bit x, (uint32_t)0 - x has its top bit set exactly when x != 0.
    // Working on unsigned values avoids right-shifting a negative signed
    // integer, whose result is implementation-defined in C.
    const uint16_t a_nonzero = (uint16_t) (0 - (((uint32_t) 0 - a) >> 31)); // 0xffff if a != 0
    const uint16_t b_nonzero = (uint16_t) (0 - (((uint32_t) 0 - b) >> 31)); // 0xffff if b != 0
    const uint16_t mask = a_nonzero & b_nonzero;

    return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(gf_log[a] + gf_log[b])];
 }



 /**
 * @brief Squares an element of GF(2^PARAM_M)
 *
 * Computed as exp(2*log(a)); the result is masked to 0 when a is 0,
 * without branching on a.
 *
 * @returns a^2
 * @param[in] a Element of GF(2^PARAM_M) (used as an index into gf_log)
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a) {
    // The mask is kept unsigned: storing 0xffff in an int16_t is an
    // implementation-defined narrowing, and the unsigned subtraction avoids
    // right-shifting a negative signed integer.
    const uint16_t mask = (uint16_t) (0 - (((uint32_t) 0 - a) >> 31)); // 0xffff if a != 0

    return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(2 * gf_log[a])];
 }



 /**
 * @brief Computes the inverse of an element of GF(2^PARAM_M)
 *
 * Computed as exp(PARAM_GF_MUL_ORDER - log(a)). For a = 0, which has no
 * inverse, the result is masked to 0 without branching on a.
 *
 * @returns the inverse of a (0 if a is 0)
 * @param[in] a Element of GF(2^PARAM_M) (used as an index into gf_log)
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a) {
    // The mask is kept unsigned: storing 0xffff in an int16_t is an
    // implementation-defined narrowing, and the unsigned subtraction avoids
    // right-shifting a negative signed integer.
    const uint16_t mask = (uint16_t) (0 - (((uint32_t) 0 - a) >> 31)); // 0xffff if a != 0

    return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]];
 }



 /**
 * @brief Returns i modulo 2^PARAM_M-1
 *
 * i must be less than 2*(2^PARAM_M-1), so a single conditional subtraction
 * is enough: the return value is either i or i-2^PARAM_M+1. The selection is
 * done with a mask rather than a branch.
 *
 * @returns i mod (2^PARAM_M-1)
 * @param[in] i The integer whose modulo is taken
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i) {
    // Subtract the order unconditionally; the 16-bit result wraps around when i was smaller
    const uint16_t diff = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // 0xffff if the subtraction wrapped (i < PARAM_GF_MUL_ORDER), 0 otherwise
    const uint16_t wrapped = -(diff >> 15);

    // Add the order back only in the wrapped case
    return diff + (wrapped & PARAM_GF_MUL_ORDER);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/gf.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/gf.h
@@ -0,0 +1,39 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>
 #include <stdint.h>


 /**
 * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
 * The last two elements are needed by the PQCLEAN_HQCRMRS128_CLEAN_gf_mul function
 * (for example if both elements to multiply are zero).
 */
 static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



 /**
 * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
 * The logarithm of 0 is set to 0 by convention.
 */
 static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };


 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b);

 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a);

 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a);

 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/gf2x.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/gf2x.c
@@ -0,0 +1,154 @@
 #include "gf2x.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include <stdint.h>
 /**
 * \file gf2x.c
 * \brief Implementation of multiplication of two polynomials
 */


 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
 static void reduce(uint64_t *o, const uint64_t *a);
 static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

 /**
 * @brief Exchanges two entries of a table
 *
 * After the call tab[elt1] holds the former tab[elt2] and vice versa.
 * Passing the same index twice leaves the table unchanged.
 *
 * @param[in] tab Pointer to the table
 * @param[in] elt1 Index of the first element
 * @param[in] elt2 Index of the second element
 */
 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t *first = tab + elt1;
    uint16_t *second = tab + elt2;
    const uint16_t saved = *first;

    *first = *second;
    *second = saved;
 }



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * The part of a(x) located at bit PARAM_N and above is folded back onto the
 * low part: each output word is the matching input word XORed with the
 * bits taken from the two words that straddle position PARAM_N + 64*i.
 * The last output word is finally masked with RED_MASK.
 *
 * @param[out] o Pointer to the result (VEC_N_SIZE_64 words)
 * @param[in] a Pointer to the polynomial a(x) (words up to index 2*VEC_N_SIZE_64-1 are read)
 */
 static void reduce(uint64_t *o, const uint64_t *a) {
    size_t word;

    for (word = 0; word < VEC_N_SIZE_64; word++) {
        // Bits coming from the word that contains bit PARAM_N + 64*word ...
        const uint64_t from_low = a[word + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63);
        // ... completed by the low bits of the following word
        const uint64_t from_high = (uint64_t) (a[word + VEC_N_SIZE_64] << (64 - (PARAM_N & 63)));

        o[word] = a[word] ^ from_low ^ from_high;
    }

    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Computes the product of the sparse polynomial a1 with the dense polynomial a2
 *
 *  o(x) = a1(x)a2(x)
 *
 * A table of the 16 shifted copies a2(x) << i (i = 0..15) is built first, then for
 * every monomial of a1 the matching copy is XORed into o at a 16-bit aligned offset.
 * Both the slots of the table and the order in which the monomials are processed are
 * permuted using bytes drawn from ctx.
 *
 * The result is accumulated (XORed) into o, which is accessed byte-wise through
 * PQCLEAN_HQCRMRS128_CLEAN_load8 / PQCLEAN_HQCRMRS128_CLEAN_store8.
 *
 * @param[out] o Pointer to the result (byte view of the product buffer)
 * @param[in] a1 Pointer to the sparse polynomial (list of degrees of the monomials which appear in it)
 * @param[in] a2 Pointer to the dense polynomial (VEC_N_SIZE_64 words)
 * @param[in] weight Hamming weight of the sparse polynomial a1 (the local buffers hold at most PARAM_OMEGA_E entries)
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
 static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t carry;
    uint32_t dec, s;
    uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
    uint16_t permuted_table[16];
    uint16_t permutation_table[16];
    uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t tmp;
    uint64_t *pt;
    uint8_t *res;
    size_t i, j;

    // Start from the identity mapping "shift amount -> table slot" ...
    for (i = 0; i < 16; i++) {
        permuted_table[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    // ... and shuffle it: entry i is swapped with a pseudo-random entry at or after i
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    // Shift 0: plain copy of a2, with one extra zero word on top
    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    // Shifts 1..15: a2 shifted left by i bits, the bits pushed out of each word
    // are carried into the next one (the last carry lands in the extra word)
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    // Same shuffling scheme for the order in which the monomials of a1 are visited
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i)));
    }

    for (i = 0; i < weight; i++) {
        // Degree = 16*s + dec: dec selects the pre-shifted copy,
        // s the offset in 16-bit units (2*s bytes) inside o
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res = o + 2 * s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        // XOR the VEC_N_SIZE_64 + 1 words of the selected copy into o, 8 bytes at a time
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            tmp = PQCLEAN_HQCRMRS128_CLEAN_load8(res);
            PQCLEAN_HQCRMRS128_CLEAN_store8(res, tmp ^ pt[j]);
            res += 8;
        }
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * This function multiplies a sparse polynomial <b>a1</b> (of Hamming weight equal to <b>weight</b>)
 * and a dense polynomial <b>a2</b>. The unreduced product is accumulated in a local buffer
 * by fast_convolution_mult, converted in place from bytes to 64-bit words, and then
 * reduced modulo \f$ X^n - 1\f$ into <b>o</b>.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial
 * @param[in] a2 Pointer to the dense polynomial
 * @param[in] weight Integer that is the weight of the sparse polynomial
 * @param[in] ctx Pointer to the randomness context
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t product[2 * VEC_N_SIZE_64 + 1] = {0};
    uint8_t *product_bytes = (uint8_t *) product;

    fast_convolution_mult(product_bytes, a1, a2, weight, ctx);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(product, 2 * VEC_N_SIZE_64 + 1, product_bytes, sizeof(product));
    reduce(o, product);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/gf2x.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/gf2x.h
@@ -0,0 +1,16 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/hqc.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/hqc.c
@@ -0,0 +1,144 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key is composed of the syndrome <b>s</b> as well as the <b>seed</b> used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the <b>seed</b> used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * Two independent seeds are drawn with randombytes, the secret one first.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    uint8_t secret_seed[SEED_BYTES] = {0};
    uint8_t public_seed[SEED_BYTES] = {0};
    AES_XOF_struct secret_xof;
    AES_XOF_struct public_xof;
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};
    uint64_t x[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};

    // Seed expander for the secret part (first 32 bytes of the seed, then the rest as diversifier)
    randombytes(secret_seed, SEED_BYTES);
    seedexpander_init(&secret_xof, secret_seed, secret_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Seed expander for the public part
    randombytes(public_seed, SEED_BYTES);
    seedexpander_init(&public_xof, public_seed, public_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors: x as a bit vector, y as a list of coordinates
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&secret_xof, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&secret_xof, y, PARAM_OMEGA);

    // Public part: s = x + y.h
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&public_xof, h);
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &secret_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64);

    // Serialize: pk from (public seed, s), sk from (secret seed, pk)
    PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(pk, public_seed, s);
    PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(sk, secret_seed, pk);
 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>.
 * All the randomness used here is derived from <b>theta</b>, so the same
 * (m, theta, pk) always draws the same vectors r1, r2 and e.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_xof;
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};
    uint64_t r1[VEC_N_SIZE_64] = {0};
    uint32_t r2[PARAM_OMEGA_R] = {0};
    uint64_t e[VEC_N_SIZE_64] = {0};
    uint64_t encoded[VEC_N_SIZE_64] = {0};
    uint64_t masked[VEC_N_SIZE_64] = {0};

    // The seed expander is keyed from theta (first 32 bytes, then the remainder)
    seedexpander_init(&theta_xof, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // h and s come from the public key
    PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(h, s, pk);

    // Draw r1, r2 and e, in this order
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&theta_xof, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&theta_xof, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&theta_xof, e, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &theta_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

    // Encode m into v (as bytes), convert v to words in place, then widen it to PARAM_N bits
    PQCLEAN_HQCRMRS128_CLEAN_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(encoded, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, truncated back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(masked, r2, s, PARAM_OMEGA_R, &theta_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(masked, e, masked, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(masked, encoded, masked, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(v, PARAM_N1N2, masked, PARAM_N);
 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y and decodes it with the concatenated code. A fresh seed
 * is drawn with randombytes to key the seed expander handed to the
 * multiplication routine.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t mask_seed[SEED_BYTES] = {0};
    AES_XOF_struct mask_xof;
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint64_t scratch[VEC_N_SIZE_64] = {0};
    uint64_t noisy[VEC_N_SIZE_64] = {0};

    // Parse the secret key; x lands in scratch and is overwritten below, only y is used afterwards
    PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(scratch, y, pk, sk);

    // Randomness for the multiplication
    randombytes(mask_seed, SEED_BYTES);
    seedexpander_init(&mask_xof, mask_seed, mask_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // noisy = v - u.y (v is first widened to PARAM_N bits)
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(scratch, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(noisy, y, u, PARAM_OMEGA, &mask_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(noisy, scratch, noisy, VEC_N_SIZE_64);

    // Serialize to bytes (reusing scratch as the byte buffer) and decode
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr((uint8_t *)scratch, VEC_N_SIZE_BYTES, noisy, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_code_decode(m, (uint8_t *)scratch);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/hqc.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/hqc.h
@@ -0,0 +1,19 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/kem.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/kem.c
@@ -0,0 +1,140 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper: the key pair is the one produced by the PKE key generation.
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (this function always reports success)
 */
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {
    const int status = 0;

    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(pk, sk);
    return status;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn, theta = SHA3-512(m) seeds the encryption of m,
 * d = SHA-512(m) is appended to the ciphertext, and the shared secret is
 * SHA-512(m || u || v).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char hash_input[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *const u_bytes = hash_input + VEC_K_SIZE_BYTES;
    unsigned char *const v_bytes = hash_input + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES;

    // Random message
    randombytes(m, VEC_K_SIZE_BYTES);

    // theta = SHA3-512(m), then encrypt m under the randomness derived from theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk);

    // d = SHA-512(m)
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Shared secret = SHA-512(m || u || v)
    memcpy(hash_input, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(u_bytes, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(v_bytes, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, hash_input, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Ciphertext = (u, v, d)
    PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(ct, u, v, d);

    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted to m, m is re-encrypted with the randomness
 * derived from it, and the result (u', v', d') is compared with the received
 * (u, v, d). The shared secret SHA-512(m || u || v) is always computed; when
 * any of the three comparisons fails it is overwritten with zeros and -1 is
 * returned.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {
    uint8_t mismatch;
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    // Values parsed from the received ciphertext
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    // Values recomputed from the decrypted message
    uint64_t u_check[VEC_N_SIZE_64] = {0};
    uint64_t v_check[VEC_N1N2_SIZE_64] = {0};
    unsigned char d_check[SHA512_BYTES] = {0};
    unsigned char hash_input[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Parse the ciphertext into u, v and d
    PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // The public key is stored in sk right after the secret seed
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypt
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(m, u, v, sk);

    // Re-encrypt the decrypted message under theta = SHA3-512(m)
    sha3_512(theta, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u_check, v_check, m, theta, pk);

    // d' = SHA-512(m)
    sha512(d_check, m, VEC_K_SIZE_BYTES);

    // Shared secret = SHA-512(m || u || v), computed from the received u and v
    memcpy(hash_input, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(hash_input + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(hash_input + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, hash_input, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d': accumulate the three comparison results ...
    mismatch = PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u_check, VEC_N_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v_check, VEC_N1N2_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare(d, d_check, SHA512_BYTES);

    // ... stretch them into an all-ones / all-zeros byte ...
    mismatch = (uint8_t) (-((int16_t) mismatch) >> 15);

    // ... and clear the shared secret without branching when anything differed
    for (size_t pos = 0; pos < SHARED_SECRET_BYTES; pos++) {
        ss[pos] &= ~mismatch;
    }

    return -(mismatch & 1);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/parameters.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/parameters.h
@@ -0,0 +1,98 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H


 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
 #include "api.h"


 #define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Divide a by b and round the result up */

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=24
                                                The smallest power of 2 greater than 24+1 is 32=2^5
                                                NOTE(review): the figures 24 and 32 do not match this file, which
                                                defines PARAM_DELTA as 15 (and PARAM_FFT as 5); they look carried
                                                over from another parameter set -- confirm
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                                             17669
 #define PARAM_N1                                46
 #define PARAM_N2                                384
 #define PARAM_N1N2                              17664
 #define PARAM_OMEGA                             66
 #define PARAM_OMEGA_E                           75
 #define PARAM_OMEGA_R                           75
 #define PARAM_SECURITY                          128
 #define PARAM_DFR_EXP                           128

 #define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES

 #define UTILS_REJECTION_THRESHOLD             16767881
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        PARAM_K
 #define VEC_N1_SIZE_BYTES                       PARAM_N1
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_DELTA                             15
 #define PARAM_M                                 8
 #define PARAM_GF_POLY                           0x11D
 #define PARAM_GF_MUL_ORDER                      255
 #define PARAM_K                                 16
 #define PARAM_G                                 31
 #define PARAM_FFT                               5
 #define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

 #define RED_MASK                                0x1f
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/parsing.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/parsing.c
@@ -0,0 +1,186 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */


 /**
 * @brief Writes a 64-bit word as 8 bytes, least significant byte first
 *
 * @param[out] out Buffer receiving 8 bytes
 * @param[in] in Word to serialize
 */
 void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in) {
    for (unsigned int k = 0; k < 8; k++) {
        out[k] = (unsigned char) (in & 0xFF);
        in >>= 8;
    }
 }


 /**
 * @brief Reads 8 bytes as a 64-bit word, least significant byte first
 *
 * @param[in] in Buffer holding 8 bytes
 * @returns the word whose byte k (counting from the low end) is in[k]
 */
 uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in) {
    uint64_t value = 0;

    // consume in[7] first so that in[0] ends up in the low byte
    for (unsigned int k = 8; k-- > 0;) {
        value = (value << 8) | in[k];
    }

    return value;
 }

 /**
 * @brief Packs a byte string into 64-bit words, least significant byte first
 *
 * Full groups of 8 bytes are converted with load8. If input bytes remain and
 * there is still room in out64, the remaining (at most 7) bytes form one last
 * word whose upper bytes are zero. Words beyond that are left untouched.
 *
 * @param[out] out64 Array receiving the words
 * @param[in] outlen Number of words available in out64
 * @param[in] in8 Bytes to pack
 * @param[in] inlen Number of bytes in in8
 */
 void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t consumed = 0;
    size_t produced = 0;

    // whole 8-byte groups
    while (produced < outlen && consumed + 8 <= inlen) {
        out64[produced] = PQCLEAN_HQCRMRS128_CLEAN_load8(in8 + consumed);
        produced++;
        consumed += 8;
    }

    // nothing left to read, or nowhere left to write
    if (consumed >= inlen || produced >= outlen) {
        return;
    }

    // trailing partial group: start from the last byte so the first one lands in the low byte
    uint64_t tail = 0;
    for (size_t k = inlen - consumed; k > 0; k--) {
        tail = (tail << 8) | in8[consumed + k - 1];
    }
    out64[produced] = tail;
 }

 /**
 * @brief Unpacks 64-bit words into bytes, least significant byte first
 *
 * Writing stops as soon as either outlen bytes have been written or all
 * inlen words have been consumed; remaining output bytes are left untouched.
 *
 * @param[out] out8 Buffer receiving the bytes
 * @param[in] outlen Number of bytes available in out8
 * @param[in] in64 Words to unpack
 * @param[in] inlen Number of words in in64
 */
 void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    size_t written = 0;

    for (size_t word = 0; word < inlen && written < outlen; word++) {
        for (unsigned int shift = 0; shift < 64 && written < outlen; shift += 8) {
            out8[written] = (uint8_t) ((in64[word] >> shift) & 0xFF);
            written++;
        }
    }
 }


 /**
 * @brief Serializes a secret key
 *
 * The output is the secret seed (SEED_BYTES) immediately followed by a copy of
 * the public key (PUBLIC_KEY_BYTES). The seed is what vectors <b>x</b> and
 * <b>y</b> are regenerated from; the public key is appended to respect the NIST API.
 *
 * @param[out] sk String receiving the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_slot, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Deserializes a secret key
 *
 * Copies out the public key stored after the seed, then expands the seed to
 * regenerate <b>x</b> and <b>y</b> (in that order) with weight PARAM_OMEGA.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint32_t representation of vector y (filled by the by_coordinates generator)
 * @param[out] pk String receiving the public key
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *pk_part = sk + SEED_BYTES;

    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, pk_part, PUBLIC_KEY_BYTES);

    // the third argument points at the bytes of the seed past offset 32
    seedexpander_init(&expander, seed, &seed[32], SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&expander, y, PARAM_OMEGA);
 }

 /**
 * @brief Serializes a public key
 *
 * The output is the seed used to generate the vector <b>h</b> (SEED_BYTES)
 * followed by the syndrome <b>s</b> written as VEC_N_SIZE_BYTES bytes.
 *
 * @param[out] pk String receiving the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *s_slot = &pk[SEED_BYTES];

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(s_slot, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
 }



 /**
 * @brief Deserializes a public key
 *
 * Reads the syndrome <b>s</b> stored after the seed and regenerates the vector
 * <b>h</b> by expanding the seed.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};

    memcpy(seed, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(s, VEC_N_SIZE_64, pk + SEED_BYTES, VEC_N_SIZE_BYTES);

    // the third argument points at the bytes of the seed past offset 32
    seedexpander_init(&expander, seed, &seed[32], SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&expander, h);
 }


 /**
 * @brief Serializes a ciphertext
 *
 * Layout of the output: <b>u</b> (VEC_N_SIZE_BYTES), then <b>v</b>
 * (VEC_N1N2_SIZE_BYTES), then the hash <b>d</b> (SHA512_BYTES).
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *v_slot = ct + VEC_N_SIZE_BYTES;
    uint8_t *d_slot = v_slot + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(v_slot, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_slot, d, SHA512_BYTES);
 }


 /**
 * @brief Deserializes a ciphertext
 *
 * Expects the layout <b>u</b> (VEC_N_SIZE_BYTES), <b>v</b>
 * (VEC_N1N2_SIZE_BYTES), hash <b>d</b> (SHA512_BYTES).
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *v_part = ct + VEC_N_SIZE_BYTES;
    const uint8_t *d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/parsing.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/parsing.h
@@ -0,0 +1,36 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 /* <stddef.h> provides size_t, which the array helpers below take as lengths */
 #include <stddef.h>
 #include <stdint.h>

 /* Conversions between 64-bit words and byte strings (least significant byte first) */

 void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in);

 uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in);

 void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

 void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


 /* Secret key <-> string */

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


 /* Public key <-> string */

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 /* Ciphertext <-> string */

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c
@@ -0,0 +1,237 @@
 #include "parameters.h"
 #include "reed_muller.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file reed_muller.c
 * Constant time implementation of Reed-Muller code RM(1,7)
 */



 // number of repeated code words: how many 128-bit RM(1,7) codewords fit in PARAM_N2 bits (rounded up)
 #define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

 // copy bit 0 into all bits of a 32 bit value
 // (evaluates to 0 when bit 0 of x is clear and to -1, i.e. all ones, when it is set)
 #define BIT0MASK(x) (-((x) & 1))


 // helpers private to this file, defined below
 static void encode(uint8_t *word, uint8_t message);
 static void hadamard(uint16_t src[128], uint16_t dst[128]);
 static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]);
 static uint8_t find_peaks(const uint16_t transform[128]);



 /**
 * @brief Encode a single byte into a single 16-byte codeword using RM(1,7)
 *
 * Each message bit that is set XORs one row of the encoding matrix into the
 * codeword. Rows, as four 32-bit quarters (in this table the low bits are
 * printed on the left):
 * 0   aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1   cccccccc cccccccc cccccccc cccccccc
 * 2   f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3   ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4   ffff0000 ffff0000 ffff0000 ffff0000
 * 5   ffffffff 00000000 ffffffff 00000000
 * 6   ffffffff ffffffff 00000000 00000000
 * 7   ffffffff ffffffff ffffffff ffffffff
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message
 */
 static void encode(uint8_t *word, uint8_t message) {
    // bit 7 inverts everything; bits 0..4 contribute the same pattern to all four quarters
    uint32_t common = BIT0MASK(message >> 7);
    common ^= BIT0MASK(message >> 0) & 0xaaaaaaaa;
    common ^= BIT0MASK(message >> 1) & 0xcccccccc;
    common ^= BIT0MASK(message >> 2) & 0xf0f0f0f0;
    common ^= BIT0MASK(message >> 3) & 0xff00ff00;
    common ^= BIT0MASK(message >> 4) & 0xffff0000;

    // bit 5 inverts quarters 1 and 3, bit 6 inverts quarters 2 and 3
    const uint32_t flip5 = BIT0MASK(message >> 5);
    const uint32_t flip6 = BIT0MASK(message >> 6);
    const uint32_t quarter[4] = {
        common,
        common ^ flip5,
        common ^ flip6,
        common ^ flip5 ^ flip6
    };

    // each quarter is stored least significant byte first
    for (size_t q = 0; q < 4; q++) {
        for (size_t b = 0; b < 4; b++) {
            word[4 * q + b] = (quarter[q] >> (8 * b)) & 0xff;
        }
    }
 }



 /**
 * @brief Hadamard transform of a 128-entry vector
 *
 * Runs seven passes. Each pass reads one buffer and writes the other: the
 * lower half of the output receives the sums of adjacent input pairs, the
 * upper half their differences. Illustrated on 8 entries (three passes):
 *
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 *
 * and so on. The buffers alternate src -> dst -> src -> ... so with seven
 * passes the final result ends up in dst, and src holds an intermediate
 * result on return. Arithmetic is done modulo 2^16.
 *
 * @param[in,out] src Input vector; overwritten, used as scratch space
 * @param[out] dst Receives the transform
 */
 static void hadamard(uint16_t src[128], uint16_t dst[128]) {
    uint16_t *bufs[2] = {src, dst};

    for (uint32_t round = 0; round < 7; round++) {
        // even rounds read src and write dst, odd rounds the other way round
        const uint16_t *in = bufs[round & 1];
        uint16_t *out = bufs[(round & 1) ^ 1];

        for (uint32_t k = 0; k < 64; k++) {
            out[k] = in[2 * k] + in[2 * k + 1];
            out[k + 64] = in[2 * k] - in[2 * k + 1];
        }
    }
 }



 /**
 * @brief Add multiple codewords into expanded codeword
 *
 * For each of the 128 bit positions, counts in how many of the MULTIPLICITY
 * copies that bit is set. Bits are counted as 0 and 1 (not as -1 and +1 as
 * the green machine does), so in the resulting Hadamard transform:
 * all values are halved
 * the first entry is 64 too high (per copy)
 *
 * @param[out] dest Structure that receives the expanded codeword (128 counters)
 * @param[in] src Structure that contains the MULTIPLICITY copies of the codeword (16 bytes each)
 */
 static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) {
    for (size_t pos = 0; pos < 128; pos++) {
        const size_t byte = pos / 8;
        const size_t shift = pos % 8;

        // first copy initializes the counter, the other copies add to it
        uint16_t total = (uint16_t) ((src[byte] >> shift) & 1);
        for (size_t rep = 1; rep < MULTIPLICITY; rep++) {
            total += (uint16_t) ((src[16 * rep + byte] >> shift) & 1);
        }

        dest[pos] = total;
    }
 }



 /**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the highest value,
 * and add 128 if the peak is positive
 * if there are two identical peaks, the peak with smallest value
 * in the lowest 7 bits it taken
 * @param[in] transform Structure that contain the expanded codeword
 */
 static uint8_t find_peaks(const uint16_t transform[128]) {
    uint16_t peak_abs = 0;
    uint16_t peak = 0;
    uint16_t pos = 0;
    uint16_t t, abs, mask;
    for (uint16_t i = 0; i < 128; i++) {
        t = transform[i];
        abs = t ^ ((-(t >> 15)) & (t ^ -t)); // t = abs(t)
        mask = -(((uint16_t)(peak_abs - abs)) >> 15);
        peak ^= mask & (peak ^ t);
        pos ^= mask & (pos ^ i);
        peak_abs ^= mask & (peak_abs ^ abs);
    }
    pos |= 128 & ((peak >> 15) - 1);
    return (uint8_t) pos;
 }




 /**
 * @brief Encodes a message with the duplicated Reed-Muller code
 *
 * Every one of the VEC_N1_SIZE_BYTES message bytes is encoded into one
 * 16-byte RM(1,7) codeword, which is then stored MULTIPLICITY times in a row.
 *
 * @param[out] cdw Array receiving the encoded message (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Array of VEC_N1_SIZE_BYTES bytes storing the message
 */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        uint8_t *first = &cdw[16 * i * MULTIPLICITY];

        // the codeword itself
        encode(first, msg[i]);

        // its MULTIPLICITY - 1 duplicates
        for (size_t rep = 1; rep < MULTIPLICITY; rep++) {
            memcpy(first + 16 * rep, first, 16);
        }
    }
 }



 /**
 * @brief Decodes the received word
 *
 * Decoding uses the fast Hadamard transform; for a more complete picture on
 * Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane,
 * The theory of error-correcting codes @cite macwilliams1977theory
 *
 * For each message byte, the MULTIPLICITY received copies are summed bit by
 * bit, transformed, and the position of the peak gives the decoded byte.
 *
 * @param[out] msg Array receiving the VEC_N1_SIZE_BYTES decoded bytes
 * @param[in] cdw Array storing the received word (16 * MULTIPLICITY bytes per message byte)
 */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    uint16_t expanded[128];
    uint16_t transform[128];

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        const uint8_t *copies = &cdw[16 * i * MULTIPLICITY];

        // bitwise sum of the copies
        expand_and_sum(expanded, copies);
        // Hadamard transform; the result lands in transform
        hadamard(expanded, transform);
        // remove the offset of the first entry to get the half Hadamard transform
        transform[0] -= 64 * MULTIPLICITY;
        // the peak gives the decoded byte
        msg[i] = find_peaks(transform);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h
@@ -0,0 +1,18 @@
 #ifndef REED_MULLER_H
 #define REED_MULLER_H


 /**
 * @file reed_muller.h
 * Header file of reed_muller.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /* Encodes each of the VEC_N1_SIZE_BYTES bytes of msg into repeated RM(1,7) codewords written to cdw */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

 /* Decodes the received word cdw into VEC_N1_SIZE_BYTES bytes written to msg */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


 #endif
--- a/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c
@@ -0,0 +1,349 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file reed_solomon.c
 * Constant time implementation of Reed-Solomon codes
 */


 /* Helpers private to this file, defined below */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void compute_roots(uint8_t *error, uint16_t *sigma);
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

 /**
 * @brief Encodes a message of PARAM_K bytes to a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * we perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial of the Reed-Solomon code
 * (coefficients RS_POLY_COEFS). The first PARAM_N1 - PARAM_K bytes of the codeword are
 * the register contents after the last step, the last PARAM_K bytes are the message itself.
 *
 * @param[out] cdw Array of PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Array of PARAM_K bytes storing the message
 */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    uint16_t generator[] = {RS_POLY_COEFS};
    uint16_t products[PARAM_G] = {0};

    memset(cdw, 0, PARAM_N1);

    // message bytes are fed in from the last one to the first
    for (size_t step = 0; step < PARAM_K; ++step) {
        const uint8_t feedback = (uint8_t) (msg[PARAM_K - 1 - step] ^ cdw[PARAM_N1 - PARAM_K - 1]);

        for (size_t t = 0; t < PARAM_G; ++t) {
            products[t] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(feedback, generator[t]);
        }

        // shift the register by one position while adding the feedback products
        uint8_t carry = 0;
        for (size_t pos = 0; pos < PARAM_N1 - PARAM_K; ++pos) {
            const uint8_t shifted_out = cdw[pos];
            cdw[pos] = (uint8_t) (carry ^ products[pos]);
            carry = shifted_out;
        }
    }

    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
 }



 /**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Syndrome i is cdw[0] plus the sum over j >= 1 of cdw[j] * alpha_ij_pow[i][j - 1],
 * with sums and products taken in the Galois field. The contributions are
 * XORed into syndromes, so the array has to be zeroed by the caller for the
 * result to be the plain syndromes.
 *
 * @param[in,out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) {
        for (size_t j = 1; j < PARAM_N1; ++j) {
            syndromes[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]);
        }
        // the j = 0 term has coefficient 1
        syndromes[i] ^= cdw[0];
    }
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
 * We use the letter p for rho which is initialized at -1. <br>
 * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value,
 * and only its first PARAM_DELTA coefficients are saved in sigma_copy
 * (the update of X_sigma_p reads sigma_copy[0 .. PARAM_DELTA - 1]).
 *
 * Only sigma[0] is assigned here; the other coefficients are updated with XOR,
 * so the caller presumably passes an array whose remaining entries are zero.
 *
 * @returns the degree of the ELP sigma
 * @param[in,out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    uint16_t deg_sigma = 0;
    uint16_t deg_sigma_p = 0;
    uint16_t deg_sigma_copy = 0;
    uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    uint16_t pp = (uint16_t) -1; // 2*rho
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    uint16_t mask1, mask2, mask12;
    uint16_t deg_X, deg_X_sigma_p;
    uint16_t dd;
    uint16_t mu;

    uint16_t i;

    sigma[0] = 1;
    for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (the length is in bytes: PARAM_DELTA coefficients of 2 bytes each)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
        deg_sigma_copy = deg_sigma;

        // dd = d / d_p
        dd = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(d_p));

        // sigma += dd * X_sigma_p (coefficient 0 of X_sigma_p is always 0, so start at 1)
        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            sigma[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(dd, X_sigma_p[i]);
        }

        deg_X = mu - pp;
        deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        mask12 = mask1 & mask2;
        deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

        // last iteration: sigma and its degree are final, the remaining updates are not needed
        if (mu == (2 * PARAM_DELTA - 1)) {
            break;
        }

        // when the degree increased, (rho, d_p, sigma_p) take the values of this iteration
        pp ^= mask12 & (mu ^ pp);
        d_p ^= mask12 & (d ^ d_p);
        // X_sigma_p becomes X * (saved sigma) if the degree increased, X * X_sigma_p otherwise
        for (i = PARAM_DELTA; i; --i) {
            X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }

        deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
        // next discrepancy: syndromes[mu + 1] + sum of sigma[i] * syndromes[mu + 1 - i]
        d = syndromes[mu + 1];

        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            d ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQCRMRS128_CLEAN_fft for more details.
 *
 * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
 * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint8_t *error, uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0};

    PQCLEAN_HQCRMRS128_CLEAN_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(error, w);
 }



 /**
 * @brief Computes the polynomial z(x)
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * z[0] is 1 and z[1] is sigma[1] (if degree >= 1) plus syndromes[0]. For i >= 2,
 * z[i] = sigma[i] + syndromes[i - 1] + sum_{j=1}^{i-1} sigma[j] * syndromes[i - j - 1]
 * when i <= degree, and 0 otherwise. The cut-off at degree is applied with a
 * mask rather than a branch.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    uint16_t keep;

    z[0] = 1;

    // start from the coefficients of sigma up to its degree, zero above
    for (size_t idx = 1; idx <= PARAM_DELTA; ++idx) {
        // keep = 0xffff if (idx <= degree) and 0 otherwise
        keep = -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] = keep & sigma[idx];
    }

    // this term is added whatever the degree
    z[1] ^= syndromes[0];

    for (size_t idx = 2; idx <= PARAM_DELTA; ++idx) {
        uint16_t acc = syndromes[idx - 1];

        for (size_t t = 1; t < idx; ++t) {
            acc ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[t], syndromes[idx - t - 1]);
        }

        keep = -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] ^= keep & acc;
    }
 }



 /**
 * @brief Computes the error values
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * Works in three steps: collect gf_exp[i] for each error position i into
 * beta_j, compute one error value per collected entry into e_j, then scatter
 * the e_j back to the error positions. Positions are matched with masks, not
 * branches. Values are added to error_values with +=, so the caller
 * presumably passes a zeroed array.
 *
 * @param[in,out] error_values Array of PARAM_N1 elements receiving the error values
 * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
 * @param[in] error Array of (at least) PARAM_N1 elements, nonzero at the error positions
 */
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
    uint16_t beta_j[PARAM_DELTA] = {0};
    uint16_t e_j[PARAM_DELTA] = {0};

    uint16_t delta_counter;
    uint16_t delta_real_value;
    uint16_t found;
    uint16_t mask1;
    uint16_t mask2;
    uint16_t tmp1;
    uint16_t tmp2;
    uint16_t inverse;
    uint16_t inverse_power_j;

    // Compute the beta_{j_i} page 31 of the documentation
    // (delta_counter counts the error positions seen so far and selects the slot of beta_j to fill)
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; i++) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            beta_j[j] += mask1 & mask2 & gf_exp[i];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
    // number of error positions that were stored in beta_j
    delta_real_value = delta_counter;

    // Compute the e_{j_i} page 31 of the documentation
    for (size_t i = 0; i < PARAM_DELTA; ++i) {
        tmp1 = 1;
        tmp2 = 1;
        inverse = PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(beta_j[i]);
        inverse_power_j = 1;

        // tmp1 = 1 + sum over j of z[j] * inverse^j, i.e. z evaluated at inverse
        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
            inverse_power_j = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, inverse);
            tmp1 ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, z[j]);
        }
        // tmp2 = product over the other entries of beta_j of (1 + inverse * beta)
        for (size_t k = 1; k < PARAM_DELTA; ++k) {
            tmp2 = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
        }
        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
        e_j[i] = mask1 & PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(tmp2));
    }

    // Place the delta e_{j_i} values at the right coordinates of the output vector
    // (same walk as the first loop, so e_j[k] goes to the k-th error position)
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; ++i) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            error_values[i] += mask1 & mask2 & e_j[j];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
 }



 /**
 * @brief Applies the error values to the received word
 *
 * Each of the PARAM_N1 symbols of cdw is XORed with the low byte of the matching entry of error_values.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received word; corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error value of every position
 */
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    size_t pos = 0;

    while (pos < PARAM_N1) {
        cdw[pos] = (uint8_t) (cdw[pos] ^ error_values[pos]);
        ++pos;
    }
 }



 /**
 * @brief Decodes the received word
 *
 * The decoder chains six steps:
 *    <ol>
 *    <li> computation of the 2*PARAM_DELTA syndromes;
 *    <li> computation of the error-locator polynomial sigma;
 *    <li> search of the error-locator numbers from the roots of sigma (done by additive FFT);
 *    <li> computation of the polynomial z(x);
 *    <li> computation of the error values;
 *    <li> correction of the errors in the received word.
 *    </ol>
 * Once the word is corrected, PARAM_K bytes starting at offset PARAM_G - 1 are copied to msg.
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Buffer receiving the PARAM_K bytes of the decoded message
 * @param[in,out] cdw Received word; it is corrected in place
 */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    uint16_t synd[2 * PARAM_DELTA] = {0};
    // The degree of the locator is at most PARAM_DELTA, the FFT needs the extra room
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint8_t err_positions[1 << PARAM_M] = {0};
    uint16_t z_poly[PARAM_N1] = {0};
    uint16_t err_values[PARAM_N1] = {0};

    // Steps 1 and 2: syndromes, then error locator polynomial
    compute_syndromes(synd, cdw);
    const uint16_t locator_degree = compute_elp(locator, synd);

    // Step 3: error positions
    compute_roots(err_positions, locator);

    // Steps 4 and 5: z(x), then the value of every error
    compute_z_poly(z_poly, locator, locator_degree, synd);
    compute_error_values(err_values, z_poly, err_positions);

    // Step 6: fix the received word
    correct_errors(cdw, err_values);

    // The message is read from the corrected codeword
    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h
--- a/src/kem/hqc/hqc-rmrs-128/clean/vector.c
+++ b/src/kem/hqc/hqc-rmrs-128/clean/vector.c
@@ -0,0 +1,176 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */


 /**
 * @brief Samples <b>weight</b> distinct positions in [0, PARAM_N - 1]
 *
 * The result describes a binary vector of Hamming weight <b>weight</b> stored by position:
 * v[0..weight-1] receive the pairwise distinct coordinates of its non-zero bits.
 * Positions are drawn by rejection sampling. Suppose PARAM_N is equal to \f$ 70853 \f$, a position \f$ r \f$ is obtained as follows:
 *  1. Three bytes of seedexpander output form a number \f$ x \f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853 \f$
 *  3. If \f$ x \geq t \f$, go to 1
 *  4. Keep \f$ r = x \mod 70853 \f$
 *
 * The value \f$ t \f$ is precomputed as UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position equal to one that was already selected is discarded and drawn again.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Array receiving the <b>weight</b> selected positions
 * @param[in] weight Number of positions to draw, expected to be at most PARAM_OMEGA_R
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
    const size_t pool_len = 3 * weight;
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    size_t cursor = pool_len;              // "pool exhausted": forces a refill before the first read
    size_t accepted = 0;

    while (accepted < weight) {
        uint32_t candidate;
        uint8_t is_new = 1;

        // Draw 24-bit big-endian values until one falls below the rejection threshold
        do {
            if (cursor == pool_len) {
                seedexpander(ctx, pool, pool_len);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;
        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;
        v[accepted] = candidate;

        // The slot is only kept when the position differs from all earlier ones;
        // the whole prefix is scanned without early exit.
        for (size_t prev = 0; prev < accepted; prev++) {
            if (v[prev] == candidate) {
                is_new = 0;
            }
        }
        accepted += is_new;
    }
 }



 /**
 * @brief Sets <b>weight</b> random bits in a bit-packed vector
 *
 * The positions are drawn by PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates
 * (rejection sampling in [0, PARAM_N - 1], see that function) and each of them is OR-ed into v:
 * position p sets bit p % 64 of word p / 64.
 * Bits that are already set in v are left untouched, so v has Hamming weight <b>weight</b>
 * only if the caller passes it zeroed.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to the array of 64-bit words receiving the bits
 * @param[in] weight Number of bits to set, expected to be at most PARAM_OMEGA_R
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    uint32_t support[PARAM_OMEGA_R] = {0};

    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, support, weight);

    for (size_t n = 0; n < weight; ++n) {
        const uint32_t coord = support[n];
        // coord >> 6 is the word index, coord & 63 the bit inside that word
        v[coord >> 6] |= ((uint64_t) 1) << (coord & 63);
    }
 }



 /**
 * @brief Fills a vector of dimension <b>PARAM_N</b> with random bits
 *
 * VEC_N_SIZE_BYTES bytes are requested from the seedexpander and loaded into the
 * VEC_N_SIZE_64 words of v; the last word is then masked with RED_MASK to drop the extra bits.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t raw[VEC_N_SIZE_BYTES] = {0};
    uint64_t *const top_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, raw, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N_SIZE_64, raw, VEC_N_SIZE_BYTES);

    *top_word &= RED_MASK;
 }



 /**
 * @brief Adds two binary vectors (word-wise XOR)
 *
 * Words are processed in increasing index order, one at a time.
 *
 * @param[out] o Pointer to an array of <b>size</b> words receiving the sum
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Number of 64-bit words in each vector
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t word = 0;

    while (word != size) {
        o[word] = v2[word] ^ v1[word];
        word++;
    }
 }



 /**
 * @brief Compares two byte vectors without data-dependent branches
 *
 * The differences of all <b>size</b> byte pairs are accumulated before the result is derived,
 * so the loop always runs over the whole input.
 *
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Number of bytes to compare
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 uint8_t PQCLEAN_HQCRMRS128_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint64_t diff = 0;

    for (size_t n = 0; n != size; ++n) {
        diff |= (uint64_t) (v1[n] ^ v2[n]);
    }

    // diff is below 256, so its two's complement has bit 63 set exactly when diff != 0
    return (uint8_t) (((uint64_t) 0 - diff) >> 63);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When the output is not shorter than the input, the CEIL_DIVIDE(size_v, 64) words of v are copied as is.
 * When it is shorter, VEC_N1N2_SIZE_64 words are copied (whatever the value of size_o) and the bits
 * of the last copied word that lie above size_o % 64 are cleared.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64));
        return;
    }

    // Number of unused high bits in the last output word (0 when size_o is a multiple of 64)
    const uint32_t used_bits = size_o % 64;
    const uint32_t spare_bits = used_bits ? 64 - used_bits : 0;

    memcpy(o, v, 8 * VEC_N1N2_SIZE_64);
    o[VEC_N1N2_SIZE_64 - 1] &= (~(uint64_t) 0) >> spare_bits;
 }
--- a/src/kem/hqc/hqc-rmrs-128/clean/vector.h
+++ b/src/kem/hqc/hqc-rmrs-128/clean/vector.h
@@ -0,0 +1,27 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 *
 * Declares the vector sampling routines (driven by the seed expander) and the
 * small vector utilities (addition, comparison, resizing) of the HQC scheme.
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 // Writes `weight` pairwise distinct positions, each in [0, PARAM_N - 1], to v[0..weight-1].
 void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight);

 // Draws `weight` positions as above and ORs the matching bits into the bit-packed vector v.
 void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 // Fills the VEC_N_SIZE_64 words of v with seed expander output and masks the last word with RED_MASK.
 void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);


 // o[i] = v1[i] ^ v2[i] for the `size` 64-bit words of the vectors.
 void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 // Returns 0 when the first `size` bytes of v1 and v2 are equal, 1 otherwise.
 uint8_t PQCLEAN_HQCRMRS128_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

 // Copies v into o; size_o and size_v are expressed in bits.
 void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt
@@ -0,0 +1,16 @@
 # Sources of the AVX2 implementation of HQC-RMRS-192
 set(
  	SRC_AVX2_HQCRMRS192
 	code.c
 	fft.c
 	gf2x.c
 	gf.c
 	hqc.c
 	kem.c
 	parsing.c
 	reed_muller.c
 	reed_solomon.c
 	vector.c
 )

 # NOTE(review): this is the avx2 target, yet the second argument is the
 # PQCLEAN_HQCRMRS192_CLEAN prefix while the sources in this directory use
 # PQCLEAN_HQCRMRS192_AVX2_ symbols. Looks like a copy/paste from the clean
 # variant - confirm against the definition of define_kem_alg.
 define_kem_alg(hqcrmrs192_avx2
  PQCLEAN_HQCRMRS192_CLEAN "${SRC_AVX2_HQCRMRS192}" "${CMAKE_CURRENT_SOURCE_DIR}")
--- a/src/kem/hqc/hqc-rmrs-192/avx2/api.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQCRMRS192_AVX2_API_H
 #define PQCLEAN_HQCRMRS192_AVX2_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 // Human readable name of the parameter set
 #define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_ALGNAME                      "HQC-RMRS-192"

 // Sizes, in bytes, of the secret key, public key, shared secret and ciphertext
 #define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES               4562
 #define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_PUBLICKEYBYTES               4522
 #define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_CIPHERTEXTBYTES              9026

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32
 // NOTE(review): with the values above SECRETKEYBYTES - PUBLICKEYBYTES is 40, not 32 - confirm which figure is intended.

 // Key generation: fills pk and sk (presumably PUBLICKEYBYTES and SECRETKEYBYTES bytes - verify against kem.c).
 int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 // Encapsulation: produces the ciphertext ct and the shared secret ss from the public key pk.
 int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 // Decapsulation: produces the shared secret ss from the ciphertext ct and the secret key sk.
 int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/code.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/code.c
@@ -0,0 +1,47 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 * @brief Encodes the message m into the concatenated code word em
 *
 * The message goes through the Reed-Solomon encoder first; the resulting word is then
 * encoded with the duplicated Reed-Muller code.
 *
 * @param[out] em Pointer to an array receiving the concatenated code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQCRMRS192_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
    // Intermediate Reed-Solomon code word
    uint8_t rs_word[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(rs_word, m);
    PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(em, rs_word);
 }



 /**
 * @brief Decodes the concatenated code word em into the message m
 *
 * Mirror of the encoder: the Reed-Muller decoder runs first and its output is handed
 * to the Reed-Solomon decoder.
 *
 * @param[out] m Pointer to an array receiving the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQCRMRS192_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
    // Reed-Solomon word recovered by the Reed-Muller decoder
    uint8_t rs_word[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(m, rs_word);
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/code.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/code.h
@@ -0,0 +1,18 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 *
 * Declares the encoder and decoder of the concatenated (Reed-Solomon then Reed-Muller) code.
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Encodes `message` with the Reed-Solomon code, then the Reed-Muller code, into em.
 void PQCLEAN_HQCRMRS192_AVX2_code_encode(uint8_t *em, const uint8_t *message);

 // Decodes em with the Reed-Muller decoder, then the Reed-Solomon decoder, into m.
 void PQCLEAN_HQCRMRS192_AVX2_code_decode(uint8_t *m, const uint8_t *em);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/fft.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/fft.c
@@ -0,0 +1,351 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 // Forward declarations of the file-local helpers defined below.
 // They are required because radix and radix_big call each other.
 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis is made of the single-bit elements 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2, in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    size_t slot = 0;

    for (size_t shift = PARAM_M - 1; shift > 0; --shift) {
        betas[slot++] = 1 << shift;
    }
 }



 /**
 * @brief Computes the subset sums (XOR) of the given set
 *
 * Entry i of subset_sums is the XOR of the elements of set selected by the bits of i:
 * bit b of i set means set[b] is part of the sum. Entry 0 is the empty sum, 0.
 * The table is built by doubling: handling element b appends 2^b new entries, each being
 * set[b] XOR-ed with one of the 2^b entries already computed.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    size_t filled = 1; // number of entries computed so far

    subset_sums[0] = 0;

    for (size_t elem = 0; elem < set_size; ++elem) {
        const uint16_t addend = set[elem];

        for (size_t lower = 0; lower < filled; ++lower) {
            subset_sums[filled + lower] = addend ^ subset_sums[lower];
        }
        filled <<= 1;
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * The cases m_f = 1..4 (f with 2, 4, 8 and 16 coefficients) are fully unrolled;
 * any other value of m_f is delegated to radix_big.
 * In the unrolled cases several outputs are computed from outputs assigned earlier
 * in the same case, so the order of the statements matters.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    case 4:
        // 16 coefficients in, 8 + 8 out
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    case 3:
        // 8 coefficients in, 4 + 4 out
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    case 2:
        // 4 coefficients in, 2 + 2 out
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        break;

    case 1:
        // 2 coefficients: f0 and f1 are the two constants
        f0[0] = f[0];
        f1[0] = f[1];
        break;

    default:
        // Generic case, handled by radix_big (which calls back into radix with m_f - 1)
        radix_big(f0, f1, f, m_f);
        break;
    }
 }

 /**
 * @brief Generic step of the radix conversion, used by radix when no unrolled case applies
 *
 * f is seen as four quarters of n = 2^(m_f - 2) coefficients. Two polynomials of 2n coefficients
 * are built from them (Q from the upper half of f, R from the lower half combined with Q),
 * each is converted with radix at size m_f - 1, and the results are interleaved:
 * f0 = (R0 | Q0) and f1 = (R1 | Q1).
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of 2^m_f coefficients
 * @param[in] m_f Base-2 logarithm of the size of f
 */
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t quot[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t rem[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t quot_even[1 << (PARAM_FFT - 2)] = {0};
    uint16_t quot_odd[1 << (PARAM_FFT - 2)] = {0};
    uint16_t rem_even[1 << (PARAM_FFT - 2)] = {0};
    uint16_t rem_odd[1 << (PARAM_FFT - 2)] = {0};

    const size_t quarter = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = quarter * sizeof(uint16_t);

    // quot = (top quarter | top quarter), rem = lower half of f
    memcpy(quot, f + 3 * quarter, quarter_bytes);
    memcpy(quot + quarter, f + 3 * quarter, quarter_bytes);
    memcpy(rem, f, 2 * quarter_bytes);

    for (size_t c = 0; c < quarter; ++c) {
        quot[c] ^= f[2 * quarter + c];
        rem[quarter + c] ^= quot[c];
    }

    radix(quot_even, quot_odd, quot, m_f - 1);
    radix(rem_even, rem_odd, rem, m_f - 1);

    // Low quarter of each output comes from rem, high quarter from quot
    memcpy(f0, rem_even, quarter_bytes);
    memcpy(f0 + quarter, quot_even, quarter_bytes);
    memcpy(f1, rem_odd, quarter_bytes);
    memcpy(f1 + quarter, quot_odd, quarter_bytes);
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS192_AVX2_fft.
 * It recurses on the two halves f0 and f1 produced by the radix conversion, with m - 1 betas
 * and m_f - 1, until m_f reaches 1. When f has at most 3 coefficients the second recursive
 * call is skipped and f1[0] is used directly.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in,out] f Array of 2^m_f coefficients; it is overwritten (twisted) when betas[m - 1] != 1
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f Base-2 logarithm of the size of the array f (2^m_f entries are read)
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t gammas[PARAM_M - 2] = {0};
    uint16_t deltas[PARAM_M - 2] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

    uint16_t beta_m_pow;
    size_t i, j, k;
    size_t x;

    // Step 1
    // Base case: f = f[0] + f[1].x, evaluated directly on every subset sum of betas
    if (m_f == 1) {
        for (i = 0; i < m; ++i) {
            tmp[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], f[1]);
        }

        w[0] = f[0];
        x = 1;
        // Doubling construction: after step j, w[0..2x-1] holds f at the sums of betas[0..j]
        for (j = 0; j < m; ++j) {
            for (k = 0; k < x; ++k) {
                w[x + k] = w[k] ^ tmp[j];
            }
            x <<= 1;
        }

        return;
    }

    // Step 2: compute g
    // f[i] is multiplied by betas[m - 1]^i, in place
    if (betas[m - 1] != 1) {
        beta_m_pow = 1;
        x = 1;
        x <<= m_f;
        for (i = 1; i < x; ++i) {
            beta_m_pow = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
    for (i = 0; i + 1 < m; ++i) {
        gammas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS192_AVX2_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    // k = 2^(m - 1): number of evaluations returned by each recursive call
    k = 1;
    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        // w[i] = u[i] + gammas_sums[i].v[i] and w[k + i] = w[i] + v[i]
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * On this first level (as opposed to the recursive calls to fft_rec) the last basis element is 1,
 * so no twisting of f is needed and the gammas are the betas themselves. <br>
 * With k = 2^(PARAM_M-1), w[i] receives the evaluation at the i-th subset sum of the betas
 * and w[k + i] the evaluation at that same sum plus 1.
 *
 * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t even_part[1 << (PARAM_FFT - 1)] = {0};
    uint16_t odd_part[1 << (PARAM_FFT - 1)] = {0};
    uint16_t twisted_basis[PARAM_M - 1] = {0};
    uint16_t eval_even[1 << (PARAM_M - 1)] = {0};
    uint16_t eval_odd[1 << (PARAM_M - 1)] = {0};

    const size_t half = 1 << (PARAM_M - 1);

    // Follows Gao and Mateer algorithm.
    // Step 1 (PARAM_FFT > 1) and step 2 (beta_m = 1) require no work here.
    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // Step 3: radix conversion
    radix(even_part, odd_part, f, PARAM_FFT);

    // Step 4: deltas[i] = betas[i]^2 + betas[i]
    for (size_t b = 0; b < PARAM_M - 1; ++b) {
        twisted_basis[b] = PQCLEAN_HQCRMRS192_AVX2_gf_square(basis[b]) ^ basis[b];
    }

    // Step 5: evaluate both halves
    fft_rec(eval_even, even_part, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);
    fft_rec(eval_odd, odd_part, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);

    // Steps 6 and 7: recombine.
    // Evaluations at 0 and at 1 first ...
    w[0] = eval_even[0];
    w[half] = eval_odd[0] ^ eval_even[0];

    // ... then at every other element and at that element plus 1
    for (size_t e = 1; e < half; ++e) {
        w[e] = eval_even[e] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(basis_sums[e], eval_odd[e]);
        w[half + e] = eval_odd[e] ^ w[e];
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every field element whose evaluation in w is zero, one entry of error is toggled (XOR with 1).
 * The entry is error[0] for the two evaluations w[0] and w[k] (k = 2^(PARAM_M-1)), and
 * error[PARAM_GF_MUL_ORDER - gf_log[e]] for any other element e.
 * The zero test is done with arithmetic on the sign bit instead of a branch.
 *
 * @param[in,out] error Array with the error; entries are toggled, so the caller provides the initial content
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    const uint16_t half = 1 << (PARAM_M - 1);

    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // 1 ^ ((uint16_t) -x >> 15) is 1 when x == 0 and 0 for the non-zero field values
    error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15);
    error[0] ^= 1 ^ ((uint16_t) - w[half] >> 15);

    for (size_t n = 1; n < half; ++n) {
        const uint16_t elem = basis_sums[n];
        const size_t pos_low = PARAM_GF_MUL_ORDER - gf_log[elem];
        const size_t pos_high = PARAM_GF_MUL_ORDER - gf_log[elem ^ 1];

        error[pos_low] ^= 1 ^ ((uint16_t) - w[n] >> 15);
        error[pos_high] ^= 1 ^ ((uint16_t) - w[half + n] >> 15);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/fft.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/fft.h
@@ -0,0 +1,18 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 *
 * Declares the additive FFT over the field and the routine turning its output
 * into an error vector.
 */

 #include <stddef.h>
 #include <stdint.h>

 // Evaluates f (2^PARAM_FFT entries, f_coeffs = deg(f)+1) and writes 2^PARAM_M evaluations to w.
 void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 // Toggles an entry of error for every zero evaluation found in w.
 void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/gf.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf.c
@@ -0,0 +1,176 @@
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 /**
 * @file gf.c
 * Galois field implementation with multiplication using the pclmulqdq instruction
 */


 // Forward declaration; the definition follows.
 static uint16_t gf_reduce(uint64_t x, size_t deg_x);



 /**
 * Reduces polynomial x modulo primitive polynomial GF_POLY.
 *
 * Each step folds the bits of x located at positions PARAM_M and above back onto the
 * low PARAM_M bits, once per non-zero term of PARAM_GF_POLY below its leading term.
 * The number of steps is derived from deg_x, not from the value of x.
 *
 * @returns x mod GF_POLY
 * @param[in] x Polynomial of degree less than 64
 * @param[in] deg_x The degree of polynomial x
 */
 static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
    uint16_t z1, z2, rmdr, dist;
    uint64_t mod;
    size_t steps, i, j;

    // Deduce the number of steps of reduction
    steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2);

    // Reduce
    for (i = 0; i < steps; ++i) {
        // mod = overflowing high part, x = low PARAM_M bits; the first XOR
        // handles the constant term of the polynomial
        mod = x >> PARAM_M;
        x &= (1 << PARAM_M) - 1;
        x ^= mod;

        // Walk the remaining set bits of the polynomial from the lowest one up
        // (PARAM_GF_POLY_WT - 2 of them are processed): mod is shifted to each
        // bit position in turn and XOR-ed into x.
        z1 = 0;
        rmdr = PARAM_GF_POLY ^ 1;
        for (j = PARAM_GF_POLY_WT - 2; j; --j) {
            z2 = __tzcnt_u16(rmdr); // position of the lowest set bit left in rmdr
            dist = (uint16_t) (z2 - z1);
            mod <<= dist;
            x ^= mod;
            rmdr ^= 1 << z2;
            z1 = z2;
        }
    }

    return x;
 }



 /**
 * Multiplies two elements of GF(2^GF_M).
 * The operands are multiplied as binary polynomials with a carry-less multiplication
 * and the product is then reduced by gf_reduce.
 * @returns the product a*b
 * @param[in] a Element of GF(2^GF_M)
 * @param[in] b Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b) {
    const __m128i product = _mm_clmulepi64_si128(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b), 0);
    const uint32_t unreduced = _mm_cvtsi128_si32(product);

    return gf_reduce(unreduced, 2 * (PARAM_M - 1));
 }



 /**
 *  Compute 16 products in GF(2^GF_M).
 *
 *  The 256-bit inputs are split into 128-bit halves. For each half, four carry-less
 *  multiplications are issued on masked copies of the operands (low / high 64-bit lane,
 *  CONST128_MASKL / CONST128_MASKH selection); their results are masked, merged and
 *  shuffled back into 16-bit lanes. The unreduced products are then reduced in 7 rounds
 *  using the red[] table.
 *  NOTE(review): the CONST128_*, CONST256_* constants and red[] are defined outside this
 *  file section; their exact bit patterns were not checked here.
 *
 *  @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M)
 *  @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers
 *  @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer
 *
 */
 __m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b) {
    // Low and high 128-bit halves of both operands
    __m128i al = _mm256_extractf128_si256(a, 0);
    __m128i ah = _mm256_extractf128_si256(a, 1);
    __m128i bl = _mm256_extractf128_si256(b, 0);
    __m128i bh = _mm256_extractf128_si256(b, 1);

    // Products for the low half: 64-bit lane 0 (selector 0x0), then lane 1 (selector 0x11)
    __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0);
    abl0 &= CONST128_MIDDLEMASKL;
    abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

    __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11);
    abh0 &= CONST128_MIDDLEMASKL;
    abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

    // Gather the results of both lanes into one 128-bit register
    abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL);
    abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH);

    // Same sequence for the high half
    __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0);
    abl1 &= CONST128_MIDDLEMASKL;
    abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

    __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11);
    abh1 &= CONST128_MIDDLEMASKL;
    abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

    abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL);
    abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH);

    // abl1 becomes the upper 128 bits, abl0 the lower 128 bits
    __m256i ret = _mm256_set_m128i(abl1, abl0);

    __m256i aux = CONST256_MR0;

    // Reduction: in each round, red[i] is XOR-ed into the 16-bit lanes in which
    // the bit selected by aux is set; aux moves one bit to the left per round.
    for (int32_t i = 0; i < 7; i++) {
        ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux);
        aux = aux << 1;
    }

    ret &= CONST256_LASTMASK;
    return ret;
 }



 /**
 * Squares an element of GF(2^GF_M).
 * Squaring a binary polynomial moves coefficient i to position 2*i, so the
 * low PARAM_M bits of a are spread out that way and the result is reduced by gf_reduce.
 * @returns a^2
 * @param[in] a Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a) {
    uint32_t spread = 0;

    for (size_t bit = 0; bit < PARAM_M; ++bit) {
        const uint32_t coeff = ((uint32_t) a >> bit) & 1;
        spread |= coeff << (2 * bit);
    }

    return gf_reduce(spread, 2 * (PARAM_M - 1));
 }



 /**
 * Computes the inverse of an element of GF(2^8),
 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254
 * (the value returned is a^254).
 * @returns the inverse of a
 * @param[in] a Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a) {
    // One named value per link of the chain; the suffix is the exponent of a
    const uint16_t a2 = PQCLEAN_HQCRMRS192_AVX2_gf_square(a);
    const uint16_t a3 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(a2, a);
    const uint16_t a4 = PQCLEAN_HQCRMRS192_AVX2_gf_square(a2);
    const uint16_t a7 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(a4, a3);
    const uint16_t a11 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(a4, a7);
    const uint16_t a15 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(a11, a4);
    const uint16_t a30 = PQCLEAN_HQCRMRS192_AVX2_gf_square(a15);
    const uint16_t a60 = PQCLEAN_HQCRMRS192_AVX2_gf_square(a30);
    const uint16_t a120 = PQCLEAN_HQCRMRS192_AVX2_gf_square(a60);
    const uint16_t a127 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(a120, a7);

    return PQCLEAN_HQCRMRS192_AVX2_gf_square(a127); /* a^254 */
 }



 /**
 * Returns i modulo 2^GF_M-1.
 * i must be less than 2*(2^GF_M-1).
 * Therefore, the return value is either i or i-2^GF_M+1.
 * The selection between the two is done with a mask, without branching.
 * @returns i mod (2^GF_M-1)
 * @param[in] i The integer whose modulo is taken
 */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i) {
    const uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // For inputs in the documented range, bit 15 of the difference is set exactly
    // when i < GF_MUL_ORDER; undo is then 0xffff, and 0 otherwise.
    const uint16_t undo = -(reduced >> 15);

    return reduced + (undo & PARAM_GF_MUL_ORDER);
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/gf.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf.h
@@ -0,0 +1,69 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <immintrin.h>
 #include <stddef.h>
 #include <stdint.h>

 /* Builds a 256-bit vector whose low 128-bit lane is v1 and whose high 128-bit lane is v0. */
 #define _mm256_set_m128i(v0, v1)  _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

 /**
 * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
 * The table wraps around at its end: its trailing entries (1, 2, 4) repeat
 * its leading ones.
 * The last two elements are needed by the PQCLEAN_HQCRMRS192_AVX2_gf_mul function
 * (for example if both elements to multiply are zero).
 */
 static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



 /**
 * Discrete logarithm of the elements of GF(2^8) to the base alpha
 * (root of 1 + x^2 + x^3 + x^4 + x^8), indexed by the element value.
 * The logarithm of 0 is set to 0 by convention.
 */
 static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };

 /**
 * Masks needed for the computation of 16 multiplications in GF(2^M).
 *
 * The set1 variants broadcast one 64-bit pattern to every 64-bit lane
 * (e.g. CONST256_LASTMASK keeps the low byte of each 16-bit lane).
 * CONST128_INDEXH / CONST128_INDEXL give the two 64-bit halves different
 * values: one half is a byte-index pattern, the other is all ones.
 */
 #define CONST256_MR0      _mm256_set1_epi64x((long long) 0x0100010001000100)
 #define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff)
 #define CONST128_MASKL       _mm_set1_epi64x((long long) 0x0000ffff0000ffff)
 #define CONST128_MASKH       _mm_set1_epi64x((long long) 0xffff0000ffff0000)
 #define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff)
 #define CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000)
 #define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff)
 #define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100)

 /**
 * Reduction constants: red[i] holds x^(8+i) modulo x^8+x^4+x^3+x^2+1
 * (0x1d for x^8 up to 0x13 for x^14), replicated in every 16-bit lane
 * of a 256-bit register.
 */
 static const __m256i red[7] = {
    {0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL},
    {0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL},
    {0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL},
    {0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL},
    {0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL},
    {0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL},
    {0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL},

 };


 /* Multiplies two field elements. */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b);

 /* Vectorised multiplication of the field elements packed in a and b. */
 __m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b);

 /* Returns a^2. */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a);

 /* Returns the inverse of a, computed as a^254. */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a);

 /* Returns i mod (2^GF_M-1); i must be less than 2*(2^GF_M-1). */
 uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c
@@ -0,0 +1,408 @@
 #include "gf2x.h"
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * \file gf2x.c
 * \brief AVX2 implementation of multiplication of two polynomials
 */



 /* Number of 256-bit words in one ninth of a PARAM_N-bit polynomial (operand size of the inner three-way split). */
 #define VEC_N_SPLIT_3x3 CEIL_DIVIDE(PARAM_N/9, 256)
 /* Number of 256-bit words in one third of the operands handled by karat_mult9. */
 #define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3)

 /* Forward declarations of the file-local helpers defined below. */
 static inline void reduce(uint64_t *o, const __m256i *a);
 static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B);
 static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B);


 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * This function computes the modular reduction of the polynomial a(x):
 * the words located PARAM_N bits and above are shifted down by PARAM_N bits
 * and XORed onto the low words.
 *
 * @param[out] o Pointer to the result
 * @param[in] a256 Pointer to the polynomial a(x)
 */
 static inline void reduce(uint64_t *o, const __m256i *a256) {
    size_t i, i2;
    __m256i r256, carry256;
    __m256i *o256 = (__m256i *)o;
    const uint64_t *a64 = (const uint64_t *)a256;
    uint64_t r, carry;

    // Vectorised part: four 64-bit words per iteration. Each folded word is
    // a64[i] >> (PARAM_N mod 64) combined with the low bits of a64[i + 1].
    i2 = 0;
    for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) {
        r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i]));
        r256 = _mm256_srli_epi64(r256, PARAM_N & 63);
        carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1]));
        carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63);
        r256 ^= carry256;
        // a256[i2] is dereferenced directly while the store to o is unaligned.
        _mm256_storeu_si256(&o256[i2], a256[i2] ^ r256);
        i2 += 1;
    }

    // Scalar tail: resume at the first output word the vector loop did not write.
    i = i - (PARAM_N >> 6);
    for (; i < (PARAM_N >> 6) + 1; i++) {
        r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63);
        carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63);
        r ^= carry;
        o[i] = a64[i] ^ r;
    }

    // Mask the last output word (RED_MASK is defined outside this file).
    o[PARAM_N >> 6] &= RED_MASK;
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 * A(x) and B(x) are stored in 128-bit registers
 * This function computes A(x)*B(x) using Karatsuba
 *
 * Two 128-bit words are read from each of A and B and four 128-bit words
 * are written to C. Every 128x128-bit sub-product is itself built from
 * three 64x64-bit carry-less multiplications.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) {
    __m128i D1[2];
    __m128i D0[2], D2[2];
    __m128i Al = _mm_loadu_si128(A);
    __m128i Ah = _mm_loadu_si128(A + 1);
    __m128i Bl = _mm_loadu_si128(B);
    __m128i Bh = _mm_loadu_si128(B + 1);

    //  Compute Al.Bl=D0
    __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0);
    __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11);
    // The 0x4e shuffle swaps the two 64-bit halves, so the low half of the XOR is (low ^ high).
    __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e));
    __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e));
    __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    // The middle term DD1 straddles the boundary between the two output words.
    D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    //  Compute Ah.Bh=D2
    DD0 = _mm_clmulepi64_si128(Ah, Bh, 0);
    DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11);
    AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e));
    BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Compute AlpAh.BlpBh=D1
    // Initialisation of AlpAh and BlpBh
    __m128i AlpAh = _mm_xor_si128(Al, Ah);
    __m128i BlpBh = _mm_xor_si128(Bl, Bh);
    DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0);
    DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11);
    AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e));
    BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Final computation of C
    __m128i middle = _mm_xor_si128(D0[1], D2[0]);
    C[0] = D0[0];
    C[1] = middle ^ D0[0] ^ D1[0];
    C[2] = middle ^ D1[1] ^ D2[1];
    C[3] = D2[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_1: A(x) and B(x) each occupy
 * two 256-bit words and the product fills four.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[2], cross[2], high[2];
    __m256i sum_a, sum_b, overlap;
    const __m128i *a_words = (const __m128i *) A;
    const __m128i *b_words = (const __m128i *) B;

    // Products of the two low words and of the two high words
    karat_mult_1((__m128i *) low, a_words, b_words);
    karat_mult_1((__m128i *) high, a_words + 2, b_words + 2);

    // Product of the word sums
    sum_a = _mm256_xor_si256(A[0], A[1]);
    sum_b = _mm256_xor_si256(B[0], B[1]);
    karat_mult_1((__m128i *) cross, (__m128i *) &sum_a, (__m128i *) &sum_b);

    // Recombination
    overlap = _mm256_xor_si256(low[1], high[0]);

    C[0] = low[0];
    C[1] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[0]), cross[0]);
    C[2] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[1]), high[1]);
    C[3] = high[1];
 }


 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_2: A(x) and B(x) each occupy
 * four 256-bit words and the product fills eight.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[4], cross[4], high[4];
    __m256i sum_a[2], sum_b[2];
    __m256i overlap;
    size_t k;

    // Products of the low halves and of the high halves
    karat_mult_2(low, A, B);
    karat_mult_2(high, A + 2, B + 2);

    // Product of the half sums
    for (k = 0; k < 2; k++) {
        sum_a[k] = _mm256_xor_si256(A[k], A[k + 2]);
        sum_b[k] = _mm256_xor_si256(B[k], B[k + 2]);
    }
    karat_mult_2(cross, sum_a, sum_b);

    // Recombination, two output words per quarter
    for (k = 0; k < 2; k++) {
        overlap = _mm256_xor_si256(low[k + 2], high[k]);

        C[k]     = low[k];
        C[k + 2] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[k]), cross[k]);
        C[k + 4] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[k + 2]), high[k + 2]);
        C[k + 6] = high[k + 2];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_4: A(x) and B(x) each occupy
 * eight 256-bit words and the product fills sixteen.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[8], cross[8], high[8];
    __m256i sum_a[4], sum_b[4];
    __m256i overlap;
    size_t k;

    // Products of the low halves and of the high halves
    karat_mult_4(low, A, B);
    karat_mult_4(high, A + 4, B + 4);

    // Product of the half sums
    for (k = 0; k < 4; k++) {
        sum_a[k] = _mm256_xor_si256(A[k], A[k + 4]);
        sum_b[k] = _mm256_xor_si256(B[k], B[k + 4]);
    }
    karat_mult_4(cross, sum_a, sum_b);

    // Recombination, four output words per quarter
    for (k = 0; k < 4; k++) {
        overlap = _mm256_xor_si256(low[k + 4], high[k]);

        C[k]      = low[k];
        C[k + 4]  = _mm256_xor_si256(_mm256_xor_si256(overlap, low[k]), cross[k]);
        C[k + 8]  = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[k + 4]), high[k + 4]);
        C[k + 12] = high[k + 4];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_8: A(x) and B(x) each occupy
 * sixteen 256-bit words and the product fills thirty-two.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[16], cross[16], high[16];
    __m256i sum_a[8], sum_b[8];
    __m256i overlap;
    size_t k;

    // Products of the low halves and of the high halves
    karat_mult_8(low, A, B);
    karat_mult_8(high, A + 8, B + 8);

    // Product of the half sums
    for (k = 0; k < 8; k++) {
        sum_a[k] = _mm256_xor_si256(A[k], A[k + 8]);
        sum_b[k] = _mm256_xor_si256(B[k], B[k + 8]);
    }
    karat_mult_8(cross, sum_a, sum_b);

    // Recombination, eight output words per quarter
    for (k = 0; k < 8; k++) {
        overlap = _mm256_xor_si256(low[k + 8], high[k]);

        C[k]      = low[k];
        C[k + 8]  = _mm256_xor_si256(_mm256_xor_si256(overlap, low[k]), cross[k]);
        C[k + 16] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[k + 8]), high[k + 8]);
        C[k + 24] = high[k + 8];
    }
 }


 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Three-way Karatsuba split: each operand is cut into three parts of
 * VEC_N_SPLIT_3x3 256-bit words, and the six partial products are computed
 * with karat_mult_16. The result (6 * VEC_N_SPLIT_3x3 words) is assembled in
 * a local buffer and then copied to C.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) {
    const size_t part = VEC_N_SPLIT_3x3;
    const __m256i *a_lo = A, *a_mid = A + VEC_N_SPLIT_3x3, *a_hi = A + (VEC_N_SPLIT_3x3 << 1);
    const __m256i *b_lo = B, *b_mid = B + VEC_N_SPLIT_3x3, *b_hi = B + (VEC_N_SPLIT_3x3 << 1);
    __m256i a_lo_mid[VEC_N_SPLIT_3x3], b_lo_mid[VEC_N_SPLIT_3x3];
    __m256i a_lo_hi[VEC_N_SPLIT_3x3], b_lo_hi[VEC_N_SPLIT_3x3];
    __m256i a_mid_hi[VEC_N_SPLIT_3x3], b_mid_hi[VEC_N_SPLIT_3x3];
    __m256i p_lo[2 * VEC_N_SPLIT_3x3], p_mid[2 * VEC_N_SPLIT_3x3], p_hi[2 * VEC_N_SPLIT_3x3];
    __m256i p_lo_mid[2 * VEC_N_SPLIT_3x3], p_lo_hi[2 * VEC_N_SPLIT_3x3], p_mid_hi[2 * VEC_N_SPLIT_3x3];
    __m256i assembled[6 * VEC_N_SPLIT_3x3];
    __m256i head_mix, tail_mix;
    size_t k, up;

    // Pairwise sums of the three parts of each operand
    for (k = 0; k < part; k++) {
        a_lo_mid[k] = _mm256_xor_si256(a_lo[k], a_mid[k]);
        b_lo_mid[k] = _mm256_xor_si256(b_lo[k], b_mid[k]);

        a_mid_hi[k] = _mm256_xor_si256(a_hi[k], a_mid[k]);
        b_mid_hi[k] = _mm256_xor_si256(b_hi[k], b_mid[k]);

        a_lo_hi[k] = _mm256_xor_si256(a_lo[k], a_hi[k]);
        b_lo_hi[k] = _mm256_xor_si256(b_lo[k], b_hi[k]);
    }

    // The six partial products
    karat_mult_16(p_lo, a_lo, b_lo);
    karat_mult_16(p_mid, a_mid, b_mid);
    karat_mult_16(p_hi, a_hi, b_hi);

    karat_mult_16(p_lo_mid, a_lo_mid, b_lo_mid);
    karat_mult_16(p_lo_hi, a_lo_hi, b_lo_hi);
    karat_mult_16(p_mid_hi, a_mid_hi, b_mid_hi);

    // Recombination: one word of each of the six output segments per iteration
    for (k = 0; k < part; k++) {
        up = k + part;

        head_mix = _mm256_xor_si256(_mm256_xor_si256(p_lo[k], p_mid[k]), p_lo[up]);
        tail_mix = _mm256_xor_si256(_mm256_xor_si256(p_mid[up], p_hi[k]), p_hi[up]);

        assembled[k] = p_lo[k];
        assembled[k + part] = _mm256_xor_si256(p_lo_mid[k], head_mix);
        assembled[k + 2 * part] = _mm256_xor_si256(
                                      _mm256_xor_si256(_mm256_xor_si256(p_lo_hi[k], p_hi[k]),
                                              _mm256_xor_si256(p_lo_mid[up], p_mid[up])),
                                      head_mix);
        assembled[k + 3 * part] = _mm256_xor_si256(
                                      _mm256_xor_si256(_mm256_xor_si256(p_mid_hi[k], p_lo_hi[up]),
                                              _mm256_xor_si256(p_lo[up], p_mid[k])),
                                      tail_mix);
        assembled[k + 4 * part] = _mm256_xor_si256(p_mid_hi[up], tail_mix);
        assembled[k + 5 * part] = p_hi[up];
    }

    // Hand the assembled product over to the caller
    for (k = 0; k < 6 * part; k++) {
        C[k] = assembled[k];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * This function computes A(x)*B(x) using Karatsuba 3 part split
 * A(x) and B(x) are stored in 256-bit registers
 *
 * Each operand is cut into three parts of VEC_N_SPLIT_3 256-bit words and
 * every partial product is computed by karat_three_way_mult. C receives
 * 6 * VEC_N_SPLIT_3 words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B) {
    size_t i, j;
    const __m256i *a0, *b0, *a1, *b1, *a2, *b2;
    __m256i aa01[VEC_N_SPLIT_3], bb01[VEC_N_SPLIT_3], aa02[VEC_N_SPLIT_3], bb02[VEC_N_SPLIT_3], aa12[VEC_N_SPLIT_3], bb12[VEC_N_SPLIT_3];
    __m256i D0[2 * VEC_N_SPLIT_3], D1[2 * VEC_N_SPLIT_3], D2[2 * VEC_N_SPLIT_3], D3[2 * VEC_N_SPLIT_3], D4[2 * VEC_N_SPLIT_3], D5[2 * VEC_N_SPLIT_3];
    __m256i middle0;

    // View the operands as arrays of 256-bit words and locate their three parts
    a0 = (__m256i *)(A->arr64);
    a1 = a0 + VEC_N_SPLIT_3;
    a2 = a0 + (2 * VEC_N_SPLIT_3);

    b0 = (__m256i *)(B->arr64);
    b1 = b0 + VEC_N_SPLIT_3;
    b2 = b0 + (2 * VEC_N_SPLIT_3);

    // Pairwise sums of the parts
    for (i = 0; i < VEC_N_SPLIT_3; i++) {
        aa01[i] = a0[i] ^ a1[i];
        bb01[i] = b0[i] ^ b1[i];

        aa12[i] = a2[i] ^ a1[i];
        bb12[i] = b2[i] ^ b1[i];

        aa02[i] = a0[i] ^ a2[i];
        bb02[i] = b0[i] ^ b2[i];
    }

    // Products of the parts themselves ...
    karat_three_way_mult(D0, a0, b0);
    karat_three_way_mult(D1, a1, b1);
    karat_three_way_mult(D2, a2, b2);

    // ... and of the pairwise sums
    karat_three_way_mult(D3, aa01, bb01);
    karat_three_way_mult(D4, aa02, bb02);
    karat_three_way_mult(D5, aa12, bb12);

    // Recombination: one word of each of the six output segments per iteration
    for (i = 0; i < VEC_N_SPLIT_3; i++) {
        j = i + VEC_N_SPLIT_3;
        middle0 = D0[i] ^ D1[i] ^ D0[j];
        C[i] = D0[i];
        C[j]  = D3[i] ^ middle0;
        C[j + VEC_N_SPLIT_3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0;
        middle0 = D1[j] ^ D2[i] ^ D2[j];
        C[j + (VEC_N_SPLIT_3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0;
        C[i + (VEC_N_SPLIT_3 << 2)] = D5[j] ^ middle0;
        C[j + (VEC_N_SPLIT_3 << 2)] = D2[j];
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * This function multiplies the two polynomials <b>a1</b> and <b>a2</b> with
 * karat_mult9 and then reduces the product modulo \f$ X^n - 1\f$.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to a polynomial
 * @param[in] a2 Pointer to a polynomial
 */
 void PQCLEAN_HQCRMRS192_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) {
    // NOTE(review): this buffer is sized in __m256i elements; confirm the unit of
    // PARAM_N_MULT (karat_mult9 writes 6 * VEC_N_SPLIT_3 elements).
    __m256i a1_times_a2[2 * PARAM_N_MULT + 1] = {0};
    karat_mult9(a1_times_a2, a1, a2);
    reduce(o, a1_times_a2);
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h
@@ -0,0 +1,21 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>

 /* Storage for a polynomial; the unused __m256i member gives the union the alignment of __m256i. */
 typedef union {
    uint64_t arr64[VEC_N_256_SIZE_64];
    __m256i dummy;
 } aligned_vec_t;

 /* Computes o = a1 * a2 modulo X^n - 1. */
 void PQCLEAN_HQCRMRS192_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/hqc.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/hqc.c
@@ -0,0 +1,168 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key holds the syndrome <b>s</b> = x + y.h together with the <b>seed</b>
 * from which the vector <b>h</b> is expanded.
 *
 * The secret key holds the <b>seed</b> from which the vectors <b>x</b> and <b>y</b> are expanded.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct secret_expander;
    AES_XOF_struct public_expander;
    uint8_t secret_seed[SEED_BYTES] = {0};
    uint8_t public_seed[SEED_BYTES] = {0};
    aligned_vec_t x_vec = {0};
    aligned_vec_t y_vec = {0};
    aligned_vec_t h_vec = {0};
    aligned_vec_t s_vec = {0};
    aligned_vec_t yh_vec = {0};

    // Draw the secret seed first, then the public one, and set up both expanders
    randombytes(secret_seed, SEED_BYTES);
    seedexpander_init(&secret_expander, secret_seed, secret_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(public_seed, SEED_BYTES);
    seedexpander_init(&public_expander, public_seed, public_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret key: x then y, both of weight PARAM_OMEGA
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&secret_expander, x_vec.arr64, PARAM_OMEGA);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&secret_expander, y_vec.arr64, PARAM_OMEGA);

    // Public key: s = x + y.h
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random(&public_expander, h_vec.arr64);
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(yh_vec.arr64, &y_vec, &h_vec);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(s_vec.arr64, x_vec.arr64, yh_vec.arr64, VEC_N_256_SIZE_64);

    // Serialise both keys
    PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(pk, public_seed, s_vec.arr64);
    PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(sk, secret_seed, pk);

 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of the vectors <b>u</b> = r1 + r2.h and
 * <b>v</b> = m.G + s.r2 + e.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_expander;
    aligned_vec_t h_vec = {0};
    aligned_vec_t s_vec = {0};
    aligned_vec_t r1_vec = {0};
    aligned_vec_t r2_vec = {0};
    aligned_vec_t e_vec = {0};
    // Three scratch vectors, reused across the steps below
    aligned_vec_t scratch_a_vec = {0};
    aligned_vec_t scratch_b_vec = {0};
    aligned_vec_t scratch_c_vec = {0};
    uint64_t *scratch_a = scratch_a_vec.arr64;
    uint64_t *scratch_b = scratch_b_vec.arr64;
    uint64_t *scratch_c = scratch_c_vec.arr64;

    // All encryption randomness is expanded from theta
    seedexpander_init(&theta_expander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Unpack h and s from the public key
    PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(h_vec.arr64, s_vec.arr64, pk);

    // Sample r1, r2 and e, in that order
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&theta_expander, r1_vec.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&theta_expander, r2_vec.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&theta_expander, e_vec.arr64, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(scratch_a, &r2_vec, &h_vec);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(u, r1_vec.arr64, scratch_a, VEC_N_256_SIZE_64);

    // Encode the message into v (m.G), convert it in place to 64-bit words,
    // then resize it to PARAM_N bits in scratch_a
    PQCLEAN_HQCRMRS192_AVX2_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_vect_resize(scratch_a, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, resized back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(scratch_b, &r2_vec, &s_vec);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(scratch_c, e_vec.arr64, scratch_b, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(scratch_b, scratch_a, scratch_c, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_vect_resize(v, PARAM_N1N2, scratch_b, PARAM_N);

 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Recovers x and y from the secret key, computes v - u.y and decodes it.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    aligned_vec_t x_vec = {0};
    aligned_vec_t y_vec = {0};
    // Three scratch vectors, reused across the steps below
    aligned_vec_t scratch_a_vec = {0};
    aligned_vec_t scratch_b_vec = {0};
    aligned_vec_t scratch_c_vec = {0};
    uint64_t *scratch_a = scratch_a_vec.arr64;
    uint64_t *scratch_b = scratch_b_vec.arr64;
    uint64_t *scratch_c = scratch_c_vec.arr64;

    // Unpack x, y and pk from the secret key
    PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(x_vec.arr64, y_vec.arr64, pk, sk);

    // scratch_a = v resized to PARAM_N bits
    PQCLEAN_HQCRMRS192_AVX2_vect_resize(scratch_a, PARAM_N, v, PARAM_N1N2);

    // Copy u into an aligned vector so that it can be fed to vect_mul
    size_t word = 0;
    while (word < VEC_N_256_SIZE_64) {
        scratch_b[word] = u[word];
        word++;
    }

    // scratch_b = v - u.y
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(scratch_c, &y_vec, &scratch_b_vec);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(scratch_b, scratch_a, scratch_c, VEC_N_256_SIZE_64);


    // Convert to bytes (in scratch_a) and decode to obtain m
    PQCLEAN_HQCRMRS192_AVX2_store8_arr((uint8_t *)scratch_a, VEC_N_SIZE_BYTES, scratch_b, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_code_decode(m, (uint8_t *)scratch_a);
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/hqc.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/hqc.h
@@ -0,0 +1,19 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 /* Generates a key pair; pk and sk receive the serialised public and secret keys. */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 /* Encrypts m under pk into (u, v), deriving all randomness from theta. */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

 /* Decrypts the ciphertext (u, v) with sk and writes the message to m. */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/kem.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/kem.c
@@ -0,0 +1,140 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper around PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen.
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 if keygen is successful
 */
 int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(pk, sk);
    return 0;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Draws a random message m, derives theta = SHA3-512(m), encrypts m under
 * pk into (u, v), and sets ss = SHA-512(m || u || v). The ciphertext is the
 * serialisation of u, v and d = SHA-512(m).
 *
 * Every working buffer has automatic storage duration, so concurrent calls
 * do not share any state.
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
 int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    // Deliberately not static: a function-static buffer would be shared by all callers.
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    randombytes(m, VEC_K_SIZE_BYTES);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Computing shared secret: ss = SHA-512(m || u || v)
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(ct, u, v, d);


    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Decrypts the ciphertext to m, re-encrypts m with theta = SHA3-512(m) and
 * compares the result (u', v', d') with the received (u, v, d). The shared
 * secret SHA-512(m || u || v) is always computed and is zeroed afterwards
 * if the comparison fails.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    uint8_t result;
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_256_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    result = PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS192_AVX2_vect_compare(d, d2, SHA512_BYTES);

    // Turn "any bit set" into the byte mask 0xff, and 0 into 0x00, without
    // branching. Only unsigned arithmetic is used: right-shifting a negative
    // signed value is implementation-defined in C.
    uint16_t wrapped = (uint16_t) (0U - (unsigned) result);
    result = (uint8_t) (0U - (unsigned) (wrapped >> 15));

    // On failure the mask clears every byte of the shared secret.
    for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
        ss[i] &= ~result;
    }


    return -(result & 1);
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/parameters.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/parameters.h
@@ -0,0 +1,109 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H


 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
 #include "api.h"


 #define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Integer division of a by b rounded up (for non-negative a and positive b); b is expanded twice, so pass side-effect-free arguments */

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define VEC_N_256_SIZE_64                     Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
  #define VEC_N1N2_256_SIZE_64                  Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of Galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_POLY_WT                      Hamming weight of PARAM_GF_POLY
  #define PARAM_GF_POLY_M2                      Distance between the primitive polynomial first two set bits
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=16
                                                The smallest power of 2 greater than 16+1 is 32=2^5
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 // Scheme parameters. PARAM_N1N2 equals PARAM_N1 * PARAM_N2 (56 * 640 = 35840).
 #define PARAM_N                                 35851
 #define PARAM_N1                                56
 #define PARAM_N2                                640
 #define PARAM_N1N2                              35840
 #define PARAM_OMEGA                             100
 #define PARAM_OMEGA_E                           114
 #define PARAM_OMEGA_R                           114
 #define PARAM_SECURITY                          192
 #define PARAM_DFR_EXP                           192

 // Key, ciphertext and shared-secret sizes are taken from api.h.
 #define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS192_AVX2_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS192_AVX2_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS192_AVX2_CRYPTO_CIPHERTEXTBYTES

 // 16742417 = 467 * PARAM_N, the largest multiple of PARAM_N not exceeding 2^24.
 #define UTILS_REJECTION_THRESHOLD               16742417
 // Byte sizes: 4482 bytes for PARAM_N bits, 4480 bytes for PARAM_N1N2 bits.
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        PARAM_K
 #define VEC_N1_SIZE_BYTES                       PARAM_N1
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 // Sizes in 64-bit words: 561 words for PARAM_N bits, 560 for PARAM_N1N2 bits.
 // PARAM_K and PARAM_N1 are divided by 8 here, i.e. they are counted in bytes.
 #define VEC_N_SIZE_64                         CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 // PARAM_N_MULT rounds PARAM_N up so that it splits into 9 parts whose length
 // is a multiple of 256 bits each: 9 * 256 * 16 = 36864 bits, i.e. 576 words.
 // VEC_N1N2_256_SIZE_64 counts 4 words per started 256-bit block (560 words).
 #define PARAM_N_MULT                            (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256))
 #define VEC_N_256_SIZE_64                       (PARAM_N_MULT / 64)
 #define VEC_N1N2_256_SIZE_64                    (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

 // Reed-Solomon / GF(2^8) parameters. 0x11D is x^8 + x^4 + x^3 + x^2 + 1
 // (5 terms), PARAM_GF_MUL_ORDER is 2^PARAM_M - 1, PARAM_G is 2 * PARAM_DELTA + 1
 // and RS_POLY_COEFS lists PARAM_G (33) coefficients.
 #define PARAM_DELTA                             16
 #define PARAM_M                                 8
 #define PARAM_GF_POLY                           0x11D
 #define PARAM_GF_POLY_WT                        5
 #define PARAM_GF_POLY_M2                        4
 #define PARAM_GF_MUL_ORDER                      255
 #define PARAM_K                                 24
 #define PARAM_G                                 33
 #define PARAM_FFT                               5
 #define RS_POLY_COEFS 45,216,239,24,253,104,27,40,107,50,163,210,227,134,224,158,119,13,158,1,238,164,82,43,15,232,246,142,50,189,29,232,1

 // RED_MASK has the low 11 bits set; PARAM_N mod 64 is 11, the number of bits
 // of a PARAM_N-bit vector that fall into its last 64-bit word.
 // SEEDEXPANDER_MAX_LENGTH is 2^32 - 1.
 #define RED_MASK                                0x7ff
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/parsing.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/parsing.c
@@ -0,0 +1,186 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */


 /**
 * @brief Write a 64-bit word as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer of at least 8 bytes
 * @param[in] in Word to serialize
 */
 void PQCLEAN_HQCRMRS192_AVX2_store8(unsigned char *out, uint64_t in) {
     for (size_t k = 0; k < 8; k++) {
         out[k] = (unsigned char) ((in >> (8 * k)) & 0xFF);
     }
 }


 /**
 * @brief Read 8 bytes as a 64-bit word, least significant byte first
 *
 * @param[in] in Source buffer of at least 8 bytes
 * @return The assembled 64-bit word
 */
 uint64_t PQCLEAN_HQCRMRS192_AVX2_load8(const unsigned char *in) {
     uint64_t word = 0;

     // fold the bytes in from the most significant one down to in[0]
     for (size_t k = 8; k > 0; k--) {
         word = (word << 8) | in[k - 1];
     }

     return word;
 }

 /**
 * @brief Pack a byte string into 64-bit words, least significant byte first
 *
 * Complete groups of 8 bytes are converted while both buffers have room.
 * If 1 to 7 input bytes are left over and an output word is still available,
 * they are packed into the low bytes of one more word (upper bytes zero).
 * Output words beyond that are left untouched.
 *
 * @param[out] out64 Destination array of 64-bit words
 * @param[in] outlen Number of words available in out64
 * @param[in] in8 Source byte string
 * @param[in] inlen Number of bytes available in in8
 */
 void PQCLEAN_HQCRMRS192_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
     size_t consumed = 0;
     size_t produced = 0;

     // complete 8-byte groups
     while (produced < outlen && consumed + 8 <= inlen) {
         out64[produced] = PQCLEAN_HQCRMRS192_AVX2_load8(in8 + consumed);
         produced += 1;
         consumed += 8;
     }

     // trailing partial group (fewer than 8 bytes), if there is room for it
     if (produced < outlen && consumed < inlen) {
         uint64_t tail = 0;
         for (size_t k = inlen; k > consumed; k--) {
             tail = (tail << 8) | in8[k - 1];
         }
         out64[produced] = tail;
     }
 }

 /**
 * @brief Unpack 64-bit words into a byte string, least significant byte first
 *
 * Writing stops as soon as outlen bytes have been produced or all inlen
 * words have been consumed, whichever happens first.
 *
 * @param[out] out8 Destination byte string
 * @param[in] outlen Number of bytes available in out8
 * @param[in] in64 Source array of 64-bit words
 * @param[in] inlen Number of words available in in64
 */
 void PQCLEAN_HQCRMRS192_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
     size_t pos = 0;

     for (size_t w = 0; w < inlen; w++) {
         for (unsigned shift = 0; shift < 64; shift += 8) {
             if (pos >= outlen) {
                 return;
             }
             out8[pos] = (uint8_t) ((in64[w] >> shift) & 0xFF);
             pos++;
         }
     }
 }


 /**
 * @brief Serialize a secret key
 *
 * Layout of the output: the SEED_BYTES-long secret seed followed by the
 * PUBLIC_KEY_BYTES-long public key.
 *
 * @param[out] sk Buffer receiving SEED_BYTES + PUBLIC_KEY_BYTES bytes
 * @param[in] sk_seed Secret seed (SEED_BYTES bytes)
 * @param[in] pk Serialized public key (PUBLIC_KEY_BYTES bytes)
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
     uint8_t *const pk_dst = sk + SEED_BYTES;

     memcpy(sk, sk_seed, SEED_BYTES);
     memcpy(pk_dst, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Deserialize a secret key
 *
 * Copies the public key stored after the seed, then initializes a seed
 * expander from the seed (first 32 bytes and the bytes starting at offset 32
 * are passed as two separate arguments) and draws x and then y from it,
 * each with weight PARAM_OMEGA.
 *
 * @param[out] x Vector x as an array of 64-bit words
 * @param[out] y Vector y as an array of 64-bit words
 * @param[out] pk Buffer receiving the PUBLIC_KEY_BYTES-long public key
 * @param[in] sk Serialized secret key (seed followed by public key)
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
     uint8_t seed_copy[SEED_BYTES] = {0};
     AES_XOF_struct expander;
     const uint8_t *const pk_src = sk + SEED_BYTES;

     memcpy(seed_copy, sk, SEED_BYTES);
     memcpy(pk, pk_src, PUBLIC_KEY_BYTES);

     seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);

     // x must be drawn before y: both come from the same expander stream
     PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
     PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&expander, y, PARAM_OMEGA);
 }

 /**
 * @brief Serialize a public key
 *
 * Layout of the output: the SEED_BYTES-long public seed followed by the
 * syndrome s written as VEC_N_SIZE_BYTES bytes.
 *
 * @param[out] pk Buffer receiving the serialized public key
 * @param[in] pk_seed Public seed (SEED_BYTES bytes)
 * @param[in] s Syndrome s as an array of 64-bit words
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
     uint8_t *const s_dst = pk + SEED_BYTES;

     memcpy(pk, pk_seed, SEED_BYTES);
     PQCLEAN_HQCRMRS192_AVX2_store8_arr(s_dst, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
 }



 /**
 * @brief Deserialize a public key
 *
 * Reads the syndrome s stored after the seed, then initializes a seed
 * expander from the seed (first 32 bytes and the bytes starting at offset 32
 * are passed as two separate arguments) and draws h from it.
 *
 * @param[out] h Vector h as an array of 64-bit words
 * @param[out] s Syndrome s as an array of 64-bit words
 * @param[in] pk Serialized public key (seed followed by s)
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
     uint8_t seed_copy[SEED_BYTES] = {0};
     AES_XOF_struct expander;
     const uint8_t *const s_src = pk + SEED_BYTES;

     memcpy(seed_copy, pk, SEED_BYTES);
     PQCLEAN_HQCRMRS192_AVX2_load8_arr(s, VEC_N_SIZE_64, s_src, VEC_N_SIZE_BYTES);

     seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);
     PQCLEAN_HQCRMRS192_AVX2_vect_set_random(&expander, h);
 }


 /**
 * @brief Serialize a ciphertext
 *
 * Layout of the output: u (VEC_N_SIZE_BYTES bytes), then v
 * (VEC_N1N2_SIZE_BYTES bytes), then the hash d (SHA512_BYTES bytes).
 *
 * @param[out] ct Buffer receiving the serialized ciphertext
 * @param[in] u Vector u as an array of 64-bit words
 * @param[in] v Vector v as an array of 64-bit words
 * @param[in] d Hash d (SHA512_BYTES bytes)
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
     uint8_t *const v_dst = ct + VEC_N_SIZE_BYTES;
     uint8_t *const d_dst = v_dst + VEC_N1N2_SIZE_BYTES;

     PQCLEAN_HQCRMRS192_AVX2_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
     PQCLEAN_HQCRMRS192_AVX2_store8_arr(v_dst, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
     memcpy(d_dst, d, SHA512_BYTES);
 }


 /**
 * @brief Deserialize a ciphertext
 *
 * Expected layout of the input: u (VEC_N_SIZE_BYTES bytes), then v
 * (VEC_N1N2_SIZE_BYTES bytes), then the hash d (SHA512_BYTES bytes).
 * Only the first VEC_N_SIZE_64 words of u and VEC_N1N2_SIZE_64 words of v
 * can be written.
 *
 * @param[out] u Vector u as an array of 64-bit words
 * @param[out] v Vector v as an array of 64-bit words
 * @param[out] d Buffer receiving the hash d (SHA512_BYTES bytes)
 * @param[in] ct Serialized ciphertext
 */
 void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
     const uint8_t *const v_src = ct + VEC_N_SIZE_BYTES;
     const uint8_t *const d_src = v_src + VEC_N1N2_SIZE_BYTES;

     PQCLEAN_HQCRMRS192_AVX2_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES);
     PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, v_src, VEC_N1N2_SIZE_BYTES);
     memcpy(d, d_src, SHA512_BYTES);
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/parsing.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/parsing.h
@@ -0,0 +1,36 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 #include <stddef.h>
 #include <stdint.h>

 /* Conversions between 64-bit words and bytes, least significant byte first */

 /** Write the 64-bit word in to out[0..7]. */
 void PQCLEAN_HQCRMRS192_AVX2_store8(unsigned char *out, uint64_t in);

 /** Read in[0..7] as one 64-bit word. */
 uint64_t PQCLEAN_HQCRMRS192_AVX2_load8(const unsigned char *in);

 /** Pack up to inlen bytes of in8 into at most outlen words of out64. */
 void PQCLEAN_HQCRMRS192_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

 /** Unpack up to inlen words of in64 into at most outlen bytes of out8. */
 void PQCLEAN_HQCRMRS192_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


 /* Secret key: seed followed by the serialized public key */

 void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


 /* Public key: seed followed by the syndrome s */

 void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 /* Ciphertext: u, then v, then the hash d */

 void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c
@@ -0,0 +1,389 @@
 #include "parameters.h"
 #include "reed_muller.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file reed_muller.c
 * Constant time implementation of Reed-Muller code RM(1,7)
 */


 // number of 128-bit codewords stored per encoded byte: PARAM_N2 / 128 rounded up
 // (5 when PARAM_N2 is 640)
 #define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

 // copy bit 0 of x into all bits of a 64 bit value (0 or -1);
 // the expansion is fully parenthesized so it can be used in any expression
 #define BIT0MASK(x) ((int64_t)(-((x) & 1)))

 // Forward declarations of the file-local helpers defined below:
 // encode() builds one 16-byte codeword; expand_and_sum(), hadamard() and
 // find_peaks() are the three stages used by the decoder.
 static void encode(uint8_t *word, uint8_t message);
 static void expand_and_sum(__m256i *dst, const uint64_t *src);
 static void hadamard(__m256i *src, __m256i *dst);
 static uint32_t find_peaks(__m256i *transform);



 /**
 * @brief Encode one byte into one 128-bit RM(1,7) codeword
 *
 * The codeword is the XOR of the rows of the matrix below selected by the
 * set bits of the message (rows shown with low bits at the left):
 * 0   aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1   cccccccc cccccccc cccccccc cccccccc
 * 2   f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3   ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4   ffff0000 ffff0000 ffff0000 ffff0000
 * 5   00000000 ffffffff 00000000 ffffffff
 * 6   00000000 00000000 ffffffff ffffffff
 * 7   ffffffff ffffffff ffffffff ffffffff
 *
 * The four 32-bit quarters are stored least significant byte first.
 *
 * @param[out] word Buffer receiving the 16-byte codeword
 * @param[in] message Byte to encode
 */
 static void encode(uint8_t *word, uint8_t message) {
     // rows 0..4 contribute the same 32-bit pattern to every quarter
     static const uint32_t row_pattern[5] = {
         0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
     };
     // rows 5 and 6 flip whole quarters: row 5 quarters 1 and 3, row 6 quarters 2 and 3
     const uint32_t flip5 = (uint32_t) BIT0MASK(message >> 5);
     const uint32_t flip6 = (uint32_t) BIT0MASK(message >> 6);
     uint32_t quarter[4];

     // row 7 flips every bit; start from it and fold in rows 0..4
     uint32_t base = (uint32_t) BIT0MASK(message >> 7);
     for (size_t k = 0; k < 5; k++) {
         base ^= (uint32_t) BIT0MASK(message >> k) & row_pattern[k];
     }

     quarter[0] = base;
     quarter[1] = base ^ flip5;
     quarter[2] = base ^ flip6;
     quarter[3] = base ^ flip5 ^ flip6;

     for (size_t q = 0; q < 4; q++) {
         for (size_t b = 0; b < 4; b++) {
             word[4 * q + b] = (quarter[q] >> (8 * b)) & 0xff;
         }
     }
 }



 /**
 * @brief Add multiple codewords into expanded codeword
 *
 * Every bit of each of the MULTIPLICITY 128-bit codewords is widened to a
 * 16-bit element and the copies are summed element-wise, so each of the
 * 128 output elements ends up in the range 0..MULTIPLICITY.
 *
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is too high (the caller subtracts 64 * MULTIPLICITY)
 *
 * @param[out] dst Eight vectors of sixteen 16-bit sums (128 elements)
 * @param[in] src MULTIPLICITY consecutive 128-bit codewords
 */
 inline void expand_and_sum(__m256i *dst, const uint64_t *src) {
     uint16_t v[16];
     uint16_t chunk;
     for (size_t part = 0; part < 8; part++) {
         dst[part] = _mm256_setzero_si256();
     }
     for (size_t copy = 0; copy < MULTIPLICITY; copy++) {
         for (size_t part = 0; part < 8; part++) {
             // fetch the part-th 16-bit chunk of this codeword; memcpy keeps the
             // read free of aliasing and alignment assumptions and preserves const
             memcpy(&chunk, (const uint8_t *) &src[2 * copy] + 2 * part, sizeof chunk);
             for (size_t bit = 0; bit < 16; bit++) {
                 v[bit] = (chunk >> bit) & 1;
             }
             // explicit 16-bit lane addition instead of a compiler-specific
             // vector operator
             dst[part] = _mm256_add_epi16(dst[part],
                                          _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
                                                  v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]));
         }
     }
 }



 /**
 * @brief Hadamard transform
 *
 * Perform hadamard transform of src and store result in dst
 * src is overwritten: it is also used as intermediate buffer
 * Method is best explained if we use H(3) instead of H(7):
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * You can do this in three passes, where each pass does this:
 * set lower half of buffer to pairwise sums,
 * and upper half to differences
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3:   a+b+c+d+e+f+g+h   a+b-c-d+e+f-g-h   a+b+c+d-e-f-g-h   a+b-c-d-e-f+g+h
 * a-b+c-d+e-f+g-h   a-b-c+d+e-f-g+h   a-b+c-d-e+f-g+h   a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src Eight vectors holding the 128 16-bit input values; used as scratch space and overwritten
 * @param[out] dst Eight vectors receiving the 128 16-bit transform values
 */
 inline void hadamard(__m256i *src, __m256i *dst) {
     // the passes move data:
     // src -> dst -> src -> dst -> src -> dst -> src -> dst
     // using p1 and p2 alternately
     // (seven passes is an odd number, so the last pass writes into dst)
     __m256i *p1 = src;
     __m256i *p2 = dst;
     __m256i *p3;
     for (size_t pass = 0; pass < 7; pass++) {
         // warning: hadd works "within lanes" as Intel call it
         // so you have to swap the middle 64 bit blocks of the result
         // (permute control 0xd8 selects the 64-bit blocks in the order 0, 2, 1, 3)
         for (size_t part = 0; part < 4; part++) {
             // first half of the output: pairwise sums
             p2[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8);
             // second half of the output: pairwise differences
             p2[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8);
         }
         // swap p1, p2 for next round
         p3 = p1;
         p1 = p2;
         p2 = p3;
     }
 }



 /**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the highest value,
 * and add 128 if the peak is positive
 * Notes on decoding
 * The standard "Green machine" decoder works as follows:
 * if the received codeword is W, compute (2 * W - 1) * H7
 * The entries of the resulting vector are always even and vary from
 * -128 (= the complement is a code word, add bit 7 to decode)
 * via 0 (this is a different codeword)
 * to 128 (this is the code word).
 *
 * Our decoding differs in two ways:
 * - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1)
 * - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY)
 * This implies that we have to subtract 64M (M=MULTIPLICITY)
 * from the first entry to make sure the first codewords is handled properly
 * and that the entries vary from -64M to 64M.
 * -64M or 64M stands for a perfect codeword.
 * If there are fewer than 32M errors, there is always a unique codeword
 * with an entry with absolute value > 32M;
 * this is because an error changes an entry by 1.
 * The highest number that seem to be decodable is 50 errors, so that the
 * highest entries in the hadamard transform can be as low as 12.
 * But this is different for the repeated code.
 * Because multiple codewords are added, this changes: the lowest value of the
 * hadamard transform of the sum of six words is seen to be as low as 43 (!),
 * which is way less than 12*6.
 *
 * It is possible that there are more errors, but the word is still uniquely
 * decodable: we found a word with distance of 50 from the nearest codeword.
 * That means that the highest entry can be as low as 14M.
 * Since we have to do binary search, we search for the range 1-64M
 * which can be done in 6+l2g(M) steps.
 * The binary search is based on (values>32M are unique):
 * M  32M     min>  max>  firstStep #steps
 * 2   64       1   64    33 +- 16    6
 * 4  128       1  128    65 +- 32    7
 * 6  192       1  192   129 +- 64    8
 *
 * As a check, we run a sample for M=6 to see the peak value; it ranged
 * from 43 to 147, so my analysis looks right. Also, it shows that decoding
 * far beyond the bound of 32M is needed.
 *
 * For the vectors, it would be tempting to use 8 bit ints,
 * because the values "almost" fit in there.
 * We could use some trickery to fit it in 8 bits, like saturated add or
 * division by 2 in a late step.
 * Unfortunately, these instructions do not exist.
 * the adds _mm512_adds_epi8 is available only on the latest processors,
 * and division, shift, mulhi are not available at all for 8 bits.
 * So, we use 16 bit ints.
 *
 * For the search of the optimal comparison value,
 * remember the transform contains 64M-d,
 * where d are the distances to the codewords.
 * The highest value gives the most likely codeword.
 * There is no fast vectorized way to find this value, so we search for the
 * maximum value itself.
 * In each pass, we collect a bit map of the transform values that are,
 * say >bound.  There are three cases:
 * bit map = 0: all code words are further away than 64M-bound (decrease bound)
 * bit map has one bit: one unique code word has distance < 64M-bound
 * bit map has multiple bits: multiple words (increase bound)
 * We will search for the lowest value of bound that gives a nonzero bit map.
 *
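 * @param[in] transform Eight vectors holding the 128 16-bit Hadamard transform values
 * @return Decoded byte: bits 0-6 hold the position of the selected peak, bit 7 is set when the value at that position has its sign bit clear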
 */
 inline uint32_t find_peaks(__m256i *transform) {
     // a whole lot of vector variables
     __m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows;
     __m256i tmp = _mm256_setzero_si256();
     __m256i vect_mask;
     __m256i res;
     int32_t lower;
     int32_t width;
     uint32_t message;
     uint32_t mask;
     int8_t index;
     int8_t abs_value;
     int8_t mask1;
     int8_t mask2;
     uint16_t result;

     // compute absolute value of transform
     for (size_t i = 0; i < 8; i++) {
         abs_rows[i] = _mm256_abs_epi16(transform[i]);
     }
     // compute a vector of 16 elements which contains the maximum somewhere
     // (later used to compute bits 0 through 3 of message)
     max_abs_rows = abs_rows[0];
     for (size_t i = 1; i < 8; i++) {
         max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]);
     }

     // do binary search for the highest value that is lower than the maximum
     // loop invariant: lower gives bit map = 0, lower + width gives bit map > 0
     lower = 1;
     // this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6
     // (integer division: MULTIPLICITY = 5 also gives 128)
     width = 1 << (5 + MULTIPLICITY / 2);
     // if you don't unroll this loop, it fits in the loop cache
     // uncomment the line below to speeding up the program by a few percent
     // #pragma GCC unroll 0
     while (width > 1) {
         width >>= 1;
         // compare with lower + width; put result in bitmap
         // make vector from value of new bound
         bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width));
         bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound);
         // step up if there are any matches
         // rely on compiler to use conditional move here
         // testz yields 1 when bitmap is all zero; the next line turns that into
         // mask = 0 (no match) or mask = 0xFFFFFFFF (at least one match)
         mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
         mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
         lower += mask & width;
     }
     // lower+width contains the maximum value of the vector
     // or less, if the maximum is very high (which is OK)
     // normally, there is one maximum, but sometimes there are more
     // find where the maxima occur in the maximum vector
     // (each determines lower 4 bits of peak position)
     // construct vector filled with bound-1
     // (width is 1 once the loop has finished, so this broadcasts lower)
     bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1));

     // find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message
     // find lowest value by searching backwards skip first check to save time
     // (message starts at row 7 and is replaced by every matching row while
     // walking from row 7 down to row 0, so the lowest matching row wins)
     message = 0x70;
     for (size_t i = 0; i < 8; i++) {
         bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound);
         mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
         mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
         // branch-free select: message becomes (7 - i) << 4 when mask is all ones
         message ^= mask & (message ^ ((7 - i) << 4));
     }
     // we decided which row of the matrix contains the lowest match
     // select proper row
     index = message >> 4;

     // copy abs_rows[index] into tmp by touching all eight rows and masking,
     // rather than indexing the array with the computed row number
     tmp = _mm256_setzero_si256();
     for (size_t i = 0; i < 8; i++) {
         // abs_value = |index - i|
         abs_value = (int8_t)(index - i);
         mask1 = abs_value >> 7;
         abs_value ^= mask1;
         abs_value -= mask1;
         // mask2 is 0 when index == i and 1 otherwise,
         // so mask is 0xFFFFFFFF for the selected row and 0 for all others
         mask2 = ((uint8_t) - abs_value >> 7);
         mask = (-1ULL) + mask2;
         vect_mask = _mm256_set1_epi32(mask);
         res = _mm256_and_si256(abs_rows[i], vect_mask);
         tmp = _mm256_or_si256(tmp, res);
     }

     active_row = tmp;

     // get the column number of the vector element
     // by setting the bits corresponding to the columns
     // and then adding elements within two groups of 8
     vect_mask = _mm256_cmpgt_epi16(active_row, bound);
     vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1);
     for (size_t i = 0; i < 3; i++) {
         vect_mask = _mm256_hadd_epi16(vect_mask, vect_mask);
     }
     // add low 4 bits of message
     // (the trailing-zero count picks the lowest matching column)
     message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8));

     // set bit 7 if sign of biggest value is positive
     // make sure a jump isn't generated by the compiler
     // first select the signed row transform[message / 16] by masking ...
     tmp = _mm256_setzero_si256();
     for (size_t i = 0; i < 8; i++) {
         mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63);
         vect_mask = _mm256_set1_epi32(mask);
         tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i]));
     }
     // ... then select its 16-bit element number message % 16 the same way
     result = 0;
     for (size_t i = 0; i < 16; i++) {
         mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63);
         result |= mask & ((uint16_t *)&tmp)[i];
     }
     // bit 15 of result is the sign bit; move its complement into bit 7
     message |= (0x8000 & ~result) >> 8;
     return message;
 }



 /**
 * @brief Encode a message with the duplicated Reed-Muller code
 *
 * Each of the VEC_N1_SIZE_BYTES message bytes is encoded into one 16-byte
 * codeword, which is then stored MULTIPLICITY times in a row.
 *
 * @param[out] cdw Buffer receiving 16 * MULTIPLICITY bytes per message byte
 * @param[in] msg Message of VEC_N1_SIZE_BYTES bytes
 */
 void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
     const size_t group_bytes = 16 * MULTIPLICITY;

     for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
         uint8_t *const group = &cdw[i * group_bytes];

         // the first codeword of the group is computed ...
         encode(group, msg[i]);

         // ... and the remaining ones are plain copies of it
         for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
             memcpy(group + 16 * copy, group, 16);
         }
     }
 }



 /**
 * @brief Decodes the received word
 *
 * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes codes @cite macwilliams1977theory
 *
 * For every message byte the MULTIPLICITY received codewords are summed,
 * transformed, corrected in their first entry and searched for the peak.
 *
 * @param[out] msg Buffer receiving the VEC_N1_SIZE_BYTES decoded bytes
 * @param[in] cdw Received word, 16 * MULTIPLICITY bytes per message byte
 */
 void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
     __m256i expanded[8];
     __m256i transform[8];
     // 64 * MULTIPLICITY in 16-bit element 0, zero in every other element
     const __m256i first_entry_offset = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0,
                                        0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY);
     for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
         // collect the codewords (the cast keeps the const qualifier of cdw)
         expand_and_sum(expanded, (const uint64_t *)&cdw[16 * i * MULTIPLICITY]);
         // apply hadamard transform
         hadamard(expanded, transform);
         // fix the first entry to get the half Hadamard transform.
         // The subtraction must work on 16-bit lanes: with wider lanes a borrow
         // out of element 0 would spill into the neighbouring elements whenever
         // element 0 is smaller than 64 * MULTIPLICITY.
         transform[0] = _mm256_sub_epi16(transform[0], first_entry_offset);
         // finish the decoding
         msg[i] = (uint8_t) find_peaks(transform);
     }
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h
@@ -0,0 +1,18 @@
 #ifndef REED_MULLER_H
 #define REED_MULLER_H


 /**
 * @file reed_muller.h
 * Header file of reed_muller.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /**
 * @brief Encode VEC_N1_SIZE_BYTES message bytes; every byte becomes a 16-byte
 * codeword that is stored several times in a row in cdw
 */
 void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

 /**
 * @brief Decode the received word cdw back into VEC_N1_SIZE_BYTES message bytes
 */
 void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c
@@ -0,0 +1,476 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file reed_solomon.c
 * Constant time implementation of Reed-Solomon codes
 */


 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void compute_roots(uint8_t *error, uint16_t *sigma);
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

 /**
 * Syndrome evaluation table, first half.
 *
 * compute_syndromes() multiplies row i (16 lanes of 16 bits each) by the codeword symbol
 * cdw[i + 1] and accumulates the products into syndromes 0..15; the 55 rows therefore cover
 * codeword symbols 1..55. Each initializer lists the four 64-bit words of one row.
 * NOTE(review): spot checks of rows 0, 1 and 50 match alpha^((i + 1) * (j + 1)) for lane j,
 * with alpha = 2 and reduction polynomial 0x11d - confirm against the table generator.
 */
 static const __m256i alpha_ij256_1[55] = {
    {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087},
    {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006},
    {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035},
    {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014},
    {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be},
    {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078},
    {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3},
    {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d},
    {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed},
    {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e},
    {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054},
    {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4},
    {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5},
    {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062},
    {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064},
    {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051},
    {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045},
    {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb},
    {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083},
    {0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020},
    {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d},
    {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0},
    {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee},
    {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba},
    {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e},
    {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb},
    {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9},
    {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd},
    {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec},
    {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9},
    {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052},
    {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1},
    {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1},
    {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc},
    {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c},
    {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2},
    {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048},
    {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016},
    {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad},
    {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074},
    {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9},
    {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025},
    {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 0x00ab00a9005b008c},
    {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de},
    {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f},
    {0x00950021005b009f, 0x001f00c2007500f4, 0x00b500d800a70073, 0x0048009600da00fe},
    {0x00a5001500710023, 0x00760089000c00eb, 0x0050008000ef00fc, 0x00b0006400520022},
    {0x008200a800d90046, 0x001700e70027002c, 0x0061002d002400db, 0x0008005900bf003e},
    {0x00c800290043008c, 0x009e00fe003500e9, 0x0078003000eb006e, 0x005a002400e300cc},
    {0x001c005500110005, 0x004d00e200c1006c, 0x00df006a00e90064, 0x009c002c00ae0084},
    {0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd, 0x000a000100dd0092},
    {0x005100e4000d0014, 0x00d100bd00ba0020, 0x003e00de007400f2, 0x00c20026002b003f},
    {0x0079007300340028, 0x00e500f800a10074, 0x006600ca00b4008a, 0x00bb006000f7004b},
    {0x00c300bf00d00050, 0x00db00c5002f0026, 0x0021006b006000f5, 0x008600c100cf0082},
    {0x00ac0091006700a0, 0x0037002e000f00b4, 0x005500e2006a002c, 0x007c00b9002000a7}
 };
 /**
 * Syndrome evaluation table, second half.
 *
 * Same layout as alpha_ij256_1: compute_syndromes() multiplies row i (16 lanes of 16 bits
 * each) by the codeword symbol cdw[i + 1], here accumulating into syndromes 16..31.
 * NOTE(review): row 0 starts at alpha^17 (0x0098 with alpha = 2, polynomial 0x11d), i.e. the
 * lanes appear to continue the exponents of alpha_ij256_1 - confirm against the table generator.
 */
 static const __m256i alpha_ij256_2[55] = {
    {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x009d00c000600030},
    {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x005f00de00b90069},
    {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x00d900b600df007f},
    {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00850097003b00f8},
    {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x00e600720055004d},
    {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x0082006e009600f1},
    {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x0012009b005900e0},
    {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x0002001b002c00f7},
    {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0027008f00260040},
    {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x00be00a000c1009c},
    {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00af00b1000f005f},
    {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x001700ed001a00b6},
    {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x00d1005200a900ec},
    {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x0019004b009100aa},
    {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0024005900640096},
    {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00040058002400a2},
    {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x004e00980001000b},
    {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00610046006000cd},
    {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x004300f000b900d4},
    {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x002e006700df005e},
    {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00bf0015003b0086},
    {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0032007b00550085},
    {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x004800e0009600d5},
    {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000800fb0059006e},
    {0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x009c0074002c00ef},
    {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00c200d4002600fa},
    {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0086006500c1002d},
    {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x005c0088000f0023},
    {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x006300da001a001e},
    {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0064009100a9001a},
    {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x009000c8009100da},
    {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0010009000640063},
    {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0025000800240082},
    {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0099004e00010045},
    {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x001100be0060006c},
    {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x00b800d900b9008f},
    {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x00c600cc00df0028},
    {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00c800b7003b00d3},
    {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x003d0057005500ce},
    {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x002000ac00960084},
    {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x004a00d8005900e5},
    {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x002f000c002c0007},
    {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0022006900260090},
    {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x006d00e100c10002},
    {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x0091003b000f0060},
    {0x007200bd00a10098, 0x006b009400830038, 0x0087008a00e3002e, 0x008d00aa001a00d2},
    {0x00ff008500e7004e, 0x00d0006f0013008a, 0x00d4003600700072, 0x007a006200a900fe},
    {0x006400290086000a, 0x00b8006b0025007d, 0x002f0075003d0096, 0x004000f2009100ed},
    {0x00ef003f00ed0099, 0x00e400680069003a, 0x00af0046008e00a7, 0x009400fa0064009a},
    {0x00eb003700a900d6, 0x0096002e00fd0060, 0x0033000f000300f4, 0x005e00b4002400ff},
    {0x000100dd00920044, 0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd},
    {0x00b4000900b30093, 0x003d00e300970065, 0x00310017003c0003, 0x00da00d3006000f3},
    {0x006a00b00057004f, 0x00ad000e009a00b6, 0x00a200e400880005, 0x003f001f00b90080},
    {0x00b9004000a60092, 0x0075008a00fc003e, 0x008b00c40017000f, 0x000700a800df0025},
    {0x00fd0003002400d7, 0x00c100e900ae00a9, 0x0074005900720011, 0x00f400ff003b00be}
 };

 /**
 * @brief Encodes a message of PARAM_K bytes to a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * we perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code.
 *
 * The first PARAM_N1 - PARAM_K bytes of cdw serve as the shift register and end up holding the
 * parity symbols; the message is copied unchanged behind them. The register is cleared here, so
 * the caller does not have to pass a zeroed buffer.
 *
 * @param[out] cdw Array of PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Array of PARAM_K bytes storing the message
 */
 void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    size_t i, k;
    uint8_t gate_value = 0;
    uint8_t prev, x;

    // The __m256i member only forces vector alignment of the uint16_t arrays.
    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } tmp = {0};

    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } PARAM_RS_POLY = {{ RS_POLY_COEFS }};

    __m256i *tmp256 = (__m256i *)tmp.arr16;
    __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16;

    // The parity bytes are read as register state before they are first written:
    // start from an all-zero register instead of whatever the caller left in cdw.
    memset(cdw, 0, PARAM_N1 - PARAM_K);

    for (i = 0; i < PARAM_K; ++i) {
        // Feedback symbol: next message byte (highest index first) plus the register output.
        gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]);

        // gate_value times the first 32 generator coefficients, 16 lanes per multiplication.
        tmp256[0] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[0]);
        tmp256[1] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[1]);

        // Remaining coefficients (index 32 and up) are handled one at a time.
        for (size_t j = 32; j < PARAM_G; ++j) {
            tmp.arr16[j] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(gate_value, PARAM_RS_POLY.arr16[j]);
        }

        // Shift the register by one stage while adding the feedback products.
        prev = 0;
        for (k = 0; k < PARAM_N1 - PARAM_K; k++) {
            x = cdw[k];
            cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]);
            prev = x;
        }
    }

    // Systematic code: the message follows the parity symbols.
    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
 }



 /**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Syndrome j is the sum over all codeword symbols of cdw[i] times a table entry; the term for
 * cdw[0] has coefficient 1 and therefore enters every syndrome unchanged. Rows of
 * alpha_ij256_1 feed syndromes 0..15 and rows of alpha_ij256_2 feed syndromes 16..31.
 *
 * The results are written with unaligned stores, so syndromes needs no particular alignment.
 * Exactly 32 values are written.
 *
 * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    // cdw[0] contributes to all 32 syndromes, not only to the first 16.
    const __m256i constant_term = _mm256_set1_epi16(cdw[0]);
    __m256i low_half = constant_term;
    __m256i high_half = constant_term;

    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
        const __m256i symbol = _mm256_set1_epi16(cdw[i + 1]);

        low_half = _mm256_xor_si256(low_half,
                                    PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(symbol, alpha_ij256_1[i]));
        high_half = _mm256_xor_si256(high_half,
                                     PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(symbol, alpha_ij256_2[i]));
    }

    // The caller's buffer is a plain uint16_t array: do not assume 32-byte alignment.
    _mm256_storeu_si256((__m256i *) syndromes, low_half);
    _mm256_storeu_si256((__m256i *) (syndromes + 16), high_half);
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes)). <br>
 * We use the letter p for rho which is initialized at -1. <br>
 * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA coefficients (the saved copy is only read
 * shifted by one position).
 *
 * Every iteration performs the same operations; decisions are applied through the 0/0xffff
 * masks mask1, mask2 and mask12 rather than through branches on the data.
 *
 * @returns the degree of the ELP sigma
 * @param[out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP; entries 1..PARAM_DELTA
 *                   are updated by XOR, so the caller must pass them zeroed
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    uint16_t deg_sigma = 0;
    uint16_t deg_sigma_p = 0;
    uint16_t deg_sigma_copy = 0;
    uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    uint16_t pp = (uint16_t) -1; // 2*rho
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    uint16_t mask1, mask2, mask12;
    uint16_t deg_X, deg_X_sigma_p;
    uint16_t dd;
    uint16_t mu;

    uint16_t i;

    sigma[0] = 1;
    for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (2 * PARAM_DELTA bytes, i.e. the first PARAM_DELTA uint16_t coefficients)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
        deg_sigma_copy = deg_sigma;

        // dd = d / d_p: scaling factor applied to X_sigma_p in the update of sigma
        dd = PQCLEAN_HQCRMRS192_AVX2_gf_mul(d, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(d_p));

        // sigma += dd * X_sigma_p, restricted to the coefficients that can be non-zero so far
        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            sigma[i] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(dd, X_sigma_p[i]);
        }

        // Degree of X^(mu-rho) * sigma_p, computed modulo 2^16 (pp starts at (uint16_t) -1)
        deg_X = mu - pp;
        deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        mask12 = mask1 & mask2;
        // Masked select: deg_sigma becomes deg_X_sigma_p when mask12 is set, else stays as is
        deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

        // Last iteration: sigma and its degree are final, the bookkeeping below is not needed
        if (mu == (2 * PARAM_DELTA - 1)) {
            break;
        }

        // When the degree increased, the state saved before this update becomes the new
        // (rho, d_p, sigma_p); otherwise X_sigma_p is only multiplied by X (shifted by one).
        pp ^= mask12 & (mu ^ pp);
        d_p ^= mask12 & (d ^ d_p);
        for (i = PARAM_DELTA; i; --i) {
            X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }

        deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);

        // Next discrepancy: syndromes[mu + 1] plus the sum of sigma[i] * syndromes[mu + 1 - i]
        d = syndromes[mu + 1];

        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            d ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQCRMRS192_AVX2_fft for more details.
 *
 * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
 * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint8_t *error, uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0};

    PQCLEAN_HQCRMRS192_AVX2_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(error, w);
 }



 /**
 * @brief Computes the polynomial z(x)
 *
 * z_0 is 1. For 1 <= i <= PARAM_DELTA, z_i is sigma_i plus syndromes[i - 1] plus the sum of
 * sigma_j * syndromes[i - j - 1] for 1 <= j < i; coefficients above the degree of sigma are
 * forced to zero through a mask (no branch on the degree), except that syndromes[0] is always
 * added to z_1.
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    z[0] = 1;

    for (size_t coeff = 1; coeff <= PARAM_DELTA; ++coeff) {
        // 0xffff while coeff <= degree, 0 once coeff exceeds it
        const uint16_t keep = (uint16_t) (-((uint16_t) (coeff - degree - 1) >> 15));
        uint16_t acc = sigma[coeff];

        if (coeff > 1) {
            acc ^= syndromes[coeff - 1];
            for (size_t j = 1; j < coeff; ++j) {
                acc ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[j], syndromes[coeff - j - 1]);
            }
        }

        z[coeff] = keep & acc;
    }

    // The first syndrome enters z_1 whatever the degree is
    z[1] ^= syndromes[0];
 }



 /**
 * @brief Computes the error values
 *
 * Works in three passes, each with a fixed iteration count:
 *   1. collect gf_exp[i] for every position i with error[i] != 0 into beta_j, in order of appearance;
 *   2. compute one error value e_j per collected locator from z and the other locators;
 *   3. scatter the e_j back to the positions where error[i] != 0.
 * Selections are made with 0/0xffff masks instead of branches on the data.
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * @param[out] error_values Array of PARAM_N1 elements receiving the error values; values are
 *                          accumulated with +=, so the caller must pass it zeroed
 * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
 * @param[in] error Array of (at least) PARAM_N1 elements, non-zero at the error positions
 */
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
    uint16_t beta_j[PARAM_DELTA] = {0};
    uint16_t e_j[PARAM_DELTA] = {0};

    uint16_t delta_counter;
    uint16_t delta_real_value;
    uint16_t found;
    uint16_t mask1;
    uint16_t mask2;
    uint16_t tmp1;
    uint16_t tmp2;
    uint16_t inverse;
    uint16_t inverse_power_j;

    // Compute the beta_{j_i} page 31 of the documentation
    // delta_counter is the number of error positions seen so far, i.e. the slot of beta_j
    // that the next error position fills.
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; i++) {
        found = 0;
        // NOTE(review): relies on >> of a negative int32_t being an arithmetic shift
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            // Only the slot j == delta_counter receives gf_exp[i], and only at an error position
            beta_j[j] += mask1 & mask2 & gf_exp[i];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
    // Number of error positions actually found (at most PARAM_DELTA slots get filled)
    delta_real_value = delta_counter;

    // Compute the e_{j_i} page 31 of the documentation
    for (size_t i = 0; i < PARAM_DELTA; ++i) {
        tmp1 = 1;
        tmp2 = 1;
        inverse = PQCLEAN_HQCRMRS192_AVX2_gf_inverse(beta_j[i]);
        inverse_power_j = 1;

        // tmp1 = 1 + sum over j of z[j] * inverse^j, i.e. z evaluated at inverse
        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
            inverse_power_j = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, inverse);
            tmp1 ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, z[j]);
        }
        // tmp2 = product over the other PARAM_DELTA - 1 slots of (1 + inverse * beta)
        for (size_t k = 1; k < PARAM_DELTA; ++k) {
            tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
        }
        // Slots beyond the number of errors found are forced to zero
        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
        e_j[i] = mask1 & PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(tmp2));
    }

    // Place the delta e_{j_i} values at the right coordinates of the output vector
    // Same walk as the first pass: the k-th error position receives e_j[k].
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; ++i) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            error_values[i] += mask1 & mask2 & e_j[j];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
 }



 /**
 * @brief Correct the errors
 *
 * Adds (XOR) the error value of every position to the received symbol at that position.
 *
 * @param[out] cdw Array of PARAM_N1 elements receiving the corrected vector
 * @param[in] error_values Array of PARAM_N1 elements storing the error values
 */
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    const uint16_t *value = error_values;
    uint8_t *symbol = cdw;
    size_t remaining = PARAM_N1;

    while (remaining != 0) {
        *symbol ^= *value;
        ++symbol;
        ++value;
        --remaining;
    }
 }



 /**
 * @brief Decodes the received word
 *
 * This function relies on six steps:
 *    <ol>
 *    <li> The first step, is the computation of the 2*PARAM_DELTA syndromes.
 *    <li> The second step is the computation of the error-locator polynomial sigma.
 *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 *    <li> The fourth step, is the polynomial z(x).
 *    <li> The fifth step, is the computation of the error values.
 *    <li> The sixth step is the correction of the errors in the received polynomial.
 *    </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Array of PARAM_K bytes receiving the decoded message
 * @param[in,out] cdw Array of PARAM_N1 bytes storing the received word; it is corrected in place
 */
 void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    // The syndromes are produced with 256-bit vector stores: give the buffer __m256i
    // alignment (same union idiom as in the encoder) instead of relying on the
    // alignment a bare uint16_t array happens to get on the stack.
    union {
        uint16_t arr16[16 * CEIL_DIVIDE(2 * PARAM_DELTA, 16)];
        __m256i dummy;
    } syndromes_aligned = {0};
    uint16_t *syndromes = syndromes_aligned.arr16;
    uint16_t sigma[1 << PARAM_FFT] = {0};
    uint8_t error[1 << PARAM_M] = {0};
    uint16_t z[PARAM_N1] = {0};
    uint16_t error_values[PARAM_N1] = {0};
    uint16_t deg;

    // Calculate the 2*PARAM_DELTA syndromes
    compute_syndromes(syndromes, cdw);

    // Compute the error locator polynomial sigma
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    deg = compute_elp(sigma, syndromes);

    // Compute the error polynomial error
    compute_roots(error, sigma);

    // Compute the polynomial z(x)
    compute_z_poly(z, sigma, deg, syndromes);

    // Compute the error values
    compute_error_values(error_values, z, error);

    // Correct the errors
    correct_errors(cdw, error_values);

    // Retrieve the message from the decoded codeword
    // (the message starts at offset PARAM_G - 1 of the codeword)
    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);

 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h
--- a/src/kem/hqc/hqc-rmrs-192/avx2/vector.c
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/vector.c
@@ -0,0 +1,178 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter <b>weight</b>.
 * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow:
 *  1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It return \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 *
 * A position that was already drawn is discarded and drawn again, so the <b>weight</b> positions are distinct.
 * The selected bits are XORed into <b>v</b>: the caller must pass a zeroed vector to obtain exactly
 * <b>weight</b> set bits. <b>v</b> is read and written in whole 256-bit chunks, CEIL_DIVIDE(PARAM_N, 256) of them.
 * The local buffers are sized for PARAM_OMEGA_R positions, so <b>weight</b> must not exceed PARAM_OMEGA_R.
 *
 * @param[in,out] v Pointer to an array receiving the selected bits
 * @param[in] weight Integer that is the Hamming weight
 * @param[in] ctx Pointer to the context of the seed expander
 */
 void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    size_t random_bytes_size = 3 * weight;
    uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0};
    uint32_t tmp[PARAM_OMEGA_R] = {0};
    __m256i bit256[PARAM_OMEGA_R];
    __m256i bloc256[PARAM_OMEGA_R];
    __m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0);
    __m256i pos256;
    __m256i mask256;
    __m256i aux;
    __m256i i256;
    uint64_t bloc, pos, bit64;
    uint8_t inc;
    size_t i, j, k;

    i = 0;
    // j == random_bytes_size marks the byte buffer as exhausted, forcing a refill on first use
    j = random_bytes_size;
    while (i < weight) {
        do {
            if (j == random_bytes_size) {
                seedexpander(ctx, rand_bytes, random_bytes_size);
                j = 0;
            }

            // Assemble a 24-bit candidate from three bytes, most significant byte first
            tmp[i] = ((uint32_t) rand_bytes[j++]) << 16;
            tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8;
            tmp[i] |= rand_bytes[j++];

        } while (tmp[i] >= UTILS_REJECTION_THRESHOLD);

        tmp[i] = tmp[i] % PARAM_N;

        // Keep the candidate (advance i) only if it differs from every position kept so far
        inc = 1;
        for (k = 0; k < i; k++) {
            if (tmp[k] == tmp[i]) {
                inc = 0;
            }
        }
        i += inc;
    }

    for (i = 0; i < weight; i++) {
        // we store the bloc number and bit position of each vb[i]
        // bloc: index of the 64-bit word holding the bit; bloc >> 2: index of its 256-bit chunk
        bloc = tmp[i] >> 6;
        bloc256[i] = _mm256_set1_epi64x(bloc >> 2);
        // pos: which of the four 64-bit lanes of the chunk holds the bit
        pos = (bloc & 0x3UL);
        pos256 = _mm256_set1_epi64x(pos);
        mask256 = _mm256_cmpeq_epi64(pos256, posCmp256);
        bit64 = 1ULL << (tmp[i] & 0x3f);
        // The bit is set in the selected lane only; the other three lanes are zero
        bit256[i] = _mm256_set1_epi64x(bit64)&mask256;
    }

    // Every chunk of v is visited and every position is compared against the chunk index,
    // whatever the sampled positions are.
    for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) {
        aux = _mm256_loadu_si256(((__m256i *)v) + i);
        i256 = _mm256_set1_epi64x(i);

        for (j = 0; j < weight; j++) {
            mask256 = _mm256_cmpeq_epi64(bloc256[j], i256);
            aux ^= bit256[j] & mask256;
        }
        _mm256_storeu_si256(((__m256i *)v) + i, aux);
    }

 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seed expander, packed into the VEC_N_SIZE_64 words
 * of <b>v</b>, and the last word is masked with RED_MASK to drop the extra bits.
 *
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 * @param[in] ctx Pointer to the context of the seed expander
 */
 void PQCLEAN_HQCRMRS192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *const last_word = &v[VEC_N_SIZE_64 - 1];

    // Draw the raw bytes, then pack them into 64-bit words
    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N_SIZE_64, stream, VEC_N_SIZE_BYTES);

    // Clear the bits beyond PARAM_N in the final word
    *last_word &= RED_MASK;
 }



 /**
 * @brief Adds two vectors
 *
 * The addition is over GF(2): every output word is the XOR of the two input words.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
 void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t word = 0;

    while (word < size) {
        o[word] = v1[word] ^ v2[word];
        ++word;
    }
 }



 /**
 * @brief Compares two vectors
 *
 * All <b>size</b> bytes are always inspected; the differences are accumulated with OR and
 * folded into a single bit at the end, without any branch on the data.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors, in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 uint8_t PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    const uint8_t *lhs = v1;
    const uint8_t *rhs = v2;
    uint64_t diff = 0;

    for (uint32_t left = size; left != 0; --left) {
        diff |= (uint64_t) (*lhs ^ *rhs);
        ++lhs;
        ++rhs;
    }

    // 0 - diff has its top bit set exactly when diff is non-zero (diff is below 256)
    return (uint8_t) ((0 - diff) >> 63);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When growing (size_o >= size_v), the CEIL_DIVIDE(size_v, 8) bytes of the input are copied.
 * When shrinking, VEC_N1N2_SIZE_BYTES bytes are copied and, if size_o is not a multiple of 64,
 * the bits above position size_o % 64 of word VEC_N1N2_SIZE_64 - 1 are cleared. The shrinking
 * path therefore assumes the output is a VEC_N1N2-sized vector.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    const uint32_t used_bits = size_o % 64;
    if (used_bits != 0) {
        // Keep the low used_bits bits of the last word, clear the 64 - used_bits bits above
        const uint64_t keep = UINT64_MAX >> (64 - used_bits);
        o[VEC_N1N2_SIZE_64 - 1] &= keep;
    }
 }
--- a/src/kem/hqc/hqc-rmrs-192/avx2/vector.h
+++ b/src/kem/hqc/hqc-rmrs-192/avx2/vector.h
@@ -0,0 +1,27 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 void PQCLEAN_HQCRMRS192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_from_randombytes(uint64_t *v);


 void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 uint8_t PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

 void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt
+++ b/src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt
@@ -0,0 +1,16 @@
 set(
  	SRC_CLEAN_HQCRMRS192
 	code.c
 	fft.c
 	gf2x.c
 	gf.c
 	hqc.c
 	kem.c
 	parsing.c
 	reed_muller.c
 	reed_solomon.c
 	vector.c
 )

 define_kem_alg(hqcrmrs192_clean
  PQCLEAN_HQCRMRS192_CLEAN "${SRC_CLEAN_HQCRMRS192}" "${CMAKE_CURRENT_SOURCE_DIR}")
--- a/src/kem/hqc/hqc-rmrs-192/clean/api.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQCRMRS192_CLEAN_API_H
 #define PQCLEAN_HQCRMRS192_CLEAN_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 #define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_ALGNAME                      "HQC-RMRS-192"

 #define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES               4562
 #define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_PUBLICKEYBYTES               4522
 #define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_CIPHERTEXTBYTES              9026

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 40
 // (4562 - 4522, i.e. the secret key size minus the appended public key).

 int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/code.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/code.c
@@ -0,0 +1,46 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 *
 * @brief Encoding the message m to a code word em using the concatenated code
 *
 * The message is first encoded with the Reed-Solomon code into a zero-initialised buffer of
 * VEC_N1_SIZE_BYTES bytes; that intermediate word is then encoded with the duplicated
 * Reed-Muller code to obtain the concatenated code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQCRMRS192_CLEAN_code_encode(uint8_t *em, const uint8_t *m) {
    // Outer (Reed-Solomon) code word, input of the inner code
    uint8_t rs_word[VEC_N1_SIZE_BYTES] = {0};

    PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(rs_word, m);

    PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(em, rs_word);
 }



 /**
 * @brief Decoding the code word em to a message m using the concatenated code
 *
 * The Reed-Muller decoder runs first and fills an intermediate buffer of VEC_N1_SIZE_BYTES
 * bytes, which the Reed-Solomon decoder then turns into the message.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQCRMRS192_CLEAN_code_decode(uint8_t *m, const uint8_t *em) {
    // Output of the inner (Reed-Muller) decoder, input of the outer decoder
    uint8_t rs_word[VEC_N1_SIZE_BYTES] = {0};

    PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(rs_word, em);

    PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(m, rs_word);
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/code.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/code.h
@@ -0,0 +1,18 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Encodes `message` into the code word `em` (Reed-Solomon followed by Reed-Muller, see code.c).
 void PQCLEAN_HQCRMRS192_CLEAN_code_encode(uint8_t *em, const uint8_t *message);

 // Decodes the code word `em` back into the message `m`.
 void PQCLEAN_HQCRMRS192_CLEAN_code_decode(uint8_t *m, const uint8_t *em);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/fft.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/fft.c
@@ -0,0 +1,351 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 // Forward declarations of the file-local helpers; each one is documented at its definition below.
 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    // Entry idx receives the single-bit value 2^(PARAM_M - 1 - idx): the basis runs
    // from 2^(PARAM_M - 1) down to 2^1, and 2^0 = 1 is deliberately left out.
    size_t idx = 0;

    while (idx < PARAM_M - 1) {
        betas[idx] = 1 << (PARAM_M - 1 - idx);
        ++idx;
    }
 }



 /**
 * @brief Computes the subset sums of the given set
 *
 * The array subset_sums is such that its ith element is
 * the subset sum of the set elements given by the binary form of i.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    uint16_t elt, low;

    // The empty subset sums to zero.
    subset_sums[0] = 0;

    // Adding element `elt` doubles the table: entries [2^elt, 2^(elt+1)) are the
    // already computed entries [0, 2^elt) with set[elt] XORed in.
    for (elt = 0; elt < set_size; ++elt) {
        const int block = 1 << elt;
        uint16_t *upper = subset_sums + block;

        for (low = 0; low < block; ++low) {
            upper[low] = set[elt] ^ subset_sums[low];
        }
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    // Inputs of 2^1 .. 2^4 coefficients are handled by the unrolled XOR networks below;
    // every other value of m_f is delegated to radix_big().
    switch (m_f) {
    case 4:
        // 16 input coefficients -> 8 coefficients in f0 and 8 in f1.
        // Statement order matters: several outputs (f1[5], f0[5], f1[3], f0[3], ...)
        // are reused as inputs by the assignments that follow them.
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    case 3:
        // 8 input coefficients -> 4 + 4; f0[1] and f1[0] depend on values computed just above them.
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    case 2:
        // 4 input coefficients -> 2 + 2.
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        break;

    case 1:
        // 2 input coefficients: f(x) = f[0] + x.f[1], so both halves are constants.
        f0[0] = f[0];
        f1[0] = f[1];
        break;

    default:
        // Generic case, which splits f and calls back into radix() with m_f - 1.
        radix_big(f0, f1, f, m_f);
        break;
    }
 }

 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    // Two half-size polynomials built from f: `upper` from its top half, `lower` from its bottom half.
    uint16_t upper[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t lower[2 * (1 << (PARAM_FFT - 2))] = {0};

    // Radix conversions of `upper` and `lower`.
    uint16_t upper0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t upper1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t lower0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t lower1[1 << (PARAM_FFT - 2)] = {0};

    // A quarter of f holds 2^(m_f - 2) coefficients; memcpy sizes are in bytes (2 per coefficient).
    const size_t quarter = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = 2 * quarter;
    size_t idx;

    // upper = (top quarter of f, top quarter of f); lower = bottom half of f.
    memcpy(upper, f + 3 * quarter, quarter_bytes);
    memcpy(upper + quarter, f + 3 * quarter, quarter_bytes);
    memcpy(lower, f, 2 * quarter_bytes);

    // Fold the third quarter of f into upper, then fold that result into the second quarter of lower.
    for (idx = 0; idx < quarter; ++idx) {
        upper[idx] ^= f[2 * quarter + idx];
        lower[quarter + idx] ^= upper[idx];
    }

    // Convert both halves one level down.
    radix(upper0, upper1, upper, m_f - 1);
    radix(lower0, lower1, lower, m_f - 1);

    // Each output is the `lower` result followed by the `upper` result.
    memcpy(f0, lower0, quarter_bytes);
    memcpy(f0 + quarter, upper0, quarter_bytes);
    memcpy(f1, lower1, quarter_bytes);
    memcpy(f1 + quarter, upper1, quarter_bytes);
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS192_CLEAN_fft.
 *
 * @param[out] w Array
 * @param[in] f Array
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    // Scratch buffers are sized for the largest recursion level: this function is first
    // entered with m = PARAM_M - 1 and m_f = PARAM_FFT - 1 (see PQCLEAN_HQCRMRS192_CLEAN_fft).
    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t gammas[PARAM_M - 2] = {0};
    uint16_t deltas[PARAM_M - 2] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

    uint16_t beta_m_pow;
    size_t i, j, k;
    size_t x;

    // Step 1: base case, f has two coefficients (f[0] + x.f[1]).
    // tmp[i] = betas[i] * f[1]; w is then filled with f[0] plus every subset sum of tmp,
    // doubling the filled prefix of w once per beta.
    if (m_f == 1) {
        for (i = 0; i < m; ++i) {
            tmp[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], f[1]);
        }

        w[0] = f[0];
        x = 1;
        for (j = 0; j < m; ++j) {
            for (k = 0; k < x; ++k) {
                w[x + k] = w[k] ^ tmp[j];
            }
            x <<= 1;
        }

        return;
    }

    // Step 2: compute g. When the last beta is not 1, coefficient i of f is multiplied
    // in place by betas[m - 1]^i, so the caller's f is modified.
    if (betas[m - 1] != 1) {
        beta_m_pow = 1;
        x = 1;
        x <<= m_f;
        for (i = 1; i < x; ++i) {
            beta_m_pow = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3: radix conversion of f into f0 and f1
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1] and deltas[i] = gammas[i]^2 + gammas[i] (XOR is the field addition).
    for (i = 0; i + 1 < m; ++i) {
        gammas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5: evaluate f0 recursively over the deltas; results go to u
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    // k = 2^(m - 1): w is written as two halves of k entries each.
    k = 1;
    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        // No second recursion: f1[0] is used directly in place of the evaluations of f1.
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        // Evaluate f1 recursively as well; results go to v
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6: w[i] = u[i] + gammas_sums[i] * v[i] and w[k + i] = w[i] + v[i].
        // The upper half is seeded with v by the memcpy (2 bytes per entry) and then XORed with w[i].
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * Also note that f is altered during computation (twisted at each level).
 *
 * @param[out] w Array
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t twisted_basis[PARAM_M - 1] = {0};
    uint16_t rad0[1 << (PARAM_FFT - 1)] = {0};
    uint16_t rad1[1 << (PARAM_FFT - 1)] = {0};
    uint16_t evals0[1 << (PARAM_M - 1)] = {0};
    uint16_t evals1[1 << (PARAM_M - 1)] = {0};
    const size_t half = 1 << (PARAM_M - 1);
    size_t idx;

    // Gao-Mateer steps 1 and 2 need no work here: PARAM_FFT > 1 and the last basis element is 1.

    // Basis of the evaluation points (without 1) and all of its subset sums.
    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // Step 4: basis for the recursive calls, beta^2 + beta for each basis element.
    for (idx = 0; idx < PARAM_M - 1; ++idx) {
        twisted_basis[idx] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(basis[idx]) ^ basis[idx];
    }

    // Step 3: radix conversion of f into rad0 and rad1.
    radix(rad0, rad1, f, PARAM_FFT);

    // Step 5: evaluate both halves recursively.
    fft_rec(evals0, rad0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);
    fft_rec(evals1, rad1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);

    // Steps 6 and 7: w[idx] = evals0[idx] + basis_sums[idx] * evals1[idx] for the lower half,
    // and w[half + idx] = w[idx] + evals1[idx] for the upper half.
    // basis_sums[0] is 0, so entry 0 needs no multiplication.
    w[0] = evals0[0];
    w[half] = evals1[0] ^ evals0[0];
    for (idx = 1; idx < half; ++idx) {
        w[idx] = evals0[idx] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(basis_sums[idx], evals1[idx]);
        w[half + idx] = evals1[idx] ^ w[idx];
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * @param[out] error Array with the error; every recovered bit is XORed into it,
 *                   so the caller presumably passes it zero-initialized (to be confirmed against callers)
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    const uint16_t half = 1 << (PARAM_M - 1);
    size_t idx, pos;

    // Rebuild the evaluation points in the order the FFT produced w.
    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // 1 ^ ((uint16_t) -x >> 15) is 1 when x == 0 and 0 for the nonzero field values,
    // so a bit is flipped exactly where the evaluation is zero, without branching on w.
    // w[0] and w[half] are both mapped to position 0.
    error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15);
    error[0] ^= 1 ^ ((uint16_t) - w[half] >> 15);

    for (idx = 1; idx < half; ++idx) {
        const uint16_t point = basis_sums[idx];

        // Lower half of w: evaluation at `point`.
        pos = PARAM_GF_MUL_ORDER - gf_log[point];
        error[pos] ^= 1 ^ ((uint16_t) - w[idx] >> 15);

        // Upper half of w: evaluation at `point ^ 1`.
        pos = PARAM_GF_MUL_ORDER - gf_log[point ^ 1];
        error[pos] ^= 1 ^ ((uint16_t) - w[half + idx] >> 15);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/fft.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/fft.h
@@ -0,0 +1,18 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>
 #include <stdint.h>

 // Evaluates the polynomial f (f_coeffs coefficients) with the additive FFT; the evaluations are written to w.
 void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 // Flips a bit of `error` for every zero found among the evaluations w.
 void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/gf.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/gf.c
@@ -0,0 +1,63 @@
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 /**
 * @file gf.c
 * Galois field implementation with multiplication using lookup tables
 */


 /**
 * @brief Multiplies two elements of GF(2^PARAM_M)
 * @returns the product a*b (0 if a or b is zero)
 * @param[in] a First element of GF(2^PARAM_M) to multiply
 * @param[in] b Second element of GF(2^PARAM_M) to multiply
 */
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mul(uint16_t a, uint16_t b) {
    // All-ones when the operand is nonzero, zero otherwise; computed without branching.
    const uint16_t a_nonzero = (uint16_t) (-((int32_t) a) >> 31);
    const uint16_t b_nonzero = (uint16_t) (-((int32_t) b) >> 31);

    // Multiply through the tables: log(a*b) = log(a) + log(b), reduced by gf_mod.
    const uint16_t log_sum = PQCLEAN_HQCRMRS192_CLEAN_gf_mod(gf_log[a] + gf_log[b]);
    const uint16_t product = gf_exp[log_sum];

    // gf_log[0] is only a placeholder, so the table result is forced to 0 when either operand is 0.
    return a_nonzero & b_nonzero & product;
 }



 /**
 * @brief Squares an element of GF(2^PARAM_M)
 * @returns a^2
 * @param[in] a Element of GF(2^PARAM_M)
 */
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_square(uint16_t a) {
    // All-ones when a != 0, zero when a == 0. The mask is unsigned, as in
    // PQCLEAN_HQCRMRS192_CLEAN_gf_mul: storing 0xFFFF in an int16_t would rely on an
    // implementation-defined conversion.
    uint16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0

    // log(a^2) = 2*log(a), reduced by gf_mod; the mask forces the result to 0 for a == 0.
    return mask & gf_exp[PQCLEAN_HQCRMRS192_CLEAN_gf_mod(2 * gf_log[a])];
 }



 /**
 * @brief Computes the inverse of an element of GF(2^PARAM_M)
 * @returns the inverse of a
 * @param[in] a Element of GF(2^PARAM_M)
 */
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(uint16_t a) {
    // All-ones when a != 0, zero when a == 0. The mask is unsigned, as in
    // PQCLEAN_HQCRMRS192_CLEAN_gf_mul: storing 0xFFFF in an int16_t would rely on an
    // implementation-defined conversion.
    uint16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0

    // log(1/a) = PARAM_GF_MUL_ORDER - log(a); the mask makes the function return 0 for a == 0.
    return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]];
 }



 /**
 * @brief Returns i modulo 2^PARAM_M-1
 * i must be less than 2*(2^PARAM_M-1).
 * Therefore, the return value is either i or i-2^PARAM_M+1.
 * @returns i mod (2^PARAM_M-1)
 * @param[in] i The integer whose modulo is taken
 */
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mod(uint16_t i) {
    // Subtract the modulus unconditionally; the result wraps around when i is smaller than it.
    const uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // For inputs in the documented range, the top bit of `reduced` is set exactly when the
    // subtraction wrapped, i.e. when i < PARAM_GF_MUL_ORDER.
    const uint16_t wrapped = reduced >> 15;

    // Add the modulus back only in the wrapped case, selected with a mask instead of a branch.
    const uint16_t restore = (uint16_t) (-wrapped) & PARAM_GF_MUL_ORDER;

    return reduced + restore;
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/gf.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/gf.h
@@ -0,0 +1,39 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>
 #include <stdint.h>


 /**
 * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
 * The last two elements are needed by the PQCLEAN_HQCRMRS192_CLEAN_gf_mul function
 * (for example if both elements to multiply are zero).
 */
 static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



 /**
 * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
 * The logarithm of 0 is set to 0 by convention.
 */
 static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };


 // Product a*b in GF(2^PARAM_M), computed with the gf_log / gf_exp tables.
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mul(uint16_t a, uint16_t b);

 // Square a^2 in GF(2^PARAM_M).
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_square(uint16_t a);

 // Inverse of a in GF(2^PARAM_M).
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(uint16_t a);

 // i modulo 2^PARAM_M - 1, for i less than 2*(2^PARAM_M - 1).
 uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mod(uint16_t i);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/gf2x.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/gf2x.c
@@ -0,0 +1,154 @@
 #include "gf2x.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include <stdint.h>
 /**
 * \file gf2x.c
 * \brief Implementation of multiplication of two polynomials
 */


 // Forward declarations of the file-local helpers; each one is documented at its definition below.
 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
 static void reduce(uint64_t *o, const uint64_t *a);
 static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

 /**
 * @brief swap two elements in a table
 *
 * This function exchanges tab[elt1] with tab[elt2]
 *
 * @param[in] tab Pointer to the table
 * @param[in] elt1 Index of the first element
 * @param[in] elt2 Index of the second element
 */
 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    // Work through pointers to the two slots; swapping a slot with itself leaves it unchanged.
    uint16_t *first = tab + elt1;
    uint16_t *second = tab + elt2;
    const uint16_t saved = *first;

    *first = *second;
    *second = saved;
 }



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * This function computes the modular reduction of the polynomial a(x)
 *
 * @param[in] a Pointer to the polynomial a(x)
 * @param[out] o Pointer to the result
 */
 static void reduce(uint64_t *o, const uint64_t *a) {
    // Bit PARAM_N of a lies in word VEC_N_SIZE_64 - 1 at bit offset PARAM_N & 63, so the part
    // of a above bit PARAM_N is read from two neighbouring words and realigned to bit 0.
    const uint64_t *high = a + VEC_N_SIZE_64 - 1;
    size_t word;

    for (word = 0; word < VEC_N_SIZE_64; word++) {
        const uint64_t from_low_word = high[word] >> (PARAM_N & 63);
        const uint64_t from_high_word = (uint64_t) (high[word + 1] << (64 - (PARAM_N & 63)));

        // X^PARAM_N = 1 modulo X^PARAM_N - 1: the upper part is folded onto the lower part.
        o[word] = a[word] ^ from_low_word ^ from_high_word;
    }

    // Apply RED_MASK to the last word of the result.
    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Computes the product of the sparse polynomial a1 with the dense polynomial a2(x)
 *
 *  o(x) = a1(x)a2(x)
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial a1 (list of degrees of the monomials which appear in a1)
 * @param[in] a2 Pointer to the dense polynomial a2(x)
 * @param[in] weight Hamming weight of the sparse polynomial a1
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
 static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    // Strategy: precompute the 16 copies of a2 shifted left by 0..15 bits, then for every
    // monomial of a1 XOR the matching copy into o at a 16-bit aligned offset.
    // Both the table layout and the order in which monomials are processed are shuffled
    // with output from ctx.
    uint64_t carry;
    uint32_t dec, s;
    uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
    uint16_t permuted_table[16];
    uint16_t permutation_table[16];
    uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t tmp;
    uint64_t *pt;
    uint8_t *res;
    size_t i, j;

    // Start from the identity mapping shift -> table slot ...
    for (i = 0; i < 16; i++) {
        permuted_table[i] = (uint16_t) i;
    }

    // ... draw 16 random 16-bit values ...
    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    // ... and shuffle: entry i is swapped with an entry chosen among positions i..15.
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    // Slot for shift 0: a plain copy of a2 with one zero word of headroom.
    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    // Slots for shifts 1..15: a2 shifted left by i bits, the overflow of each word
    // carried into the next one and the final carry stored in the extra word.
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    // Same shuffle for the order in which the monomials of a1 are processed.
    // NOTE(review): the two arrays hold PARAM_OMEGA_E entries, so weight is assumed
    // not to exceed PARAM_OMEGA_E - confirm against the parameter sets.
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i)));
    }

    // Accumulate: a monomial of degree d contributes a2 shifted by d bits.
    // d is split into dec = d mod 16 (which table slot) and s = d / 16 (offset in 16-bit units).
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res = o + 2 * s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        // res is only 2-byte aligned, hence the byte-wise load8/store8 helpers.
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            tmp = PQCLEAN_HQCRMRS192_CLEAN_load8(res);
            PQCLEAN_HQCRMRS192_CLEAN_store8(res, tmp ^ pt[j]);
            res += 8;
        }
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * This functions multiplies a sparse polynomial <b>a1</b> (of Hamming weight equal to <b>weight</b>)
 * and a dense polynomial <b>a2</b>. The multiplication is done modulo \f$ X^n - 1\f$.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial
 * @param[in] a2 Pointer to the dense polynomial
 * @param[in] weight Integer that is the weight of the sparse polynomial
 * @param[in] ctx Pointer to the randomness context
 */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    // Unreduced product: twice the length of an operand plus one word of headroom.
    uint64_t product[2 * VEC_N_SIZE_64 + 1] = {0};
    uint8_t *const product_bytes = (uint8_t *) product;

    // The convolution accumulates into the buffer viewed as bytes ...
    fast_convolution_mult(product_bytes, a1, a2, weight, ctx);

    // ... which is then converted in place into 64-bit words ...
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(product, 2 * VEC_N_SIZE_64 + 1, product_bytes, sizeof(product));

    // ... and reduced modulo X^n - 1 into the caller's output.
    reduce(o, product);
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/gf2x.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/gf2x.h
@@ -0,0 +1,16 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 // Multiplies the sparse polynomial a1 (list of `weight` monomial degrees) by the dense polynomial a2
 // modulo X^n - 1 and writes the result to o; ctx supplies the randomness used inside the multiplication.
 void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/hqc.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/hqc.c
@@ -0,0 +1,144 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key is composed of the syndrome <b>s</b> as well as the <b>seed</b> used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the <b>seed</b> used to generate vectors <b>x</b> and  <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    uint8_t sk_seed[SEED_BYTES] = {0};
    uint8_t pk_seed[SEED_BYTES] = {0};
    AES_XOF_struct sk_expander;
    AES_XOF_struct pk_expander;
    uint64_t secret_x[VEC_N_SIZE_64] = {0};
    uint32_t secret_y[PARAM_OMEGA] = {0};
    uint64_t public_h[VEC_N_SIZE_64] = {0};
    uint64_t syndrome[VEC_N_SIZE_64] = {0};

    // One fresh seed per key half. Each expander takes the first 32 bytes of its seed
    // as the seed proper and the bytes that follow as the diversifier.
    randombytes(sk_seed, SEED_BYTES);
    seedexpander_init(&sk_expander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(pk_seed, SEED_BYTES);
    seedexpander_init(&pk_expander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret key: x as a bit vector, y as a list of coordinates, both of weight PARAM_OMEGA.
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&sk_expander, secret_x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_expander, secret_y, PARAM_OMEGA);

    // Public key: h is expanded from the public seed and the syndrome is s = x + y.h.
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(&pk_expander, public_h);
    PQCLEAN_HQCRMRS192_CLEAN_vect_mul(syndrome, secret_y, public_h, PARAM_OMEGA, &sk_expander);
    PQCLEAN_HQCRMRS192_CLEAN_vect_add(syndrome, secret_x, syndrome, VEC_N_SIZE_64);

    // Serialize: pk = (pk_seed, s), and sk is built from sk_seed and the serialized pk.
    PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(pk, pk_seed, syndrome);
    PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(sk, sk_seed, pk);
 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_expander;
    uint64_t public_h[VEC_N_SIZE_64] = {0};
    uint64_t syndrome[VEC_N_SIZE_64] = {0};
    uint64_t r1[VEC_N_SIZE_64] = {0};
    uint32_t r2[PARAM_OMEGA_R] = {0};
    uint64_t noise_e[VEC_N_SIZE_64] = {0};
    uint64_t codeword[VEC_N_SIZE_64] = {0};
    uint64_t masked[VEC_N_SIZE_64] = {0};

    // All encryption randomness is derived from theta (32 seed bytes followed by the diversifier).
    seedexpander_init(&theta_expander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Unpack h and s from the public key.
    PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(public_h, syndrome, pk);

    // Sample r1 (bit vector), r2 (coordinate list) and e (bit vector), in this order.
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&theta_expander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&theta_expander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&theta_expander, noise_e, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS192_CLEAN_vect_mul(u, r2, public_h, PARAM_OMEGA_R, &theta_expander);
    PQCLEAN_HQCRMRS192_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

    // Encode the message into v (as bytes), convert v in place to 64-bit words,
    // then widen the code word from PARAM_N1N2 to PARAM_N bits.
    PQCLEAN_HQCRMRS192_CLEAN_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_vect_resize(codeword, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, truncated back to PARAM_N1N2 bits.
    PQCLEAN_HQCRMRS192_CLEAN_vect_mul(masked, r2, syndrome, PARAM_OMEGA_R, &theta_expander);
    PQCLEAN_HQCRMRS192_CLEAN_vect_add(masked, noise_e, masked, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_vect_add(masked, codeword, masked, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_vect_resize(v, PARAM_N1N2, masked, PARAM_N);
 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t work[VEC_N_SIZE_64] = {0};
    uint64_t noisy_codeword[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint8_t perm_seed[SEED_BYTES] = {0};
    AES_XOF_struct perm_expander;

    // Unpack the secret key. x lands in `work` and is overwritten below; only y is used afterwards.
    PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(work, y, pk, sk);

    // Fresh randomness for the randomized multiplication.
    randombytes(perm_seed, SEED_BYTES);
    seedexpander_init(&perm_expander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // noisy_codeword = v - u.y, with v first widened from PARAM_N1N2 to PARAM_N bits.
    PQCLEAN_HQCRMRS192_CLEAN_vect_resize(work, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQCRMRS192_CLEAN_vect_mul(noisy_codeword, y, u, PARAM_OMEGA, &perm_expander);
    PQCLEAN_HQCRMRS192_CLEAN_vect_add(noisy_codeword, work, noisy_codeword, VEC_N_SIZE_64);

    // Serialize the words to bytes (reusing `work` as the byte buffer) and decode them into m.
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr((uint8_t *)work, VEC_N_SIZE_BYTES, noisy_codeword, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_code_decode(m, (uint8_t *)work);
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/hqc.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/hqc.h
@@ -0,0 +1,19 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 // Generates a PKE key pair: pk receives the public key, sk the secret key.
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 // Encrypts message m under pk with randomness derived from theta; the ciphertext is the pair (u, v).
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

 // Decrypts the ciphertext (u, v) with sk and writes the recovered message to m.
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/kem.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/kem.c
@@ -0,0 +1,140 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 if keygen is successful
 */
 int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {
    // The KEM key pair is the key pair of the underlying PKE scheme.
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(pk, sk);

    // Key generation has no failure path.
    return 0;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
 int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {
    uint8_t msg[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    // Hash input for the shared secret, laid out as m || u || v.
    unsigned char hash_input[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *const u_bytes = hash_input + VEC_K_SIZE_BYTES;
    unsigned char *const v_bytes = u_bytes + VEC_N_SIZE_BYTES;

    // Random message m.
    randombytes(msg, VEC_K_SIZE_BYTES);

    // theta = SHA3-512(m) seeds the randomness of the encryption.
    sha3_512(theta, msg, VEC_K_SIZE_BYTES);

    // (u, v) = Encrypt(m; theta)
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(u, v, msg, theta, pk);

    // d = SHA-512(m)
    sha512(d, msg, VEC_K_SIZE_BYTES);

    // ss = SHA-512(m || u || v)
    memcpy(hash_input, msg, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(u_bytes, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(v_bytes, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, hash_input, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // The ciphertext is the serialization of (u, v, d).
    PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(ct, u, v, d);

    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {
    // Values parsed from the received ciphertext.
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    // Values recomputed from the decrypted message.
    uint64_t u2[VEC_N_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t msg[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    // Hash input for the shared secret, laid out as m || u || v.
    unsigned char hash_input[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *const u_bytes = hash_input + VEC_K_SIZE_BYTES;
    unsigned char *const v_bytes = u_bytes + VEC_N_SIZE_BYTES;
    uint8_t mismatch;

    // Parse (u, v, d) from the ciphertext.
    PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // The public key is stored in the secret key right after the secret seed.
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(msg, u, v, sk);

    // Re-encrypt the recovered message with theta = SHA3-512(m) to obtain (u', v').
    sha3_512(theta, msg, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(u2, v2, msg, theta, pk);

    // d' = SHA-512(m)
    sha512(d2, msg, VEC_K_SIZE_BYTES);

    // ss = SHA-512(m || u || v), computed from the received u and v.
    memcpy(hash_input, msg, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(u_bytes, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(v_bytes, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, hash_input, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Reject if (u, v, d) differs from (u', v', d'). All three comparisons always run
    // and their results are OR-ed together.
    mismatch = PQCLEAN_HQCRMRS192_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS192_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS192_CLEAN_vect_compare(d, d2, SHA512_BYTES);

    // Stretch "nonzero" into an all-ones byte, then clear the shared secret with it on rejection.
    const uint8_t reject_mask = (uint8_t) (-((int16_t) mismatch) >> 15);
    const uint8_t keep_mask = (uint8_t) ~reject_mask;
    for (size_t pos = 0; pos < SHARED_SECRET_BYTES; pos++) {
        ss[pos] &= keep_mask;
    }

    // 0 when everything matched, -1 otherwise.
    return -(reject_mask & 1);
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/parameters.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/parameters.h
@@ -0,0 +1,98 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H


 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
 #include "api.h"


 #define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=16
                                                The smallest power of 2 greater than 16+1 is 32=2^5
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                                             35851
 #define PARAM_N1                                56
 #define PARAM_N2                                640
 #define PARAM_N1N2                              35840
 #define PARAM_OMEGA                             100
 #define PARAM_OMEGA_E                           114
 #define PARAM_OMEGA_R                           114
 #define PARAM_SECURITY                          192
 #define PARAM_DFR_EXP                           192

 #define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_CIPHERTEXTBYTES

 /* 16742417 = 467 * PARAM_N, the largest multiple of PARAM_N that is below 2^24 */
 #define UTILS_REJECTION_THRESHOLD             16742417
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        PARAM_K
 #define VEC_N1_SIZE_BYTES                       PARAM_N1
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_DELTA                             16
 #define PARAM_M                                 8
 #define PARAM_GF_POLY                           0x11D
 #define PARAM_GF_MUL_ORDER                      255
 #define PARAM_K                                 24
 /* PARAM_G = 2 * PARAM_DELTA + 1 = PARAM_N1 - PARAM_K + 1; RS_POLY_COEFS lists that many coefficients */
 #define PARAM_G                                 33
 #define PARAM_FFT                               5
 #define RS_POLY_COEFS 45,216,239,24,253,104,27,40,107,50,163,210,227,134,224,158,119,13,158,1,238,164,82,43,15,232,246,142,50,189,29,232,1

 /* 0x7ff has the low 11 bits set; 11 = PARAM_N mod 64 */
 #define RED_MASK                                0x7ff
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/parsing.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/parsing.c
@@ -0,0 +1,186 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */


 /**
 * @brief Serialize a 64-bit word into 8 bytes, least significant byte first
 *
 * @param[out] out Buffer of at least 8 bytes receiving the encoding
 * @param[in] in Word to serialize
 */
 void PQCLEAN_HQCRMRS192_CLEAN_store8(unsigned char *out, uint64_t in) {
    for (unsigned int byte = 0; byte < 8; byte++) {
        // byte k of the output holds bits [8k, 8k+7] of the input
        out[byte] = (unsigned char) ((in >> (8 * byte)) & 0xFF);
    }
 }


 /**
 * @brief Deserialize 8 bytes (least significant byte first) into a 64-bit word
 *
 * @param[in] in Buffer of at least 8 bytes
 * @returns the decoded 64-bit word
 */
 uint64_t PQCLEAN_HQCRMRS192_CLEAN_load8(const unsigned char *in) {
    uint64_t word = 0;

    for (unsigned int byte = 0; byte < 8; byte++) {
        word |= ((uint64_t) in[byte]) << (8 * byte);
    }

    return word;
 }

 /**
 * @brief Deserialize a byte string into an array of 64-bit words
 *
 * Bytes are consumed in groups of 8 (least significant byte first). If fewer
 * than 8 bytes remain and there is still room in the output, they are packed
 * into the low bytes of one extra word.
 *
 * @param[out] out64 Array receiving the decoded words
 * @param[in] outlen Capacity of out64 in words
 * @param[in] in8 Byte string to decode
 * @param[in] inlen Length of in8 in bytes
 */
 void PQCLEAN_HQCRMRS192_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t consumed = 0;
    size_t produced = 0;
    uint64_t tail = 0;

    // complete 8-byte groups
    while (produced < outlen && consumed + 8 <= inlen) {
        out64[produced] = PQCLEAN_HQCRMRS192_CLEAN_load8(in8 + consumed);
        produced += 1;
        consumed += 8;
    }

    // nothing left to read, or no room left to write
    if (consumed >= inlen || produced >= outlen) {
        return;
    }

    // between 1 and 7 trailing bytes: fold them in from the last one down
    for (size_t k = inlen; k > consumed; k--) {
        tail = (tail << 8) | in8[k - 1];
    }
    out64[produced] = tail;
 }

 /**
 * @brief Serialize an array of 64-bit words into bytes, least significant byte first
 *
 * Writing stops as soon as either outlen bytes have been produced or all
 * inlen words have been fully written out.
 *
 * @param[out] out8 Buffer receiving the bytes
 * @param[in] outlen Capacity of out8 in bytes
 * @param[in] in64 Words to serialize
 * @param[in] inlen Number of words in in64
 */
 void PQCLEAN_HQCRMRS192_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen && (pos / 8) < inlen; pos++) {
        // output byte pos comes from word pos / 8, byte lane pos % 8
        out8[pos] = (uint8_t) ((in64[pos / 8] >> (8 * (pos % 8))) & 0xFF);
    }
 }


 /**
 * @brief Serialize a secret key
 *
 * Layout of the output: the SEED_BYTES-byte secret seed followed by the
 * PUBLIC_KEY_BYTES-byte public key.
 *
 * @param[out] sk Buffer receiving the serialized secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk Serialized public key to append
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_slot, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Deserialize a secret key
 *
 * The serialized key holds a seed followed by the public key. The public key
 * is copied out as-is; the vectors x and y are regenerated from the seed with
 * a seed expander (x first, then y, both with weight PARAM_OMEGA).
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint32_t (by coordinates) representation of vector y
 * @param[out] pk Buffer receiving the serialized public key
 * @param[in] sk Serialized secret key
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *pk_part = sk + SEED_BYTES;

    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, pk_part, PUBLIC_KEY_BYTES);

    // the bytes from offset 32 of the seed are handed over as the second seedexpander_init input
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&expander, y, PARAM_OMEGA);
 }

 /**
 * @brief Serialize a public key
 *
 * Layout of the output: the SEED_BYTES-byte public seed followed by the
 * vector s written as VEC_N_SIZE_BYTES bytes.
 *
 * @param[out] pk Buffer receiving the serialized public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *s_slot = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(s_slot, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
 }



 /**
 * @brief Deserialize a public key
 *
 * The serialized key holds a seed followed by the vector s. s is read back
 * into 64-bit words and h is regenerated from the seed with a seed expander.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk Serialized public key
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *s_part = pk + SEED_BYTES;

    memcpy(seed, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(s, VEC_N_SIZE_64, s_part, VEC_N_SIZE_BYTES);

    // the bytes from offset 32 of the seed are handed over as the second seedexpander_init input
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(&expander, h);
 }


 /**
 * @brief Serialize a ciphertext
 *
 * Layout of the output: u (VEC_N_SIZE_BYTES bytes), then v
 * (VEC_N1N2_SIZE_BYTES bytes), then the hash d (SHA512_BYTES bytes).
 *
 * @param[out] ct Buffer receiving the serialized ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d Buffer containing the hash d
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *u_slot = ct;
    uint8_t *v_slot = u_slot + VEC_N_SIZE_BYTES;
    uint8_t *d_slot = v_slot + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(u_slot, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(v_slot, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_slot, d, SHA512_BYTES);
 }


 /**
 * @brief Deserialize a ciphertext
 *
 * Expected layout of the input: u (VEC_N_SIZE_BYTES bytes), then v
 * (VEC_N1N2_SIZE_BYTES bytes), then the hash d (SHA512_BYTES bytes).
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d Buffer receiving the hash d
 * @param[in] ct Serialized ciphertext
 */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *u_part = ct;
    const uint8_t *v_part = u_part + VEC_N_SIZE_BYTES;
    const uint8_t *d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(u, VEC_N_SIZE_64, u_part, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/parsing.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/parsing.h
@@ -0,0 +1,36 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 /* <stddef.h> provides size_t, which the *_arr prototypes below use */
 #include <stddef.h>
 #include <stdint.h>

 /* Write a 64-bit word as 8 bytes, least significant byte first */
 void PQCLEAN_HQCRMRS192_CLEAN_store8(unsigned char *out, uint64_t in);

 /* Read 8 bytes (least significant byte first) as a 64-bit word */
 uint64_t PQCLEAN_HQCRMRS192_CLEAN_load8(const unsigned char *in);

 /* Decode inlen bytes into at most outlen 64-bit words */
 void PQCLEAN_HQCRMRS192_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

 /* Encode at most inlen 64-bit words into at most outlen bytes */
 void PQCLEAN_HQCRMRS192_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


 /* Secret key <-> byte string */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


 /* Public key <-> byte string */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 /* Ciphertext <-> byte string */
 void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c
@@ -0,0 +1,237 @@
 #include "parameters.h"
 #include "reed_muller.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file reed_muller.c
 * Constant time implementation of Reed-Muller code RM(1,7)
 */



 // number of repeated code words: each message byte is encoded as one
 // 128-bit codeword that is repeated MULTIPLICITY times
 #define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

 // copy bit 0 into all bits of a 32 bit value:
 // evaluates to 0 when bit 0 of x is clear and to -1 (all bits set) when it is set
 #define BIT0MASK(x) (-((x) & 1))


 // file-local helpers, defined below
 static void encode(uint8_t *word, uint8_t message);
 static void hadamard(uint16_t src[128], uint16_t dst[128]);
 static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]);
 static uint8_t find_peaks(const uint16_t transform[128]);



 /**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * Encoding matrix of this code:
 * bit pattern (note that bits are numbered big endian)
 * 0   aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1   cccccccc cccccccc cccccccc cccccccc
 * 2   f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3   ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4   ffff0000 ffff0000 ffff0000 ffff0000
 * 5   ffffffff 00000000 ffffffff 00000000
 * 6   ffffffff ffffffff 00000000 00000000
 * 7   ffffffff ffffffff ffffffff ffffffff
 *
 * The 128-bit codeword is produced as four 32-bit quarters, each written
 * least significant byte first.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message
 */
 static void encode(uint8_t *word, uint8_t message) {
    // patterns selected by message bits 0..4; identical in all four quarters
    // (Warning: in the bit matrix above, low bits are at the left!)
    static const uint32_t low_patterns[5] = {
        0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
    };
    uint32_t quarter[4];
    uint32_t base;
    uint32_t flip5 = BIT0MASK(message >> 5);
    uint32_t flip6 = BIT0MASK(message >> 6);

    // bit 7 inverts the whole codeword
    base = BIT0MASK(message >> 7);
    for (unsigned int b = 0; b < 5; b++) {
        base ^= BIT0MASK(message >> b) & low_patterns[b];
    }

    // bit 5 inverts quarters 1 and 3, bit 6 inverts quarters 2 and 3
    quarter[0] = base;
    quarter[1] = base ^ flip5;
    quarter[2] = base ^ flip6;
    quarter[3] = base ^ flip5 ^ flip6;

    for (unsigned int q = 0; q < 4; q++) {
        for (unsigned int b = 0; b < 4; b++) {
            word[4 * q + b] = (quarter[q] >> (8 * b)) & 0xff;
        }
    }
 }



 /**
 * @brief Hadamard transform
 *
 * Computes the Hadamard transform of src into dst. src is clobbered because
 * it doubles as scratch space between passes.
 *
 * The method is easiest to follow on H(3) instead of H(7). The routine
 * multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * This takes three passes; each pass stores the sums of adjacent pairs in the
 * lower half of the output and their differences in the upper half:
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3:   a+b+c+d+e+f+g+h   a+b-c-d+e+f-g-h   a+b+c+d-e-f-g-h   a+b-c-d-e-f+g+h
 *                    a-b+c-d+e-f+g-h   a-b-c+d+e-f-g+h   a-b+c-d-e+f-g+h   a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src 128 input values; overwritten with intermediate results
 * @param[out] dst 128 values receiving the transform
 */
 static void hadamard(uint16_t src[128], uint16_t dst[128]) {
    // data ping-pongs between the two buffers; with seven passes the last
    // write lands in dst
    uint16_t *from = src;
    uint16_t *into = dst;

    for (uint32_t round = 0; round < 7; round++) {
        for (uint32_t k = 0; k < 64; k++) {
            uint16_t even = from[2 * k];
            uint16_t odd = from[2 * k + 1];
            into[k] = (uint16_t) (even + odd);
            into[k + 64] = (uint16_t) (even - odd);
        }
        // exchange the roles of the buffers for the next pass
        uint16_t *spare = from;
        from = into;
        into = spare;
    }
 }



 /**
 * @brief Add multiple codewords into expanded codeword
 *
 * Every bit of each 128-bit copy is unpacked and the MULTIPLICITY copies are
 * summed position by position, so each output entry counts how many copies
 * have that bit set.
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high
 *
 * @param[out] dest Structure that contain the expanded codeword
 * @param[in] src Structure that contain the codeword
 */
 static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) {
    size_t idx, rep;

    // the first copy initialises the counters
    for (idx = 0; idx < 128; idx++) {
        dest[idx] = (uint16_t) ((src[idx >> 3] >> (idx & 7)) & 1);
    }

    // every further copy is accumulated on top
    for (rep = 1; rep < MULTIPLICITY; rep++) {
        const uint8_t *codeword = src + 16 * rep;
        for (idx = 0; idx < 128; idx++) {
            dest[idx] += (uint16_t) ((codeword[idx >> 3] >> (idx & 7)) & 1);
        }
    }
 }



 /**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the
 * entry with the largest absolute value (entries are read as 16-bit two's
 * complement), and add 128 if that peak is non-negative.
 * If several entries share the largest absolute value, the one with the
 * smallest index is taken.
 * The scan is branch-free: updates are applied through all-ones/all-zero masks.
 *
 * @param[in] transform Structure that contain the expanded codeword
 * @returns the peak index (low 7 bits), with bit 7 set when the peak is non-negative
 */
 static uint8_t find_peaks(const uint16_t transform[128]) {
    uint16_t best_value = 0;
    uint16_t best_magnitude = 0;
    uint16_t best_index = 0;

    for (uint16_t idx = 0; idx < 128; idx++) {
        uint16_t value = transform[idx];
        // magnitude = |value| in two's complement
        uint16_t magnitude = value ^ ((-(value >> 15)) & (value ^ -value));
        // take = 0xffff when magnitude is strictly larger than the best so far, else 0
        uint16_t take = -(((uint16_t) (best_magnitude - magnitude)) >> 15);
        uint16_t keep = (uint16_t) ~take;

        best_value = (take & value) | (keep & best_value);
        best_index = (take & idx) | (keep & best_index);
        best_magnitude = (take & magnitude) | (keep & best_magnitude);
    }

    // set bit 7 when the sign bit of the peak is clear
    best_index |= 128 & ((best_value >> 15) - 1);
    return (uint8_t) best_index;
 }




 /**
 * @brief Encodes the received word
 *
 * The message consists of VEC_N1_SIZE_BYTES bytes; each byte is encoded into
 * one 16-byte RM(1,7) codeword which is then repeated MULTIPLICITY times.
 *
 * @param[out] cdw Buffer of 16 * MULTIPLICITY * VEC_N1_SIZE_BYTES bytes receiving the encoded message
 * @param[in] msg Buffer of VEC_N1_SIZE_BYTES bytes storing the message
 */
 void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        uint8_t *first = &cdw[16 * i * MULTIPLICITY];

        // encode the byte once ...
        encode(first, msg[i]);
        // ... then duplicate that codeword into the remaining slots
        for (size_t rep = 1; rep < MULTIPLICITY; rep++) {
            memcpy(first + 16 * rep, first, 16);
        }
    }
 }



 /**
 * @brief Decodes the received word
 *
 * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes @cite macwilliams1977theory
 *
 * For each message byte, the MULTIPLICITY repeated codewords are summed, the
 * sum is transformed, and the location and sign of the peak give the byte.
 *
 * @param[out] msg Buffer of VEC_N1_SIZE_BYTES bytes receiving the decoded message
 * @param[in] cdw Buffer of 16 * MULTIPLICITY * VEC_N1_SIZE_BYTES bytes storing the received word
 */
 void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    uint16_t summed[128];
    uint16_t spectrum[128];

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        const uint8_t *group = &cdw[16 * i * MULTIPLICITY];

        // add up the repeated codewords bit by bit
        expand_and_sum(summed, group);
        // transform the sums (summed is used as scratch)
        hadamard(summed, spectrum);
        // fix the first entry to get the half Hadamard transform
        spectrum[0] -= 64 * MULTIPLICITY;
        // the peak position and sign give the decoded byte
        msg[i] = find_peaks(spectrum);
    }
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h
@@ -0,0 +1,18 @@
 #ifndef REED_MULLER_H
 #define REED_MULLER_H


 /**
 * @file reed_muller.h
 * Header file of reed_muller.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /* Encode each byte of msg into repeated RM(1,7) codewords written to cdw */
 void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

 /* Decode the repeated RM(1,7) codewords in cdw back into the bytes of msg */
 void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


 #endif
--- a/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c
@@ -0,0 +1,349 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file reed_solomon.c
 * Constant time implementation of Reed-Solomon codes
 */


 /* File-local decoding helpers, defined further down in this file */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void compute_roots(uint8_t *error, uint16_t *sigma);
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

 /**
 * @brief Encodes a message of PARAM_K bytes to a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial (RS_POLY_COEFS) of the Reed-Solomon code.
 * The first PARAM_N1 - PARAM_K bytes of the codeword hold the register
 * contents (parity), the last PARAM_K bytes are a copy of the message.
 *
 * @param[out] cdw Buffer of PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Buffer of PARAM_K bytes storing the message
 */
 void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    const size_t parity_len = PARAM_N1 - PARAM_K;
    uint16_t generator[] = {RS_POLY_COEFS};
    uint16_t scaled[PARAM_G] = {0};

    memset(cdw, 0, PARAM_N1);

    // feed the message bytes in, last byte first
    for (size_t step = 0; step < PARAM_K; ++step) {
        uint8_t feedback = (uint8_t) (msg[PARAM_K - 1 - step] ^ cdw[parity_len - 1]);
        uint8_t carry = 0;

        // feedback times every generator coefficient
        for (size_t j = 0; j < PARAM_G; ++j) {
            scaled[j] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(feedback, generator[j]);
        }

        // shift the register by one stage while adding the scaled generator
        for (size_t stage = 0; stage < parity_len; stage++) {
            uint8_t previous = cdw[stage];
            cdw[stage] = (uint8_t) (carry ^ scaled[stage]);
            carry = previous;
        }
    }

    // systematic part: the message itself follows the parity bytes
    memcpy(cdw + parity_len, msg, PARAM_K);
 }



 /**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Each syndrome is accumulated with XOR into syndromes[i], so the caller must
 * pass an array that is already zeroed.
 * Syndrome i is cdw[0] plus the sum over j >= 1 of cdw[j] * alpha_ij_pow[i][j - 1].
 *
 * Declared static to agree with the forward declaration at the top of the file.
 *
 * @param[in,out] syndromes Zero-initialized array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) {
        for (size_t j = 1; j < PARAM_N1; ++j) {
            syndromes[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]);
        }
        // the j = 0 term has coefficient 1
        syndromes[i] ^= cdw[0];
    }
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
 * We use the letter p for rho which is initialized at -1. <br>
 * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA coefficients (degrees 0 to PARAM_DELTA - 1).
 *
 * sigma[0] is set here; the coefficients sigma[1..PARAM_DELTA] are updated
 * with XOR, so the caller must pass them zeroed.
 *
 * @returns the degree of the ELP sigma
 * @param[in,out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    uint16_t deg_sigma = 0;
    uint16_t deg_sigma_p = 0;
    uint16_t deg_sigma_copy = 0;
    uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    uint16_t pp = (uint16_t) -1; // 2*rho
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    uint16_t mask1, mask2, mask12;
    uint16_t deg_X, deg_X_sigma_p;
    uint16_t dd;
    uint16_t mu;

    uint16_t i;

    sigma[0] = 1;
    for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (2 * PARAM_DELTA bytes = the first PARAM_DELTA uint16_t coefficients)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
        deg_sigma_copy = deg_sigma;

        // dd = d / d_p in the field
        dd = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(d_p));

        // sigma += dd * X_sigma_p
        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            sigma[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(dd, X_sigma_p[i]);
        }

        // 16-bit wrap-around makes mu - pp equal mu + 1 while pp is still (uint16_t) -1
        deg_X = mu - pp;
        deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        mask12 = mask1 & mask2;
        deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

        // last iteration: sigma and its degree are final, no next discrepancy to prepare
        if (mu == (2 * PARAM_DELTA - 1)) {
            break;
        }

        // masked updates: pp and d_p take the current mu and d only when mask12 is set
        pp ^= mask12 & (mu ^ pp);
        d_p ^= mask12 & (d ^ d_p);
        // X_sigma_p becomes X * sigma_copy when mask12 is set, X * X_sigma_p otherwise
        for (i = PARAM_DELTA; i; --i) {
            X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }

        deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
        // discrepancy for the next iteration
        d = syndromes[mu + 1];

        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            d ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQCRMRS192_CLEAN_fft for more details.
 *
 * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
 * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint8_t *error, uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0};

    PQCLEAN_HQCRMRS192_CLEAN_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(error, w);
 }



 /**
 * @brief Computes the polynomial z(x)
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 * Coefficients above the degree of sigma are forced to zero with a mask
 * rather than with a data-dependent branch.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array storing the error locator polynomial (PARAM_DELTA + 1 coefficients are read)
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    z[0] = 1;

    // start from the coefficients of sigma up to its degree
    for (size_t idx = 1; idx <= PARAM_DELTA; ++idx) {
        // keep = 0xffff if (idx <= degree) and 0 otherwise
        uint16_t keep = -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] = keep & sigma[idx];
    }

    z[1] ^= syndromes[0];

    // add the syndrome contributions for the higher coefficients
    for (size_t idx = 2; idx <= PARAM_DELTA; ++idx) {
        uint16_t keep = -((uint16_t) (idx - degree - 1) >> 15);
        uint16_t acc = syndromes[idx - 1];

        for (size_t j = 1; j < idx; ++j) {
            acc ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[j], syndromes[idx - j - 1]);
        }
        z[idx] ^= keep & acc;
    }
 }



 /**
 * @brief Computes the error values
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * The values are accumulated with += into error_values, so the caller must
 * pass an array that is already zeroed. All selections are done with masks
 * instead of data-dependent branches.
 *
 * @param[in,out] error_values Zero-initialized array of PARAM_N1 elements receiving the error values
 * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
 * @param[in] error Array of (at least) PARAM_N1 elements, non-zero at the error positions
 */
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
    uint16_t beta_j[PARAM_DELTA] = {0};
    uint16_t e_j[PARAM_DELTA] = {0};

    uint16_t delta_counter;
    uint16_t delta_real_value;
    uint16_t found;
    uint16_t mask1;
    uint16_t mask2;
    uint16_t tmp1;
    uint16_t tmp2;
    uint16_t inverse;
    uint16_t inverse_power_j;

    // Compute the beta_{j_i} page 31 of the documentation
    // (beta_j[k] receives gf_exp[i] for the k-th position i with error[i] != 0)
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; i++) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            beta_j[j] += mask1 & mask2 & gf_exp[i];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
    // number of error positions recorded in beta_j
    delta_real_value = delta_counter;

    // Compute the e_{j_i} page 31 of the documentation
    for (size_t i = 0; i < PARAM_DELTA; ++i) {
        tmp1 = 1;
        tmp2 = 1;
        inverse = PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(beta_j[i]);
        inverse_power_j = 1;

        // tmp1 = z evaluated at beta_j[i]^-1
        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
            inverse_power_j = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, inverse);
            tmp1 ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, z[j]);
        }
        // tmp2 = product over the other entries l of (1 + beta_j[l] * beta_j[i]^-1)
        for (size_t k = 1; k < PARAM_DELTA; ++k) {
            tmp2 = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
        }
        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
        e_j[i] = mask1 & PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(tmp2));
    }

    // Place the delta e_{j_i} values at the right coordinates of the output vector
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; ++i) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            error_values[i] += mask1 & mask2 & e_j[j];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
 }



 /**
 * @brief Correct the errors
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error values
 */
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    const uint16_t *val = error_values;
    uint8_t *const end = cdw + PARAM_N1;

    // XOR each error value into its symbol; only the low byte is stored back.
    for (uint8_t *sym = cdw; sym != end; ++sym, ++val) {
        *sym = (uint8_t) (*sym ^ *val);
    }
 }



 /**
 * @brief Decodes the received word
 *
 * This function relies on six steps:
 *    <ol>
 *    <li> The first step, is the computation of the 2*PARAM_DELTA syndromes.
 *    <li> The second step is the computation of the error-locator polynomial sigma.
 *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 *    <li> The fourth step, is the polynomial z(x).
 *    <li> The fifth step, is the computation of the error values.
 *    <li> The sixth step is the correction of the errors in the received polynomial.
 *    </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message
 * @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word
 */
 void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    uint16_t synd[2 * PARAM_DELTA] = {0};
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint8_t err_flags[1 << PARAM_M] = {0};
    uint16_t evaluator[PARAM_N1] = {0};
    uint16_t err_vals[PARAM_N1] = {0};

    // Step 1: the 2 * PARAM_DELTA syndromes of the received word
    compute_syndromes(synd, cdw);

    // Step 2: error-locator polynomial sigma and its degree
    uint16_t locator_deg = compute_elp(locator, synd);

    // Step 3: flag the error positions from the roots of sigma
    compute_roots(err_flags, locator);

    // Step 4: polynomial z(x)
    compute_z_poly(evaluator, locator, locator_deg, synd);

    // Step 5: error values at the flagged positions
    compute_error_values(err_vals, evaluator, err_flags);

    // Step 6: apply the correction to the received word, in place
    correct_errors(cdw, err_vals);

    // The message is the PARAM_K bytes starting at offset PARAM_G - 1 of the corrected word
    memcpy(msg, &cdw[PARAM_G - 1], PARAM_K);
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h
--- a/src/kem/hqc/hqc-rmrs-192/clean/vector.c
+++ b/src/kem/hqc/hqc-rmrs-192/clean/vector.c
@@ -0,0 +1,176 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */


 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter <b>weight</b>. The vector
 * is stored by position.
 * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow:
 *  1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It return \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 *
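 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array receiving <b>weight</b> distinct positions in [0, PARAM_N - 1]
 * @param[in] weight Integer that is the Hamming weight (expected to be at most PARAM_OMEGA_R)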
 */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
    // Three random bytes are consumed per 24-bit candidate.
    size_t pool_len = 3 * weight;
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    size_t cursor = pool_len;              // "pool exhausted": forces a refill before the first draw
    size_t filled = 0;                     // number of distinct positions stored in v so far

    while (filled < weight) {
        uint32_t candidate;

        // Rejection sampling: draw 24-bit values until one falls below the threshold.
        for (;;) {
            if (cursor == pool_len) {
                seedexpander(ctx, pool, pool_len);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;

            if (candidate < UTILS_REJECTION_THRESHOLD) {
                break;
            }
        }

        v[filled] = candidate % PARAM_N;

        // Keep the position only if none of the earlier ones equals it;
        // otherwise the slot is overwritten by the next draw.
        uint8_t is_new = 1;
        for (size_t k = 0; k < filled; k++) {
            if (v[k] == v[filled]) {
                is_new = 0;
            }
        }
        filled += is_new;
    }
 }



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter <b>weight</b>.
 * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow:
 *  1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It return \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 *
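 * @param[in] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to the vector packed in 64-bit words; the selected bits are OR-ed into it, so it must be zeroed by the caller to obtain exactly <b>weight</b> set bits
 * @param[in] weight Integer that is the Hamming weight (expected to be at most PARAM_OMEGA_R)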
 */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    uint32_t positions[PARAM_OMEGA_R] = {0};

    // Draw the bit positions first ...
    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, positions, weight);

    // ... then raise each of them in the packed vector: word = position / 64,
    // bit within the word = position % 64. Existing bits of v are left untouched.
    for (size_t n = 0; n < weight; ++n) {
        const uint32_t bit = positions[n];
        v[bit >> 6] |= ((uint64_t) 1) << (bit & 63);
    }
 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * This function generates a random binary vector of dimension <b>PARAM_N</b>. It generates a random
 * array of bytes using the seedexpander function, and drop the extra bits using a mask.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 64-bit words receiving the random vector
 */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};

    // Pull VEC_N_SIZE_BYTES bytes out of the seed expander ...
    seedexpander(ctx, stream, sizeof stream);

    // ... load them into the VEC_N_SIZE_64 words of v, then drop the extra
    // bits of the last word with RED_MASK.
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N_SIZE_64, stream, sizeof stream);
    v[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Adds two vectors
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors
 */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    // Vector addition is a word-wise XOR; the three arrays are walked front to back.
    for (uint32_t remaining = size; remaining != 0; --remaining) {
        *o++ = *v1++ ^ *v2++;
    }
 }



 /**
 * @brief Compares two vectors
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 uint8_t PQCLEAN_HQCRMRS192_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    // Accumulate the XOR of every byte pair; there is no early exit, so all
    // `size` bytes are always read.
    uint64_t diff = 0;
    for (uint32_t left = size; left != 0; --left) {
        diff |= (uint64_t) (*v1++ ^ *v2++);
    }

    // diff is at most 0xFF: negating a non-zero value sets bit 63, zero stays zero,
    // so the result is 1 when any byte differed and 0 otherwise.
    return (uint8_t) ((0 - diff) >> 63);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        // Output at least as large as the input: copy every 64-bit word that holds input bits.
        memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64));
        return;
    }

    // Output smaller than the input: copy VEC_N1N2_SIZE_64 words, then clear the
    // top (64 - size_o % 64) bits of the last copied word (none when size_o is a
    // multiple of 64).
    // NOTE: the copy length and the masked word are fixed by VEC_N1N2_SIZE_64;
    // they are not derived from size_o.
    const uint32_t used_bits = size_o % 64;
    const uint32_t surplus = used_bits ? 64 - used_bits : 0;

    memcpy(o, v, 8 * VEC_N1N2_SIZE_64);
    o[VEC_N1N2_SIZE_64 - 1] &= ((uint64_t) 0xFFFFFFFFFFFFFFFF) >> surplus;
 }
--- a/src/kem/hqc/hqc-rmrs-192/clean/vector.h
+++ b/src/kem/hqc/hqc-rmrs-192/clean/vector.h
@@ -0,0 +1,27 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 /* Fills v[0..weight-1] with distinct positions in [0, PARAM_N - 1] drawn from the seed expander by rejection sampling. */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight);

 /* Draws `weight` distinct positions and ORs the corresponding bits into the 64-bit-word vector v (v is not cleared first). */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 /* Fills the VEC_N_SIZE_64 words of v from the seed expander and masks the last word with RED_MASK. */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);


 /* o[i] = v1[i] ^ v2[i] for i in [0, size); size counts 64-bit words. */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 /* Compares `size` bytes without early exit; returns 0 when all are equal and 1 otherwise. */
 uint8_t PQCLEAN_HQCRMRS192_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

 /* Copies v into o; size_o and size_v are sizes in bits. When size_o < size_v the surplus top bits of the last word are cleared. */
 void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt
+++ b/src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt
@@ -0,0 +1,16 @@
 # Sources of the AVX2 implementation of HQC-RMRS-256
 set(
  	SRC_AVX2_HQCRMRS256
 	code.c
 	fft.c
 	gf2x.c
 	gf.c
 	hqc.c
 	kem.c
 	parsing.c
 	reed_muller.c
 	reed_solomon.c
 	vector.c
 )

 # The sources and api.h in this directory export PQCLEAN_HQCRMRS256_AVX2_*
 # symbols, so the namespace passed here is the AVX2 one (it previously named
 # the CLEAN implementation, which looks like a copy-paste from the clean
 # target). NOTE(review): confirm against how define_kem_alg uses this argument.
 define_kem_alg(hqcrmrs256_avx2
  PQCLEAN_HQCRMRS256_AVX2 "${SRC_AVX2_HQCRMRS256}" "${CMAKE_CURRENT_SOURCE_DIR}")
--- a/src/kem/hqc/hqc-rmrs-256/avx2/api.h
+++ b/src/kem/hqc/hqc-rmrs-256/avx2/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQCRMRS256_AVX2_API_H
 #define PQCLEAN_HQCRMRS256_AVX2_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 #define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_ALGNAME                      "HQC-RMRS-256"

 // Object sizes, in bytes as the *BYTES names indicate.
 #define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES               7285
 #define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_PUBLICKEYBYTES               7245
 #define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_CIPHERTEXTBYTES              14469

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32
 // NOTE(review): 7285 - 7245 = 40, so the secret-only part appears to be 40 bytes
 // rather than 32 - confirm against the key layout.

 // Key generation. Presumably pk and sk must hold CRYPTO_PUBLICKEYBYTES and
 // CRYPTO_SECRETKEYBYTES bytes respectively - confirm in kem.c.
 int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 // Encapsulation. Presumably writes a CRYPTO_CIPHERTEXTBYTES ciphertext to ct
 // and a CRYPTO_BYTES shared secret to ss - confirm in kem.c.
 int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 // Decapsulation. Presumably recovers the CRYPTO_BYTES shared secret into ss
 // from ct using sk - confirm in kem.c.
 int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/src/kem/hqc/hqc-rmrs-256/avx2/code.c
+++ b/src/kem/hqc/hqc-rmrs-256/avx2/code.c
@@ -0,0 +1,47 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 *
 * @brief Encoding the message m to a code word em using the concatenated code
 *
 * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain
 * a concatenated code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQCRMRS256_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
    // Scratch buffer of 8 * VEC_N1_SIZE_64 bytes for the Reed-Solomon code word, zero-initialised.
    uint8_t rs_codeword[8 * VEC_N1_SIZE_64] = {0};

    // Reed-Solomon first, then Reed-Muller applied to its output.
    PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(rs_codeword, m);
    PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(em, rs_codeword);
 }



 /**
 * @brief Decoding the code word em to a message m using the concatenated code
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQCRMRS256_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
    // Scratch buffer of 8 * VEC_N1_SIZE_64 bytes for the Reed-Muller decoder output, zero-initialised.
    uint8_t rs_word[8 * VEC_N1_SIZE_64] = {0};

    // Reverse order of the encoder: Reed-Muller decoding first, then
    // Reed-Solomon decoding of its output into the message.
    PQCLEAN_HQCRMRS256_AVX2_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS256_AVX2_reed_solomon_decode(m, rs_word);
 }
--- a/src/kem/hqc/hqc-rmrs-256/avx2/code.h
+++ b/src/kem/hqc/hqc-rmrs-256/avx2/code.h
@@ -0,0 +1,18 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /* Encodes `message` into the concatenated code word `em`: Reed-Solomon encoding followed by Reed-Muller encoding. */
 void PQCLEAN_HQCRMRS256_AVX2_code_encode(uint8_t *em, const uint8_t *message);

 /* Decodes the code word `em` into the message `m`: Reed-Muller decoding followed by Reed-Solomon decoding. */
 void PQCLEAN_HQCRMRS256_AVX2_code_decode(uint8_t *m, const uint8_t *em);


 #endif