New HQC and HQC-RMRS from upstream

4 years ago · b0afb62c0e
--- a/crypto_kem/hqc-128/META.yml
+++ b/crypto_kem/hqc-128/META.yml
@@ -0,0 +1,34 @@
 name: HQC-128
 type: kem
 claimed-nist-level: 1
 claimed-security: IND-CCA2
 length-ciphertext: 6017
 length-public-key: 3024
 length-secret-key: 3064
 length-shared-secret: 64
 nistkat-sha256: 32702949431d8a869abb530a2fda87d5c81c63c698673b135e59ad7e8b5a4f5f
 principal-submitters:
  - Carlos Aguilar Melchor
  - Nicolas Aragon
  - Slim Bettaieb
  - Olivier Blazy
  - Jurjen Bos
  - Jean-Christophe Deneuville
  - Philippe Gaborit
  - Edoardo Persichetti
  - Jean-Marc Robert
  - Pascal Véron
  - Gilles Zémor
  - Loïc Bidoux
 implementations:
    - name: clean
      version: 2020-05-29
    - name: avx2
      version: 2020-05-29
      supported_platforms:
          - architecture: x86_64
            operating_systems:
                - Linux
                - Darwin
            required_flags:
                - avx2
--- a/crypto_kem/hqc-128/avx2/LICENSE
+++ b/crypto_kem/hqc-128/avx2/LICENSE
@@ -0,0 +1 @@
 Public Domain
--- a/crypto_kem/hqc-128/avx2/Makefile
+++ b/crypto_kem/hqc-128/avx2/Makefile
@@ -0,0 +1,22 @@
 # This Makefile can be used with GNU Make or BSD Make

 LIB=libhqc-128_avx2.a
 HEADERS=alpha_table.h api.h bch.h code.h fft.h gen_matrix.h gf2x.h gf.h hqc.h parameters.h parsing.h repetition.h vector.h 
 OBJECTS=bch.o code.o fft.o gf2x.o gf.o hqc.o kem.o parsing.o repetition.o vector.o 

 CFLAGS=-O3 -mavx2 -mbmi -mpclmul -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS)

 all: $(LIB)

 %.o: %.s $(HEADERS)
 	$(AS) -o $@ $<

 %.o: %.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $@ $<

 $(LIB): $(OBJECTS)
 	$(AR) -r $@ $(OBJECTS)

 clean:
 	$(RM) $(OBJECTS)
 	$(RM) $(LIB)
--- a/crypto_kem/hqc-128/avx2/alpha_table.h
+++ b/crypto_kem/hqc-128/avx2/alpha_table.h
--- a/crypto_kem/hqc-128/avx2/api.h
+++ b/crypto_kem/hqc-128/avx2/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQC128_AVX2_API_H
 #define PQCLEAN_HQC128_AVX2_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 *
 * Declares the byte lengths of keys, ciphertext and shared secret together with
 * the three KEM entry points (key generation, encapsulation, decapsulation).
 */

 #define PQCLEAN_HQC128_AVX2_CRYPTO_ALGNAME                      "HQC-128"

 #define PQCLEAN_HQC128_AVX2_CRYPTO_SECRETKEYBYTES               3064
 #define PQCLEAN_HQC128_AVX2_CRYPTO_PUBLICKEYBYTES               3024
 #define PQCLEAN_HQC128_AVX2_CRYPTO_BYTES                        64
 #define PQCLEAN_HQC128_AVX2_CRYPTO_CIPHERTEXTBYTES              6017

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQC128_AVX2_CRYPTO_SECRETKEYBYTES would be
 // 3064 - 3024 = 40 bytes (the secret key length minus the appended public key).

 // NOTE(review): the implementations live in kem.c, which is not visible from this header.
 // Buffer sizes are presumably the PQCLEAN_HQC128_AVX2_CRYPTO_* lengths above and the return
 // value presumably follows the usual NIST convention (0 on success) -- confirm against kem.c.

 // Generates a key pair: pk receives the public key, sk receives the secret key.
 int PQCLEAN_HQC128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 // Encapsulation: from public key pk, produces ciphertext ct and shared secret ss.
 int PQCLEAN_HQC128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 // Decapsulation: from ciphertext ct and secret key sk, produces shared secret ss.
 int PQCLEAN_HQC128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-128/avx2/bch.c
+++ b/crypto_kem/hqc-128/avx2/bch.c
@@ -0,0 +1,367 @@
 #include "alpha_table.h"
 #include "bch.h"
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file bch.c
 * Constant time implementation of BCH codes
 */


 // Forward declarations of the file-local helpers; all of them have internal linkage.
 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus with a single branch-free conditional subtraction.
 *
 * The caller must guarantee i < 2*modulus, so the result is either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    // Subtract unconditionally; the 16-bit result wraps around when i < modulus
    const uint16_t diff = (uint16_t) (i - modulus);

    // Top bit of the wrapped difference: 1 when i < modulus, 0 otherwise
    const uint16_t wrapped = diff >> 15;

    // 0xffff when the modulus has to be added back, 0 otherwise
    const int16_t add_back = (int16_t) - wrapped;

    return (uint16_t) (diff + (add_back & modulus));
 }



 /**
 * @brief Marks the odd binary cyclotomic cosets modulo 2^m-1 whose representative is below upper_bound.
 *
 * Every odd i < upper_bound that is not yet assigned becomes a coset representative;
 * i itself and its successive doublings (reduced modulo PARAM_GF_MUL_ORDER) are tagged with i.
 * Entries equal to 0 are treated as "not assigned", so the caller must zero the array beforehand.
 * @param[in,out] cosets Array of size 2^m-1; index j receives the representative of j's coset
 * @param[in] upper_bound Exclusive upper bound on the representatives that are considered
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1 ; rep < upper_bound ; rep += 2) {
        // Already a member of a previously built class
        if (cosets[rep] != 0) {
            continue;
        }

        cosets[rep] = rep;

        // Walk the remaining PARAM_M - 1 doublings of the representative
        uint16_t member = rep;
        for (size_t step = 1 ; step < PARAM_M ; ++step) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Zero means "not in any selected coset"; size taken from the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, 2 * *t);

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly ; j ; --j) {
            // mask = 0xffff if(bch_poly[j] != 0) and 0 otherwise, so a zero coefficient stays zero
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity.
    // The index is bounded so the scan can never run past the end of cosets.
    while ((2 * *t + 1 < PARAM_GF_MUL_ORDER) && (cosets[2 * *t + 1] != 0)) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Fills table_alpha_ij with the powers of alpha used to compute the decoding syndromes
 *
 * The table has one row of 2*PARAM_DELTA entries per codeword position i in [0, PARAM_N1[.
 * Entry j (0-based) of row i receives exp[e], where e is obtained by adding i to a running
 * exponent j+1 times and reducing with PQCLEAN_HQC128_AVX2_gf_mod after each addition,
 * i.e. the value alpha^(i*(j+1)) is stored at table_alpha_ij[2*PARAM_DELTA*i + j].
 * The syndromes of a received word v are v(alpha^k) for k in [1, 2*PARAM_DELTA].
 *
 * @param[in] exp Exp look-up-table of GF
 */
 void PQCLEAN_HQC128_AVX2_table_alphaij_generation(const uint16_t *exp) {
    for (uint16_t i = 0 ; i < PARAM_N1 ; ++i) {
        // Row of the table dedicated to codeword position i
        int16_t *row = table_alpha_ij + i * (PARAM_DELTA << 1);

        // Running exponent: holds i*(j+1) (reduced) after the update in iteration j
        int32_t exponent = 0;

        for (uint16_t j = 0 ; j < (PARAM_DELTA << 1) ; j++) {
            exponent = PQCLEAN_HQC128_AVX2_gf_mod(exponent + i);
            row[j] = exp[exponent];
        }
    }
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite joiner1995decoding). <br>
 * We use the letter p for rho which is initialized at -1/2 (the variable pp stores 2*rho, hence -1). <br>
 * The array X_sigma_p represents the polynomial X^(2(mu-rho))*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA - 1 coefficients. <br>
 * Only sigma[0] is written unconditionally; the other coefficients are updated with XOR,
 * so the caller must pass an array whose remaining entries are zero.
 *
 * @returns the degree of the ELP sigma
 * @param[in,out] sigma Zero-initialized array receiving the ELP; indices 0..PARAM_DELTA are accessed, so it must hold at least PARAM_DELTA + 1 entries
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    sigma[0] = 1;
    size_t deg_sigma = 0;
    size_t deg_sigma_p = 0;
    uint16_t sigma_copy[PARAM_DELTA - 1] = {0};
    size_t deg_sigma_copy = 0;
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    int32_t pp = -1; // 2*rho
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (the length is in bytes: PARAM_DELTA - 1 coefficients of 2 bytes each)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
        deg_sigma_copy = deg_sigma;

        // sigma += (d / d_p) * X_sigma_p; the update is a no-op when the discrepancy d is 0
        uint16_t dd = PQCLEAN_HQC128_AVX2_gf_mul(d, PQCLEAN_HQC128_AVX2_gf_inverse(d_p)); // 0 if(d == 0)
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            sigma[i] ^= PQCLEAN_HQC128_AVX2_gf_mul(dd, X_sigma_p[i]);
        }

        size_t deg_X = 2 * mu - pp; // 2*(mu-rho)
        size_t deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        int16_t mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        int16_t mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        int16_t mask12 = mask1 & mask2;
        // Branch-free select: deg_X_sigma_p when mask12 is set, the old deg_sigma otherwise
        deg_sigma = (mask12 & deg_X_sigma_p) ^ (~mask12 & deg_sigma);

        // The last iteration only needs the sigma update above; no further discrepancy is computed
        if (mu == PARAM_DELTA - 1) {
            break;
        }

        // Update pp, d_p and X_sigma_p if needed
        pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
        d_p = (mask12 & d) ^ (~mask12 & d_p);
        // Shift by two positions (multiplication by X^2), taking the coefficients either from
        // the saved sigma (mask12 set) or from the previous X_sigma_p (mask12 clear)
        for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
            X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }
        X_sigma_p[1] = 0;
        X_sigma_p[0] = 0;
        deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p);

        // Compute the next discrepancy
        d = syndromes[2 * mu + 2];
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            d ^= PQCLEAN_HQC128_AVX2_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Retrieves the message message from the codeword codeword
 *
 * Since we performed a systematic encoding, the message is the last PARAM_K bits of the codeword,
 * i.e. it starts at bit PARAM_N1 - PARAM_K. Each 64-bit message word is assembled from the upper
 * part of one codeword word and the lower part of the following one.
 *
 * The case where the message starts on a 64-bit boundary is handled explicitly: the
 * complementary shift would otherwise be a shift by 64, which is undefined in C, and the
 * word following the last message word is never read in that case.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the message
 * @param[in] codeword Array of size VEC_N1_SIZE_BYTES storing the codeword
 */
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
    const size_t val = PARAM_N1 - PARAM_K;

    // Bit offset of the message inside a codeword word, and its complement reduced mod 64
    const size_t shift = val % 64;
    const size_t up_shift = (64 - shift) % 64;

    // mask1 keeps the bits taken from the current word, mask2 those taken from the next one
    const uint64_t mask1 = UINT64_MAX << shift;
    const uint64_t mask2 = (shift == 0) ? 0 : (UINT64_MAX >> up_shift);
    size_t index = val / 64;

    for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
        uint64_t message1 = (codeword[index] & mask1) >> shift;
        uint64_t message2 = (codeword[++index] & mask2) << up_shift;
        message[i] = message1 | message2;
    }

    // Last 64-bit word (64 - shift is the number of bits given by message1):
    // a second codeword word is only needed when message1 does not already cover it
    uint64_t last = (codeword[index] & mask1) >> shift;
    if ((shift != 0) && ((PARAM_K % 64 == 0) || (64 - shift < PARAM_K % 64))) {
        last |= (codeword[++index] & mask2) << up_shift;
    }
    message[VEC_K_SIZE_64 - 1] = last;
 }



 /**
 * @brief Computes the 2*PARAM_DELTA syndromes from the received vector rcv
 *
 * Syndromes are the sum of powers of alpha weighted by the vector's coefficients.
 * These powers are read from table_alpha_ij (see PQCLEAN_HQC128_AVX2_table_alphaij_generation).
 * Syndromes are 16-bits long, hence we can simultaneously compute 16 syndromes
 * in a 256-bit register.
 *
 * The function first expands every bit of rcv into one byte of a scratch array, then, for each
 * group of 16 syndromes, XORs together the table rows of the positions whose bit is set
 * (selection is done with a mask, not with a branch).
 *
 * NOTE(review): the scratch array is a static buffer, so this function is not reentrant and the
 * expanded received word stays in memory after the call.
 *
 * @param[out] syndromes Array of SYND_SIZE_256 256-bit words receiving the 2*PARAM_DELTA syndromes
 * @param[in] rcv Array of size VEC_N1_SIZE_BYTES storing the received word
 */
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) {
    const __m256i zero_256 = _mm256_set1_epi64x(0);
    // Byte-shuffle pattern replicating byte k of a 32-bit word 8 times (k = 0, 1 in the low lane, 2, 3 in the high lane)
    const __m256i mask_one = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202, 0x0101010101010101, 0x0);
    // 0x8040201008040201 in every 64-bit lane: byte j of each group of 8 selects bit j
    const __m256i mask_two  = _mm256_set1_epi64x(-0x7FBFDFEFF7FBFDFF);
    const __m256i un_256 = _mm256_set1_epi64x(1);

    // static variable so that it is stored in the DATA segment
    // not in the STACK segment
    static uint8_t tmp_array[PARAM_N1 + 4]; // +4 to control overflow due to management of 256 bits
    __m256i *z = (__m256i *) tmp_array;
    const uint8_t *rcv_bytes = (const uint8_t *) rcv;

    // vectorized version of the separation of the coordinates of the vector v in order to put each coordinate in an unsigned char
    // 4 bytes (32 coordinates) of v are handled at each step of the loop
    for (uint32_t i = 0 ; i < ((VEC_N1_SIZE_BYTES >> 2) << 2) ; i += 4) {
        // Fetch the next 4 bytes through memcpy: no const is cast away and no
        // uint64_t storage is read through an incompatible pointer type
        uint32_t aux;
        memcpy(&aux, rcv_bytes + i, sizeof aux);

        // duplicate aux 8 times in y , i.e y= (aux aux aux .... aux)
        __m256i y = _mm256_set1_epi32((int) aux);
        // shuffle the bytes of y so that if aux=(a0 a1 a2 a3)
        // then y = (a0 a0 a0 a0 a0 a0 a0 a0 a1 a1 a1 a1 a1 a1 a1 a1 .... a3)
        y = _mm256_shuffle_epi8(y, mask_one);
        // apply a mask on each byte of y to determine if jth bit of a_k is 0 or 1
        // (unaligned store: tmp_array is a byte array with no 32-byte alignment guarantee)
        _mm256_storeu_si256(z + (i >> 2), _mm256_and_si256(y, mask_two));
    }

    // Evaluation of the polynomial corresponding to the vector v in alpha^i for i in {1, ..., 2 * PARAM_DELTA}
    for (size_t j = 0 ; j < SYND_SIZE_256 ; ++j) {
        __m256i S = zero_256;
        // Column offset of the j-th group of 16 syndromes inside a table row
        int16_t *alpha_tmp = table_alpha_ij + (j << 4);

        for (size_t pos = 0 ; pos < PARAM_N1 ; ++pos) {
            // L = all ones if coordinate pos of v is set, all zeros otherwise
            __m256i tmp_repeat = _mm256_set1_epi64x((long long)(tmp_array[pos] != 0));
            __m256i L = _mm256_cmpeq_epi64(tmp_repeat, un_256);
            // Load 16 powers of alpha for position pos and accumulate them when the coordinate is set
            tmp_repeat = _mm256_lddqu_si256((__m256i *)(alpha_tmp + pos * (PARAM_DELTA << 1)));
            L = _mm256_and_si256(L, tmp_repeat);
            S = _mm256_xor_si256(L, S);
        }
        _mm256_storeu_si256(syndromes + j, S);
    }
 }


 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQC128_AVX2_fft for more details.
 *
 * @param[out] error Array of VEC_N1_SIZE_BYTES elements receiving the error polynomial
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint64_t *error, const uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0}; // w will receive the evaluation of sigma in all field elements

    PQCLEAN_HQC128_AVX2_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(error, w);
 }



 /**
 * @brief Decodes the received word
 *
 * This function relies on four steps:
 *    <ol>
 *    <li> The first step, done by compute_syndromes, is the computation of the 2*PARAM_DELTA syndromes.
 *    <li> The second step, done by compute_elp, is the computation of the error-locator polynomial sigma.
 *    <li> The third step, done by compute_roots (additive FFT), locates the errors from the roots of sigma.
 *    <li> The fourth step is the correction of the errors in the received polynomial.
 *    </ol>
 * For a more complete picture on BCH decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * The received word is corrected in place before the message is extracted from it.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the decoded message
 * @param[in,out] vector Array of size VEC_N1_SIZE_BYTES storing the received word; holds the corrected codeword on return
 */
 void PQCLEAN_HQC128_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector) {
    static __m256i syndromes_256[SYND_SIZE_256];
    uint64_t error[(1 << PARAM_M) / 8] = {0};
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    uint16_t sigma[1 << PARAM_FFT] = {0};
    // The packed 256-bit syndrome words viewed as individual 16-bit syndromes
    const uint16_t *syndromes = (uint16_t *) syndromes_256;

    // Step 1: syndromes of the received word
    compute_syndromes(syndromes_256, vector);

    // Step 2: error locator polynomial
    compute_elp(sigma, syndromes);

    // Step 3: error polynomial
    compute_roots(error, sigma);

    // Step 4: correct the received word, then read the message out of the codeword
    PQCLEAN_HQC128_AVX2_vect_add(vector, vector, error, VEC_N1_SIZE_64);
    message_from_codeword(message, vector);
 }
--- a/crypto_kem/hqc-128/avx2/bch.h
+++ b/crypto_kem/hqc-128/avx2/bch.h
@@ -0,0 +1,23 @@
 #ifndef BCH_H
 #define BCH_H


 /**
 * @file bch.h
 * Header file of bch.c
 */

 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /**
 * @brief Decodes the received BCH word: vector is corrected in place and the message is extracted from it.
 */
 void PQCLEAN_HQC128_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);


 /**
 * @brief Computes the generator polynomial of the BCH code into bch_poly and returns its degree.
 *
 * t holds the targeted correction capacity on entry and the real one on return;
 * exp and log are the antilog and log tables of GF(2^m).
 */
 size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);

 /**
 * @brief Fills the table of powers of alpha used for the syndrome computation, from the antilog table exp.
 */
 void PQCLEAN_HQC128_AVX2_table_alphaij_generation(const uint16_t *exp);


 #endif
--- a/crypto_kem/hqc-128/avx2/code.c
+++ b/crypto_kem/hqc-128/avx2/code.c
@@ -0,0 +1,104 @@
 #include "bch.h"
 #include "code.h"
 #include "gen_matrix.h"
 #include "parameters.h"
 #include "repetition.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of tensor code
 */


 static inline uint64_t mux(uint64_t a, uint64_t b, int64_t bit);

 /*
 * Branch-free selection between two words: yields a when bit is 0 and b when bit is 1.
 * The selector is expanded into an all-zeros / all-ones mask via the sign of -bit.
 */
 static inline uint64_t mux(uint64_t a, uint64_t b, int64_t bit) {
    const uint64_t select_b = (uint64_t) (-bit >> 63);
    const uint64_t difference = a ^ b;

    // a ^ (a ^ b) == b when the mask is set, a ^ 0 == a otherwise
    return (difference & select_b) ^ a;
 }



 /**
 *
 * @brief Encoding the message m to a code word em using the tensor code
 *
 * We encode the message using the BCH code. For each bit obtained,
 * we duplicate the bit PARAM_N2 times to apply repetition code.
 * BCH encoding is done using the classical mG operation,
 * columns of the matrix are stored in 256-bit registers
 *
 * Every repeated bit is XORed into em, so em must be zeroed by the caller.
 * The first loop produces the PARAM_N1 - PARAM_K redundancy bits, the second one appends
 * the message bits themselves (systematic encoding).
 *
 * NOTE(review): the repetition masks are 31 and 30 bits wide and the systematic loop is
 * hard-coded to 4 x 64 message bits; presumably PARAM_N2 = 31 and PARAM_K = 256 -- confirm
 * against parameters.h.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC128_AVX2_code_encode(uint64_t *em, const uint64_t *m) {
    uint64_t res;
    uint32_t i;
    // mask[bit][0] is the run of ones XORed into the current word for a given bit value,
    // mask[bit][1] is the (one bit shorter) source of the part spilling into the next word
    static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};


    __m256i *colonne, y, aux0;
    // The whole message is loaded as one 256-bit value
    __m256i msg = _mm256_lddqu_si256((const __m256i *) m);
    colonne = ((__m256i *) gen_matrix);

    for (i = 0 ; i < PARAM_N1 - PARAM_K ; i++) {
        // y is the and operation between m and ith column of G
        y = _mm256_and_si256(colonne[i], msg);
        // aux0 = (y2 y3 y0 y1)
        aux0 = _mm256_permute2x128_si256(y, y, 1);
        // y = (y0^y2 y1^y3 y2^y0 y3^y1)
        y = _mm256_xor_si256(y, aux0);
        // aux0 = (y1^y3 y0^y2 y1^y3 y0^y2)
        aux0 = _mm256_shuffle_epi32(y, 0x4e);
        // y = (y0^y1^y2^y3 repeated 4 times)
        y = _mm256_xor_si256(aux0, y);
        // res = parity of the 256-bit product, i.e. the ith BCH redundancy bit
        res = _mm_popcnt_u64(_mm256_extract_epi64(y, 0)) & 1;


        // Bit position of the repeated block inside em, split into word index and bit offset
        uint16_t pos_r = PARAM_N2 * i;
        uint16_t idx_r = (pos_r & 0x3f);
        uint64_t *p64 = em;
        p64 += pos_r >> 6;
        // Part of the run that fits in the current word
        uint64_t select = mux(mask[0][0], mask[1][0], res);
        *p64 ^= select << idx_r;
        // Part of the run that crosses into the next word (zero when the run fits entirely)
        select = mux(mask[0][1], mask[1][1], res);
        *(p64 + 1) ^= select >> ((63 - idx_r));
    }

    /* now we add the message m */
    /* systematic encoding */
    for (int32_t i = 0 ; i < 4 ; i++) {
        for (int32_t j = 0 ; j < 64 ; j++) {
            uint8_t bit = (m[i] >> j) & 0x1;
            // Message bit (64*i + j) is placed after the PARAM_N1 - PARAM_K redundancy bits
            uint32_t pos_r = PARAM_N2 * ((PARAM_N1 - PARAM_K) + ((i << 6) + j));
            uint16_t idx_r = (pos_r & 0x3f);
            uint64_t *p64 = em;


            p64 += pos_r >> 6;
            // Same two-word placement of the repeated bit as in the redundancy loop
            uint64_t select = mux(mask[0][0], mask[1][0], bit);
            *p64 ^= select << idx_r;
            select = mux(mask[0][1], mask[1][1], bit);
            *(p64 + 1) ^= select >> ((63 - idx_r));
        }
    }

 }


 /**
 * @brief Decodes the tensor code word em into the message m
 *
 * The repetition code is decoded first into an intermediate word of VEC_N1_SIZE_64
 * 64-bit words, which is then handed to the BCH decoder.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC128_AVX2_code_decode(uint64_t *m, const uint64_t *em) {
    // Output of the repetition decoder, input of the BCH decoder
    uint64_t bch_word[VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQC128_AVX2_repetition_code_decode(bch_word, em);
    PQCLEAN_HQC128_AVX2_bch_code_decode(m, bch_word);
 }
--- a/crypto_kem/hqc-128/avx2/code.h
+++ b/crypto_kem/hqc-128/avx2/code.h
@@ -0,0 +1,20 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */

 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /**
 * @brief Encodes message into the tensor code word em (BCH code followed by repetition code).
 *
 * The code word is accumulated with XOR, so em must be zeroed by the caller.
 */
 void PQCLEAN_HQC128_AVX2_code_encode(uint64_t *em, const uint64_t *message);

 /**
 * @brief Decodes the tensor code word em into the message m (repetition decoding, then BCH decoding).
 */
 void PQCLEAN_HQC128_AVX2_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-128/avx2/fft.c
+++ b/crypto_kem/hqc-128/avx2/fft.c
@@ -0,0 +1,333 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 // Forward declarations of the file-local helpers used by the additive FFT.
 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis consists of the powers of two 2^(PARAM_M-1), ..., 2^1, stored in decreasing order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    size_t slot = 0;

    // Highest power first; exponent 0 (the element 1) is left out on purpose
    for (size_t exponent = PARAM_M - 1 ; exponent > 0 ; --exponent) {
        betas[slot] = 1 << exponent;
        ++slot;
    }
 }



 /**
 * @brief Computes all XOR subset sums of the given set
 *
 * Element i of subset_sums is the XOR of the set elements selected by the bits of i
 * (bit b of i selects set[b]). The table is built by doubling: once the sums over the
 * first b elements are known, the next 2^b entries are those sums XORed with set[b].
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    // The empty subset
    subset_sums[0] = 0;

    for (size_t bit = 0 ; bit < set_size ; ++bit) {
        // Number of sums already available, i.e. 2^bit
        const size_t known = (size_t) 1 << bit;
        uint16_t *upper_half = subset_sums + known;

        for (size_t low = 0 ; low < known ; ++low) {
            upper_half[low] = subset_sums[low] ^ set[bit];
        }
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * Sizes up to 16 coefficients (m_f <= 4) are fully unrolled; larger sizes are split into
 * two half-size polynomials Q and R that are converted recursively.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    // 16 coefficients: unrolled conversion. Statement order matters, later lines reuse
    // f0/f1 entries computed by earlier ones.
    case 4:
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    // 8 coefficients
    case 3:
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    // 4 coefficients
    case 2:
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        return;

    // 2 coefficients: f(x) = f[0] + x.f[1]
    case 1:
        f0[0] = f[0];
        f1[0] = f[1];
        return;

    // General case: f has 4n coefficients, seen as four quarters of n coefficients each
    default:
        ;
        size_t n = 1 << (m_f - 2);

        // Scratch arrays sized for the largest supported input (m_f == PARAM_FFT)
        uint16_t Q[2 * (1 << (PARAM_FFT - 2))];
        uint16_t R[2 * (1 << (PARAM_FFT - 2))];

        uint16_t Q0[1 << (PARAM_FFT - 2)];
        uint16_t Q1[1 << (PARAM_FFT - 2)];
        uint16_t R0[1 << (PARAM_FFT - 2)];
        uint16_t R1[1 << (PARAM_FFT - 2)];

        // memcpy lengths are in bytes: 2 * n bytes = n coefficients, 4 * n bytes = 2n coefficients.
        // Both halves of Q start as the top quarter of f, R starts as the lower half of f.
        memcpy(Q, f + 3 * n, 2 * n);
        memcpy(Q + n, f + 3 * n, 2 * n);
        memcpy(R, f, 4 * n);

        // Q[0..n) = third quarter ^ top quarter; that sum is then folded into R[n..2n)
        for (size_t i = 0 ; i < n ; ++i) {
            Q[i] ^= f[2 * n + i];
            R[n + i] ^= Q[i];
        }

        // Convert the two half-size polynomials
        radix(Q0, Q1, Q, m_f - 1);
        radix(R0, R1, R, m_f - 1);

        // Low halves of f0/f1 come from R, high halves from Q
        memcpy(f0, R0, 2 * n);
        memcpy(f0 + n, Q0, 2 * n);
        memcpy(f1, R1, 2 * n);
        memcpy(f1 + n, Q1, 2 * n);
    }
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function fft.
 * It follows the steps of the Gao and Mateer algorithm: twist f by the last beta, split it
 * with radix(), recurse on both parts over the deltas basis, then recombine the evaluations.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in,out] f Array of 2^m_f coefficients; it is twisted in place in step 2
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f log2 of the (padded) size of f, i.e. f is handled as 2^m_f coefficients
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)];
    uint16_t f1[1 << (PARAM_FFT - 2)];
    uint16_t gammas[PARAM_M - 2];
    uint16_t deltas[PARAM_M - 2];
    // Half of the number of evaluation points
    size_t k = 1 << (m - 1);
    uint16_t gammas_sums[1 << (PARAM_M - 2)];
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};

    // Step 1
    // f has two coefficients: w[i] = f[0] + f[1] * (i-th subset sum of betas)
    if (m_f == 1) {
        uint16_t tmp[PARAM_M - (PARAM_FFT - 1)];
        for (size_t i = 0 ; i < m ; ++i) {
            tmp[i] = PQCLEAN_HQC128_AVX2_gf_mul(betas[i], f[1]);
        }

        // Subset sums of the products, built by doubling on top of f[0]
        w[0] = f[0];
        for (size_t j = 0 ; j < m ; ++j) {
            for (size_t k = 0 ; k < (1U << j) ; ++k) {
                w[(1 << j) + k] = w[k] ^ tmp[j];
            }
        }

        return;
    }

    // Step 2: compute g
    // Coefficient i of f is multiplied by betas[m - 1]^i (skipped when that beta is 1)
    if (betas[m - 1] != 1) {
        uint16_t beta_m_pow = 1;
        for (size_t i = 1 ; i < (1U << m_f) ; ++i) {
            beta_m_pow = PQCLEAN_HQC128_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQC128_AVX2_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
    for (uint8_t i = 0 ; i < m - 1 ; ++i) {
        gammas[i] = PQCLEAN_HQC128_AVX2_gf_mul(betas[i], PQCLEAN_HQC128_AVX2_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQC128_AVX2_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        // No recursion needed for f1: its evaluation is f1[0] everywhere
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (size_t i = 1 ; i < k ; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        // w[i] = u[i] + gammas_sums[i] * v[i] and w[k + i] = w[i] + v[i]
        // (the length of the copy is in bytes: k entries of 2 bytes each)
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (size_t i = 1 ; i < k ; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * f itself is not modified here: it is split into the local arrays f0 and f1, and it is those
 * copies that are altered (twisted at each level) by the recursive calls.
 *
 * @param[out] w Array receiving the 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1];
    uint16_t betas_sums[1 << (PARAM_M - 1)];
    uint16_t f0[1 << (PARAM_FFT - 1)];
    uint16_t f1[1 << (PARAM_FFT - 1)];
    uint16_t deltas[PARAM_M - 1];
    // Half of the number of evaluation points
    size_t k = 1 << (PARAM_M - 1);
    uint16_t u[1 << (PARAM_M - 1)];
    uint16_t v[1 << (PARAM_M - 1)];

    // Follows Gao and Mateer algorithm
    compute_fft_betas(betas);

    // Step 1: PARAM_FFT > 1, nothing to do

    // Compute gammas sums
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 2: beta_m = 1, nothing to do

    // Step 3
    radix(f0, f1, f, PARAM_FFT);

    // Step 4: Compute deltas
    for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) {
        deltas[i] = PQCLEAN_HQC128_AVX2_gf_square(betas[i]) ^ betas[i];
    }

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    // Step 6, 7 and error polynomial computation
    // (the length of the copy is in bytes: k entries of 2 bytes each)
    memcpy(w + k, v, 2 * k);

    // Check if 0 is root
    w[0] = u[0];

    // Check if 1 is root
    w[k] ^= u[0];

    // Find other roots
    // w[i] = u[i] + betas_sums[i] * v[i] and w[k + i] = w[i] + v[i]
    for (size_t i = 1 ; i < k ; ++i) {
        w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(betas_sums[i], v[i]);
        w[k + i] ^= w[i];
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * w is laid out as produced by PQCLEAN_HQC128_AVX2_fft: w[i] is the evaluation at the i-th
 * subset sum of the FFT basis and w[k + i] the evaluation at that subset sum XOR 1
 * (k = 2^(PARAM_M-1)). A bit is flipped in error for every evaluation that is zero, at
 * position PARAM_GF_MUL_ORDER - log(element); the test is done without branching.
 *
 * error is a bit array stored in 64-bit words: bit number index lives in word index / 64
 * at offset index % 64. Positions up to PARAM_GF_MUL_ORDER may be written.
 *
 * @param[in,out] error Bit array the error positions are XORed into; must hold at least PARAM_GF_MUL_ORDER + 1 bits
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    size_t k = 1 << (PARAM_M - 1);
    size_t index = PARAM_GF_MUL_ORDER;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // Evaluation at the element 0: bit = 1 if(w[0] == 0) and 0 otherwise
    error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15);

    // Evaluation at the element 1, recorded at position PARAM_GF_MUL_ORDER.
    // The word index uses the same 64-bit granularity as the bit offset.
    uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15);
    error[index / 64] ^= bit << (index % 64);

    for (size_t i = 1 ; i < k ; ++i) {
        // Evaluation at the i-th subset sum
        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_AVX2_gf_log(gammas_sums[i]);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15);
        error[index / 64] ^= bit << (index % 64);

        // Evaluation at the i-th subset sum XOR 1
        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_AVX2_gf_log(gammas_sums[i] ^ 1);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15);
        error[index / 64] ^= bit << (index % 64);
    }
 }
--- a/crypto_kem/hqc-128/avx2/fft.h
+++ b/crypto_kem/hqc-128/avx2/fft.h
@@ -0,0 +1,20 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 /* Fills w with evaluations of the polynomial f; defined in fft.c.
  * NOTE(review): f_coeffs is presumably the number of coefficients of f - confirm against fft.c. */
 void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 /* Retrieves the error polynomial (written into error) from the evaluations w of the
  * Error Locator Polynomial on all field elements; w has 2^PARAM_M entries. */
 void PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w);


 #endif
--- a/crypto_kem/hqc-128/avx2/gen_matrix.h
+++ b/crypto_kem/hqc-128/avx2/gen_matrix.h
--- a/crypto_kem/hqc-128/avx2/gf.c
+++ b/crypto_kem/hqc-128/avx2/gf.c
--- a/crypto_kem/hqc-128/avx2/gf.h
+++ b/crypto_kem/hqc-128/avx2/gf.h
@@ -0,0 +1,29 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 /* Galois-field helpers implemented in gf.c.
  * NOTE(review): gf.c is not visible from here; the one-line summaries below are inferred
  * from the function names and call sites and should be confirmed against the definitions. */

 /* Presumably fills the exp and log tables of GF(2^m) - TODO confirm. */
 void PQCLEAN_HQC128_AVX2_gf_generate(uint16_t *exp, uint16_t *log, int16_t m);


 /* Presumably returns the discrete logarithm of elt - TODO confirm. */
 uint16_t PQCLEAN_HQC128_AVX2_gf_log(uint16_t elt);

 /* Presumably returns the field product a * b - TODO confirm. */
 uint16_t PQCLEAN_HQC128_AVX2_gf_mul(uint16_t a, uint16_t b);

 /* Presumably returns the field square a^2 - TODO confirm. */
 uint16_t PQCLEAN_HQC128_AVX2_gf_square(uint16_t a);

 /* Presumably returns the multiplicative inverse of a - TODO confirm. */
 uint16_t PQCLEAN_HQC128_AVX2_gf_inverse(uint16_t a);

 /* Presumably reduces an exponent i modulo the multiplicative order - TODO confirm. */
 uint16_t PQCLEAN_HQC128_AVX2_gf_mod(uint16_t i);


 #endif
--- a/crypto_kem/hqc-128/avx2/gf2x.c
+++ b/crypto_kem/hqc-128/avx2/gf2x.c
@@ -0,0 +1,558 @@
 #include "gf2x.h"
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>

 /**
 * \file gf2x.c
 * \brief AVX2 implementation of multiplication of two polynomials
 */


 // Sizes for Toom-Cook: length of each of the three operand parts,
 // in 256-bit vectors (T_TM3_3W_256) and in 64-bit words (T_TM3_3W_64 = 4 * T_TM3_3W_256)
 #define T_TM3_3W_256 32
 #define T_TM3_3W_64 128

 #define VEC_N_ARRAY_SIZE_VEC CEIL_DIVIDE(PARAM_N, 256) /*!< The number of needed vectors to store PARAM_N bits*/
 #define WORD 64 // number of bits in one uint64_t word
 #define LAST64 (PARAM_N >> 6) // index of the 64-bit word that contains bit PARAM_N
 // File-scope scratch buffers shared by every call into this file.
 // NOTE(review): they are not declared static, so they have external linkage.
 uint64_t a1_times_a2[2 * VEC_N_256_SIZE_64 + 1]; // unreduced product, filled by TOOM3Mult() in vect_mul
 uint64_t tmp_reduce[VEC_N_ARRAY_SIZE_VEC << 2]; // output scratch of reduce()
 __m256i *o256 = (__m256i *) tmp_reduce; // 256-bit vector view of tmp_reduce
 // NOTE(review): bloc64 and bit64 are not referenced in the part of this file visible here
 uint64_t bloc64[PARAM_OMEGA_R]; // Allocation with the biggest possible weight
 uint64_t bit64[PARAM_OMEGA_R]; // Allocation with the biggest possible weight


 // Forward declarations of the file-local helpers defined below
 static inline void reduce(uint64_t *o, const uint64_t *a);
 inline static void karat_mult_1(__m128i *C, __m128i *A, __m128i *B);
 inline static void karat_mult_2(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_4(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_8(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_16(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_32(__m256i *C, __m256i *A, __m256i *B);
 static inline void divByXplus1(__m256i *out, __m256i *in, int size);
 static void TOOM3Mult(uint64_t *Out, const uint64_t *A, const uint64_t *B);



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * This function computes the modular reduction of the polynomial a(x).
 * The words of a starting at LAST64 (the word holding bit PARAM_N) are shifted right by
 * PARAM_N mod 64 bits, with the carry bits taken from the following word, and XORed onto
 * the low words of a. The result is assembled in the file-scope buffer tmp_reduce,
 * masked with RED_MASK in its last word, and VEC_N_SIZE_BYTES bytes are copied to o.
 *
 * Neither a nor tmp_reduce is declared with 32-byte alignment, so all 256-bit accesses
 * use unaligned load/store intrinsics rather than dereferencing __m256i pointers.
 *
 * @param[out] o Pointer to the result
 * @param[in] a Pointer to the polynomial a(x)
 */
 static inline void reduce(uint64_t *o, const uint64_t *a) {
    __m256i r256, carry256;
    const __m256i *a256 = (const __m256i *) a;
    static const int32_t dec64 = PARAM_N & 0x3f;
    static const int32_t d0 = WORD - dec64;
    int32_t i, i2;

    // Fold four high words per iteration onto the matching low vector
    for (i = LAST64 ; i < (PARAM_N >> 5) - 4 ; i += 4) {
        r256 = _mm256_lddqu_si256((__m256i const *) (& a[i]));
        r256 = _mm256_srli_epi64(r256, dec64);
        carry256 = _mm256_lddqu_si256((__m256i const *) (& a[i + 1]));
        carry256 = _mm256_slli_epi64(carry256, d0);
        r256 = _mm256_xor_si256(r256, carry256);
        i2 = (i - LAST64) >> 2;
        _mm256_storeu_si256(&o256[i2], _mm256_xor_si256(_mm256_loadu_si256(&a256[i2]), r256));
    }

    // Final step: only the two words a[i] and a[i + 1] feed the right-shifted part
    r256 = (__m256i) {
        a[i], a[i + 1], 0x0UL, 0x0UL
    };
    carry256 = _mm256_lddqu_si256((__m256i const *) (& a[i + 1]));
    r256 = _mm256_srli_epi64(r256, dec64);
    carry256 = _mm256_slli_epi64(carry256, d0);
    r256 = _mm256_xor_si256(r256, carry256);
    i2 = (i - LAST64) >> 2;
    _mm256_storeu_si256(&o256[i2], _mm256_xor_si256(_mm256_loadu_si256(&a256[i2]), r256));

    // Mask the word that contains bit PARAM_N, then hand the result to the caller
    tmp_reduce[LAST64] &= RED_MASK;
    memcpy(o, tmp_reduce, VEC_N_SIZE_BYTES);
 }

 /**
 * @brief Compute C(x) = A(x)*B(x)
 * A(x) and B(x) are stored in 128-bit registers
 * This function computes A(x)*B(x) using Karatsuba
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_1(__m128i *C, __m128i *A, __m128i *B) {
    __m128i D1[2];
    __m128i D0[2], D2[2];
    __m128i Al = _mm_loadu_si128(A);
    __m128i Ah = _mm_loadu_si128(A + 1);
    __m128i Bl = _mm_loadu_si128(B);
    __m128i Bh = _mm_loadu_si128(B + 1);

    //  Compute Al.Bl=D0
    __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0);
    __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11);
    __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e));
    __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e));
    __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    //  Compute Ah.Bh=D2
    DD0 = _mm_clmulepi64_si128(Ah, Bh, 0);
    DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11);
    AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e));
    BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Compute AlpAh.BlpBh=D1
    // Initialisation of AlpAh and BlpBh
    __m128i AlpAh = _mm_xor_si128(Al, Ah);
    __m128i BlpBh = _mm_xor_si128(Bl, Bh);
    DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0);
    DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11);
    AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e));
    BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Final comutation of C
    __m128i middle = _mm_xor_si128(D0[1], D2[0]);
    C[0] = D0[0];
    C[1] = middle ^ D0[0] ^ D1[0];
    C[2] = middle ^ D1[1] ^ D2[1];
    C[3] = D2[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_1: A(x) and B(x) each occupy two
 * 256-bit registers, and the product occupies four.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_2(__m256i *C, __m256i *A, __m256i *B) {
    __m256i low[2], cross[2], high[2];
    __m256i sum_a, sum_b;
    __m128i *a_halves = (__m128i *) A;
    __m128i *b_halves = (__m128i *) B;

    // low = Al.Bl and high = Ah.Bh; each half is one 256-bit register
    karat_mult_1((__m128i *) low, a_halves, b_halves);
    karat_mult_1((__m128i *) high, a_halves + 2, b_halves + 2);

    // cross = (Al + Ah).(Bl + Bh)
    sum_a = _mm256_xor_si256(A[0], A[1]);
    sum_b = _mm256_xor_si256(B[0], B[1]);
    karat_mult_1((__m128i *) cross, (__m128i *) &sum_a, (__m128i *) &sum_b);

    const __m256i overlap = _mm256_xor_si256(low[1], high[0]);

    C[0] = low[0];
    C[1] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[0]), cross[0]);
    C[2] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[1]), high[1]);
    C[3] = high[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_2: A(x) and B(x) each occupy four
 * 256-bit registers, and the product occupies eight.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_4(__m256i *C, __m256i *A, __m256i *B) {
    __m256i low[4], cross[4], high[4];
    __m256i sum_a[2], sum_b[2];

    // low = Al.Bl and high = Ah.Bh
    karat_mult_2(low, A, B);
    karat_mult_2(high, A + 2, B + 2);

    // cross = (Al + Ah).(Bl + Bh)
    for (int32_t j = 0 ; j < 2 ; j++) {
        sum_a[j] = _mm256_xor_si256(A[j], A[j + 2]);
        sum_b[j] = _mm256_xor_si256(B[j], B[j + 2]);
    }
    karat_mult_2(cross, sum_a, sum_b);

    // Recombine the three partial products
    for (int32_t j = 0 ; j < 2 ; j++) {
        const __m256i overlap = _mm256_xor_si256(low[j + 2], high[j]);

        C[j] = low[j];
        C[j + 2] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[j]), cross[j]);
        C[j + 4] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[j + 2]), high[j + 2]);
        C[j + 6] = high[j + 2];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_4: A(x) and B(x) each occupy eight
 * 256-bit registers, and the product occupies sixteen.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) {
    __m256i low[8], cross[8], high[8];
    __m256i sum_a[4], sum_b[4];
    __m256i *a_high = A + 4;
    __m256i *b_high = B + 4;

    // low = Al.Bl and high = Ah.Bh
    karat_mult_4(low, A, B);
    karat_mult_4(high, a_high, b_high);

    // cross = (Al + Ah).(Bl + Bh)
    for (int32_t j = 0 ; j < 4 ; j++) {
        sum_a[j] = _mm256_xor_si256(A[j], a_high[j]);
        sum_b[j] = _mm256_xor_si256(B[j], b_high[j]);
    }
    karat_mult_4(cross, sum_a, sum_b);

    // Recombine the three partial products, one quarter of C per output pointer
    __m256i *q1 = C + 4;
    __m256i *q2 = C + 8;
    __m256i *q3 = C + 12;
    for (int32_t j = 0 ; j < 4 ; j++) {
        const __m256i overlap = _mm256_xor_si256(low[j + 4], high[j]);

        C[j]  = low[j];
        q1[j] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[j]), cross[j]);
        q2[j] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[j + 4]), high[j + 4]);
        q3[j] = high[j + 4];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_8: A(x) and B(x) each occupy sixteen
 * 256-bit registers, and the product occupies thirty-two.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) {
    __m256i low[16], cross[16], high[16];
    __m256i sum_a[8], sum_b[8];
    __m256i *a_high = A + 8;
    __m256i *b_high = B + 8;

    // low = Al.Bl and high = Ah.Bh
    karat_mult_8(low, A, B);
    karat_mult_8(high, a_high, b_high);

    // cross = (Al + Ah).(Bl + Bh)
    for (int32_t j = 0 ; j < 8 ; j++) {
        sum_a[j] = _mm256_xor_si256(A[j], a_high[j]);
        sum_b[j] = _mm256_xor_si256(B[j], b_high[j]);
    }
    karat_mult_8(cross, sum_a, sum_b);

    // Recombine the three partial products, one quarter of C per output pointer
    __m256i *q1 = C + 8;
    __m256i *q2 = C + 16;
    __m256i *q3 = C + 24;
    for (int32_t j = 0 ; j < 8 ; j++) {
        const __m256i overlap = _mm256_xor_si256(low[j + 8], high[j]);

        C[j]  = low[j];
        q1[j] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[j]), cross[j]);
        q2[j] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[j + 8]), high[j + 8]);
        q3[j] = high[j + 8];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_16: A(x) and B(x) each occupy thirty-two
 * 256-bit registers, and the product occupies sixty-four.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) {
    __m256i low[32], cross[32], high[32];
    __m256i sum_a[16], sum_b[16];
    __m256i *a_high = A + 16;
    __m256i *b_high = B + 16;

    // low = Al.Bl and high = Ah.Bh
    karat_mult_16(low, A, B);
    karat_mult_16(high, a_high, b_high);

    // cross = (Al + Ah).(Bl + Bh)
    for (int32_t j = 0 ; j < 16 ; j++) {
        sum_a[j] = _mm256_xor_si256(A[j], a_high[j]);
        sum_b[j] = _mm256_xor_si256(B[j], b_high[j]);
    }
    karat_mult_16(cross, sum_a, sum_b);

    // Recombine the three partial products, one quarter of C per output pointer
    __m256i *q1 = C + 16;
    __m256i *q2 = C + 32;
    __m256i *q3 = C + 48;
    for (int32_t j = 0 ; j < 16 ; j++) {
        const __m256i overlap = _mm256_xor_si256(low[j + 16], high[j]);

        C[j]  = low[j];
        q1[j] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[j]), cross[j]);
        q2[j] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[j + 16]), high[j + 16]);
        q3[j] = high[j + 16];
    }
 }


 /**
 * @brief Compute B(x) = A(x)/(x+1)
 *
 * Both polynomials are handled as arrays of 64-bit words. Output word i is the XOR
 * of input words 0..i, i.e. a running prefix XOR over 8 * size words.
 * @param[out] out Pointer to the result
 * @param[in] in Pointer to the polynomial A(x)
 * @param[in] size Sets the number of processed 64-bit words to 2 * (size << 2)
 */
 static inline void divByXplus1(__m256i *out, __m256i *in, int size) {
    const uint64_t *src = (const uint64_t *) in;
    uint64_t *dst = (uint64_t *) out;
    const int32_t words = 2 * (size << 2);
    uint64_t running = src[0];

    dst[0] = running;

    for (int32_t w = 1 ; w < words ; w++) {
        running ^= src[w];
        dst[w] = running;
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x) using TOOM3Mult
 *
 * This function computes A(x)*B(x) using Toom-Cook 3-way multiplication;
 * the five inner multiplications are done with Karatsuba (karat_mult_32).
 *
 * Each operand is split into three parts of T_TM3_3W_64 - 2 words, each stored in
 * T_TM3_3W_256 vectors whose last two words are zero. 3 * (T_TM3_3W_64 - 2) words
 * are read from each of A and B, and 6 * T_TM3_3W_256 - 2 vectors are written to Out.
 *
 * All working buffers are function-static, so every call shares the same scratch memory.
 *
 * @param[out] Out Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static void TOOM3Mult(uint64_t *Out, const uint64_t *A, const uint64_t *B) {
    static __m256i U0[T_TM3_3W_256], V0[T_TM3_3W_256], U1[T_TM3_3W_256], V1[T_TM3_3W_256], U2[T_TM3_3W_256], V2[T_TM3_3W_256];
    static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)];
    static __m256i tmp[2 * (T_TM3_3W_256)];
    static __m256i ro256[6 * (T_TM3_3W_256)];
    const __m256i zero = (__m256i) {
        0ul, 0ul, 0ul, 0ul
    };
    int32_t T2 = T_TM3_3W_64 << 1;

    // Split the operands: part 0 starts at word 0, part 1 at word T_TM3_3W_64 - 2,
    // part 2 at word 2 * T_TM3_3W_64 - 4. All vectors but the last are full loads.
    for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) {
        int32_t i4 = i << 2;
        int32_t i42 = i4 - 2;
        U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4]));
        V0[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4]));
        U1[i] = _mm256_lddqu_si256((__m256i const *)(& A[i42 + T_TM3_3W_64]));
        V1[i] = _mm256_lddqu_si256((__m256i const *)(& B[i42 + T_TM3_3W_64]));
        U2[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4 + T2 - 4]));
        V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4]));
    }

    // Last vector of each part: only two words come from the operand, the upper two are zero
    for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) {
        int32_t i4 = i << 2;
        int32_t i41 = i4 + 1;
        U0[i] = (__m256i) {
            A[i4], A[i41], 0x0ul, 0x0ul
        };
        V0[i] = (__m256i) {
            B[i4], B[i41], 0x0ul, 0x0ul
        };
        U1[i] = (__m256i) {
            A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul
        };
        V1[i] = (__m256i) {
            B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul
        };
        U2[i] = (__m256i) {
            A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul
        };
        V2[i] = (__m256i) {
            B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul
        };
    }

    // Evaluation phase : x= X^64
    // P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty)
    // Evaluation: 5*2 add, 2*2 shift; 5 mul (n)
    //W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0
    for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
        W3[i] = U0[i] ^ U1[i] ^ U2[i];
        W2[i] = V0[i] ^ V1[i] ^ V2[i];
    }

    //W1 = W2 * W3
    karat_mult_32( W1, W2, W3);

    //W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !)
    // Multiplying by x is a shift by one 64-bit word, done through word-offset views
    int64_t *U1_64 = ((int64_t *) U1);
    int64_t *U2_64 = ((int64_t *) U2);

    int64_t *V1_64 = ((int64_t *) V1);
    int64_t *V2_64 = ((int64_t *) V2);

    // _mm256_set_epi64x takes its arguments from the highest word down to the lowest
    W0[0] = _mm256_set_epi64x(U1_64[2] ^ U2_64[1], U1_64[1] ^ U2_64[0], U1_64[0], 0);
    W4[0] = _mm256_set_epi64x(V1_64[2] ^ V2_64[1], V1_64[1] ^ V2_64[0], V1_64[0], 0);

    // These four assignments repeat the ones above; the pointers are unchanged
    U1_64 = ((int64_t *) U1);
    U2_64 = ((int64_t *) U2);

    V1_64 = ((int64_t *) V1);
    V2_64 = ((int64_t *) V2);

    for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) {
        int i4 = i << 2;
        W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1]));
        W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2]));

        W4[i] = _mm256_lddqu_si256((__m256i const *)(& V1_64[i4 - 1]));
        W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2]));
    }

    //W3 = W3 + W0      ; W2 = W2 + W4
    for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
        W3[i] ^= W0[i];
        W2[i] ^= W4[i];
    }

    //W0 = W0 + U0      ; W4 = W4 + V0
    for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
        W0[i] ^= U0[i];
        W4[i] ^= V0[i];
    }

    //W3 = W3 * W2      ; W2 = W0 * W4
    // The product goes through tmp because W3 is also an input of this multiplication
    karat_mult_32(tmp, W3, W2);

    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W3[i] = tmp[i];
    }

    karat_mult_32(W2, W0, W4);
    //W4 = U2 * V2      ; W0 = U0 * V0
    karat_mult_32(W4, U2, V2);
    karat_mult_32(W0, U0, V0);

    // Interpolation phase
    // 9 add, 1 shift, 1 Smul, 2 Sdiv (2n)
    //W3 = W3 + W2
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W3[i] ^= W2[i];
    }

    //W1 = W1 + W0
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W1[i] ^= W0[i];
    }

    //W2 =(W2 + W0)/x -> x = X^64
    // Dividing by x is a shift down by one word: vector i is reloaded from word 4*i + 1.
    // NOTE(review): for the last i the 4-word load starts at the last-but-two word of
    // W2/W0 and so extends one word past each array; W2's last vector is zeroed below.
    U1_64 = ((int64_t *) W2);
    U2_64 = ((int64_t *) W0);
    for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) {
        int32_t i4 = i << 2;
        W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1]));
        W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1]));
    }

    //W2 =(W2 + W3 + W4*(x^3+1))/(x+1)
    // W4*x^3 is W4 shifted up by three words: word 0 of W4 lands in the top word of
    // tmp[0], and the following vectors are read from W4 at a one-word offset.
    U1_64 = ((int64_t *) W4);
    __m256i *U1_256 = (__m256i *) (U1_64 + 1);
    tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) {
        0x0ul, 0x0ul, 0x0ul, U1_64[0]
    };

    for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) {
        tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]);
    }

    // The last vector of tmp is not rewritten by the loop above; the matching
    // output vector is cleared right after the division.
    divByXplus1(W2, tmp, T_TM3_3W_256);
    W2[2 * (T_TM3_3W_256) - 1] = zero;

    //W3 =(W3 + W1)/(x*(x+1))
    // The division by x is again done by reading both inputs at a one-word offset
    U1_64 = (int64_t *) W3;
    U1_256 = (__m256i *) (U1_64 + 1);

    U2_64 = (int64_t *) W1;
    __m256i *U2_256 = (__m256i *) (U2_64 + 1);

    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) {
        tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]);
    }

    divByXplus1(W3, tmp, T_TM3_3W_256);
    W3[2 * (T_TM3_3W_256) - 1] = zero;

    //W1 = W1 + W4 + W2
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W1[i] ^= W2[i] ^ W4[i];
    }

    //W2 = W2 + W3
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W2[i] ^= W3[i];
    }

    // Recomposition
    //W  = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4
    //W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256)
    // W0, W2 and W4 are placed at vector offsets 0, 2*T_TM3_3W_256 - 1 and 4*T_TM3_3W_256 - 2
    for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) {
        ro256[i] = W0[i];
        ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i];
        ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i];
    }

    // Vectors where two neighbouring terms overlap are the XOR of both
    ro256[(T_TM3_3W_256 << 1) - 1] = W0[(T_TM3_3W_256 << 1) - 1] ^ W2[0];
    ro256[(T_TM3_3W_256 << 2) - 2] = W2[(T_TM3_3W_256 << 1) - 1] ^ W4[0];
    ro256[(T_TM3_3W_256 * 6) - 3] = W4[(T_TM3_3W_256 << 1) - 1];

    // W1 and W3 are XORed in at positions two words below vector boundaries
    // (T_TM3_3W_256 and 3*T_TM3_3W_256 - 1), hence the unaligned load/store pairs
    U1_64 = ((int64_t *) &ro256[T_TM3_3W_256]);
    U1_256 = (__m256i *) (U1_64 - 2);

    U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]);
    U2_256 = (__m256i *) (U2_64 - 2);

    for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) {
        _mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i]));
        _mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i]));
    }

    // Copy the recomposed product to the caller's buffer
    for (int32_t i = 0 ; i < 6 * T_TM3_3W_256 - 2 ; i++) {
        uint64_t *out64 = Out + (i << 2);
        _mm256_storeu_si256((__m256i *)out64, ro256[i]);
    }
 }


 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * This function multiplies the polynomials <b>a1</b> and <b>a2</b> with TOOM3Mult and
 * reduces the product modulo \f$ X^n - 1\f$. The unreduced product lives in the
 * file-scope buffer a1_times_a2, which is wiped completely before returning.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to a polynomial
 * @param[in] a2 Pointer to a polynomial
 */
 void PQCLEAN_HQC128_AVX2_vect_mul(uint64_t *o, const uint64_t *a1, const uint64_t *a2) {
    TOOM3Mult(a1_times_a2, a1, a2);
    reduce(o, a1_times_a2);

    // clear all: use the array's own size so the whole intermediate buffer is wiped
    memset(a1_times_a2, 0, sizeof a1_times_a2);
 }
--- a/crypto_kem/hqc-128/avx2/gf2x.h
+++ b/crypto_kem/hqc-128/avx2/gf2x.h
@@ -0,0 +1,17 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */

 #include <stdint.h>

 #include <stdint.h>

 /* Multiplies the polynomials a1 and a2 modulo X^n - 1 and stores the result in o (defined in gf2x.c). */
 void PQCLEAN_HQC128_AVX2_vect_mul(uint64_t *o, const uint64_t *a1, const uint64_t *a2);


 #endif
--- a/crypto_kem/hqc-128/avx2/hqc.c
+++ b/crypto_kem/hqc-128/avx2/hqc.c
@@ -0,0 +1,138 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key holds the <b>seed</b> from which the vector <b>h</b> is derived, together
 * with the syndrome <b>s</b> = x + y.h.
 *
 * The secret key holds the <b>seed</b> from which the vectors <b>x</b> and <b>y</b> are derived.
 * The public key is appended to the secret key in order to respect the NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQC128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    uint8_t seed_sk[SEED_BYTES] = {0};
    uint8_t seed_pk[SEED_BYTES] = {0};
    AES_XOF_struct xof_sk;
    AES_XOF_struct xof_pk;
    uint64_t secret_x[VEC_N_256_SIZE_64] = {0};
    uint64_t secret_y[VEC_N_256_SIZE_64] = {0};
    uint64_t vec_h[VEC_N_256_SIZE_64] = {0};
    uint64_t syndrome[VEC_N_256_SIZE_64] = {0};

    // Draw the secret-key seed first, then the public-key seed, each feeding its own expander
    randombytes(seed_sk, SEED_BYTES);
    seedexpander_init(&xof_sk, seed_sk, &seed_sk[32], SEEDEXPANDER_MAX_LENGTH);

    randombytes(seed_pk, SEED_BYTES);
    seedexpander_init(&xof_pk, seed_pk, &seed_pk[32], SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors x and y of weight PARAM_OMEGA
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&xof_sk, secret_x, PARAM_OMEGA);
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&xof_sk, secret_y, PARAM_OMEGA);

    // Public part: random h and syndrome s = x + y.h
    PQCLEAN_HQC128_AVX2_vect_set_random(&xof_pk, vec_h);
    PQCLEAN_HQC128_AVX2_vect_mul(syndrome, secret_y, vec_h);
    PQCLEAN_HQC128_AVX2_vect_add(syndrome, secret_x, syndrome, VEC_N_256_SIZE_64);

    // Serialize both keys; the secret key embeds the freshly written public key
    PQCLEAN_HQC128_AVX2_hqc_public_key_to_string(pk, seed_pk, syndrome);
    PQCLEAN_HQC128_AVX2_hqc_secret_key_to_string(sk, seed_sk, pk);
 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of the vectors <b>u</b> = r1 + r2.h and <b>v</b> = m.G + s.r2 + e.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_xof;
    uint64_t vec_h[VEC_N_256_SIZE_64] = {0};
    uint64_t syndrome[VEC_N_256_SIZE_64] = {0};
    uint64_t rand1[VEC_N_256_SIZE_64] = {0};
    uint64_t rand2[VEC_N_256_SIZE_64] = {0};
    uint64_t noise[VEC_N_256_SIZE_64] = {0};
    uint64_t codeword[VEC_N_256_SIZE_64] = {0};
    uint64_t masked[VEC_N_256_SIZE_64] = {0};

    // All encryption randomness is expanded from theta
    seedexpander_init(&theta_xof, theta, &theta[32], SEEDEXPANDER_MAX_LENGTH);

    // Recover h and s from the public key
    PQCLEAN_HQC128_AVX2_hqc_public_key_from_string(vec_h, syndrome, pk);

    // Sample r1, r2 and e, in this order, from the same expander
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&theta_xof, rand1, PARAM_OMEGA_R);
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&theta_xof, rand2, PARAM_OMEGA_R);
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&theta_xof, noise, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQC128_AVX2_vect_mul(u, rand2, vec_h);
    PQCLEAN_HQC128_AVX2_vect_add(u, rand1, u, VEC_N_256_SIZE_64);

    // Encode the message (m.G) and bring it to PARAM_N bits
    PQCLEAN_HQC128_AVX2_code_encode(v, m);
    PQCLEAN_HQC128_AVX2_vect_resize(codeword, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, resized back to PARAM_N1N2 bits
    PQCLEAN_HQC128_AVX2_vect_mul(masked, rand2, syndrome);
    PQCLEAN_HQC128_AVX2_vect_add(masked, noise, masked, VEC_N_256_SIZE_64);
    PQCLEAN_HQC128_AVX2_vect_add(masked, codeword, masked, VEC_N_256_SIZE_64);
    PQCLEAN_HQC128_AVX2_vect_resize(v, PARAM_N1N2, masked, PARAM_N);
 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y and decodes the result into the message.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC128_AVX2_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint64_t secret_x[VEC_N_256_SIZE_64] = {0};
    uint64_t secret_y[VEC_N_256_SIZE_64] = {0};
    uint8_t pk_copy[PUBLIC_KEY_BYTES] = {0};
    uint64_t v_wide[VEC_N_256_SIZE_64] = {0};
    uint64_t noisy[VEC_N_256_SIZE_64] = {0};

    // Unpack x, y and the embedded public key from the secret key
    PQCLEAN_HQC128_AVX2_hqc_secret_key_from_string(secret_x, secret_y, pk_copy, sk);

    // noisy = v - u.y, with v first brought to PARAM_N bits
    PQCLEAN_HQC128_AVX2_vect_resize(v_wide, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQC128_AVX2_vect_mul(noisy, secret_y, u);
    PQCLEAN_HQC128_AVX2_vect_add(noisy, v_wide, noisy, VEC_N_256_SIZE_64);

    // Decode v - u.y to recover m
    PQCLEAN_HQC128_AVX2_code_decode(m, noisy);
 }
--- a/crypto_kem/hqc-128/avx2/hqc.h
+++ b/crypto_kem/hqc-128/avx2/hqc.h
@@ -0,0 +1,21 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 #include <stdint.h>

 /* Generates a key pair: pk carries the seed of h and the syndrome s; sk carries the seed of x and y followed by pk. */
 void PQCLEAN_HQC128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 /* Encrypts the message m under pk with randomness derived from theta; outputs the ciphertext parts u and v. */
 void PQCLEAN_HQC128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk);

 /* Decrypts the ciphertext (u, v) with the secret key sk and writes the decoded message to m. */
 void PQCLEAN_HQC128_AVX2_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-128/avx2/kem.c
+++ b/crypto_kem/hqc-128/avx2/kem.c
@@ -0,0 +1,138 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND_CAA2 scheme
 *
 * Thin wrapper around the PKE key generation. The public key holds the syndrome <b>s</b>
 * and the seed of the vector <b>h</b>; the secret key holds the seed of the vectors
 * <b>x</b> and <b>y</b>, with the public key appended in order to respect the NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 if keygen is successful
 */
 int PQCLEAN_HQC128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {
    const int status = 0;

    PQCLEAN_HQC128_AVX2_hqc_pke_keygen(pk, sk);

    return status;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND_CAA2 scheme
 *
 * Draws a random message m, encrypts it with randomness theta = SHA3-512(m), and
 * outputs the ciphertext (u, v, d) with d = SHA-512(m). The shared secret is
 * SHA-512(m || u || v).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
 int PQCLEAN_HQC128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {
    uint64_t msg[VEC_K_SIZE_64] = {0};
    uint8_t seed_theta[SHA512_BYTES] = {0};
    uint64_t ct_u[VEC_N_256_SIZE_64] = {0};
    uint64_t ct_v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char digest_d[SHA512_BYTES] = {0};
    unsigned char transcript[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *cursor = transcript;

    // Random message m
    PQCLEAN_HQC128_AVX2_vect_set_random_from_randombytes(msg);

    // theta = SHA3-512(m) seeds the encryption randomness
    sha3_512(seed_theta, (uint8_t *) msg, VEC_K_SIZE_BYTES);

    // (u, v) = Encrypt(m, theta, pk)
    PQCLEAN_HQC128_AVX2_hqc_pke_encrypt(ct_u, ct_v, msg, seed_theta, pk);

    // d = SHA-512(m)
    sha512(digest_d, (unsigned char *) msg, VEC_K_SIZE_BYTES);

    // Shared secret = SHA-512(m || u || v)
    memcpy(cursor, msg, VEC_K_SIZE_BYTES);
    cursor += VEC_K_SIZE_BYTES;
    memcpy(cursor, ct_u, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;
    memcpy(cursor, ct_v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, transcript, sizeof transcript);

    // Ciphertext = (u, v, d)
    PQCLEAN_HQC128_AVX2_hqc_ciphertext_to_string(ct, ct_u, ct_v, digest_d);

    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Decrypts the ciphertext to a message m, re-encrypts m with randomness
 * theta = SHA3-512(m) and recomputes d' = SHA-512(m). The shared secret
 * SHA-512(m || u || v) is kept only if the re-encryption (u', v') and d' match the
 * received (u, v, d); otherwise ss is overwritten with zeros.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQC128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    int8_t result = -1;
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_256_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQC128_AVX2_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk (the public key is stored right after the secret seed)
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQC128_AVX2_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQC128_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES, u, VEC_N_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'.
    // All three comparisons always run and are combined with a bitwise AND (no
    // short-circuit), so the amount of work does not depend on which part differs.
    const int u_ok = (PQCLEAN_HQC128_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0);
    const int v_ok = (PQCLEAN_HQC128_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0);
    const int d_ok = (PQCLEAN_HQC128_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0);
    result = (int8_t) (u_ok & v_ok & d_ok);

    // result is 1 on success and 0 on failure: keep or zero the shared secret without branching
    for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) {
        ss[i] = result * ss[i];
    }

    // Map 1 -> 0 (success) and 0 -> -1 (failure)
    result--;


    return result;
 }
--- a/crypto_kem/hqc-128/avx2/parameters.h
+++ b/crypto_kem/hqc-128/avx2/parameters.h
@@ -0,0 +1,127 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H
 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */

 #include "api.h"
 #include "api.h"
 #include "vector.h"


 /* Ceiling of a / b for non-negative integers. Both arguments are expanded twice, so pass side-effect-free expressions. */
 #define CEIL_DIVIDE(a, b)  (((a)/(b)) + ((a) % (b) == 0 ? 0 : 1)) /*!< Divide a by b and ceil the result*/
 /* Mask keeping the low (a % size) bits. 1ULL guarantees at least 64 bits: with 1UL a shift by 32 or more is undefined where unsigned long is 32 bits wide. */
 #define BITMASK(a, size) ((1ULL << ((a) % (size))) - 1) /*!< Create a mask*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of BCH code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of the repetition code)
  #define PARAM_N1N2                            Define the parameter n1 * n2 of the scheme (length of the tensor code)
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define VEC_N_256_SIZE_64                     Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
  #define VEC_N1N2_256_SIZE_64                  Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

  #define PARAM_T                               Define a threshold for decoding repetition code word (PARAM_T = (PARAM_N2 - 1) / 2)

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the BCH code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the BCH code
  #define PARAM_G                               Define the size of the generator polynomial of BCH code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=57
                                                The smallest power of 2 greater than 57+1 is 64=2^6
  #define PARAM_BCH_POLY                        Generator polynomial of the BCH code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 /* Code lengths and weights of the scheme. */
 #define PARAM_N                                 23869
 #define PARAM_N1                                766
 #define PARAM_N2                                31
 /* PARAM_N1 * PARAM_N2 = 766 * 31 */
 #define PARAM_N1N2                              23746
 #define PARAM_OMEGA                             67
 #define PARAM_OMEGA_E                           77
 #define PARAM_OMEGA_R                           77
 #define PARAM_SECURITY                          128
 #define PARAM_DFR_EXP                           128

 /* Byte sizes of the KEM objects, taken from the public API header. */
 #define SECRET_KEY_BYTES                        PQCLEAN_HQC128_AVX2_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQC128_AVX2_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQC128_AVX2_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQC128_AVX2_CRYPTO_CIPHERTEXTBYTES

 /* floor(2^24 / PARAM_N) * PARAM_N = 702 * 23869 */
 #define UTILS_REJECTION_THRESHOLD               16756038
 /* Vector sizes in bytes (rounded up). */
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_BYTES                       CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 /* Vector sizes in 64-bit words (rounded up). */
 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 64)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 64)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 /* NOTE(review): PARAM_N_MULT is not described in the list above; it only feeds VEC_N_256_SIZE_64 here - confirm its meaning against the multiplication code. */
 #define PARAM_N_MULT                            24192
 /* Number of 64-bit words when the vector is stored as whole 256-bit blocks (4 words per block, hence << 2). */
 #define VEC_N_256_SIZE_64           (CEIL_DIVIDE(PARAM_N_MULT, 256) << 2)
 #define VEC_N1N2_256_SIZE_64                    (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

 /* (PARAM_N2 - 1) / 2 */
 #define PARAM_T                                 15

 /* BCH code and Galois field parameters. */
 #define PARAM_DELTA                             57
 #define PARAM_M                                 10
 #define PARAM_GF_POLY                           0x409
 /* 2^PARAM_M - 1 */
 #define PARAM_GF_MUL_ORDER                      1023
 #define PARAM_K                                 256
 #define PARAM_G                                 511
 #define PARAM_FFT                               6
 /* NOTE(review): PARAM_FFT_T is not described in the list above - confirm its role in the FFT code. */
 #define PARAM_FFT_T                             7
 /* Coefficients (each 0 or 1) of the BCH generator polynomial as a brace-enclosed initializer list.
    Note that the expansion already ends with a semicolon. */
 #define PARAM_BCH_POLY { \
        1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1, \
        1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0, \
        0,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, \
        1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0, \
        0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0, \
        1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0, \
        0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,0,1, \
        1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, \
        1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1, \
        1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,1, \
        0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1, \
        1,0,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1, \
        0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1, \
        1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1 \
    };

 /* Low 61 bits set (2^61 - 1); 61 = PARAM_N % 64. */
 #define RED_MASK                                0x1fffffffffffffffUL
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 /* 2^32 - 1 */
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/crypto_kem/hqc-128/avx2/parsing.c
+++ b/crypto_kem/hqc-128/avx2/parsing.c
@@ -0,0 +1,121 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */



 /**
 * @brief Serialize a secret key
 *
 * The output is the SEED_BYTES-byte seed (from which <b>x</b> and <b>y</b> are regenerated)
 * immediately followed by the PUBLIC_KEY_BYTES-byte public key, which is appended so that
 * the NIST API can be respected.
 *
 * @param[out] sk String receiving the secret key (SEED_BYTES + PUBLIC_KEY_BYTES bytes are written)
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_slot, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Deserialize a secret key
 *
 * The first SEED_BYTES bytes of <b>sk</b> seed a seed expander (bytes 0..31 as seed, the bytes
 * from offset 32 on as diversifier) from which <b>x</b> and then <b>y</b> are drawn, in that order,
 * as vectors of weight PARAM_OMEGA. The public key stored after the seed is copied out unchanged.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint64_t representation of vector y
 * @param[out] pk String receiving the public key (PUBLIC_KEY_BYTES bytes)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
    uint8_t seed_copy[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *stored_pk = sk + SEED_BYTES;

    memcpy(seed_copy, sk, SEED_BYTES);
    seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);

    // x must be sampled before y: both consume the same expander stream
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(&expander, y, PARAM_OMEGA);

    memcpy(pk, stored_pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Serialize a public key
 *
 * The output is the SEED_BYTES-byte seed used to generate the vector <b>h</b>, immediately
 * followed by the first VEC_N_SIZE_BYTES bytes of the syndrome <b>s</b>.
 *
 * @param[out] pk String receiving the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQC128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *syndrome_slot = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    memcpy(syndrome_slot, s, VEC_N_SIZE_BYTES);
 }



 /**
 * @brief Deserialize a public key
 *
 * The first SEED_BYTES bytes of <b>pk</b> seed a seed expander (bytes 0..31 as seed, the bytes
 * from offset 32 on as diversifier) from which <b>h</b> is regenerated. The following
 * VEC_N_SIZE_BYTES bytes are copied into <b>s</b>; any further bytes of <b>s</b> are left untouched.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed_copy[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *stored_syndrome = pk + SEED_BYTES;

    memcpy(seed_copy, pk, SEED_BYTES);
    seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQC128_AVX2_vect_set_random(&expander, h);

    memcpy(s, stored_syndrome, VEC_N_SIZE_BYTES);
 }


 /**
 * @brief Serialize a ciphertext
 *
 * The output is the concatenation of VEC_N_SIZE_BYTES bytes of <b>u</b>,
 * VEC_N1N2_SIZE_BYTES bytes of <b>v</b> and the SHA512_BYTES-byte hash <b>d</b>.
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQC128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *v_slot = ct + VEC_N_SIZE_BYTES;
    uint8_t *d_slot = v_slot + VEC_N1N2_SIZE_BYTES;

    memcpy(ct, u, VEC_N_SIZE_BYTES);
    memcpy(v_slot, v, VEC_N1N2_SIZE_BYTES);
    memcpy(d_slot, d, SHA512_BYTES);
 }


 /**
 * @brief Deserialize a ciphertext
 *
 * Splits <b>ct</b> into VEC_N_SIZE_BYTES bytes for <b>u</b>, VEC_N1N2_SIZE_BYTES bytes for <b>v</b>
 * and SHA512_BYTES bytes for the hash <b>d</b>. Only those byte counts are written; any further
 * bytes of <b>u</b> and <b>v</b> keep their previous content.
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQC128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *v_src = ct + VEC_N_SIZE_BYTES;
    const uint8_t *d_src = v_src + VEC_N1N2_SIZE_BYTES;

    memcpy(u, ct, VEC_N_SIZE_BYTES);
    memcpy(v, v_src, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_src, SHA512_BYTES);
 }
--- a/crypto_kem/hqc-128/avx2/parsing.h
+++ b/crypto_kem/hqc-128/avx2/parsing.h
@@ -0,0 +1,29 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 #include <stdint.h>

 #include <stdint.h>

 /* Secret key: seed followed by the public key. */
 void PQCLEAN_HQC128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQC128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


 /* Public key: seed followed by the syndrome s. */
 void PQCLEAN_HQC128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQC128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 /* Ciphertext: u, then v, then the hash d. */
 void PQCLEAN_HQC128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQC128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/crypto_kem/hqc-128/avx2/repetition.c
+++ b/crypto_kem/hqc-128/avx2/repetition.c
@@ -0,0 +1,41 @@
 #include "parameters.h"
 #include "repetition.h"
 #include <immintrin.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 /**
 * @file repetition.c
 * @brief Implementation of repetition codes
 */


 #define MASK_N2                              ((1UL << PARAM_N2) - 1)

 /**
 * @brief Decoding the code words to a message using the repetition code
 *
 * Majority decoding: the code word is read as consecutive groups of PARAM_N2 bits, and the
 * message bit of a group is 1 when more than PARAM_T bits of the group are set, 0 otherwise
 * (PARAM_N2 = 2 * PARAM_T + 1).
 *
 * Decoded bits are ORed into <b>m</b>, so <b>m</b> has to be zeroed by the caller to obtain
 * exactly the decoded message.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t out_bit = 0;

    for (size_t first = 0 ; first + PARAM_N2 <= PARAM_N1N2 ; first += PARAM_N2) {
        // positions of the first and last bit of the current group
        const size_t last = first + PARAM_N2 - 1;
        const size_t lo_word = first >> 6;
        const size_t hi_word = last >> 6;

        // 1 when the group spills over into the next 64-bit word, 0 otherwise
        const uint64_t straddles = (uint64_t) (hi_word == (lo_word + 1));

        // bits of the group stored in the word holding its first bit, moved down to bit 0
        const uint64_t lo_part = (em[lo_word] >> (first & 63)) & MASK_N2;

        // bits of the group stored in the following word, moved to the top so that they
        // cannot collide with lo_part; multiplied away when the group fits in one word
        const uint64_t hi_part = (em[hi_word] << (63 - (last & 63))) * straddles;

        const uint64_t weight = _mm_popcnt_u64(lo_part | hi_part);

        m[out_bit >> 6] |= ((uint64_t) (weight > PARAM_T)) << (out_bit & 63);
        out_bit++;
    }
 }
--- a/crypto_kem/hqc-128/avx2/repetition.h
+++ b/crypto_kem/hqc-128/avx2/repetition.h
@@ -0,0 +1,17 @@
 #ifndef REPETITION_H
 #define REPETITION_H


 /**
 * @file repetition.h
 * @brief Header file for repetition.c
 */

 #include <stdint.h>

 #include <stdint.h>

 /* Majority-decodes the repetition code word em; decoded bits are ORed into m. */
 void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-128/avx2/vector.c
+++ b/crypto_kem/hqc-128/avx2/vector.c
@@ -0,0 +1,200 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * Draws <b>weight</b> pairwise distinct positions in [0, PARAM_N - 1] and flips the corresponding bits of <b>v</b>.
 * Each position is obtained by rejection sampling on 24 bits of seed expander output:
 *  1. Read three bytes from the seed expander and combine them (big-endian) into \f$ x \in [0, 2^{24}[ \f$.
 *  2. If \f$ x \geq \f$ UTILS_REJECTION_THRESHOLD, go to 1 (the threshold is the precomputed value
 *     \f$ \lfloor {2^{24} \over PARAM\_N} \rfloor \times PARAM\_N \f$, see the file parameters.h).
 *  3. The candidate position is \f$ x \mod PARAM\_N \f$; it is discarded if it was already drawn.
 *
 * The selected bits are XORed into the existing content of <b>v</b>, 256 bits at a time over
 * CEIL_DIVIDE(PARAM_N, 256) blocks, so <b>v</b> must be zero on entry to end up with exactly <b>weight</b> bits set.
 * The internal buffers hold PARAM_OMEGA_R entries, so <b>weight</b> must not exceed PARAM_OMEGA_R.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to an array
 * @param[in] weight Integer that is the Hamming weight
 */
 void PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    const size_t pool_len = 3 * weight;
    const uint32_t block_count = CEIL_DIVIDE(PARAM_N, 256);
    // lane k of this constant holds the value k
    const __m256i lane_ids = (__m256i) {
        0UL, 1UL, 2UL, 3UL
    };
    uint8_t pool[3 * PARAM_OMEGA_R] = {0};
    uint32_t support[PARAM_OMEGA_R] = {0};
    __m256i block_of[PARAM_OMEGA_R];
    __m256i bit_of[PARAM_OMEGA_R];
    size_t cursor = 0;
    uint32_t found = 0;

    seedexpander(ctx, pool, pool_len);

    // Phase 1: collect `weight` distinct positions
    while (found < weight) {
        uint32_t candidate = 0;
        uint8_t duplicate = 0;

        do {
            // refill the byte pool once it has been consumed entirely
            if (cursor == pool_len) {
                seedexpander(ctx, pool, pool_len);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;
        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;

        for (uint32_t k = 0 ; k < found ; k++) {
            duplicate |= (uint8_t) (support[k] == candidate);
        }

        // a repeated position is dropped and another one is drawn
        if (!duplicate) {
            support[found] = candidate;
            found++;
        }
    }

    // Phase 2: for every position, precompute its 256-bit block index and a 256-bit
    // value that has only the selected bit set
    for (uint32_t i = 0 ; i < weight ; i++) {
        const uint64_t word = support[i] >> 6;
        const uint64_t lane = word & 0x3UL;
        const uint64_t bit = 1ULL << (support[i] & 0x3f);
        const __m256i lane_hit = _mm256_cmpeq_epi64(_mm256_set1_epi64x(lane), lane_ids);

        block_of[i] = _mm256_set1_epi64x(word >> 2);
        bit_of[i] = _mm256_set1_epi64x(bit) & lane_hit;
    }

    // Phase 3: walk over all blocks of v and XOR in the bits that belong to each block;
    // every (block, position) pair is visited, whatever the positions are
    for (uint32_t blk = 0 ; blk < block_count ; blk++) {
        __m256i *slot = ((__m256i *)v) + blk;
        const __m256i blk_id = _mm256_set1_epi64x(blk);
        __m256i acc = _mm256_loadu_si256(slot);

        for (uint32_t s = 0 ; s < weight ; s++) {
            const __m256i in_this_block = _mm256_cmpeq_epi64(block_of[s], blk_id);
            acc ^= bit_of[s] & in_this_block;
        }
        _mm256_storeu_si256(slot, acc);
    }
 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seed expander and copied into <b>v</b>; the word at
 * index VEC_N_SIZE_64 - 1 is then masked with BITMASK(PARAM_N, 64) to drop the extra bits.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array
 */
 void PQCLEAN_HQC128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    const size_t top_word = VEC_N_SIZE_64 - 1;

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    memcpy(v, stream, VEC_N_SIZE_BYTES);

    v[top_word] &= BITMASK(PARAM_N, 64);
 }



 /**
 * @brief Generates a random vector
 *
 * Fills the first VEC_K_SIZE_BYTES bytes of <b>v</b> with output of the randombytes function.
 *
 * @param[out] v Pointer to an array
 */
 void PQCLEAN_HQC128_AVX2_vect_set_random_from_randombytes(uint64_t *v) {
    uint8_t fresh[VEC_K_SIZE_BYTES] = {0};

    randombytes(fresh, sizeof fresh);
    memcpy(v, fresh, sizeof fresh);
 }



 /**
 * @brief Adds two vectors
 *
 * Addition is over GF(2), i.e. a word-wise XOR. The output may be the same array as one of the inputs.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
 void PQCLEAN_HQC128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t idx = 0;

    while (idx < size) {
        const uint64_t lhs = v1[idx];
        const uint64_t rhs = v2[idx];

        o[idx] = lhs ^ rhs;
        idx++;
    }
 }



 /**
 * @brief Compares two vectors
 *
 * The vectors are compared byte by byte; every byte is always visited and the differences
 * are accumulated, so the loop does not stop at the first mismatch.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the number of bytes to compare
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 int PQCLEAN_HQC128_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    const uint8_t *lhs = (const uint8_t *) v1;
    const uint8_t *rhs = (const uint8_t *) v2;
    unsigned char acc = 0;

    for (uint32_t k = 0 ; k < size ; k++) {
        acc |= lhs[k] ^ rhs[k];
    }
    return acc != 0;
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When truncating (size_o < size_v), the first CEIL(size_o / 8) bytes of <b>v</b> are copied and,
 * if size_o is not a multiple of 64, every bit at position >= size_o in the last output word is
 * cleared. The sizes are derived from <b>size_o</b> itself, so the function is correct for any
 * target length (for size_o = PARAM_N1N2 this is exactly VEC_N1N2_SIZE_BYTES / VEC_N1N2_SIZE_64).
 *
 * Otherwise the CEIL(size_v / 8) bytes of <b>v</b> are copied and the remaining bytes of <b>o</b>
 * are left untouched.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQC128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o < size_v) {
        const size_t n_bytes = ((size_t) size_o + 7) / 8;
        const uint32_t tail_bits = size_o % 64;

        memcpy(o, v, n_bytes);

        // keep only the low tail_bits bits of the word holding the last valid bit
        if (tail_bits != 0) {
            o[size_o / 64] &= (((uint64_t) 1) << tail_bits) - 1;
        }
    } else {
        memcpy(o, v, ((size_t) size_v + 7) / 8);
    }
 }
--- a/crypto_kem/hqc-128/avx2/vector.h
+++ b/crypto_kem/hqc-128/avx2/vector.h
@@ -0,0 +1,29 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */

 #include "nistseedexpander.h"
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 /* Sampling: fixed-weight vector and uniform vector from a seed expander, uniform vector from randombytes. */
 void PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 void PQCLEAN_HQC128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 void PQCLEAN_HQC128_AVX2_vect_set_random_from_randombytes(uint64_t *v);


 /* Utilities: XOR of two vectors (size in 64-bit words), byte-wise comparison (size in bytes), resizing (sizes in bits). */
 void PQCLEAN_HQC128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 int PQCLEAN_HQC128_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size);

 void PQCLEAN_HQC128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/crypto_kem/hqc-128/clean/LICENSE
+++ b/crypto_kem/hqc-128/clean/LICENSE
@@ -0,0 +1 @@
 Public Domain
--- a/crypto_kem/hqc-128/clean/Makefile
+++ b/crypto_kem/hqc-128/clean/Makefile
@@ -0,0 +1,19 @@
 # This Makefile can be used with GNU Make or BSD Make

 LIB=libhqc-128_clean.a
 HEADERS=api.h bch.h code.h fft.h gf2x.h gf.h hqc.h parameters.h parsing.h repetition.h vector.h 
 OBJECTS=bch.o code.o fft.o gf2x.o gf.o hqc.o kem.o parsing.o repetition.o vector.o 

 CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS)

 all: $(LIB)

 %.o: %.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $@ $<

 $(LIB): $(OBJECTS)
 	$(AR) -r $@ $(OBJECTS)

 clean:
 	$(RM) $(OBJECTS)
 	$(RM) $(LIB)
--- a/crypto_kem/hqc-128/clean/Makefile.Microsoft_nmake
+++ b/crypto_kem/hqc-128/clean/Makefile.Microsoft_nmake
@@ -0,0 +1,19 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 LIBRARY=libhqc-128_clean.lib
 OBJECTS=bch.obj code.obj fft.obj gf2x.obj gf.obj hqc.obj kem.obj parsing.obj repetition.obj vector.obj 

 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/hqc-128/clean/api.h
+++ b/crypto_kem/hqc-128/clean/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQC128_CLEAN_API_H
 #define PQCLEAN_HQC128_CLEAN_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 #define PQCLEAN_HQC128_CLEAN_CRYPTO_ALGNAME                      "HQC-128"

 /* Sizes, in bytes, of the secret key, public key, shared secret and ciphertext. */
 #define PQCLEAN_HQC128_CLEAN_CRYPTO_SECRETKEYBYTES               3064
 #define PQCLEAN_HQC128_CLEAN_CRYPTO_PUBLICKEYBYTES               3024
 #define PQCLEAN_HQC128_CLEAN_CRYPTO_BYTES                        64
 #define PQCLEAN_HQC128_CLEAN_CRYPTO_CIPHERTEXTBYTES              6017

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQC128_CLEAN_CRYPTO_SECRETKEYBYTES would shrink to the
 // remaining 3064 - 3024 = 40 bytes (the upstream comment said 32).
 // NOTE(review): confirm the 40 against SEED_BYTES in parameters.h.

 /* Key generation; writes the public key to pk and the secret key to sk. */
 int PQCLEAN_HQC128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 /* Encapsulation; writes the ciphertext to ct and the shared secret to ss. */
 int PQCLEAN_HQC128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 /* Decapsulation; writes the shared secret to ss. */
 int PQCLEAN_HQC128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-128/clean/bch.c
+++ b/crypto_kem/hqc-128/clean/bch.c
@@ -0,0 +1,383 @@
 #include "bch.h"
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file bch.c
 * Constant time implementation of BCH codes
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo modulus using one branch-free conditional subtraction.
 *
 * i must be less than 2*modulus, so the result is either i (when i < modulus)
 * or i - modulus (otherwise). The choice is made from bit 15 of the 16-bit
 * difference i - modulus, which therefore has to stay below 2^15 when no wrap occurs.
 *
 * @returns i mod (modulus)
 * @param[in] i The integer whose modulo is taken (less than 2*modulus)
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;

    // wrapped = 1 if the subtraction went below zero (i < modulus), 0 otherwise
    uint16_t wrapped = diff >> 15;

    // restore = modulus if the subtraction wrapped, 0 otherwise
    uint16_t restore = (uint16_t) (0 - wrapped) & modulus;

    return diff + restore;
 }



 /**
 * @brief Marks the binary cyclotomic cosets modulo PARAM_GF_MUL_ORDER generated by the odd integers below upper_bound.
 *
 * For each odd i < upper_bound whose entry is still 0, the entries i, 2i, 4i, ...
 * (PARAM_M values, each reduced modulo PARAM_GF_MUL_ORDER) are set to i.
 * Entries are only ever set, never cleared, and a zero entry means "not in any class yet",
 * so the array must be zeroed by the caller beforehand.
 *
 * @param[in,out] cosets Array indexed by residue, receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd integers that start a class
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1 ; rep < upper_bound ; rep += 2) {
        if (cosets[rep] != 0) {
            continue; // rep was already reached from a smaller representative
        }

        cosets[rep] = rep;

        // Visit the PARAM_M - 1 remaining members obtained by repeated doubling
        uint16_t member = rep;
        for (size_t left = PARAM_M - 1 ; left != 0 ; --left) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The polynomial is built as the product of (X - a^i) over every exponent i that belongs
 * to one of the cyclotomic cosets of the odd integers below 2*t.
 *
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array's element type
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
        if (cosets[i] == 0) {
            continue; // a^i is not a root of the generator polynomial
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly ; j ; --j) {
            // mask is all ones when bch_poly[j] is non-zero, so a zero coefficient contributes no a^i term
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity: extend t while the next odd exponent
    // is also a root. The index is bounded so the scan never reads past the table.
    while ((2 * *t + 1 < PARAM_GF_MUL_ORDER) && (cosets[2 * *t + 1] != 0)) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Expands the packed message into the array message_unpacked, one bit per byte
 *
 * Bit j of word i of message is written (as 0 or 1) to message_unpacked[64 * i + j].
 *
 * @param[out] message_unpacked Array of PARAM_K bytes receiving one message bit each
 * @param[in] message Array of VEC_K_SIZE_64 64-bit words storing the packed message
 */
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message) {
    const size_t full_words = VEC_K_SIZE_64 - (PARAM_K % 64 != 0);
    const size_t last = VEC_K_SIZE_64 - 1;

    // Words made entirely of message bits
    for (size_t word = 0 ; word < full_words ; ++word) {
        for (size_t bit = 0 ; bit < 64 ; ++bit) {
            message_unpacked[64 * word + bit] = (message[word] >> bit) & 1;
        }
    }

    // Remaining bits of a partially used last word (no iteration if PARAM_K is a multiple of 64)
    for (size_t bit = 0 ; bit < PARAM_K % 64 ; ++bit) {
        message_unpacked[64 * last + bit] = (message[last] >> bit) & 1;
    }
 }


 /**
 * @brief Systematically encodes message into codeword with a shift register driven by the generator polynomial
 *
 * Both arrays hold one bit per byte. The first PARAM_N1 - PARAM_K entries of codeword act as
 * the shift register and end up holding the parity-check digits; they are read before being
 * written, so codeword is expected to be zeroed on entry (the caller in this file does so).
 * The message is then copied unchanged into the last PARAM_K entries.
 *
 * @param[out] codeword Array of PARAM_N1 bytes receiving the codeword
 * @param[in] message Array of PARAM_K bytes storing the message to encode
 */
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message) {
    const size_t parity_len = PARAM_N1 - PARAM_K;
    uint8_t bch_poly[PARAM_G] = PARAM_BCH_POLY;

    // Feed the message bits from the highest index down to 0
    for (size_t remaining = PARAM_K ; remaining > 0 ; --remaining) {
        // Feedback bit: incoming message bit XOR the last stage of the register
        uint8_t feedback = message[remaining - 1] ^ codeword[parity_len - 1];
        // All-ones when the feedback bit is 1, zero otherwise
        uint8_t select = (uint8_t) (0 - feedback);

        // Shift the register by one stage, adding the generator taps selected by the feedback
        for (size_t stage = parity_len - 1 ; stage > 0 ; --stage) {
            codeword[stage] = codeword[stage - 1] ^ (select & bch_poly[stage]);
        }

        codeword[0] = feedback;
    }

    // Systematic part: append the message after the parity-check digits
    memcpy(codeword + parity_len, message, PARAM_K);
 }



 /**
 * @brief Packs a codeword stored one bit per byte into 64-bit words
 *
 * Entry 64 * i + j of codeword_unpacked is OR-ed into bit j of codeword[i].
 * Bits are only OR-ed in, so codeword must be zeroed by the caller beforehand.
 *
 * @param[in,out] codeword Array of VEC_N1_SIZE_64 64-bit words receiving the packed codeword
 * @param[in] codeword_unpacked Array of PARAM_N1 bytes storing the unpacked codeword
 */
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked) {
    const size_t full_words = VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0);
    const size_t last = VEC_N1_SIZE_64 - 1;

    // Words made entirely of codeword bits
    for (size_t word = 0 ; word < full_words ; ++word) {
        for (size_t bit = 0 ; bit < 64 ; ++bit) {
            codeword[word] |= (uint64_t) codeword_unpacked[64 * word + bit] << bit;
        }
    }

    // Remaining bits of a partially used last word (no iteration if PARAM_N1 is a multiple of 64)
    for (size_t bit = 0 ; bit < PARAM_N1 % 64 ; ++bit) {
        codeword[last] |= (uint64_t) codeword_unpacked[64 * last + bit] << bit;
    }
 }


 /**
 * @brief Encodes a message of PARAM_K bits to a BCH codeword of PARAM_N1 bits
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * the encoding is systematic and uses a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial of the BCH code. <br>
 * The work is done in three steps: unpack the message to one bit per byte, run the shift
 * register, then pack the result. The packing step ORs bits into codeword, so codeword
 * must be zeroed by the caller.
 *
 * @param[in,out] codeword Array of size VEC_N1_SIZE_BYTES receiving the encoded message
 * @param[in] message Array of size VEC_K_SIZE_BYTES storing the message
 */
 void PQCLEAN_HQC128_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *message) {
    uint8_t codeword_bits[PARAM_N1] = {0}; // zeroed: it doubles as the shift register
    uint8_t message_bits[PARAM_K];

    unpack_message(message_bits, message);
    lfsr_encode(codeword_bits, message_bits);
    pack_codeword(codeword, codeword_bits);
 }


 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite joiner1995decoding). <br>
 * We use the letter p for rho which is initialized at -1/2. <br>
 * The array X_sigma_p represents the polynomial X^(2(mu-rho))*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA - 1 coefficients. <br>
 * Updates that depend on the syndromes are selected with all-ones/all-zero masks rather than branches. <br>
 * sigma[1..PARAM_DELTA] are updated by XOR, so they are expected to be zero on entry
 * (the caller in this file passes a zero-initialized array).
 *
 * @returns the degree of the ELP sigma
 * @param[out] sigma Array receiving the ELP; indices 0..PARAM_DELTA are accessed, so at least PARAM_DELTA + 1 elements are needed
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    sigma[0] = 1;
    size_t deg_sigma = 0;
    size_t deg_sigma_p = 0;
    uint16_t sigma_copy[PARAM_DELTA - 1] = {0};
    size_t deg_sigma_copy = 0;
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; // starts as the polynomial X
    int32_t pp = -1; // 2*rho
    uint16_t d_p = 1; // discrepancy associated with sigma_p
    uint16_t d = syndromes[0]; // current discrepancy

    for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (the byte count 2 * (PARAM_DELTA - 1) covers PARAM_DELTA - 1 uint16_t coefficients)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
        deg_sigma_copy = deg_sigma;

        // sigma += (d / d_p) * X_sigma_p, applied unconditionally
        uint16_t dd = PQCLEAN_HQC128_CLEAN_gf_mul(d, PQCLEAN_HQC128_CLEAN_gf_inverse(d_p)); // 0 if(d == 0)
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            sigma[i] ^= PQCLEAN_HQC128_CLEAN_gf_mul(dd, X_sigma_p[i]);
        }

        size_t deg_X = 2 * mu - pp; // 2*(mu-rho)
        size_t deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        int16_t mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        int16_t mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        int16_t mask12 = mask1 & mask2;
        // Masked select: take deg_X_sigma_p when mask12 is set, keep deg_sigma otherwise
        deg_sigma = (mask12 & deg_X_sigma_p) ^ (~mask12 & deg_sigma);

        // Last iteration: sigma is final, the state for a further step is not needed
        // (this branch depends only on the loop counter)
        if (mu == PARAM_DELTA - 1) {
            break;
        }

        // Update pp, d_p and X_sigma_p if needed
        pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
        d_p = (mask12 & d) ^ (~mask12 & d_p);
        // Shift by two positions (multiply by X^2), taking the coefficients from the
        // saved sigma when mask12 is set and from the previous X_sigma_p otherwise
        for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
            X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }
        X_sigma_p[1] = 0;
        X_sigma_p[0] = 0;
        deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p);

        // Compute the next discrepancy
        d = syndromes[2 * mu + 2];
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            d ^= PQCLEAN_HQC128_CLEAN_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Retrieves the message message from the codeword codeword
 *
 * Since we performed a systematic encoding, the message is the last PARAM_K bits of the codeword,
 * i.e. it starts at bit offset PARAM_N1 - PARAM_K. Each output word is assembled from the upper
 * bits of one codeword word and, when the offset is not word-aligned, the lower bits of the next one.
 * A word-aligned offset is handled explicitly: shifting a 64-bit value by 64 is undefined in C,
 * so in that case no spill-over word is read or shifted.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the message
 * @param[in] codeword Array of size VEC_N1_SIZE_BYTES storing the codeword
 */
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
    const size_t parity_bits = PARAM_N1 - PARAM_K;
    const size_t shift = parity_bits % 64; // bit position of the message inside a codeword word
    size_t index = parity_bits / 64;       // codeword word holding the first message bit

    for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
        // Upper (64 - shift) bits of the current codeword word become the low bits of message[i]
        uint64_t word = codeword[index] >> shift;
        ++index;
        // Lower shift bits of the next codeword word become the high bits of message[i]
        if (shift != 0) {
            word |= codeword[index] << (64 - shift);
        }
        message[i] = word;
    }

    // Last word: codeword[index] supplies 64 - shift bits. The next codeword word is only
    // needed when the last message word holds more bits than that.
    uint64_t last = codeword[index] >> shift;
    if ((shift != 0) && ((PARAM_K % 64 == 0) || (64 - shift < (size_t) (PARAM_K % 64)))) {
        last |= codeword[index + 1] << (64 - shift);
    }
    message[VEC_K_SIZE_64 - 1] = last;
 }


 /**
 * @brief Computes the 2*PARAM_DELTA syndromes from the received vector vector
 *
 * Syndromes are the sum of powers of alpha weighted by vector's coefficients.
 * To do so, we use the additive FFT transpose, which takes as input a family w of GF(2^PARAM_M) elements
 * and outputs the weighted power sums of these w. <br>
 * Therefore, this requires twisting and applying a permutation before feeding vector to the PQCLEAN_HQC128_CLEAN_fft transpose. <br>
 * For more details see Bernstein, Chou and Schwabe's explanations:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * @param[out] syndromes Array of size 2^(PARAM_FFT_T) receiving the 2*PARAM_DELTA syndromes
 * @param[in] vector Array of size VEC_N1_SIZE_BYTES storing the received word
 */
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector) {
    // Family of 2^PARAM_M field elements derived from the received word
    uint16_t family[1 << PARAM_M];

    PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(family, vector);
    PQCLEAN_HQC128_CLEAN_fft_t(syndromes, family, 2 * PARAM_DELTA);
 }


 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQC128_CLEAN_fft for more details.
 *
 * @param[out] error Array of VEC_N1_SIZE_BYTES elements receiving the error polynomial
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint64_t *error, const uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0}; // w will receive the evaluation of sigma in all field elements

    PQCLEAN_HQC128_CLEAN_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(error, w);
 }



 /**
 * @brief Decodes the received word
 *
 * This function relies on four steps:
 *    <ol>
 *    <li> The first step, done by additive FFT transpose, is the computation of the 2*PARAM_DELTA syndromes.
 *    <li> The second step is the computation of the error-locator polynomial sigma.
 *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 *    <li> The fourth step is the correction of the errors in the received polynomial.
 *    </ol>
 * The message is finally read from the corrected word. Note that vector is corrected in place. <br>
 * For a more complete picture on BCH decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the decoded message
 * @param[in,out] vector Array of size VEC_N1_SIZE_BYTES storing the received word; overwritten with the corrected word
 */
 void PQCLEAN_HQC128_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector) {
    uint64_t error_poly[(1 << PARAM_M) / 8] = {0};
    uint16_t elp[1 << PARAM_FFT] = {0}; // degree at most PARAM_DELTA, but the FFT requires the extra room
    uint16_t synd[1 << PARAM_FFT_T] = {0};

    // Step 1: the 2*PARAM_DELTA syndromes
    compute_syndromes(synd, vector);

    // Step 2: the error locator polynomial (its degree, the return value, is not used here)
    (void) compute_elp(elp, synd);

    // Step 3: the error polynomial, from the roots of the locator
    compute_roots(error_poly, elp);

    // Step 4: add the error polynomial to the received polynomial
    PQCLEAN_HQC128_CLEAN_vect_add(vector, vector, error_poly, VEC_N1_SIZE_64);

    // The encoding is systematic: read the message back from the corrected codeword
    message_from_codeword(message, vector);
 }
--- a/crypto_kem/hqc-128/clean/bch.h
+++ b/crypto_kem/hqc-128/clean/bch.h
@@ -0,0 +1,23 @@
 #ifndef BCH_H
 #define BCH_H


 /**
 * @file bch.h
 * Header file of bch.c
 */

 #include "parameters.h"
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /* Encodes a packed message into a packed BCH codeword (systematic encoding). */
 void PQCLEAN_HQC128_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *message);

 /* Decodes the received word vector (corrected in place) and writes the recovered message. */
 void PQCLEAN_HQC128_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);


 /* Computes the BCH generator polynomial; returns its degree and updates *t to the real correction capacity. */
 size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);


 #endif
--- a/crypto_kem/hqc-128/clean/code.c
+++ b/crypto_kem/hqc-128/clean/code.c
@@ -0,0 +1,49 @@
 #include "bch.h"
 #include "code.h"
 #include "parameters.h"
 #include "repetition.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of tensor code
 */



 /**
 *
 * @brief Encodes the message m to a code word em using the tensor code
 *
 * The message is first encoded with the BCH code into a zero-initialized intermediate
 * buffer, which is then encoded with the repetition code to obtain the tensor code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC128_CLEAN_code_encode(uint64_t *em, const uint64_t *m) {
    // Intermediate BCH code word
    uint64_t bch_codeword[VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQC128_CLEAN_bch_code_encode(bch_codeword, m);
    PQCLEAN_HQC128_CLEAN_repetition_code_encode(em, bch_codeword);
 }



 /**
 * @brief Decodes the code word em to a message m using the tensor code
 *
 * The steps of the encoding are undone in reverse order: the repetition code is decoded
 * into an intermediate buffer, which is then decoded with the BCH code.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC128_CLEAN_code_decode(uint64_t *m, const uint64_t *em) {
    // Intermediate (possibly still erroneous) BCH word
    uint64_t bch_word[VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQC128_CLEAN_repetition_code_decode(bch_word, em);
    PQCLEAN_HQC128_CLEAN_bch_code_decode(m, bch_word);
 }
--- a/crypto_kem/hqc-128/clean/code.h
+++ b/crypto_kem/hqc-128/clean/code.h
@@ -0,0 +1,20 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */

 #include "parameters.h"
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /* Encodes message with the tensor code (BCH encoding followed by repetition encoding) into em. */
 void PQCLEAN_HQC128_CLEAN_code_encode(uint64_t *em, const uint64_t *message);

 /* Decodes the tensor code word em (repetition decoding followed by BCH decoding) into m. */
 void PQCLEAN_HQC128_CLEAN_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-128/clean/fft.c
+++ b/crypto_kem/hqc-128/clean/fft.c
@@ -0,0 +1,627 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size);
 static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f);
 static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis consists of the powers of two 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2^1, stored in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    // Walk the exponents downwards; exponent 0 (the element 1) is left out
    for (size_t exponent = PARAM_M - 1 ; exponent > 0 ; --exponent) {
        betas[PARAM_M - 1 - exponent] = 1 << exponent;
    }
 }



 /**
 * @brief Computes the subset sums (XOR sums) of the given set
 *
 * Entry i of subset_sums is the XOR of the set elements selected by the bits of i:
 * bit b of i set means set[b] is part of the sum. Entry 0 is the empty sum, 0.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    subset_sums[0] = 0;

    // filled = number of sums already known; adding one element doubles it
    size_t filled = 1;
    for (size_t element = 0 ; element < set_size ; ++element) {
        for (size_t prev = 0 ; prev < filled ; ++prev) {
            subset_sums[filled + prev] = set[element] ^ subset_sums[prev];
        }
        filled <<= 1;
    }
 }



 /**
 * @brief Transpose of the linear radix conversion
 *
 * This is a direct transposition of the radix function
 * implemented following the process of transposing a linear function as exposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Sizes 2^1 to 2^4 are handled by hard-coded XOR networks; larger sizes recurse on halves.
 *
 * @param[out] f Array of size a power of 2 (2^m_f elements are written)
 * @param[in] f0 Array half the size of f
 * @param[in] f1 Array half the size of f
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f) {
    switch (m_f) {
    // 16 outputs from 8 + 8 inputs; later outputs reuse earlier ones, so order matters
    case 4:
        f[0] = f0[0];
        f[1] = f1[0];
        f[2] = f0[1] ^ f1[0];
        f[3] = f[2] ^ f1[1];
        f[4] = f[2] ^ f0[2];
        f[5] = f[3] ^ f1[2];
        f[6] = f[4] ^ f0[3] ^ f1[2];
        f[7] = f[3] ^ f0[3] ^ f1[3];
        f[8] = f[4] ^ f0[4];
        f[9] = f[5] ^ f1[4];
        f[10] = f[6] ^ f0[5] ^ f1[4];
        f[11] = f[7] ^ f0[5] ^ f1[4] ^ f1[5];
        f[12] = f[8] ^ f0[5] ^ f0[6] ^ f1[4];
        f[13] = f[7] ^ f[9] ^ f[11] ^ f1[6];
        f[14] = f[6] ^ f0[6] ^ f0[7] ^ f1[6];
        f[15] = f[7] ^ f0[7] ^ f1[7];
        return;

    // 8 outputs from 4 + 4 inputs
    case 3:
        f[0] = f0[0];
        f[1] = f1[0];
        f[2] = f0[1] ^ f1[0];
        f[3] = f[2] ^ f1[1];
        f[4] = f[2] ^ f0[2];
        f[5] = f[3] ^ f1[2];
        f[6] = f[4] ^ f0[3] ^ f1[2];
        f[7] = f[3] ^ f0[3] ^ f1[3];
        return;

    // 4 outputs from 2 + 2 inputs
    case 2:
        f[0] = f0[0];
        f[1] = f1[0];
        f[2] = f0[1] ^ f1[0];
        f[3] = f[2] ^ f1[1];
        return;

    // 2 outputs from 1 + 1 inputs
    case 1:
        f[0] = f0[0];
        f[1] = f1[0];
        return;

    // General case (m_f > 4, or m_f == 0 which callers are presumably not expected to pass)
    default:
        ;

        // n is a quarter of the output size: f has 4n elements, f0 and f1 have 2n each
        size_t n = 1 << (m_f - 2);

        // Low (R) and high (Q) halves of f0 and f1, n elements used in each
        uint16_t Q0[1 << (PARAM_FFT_T - 2)];
        uint16_t Q1[1 << (PARAM_FFT_T - 2)];
        uint16_t R0[1 << (PARAM_FFT_T - 2)];
        uint16_t R1[1 << (PARAM_FFT_T - 2)];

        // Results of the two half-size conversions, 2n elements used in each.
        // NOTE(review): the size parses as 1 << (2 * (PARAM_FFT_T - 2)); the radix function
        // uses 2 * (1 << ...) for the same buffers, confirm which one was intended.
        uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)];
        uint16_t R[1 << 2 * (PARAM_FFT_T - 2)];

        // memcpy counts are in bytes: 2 * n bytes is n uint16_t elements
        memcpy(Q0, f0 + n, 2 * n);
        memcpy(Q1, f1 + n, 2 * n);
        memcpy(R0, f0, 2 * n);
        memcpy(R1, f1, 2 * n);

        radix_t (Q, Q0, Q1, m_f - 1);
        radix_t (R, R0, R1, m_f - 1);

        // f[0..2n) = R; the next copy rewrites f[2n..3n) with R[n..2n); f[3n..4n) = Q[n..2n)
        memcpy(f, R, 4 * n);
        memcpy(f + 2 * n, R + n, 2 * n);
        memcpy(f + 3 * n, Q + n, 2 * n);

        // Fold the low half of Q into the third quarter, then the third quarter into the fourth
        for (size_t i = 0 ; i < n ; ++i) {
            f[2 * n + i] ^= Q[i];
            f[3 * n + i] ^= f[2 * n + i];
        }
    }
 }



 /**
 * @brief Recursively computes syndromes of family w
 *
 * This function is a subroutine of the function fft_t. <br>
 * It performs the steps of the additive FFT in reverse order: split w into u and v (step 6),
 * recurse on both (step 5), merge with the transposed radix conversion (step 3) and
 * finally apply the twist by powers of betas[m - 1] (step 2).
 *
 * @param[out] f Array receiving the syndromes
 * @param[in] w Array storing the family
 * @param[in] f_coeffs Length of syndromes vector
 * @param[in] m 2^m is the smallest power of 2 greater or equal to the length of family w
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the length of f
 * @param[in] betas FFT constants
 */
 static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    size_t k = 1 << (m - 1); // half the length of w
    uint16_t gammas[PARAM_M - 2];
    uint16_t deltas[PARAM_M - 2];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t f0[1 << (PARAM_FFT_T - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT_T - 2)] = {0};

    // Step 1
    // Base case, two outputs: f[0] is the sum of all w[i], and f[1] is the sum of
    // w[index] weighted by the subset sum of betas selected by the bits of index
    if (m_f == 1) {
        f[0] = 0;
        for (size_t i = 0 ; i < (1U << m) ; ++i) {
            f[0] ^= w[i];
        }
        f[1] = 0;

        uint16_t betas_sums[1 << (PARAM_M - 1)];
        betas_sums[0] = 0;
        for (size_t j = 0 ; j < m ; ++j) {
            // this loop index k shadows the function-level k, which is not used in this branch
            for (size_t k = 0 ; k < (1U << j) ; ++k) {
                size_t index = (1 << j) + k;
                betas_sums[index] = betas_sums[k] ^ betas[j];
                f[1] ^= PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[index], w[index]);
            }
        }

        return;
    }

    // Compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
    for (uint8_t i = 0 ; i < m - 1 ; ++i) {
        gammas[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], PQCLEAN_HQC128_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas subset sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    /* Step 6: Compute u and v from w (aka w)
     * w[i] = u[i] + G[i].v[i]
     * w[k+i] = w[i] + v[i] = u[i] + (G[i]+1).v[i]
     * Transpose:
     * u[i] = w[i] + w[k+i]
     * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case
        // Step 5: Compute f0 from u and f1 from v
        // Here f1 is a single coefficient: the v[i] values are summed directly into
        // f1[0] instead of being stored and passed to a recursive call
        f1[1] = 0;
        u[0] = w[0] ^ w[k];
        f1[0] = w[k];
        for (size_t i = 1 ; i < k ; ++i) {
            u[i] = w[i] ^ w[k + i];
            f1[0] ^= PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i];
        }
        fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);
    } else {
        uint16_t v[1 << (PARAM_M - 2)] = {0};

        // Index 0 is written without the product term (gammas_sums[0] is 0)
        u[0] = w[0] ^ w[k];
        v[0] = w[k];

        for (size_t i = 1 ; i < k ; ++i) {
            u[i] = w[i] ^ w[k + i];
            v[i] = PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i];
        }

        // Step 5: Compute f0 from u and f1 from v
        fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);
        fft_t_rec(f1, v, f_coeffs / 2, m - 1, m_f - 1, deltas);
    }

    // Step 3: Compute g from g0 and g1
    radix_t(f, f0, f1, m_f);

    // Step 2: compute f from g
    // f[i] is multiplied by betas[m - 1]^i; nothing to do when betas[m - 1] is 1
    if (betas[m - 1] != 1) {
        uint16_t beta_m_pow = 1;
        for (size_t i = 1 ; i < (1U << m_f) ; ++i) {
            beta_m_pow = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, f[i]);
        }
    }
 }



 /**
 * @brief Computes the syndromes f of the family w
 *
 * Since the syndromes linear map is the transpose of multipoint evaluation,
 * it uses exactly the same constants, either hardcoded or precomputed by compute_fft_lut(...). <br>
 * This follows directives from Bernstein, Chou and Schwabe given here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * This is the top level of the recursion: the last basis element is 1, so the gammas are the
 * betas themselves and no final twist is needed.
 *
 * @param[out] f Array of 2^(PARAM_FFT_T) elements receiving the syndromes
 * @param[in] w Array of PARAM_GF_MUL_ORDER+1 elements
 * @param[in] f_coeffs Length of syndromes vector f
 */
 void PQCLEAN_HQC128_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) {
    // Transposed from Gao and Mateer algorithm
    const size_t half = 1 << (PARAM_M - 1);
    uint16_t betas[PARAM_M - 1];
    uint16_t deltas[PARAM_M - 1];
    uint16_t betas_sums[1 << (PARAM_M - 1)];
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};
    uint16_t f0[1 << (PARAM_FFT_T - 1)];
    uint16_t f1[1 << (PARAM_FFT_T - 1)];

    compute_fft_betas(betas);
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    /* Step 6: Compute u and v from w
     *
     * The forward transform has:
     * w[i] = u[i] + G[i].v[i]
     * w[half+i] = w[i] + v[i] = u[i] + (G[i]+1).v[i]
     * Transposing gives:
     * u[i] = w[i] + w[half+i]
     * v[i] = G[i].w[i] + (G[i]+1).w[half+i] = G[i].u[i] + w[half+i] */
    // Index 0 is written without the product term (betas_sums[0] is 0)
    u[0] = w[0] ^ w[half];
    v[0] = w[half];
    for (size_t i = 1 ; i < half ; ++i) {
        uint16_t folded = w[i] ^ w[half + i];
        u[i] = folded;
        v[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[i], folded) ^ w[half + i];
    }

    // Basis for the next level: deltas[i] = betas[i]^2 + betas[i]
    for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) {
        deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(betas[i]) ^ betas[i];
    }

    // Step 5: Compute f0 from u and f1 from v
    fft_t_rec(f0, u, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT_T - 1, deltas);
    fft_t_rec(f1, v, f_coeffs / 2, PARAM_M - 1, PARAM_FFT_T - 1, deltas);

    // Step 3: Compute g from g0 and g1
    radix_t(f, f0, f1, PARAM_FFT_T);

    // Step 2: beta_m = 1 so f = g
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Sizes 2^1 to 2^4 are handled by hard-coded XOR networks; larger sizes recurse on halves.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2 (2^m_f elements are read)
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    // 16 inputs to 8 + 8 outputs; some outputs reuse earlier ones, so order matters
    case 4:
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    // 8 inputs to 4 + 4 outputs
    case 3:
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    // 4 inputs to 2 + 2 outputs
    case 2:
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        return;

    // 2 inputs to 1 + 1 outputs
    case 1:
        f0[0] = f[0];
        f1[0] = f[1];
        return;

    // General case (m_f > 4, or m_f == 0 which callers are presumably not expected to pass)
    default:
        ;
        // n is a quarter of the input size: f has 4n elements, f0 and f1 have 2n each
        size_t n = 1 << (m_f - 2);

        // Half-size polynomials built from f, 2n elements used in each
        uint16_t Q[2 * (1 << (PARAM_FFT - 2))];
        uint16_t R[2 * (1 << (PARAM_FFT - 2))];

        // Radix conversions of Q and R, n elements used in each
        uint16_t Q0[1 << (PARAM_FFT - 2)];
        uint16_t Q1[1 << (PARAM_FFT - 2)];
        uint16_t R0[1 << (PARAM_FFT - 2)];
        uint16_t R1[1 << (PARAM_FFT - 2)];

        // memcpy counts are in bytes: 2 * n bytes is n uint16_t elements.
        // Both halves of Q start as the last quarter of f; R starts as the first half of f.
        memcpy(Q, f + 3 * n, 2 * n);
        memcpy(Q + n, f + 3 * n, 2 * n);
        memcpy(R, f, 4 * n);

        // Fold the third quarter of f into the low half of Q, then that into the high half of R
        for (size_t i = 0 ; i < n ; ++i) {
            Q[i] ^= f[2 * n + i];
            R[n + i] ^= Q[i];
        }

        radix(Q0, Q1, Q, m_f - 1);
        radix(R0, R1, R, m_f - 1);

        // Outputs are the R results (low halves) followed by the Q results (high halves)
        memcpy(f0, R0, 2 * n);
        memcpy(f0 + n, Q0, 2 * n);
        memcpy(f1, R1, 2 * n);
        memcpy(f1 + n, Q1, 2 * n);
    }
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function fft. <br>
 * Note that f is modified in place by the twist of step 2.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in,out] f Array storing the polynomial; 2^{m_f} entries are accessed
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f Exponent such that f is handled as an array of 2^{m_f} coefficients
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)];
    uint16_t f1[1 << (PARAM_FFT - 2)];
    uint16_t gammas[PARAM_M - 2];
    uint16_t deltas[PARAM_M - 2];
    size_t k = 1 << (m - 1); // half the number of evaluation points
    uint16_t gammas_sums[1 << (PARAM_M - 2)];
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};

    // Step 1
    // Base case, f = f[0] + f[1].x: each evaluation is f[0] plus the sum of the
    // products betas[j].f[1] selected by the bits of the evaluation index
    if (m_f == 1) {
        // NOTE(review): m entries of tmp are written; the array size presumably equals
        // the value of m at which this base case is reached, confirm against the callers
        uint16_t tmp[PARAM_M - (PARAM_FFT - 1)];
        for (size_t i = 0 ; i < m ; ++i) {
            tmp[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], f[1]);
        }

        w[0] = f[0];
        for (size_t j = 0 ; j < m ; ++j) {
            // this loop index k shadows the function-level k, which is not used in this branch
            for (size_t k = 0 ; k < (1U << j) ; ++k) {
                w[(1 << j) + k] = w[k] ^ tmp[j];
            }
        }

        return;
    }

    // Step 2: compute g
    // f[i] is multiplied by betas[m - 1]^i; nothing to do when betas[m - 1] is 1
    if (betas[m - 1] != 1) {
        uint16_t beta_m_pow = 1;
        for (size_t i = 1 ; i < (1U << m_f) ; ++i) {
            beta_m_pow = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    // Split f into f0 and f1 with the radix conversion
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
    for (uint8_t i = 0 ; i < m - 1 ; ++i) {
        gammas[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], PQCLEAN_HQC128_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    // Evaluate f0 (into u) on the subset sums of the deltas
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        // f1[0] is used directly in place of the evaluations v[i], no recursive call needed
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (size_t i = 1 ; i < k ; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        // w[i] = u[i] + G[i].v[i] and w[k+i] = w[i] + v[i]; the upper half is first
        // loaded with v (2 * k bytes is k uint16_t elements), then w[i] is XOR-ed in
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (size_t i = 1 ; i < k ; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all field elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f, i.e. deg(f) + 1. <br>
 * The polynomial is split in two by radix(), both parts are evaluated recursively
 * with fft_rec() on the basis deltas[i] = betas[i]^2 + betas[i], and the two
 * half-size results are recombined into the 2^PARAM_M outputs. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * On this top-level call no twisting of f is performed; the subset sums used in the
 * recombination are the subset sums of the betas returned by compute_fft_betas().
 *
 * @param[out] w Array receiving 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number of coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    const size_t half = (size_t) 1 << (PARAM_M - 1);
    uint16_t basis[PARAM_M - 1];
    uint16_t basis_sums[1 << (PARAM_M - 1)];
    uint16_t twisted_basis[PARAM_M - 1];
    uint16_t part0[1 << (PARAM_FFT - 1)];
    uint16_t part1[1 << (PARAM_FFT - 1)];
    uint16_t eval0[1 << (PARAM_M - 1)];
    uint16_t eval1[1 << (PARAM_M - 1)];

    // Basis of the evaluation space and all its subset sums
    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // Split f into the two polynomials handled by the recursive calls
    radix(part0, part1, f, PARAM_FFT);

    // Basis for the half-size problems: beta^2 + beta for every basis element
    for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) {
        twisted_basis[i] = PQCLEAN_HQC128_CLEAN_gf_square(basis[i]) ^ basis[i];
    }

    // Evaluate both parts recursively
    fft_rec(eval0, part0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);
    fft_rec(eval1, part1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);

    // Recombine: index 0 and index half are handled apart because no
    // multiplication by a subset sum is applied to them
    w[0] = eval0[0];
    w[half] = eval1[0] ^ eval0[0];

    for (size_t i = 1 ; i < half ; ++i) {
        const uint16_t lower = eval0[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(basis_sums[i], eval1[i]);

        w[i] = lower;
        w[half + i] = eval1[i] ^ lower;
    }
 }



 /**
 * @brief Arranges the received word vector in a form w such that applying the additive FFT transpose to w yields the BCH syndromes of the received word vector.
 *
 * The PARAM_N1 bits of the received word are unpacked (one bit per 16-bit cell, the
 * remaining cells up to 2^PARAM_M staying zero). Each output cell then holds either 0
 * or the field element it is associated with, depending on the received bit found at
 * the discrete logarithm of that element. <br>
 * The field elements are enumerated as subset sums of the FFT basis, which is the
 * ordering the additive FFT transpose expects.
 *
 * @param[out] w Array of size 2^PARAM_M
 * @param[in] vector Received word, PARAM_N1 bits packed into 64-bit words
 */
 void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector) {
    const size_t half = (size_t) 1 << (PARAM_M - 1);
    uint16_t bits[1 << PARAM_M] = {0};
    uint16_t basis[PARAM_M - 1];
    uint16_t basis_sums[1 << (PARAM_M - 1)];

    // Unpack the received word, one bit per cell; cells past PARAM_N1 stay zero
    for (size_t pos = 0 ; pos < PARAM_N1 ; ++pos) {
        bits[pos] = (uint16_t) ((vector[pos / 64] >> (pos % 64)) & 1);
    }

    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // Twist and permute: -bit is an all-zeros or all-ones mask selecting the element
    w[0] = 0;
    w[half] = -bits[0] & 1;
    for (size_t i = 1 ; i < half ; ++i) {
        const uint16_t elt = basis_sums[i];
        const uint16_t elt_plus_one = (uint16_t) (elt ^ 1);

        w[i] = -bits[PQCLEAN_HQC128_CLEAN_gf_log(elt)] & elt;
        w[half + i] = -bits[PQCLEAN_HQC128_CLEAN_gf_log(elt_plus_one)] & elt_plus_one;
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every field element x whose evaluation in w is zero, the bit at position
 * PARAM_GF_MUL_ORDER - log(x) of error is flipped. The evaluations are indexed by
 * subset sums of the FFT basis: w[i] belongs to gammas_sums[i] and w[k + i] to
 * gammas_sums[i] ^ 1, with k = 2^(PARAM_M - 1). <br>
 * The bits are XORed into error, so the caller is expected to pass a cleared buffer.
 *
 * @param[in,out] error Bit vector (64-bit words) receiving the error positions
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    size_t k = 1 << (PARAM_M - 1);
    size_t index;
    uint64_t bit;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // (uint16_t) -x >> 15 is 1 for every nonzero x used here, so bit is 1 exactly when the evaluation is 0
    error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15);

    // w[k] is the slot the preprocessing routine associates with codeword position 0,
    // so a root there flags bit 0. The former code wrote to error[PARAM_GF_MUL_ORDER / 8],
    // mixing a byte index with a 64-bit shift and landing far outside the codeword.
    bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15);
    error[0] ^= bit;

    for (size_t i = 1 ; i < k ; ++i) {
        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i]);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15);
        error[index / 64] ^= bit << (index % 64);

        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i] ^ 1);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15);
        error[index / 64] ^= bit << (index % 64);
    }
 }
--- a/crypto_kem/hqc-128/clean/fft.h
+++ b/crypto_kem/hqc-128/clean/fft.h
@@ -0,0 +1,25 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 /* Additive FFT transpose. NOTE(review): the definition is not visible next to this header; confirm the roles of f and w against fft.c. */
 void PQCLEAN_HQC128_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs);

 /* Unpacks the PARAM_N1-bit received word in vector and fills the 2^PARAM_M entries of w for the FFT transpose. */
 void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector);


 /* Evaluates the polynomial f (f_coeffs coefficients) and writes 2^PARAM_M evaluations to w. */
 void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 /* XORs into error one bit per zero evaluation found in w (w holds 2^PARAM_M evaluations). */
 void PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w);


 #endif
--- a/crypto_kem/hqc-128/clean/gf.c
+++ b/crypto_kem/hqc-128/clean/gf.c
--- a/crypto_kem/hqc-128/clean/gf.h
+++ b/crypto_kem/hqc-128/clean/gf.h
@@ -0,0 +1,29 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 /*
  * Finite-field helpers implemented in gf.c.
  * NOTE(review): gf.c is not visible from here; the one-line notes below are inferred
  * from the names and from the call sites in fft.c and must be confirmed against gf.c.
  */

 /* Presumably fills the exp and log tables for a field of 2^m elements -- TODO confirm. */
 void PQCLEAN_HQC128_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m);


 /* Result is used by fft.c as an index / exponent below 2^PARAM_M (discrete log of elt, presumably). */
 uint16_t PQCLEAN_HQC128_CLEAN_gf_log(uint16_t elt);

 /* Used by fft.c as the product of two field elements. */
 uint16_t PQCLEAN_HQC128_CLEAN_gf_mul(uint16_t a, uint16_t b);

 /* Used by fft.c to form a^2 + a terms; presumably the field square of a. */
 uint16_t PQCLEAN_HQC128_CLEAN_gf_square(uint16_t a);

 /* Used by fft.c together with gf_mul to divide by a; presumably the multiplicative inverse of a. */
 uint16_t PQCLEAN_HQC128_CLEAN_gf_inverse(uint16_t a);

 /* Presumably reduces an exponent i modulo PARAM_GF_MUL_ORDER -- TODO confirm. */
 uint16_t PQCLEAN_HQC128_CLEAN_gf_mod(uint16_t i);


 #endif
--- a/crypto_kem/hqc-128/clean/gf2x.c
+++ b/crypto_kem/hqc-128/clean/gf2x.c
@@ -0,0 +1,155 @@
 #include "gf2x.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "randombytes.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * \file gf2x.c
 * \brief Implementation of multiplication of two polynomials
 */


 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
 static void reduce(uint64_t *o, const uint64_t *a);
 static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

 /**
 * @brief Exchanges two entries of a table
 *
 * After the call tab[elt1] holds the former tab[elt2] and vice versa.
 * Passing the same index twice leaves the table unchanged.
 *
 * @param[in,out] tab Pointer to the table
 * @param[in] elt1 Index of the first entry
 * @param[in] elt2 Index of the second entry
 */
 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t *first = &tab[elt1];
    uint16_t *second = &tab[elt2];
    const uint16_t saved = *second;

    *second = *first;
    *first = saved;
 }



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * The part of a(x) located at bit PARAM_N and above is folded back onto the low
 * VEC_N_SIZE_64 words, then the bits above PARAM_N in the last word are cleared
 * with RED_MASK. The loop reads a[] up to index 2 * VEC_N_SIZE_64 - 1.
 *
 * @param[out] o Pointer to the result (VEC_N_SIZE_64 words)
 * @param[in] a Pointer to the polynomial a(x)
 */
 static void reduce(uint64_t *o, const uint64_t *a) {
    const unsigned int tail_bits = PARAM_N & 63;

    for (size_t word = 0 ; word < VEC_N_SIZE_64 ; ++word) {
        // Bits of the overflowing part that land in this word come from two source words
        const uint64_t from_lower = a[word + VEC_N_SIZE_64 - 1] >> tail_bits;
        const uint64_t from_upper = (uint64_t) (a[word + VEC_N_SIZE_64] << (64 - tail_bits));

        o[word] = a[word] ^ from_lower ^ from_upper;
    }

    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Computes the product of a dense polynomial with a sparse polynomial
 *
 *  o(x) ^= a1(x)a2(x), without reduction
 *
 * The 16 copies of a2 shifted left by 0..15 bits are precomputed, each stored in a
 * table slot chosen by a random permutation drawn from ctx. The monomials of a1 are
 * then visited in a second random order; for a monomial of degree d, the copy shifted
 * by (d & 0xf) is XORed into o at an offset of (d >> 4) 16-bit cells. <br>
 * The result is accumulated with XOR, so o must be cleared by the caller. <br>
 * The local permutation buffers hold PARAM_OMEGA_E entries, so weight must not exceed it.
 *
 * @param[in,out] o Pointer to the result accumulator
 * @param[in] a1 Pointer to the sparse polynomial (list of degrees of its monomials)
 * @param[in] a2 Pointer to the dense polynomial (VEC_N_SIZE_64 words)
 * @param[in] weight Hamming weight of the sparse polynomial a1
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
 static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t carry;
    uint32_t dec, s;
    uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
    uint16_t permuted_table[16];
    uint16_t permutation_table[16];
    uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;

    // Start from the identity, then shuffle the 16 table slots with bytes drawn from ctx
    for (uint32_t i = 0 ; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0 ; i < 15 ; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    // Slot for shift 0: plain copy of a2 with an empty extra word
    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    // Slots for shifts 1..15: a2 << i, the bits shifted out of the top word go to the extra word
    for (uint32_t i = 1 ; i < 16 ; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    // Random processing order for the monomials of a1
    for (uint32_t i = 0 ; i < weight ; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0 ; i + 1 < weight ; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0 ; i < weight ; i++) {
        // Degree = 16 * s + dec: dec selects the pre-shifted copy, s the 16-bit cell offset in o
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        // NOTE(review): o is accessed through a uint16_t pointer and each 64-bit word is
        // written lowest 16 bits first, which looks like it assumes a little-endian layout -- confirm.
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
            *res_16++ ^= (uint16_t) (pt[j] >> 48);
        }
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * Multiplies the sparse polynomial <b>a1</b> (given as a list of <b>weight</b> monomial degrees)
 * by the dense polynomial <b>a2</b>: the unreduced product is accumulated in a zeroed
 * scratch buffer by fast_convolution_mult() and then folded by reduce().
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial
 * @param[in] a2 Pointer to the dense polynomial
 * @param[in] weight Integer that is the weight of the sparse polynomial
 * @param[in] ctx Pointer to the randomness context
 */
 void PQCLEAN_HQC128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    // The convolution XORs into its output, so the scratch buffer must start cleared
    uint64_t unreduced[2 * VEC_N_SIZE_64 + 1] = {0};

    fast_convolution_mult(unreduced, a1, a2, weight, ctx);
    reduce(o, unreduced);
 }
--- a/crypto_kem/hqc-128/clean/gf2x.h
+++ b/crypto_kem/hqc-128/clean/gf2x.h
@@ -0,0 +1,18 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */

 #include "nistseedexpander.h"
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 /* o = a1 * a2 mod X^n - 1, where a1 is a sparse polynomial given as `weight` monomial degrees, a2 is dense, and ctx supplies the randomness used to shuffle the computation. */
 void PQCLEAN_HQC128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


 #endif
--- a/crypto_kem/hqc-128/clean/hqc.c
+++ b/crypto_kem/hqc-128/clean/hqc.c
@@ -0,0 +1,143 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * Two fresh seeds are drawn with randombytes(). The secret seed expands into the
 * vectors <b>x</b> and <b>y</b>; the public seed expands into <b>h</b>. The syndrome
 * <b>s</b> = x + y.h is then computed.
 *
 * The public key is the public seed together with <b>s</b>. The secret key is the
 * secret seed followed by the public key.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQC128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    uint8_t secret_seed[SEED_BYTES] = {0};
    uint8_t public_seed[SEED_BYTES] = {0};
    AES_XOF_struct secret_xof;
    AES_XOF_struct public_xof;
    uint64_t vec_x[VEC_N_SIZE_64] = {0};
    uint32_t support_y[PARAM_OMEGA] = {0};
    uint64_t vec_h[VEC_N_SIZE_64] = {0};
    uint64_t syndrome[VEC_N_SIZE_64] = {0};

    // One seed expander per key; the bytes from offset 32 on are passed as second seed argument
    randombytes(secret_seed, SEED_BYTES);
    seedexpander_init(&secret_xof, secret_seed, secret_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(public_seed, SEED_BYTES);
    seedexpander_init(&public_xof, public_seed, public_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors x (dense form) and y (list of coordinates)
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(&secret_xof, vec_x, PARAM_OMEGA);
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&secret_xof, support_y, PARAM_OMEGA);

    // Public vector h and syndrome s = x + y.h
    PQCLEAN_HQC128_CLEAN_vect_set_random(&public_xof, vec_h);
    PQCLEAN_HQC128_CLEAN_vect_mul(syndrome, support_y, vec_h, PARAM_OMEGA, &secret_xof);
    PQCLEAN_HQC128_CLEAN_vect_add(syndrome, vec_x, syndrome, VEC_N_SIZE_64);

    // Serialize both keys
    PQCLEAN_HQC128_CLEAN_hqc_public_key_to_string(pk, public_seed, syndrome);
    PQCLEAN_HQC128_CLEAN_hqc_secret_key_to_string(sk, secret_seed, pk);
 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>:
 * u = r1 + r2.h and v = m.G + s.r2 + e, where r1, r2 and e are derived
 * deterministically from <b>theta</b>.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_xof;
    uint64_t vec_h[VEC_N_SIZE_64] = {0};
    uint64_t vec_s[VEC_N_SIZE_64] = {0};
    uint64_t vec_r1[VEC_N_SIZE_64] = {0};
    uint32_t support_r2[PARAM_OMEGA_R] = {0};
    uint64_t vec_e[VEC_N_SIZE_64] = {0};
    uint64_t codeword[VEC_N_SIZE_64] = {0};
    uint64_t masked[VEC_N_SIZE_64] = {0};

    // All encryption randomness comes from theta
    seedexpander_init(&theta_xof, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Recover h and s from the public key
    PQCLEAN_HQC128_CLEAN_hqc_public_key_from_string(vec_h, vec_s, pk);

    // Draw r1, r2 and e, in that order
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(&theta_xof, vec_r1, PARAM_OMEGA_R);
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&theta_xof, support_r2, PARAM_OMEGA_R);
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(&theta_xof, vec_e, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQC128_CLEAN_vect_mul(u, support_r2, vec_h, PARAM_OMEGA_R, &theta_xof);
    PQCLEAN_HQC128_CLEAN_vect_add(u, vec_r1, u, VEC_N_SIZE_64);

    // Encode the message into v, then widen it from PARAM_N1N2 to PARAM_N bits
    PQCLEAN_HQC128_CLEAN_code_encode(v, m);
    PQCLEAN_HQC128_CLEAN_vect_resize(codeword, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, truncated back to PARAM_N1N2 bits
    PQCLEAN_HQC128_CLEAN_vect_mul(masked, support_r2, vec_s, PARAM_OMEGA_R, &theta_xof);
    PQCLEAN_HQC128_CLEAN_vect_add(masked, vec_e, masked, VEC_N_SIZE_64);
    PQCLEAN_HQC128_CLEAN_vect_add(masked, codeword, masked, VEC_N_SIZE_64);
    PQCLEAN_HQC128_CLEAN_vect_resize(v, PARAM_N1N2, masked, PARAM_N);
 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y and decodes the result into the message. A fresh random seed
 * is drawn on every call to feed the seed expander handed to the multiplication.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC128_CLEAN_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t shuffle_seed[SEED_BYTES] = {0};
    AES_XOF_struct shuffle_xof;
    uint64_t vec_x[VEC_N_SIZE_64] = {0};
    uint32_t support_y[PARAM_OMEGA] = {0};
    uint8_t public_key[PUBLIC_KEY_BYTES] = {0};
    uint64_t widened_v[VEC_N_SIZE_64] = {0};
    uint64_t noisy_codeword[VEC_N_SIZE_64] = {0};

    // Rebuild x and y from the secret key (the embedded public key is extracted as well)
    PQCLEAN_HQC128_CLEAN_hqc_secret_key_from_string(vec_x, support_y, public_key, sk);

    // Randomness used only inside the multiplication
    randombytes(shuffle_seed, SEED_BYTES);
    seedexpander_init(&shuffle_xof, shuffle_seed, shuffle_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // v - u.y (addition and subtraction coincide here)
    PQCLEAN_HQC128_CLEAN_vect_resize(widened_v, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQC128_CLEAN_vect_mul(noisy_codeword, support_y, u, PARAM_OMEGA, &shuffle_xof);
    PQCLEAN_HQC128_CLEAN_vect_add(noisy_codeword, widened_v, noisy_codeword, VEC_N_SIZE_64);

    // Decode v - u.y to obtain m
    PQCLEAN_HQC128_CLEAN_code_decode(m, noisy_codeword);
 }
--- a/crypto_kem/hqc-128/clean/hqc.h
+++ b/crypto_kem/hqc-128/clean/hqc.h
@@ -0,0 +1,21 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 #include <stdint.h>

 /* Generates a key pair: pk receives the public key, sk the secret seed followed by the public key. */
 void PQCLEAN_HQC128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 /* Encrypts m under pk into (u, v); all randomness is derived from theta. */
 void PQCLEAN_HQC128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk);

 /* Decrypts (u, v) with sk and writes the decoded message to m. */
 void PQCLEAN_HQC128_CLEAN_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-128/clean/kem.c
+++ b/crypto_kem/hqc-128/clean/kem.c
@@ -0,0 +1,138 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper: the key pair is produced by the underlying PKE key generation.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (this function always reports success)
 */
 int PQCLEAN_HQC128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {
    const int status = 0;

    PQCLEAN_HQC128_CLEAN_hqc_pke_keygen(pk, sk);

    return status;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn and encrypted with randomness theta = SHA3-512(m).
 * The ciphertext is (u, v, d) with d = SHA-512(m); the shared secret is
 * SHA-512(m || u || v).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 (this function always reports success)
 */
 int PQCLEAN_HQC128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *cursor = mc;

    // Random message and the encryption seed derived from it
    PQCLEAN_HQC128_CLEAN_vect_set_random_from_randombytes(m);
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // (u, v) = Enc(pk, m; theta)
    PQCLEAN_HQC128_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk);

    // d = SHA-512(m)
    sha512(d, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Shared secret = SHA-512(m || u || v)
    memcpy(cursor, m, VEC_K_SIZE_BYTES);
    cursor += VEC_K_SIZE_BYTES;
    memcpy(cursor, u, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;
    memcpy(cursor, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, sizeof mc);

    // Serialize the ciphertext
    PQCLEAN_HQC128_CLEAN_hqc_ciphertext_to_string(ct, u, v, d);

    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted to m', which is re-encrypted with theta' = SHA3-512(m')
 * to obtain (u', v') and d' = SHA-512(m'). The shared secret SHA-512(m' || u || v) is
 * released only when (u, v, d) matches (u', v', d'); otherwise ss is set to all zeros.
 *
 * The three comparisons are always all evaluated and d is compared without an early
 * exit, so the amount of work does not depend on where a forged ciphertext first differs.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQC128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    int8_t result = -1;
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char d_diff = 0;
    unsigned char keep_mask;
    int u_match, v_match, d_match;

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQC128_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQC128_CLEAN_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQC128_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES, u, VEC_N_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Accumulate every byte difference of d instead of stopping at the first one
    for (size_t i = 0 ; i < SHA512_BYTES ; i++) {
        d_diff |= (unsigned char) (d[i] ^ d2[i]);
    }

    // Abort if c != c' or d != d'; bitwise & keeps all three checks unconditional
    u_match = (PQCLEAN_HQC128_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0);
    v_match = (PQCLEAN_HQC128_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0);
    d_match = (d_diff == 0);
    result = (int8_t) (u_match & v_match & d_match);

    // 0xFF keeps the shared secret, 0x00 wipes it
    keep_mask = (unsigned char) (0 - result);
    for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) {
        ss[i] &= keep_mask;
    }

    // Map 1 (match) to 0 and 0 (mismatch) to -1
    result--;

    return result;
 }
--- a/crypto_kem/hqc-128/clean/parameters.h
+++ b/crypto_kem/hqc-128/clean/parameters.h
@@ -0,0 +1,123 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H
 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */

 #include "api.h"
 #include "api.h"
 #include "vector.h"


 #define CEIL_DIVIDE(a, b)  (((a)/(b)) + ((a) % (b) == 0 ? 0 : 1)) /*!< Divide a by b and ceil the result*/
 #define BITMASK(a, size) ((1ULL << ((a) % (size))) - 1) /*!< Mask with the low (a mod size) bits set; 1ULL keeps shifts up to 63 defined where unsigned long is only 32 bits wide*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of BCH code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of the repetition code)
  #define PARAM_N1N2                            Define the parameter n1 * n2 of the scheme (length of the tensor code)
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_T                               Define a threshold for decoding repetition code word (PARAM_T = (PARAM_N2 - 1) / 2)

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the BCH code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the BCH code
  #define PARAM_G                               Define the size of the generator polynomial of BCH code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is at most PARAM_DELTA=57
                                                The smallest power of 2 greater than 57+1 is 64=2^6
  #define PARAM_FFT_T                           The additive FFT transpose computes a (2^PARAM_FFT_T)-sized syndrome vector
                                                We want to compute 2*PARAM_DELTA=114 syndromes
                                                The smallest power of 2 greater than 114 is 2^7
  #define PARAM_BCH_POLY                        Generator polynomial of the BCH code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                 23869
 #define PARAM_N1                                766
 #define PARAM_N2                                31
 #define PARAM_N1N2                              23746
 #define PARAM_OMEGA                             67
 #define PARAM_OMEGA_E                           77
 #define PARAM_OMEGA_R                           77
 #define PARAM_SECURITY                          128
 #define PARAM_DFR_EXP                           128

 #define SECRET_KEY_BYTES                        PQCLEAN_HQC128_CLEAN_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQC128_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQC128_CLEAN_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQC128_CLEAN_CRYPTO_CIPHERTEXTBYTES

 #define UTILS_REJECTION_THRESHOLD               16756038
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_BYTES                       CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 64)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 64)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_T                                 15

 #define PARAM_DELTA                             57
 #define PARAM_M                                 10
 #define PARAM_GF_POLY                           0x409
 #define PARAM_GF_MUL_ORDER                      1023
 #define PARAM_K                                 256
 #define PARAM_G                                 511
 #define PARAM_FFT                               6
 #define PARAM_FFT_T                             7
 #define PARAM_BCH_POLY { \
        1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1, \
        1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0, \
        0,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, \
        1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0, \
        0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0, \
        1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0, \
        0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,0,1, \
        1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, \
        1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1, \
        1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,1, \
        0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1, \
        1,0,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1, \
        0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1, \
        1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1 \
    };

 #define RED_MASK                                0x1fffffffffffffffUL
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/crypto_kem/hqc-128/clean/parsing.c
+++ b/crypto_kem/hqc-128/clean/parsing.c
@@ -0,0 +1,121 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */



 /**
 * @brief Serializes a secret key into a byte string
 *
 * The output is the concatenation of the secret seed (SEED_BYTES bytes, from which
 * the vectors <b>x</b> and <b>y</b> are regenerated) and the public key
 * (PUBLIC_KEY_BYTES bytes). The public key is appended only to respect the NIST API.
 *
 * @param[out] sk Buffer of at least SEED_BYTES + PUBLIC_KEY_BYTES bytes receiving the secret key
 * @param[in] sk_seed Seed used to generate the secret key (SEED_BYTES bytes)
 * @param[in] pk String containing the public key (PUBLIC_KEY_BYTES bytes)
 */
 void PQCLEAN_HQC128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *seed_part = sk;
    uint8_t *pk_part = sk + SEED_BYTES;

    memcpy(seed_part, sk_seed, SEED_BYTES);
    memcpy(pk_part, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Rebuilds the secret vectors and the public key from a secret key string
 *
 * The first SEED_BYTES bytes of <b>sk</b> seed a seed expander from which <b>x</b> and then
 * <b>y</b> are sampled (in that order), each with Hamming weight PARAM_OMEGA. The remaining
 * PUBLIC_KEY_BYTES bytes of <b>sk</b> are the public key, copied out unchanged.
 *
 * Note: the sampler used for <b>x</b> ORs bits into its output, so <b>x</b> is expected
 * to be zero-initialized by the caller.
 *
 * @param[out] x Bit-vector (uint64_t words) representation of vector x
 * @param[out] y Coordinate (uint32_t positions) representation of vector y
 * @param[out] pk String receiving the public key
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct expander;

    // Work on a local copy of the seed; the expander is given the seed start and its tail at byte 32
    memcpy(seed, sk, sizeof seed);
    seedexpander_init(&expander, &seed[0], &seed[32], SEEDEXPANDER_MAX_LENGTH);

    // Sampling order matters: x first, then y, from the same expander stream
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&expander, y, PARAM_OMEGA);

    // The public key follows the seed inside the secret key string
    memcpy(pk, &sk[SEED_BYTES], PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Serializes a public key into a byte string
 *
 * The output is the seed used to generate the vector <b>h</b> (SEED_BYTES bytes)
 * followed by the first VEC_N_SIZE_BYTES bytes of the syndrome <b>s</b>, copied
 * as they are laid out in memory.
 *
 * @param[out] pk String receiving the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQC128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *seed_part = pk;
    uint8_t *syndrome_part = pk + SEED_BYTES;

    memcpy(seed_part, pk_seed, SEED_BYTES);
    memcpy(syndrome_part, s, VEC_N_SIZE_BYTES);
 }



 /**
 * @brief Rebuilds the vectors h and s from a public key string
 *
 * The first SEED_BYTES bytes of <b>pk</b> seed a seed expander from which <b>h</b> is
 * regenerated; the following VEC_N_SIZE_BYTES bytes are copied into <b>s</b>.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct expander;

    // Regenerate h from a local copy of the seed (expander gets the seed start and its tail at byte 32)
    memcpy(seed, pk, sizeof seed);
    seedexpander_init(&expander, &seed[0], &seed[32], SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQC128_CLEAN_vect_set_random(&expander, h);

    // The syndrome follows the seed inside the public key string
    memcpy(s, &pk[SEED_BYTES], VEC_N_SIZE_BYTES);
 }


 /**
 * @brief Serializes a ciphertext into a byte string
 *
 * The output is the concatenation of <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQC128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *cursor = ct;

    memcpy(cursor, u, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;

    memcpy(cursor, v, VEC_N1N2_SIZE_BYTES);
    cursor += VEC_N1N2_SIZE_BYTES;

    memcpy(cursor, d, SHA512_BYTES);
 }


 /**
 * @brief Splits a ciphertext string into its components
 *
 * Reads, in order, <b>u</b> (VEC_N_SIZE_BYTES bytes), <b>v</b> (VEC_N1N2_SIZE_BYTES bytes)
 * and the hash <b>d</b> (SHA512_BYTES bytes) from <b>ct</b>.
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQC128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *cursor = ct;

    memcpy(u, cursor, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;

    memcpy(v, cursor, VEC_N1N2_SIZE_BYTES);
    cursor += VEC_N1N2_SIZE_BYTES;

    memcpy(d, cursor, SHA512_BYTES);
 }
--- a/crypto_kem/hqc-128/clean/parsing.h
+++ b/crypto_kem/hqc-128/clean/parsing.h
@@ -0,0 +1,29 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 #include <stdint.h>

 #include <stdint.h>

 // Writes sk as the secret seed followed by the public key pk.
 void PQCLEAN_HQC128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 // Regenerates x and y from the seed stored in sk and copies out the embedded public key.
 void PQCLEAN_HQC128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


 // Writes pk as the public seed followed by the syndrome s.
 void PQCLEAN_HQC128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 // Regenerates h from the seed stored in pk and copies out the syndrome s.
 void PQCLEAN_HQC128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 // Writes ct as the concatenation of u, v and the hash d.
 void PQCLEAN_HQC128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 // Splits ct into u, v and the hash d.
 void PQCLEAN_HQC128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/crypto_kem/hqc-128/clean/repetition.c
+++ b/crypto_kem/hqc-128/clean/repetition.c
@@ -0,0 +1,92 @@
 #include "parameters.h"
 #include "repetition.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 /**
 * @file repetition.c
 * @brief Implementation of repetition codes
 */

 // Mask keeping the PARAM_N2 low-order bits of a word (one repetition-code block)
 #define MASK_N2                              ((1UL << PARAM_N2) - 1)

 // Forward declaration; the definition follows the encoder below
 static inline int32_t popcount(uint64_t n);

 /**
 * @brief XORs the repetition of one message bit into the code word
 *
 * The run of repeated bits starts at bit position <b>pos_r</b> of <b>em</b> and may
 * straddle two consecutive 64-bit words, so both em[pos_r / 64] and the following
 * word are always touched (with a zero mask when <b>bit</b> is 0).
 * The masks hold 31 ones in total, which presumably matches PARAM_N2.
 *
 * @param[in,out] em Pointer to an array that is the code word
 * @param[in] bit Message bit to repeat (0 or 1)
 * @param[in] pos_r Bit position in em where the repetition starts
 */
 static inline void repetition_spread_bit(uint64_t *em, uint8_t bit, uint32_t pos_r) {
    static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};
    const uint16_t idx_r = (pos_r & 0x3f);
    uint64_t *word = em + (pos_r >> 6);

    // Part of the run that fits in the first word
    word[0] ^= mask[bit][0] << idx_r;
    // Part of the run that spills into the next word (empty when the run fits)
    word[1] ^= mask[bit][1] >> (63 - idx_r);
 }

 /**
 * @brief Encoding each bit in the message m using the repetition code
 *
 * Message bit number k is repeated starting at bit position PARAM_N2 * k of the
 * code word. The result is XORed into <b>em</b>, so <b>em</b> is expected to be
 * zero-initialized by the caller.
 *
 * @param[out] em Pointer to an array that is the code word
 * @param[in] m Pointer to an array that is the message (PARAM_N1 bits)
 */
 void PQCLEAN_HQC128_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
    const size_t last_word = VEC_N1_SIZE_64 - 1;
    const size_t tail_bits = (PARAM_N1 & 0x3f);

    // Full 64-bit words of the message
    for (size_t i = 0 ; i < last_word ; i++) {
        for (size_t j = 0 ; j < 64 ; j++) {
            repetition_spread_bit(em, (m[i] >> j) & 0x1, PARAM_N2 * ((i << 6) + j));
        }
    }

    // Remaining bits of the last, partially used word
    for (size_t j = 0 ; j < tail_bits ; j++) {
        repetition_spread_bit(em, (m[last_word] >> j) & 0x1, PARAM_N2 * ((last_word << 6) + j));
    }
 }



 /**
 * @brief Computes the Hamming weight of the 64-bit integer n
 *
 * Branch-free bit-parallel count following
 * Henry S. Warren : "Hacker's Delight", chap 5., p. 66:
 * bits are summed in groups of 2, then 4, then 8, and the eight byte sums
 * are finally added together with one multiplication.
 *
 * @param[in] n a 64-bit integer
 * @returns the Hamming weight of n (between 0 and 64)
 */
 static inline int32_t popcount(uint64_t n) {
    const uint64_t pair_sums = n - ((n >> 1) & UINT64_C(0x5555555555555555));
    const uint64_t nibble_sums = (pair_sums & UINT64_C(0x3333333333333333))
                                 + ((pair_sums >> 2) & UINT64_C(0x3333333333333333));
    const uint64_t byte_sums = (nibble_sums + (nibble_sums >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f);

    // The multiplication accumulates all byte sums into the top byte
    return (int32_t) ((byte_sums * UINT64_C(0x0101010101010101)) >> 56);
 }



 /**
 * @brief Decoding the code words to a message using the repetition code
 *
 * Majority decoding: the code word is cut into consecutive blocks of PARAM_N2 bits
 * (PARAM_N2 = 2 * PARAM_T + 1); a block decodes to 1 when its Hamming weight is
 * greater than PARAM_T and to 0 otherwise.
 * Decoded bits are ORed into <b>m</b>, so <b>m</b> is expected to be zero-initialized
 * by the caller.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t out_bit = 0;

    for (size_t first = 0 ; first < PARAM_N1N2 - PARAM_N2 + 1 ; first += PARAM_N2) {
        const size_t last = first + PARAM_N2 - 1;
        const size_t first_word = first >> 6;
        const size_t last_word = last >> 6;

        // Bits of the block held in the first word, moved down to bit 0
        const uint64_t low_part = (em[first_word] >> (first & 63)) & MASK_N2;
        // Bits of the block held in the last word, moved up to the top of the word
        const uint64_t high_part = em[last_word] << (63 - (last & 63));
        // 1 when the block straddles two words, 0 otherwise (high_part is then discarded)
        const int64_t straddles = (last_word == (first_word + 1));

        const uint64_t ones = popcount(low_part | (high_part * straddles));

        m[out_bit >> 6] |= ((uint64_t) (ones > PARAM_T)) << (out_bit & 63);
        out_bit++;
    }
 }
--- a/crypto_kem/hqc-128/clean/repetition.h
+++ b/crypto_kem/hqc-128/clean/repetition.h
@@ -0,0 +1,19 @@
 #ifndef REPETITION_H
 #define REPETITION_H


 /**
 * @file repetition.h
 * @brief Header file for repetition.c
 */

 #include <stdint.h>

 #include <stdint.h>

 // Repeats every bit of the message m into the code word em.
 void PQCLEAN_HQC128_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m);

 // Recovers the message m from the code word em by majority vote on each block.
 void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-128/clean/vector.c
+++ b/crypto_kem/hqc-128/clean/vector.c
@@ -0,0 +1,226 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */


 /**
 * @brief Generates a vector of a given Hamming weight, stored by position
 *
 * This function samples <b>weight</b> pairwise distinct positions in the interval [0, PARAM_N - 1]
 * and writes them to <b>v</b> (one uint32_t per position).
 * Suppose PARAM_N is equal to \f$ 70853 \f$; to select a position \f$ r\f$ the function works as follows:
 *  1. It reads 3 bytes from the seedexpander output to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It returns \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position that was already selected is discarded and sampled again.
 *
 * @param[in,out] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array receiving <b>weight</b> positions
 * @param[in] weight Integer that is the Hamming weight (expected to be <= PARAM_OMEGA_R)
 */
 void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
    const size_t pool_size = 3 * weight;
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    size_t cursor = 0;
    uint32_t accepted = 0;

    seedexpander(ctx, pool, pool_size);

    while (accepted < weight) {
        uint32_t candidate = 0;
        uint8_t duplicate = 0;

        // Rejection sampling of a 24-bit value below the threshold
        do {
            // Refill the pool once all its bytes have been consumed
            if (cursor == pool_size) {
                seedexpander(ctx, pool, pool_size);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;
        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;

        // Scan every accepted position (no early exit)
        for (uint32_t k = 0 ; k < accepted ; k++) {
            if (v[k] == candidate) {
                duplicate = 1;
            }
        }

        // Keep the position only if it is new; otherwise sample again
        if (duplicate == 0) {
            v[accepted] = candidate;
            accepted++;
        }
    }
 }



 /**
 * @brief Generates a vector of a given Hamming weight, stored as a bit vector
 *
 * This function samples <b>weight</b> pairwise distinct positions in the interval [0, PARAM_N - 1]
 * and sets the corresponding bits of <b>v</b>. Bits are ORed into <b>v</b>, so <b>v</b> is expected
 * to be zero-initialized by the caller.
 * Suppose PARAM_N is equal to \f$ 70853 \f$; to select a position \f$ r\f$ the function works as follows:
 *  1. It reads 3 bytes from the seedexpander output to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It returns \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position that was already selected is discarded and sampled again.
 *
 * @param[in,out] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to an array of 64-bit words in which the selected bits are set
 * @param[in] weight Integer that is the Hamming weight (expected to be <= PARAM_OMEGA_R)
 */
 void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    const size_t pool_size = 3 * weight;
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    uint32_t positions[PARAM_OMEGA_R] = {0};
    size_t cursor = 0;
    uint32_t accepted = 0;

    seedexpander(ctx, pool, pool_size);

    while (accepted < weight) {
        uint32_t candidate = 0;
        uint8_t duplicate = 0;

        // Rejection sampling of a 24-bit value below the threshold
        do {
            // Refill the pool once all its bytes have been consumed
            if (cursor == pool_size) {
                seedexpander(ctx, pool, pool_size);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;
        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;

        // Scan every accepted position (no early exit)
        for (uint32_t k = 0 ; k < accepted ; k++) {
            if (positions[k] == candidate) {
                duplicate = 1;
            }
        }

        // Keep the position only if it is new; otherwise sample again
        if (duplicate == 0) {
            positions[accepted] = candidate;
            accepted++;
        }
    }

    // Convert the list of positions into bits of v
    for (uint16_t i = 0 ; i < weight ; ++i) {
        const int32_t word = positions[i] / 64;
        const int32_t bit = positions[i] % 64;
        v[word] |= ((uint64_t) 1) << bit;
    }
 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seedexpander and copied into <b>v</b>;
 * the last 64-bit word of <b>v</b> is then ANDed with BITMASK(PARAM_N, 64) to drop
 * the extra bits.
 *
 * @param[in,out] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 */
 void PQCLEAN_HQC128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *top_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    memcpy(v, stream, VEC_N_SIZE_BYTES);

    // Clear the bits beyond position PARAM_N - 1
    *top_word &= BITMASK(PARAM_N, 64);
 }



 /**
 * @brief Generates a random vector
 *
 * VEC_K_SIZE_BYTES bytes are drawn with the randombytes function and copied into <b>v</b>.
 *
 * @param[out] v Pointer to an array receiving VEC_K_SIZE_BYTES random bytes
 */
 void PQCLEAN_HQC128_CLEAN_vect_set_random_from_randombytes(uint64_t *v) {
    uint8_t fresh[VEC_K_SIZE_BYTES] = {0};

    randombytes(fresh, sizeof fresh);
    memcpy(v, fresh, sizeof fresh);
 }



 /**
 * @brief Adds two vectors over GF(2)
 *
 * Word-wise XOR of <b>v1</b> and <b>v2</b>. The output may be the same array as
 * either input, since each output word only depends on the input words at the same index.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
 void PQCLEAN_HQC128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint64_t *dst = o;
    uint64_t *const dst_end = o + size;

    while (dst != dst_end) {
        *dst = *v1 ^ *v2;
        ++dst;
        ++v1;
        ++v2;
    }
 }


 /**
 * @brief Compares two vectors in constant time
 *
 * The running time depends only on <b>size</b>, never on the contents of the vectors or on
 * the position of the first difference. This matters because the vectors compared may be
 * derived from secret data; memcmp may return as soon as a difference is found.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors, in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 int PQCLEAN_HQC128_CLEAN_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    const uint8_t *a = (const uint8_t *) v1;
    const uint8_t *b = (const uint8_t *) v2;
    uint8_t diff = 0;

    // Accumulate every differing bit; no data-dependent branch or early exit
    for (uint32_t i = 0 ; i < size ; ++i) {
        diff |= (uint8_t) (a[i] ^ b[i]);
    }

    // Branch-free reduction: 0 stays 0, any non-zero value in [1, 255] becomes 1
    return (int) (((uint32_t) diff + 0xFF) >> 8);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When growing (size_o >= size_v) the input bytes are copied unchanged.
 * When shrinking, VEC_N1N2_SIZE_BYTES bytes are copied and, if <b>size_o</b> is not a
 * multiple of 64, the top 64 - (size_o % 64) bits of word VEC_N1N2_SIZE_64 - 1 are cleared.
 * NOTE(review): the shrinking path uses the fixed VEC_N1N2 sizes rather than sizes derived
 * from size_o, so it presumably expects size_o == PARAM_N1N2 - confirm against callers.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQC128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    // Growing (or same size): plain copy of the input bytes
    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    const uint32_t used_bits = size_o % 64;
    if (used_bits != 0) {
        // Shifting the 63-one mask right by (63 - used_bits) leaves exactly used_bits low ones
        const uint64_t mask = 0x7FFFFFFFFFFFFFFF;
        o[VEC_N1N2_SIZE_64 - 1] &= (mask >> (63 - used_bits));
    }
 }
--- a/crypto_kem/hqc-128/clean/vector.h
+++ b/crypto_kem/hqc-128/clean/vector.h
@@ -0,0 +1,31 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */

 #include "nistseedexpander.h"
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 // Samples weight distinct positions in [0, PARAM_N - 1] and stores them in v.
 void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight);

 // Samples weight distinct positions in [0, PARAM_N - 1] and sets the matching bits of v.
 void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 // Fills v with PARAM_N random bits drawn from the seed expander.
 void PQCLEAN_HQC128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 // Fills v with VEC_K_SIZE_BYTES bytes drawn from randombytes.
 void PQCLEAN_HQC128_CLEAN_vect_set_random_from_randombytes(uint64_t *v);


 // o = v1 XOR v2 over size 64-bit words.
 void PQCLEAN_HQC128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 // Returns 0 when the first size bytes of v1 and v2 are equal, non-zero otherwise.
 int PQCLEAN_HQC128_CLEAN_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size);

 // Copies v (size_v bits) into o (size_o bits), clearing surplus top bits when shrinking.
 void PQCLEAN_HQC128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/crypto_kem/hqc-192/META.yml
+++ b/crypto_kem/hqc-192/META.yml
@@ -0,0 +1,34 @@
 name: HQC-192
 type: kem
 claimed-nist-level: 3
 claimed-security: IND-CCA2
 length-ciphertext: 11364
 length-public-key: 5690
 length-secret-key: 5730
 length-shared-secret: 64
 nistkat-sha256: b49351ae5bdab016521254af85a0df2072b81841722c0c422bb44af22cec4418
 principal-submitters:
  - Carlos Aguilar Melchor
  - Nicolas Aragon
  - Slim Bettaieb
  - Olivier Blazy
  - Jurjen Bos
  - Jean-Christophe Deneuville
  - Philippe Gaborit
  - Edoardo Persichetti
  - Jean-Marc Robert
  - Pascal Véron
  - Gilles Zémor
  - Loïc Bidoux
 implementations:
    - name: clean
      version: 2020-05-29
    - name: avx2
      version: 2020-05-29
      supported_platforms:
          - architecture: x86_64
            operating_systems:
                - Linux
                - Darwin
            required_flags:
                - avx2
--- a/crypto_kem/hqc-192/avx2/LICENSE
+++ b/crypto_kem/hqc-192/avx2/LICENSE
@@ -0,0 +1 @@
 Public Domain
--- a/crypto_kem/hqc-192/avx2/Makefile
+++ b/crypto_kem/hqc-192/avx2/Makefile
@@ -0,0 +1,22 @@
 # This Makefile can be used with GNU Make or BSD Make

 LIB=libhqc-192_avx2.a
 HEADERS=alpha_table.h api.h bch.h code.h fft.h gen_matrix.h gf2x.h gf.h hqc.h parameters.h parsing.h repetition.h vector.h 
 OBJECTS=bch.o code.o fft.o gf2x.o gf.o hqc.o kem.o parsing.o repetition.o vector.o 

 CFLAGS=-O3 -mavx2 -mbmi -mpclmul -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS)

 all: $(LIB)

 %.o: %.s $(HEADERS)
 	$(AS) -o $@ $<

 %.o: %.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $@ $<

 $(LIB): $(OBJECTS)
 	$(AR) -r $@ $(OBJECTS)

 clean:
 	$(RM) $(OBJECTS)
 	$(RM) $(LIB)
--- a/crypto_kem/hqc-192/avx2/alpha_table.h
+++ b/crypto_kem/hqc-192/avx2/alpha_table.h
--- a/crypto_kem/hqc-192/avx2/api.h
+++ b/crypto_kem/hqc-192/avx2/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQC192_AVX2_API_H
 #define PQCLEAN_HQC192_AVX2_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 // Algorithm name
 #define PQCLEAN_HQC192_AVX2_CRYPTO_ALGNAME                      "HQC-192"

 // Sizes, in bytes, of the secret key, public key, shared secret and ciphertext
 #define PQCLEAN_HQC192_AVX2_CRYPTO_SECRETKEYBYTES               5730
 #define PQCLEAN_HQC192_AVX2_CRYPTO_PUBLICKEYBYTES               5690
 #define PQCLEAN_HQC192_AVX2_CRYPTO_BYTES                        64
 #define PQCLEAN_HQC192_AVX2_CRYPTO_CIPHERTEXTBYTES              11364

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQC192_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 40
 // (5730 - 5690, i.e. the secret key without the appended public key).

 // Key generation: fills pk and sk (sizes given by the macros above).
 int PQCLEAN_HQC192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 // Encapsulation: produces a ciphertext ct and a shared secret ss from the public key pk.
 int PQCLEAN_HQC192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 // Decapsulation: recovers the shared secret ss from the ciphertext ct and the secret key sk.
 int PQCLEAN_HQC192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-192/avx2/bch.c
+++ b/crypto_kem/hqc-192/avx2/bch.c
@@ -0,0 +1,367 @@
 #include "alpha_table.h"
 #include "bch.h"
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file bch.c
 * Constant time implementation of BCH codes
 */


 // Forward declarations of the file-local helpers defined below
 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Returns i modulo the given modulus, without branching.
 *
 * i must be less than 2*modulus.
 * Therefore, the return value is either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer whose modulo is taken
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    // i - modulus wraps around (top bit set) exactly when i < modulus
    const uint16_t wrapped = (uint16_t) (i - modulus);

    // add_back = modulus if(i < modulus) and 0 otherwise
    const uint16_t add_back = (uint16_t) (0 - (wrapped >> 15)) & modulus;

    return (uint16_t) (wrapped + add_back);
 }



 /**
 * @brief Computes the odd binary cyclotomic cosets modulo 2^m-1 for integers less than upper_bound.
 *
 * The array cosets of size 2^m-1 is filled by placing at index i the coset representative of i.
 * Entries equal to 0 are treated as "not yet assigned", so the array must be zeroed by the caller.
 * @param[in,out] cosets Array receiving the coset representatives
 * @param[in] upper_bound The upper bound
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    // Visit the odd integers below upper_bound
    for (uint16_t rep = 1 ; rep < upper_bound ; rep += 2) {
        // Skip integers that already belong to a class
        if (cosets[rep] != 0) {
            continue;
        }

        uint16_t member = rep;
        cosets[rep] = rep;

        // Complete the class by doubling PARAM_M - 1 times
        for (size_t step = 1 ; step < PARAM_M ; ++step) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Zero the whole cosets array (2 bytes per uint16_t entry): 0 means "no class assigned"
    memset(cosets, 0, 2 * PARAM_GF_MUL_ORDER);
    compute_cyclotomic_cosets(cosets, 2 * *t);

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
        // Only exponents that received a coset representative contribute a factor
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly ; j ; --j) {
            // mask = 0xffff if(bch_poly[j] != 0) and 0 otherwise (log of 0 must not contribute)
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        // The product is monic: its new leading coefficient is 1
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity
    while (cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Computes the values alpha^ij for decoding syndromes
 *
 * function to initialize a table which contains values alpha^ij for i in [0,N1[ and j in [1,2*PARAM_DELTA]
 * these values are used in order to compute the syndromes of the received word v(x)=v_0+v_1x+...+v_{n1-1}x^{n1-1}
 * value alpha^ij is stored in alpha_ij_table[2*PARAM_DELTA*i+j-1]
 * The syndromes are equal to v(alpha^k) for k in [1,2*PARAM_DELTA]
 * Size of the table is fixed to match 256 bit representation
 * Useless values are filled with 0.
 *
 * The function writes into the file-level table table_alpha_ij.
 *
 * @param[in] exp Exp look-up-table of GF
 */
 void PQCLEAN_HQC192_AVX2_table_alphaij_generation(const uint16_t *exp) {
    int32_t tmp_value;
    int16_t *alpha_tmp;

    // pre-computation of alpha^ij for i in [0, N1[ and j in [1, 2*PARAM_DELTA]
    // see comment of alpha_ij_table_init() function.
    for (uint16_t i = 0; i < PARAM_N1 ; ++i) {
        tmp_value = 0;
        // Row i of the table: 2*PARAM_DELTA consecutive entries
        alpha_tmp = table_alpha_ij + i * (PARAM_DELTA << 1);
        for (uint16_t j = 0 ; j < (PARAM_DELTA << 1) ; j++) {
            // tmp_value accumulates the exponent i*(j+1), reduced by gf_mod at every step
            tmp_value = PQCLEAN_HQC192_AVX2_gf_mod(tmp_value + i);
            alpha_tmp[j] = exp[tmp_value];
        }
    }
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite joiner1995decoding). <br>
 * We use the letter p for rho which is initialized at -1/2. <br>
 * The array X_sigma_p represents the polynomial X^(2(mu-rho))*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA - 1 coefficients.
 *
 * Conditional updates are done with 0/0xffff masks instead of branches.
 * sigma[1..PARAM_DELTA] is XOR-updated in place, so those entries are expected
 * to be zero on entry; only sigma[0] is set here.
 *
 * @returns the degree of the ELP sigma
 * @param[out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    sigma[0] = 1;
    size_t deg_sigma = 0;
    size_t deg_sigma_p = 0;
    uint16_t sigma_copy[PARAM_DELTA - 1] = {0};
    size_t deg_sigma_copy = 0;
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    int32_t pp = -1; // 2*rho
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (PARAM_DELTA - 1 coefficients of 2 bytes each)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
        deg_sigma_copy = deg_sigma;

        // sigma <- sigma + (d / d_p) * X_sigma_p
        uint16_t dd = PQCLEAN_HQC192_AVX2_gf_mul(d, PQCLEAN_HQC192_AVX2_gf_inverse(d_p)); // 0 if(d == 0)
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            sigma[i] ^= PQCLEAN_HQC192_AVX2_gf_mul(dd, X_sigma_p[i]);
        }

        size_t deg_X = 2 * mu - pp; // 2*(mu-rho)
        size_t deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        int16_t mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        int16_t mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        int16_t mask12 = mask1 & mask2;
        deg_sigma = (mask12 & deg_X_sigma_p) ^ (~mask12 & deg_sigma);

        // Last iteration: sigma is final, nothing left to prepare
        if (mu == PARAM_DELTA - 1) {
            break;
        }

        // Update pp, d_p and X_sigma_p if needed
        pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
        d_p = (mask12 & d) ^ (~mask12 & d_p);
        // Shift by two positions (multiplication by X^2), taking the saved sigma when mask12 is set
        for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
            X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }
        X_sigma_p[1] = 0;
        X_sigma_p[0] = 0;
        deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p);

        // Compute the next discrepancy
        d = syndromes[2 * mu + 2];
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            d ^= PQCLEAN_HQC192_AVX2_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Retrieves the message message from the codeword codeword
 *
 * Since we performed a systematic encoding, the message is the last PARAM_K bits of the codeword.
 * The message therefore starts at bit PARAM_N1 - PARAM_K of the codeword and each output word
 * is assembled from two consecutive codeword words.
 * NOTE(review): the shifts by (64 - val % 64) require (PARAM_N1 - PARAM_K) % 64 != 0;
 * a shift by 64 would be undefined.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the message
 * @param[in] codeword Array of size VEC_N1_SIZE_BYTES storing the codeword
 */
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
    // Bit offset of the message inside the codeword
    int32_t val = PARAM_N1 - PARAM_K;

    // mask1 keeps the high bits of a word (from bit val % 64 up), mask2 the low val % 64 bits
    uint64_t mask1 = (uint64_t) (0xffffffffffffffff << val % 64);
    uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64));
    size_t index = val / 64;

    for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
        // Low part comes from the current codeword word, high part from the next one
        uint64_t message1 = (codeword[index] & mask1) >> val % 64;
        uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64);
        message[i] = message1 | message2;
    }

    // Last 64-bit word (64 - val % 64 is the number of bits given by message1)
    if ((PARAM_K % 64 == 0) || (64 - val % 64 < PARAM_K % 64)) {
        // message1 alone does not provide all remaining bits: the next word is needed too
        uint64_t message1 = (codeword[index] & mask1) >> val % 64;
        uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64);
        message[VEC_K_SIZE_64 - 1] = message1 | message2;
    } else {
        uint64_t message1 = (codeword[index] & mask1) >> val % 64;
        message[VEC_K_SIZE_64 - 1] = message1;
    }
 }



 /**
 * @brief Computes the 2*PARAM_DELTA syndromes from the received vector
 *
 * Syndromes are the sum of powers of alpha weighted by vector's coefficients.
 * These powers have been pre-computed in table_alpha_ij.
 * Syndromes are 16-bits long, hence we can simultaneously compute 16 syndromes
 * in a 256-bit register.
 *
 * The function first expands the received word into one byte per bit (non-zero when
 * the bit is set), then, for each group of 16 syndromes, XORs together the table rows
 * selected by the set bits. Every row is loaded and masked regardless of the bit value.
 *
 * The expansion buffer is a static array, so the function is not reentrant.
 *
 * @param[out] syndromes Array of SYND_SIZE_256 256-bit words receiving the 2*PARAM_DELTA syndromes
 * @param[in] rcv Array of size VEC_N1_SIZE_BYTES storing the received word
 */
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) {
    const __m256i zero_256 = _mm256_set1_epi64x(0);
    const __m256i mask_one = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202, 0x0101010101010101, 0x0);
    const __m256i mask_two  = _mm256_set1_epi64x(-0x7FBFDFEFF7FBFDFF);
    const __m256i un_256 = _mm256_set1_epi64x(1);

    __m256i y;
    __m256i S;
    __m256i L;
    __m256i tmp_repeat;
    uint32_t aux;
    int16_t *alpha_tmp;
    // static variable so that it is kept in static storage
    // and not on the stack
    static uint8_t tmp_array[PARAM_N1 + 4]; // +4 to control overflow due to management of 256 bits
    const uint8_t *rcv_bytes = (const uint8_t *) rcv;

    // vectorized version of the separation of the coordinates of the vector v in order to put each coordinate in an unsigned char
    // aux is used to consider 4 bytes (32 coordinates) of v at each step of the loop
    for (uint32_t i = 0 ; i < ((VEC_N1_SIZE_BYTES >> 2) << 2) ; i += 4) {
        // read the next 32 bits of v through memcpy: rcv is an array of uint64_t and
        // must not be dereferenced through a uint32_t pointer
        memcpy(&aux, rcv_bytes + i, sizeof(aux));
        // duplicate aux 8 times in y , i.e y= (aux aux aux .... aux)
        y = _mm256_set1_epi32((int) aux);
        // shuffle the bytes of y so that if aux=(a0 a1 a2 a3)
        // then y = (a0 a0 a0 a0 a0 a0 a0 a0 a1 a1 a1 a1 a1 a1 a1 a1 .... a3)
        y = _mm256_shuffle_epi8(y, mask_one);
        // apply a mask on each byte of y to determine if jth bit of a_k is 0 or 1
        // 4 input bytes expand to 32 output bytes, hence the offset 8 * i;
        // tmp_array has no 32-byte alignment guarantee, so an unaligned store is required
        _mm256_storeu_si256((__m256i *) (tmp_array + 8 * (size_t) i), _mm256_and_si256(y, mask_two));
    }

    // Evaluation of the polynomial corresponding to the vector v in alpha^i for i in {1, ..., 2 * PARAM_DELTA}
    for (size_t j = 0 ; j < SYND_SIZE_256 ; ++j) {
        S = zero_256;
        // 16 syndromes (16-bit each) are handled per 256-bit word
        alpha_tmp = table_alpha_ij + (j << 4);

        for (size_t k = 0 ; k < PARAM_N1 ; ++k) {
            // L = all ones if coordinate k of v is set and 0 otherwise
            tmp_repeat = _mm256_set1_epi64x((long long)(tmp_array[k] != 0));
            L = _mm256_cmpeq_epi64(tmp_repeat, un_256);
            // row k of the table holds the powers of alpha associated with coordinate k
            tmp_repeat = _mm256_lddqu_si256((__m256i *)(alpha_tmp + k * (PARAM_DELTA << 1)));
            L = _mm256_and_si256(L, tmp_repeat);
            S = _mm256_xor_si256(L, S);
        }
        _mm256_storeu_si256(syndromes + j, S);
    }
 }


 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQC192_AVX2_fft for more details.
 *
 * @param[out] error Array of VEC_N1_SIZE_BYTES elements receiving the error polynomial
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint64_t *error, const uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0}; // w will receive the evaluation of sigma in all field elements

    PQCLEAN_HQC192_AVX2_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(error, w);
 }



 /**
 * @brief Decodes the received word
 *
 * This function relies on four steps:
 *    <ol>
 *    <li> The first step is the computation of the 2*PARAM_DELTA syndromes (compute_syndromes).
 *    <li> The second step is the computation of the error-locator polynomial sigma (compute_elp).
 *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses (compute_roots).
 *    <li> The fourth step is the correction of the errors in the received polynomial.
 *    </ol>
 * For a more complete picture on BCH decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * The received word is corrected in place before the message is extracted from it.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the decoded message
 * @param[in,out] vector Array of size VEC_N1_SIZE_BYTES storing the received word; holds the corrected codeword on return
 */

 void PQCLEAN_HQC192_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector) {
    static __m256i syndromes[SYND_SIZE_256];
    uint64_t error_poly[(1 << PARAM_M) / 8] = {0};
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    uint16_t elp[1 << PARAM_FFT] = {0};

    // Step 1: the 2*PARAM_DELTA syndromes of the received word
    compute_syndromes(syndromes, vector);

    // Step 2: the error locator polynomial, reading the syndromes as 16-bit values
    compute_elp(elp, (uint16_t *) syndromes);

    // Step 3: the error polynomial, from the roots of the error locator polynomial
    compute_roots(error_poly, elp);

    // Step 4: add the error polynomial to the received polynomial (in place)
    PQCLEAN_HQC192_AVX2_vect_add(vector, vector, error_poly, VEC_N1_SIZE_64);

    // Retrieve the message from the decoded codeword
    message_from_codeword(message, vector);
 }
--- a/crypto_kem/hqc-192/avx2/bch.h
+++ b/crypto_kem/hqc-192/avx2/bch.h
@@ -0,0 +1,23 @@
 #ifndef BCH_H
 #define BCH_H


 /**
 * @file bch.h
 * Header file of bch.c
 */

 #include "parameters.h"
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQC192_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);

 void PQCLEAN_HQC192_AVX2_table_alphaij_generation(const uint16_t *exp);


 #endif
--- a/crypto_kem/hqc-192/avx2/code.c
+++ b/crypto_kem/hqc-192/avx2/code.c
@@ -0,0 +1,104 @@
 #include "bch.h"
 #include "code.h"
 #include "gen_matrix.h"
 #include "parameters.h"
 #include "repetition.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of tensor code
 */


 static inline uint64_t mux(uint64_t a, uint64_t b, int64_t bit);

 /**
 * @brief Branch-free selection between two 64-bit words
 *
 * The selection mask is built with unsigned arithmetic only, so the result does not
 * depend on how the compiler implements a right shift of a negative signed value.
 *
 * @param[in] a Word returned when bit is zero (or negative)
 * @param[in] b Word returned when bit is strictly positive
 * @param[in] bit Selector; callers in this file pass 0 or 1
 * @returns b if bit > 0, a otherwise
 */
 static inline uint64_t mux(uint64_t a, uint64_t b, int64_t bit) {
    // (0 - bit) has its top bit set exactly when bit > 0; spread that bit over the whole word.
    uint64_t top = (((uint64_t) 0) - (uint64_t) bit) >> 63;
    uint64_t mask = ((uint64_t) 0) - top;
    return ((a ^ b) & mask) ^ a;
 }



 /**
 *
 * @brief Encodes the message m into the tensor code word em
 *
 * The message is first BCH-encoded, then every resulting bit is repeated
 * PARAM_N2 times (repetition code). The BCH redundancy bits are obtained with
 * the classical mG product: each column of the generator matrix is held in a
 * 256-bit register, ANDed with the message, and reduced to its parity.
 * The message bits themselves follow the redundancy (systematic encoding).
 *
 * Repeated bits are XORed into em, so em is expected to be cleared by the caller.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC192_AVX2_code_encode(uint64_t *em, const uint64_t *m) {
    // mask[bit][0] covers the part of a run that stays in the first word,
    // mask[bit][1] the part that may spill into the following word.
    static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}};
    const __m256i *columns = (const __m256i *) gen_matrix;
    const __m256i msg = _mm256_lddqu_si256((const __m256i *) m);
    uint32_t col;

    for (col = 0 ; col < PARAM_N1 - PARAM_K ; col++) {
        // Parity of (message AND column): fold the four 64-bit lanes together ...
        __m256i acc = _mm256_and_si256(columns[col], msg);
        __m256i folded = _mm256_permute2x128_si256(acc, acc, 1);
        acc = _mm256_xor_si256(acc, folded);
        folded = _mm256_shuffle_epi32(acc, 0x4e);
        acc = _mm256_xor_si256(folded, acc);
        // ... then take the parity of the remaining 64-bit lane.
        uint64_t parity = _mm_popcnt_u64(_mm256_extract_epi64(acc, 0)) & 1;

        // Spread the bit over PARAM_N2 consecutive positions of em.
        uint16_t start = PARAM_N2 * col;
        uint16_t offset = (start & 0x3f);
        uint64_t *word = em + (start >> 6);
        uint64_t chunk = mux(mask[0][0], mask[1][0], parity);
        word[0] ^= chunk << offset;
        chunk = mux(mask[0][1], mask[1][1], parity);
        word[1] ^= chunk >> ((63 - offset));
    }

    /* systematic part: the message bits are appended after the redundancy */
    for (int32_t limb = 0 ; limb < 4 ; limb++) {
        for (int32_t pos = 0 ; pos < 64 ; pos++) {
            uint8_t bit = (m[limb] >> pos) & 0x1;
            uint32_t start = PARAM_N2 * ((PARAM_N1 - PARAM_K) + ((limb << 6) + pos));
            uint16_t offset = (start & 0x3f);
            uint64_t *word = em + (start >> 6);

            uint64_t chunk = mux(mask[0][0], mask[1][0], bit);
            word[0] ^= chunk << offset;
            chunk = mux(mask[0][1], mask[1][1], bit);
            word[1] ^= chunk >> ((63 - offset));
        }
    }

 }


 /**
 * @brief Decodes the tensor code word em back to a message m
 *
 * The repetition code is decoded first into a temporary BCH word,
 * which is then handed to the BCH decoder.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_AVX2_code_decode(uint64_t *m, const uint64_t *em) {
    // Intermediate BCH word, zero-initialised before the repetition decoder fills it.
    uint64_t bch_word[VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQC192_AVX2_repetition_code_decode(bch_word, em);
    PQCLEAN_HQC192_AVX2_bch_code_decode(m, bch_word);

 }
--- a/crypto_kem/hqc-192/avx2/code.h
+++ b/crypto_kem/hqc-192/avx2/code.h
@@ -0,0 +1,20 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */

 #include "parameters.h"
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQC192_AVX2_code_encode(uint64_t *em, const uint64_t *message);

 void PQCLEAN_HQC192_AVX2_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-192/avx2/fft.c
+++ b/crypto_kem/hqc-192/avx2/fft.c
@@ -0,0 +1,333 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Fills in the basis of betas (1 is left out) used by the additive FFT and its transpose
 *
 * The basis consists of the powers of two 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2^1, in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    size_t shift = PARAM_M - 1;

    for (size_t idx = 0 ; idx < PARAM_M - 1 ; ++idx, --shift) {
        betas[idx] = 1 << shift;
    }
 }



 /**
 * @brief Computes all subset sums (XOR) of the given set
 *
 * Entry i of subset_sums is the XOR of the set elements selected by the bits of i
 * (bit b of i selects set[b]); entry 0 is the empty sum.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    // Number of entries already computed; doubles each time one more element is taken in.
    size_t filled = 1;

    subset_sums[0] = 0;

    for (size_t idx = 0 ; idx < set_size ; ++idx) {
        const uint16_t elt = set[idx];

        for (size_t low = 0 ; low < filled ; ++low) {
            subset_sums[filled + low] = elt ^ subset_sums[low];
        }

        filled <<= 1;
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * Inputs with m_f = 1..4 (2, 4, 8 or 16 coefficients) use fully unrolled formulas.
 * Larger inputs are split into two half-size polynomials that are converted
 * recursively, and the four results are concatenated into f0 and f1.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    // In the unrolled cases some outputs are reused as inputs of later lines
    // (e.g. f0[5] needs f1[5]), so the statement order matters.
    case 4:
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    case 3:
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    case 2:
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        return;

    case 1:
        f0[0] = f[0];
        f1[0] = f[1];
        return;

    default:
        // The empty statement lets declarations follow the label.
        ;
        // f has 4 * n coefficients, viewed as four quarters of n coefficients each.
        size_t n = 1 << (m_f - 2);

        // Scratch buffers are sized for the largest supported input (m_f == PARAM_FFT).
        uint16_t Q[2 * (1 << (PARAM_FFT - 2))];
        uint16_t R[2 * (1 << (PARAM_FFT - 2))];

        uint16_t Q0[1 << (PARAM_FFT - 2)];
        uint16_t Q1[1 << (PARAM_FFT - 2)];
        uint16_t R0[1 << (PARAM_FFT - 2)];
        uint16_t R1[1 << (PARAM_FFT - 2)];

        // memcpy lengths are in bytes: 2 * n bytes is n 16-bit coefficients.
        // Both halves of Q start as the top quarter of f; R starts as the lower half of f.
        memcpy(Q, f + 3 * n, 2 * n);
        memcpy(Q + n, f + 3 * n, 2 * n);
        memcpy(R, f, 4 * n);

        // Add the third quarter of f to the low half of Q, then that sum to the upper half of R.
        for (size_t i = 0 ; i < n ; ++i) {
            Q[i] ^= f[2 * n + i];
            R[n + i] ^= Q[i];
        }

        // Convert both half-size polynomials.
        radix(Q0, Q1, Q, m_f - 1);
        radix(R0, R1, R, m_f - 1);

        // f0 = R0 followed by Q0, f1 = R1 followed by Q1 (n coefficients each).
        memcpy(f0, R0, 2 * n);
        memcpy(f0 + n, Q0, 2 * n);
        memcpy(f1, R1, 2 * n);
        memcpy(f1 + n, Q1, 2 * n);
    }
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function fft. It follows the steps of the
 * Gao-Mateer algorithm: twist f by the last basis element, split it with the radix
 * conversion, recurse on both halves over the derived basis, and recombine.
 * Note that f is modified in place when the last beta differs from 1.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in] f Array of 2^m_f coefficients (altered during the computation)
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f Base-2 logarithm of the size of f: f is processed as an array of 2^m_f coefficients
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)];
    uint16_t f1[1 << (PARAM_FFT - 2)];
    uint16_t gammas[PARAM_M - 2];
    uint16_t deltas[PARAM_M - 2];
    size_t k = 1 << (m - 1);
    uint16_t gammas_sums[1 << (PARAM_M - 2)];
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};

    // Step 1: f has two coefficients, evaluate f[0] + f[1] * x directly on all subset sums of betas
    if (m_f == 1) {
        uint16_t tmp[PARAM_M - (PARAM_FFT - 1)];
        for (size_t i = 0 ; i < m ; ++i) {
            tmp[i] = PQCLEAN_HQC192_AVX2_gf_mul(betas[i], f[1]);
        }

        w[0] = f[0];
        for (size_t j = 0 ; j < m ; ++j) {
            // 'l' rather than 'k' so that the function-level k is not shadowed
            for (size_t l = 0 ; l < (1U << j) ; ++l) {
                w[(1 << j) + l] = w[l] ^ tmp[j];
            }
        }

        return;
    }

    // Step 2: compute g, i.e. scale coefficient i of f by betas[m - 1]^i
    if (betas[m - 1] != 1) {
        uint16_t beta_m_pow = 1;
        for (size_t i = 1 ; i < (1U << m_f) ; ++i) {
            beta_m_pow = PQCLEAN_HQC192_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQC192_AVX2_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // The inverse of the last beta does not depend on i, so it is computed once.
    const uint16_t beta_m_inv = PQCLEAN_HQC192_AVX2_gf_inverse(betas[m - 1]);
    for (uint8_t i = 0 ; i < m - 1 ; ++i) {
        gammas[i] = PQCLEAN_HQC192_AVX2_gf_mul(betas[i], beta_m_inv);
        deltas[i] = PQCLEAN_HQC192_AVX2_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (size_t i = 1 ; i < k ; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        // The upper half of w starts as v (2 * k bytes = k 16-bit values), then gets the lower half added.
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (size_t i = 1 ; i < k ; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * On this top-level call the last basis element is 1, so no twisting of f is needed
 * and the subset sums used for the recombination are those of the betas (1 excluded). <br>
 * The two halves produced by the radix conversion are altered by the recursive calls.
 *
 * @param[out] w Array receiving the 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    const size_t half = 1 << (PARAM_M - 1);
    uint16_t basis[PARAM_M - 1];
    uint16_t basis_sums[1 << (PARAM_M - 1)];
    uint16_t twisted_basis[PARAM_M - 1];
    uint16_t part0[1 << (PARAM_FFT - 1)];
    uint16_t part1[1 << (PARAM_FFT - 1)];
    uint16_t evals0[1 << (PARAM_M - 1)];
    uint16_t evals1[1 << (PARAM_M - 1)];

    // Basis of the evaluation space and all of its subset sums
    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // Steps 1 and 2 of Gao-Mateer are no-ops here: PARAM_FFT > 1 and beta_m = 1

    // Step 3: f(x) = part0(x^2 - x) + x * part1(x^2 - x)
    radix(part0, part1, f, PARAM_FFT);

    // Step 4: basis for the recursive calls, delta = beta^2 + beta
    for (size_t idx = 0 ; idx < PARAM_M - 1 ; ++idx) {
        twisted_basis[idx] = PQCLEAN_HQC192_AVX2_gf_square(basis[idx]) ^ basis[idx];
    }

    // Step 5: evaluate both parts
    fft_rec(evals0, part0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);
    fft_rec(evals1, part1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, twisted_basis);

    // Steps 6 and 7: recombine.
    // w[0] is the evaluation at 0 and w[half] the evaluation at 1.
    w[0] = evals0[0];
    w[half] = evals1[0] ^ evals0[0];

    // Remaining field elements, handled in pairs (x, x + 1)
    for (size_t idx = 1 ; idx < half ; ++idx) {
        w[idx] = evals0[idx] ^ PQCLEAN_HQC192_AVX2_gf_mul(basis_sums[idx], evals1[idx]);
        w[half + idx] = evals1[idx] ^ w[idx];
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * Every field element at which the ELP evaluates to zero toggles one bit of error.
 * Bits are addressed as 64-bit word index / 64, bit index % 64; the largest index used is
 * PARAM_GF_MUL_ORDER, so error must provide at least PARAM_GF_MUL_ORDER / 64 + 1 words.
 * Bits are XORed in, so error is expected to be cleared by the caller.
 *
 * @param[out] error Array of 64-bit words receiving the error bits
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    size_t k = 1 << (PARAM_M - 1);
    size_t index = PARAM_GF_MUL_ORDER;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // 1 ^ ((uint16_t) -x >> 15) is 1 when x == 0 and 0 for a non-zero x no larger than 0x8000:
    // a branch-free "is zero" test on the evaluation.
    error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15);

    // w[k] is the evaluation at the field element 1.
    // The word index must use the same 64-bit granularity as the bit offset (was index / 8).
    // NOTE(review): index is PARAM_GF_MUL_ORDER here, whereas the loop formula
    // PARAM_GF_MUL_ORDER - log(x) taken modulo the group order would give 0 for x = 1 -- confirm
    // which bit position is intended for this root.
    uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15);
    error[index / 64] ^= bit << (index % 64);

    for (size_t i = 1 ; i < k ; ++i) {
        // w[i] is the evaluation at gammas_sums[i]
        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_AVX2_gf_log(gammas_sums[i]);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15);
        error[index / 64] ^= bit << (index % 64);

        // w[k + i] is the evaluation at gammas_sums[i] + 1
        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_AVX2_gf_log(gammas_sums[i] ^ 1);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15);
        error[index / 64] ^= bit << (index % 64);
    }
 }
--- a/crypto_kem/hqc-192/avx2/fft.h
+++ b/crypto_kem/hqc-192/avx2/fft.h
@@ -0,0 +1,20 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 void PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w);


 #endif
--- a/crypto_kem/hqc-192/avx2/gen_matrix.h
+++ b/crypto_kem/hqc-192/avx2/gen_matrix.h
--- a/crypto_kem/hqc-192/avx2/gf.c
+++ b/crypto_kem/hqc-192/avx2/gf.c
--- a/crypto_kem/hqc-192/avx2/gf.h
+++ b/crypto_kem/hqc-192/avx2/gf.h
@@ -0,0 +1,29 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQC192_AVX2_gf_generate(uint16_t *exp, uint16_t *log, int16_t m);


 uint16_t PQCLEAN_HQC192_AVX2_gf_log(uint16_t elt);

 uint16_t PQCLEAN_HQC192_AVX2_gf_mul(uint16_t a, uint16_t b);

 uint16_t PQCLEAN_HQC192_AVX2_gf_square(uint16_t a);

 uint16_t PQCLEAN_HQC192_AVX2_gf_inverse(uint16_t a);

 uint16_t PQCLEAN_HQC192_AVX2_gf_mod(uint16_t i);


 #endif
--- a/crypto_kem/hqc-192/avx2/gf2x.c
+++ b/crypto_kem/hqc-192/avx2/gf2x.c
@@ -0,0 +1,598 @@
 #include "gf2x.h"
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>

 /**
 * \file gf2x.c
 * \brief AVX2 implementation of multiplication of two polynomials
 */


 // sizes for Toom-Cook
 // T_TM3_3W_256 is used below as a count of 256-bit vectors and T_TM3_3W_64 as the
 // matching count of 64-bit words (64 * 4 = 256).
 #define T_TM3_3W_256 64
 #define T_TM3_3W_64 256

 #define VEC_N_ARRAY_SIZE_VEC CEIL_DIVIDE(PARAM_N, 256) /*!< The number of needed vectors to store PARAM_N bits*/
 #define WORD 64
 #define LAST64 (PARAM_N >> 6) /*!< Index of the 64-bit word holding bit PARAM_N */
 // File-scope scratch buffers. They are shared by every call to the functions below.
 // NOTE(review): these objects have external linkage and un-prefixed names; bloc64 and bit64
 // are not referenced anywhere in this file -- confirm whether other translation units use them
 // before making them static or removing them.
 uint64_t a1_times_a2[2 * VEC_N_256_SIZE_64 + 1]; // unreduced product written by TOOM3Mult, read by reduce
 uint64_t tmp_reduce[VEC_N_ARRAY_SIZE_VEC << 2]; // output staging area of reduce
 __m256i *o256 = (__m256i *) tmp_reduce; // 256-bit view of tmp_reduce
 uint64_t bloc64[PARAM_OMEGA_R]; // Allocation with the biggest possible weight
 uint64_t bit64[PARAM_OMEGA_R]; // Allocation with the biggest possible weight


 static inline void reduce(uint64_t *o, const uint64_t *a);
 inline static void karat_mult_1(__m128i *C, __m128i *A, __m128i *B);
 inline static void karat_mult_2(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_4(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_8(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_16(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_32(__m256i *C, __m256i *A, __m256i *B);
 inline static void karat_mult_64(__m256i *C, __m256i *A, __m256i *B);
 static inline void divByXplus1(__m256i *out, __m256i *in, int size);
 static void TOOM3Mult(uint64_t *Out, const uint64_t *A, const uint64_t *B);



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * This function computes the modular reduction of the polynomial a(x):
 * the part of a starting at bit PARAM_N is shifted down to bit 0 and XORed onto the low part.
 * The result is staged in the file-scope buffer tmp_reduce (through its 256-bit view o256),
 * masked with RED_MASK in its last word, and then copied to o.
 *
 * @param[out] o Pointer to the result
 * @param[in] a Pointer to the polynomial a(x)
 */
 static inline void reduce(uint64_t *o, const uint64_t *a) {
    __m256i r256, carry256;
    // NOTE(review): a256[i2] below is a plain vector dereference, which presumably requires
    // a to be 32-byte aligned -- confirm for the buffer passed by the caller.
    __m256i *a256 = (__m256i *) a;
    // Bit offset of X^PARAM_N inside its 64-bit word, and the complementary shift.
    static const int32_t dec64 = PARAM_N & 0x3f;
    static const int32_t d0 = WORD - dec64;
    int32_t i, i2;

    // Four 64-bit words per iteration, starting at the word that contains bit PARAM_N.
    for (i = LAST64 ; i < (PARAM_N >> 5) - 4 ; i += 4) {
        // High part of each word, moved down by dec64 bits ...
        r256 = _mm256_lddqu_si256((__m256i const *) (& a[i]));
        r256 = _mm256_srli_epi64(r256, dec64);
        // ... completed with the low bits of the following word.
        carry256 = _mm256_lddqu_si256((__m256i const *) (& a[i + 1]));
        carry256 = _mm256_slli_epi64(carry256, d0);
        r256 ^= carry256;
        // Fold onto the matching low words of a.
        i2 = (i - LAST64) >> 2;
        o256[i2] = a256[i2] ^ r256;
    }

    // Final group of four words, processed like the loop body.
    r256 = _mm256_lddqu_si256((__m256i const *) (& a[i]));
    carry256 = _mm256_lddqu_si256((__m256i const *) (& a[i + 1]));
    r256 = _mm256_srli_epi64(r256, dec64);
    carry256 = _mm256_slli_epi64(carry256, d0);
    r256 ^= carry256;
    i2 = (i - LAST64) >> 2;
    o256[i2] = (a256[i2] ^ r256);
    // Keep only the bits selected by RED_MASK in the last word of the result.
    tmp_reduce[LAST64] &= RED_MASK;
    memcpy(o, tmp_reduce, VEC_N_SIZE_BYTES);
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 * A(x) and B(x) are stored in 128-bit registers
 * This function computes A(x)*B(x) using Karatsuba
 *
 * Two 128-bit words are read from each of A and B, and four 128-bit words are written to C.
 * Each of the three 128x128-bit sub-products is itself built from three 64x64-bit
 * carry-less multiplications.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_1(__m128i *C, __m128i *A, __m128i *B) {
    __m128i D1[2];
    __m128i D0[2], D2[2];
    __m128i Al = _mm_loadu_si128(A);
    __m128i Ah = _mm_loadu_si128(A + 1);
    __m128i Bl = _mm_loadu_si128(B);
    __m128i Bh = _mm_loadu_si128(B + 1);

    //  Compute Al.Bl=D0
    // DD0: product of the low 64-bit halves, DD2: product of the high 64-bit halves,
    // DD1: middle term obtained from (low + high) * (low + high); shuffle 0x4e swaps the halves.
    __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0);
    __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11);
    __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e));
    __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e));
    __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    // The middle term straddles the two result words: its low half goes to the top of
    // D0[0], its high half to the bottom of D0[1].
    D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    //  Compute Ah.Bh=D2
    DD0 = _mm_clmulepi64_si128(Ah, Bh, 0);
    DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11);
    AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e));
    BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Compute AlpAh.BlpBh=D1
    // Initialisation of AlpAh and BlpBh
    __m128i AlpAh = _mm_xor_si128(Al, Ah);
    __m128i BlpBh = _mm_xor_si128(Bl, Bh);
    DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0);
    DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11);
    AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e));
    BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Final computation of C
    // Karatsuba recombination: the middle words receive D0 + D1 + D2 contributions.
    __m128i middle = _mm_xor_si128(D0[1], D2[0]);
    C[0] = D0[0];
    C[1] = middle ^ D0[0] ^ D1[0];
    C[2] = middle ^ D1[1] ^ D2[1];
    C[3] = D2[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_1: A and B each hold two 256-bit words,
 * C receives four 256-bit words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_2(__m256i *C, __m256i *A, __m256i *B) {
    __m256i lo[2], mid[2], hi[2];
    __m256i sumA, sumB;

    // Products of the low halves and of the high halves
    karat_mult_1((__m128i *) lo, (__m128i *) A, (__m128i *) B);
    karat_mult_1((__m128i *) hi, ((__m128i *) A) + 2, ((__m128i *) B) + 2);

    // Product of the sums of the halves
    sumA = A[0] ^ A[1];
    sumB = B[0] ^ B[1];
    karat_mult_1((__m128i *) mid, (__m128i *) &sumA, (__m128i *) &sumB);

    // Recombination
    __m256i overlap = _mm256_xor_si256(lo[1], hi[0]);

    C[0] = lo[0];
    C[1] = overlap ^ lo[0] ^ mid[0];
    C[2] = overlap ^ mid[1] ^ hi[1];
    C[3] = hi[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_2: A and B each hold four 256-bit words,
 * C receives eight 256-bit words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_4(__m256i *C, __m256i *A, __m256i *B) {
    __m256i lo[4], mid[4], hi[4];
    __m256i sumA[2], sumB[2];

    // Products of the low halves and of the high halves
    karat_mult_2(lo, A, B);
    karat_mult_2(hi, A + 2, B + 2);

    // Product of the sums of the halves
    for (int32_t j = 0 ; j < 2 ; j++) {
        sumA[j] = A[j] ^ A[j + 2];
        sumB[j] = B[j] ^ B[j + 2];
    }

    karat_mult_2(mid, sumA, sumB);

    // Recombination
    for (int32_t j = 0 ; j < 2 ; j++) {
        __m256i overlap = _mm256_xor_si256(lo[j + 2], hi[j]);

        C[j]     = lo[j];
        C[j + 2] = overlap ^ lo[j] ^ mid[j];
        C[j + 4] = overlap ^ mid[j + 2] ^ hi[j + 2];
        C[j + 6] = hi[j + 2];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_4: A and B each hold 8 256-bit words,
 * C receives 16 256-bit words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) {
    __m256i lo[8], mid[8], hi[8];
    __m256i sumA[4], sumB[4];

    // Products of the low halves and of the high halves
    karat_mult_4(lo, A, B);
    karat_mult_4(hi, A + 4, B + 4);

    // Product of the sums of the halves
    for (int32_t j = 0 ; j < 4 ; j++) {
        sumA[j] = A[j] ^ A[j + 4];
        sumB[j] = B[j] ^ B[j + 4];
    }

    karat_mult_4(mid, sumA, sumB);

    // Recombination
    for (int32_t j = 0 ; j < 4 ; j++) {
        __m256i overlap = _mm256_xor_si256(lo[j + 4], hi[j]);

        C[j]      = lo[j];
        C[j + 4]  = overlap ^ lo[j] ^ mid[j];
        C[j + 8]  = overlap ^ mid[j + 4] ^ hi[j + 4];
        C[j + 12] = hi[j + 4];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_8: A and B each hold 16 256-bit words,
 * C receives 32 256-bit words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) {
    __m256i lo[16], mid[16], hi[16];
    __m256i sumA[8], sumB[8];

    // Products of the low halves and of the high halves
    karat_mult_8(lo, A, B);
    karat_mult_8(hi, A + 8, B + 8);

    // Product of the sums of the halves
    for (int32_t j = 0 ; j < 8 ; j++) {
        sumA[j] = A[j] ^ A[j + 8];
        sumB[j] = B[j] ^ B[j + 8];
    }

    karat_mult_8(mid, sumA, sumB);

    // Recombination
    for (int32_t j = 0 ; j < 8 ; j++) {
        __m256i overlap = _mm256_xor_si256(lo[j + 8], hi[j]);

        C[j]      = lo[j];
        C[j + 8]  = overlap ^ lo[j] ^ mid[j];
        C[j + 16] = overlap ^ mid[j + 8] ^ hi[j + 8];
        C[j + 24] = hi[j + 8];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_16: A and B each hold 32 256-bit words,
 * C receives 64 256-bit words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) {
    __m256i lo[32], mid[32], hi[32];
    __m256i sumA[16], sumB[16];

    // Products of the low halves and of the high halves
    karat_mult_16(lo, A, B);
    karat_mult_16(hi, A + 16, B + 16);

    // Product of the sums of the halves
    for (int32_t j = 0 ; j < 16 ; j++) {
        sumA[j] = A[j] ^ A[j + 16];
        sumB[j] = B[j] ^ B[j + 16];
    }

    karat_mult_16(mid, sumA, sumB);

    // Recombination
    for (int32_t j = 0 ; j < 16 ; j++) {
        __m256i overlap = _mm256_xor_si256(lo[j + 16], hi[j]);

        C[j]      = lo[j];
        C[j + 16] = overlap ^ lo[j] ^ mid[j];
        C[j + 32] = overlap ^ mid[j + 16] ^ hi[j + 16];
        C[j + 48] = hi[j + 16];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_32: A and B each hold 64 256-bit words,
 * C receives 128 256-bit words.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 inline static void karat_mult_64(__m256i *C, __m256i *A, __m256i *B) {
    __m256i lo[64], mid[64], hi[64];
    __m256i sumA[32], sumB[32];

    // Products of the low halves and of the high halves
    karat_mult_32(lo, A, B);
    karat_mult_32(hi, A + 32, B + 32);

    // Product of the sums of the halves
    for (int32_t j = 0 ; j < 32 ; j++) {
        sumA[j] = A[j] ^ A[j + 32];
        sumB[j] = B[j] ^ B[j + 32];
    }

    karat_mult_32(mid, sumA, sumB);

    // Recombination
    for (int32_t j = 0 ; j < 32 ; j++) {
        __m256i overlap = _mm256_xor_si256(lo[j + 32], hi[j]);

        C[j]      = lo[j];
        C[j + 32] = overlap ^ lo[j] ^ mid[j];
        C[j + 64] = overlap ^ mid[j + 32] ^ hi[j + 32];
        C[j + 96] = hi[j + 32];
    }
 }



 /**
 * @brief Compute B(x) = A(x)/(x+1)
 *
 * Both arrays are walked as 64-bit words and each output word is the XOR of all
 * input words up to and including the same index (a running prefix XOR).
 *
 * @param[out] out Pointer to the result
 * @param[in] in Pointer to the polynomial A(x)
 * @param[in] size Sets the length of the walk: 8 * size 64-bit words are processed
 */
 static inline void divByXplus1(__m256i *out, __m256i *in, int size) {
    const uint64_t *src = (uint64_t *) in;
    uint64_t *dst = (uint64_t *) out;
    const int32_t words = 2 * (size << 2);
    uint64_t running = src[0];

    dst[0] = running;

    for (int32_t idx = 1 ; idx < words ; idx++) {
        running ^= src[idx];
        dst[idx] = running;
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x) using TOOM3Mult
 *
 * This function computes A(x)*B(x) using TOOM-COOK3 Multiplication
 * last multiplication are done using Karatsuba
 *
 * Each operand is split into three parts U0/U1/U2 (resp. V0/V1/V2) of T_TM3_3W_256
 * 256-bit vectors, five products W0..W4 are computed with karat_mult_64, and the
 * result is interpolated and recomposed into Out.
 * All working buffers have static storage duration, so they are shared between calls.
 * 6 * T_TM3_3W_256 - 2 256-bit vectors are written to Out.
 *
 * @param[out] Out Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static void TOOM3Mult(uint64_t *Out, const uint64_t *A, const uint64_t *B) {
    static __m256i U0[T_TM3_3W_256], V0[T_TM3_3W_256], U1[T_TM3_3W_256], V1[T_TM3_3W_256], U2[T_TM3_3W_256], V2[T_TM3_3W_256];
    static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)];
    static __m256i tmp[2 * (T_TM3_3W_256)];
    static __m256i ro256[6 * (T_TM3_3W_256)];
    const __m256i zero = (__m256i) {
        0ul, 0ul, 0ul, 0ul
    };
    int32_t T2 = T_TM3_3W_64 << 1;

    // Split the operands: part 1 starts at word T_TM3_3W_64 - 2 and part 2 at word
    // 2 * T_TM3_3W_64 - 4. All vectors but the last of each part are loaded whole.
    for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) {
        int32_t i4 = i << 2;
        int32_t i42 = i4 - 2;
        U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4]));
        V0[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4]));
        U1[i] = _mm256_lddqu_si256((__m256i const *)(& A[i42 + T_TM3_3W_64]));
        V1[i] = _mm256_lddqu_si256((__m256i const *)(& B[i42 + T_TM3_3W_64]));
        U2[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4 + T2 - 4]));
        V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4]));
    }

    // Last vector of each part: only its two low 64-bit words come from the operand,
    // the two high words are zero.
    for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) {
        int32_t i4 = i << 2;
        int32_t i41 = i4 + 1;
        U0[i] = (__m256i) {
            A[i4], A[i41], 0x0ul, 0x0ul
        };
        V0[i] = (__m256i) {
            B[i4], B[i41], 0x0ul, 0x0ul
        };
        U1[i] = (__m256i) {
            A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul
        };
        V1[i] = (__m256i) {
            B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul
        };
        U2[i] = (__m256i) {
            A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul
        };
        V2[i] = (__m256i) {
            B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul
        };
    }

    // Evaluation phase : x= X^64
    // P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty)
    // Evaluation: 5*2 add, 2*2 shift; 5 mul (n)
    //W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0
    for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
        W3[i] = U0[i] ^ U1[i] ^ U2[i];
        W2[i] = V0[i] ^ V1[i] ^ V2[i];
    }

    //W1 = W2 * W3
    karat_mult_64( W1, W2, W3);
    //W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !)
    // Multiplications by x are done as shifts by one 64-bit word, using word-granular
    // views of the vector arrays.
    int64_t *U1_64 = ((int64_t *) U1);
    int64_t *U2_64 = ((int64_t *) U2);

    int64_t *V1_64 = ((int64_t *) V1);
    int64_t *V2_64 = ((int64_t *) V2);

    // First vector handled apart: its lowest word is zero (nothing is shifted in).
    W0[0] = _mm256_set_epi64x(U1_64[2] ^ U2_64[1], U1_64[1] ^ U2_64[0], U1_64[0], 0);
    W4[0] = _mm256_set_epi64x(V1_64[2] ^ V2_64[1], V1_64[1] ^ V2_64[0], V1_64[0], 0);

    U1_64 = ((int64_t *) U1);
    U2_64 = ((int64_t *) U2);

    V1_64 = ((int64_t *) V1);
    V2_64 = ((int64_t *) V2);

    // Remaining vectors: unaligned loads one word (U1, V1) and two words (U2, V2) lower.
    for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) {
        int i4 = i << 2;
        W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1]));
        W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2]));

        W4[i] = _mm256_lddqu_si256((__m256i const *)(& V1_64[i4 - 1]));
        W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2]));
    }

    //W3 = W3 + W0      ; W2 = W2 + W4
    for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
        W3[i] ^= W0[i];
        W2[i] ^= W4[i];
    }

    //W0 = W0 + U0      ; W4 = W4 + V0
    for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
        W0[i] ^= U0[i];
        W4[i] ^= V0[i];
    }

    // W3 = W3 * W2, computed through tmp because the destination is also an operand
    karat_mult_64(tmp, W3, W2);

    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W3[i] = tmp[i];
    }

    // W2 = W0 * W4
    karat_mult_64( W2, W0, W4);

    //W4 = U2 * V2      ; W0 = U0 * V0
    karat_mult_64(W4, U2, V2);
    karat_mult_64(W0, U0, V0);

    // Interpolation phase
    // 9 add, 1 shift, 1 Smul, 2 Sdiv (2n)
    //W3 = W3 + W2
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W3[i] ^= W2[i];
    }

    //W1 = W1 + W0
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W1[i] ^= W0[i];
    }

    //W2 =(W2 + W0)/x -> x = X^64
    // The division by x is a load shifted up by one 64-bit word.
    // NOTE(review): on the last iteration the 4-word loads start at word i4 + 1, so their
    // last word lies one 64-bit word past the end of W2 and W0 -- confirm this is intended.
    U1_64 = ((int64_t *) W2);
    U2_64 = ((int64_t *) W0);
    for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) {
        int32_t i4 = i << 2;
        W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1]));
        W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1]));
    }

    //W2 =(W2 + W3 + W4*(x^3+1))/(x+1)
    // W4 * x^3 is obtained by reading W4 three words lower: U1_256[i - 1] starts at
    // word 4 * i - 3 of W4. The first vector only receives word 0 of W4, in its top lane.
    U1_64 = ((int64_t *) W4);
    __m256i *U1_256 = (__m256i *) (U1_64 + 1);
    tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) {
        0x0ul, 0x0ul, 0x0ul, U1_64[0]
    };

    for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) {
        tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]);
    }

    divByXplus1(W2, tmp, T_TM3_3W_256);
    W2[2 * (T_TM3_3W_256) - 1] = zero;

    //W3 =(W3 + W1)/(x*(x+1))
    // The division by x is again a one-word shifted view (U1_64 + 1, U2_64 + 1).
    U1_64 = (int64_t *) W3;
    U1_256 = (__m256i *) (U1_64 + 1);

    U2_64 = (int64_t *) W1;
    __m256i *U2_256 = (__m256i *) (U2_64 + 1);

    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) {
        tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]);
    }

    divByXplus1(W3, tmp, T_TM3_3W_256);
    W3[2 * (T_TM3_3W_256) - 1] = zero;

    //W1 = W1 + W4 + W2
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W1[i] ^= W2[i] ^ W4[i];
    }

    //W2 = W2 + W3
    for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
        W2[i] ^= W3[i];
    }

    // Recomposition
    //W  = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4
    //W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256)
    // W0, W2 and W4 are laid out at vector offsets 0, 2 * T_TM3_3W_256 - 1 and
    // 4 * T_TM3_3W_256 - 2; the vectors where two of them meet are set just after the loop.
    for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) {
        ro256[i] = W0[i];
        ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i];
        ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i];
    }

    ro256[(T_TM3_3W_256 << 1) - 1] = W0[(T_TM3_3W_256 << 1) - 1] ^ W2[0];
    ro256[(T_TM3_3W_256 << 2) - 2] = W2[(T_TM3_3W_256 << 1) - 1] ^ W4[0];
    ro256[(T_TM3_3W_256 * 6) - 3] = W4[(T_TM3_3W_256 << 1) - 1];

    // W1 and W3 are XORed in at word-granular (unaligned) offsets: two words below
    // vector T_TM3_3W_256 and two words below vector 3 * T_TM3_3W_256 - 1 respectively.
    U1_64 = ((int64_t *) &ro256[T_TM3_3W_256]);
    U1_256 = (__m256i *) (U1_64 - 2);

    U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]);
    U2_256 = (__m256i *) (U2_64 - 2);

    for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) {
        _mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i]));
        _mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i]));
    }

    // Copy the recomposed product to the caller's buffer.
    for (int32_t i = 0 ; i < 6 * T_TM3_3W_256 - 2 ; i++) {
        uint64_t *out64 = Out + (i << 2);
        _mm256_storeu_si256((__m256i *)out64, ro256[i]);
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * The unreduced product of <b>a1</b> and <b>a2</b> is computed by TOOM3Mult into the
 * file-scope scratch buffer a1_times_a2, reduced modulo \f$ X^n - 1\f$ into <b>o</b>,
 * and the scratch buffer is then wiped entirely.
 * Because the scratch buffers are shared, concurrent calls must not overlap.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to a polynomial
 * @param[in] a2 Pointer to a polynomial
 */
 void PQCLEAN_HQC192_AVX2_vect_mul(uint64_t *o, const uint64_t *a1, const uint64_t *a2) {
    TOOM3Mult(a1_times_a2, a1, a2);
    reduce(o, a1_times_a2);

    // clear all: use the size of the buffer itself so that no trailing word of the
    // intermediate product is left behind
    memset(a1_times_a2, 0, sizeof(a1_times_a2));
 }
--- a/crypto_kem/hqc-192/avx2/gf2x.h
+++ b/crypto_kem/hqc-192/avx2/gf2x.h
@@ -0,0 +1,17 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */

 #include <stdint.h>

 /**
 * @brief Multiply two polynomials and reduce the product
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the first polynomial
 * @param[in] a2 Pointer to the second polynomial
 */
 void PQCLEAN_HQC192_AVX2_vect_mul(uint64_t *o, const uint64_t *a1, const uint64_t *a2);


 #endif
--- a/crypto_kem/hqc-192/avx2/hqc.c
+++ b/crypto_kem/hqc-192/avx2/hqc.c
@@ -0,0 +1,138 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key is composed of the syndrome <b>s</b> as well as the <b>seed</b> used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the <b>seed</b> used to generate vectors <b>x</b> and  <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * Both seeds are SEED_BYTES long and are obtained from randombytes(). Each one is handed to
 * seedexpander_init() as two pointers: the start of the seed and the seed offset by 32 bytes.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQC192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct sk_seedexpander;
    AES_XOF_struct pk_seedexpander;
    uint8_t sk_seed[SEED_BYTES] = {0};
    uint8_t pk_seed[SEED_BYTES] = {0};
    uint64_t x[VEC_N_256_SIZE_64] = {0};
    uint64_t y[VEC_N_256_SIZE_64] = {0};
    uint64_t h[VEC_N_256_SIZE_64] = {0};
    uint64_t s[VEC_N_256_SIZE_64] = {0};

    // Create seed_expanders for public key and secret key
    randombytes(sk_seed, SEED_BYTES);
    seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(pk_seed, SEED_BYTES);
    seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute secret key
    // x is drawn before y from the same expander; hqc_secret_key_from_string() replays
    // the draws in this order to rebuild x and y from the stored seed.
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA);
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&sk_seedexpander, y, PARAM_OMEGA);

    // Compute public key: s = x + y.h (vect_add is a word-wise XOR)
    PQCLEAN_HQC192_AVX2_vect_set_random(&pk_seedexpander, h);
    PQCLEAN_HQC192_AVX2_vect_mul(s, y, h);
    PQCLEAN_HQC192_AVX2_vect_add(s, x, s, VEC_N_256_SIZE_64);

    // Parse keys to string
    // pk must be serialised first because the secret key string embeds a copy of it
    PQCLEAN_HQC192_AVX2_hqc_public_key_to_string(pk, pk_seed, s);
    PQCLEAN_HQC192_AVX2_hqc_secret_key_to_string(sk, sk_seed, pk);

 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>.
 *
 * All randomness (r1, r2 and e) is derived from <b>theta</b> through a seed expander, so the
 * same (m, theta, pk) triple always produces the same (u, v) pair.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct seedexpander;
    uint64_t h[VEC_N_256_SIZE_64] = {0};
    uint64_t s[VEC_N_256_SIZE_64] = {0};
    uint64_t r1[VEC_N_256_SIZE_64] = {0};
    uint64_t r2[VEC_N_256_SIZE_64] = {0};
    uint64_t e[VEC_N_256_SIZE_64] = {0};
    uint64_t tmp1[VEC_N_256_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_256_SIZE_64] = {0};

    // Create seed_expander from theta
    seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQC192_AVX2_hqc_public_key_from_string(h, s, pk);

    // Generate r1, r2 and e
    // The three vectors share one expander, so the order of these calls determines their values
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&seedexpander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E);

    // Compute u = r1 + r2.h
    PQCLEAN_HQC192_AVX2_vect_mul(u, r2, h);
    PQCLEAN_HQC192_AVX2_vect_add(u, r1, u, VEC_N_256_SIZE_64);

    // Compute v = m.G by encoding the message
    // The codeword has PARAM_N1N2 bits; tmp1 holds a copy in a PARAM_N sized (zero-initialised) buffer
    PQCLEAN_HQC192_AVX2_code_encode(v, m);
    PQCLEAN_HQC192_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

    // Compute v = m.G + s.r2 + e
    PQCLEAN_HQC192_AVX2_vect_mul(tmp2, r2, s);
    PQCLEAN_HQC192_AVX2_vect_add(tmp2, e, tmp2, VEC_N_256_SIZE_64);
    PQCLEAN_HQC192_AVX2_vect_add(tmp2, tmp1, tmp2, VEC_N_256_SIZE_64);
    // Truncate the PARAM_N bit sum back to PARAM_N1N2 bits for the ciphertext
    PQCLEAN_HQC192_AVX2_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Rebuilds the secret vector <b>y</b> from the seed stored in <b>sk</b>, computes v - u.y and
 * hands the result to the decoder. Over GF(2) the subtraction is the same XOR as the addition.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC192_AVX2_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint64_t x[VEC_N_256_SIZE_64] = {0};
    uint64_t y[VEC_N_256_SIZE_64] = {0};
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t tmp1[VEC_N_256_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_256_SIZE_64] = {0};

    // Retrieve x, y, pk from secret key
    // Only y is used below; x and pk are filled because the parser always outputs all three
    PQCLEAN_HQC192_AVX2_hqc_secret_key_from_string(x, y, pk, sk);

    // Compute v - u.y
    // v is first widened from PARAM_N1N2 to PARAM_N bits so it can be combined with the product
    PQCLEAN_HQC192_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQC192_AVX2_vect_mul(tmp2, y, u);
    PQCLEAN_HQC192_AVX2_vect_add(tmp2, tmp1, tmp2, VEC_N_256_SIZE_64);


    // Compute m by decoding v - u.y
    PQCLEAN_HQC192_AVX2_code_decode(m, tmp2);
 }
--- a/crypto_kem/hqc-192/avx2/hqc.h
+++ b/crypto_kem/hqc-192/avx2/hqc.h
@@ -0,0 +1,21 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQC192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk);

 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC192_AVX2_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-192/avx2/kem.c
+++ b/crypto_kem/hqc-192/avx2/kem.c
@@ -0,0 +1,138 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND_CCA2 scheme
 *
 * Thin wrapper around the PKE key generation; the KEM uses the PKE key pair unchanged.
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (this function always reports success)
 */
 int PQCLEAN_HQC192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

    PQCLEAN_HQC192_AVX2_hqc_pke_keygen(pk, sk);
    return 0;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND_CCA2 scheme
 *
 * A random message <b>m</b> is drawn, the encryption randomness <b>theta</b> is derived from it
 * with SHA3-512, and <b>m</b> is encrypted under <b>pk</b>. The ciphertext is (u, v, d) with
 * d = SHA-512(m); the shared secret is SHA-512(m || u || v).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 (this function always reports success)
 */
 int PQCLEAN_HQC192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    // Hash input for the shared secret: m || u || v
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    PQCLEAN_HQC192_AVX2_vect_set_random_from_randombytes(m);

    // Computing theta
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQC192_AVX2_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d
    // d uses SHA-512 whereas theta above uses SHA3-512, so the two digests of m differ
    sha512(d, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES, u, VEC_N_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQC192_AVX2_hqc_ciphertext_to_string(ct, u, v, d);


    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND_CCA2 scheme
 *
 * The ciphertext is decrypted, the recovered message is re-encrypted with randomness derived
 * from it, and the result is compared with the received ciphertext (u, v) and hash d.
 * The shared secret is SHA-512(m || u || v); every byte of it is set to zero when the
 * comparison fails.
 *
 * All three comparisons are always evaluated, so the running time does not reveal which
 * component of the ciphertext mismatched first.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQC192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    int8_t result = -1;
    uint8_t mismatch = 0;
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_256_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    // Hash input for the shared secret: m || u || v
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQC192_AVX2_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQC192_AVX2_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQC192_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES, u, VEC_N_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    // Accumulate with |= instead of && so that none of the comparisons is skipped
    mismatch |= (uint8_t) (PQCLEAN_HQC192_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) != 0);
    mismatch |= (uint8_t) (PQCLEAN_HQC192_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) != 0);
    mismatch |= (uint8_t) (PQCLEAN_HQC192_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) != 0);

    // result is 1 when everything matched and 0 otherwise
    result = (int8_t) (1 - mismatch);
    for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) {
        ss[i] = result * ss[i];
    }
    // Map 1 -> 0 (success) and 0 -> -1 (failure)
    result--;


    return result;
 }
--- a/crypto_kem/hqc-192/avx2/parameters.h
+++ b/crypto_kem/hqc-192/avx2/parameters.h
@@ -0,0 +1,127 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H
 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */

 #include "api.h"
 #include "api.h"
 #include "vector.h"


 // Integer division rounded up. Both arguments are expanded twice, so they must be free of side effects.
 #define CEIL_DIVIDE(a, b)  (((a)/(b)) + ((a) % (b) == 0 ? 0 : 1)) /*!< Divide a by b and ceil the result*/
 // Mask with the (a mod size) least-significant bits set; the result is 0 when a is a multiple of size.
 // The literal 1UL has type unsigned long, so (a mod size) must stay below the width of that type.
 #define BITMASK(a, size) ((1UL << ((a) % (size))) - 1) /*!< Create a mask*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of BCH code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of the repetition code)
  #define PARAM_N1N2                            Define the parameter n1 * n2 of the scheme (length of the tensor code)
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define VEC_N_256_SIZE_64                     Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
  #define VEC_N1N2_256_SIZE_64                  Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

  #define PARAM_T                               Define a threshold for decoding repetition code word (PARAM_T = (PARAM_N2 - 1) / 2)

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the BCH code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of Galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the BCH code
  #define PARAM_G                               Define the size of the generator polynomial of BCH code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=57
                                                The smallest power of 2 greater than 57+1 is 64=2^6
  #define PARAM_BCH_POLY                        Generator polynomial of the BCH code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                 45197
 #define PARAM_N1                                766
 #define PARAM_N2                                59
 #define PARAM_N1N2                              45194
 #define PARAM_OMEGA                             101
 #define PARAM_OMEGA_E                           117
 #define PARAM_OMEGA_R                           117
 #define PARAM_SECURITY                          192
 #define PARAM_DFR_EXP                           192

 #define SECRET_KEY_BYTES                        PQCLEAN_HQC192_AVX2_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQC192_AVX2_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQC192_AVX2_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQC192_AVX2_CRYPTO_CIPHERTEXTBYTES

 #define UTILS_REJECTION_THRESHOLD               16768087
 #define VEC_K_SIZE_BYTES                        CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_N1_SIZE_BYTES                       CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 64)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 64)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_N_MULT                            48768
 #define VEC_N_256_SIZE_64                       (CEIL_DIVIDE(PARAM_N_MULT, 256) << 2)
 #define VEC_N1N2_256_SIZE_64                    (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

 #define PARAM_T                                 29

 #define PARAM_DELTA                             57
 #define PARAM_M                                 10
 #define PARAM_GF_POLY                           0x409
 #define PARAM_GF_MUL_ORDER                      1023
 #define PARAM_K                                 256
 #define PARAM_G                                 511
 #define PARAM_FFT                               6
 #define PARAM_FFT_T                             7
 #define PARAM_BCH_POLY { \
        1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1, \
        1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0, \
        0,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, \
        1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0, \
        0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0, \
        1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0, \
        0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,0,1, \
        1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, \
        1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1, \
        1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,1, \
        0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1, \
        1,0,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1, \
        0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1, \
        1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1 \
    };

 #define RED_MASK                                0x0000000000001fffUL
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/crypto_kem/hqc-192/avx2/parsing.c
+++ b/crypto_kem/hqc-192/avx2/parsing.c
@@ -0,0 +1,121 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */



 /**
 * @brief Serialise a secret key
 *
 * The output is the SEED_BYTES long seed from which <b>x</b> and <b>y</b> are regenerated,
 * immediately followed by a copy of the PUBLIC_KEY_BYTES long public key string
 * (the public key is appended in order to respect the NIST API).
 *
 * @param[out] sk String containing the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_slot, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Deserialise a secret key
 *
 * The first SEED_BYTES of <b>sk</b> seed an expander from which <b>x</b> and then <b>y</b> are
 * regenerated (both of weight PARAM_OMEGA). The remaining PUBLIC_KEY_BYTES are copied to <b>pk</b>.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint64_t representation of vector y
 * @param[out] pk String containing the public key
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(seed, sk, SEED_BYTES);
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // x is drawn before y, matching the order used at key generation
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(&expander, y, PARAM_OMEGA);

    memcpy(pk, pk_slot, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Serialise a public key
 *
 * The output is the SEED_BYTES long seed used to generate the vector <b>h</b>, immediately
 * followed by the first VEC_N_SIZE_BYTES bytes of the syndrome <b>s</b>.
 *
 * @param[out] pk String containing the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQC192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *syndrome_slot = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    memcpy(syndrome_slot, s, VEC_N_SIZE_BYTES);
 }



 /**
 * @brief Deserialise a public key
 *
 * The first SEED_BYTES of <b>pk</b> seed an expander from which the vector <b>h</b> is
 * regenerated; the following VEC_N_SIZE_BYTES bytes are copied into <b>s</b>.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *syndrome_slot = pk + SEED_BYTES;

    memcpy(seed, pk, SEED_BYTES);
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQC192_AVX2_vect_set_random(&expander, h);

    memcpy(s, syndrome_slot, VEC_N_SIZE_BYTES);
 }


 /**
 * @brief Serialise a ciphertext
 *
 * The output is the concatenation of VEC_N_SIZE_BYTES bytes of <b>u</b>,
 * VEC_N1N2_SIZE_BYTES bytes of <b>v</b> and the SHA512_BYTES long hash <b>d</b>.
 *
 * @param[out] ct String containing the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQC192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *cursor = ct;

    memcpy(cursor, u, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;

    memcpy(cursor, v, VEC_N1N2_SIZE_BYTES);
    cursor += VEC_N1N2_SIZE_BYTES;

    memcpy(cursor, d, SHA512_BYTES);
 }


 /**
 * @brief Deserialise a ciphertext
 *
 * Splits <b>ct</b> into VEC_N_SIZE_BYTES bytes for <b>u</b>, VEC_N1N2_SIZE_BYTES bytes
 * for <b>v</b> and SHA512_BYTES bytes for the hash <b>d</b>, in that order.
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String containing the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQC192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *cursor = ct;

    memcpy(u, cursor, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;

    memcpy(v, cursor, VEC_N1N2_SIZE_BYTES);
    cursor += VEC_N1N2_SIZE_BYTES;

    memcpy(d, cursor, SHA512_BYTES);
 }
--- a/crypto_kem/hqc-192/avx2/parsing.h
+++ b/crypto_kem/hqc-192/avx2/parsing.h
@@ -0,0 +1,29 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 #include <stdint.h>

 /** @brief Write the secret key string: seed followed by the public key string */
 void PQCLEAN_HQC192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 /** @brief Regenerate x and y from the seed in sk and copy out the embedded public key */
 void PQCLEAN_HQC192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


 /** @brief Write the public key string: seed followed by the syndrome s */
 void PQCLEAN_HQC192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 /** @brief Regenerate h from the seed in pk and copy out the syndrome s */
 void PQCLEAN_HQC192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 /** @brief Write the ciphertext string: u, then v, then the hash d */
 void PQCLEAN_HQC192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 /** @brief Split a ciphertext string into u, v and the hash d */
 void PQCLEAN_HQC192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/crypto_kem/hqc-192/avx2/repetition.c
+++ b/crypto_kem/hqc-192/avx2/repetition.c
@@ -0,0 +1,41 @@
 #include "parameters.h"
 #include "repetition.h"
 #include <immintrin.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 /**
 * @file repetition.c
 * @brief Implementation of repetition codes
 */


 // Mask with the PARAM_N2 least-significant bits set (one repetition block)
 #define MASK_N2                              ((1UL << PARAM_N2) - 1)

 /**
 * @brief Decoding the code words to a message using the repetition code
 *
 * We use a majority decoding. In fact we have that PARAM_N2 = 2 * PARAM_T + 1, thus,
 * if the Hamming weight of the vector is greater than PARAM_T, the code word is decoded
 * to 1 and 0 otherwise.
 *
 * The input is read as consecutive blocks of PARAM_N2 bits. A block may straddle two
 * 64-bit words; both parts are gathered into one word before the bits are counted.
 * Decoded bits are OR-ed into <b>m</b>, so <b>m</b> must be zero on entry.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    uint64_t cx, ones;

    // b is the bit offset of the first bit of the current block, t the index of the decoded bit
    for (b = 0 ; b < PARAM_N1N2 - PARAM_N2 + 1 ; b += PARAM_N2) {
        // word index and bit index of the first bit of the block
        bn = b >> 6;
        bi = b & 63;
        // word index and bit index of the last bit of the block
        c = b + PARAM_N2 - 1;
        cn = c >> 6;
        ci = c & 63;
        // move bits 0..ci of the last word to the top of cx, above the bits taken from em[bn]
        cx = em[cn] << (63 - ci);
        // 1 when the block spills into the next word, 0 otherwise; multiplying by it
        // selects cx without branching
        int64_t verif = (cn == (bn + 1));
        ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx * verif));
        // majority vote: the decoded bit is 1 when more than PARAM_T bits of the block are set
        m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
        t++;
    }
 }
--- a/crypto_kem/hqc-192/avx2/repetition.h
+++ b/crypto_kem/hqc-192/avx2/repetition.h
@@ -0,0 +1,17 @@
 #ifndef REPETITION_H
 #define REPETITION_H


 /**
 * @file repetition.h
 * @brief Header file for repetition.c
 */

 #include <stdint.h>

 /**
 * @brief Majority-decode a repetition code word into a message
 *
 * Decoded bits are OR-ed into <b>m</b>, so <b>m</b> must be zero on entry.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-192/avx2/vector.c
+++ b/crypto_kem/hqc-192/avx2/vector.c
@@ -0,0 +1,200 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter <b>weight</b>.
 * To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow:
 *  1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. It return \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 *
 * A position that was already drawn is discarded and drawn again, so the <b>weight</b> positions are distinct.
 * The selected bits are XOR-ed into the current contents of <b>v</b>; pass a zeroed array to obtain a vector
 * of weight exactly <b>weight</b>. The local buffers hold PARAM_OMEGA_R entries, so <b>weight</b> must not
 * exceed PARAM_OMEGA_R.
 *
 * @param[in,out] v Pointer to an array
 * @param[in] weight Integer that is the Hamming weight
 * @param[in] ctx Pointer to the context of the seed expander
 */
 void PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    // three random bytes are consumed per candidate position
    size_t random_bytes_size = 3 * weight;
    uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0};
    uint32_t random_data = 0;
    // tmp[i] is the i-th accepted bit position
    uint32_t tmp[PARAM_OMEGA_R] = {0};
    uint8_t exist = 0;
    // read offset into rand_bytes
    size_t j = 0;
    // bit256[i]: 256-bit value with only the selected bit set; bloc256[i]: index of the 256-bit block it belongs to
    __m256i bit256[PARAM_OMEGA_R];
    __m256i bloc256[PARAM_OMEGA_R];
    // lane numbers 0..3, compared against a broadcast lane index to build a lane selector
    static __m256i posCmp256 = (__m256i) {
        0UL, 1UL, 2UL, 3UL
    };
 #define LOOP_SIZE CEIL_DIVIDE(PARAM_N, 256)

    seedexpander(ctx, rand_bytes, random_bytes_size);

    for (uint32_t i = 0 ; i < weight ; ++i) {
        exist = 0;
        do {
            // buffer exhausted (because of rejections or duplicates): fetch a fresh batch
            if (j == random_bytes_size) {
                seedexpander(ctx, rand_bytes, random_bytes_size);
                j = 0;
            }

            // assemble a 24-bit candidate, most significant byte first
            random_data  = ((uint32_t) rand_bytes[j++]) << 16;
            random_data |= ((uint32_t) rand_bytes[j++]) << 8;
            random_data |= rand_bytes[j++];

        } while (random_data >= UTILS_REJECTION_THRESHOLD);

        random_data = random_data % PARAM_N;

        // scan every position accepted so far (no early exit) to detect a duplicate
        for (uint32_t k = 0 ; k < i ; k++) {
            if (tmp[k] == random_data) {
                exist = 1;
            }
        }

        if (exist == 1) {
            // undo the loop increment so that slot i is drawn again
            i--;
        } else {
            tmp[i] = random_data;
        }
    }

    for (uint32_t i = 0 ; i < weight ; i++) {
        // we store the bloc number and bit position of each vb[i]
        // bloc: index of the 64-bit word holding the bit; bloc >> 2: index of the 256-bit block
        uint64_t bloc = tmp[i] >> 6;
        bloc256[i] = _mm256_set1_epi64x(bloc >> 2);
        // pos: which of the four 64-bit lanes of that 256-bit block holds the bit
        uint64_t pos = (bloc & 0x3UL);
        __m256i pos256 = _mm256_set1_epi64x(pos);
        // all-ones in the selected lane, zero in the other three
        __m256i mask256 = _mm256_cmpeq_epi64(pos256, posCmp256);
        uint64_t bit64 = 1ULL << (tmp[i] & 0x3f);
        // NOTE: from here to the end of this iteration, this scalar bloc256 shadows the outer array bloc256[]
        __m256i bloc256 = _mm256_set1_epi64x(bit64);
        bit256[i] = bloc256 & mask256;
    }

    // Every 256-bit block of v is visited and every selected bit is tested against it,
    // so the loads and stores on v do not depend on the sampled positions
    for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) {
        __m256i aux = _mm256_loadu_si256(((__m256i *)v) + i);
        __m256i i256 = _mm256_set1_epi64x(i);

        // this j is a new loop index that shadows the byte offset j declared above
        for (uint32_t j = 0 ; j < weight ; j++) {
            // all-ones when selected bit j lives in block i, zero otherwise
            __m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256);
            aux ^= bit256[j] & mask256;
        }
        _mm256_storeu_si256(((__m256i *)v) + i, aux);
    }

 #undef LOOP_SIZE
 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are pulled from the seed expander and copied into <b>v</b>.
 * The bits of the last 64-bit word that lie beyond PARAM_N are then cleared with a mask.
 *
 * @param[out] v Pointer to an array
 * @param[in] ctx Pointer to the context of the seed expander
 */
 void PQCLEAN_HQC192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *const last_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    memcpy(v, stream, VEC_N_SIZE_BYTES);

    // drop the extra bits of the final word
    *last_word &= BITMASK(PARAM_N, 64);
 }



 /**
 * @brief Generates a random vector
 *
 * VEC_K_SIZE_BYTES bytes are requested from randombytes() and copied into <b>v</b>.
 *
 * @param[out] v Pointer to an array
 */
 void PQCLEAN_HQC192_AVX2_vect_set_random_from_randombytes(uint64_t *v) {
    uint8_t fresh[VEC_K_SIZE_BYTES] = {0};

    randombytes(fresh, VEC_K_SIZE_BYTES);

    memcpy(v, fresh, VEC_K_SIZE_BYTES);
 }



 /**
 * @brief Adds two vectors
 *
 * Addition over GF(2): each output word is the XOR of the matching input words.
 * Words are processed one index at a time, so <b>o</b> may be the same array as
 * <b>v1</b> or <b>v2</b>.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
 void PQCLEAN_HQC192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t word = 0;

    while (word < size) {
        o[word] = v1[word] ^ v2[word];
        word++;
    }
 }



 /**
 * @brief Compares two vectors
 *
 * The vectors are compared byte by byte. Every byte is always inspected and the
 * differences are accumulated with OR, so the running time depends only on <b>size</b>.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors, in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 int PQCLEAN_HQC192_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    // Byte views that keep the const qualifier of the inputs
    const uint8_t *bytes1 = (const uint8_t *) v1;
    const uint8_t *bytes2 = (const uint8_t *) v2;
    uint32_t diff = 0;

    for (uint32_t i = 0 ; i < size ; i++) {
        diff |= (uint32_t) (bytes1[i] ^ bytes2[i]);
    }

    // diff is at most 255, so (0 - diff) has its top bit set exactly when diff != 0;
    // shifting that bit down gives 0 or 1 without a comparison
    return (int) ((uint32_t) ((uint32_t) 0 - diff) >> 31);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When shrinking (<b>size_o</b> < <b>size_v</b>), the bytes needed to hold <b>size_o</b> bits are
 * copied and the bits of the last output word beyond <b>size_o</b> are cleared.
 * Otherwise the bytes holding the <b>size_v</b> input bits are copied and the remainder of
 * <b>o</b> is left untouched, so the caller is expected to provide a zeroed output.
 *
 * The sizes are taken from the arguments, not from fixed scheme parameters; for
 * <b>size_o</b> equal to PARAM_N1N2 the result is identical to the previous hard-coded version.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQC192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o < size_v) {
        // number of bytes needed for size_o bits, rounded up
        size_t out_bytes = ((size_t) size_o + 7) / 8;
        // number of valid bits in the last (partial) output word; 0 means the word is full
        uint32_t tail_bits = size_o % 64;

        memcpy(o, v, out_bytes);

        if (tail_bits != 0) {
            // keep the tail_bits low bits of the last word; the shift is in the range 1..63
            o[size_o / 64] &= (UINT64_MAX >> (64 - tail_bits));
        }
    } else {
        memcpy(o, v, ((size_t) size_v + 7) / 8);
    }
 }
--- a/crypto_kem/hqc-192/avx2/vector.h
+++ b/crypto_kem/hqc-192/avx2/vector.h
@@ -0,0 +1,29 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */

 #include "nistseedexpander.h"
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 // NOTE(review): definition not visible here; presumably fills v with a random vector of Hamming weight `weight` drawn from ctx - confirm in vector.c
 void PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 // NOTE(review): definition not visible here; presumably fills v with random data drawn from ctx - confirm in vector.c
 void PQCLEAN_HQC192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 // NOTE(review): presumably fills v from randombytes() rather than from a seed expander - confirm in vector.c
 void PQCLEAN_HQC192_AVX2_vect_set_random_from_randombytes(uint64_t *v);


 // o[i] = v1[i] ^ v2[i] for every i < size (size counts 64-bit words)
 void PQCLEAN_HQC192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 // Returns 0 when the first `size` bytes of v1 and v2 are equal, 1 otherwise (size counts bytes)
 int PQCLEAN_HQC192_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size);

 // Copies v into o, truncating to size_o bits when size_o < size_v (sizes are in bits)
 void PQCLEAN_HQC192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/crypto_kem/hqc-192/clean/LICENSE
+++ b/crypto_kem/hqc-192/clean/LICENSE
@@ -0,0 +1 @@
 Public Domain
--- a/crypto_kem/hqc-192/clean/Makefile
+++ b/crypto_kem/hqc-192/clean/Makefile
@@ -0,0 +1,19 @@
 # This Makefile can be used with GNU Make or BSD Make

 # Static library produced by the default target
 LIB=libhqc-192_clean.a
 # Headers listed as prerequisites of every object (see the %.o rule below)
 HEADERS=api.h bch.h code.h fft.h gf2x.h gf.h hqc.h parameters.h parsing.h repetition.h vector.h 
 # Objects archived into $(LIB)
 OBJECTS=bch.o code.o fft.o gf2x.o gf.o hqc.o kem.o parsing.o repetition.o vector.o 

 # C99 build with warnings promoted to errors; ../../../common is on the include path
 # and EXTRAFLAGS lets the caller append further options
 CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS)

 # Default target: build the static library
 all: $(LIB)

 # Compile one source file; any header change triggers a rebuild
 %.o: %.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $@ $<

 # Archive all objects into the static library
 $(LIB): $(OBJECTS)
 	$(AR) -r $@ $(OBJECTS)

 # Remove the objects and the library
 clean:
 	$(RM) $(OBJECTS)
 	$(RM) $(LIB)
--- a/crypto_kem/hqc-192/clean/Makefile.Microsoft_nmake
+++ b/crypto_kem/hqc-192/clean/Makefile.Microsoft_nmake
@@ -0,0 +1,19 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 # Static library produced by the default target
 LIBRARY=libhqc-192_clean.lib
 # Objects archived into $(LIBRARY)
 OBJECTS=bch.obj code.obj fft.obj gf2x.obj gf.obj hqc.obj kem.obj parsing.obj repetition.obj vector.obj 

 # /O2 optimisation, warning level 4 with warnings treated as errors (/WX);
 # ..\..\..\common is on the include path
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the static library
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive all objects into the static library
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove the objects and the library; the leading '-' ignores DEL failures
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/hqc-192/clean/api.h
+++ b/crypto_kem/hqc-192/clean/api.h
@@ -0,0 +1,25 @@
 #ifndef PQCLEAN_HQC192_CLEAN_API_H
 #define PQCLEAN_HQC192_CLEAN_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 // Human-readable algorithm name
 #define PQCLEAN_HQC192_CLEAN_CRYPTO_ALGNAME                      "HQC-192"

 // Buffer sizes for the KEM objects (the macro names indicate bytes)
 #define PQCLEAN_HQC192_CLEAN_CRYPTO_SECRETKEYBYTES               5730
 #define PQCLEAN_HQC192_CLEAN_CRYPTO_PUBLICKEYBYTES               5690
 #define PQCLEAN_HQC192_CLEAN_CRYPTO_BYTES                        64
 #define PQCLEAN_HQC192_CLEAN_CRYPTO_CIPHERTEXTBYTES              11364

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQC192_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32
 // NOTE(review): with the sizes above, 5730 - 5690 = 40 bytes rather than 32 - confirm which value is intended.

 // NOTE(review): the definitions are not visible here; pk and sk are presumably output buffers of
 // PUBLICKEYBYTES and SECRETKEYBYTES bytes - confirm in kem.c
 int PQCLEAN_HQC192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 // NOTE(review): presumably writes CIPHERTEXTBYTES bytes to ct and CRYPTO_BYTES bytes to ss - confirm in kem.c
 int PQCLEAN_HQC192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 // NOTE(review): presumably recovers the CRYPTO_BYTES-byte shared secret ss from ct using sk - confirm in kem.c
 int PQCLEAN_HQC192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-192/clean/bch.c
+++ b/crypto_kem/hqc-192/clean/bch.c
@@ -0,0 +1,383 @@
 #include "bch.h"
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file bch.c
 * Constant time implementation of BCH codes
 */


 // Prototypes of the file-local helpers; each one is defined and documented further down in this file.
 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Branch-free reduction of i modulo the given modulus.
 *
 * Only valid when i is less than 2*modulus, so the result is either i or i-modulus.
 * The subtraction is done first; when it wraps around (bit 15 set) the modulus is added back.
 *
 * @returns i mod (modulus)
 * @param[in] i The integer whose modulo is taken
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t reduced = (uint16_t) (i - modulus);

    // wrapped is 1 when i < modulus (the subtraction underflowed), 0 otherwise
    uint16_t wrapped = (uint16_t) (reduced >> 15);

    // all ones when the modulus has to be added back, zero otherwise
    uint16_t select = (uint16_t) (0 - wrapped);

    return (uint16_t) (reduced + (select & modulus));
 }



 /**
 * @brief Computes the odd binary cyclotomic cosets modulo 2^m-1 for integers less than upper_bound.
 *
 * For every odd i below upper_bound that has no representative yet, i becomes the
 * representative of itself and of the PARAM_M - 1 values obtained by repeatedly doubling it
 * modulo PARAM_GF_MUL_ORDER. Entries equal to 0 are treated as "not yet assigned", so the
 * caller must pass a zero-initialised array.
 *
 * @param[out] cosets Array of size 2^m-1 receiving, at index i, the coset representative of i
 * @param[in] upper_bound The upper bound
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1 ; rep < upper_bound ; rep += 2) {
        if (cosets[rep] != 0) { // rep already belongs to a class
            continue;
        }

        cosets[rep] = rep;

        // Walk through the rest of rep's class by successive doublings
        uint16_t member = rep;
        for (size_t doublings = PARAM_M - 1 ; doublings != 0 ; --doublings) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Zero means "not in any selected coset"; size taken from the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly ; j ; --j) {
            // mask = 0xffff if(bch_poly[j] != 0) and 0 otherwise, so a zero coefficient stays zero
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity.
    // The index is bounded so the scan can never read past the end of cosets.
    while ((2 * *t + 1 < PARAM_GF_MUL_ORDER) && (cosets[2 * *t + 1] != 0)) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Expands the packed message into one byte per bit
 *
 * Bit j of word i of <b>message</b> is written (as 0 or 1) to byte 64*i + j of <b>message_unpacked</b>.
 * The complete 64-bit words are handled first, then the PARAM_K % 64 bits of the last word, if any.
 *
 * @param[out] message_unpacked Array of PARAM_K bytes receiving the unpacked message
 * @param[in] message Array of VEC_K_SIZE_64 64-bit words storing the packed message
 */
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message) {
    const size_t tail_bits = PARAM_K % 64;
    const size_t full_words = VEC_K_SIZE_64 - (tail_bits != 0);

    // Words that are entirely made of message bits
    for (size_t word = 0 ; word < full_words ; ++word) {
        uint8_t *out = message_unpacked + 64 * word;
        for (size_t bit = 0 ; bit < 64 ; ++bit) {
            out[bit] = (message[word] >> bit) & 0x0000000000000001;
        }
    }

    // Remaining bits of the partially used last word
    for (size_t bit = 0 ; bit < tail_bits ; ++bit) {
        message_unpacked[bit + 64 * (VEC_K_SIZE_64 - 1)] = (message[VEC_K_SIZE_64 - 1] >> bit) & 0x0000000000000001;
    }
 }


 /**
 * @brief Systematically encodes an unpacked message with a feedback shift register
 *
 * The first PARAM_N1 - PARAM_K bytes of <b>codeword</b> act as the register: they are read before
 * being written, so the caller must pass them zero-initialised. Once all message bits have been
 * shifted in they hold the parity-check digits, and the message itself is copied after them.
 *
 * @param[out] codeword Array of PARAM_N1 bytes (one bit per byte) receiving the codeword
 * @param[in] message Array of PARAM_K bytes (one bit per byte) storing the message to encode
 */
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message) {
    const uint8_t bch_poly[PARAM_G] = PARAM_BCH_POLY;
    const size_t top = PARAM_N1 - PARAM_K - 1; // index of the last register stage

    // Feed the message bits, highest index first, to obtain the parity-check digits
    for (size_t remaining = PARAM_K ; remaining > 0 ; --remaining) {
        const uint8_t feedback = message[remaining - 1] ^ codeword[top];
        // select keeps bch_poly[stage] when feedback is 1 and clears it when feedback is 0
        const uint8_t select = (uint8_t) (0 - feedback);

        for (size_t stage = top ; stage > 0 ; --stage) {
            codeword[stage] = codeword[stage - 1] ^ (select & bch_poly[stage]);
        }

        codeword[0] = feedback;
    }

    // Systematic part: the message follows the parity digits
    memcpy(codeword + PARAM_N1 - PARAM_K, message, PARAM_K);
 }



 /**
 * @brief Packs a one-bit-per-byte codeword into 64-bit words
 *
 * Byte 64*i + j of <b>codeword_unpacked</b> is OR-ed into bit j of word i of <b>codeword</b>.
 * Bits are only OR-ed in, never cleared, so the caller must pass a zero-initialised <b>codeword</b>.
 *
 * @param[out] codeword Array of VEC_N1_SIZE_64 64-bit words receiving the packed codeword
 * @param[in] codeword_unpacked Array of PARAM_N1 bytes storing the unpacked codeword
 */
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked) {
    const size_t tail_bits = PARAM_N1 % 64;
    const size_t full_words = VEC_N1_SIZE_64 - (tail_bits != 0);
    const size_t last = VEC_N1_SIZE_64 - 1;

    // Words that are entirely made of codeword bits
    for (size_t word = 0 ; word < full_words ; ++word) {
        const uint8_t *bits = codeword_unpacked + 64 * word;
        for (size_t bit = 0 ; bit < 64 ; ++bit) {
            codeword[word] |= ((uint64_t) bits[bit]) << bit;
        }
    }

    // Remaining bits of the partially used last word
    for (size_t bit = 0 ; bit < tail_bits ; ++bit) {
        codeword[last] |= ((uint64_t) codeword_unpacked[bit + 64 * last]) << bit;
    }
 }


 /**
 * @brief Encodes a message message of PARAM_K bits to a BCH codeword codeword of PARAM_N1 bits
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * a systematic encoding is performed with a linear (PARAM_N1 - PARAM_K)-stage shift register
 * whose feedback connections are given by the generator polynomial of the BCH code. <br>
 * The work is done in three steps: unpack the message to one bit per byte, run the shift
 * register, then pack the result. The packing step only ORs bits into <b>codeword</b>,
 * so the caller must provide it zero-initialised.
 *
 * @param[out] codeword Array of size VEC_N1_SIZE_BYTES receiving the encoded message
 * @param[in] message Array of size VEC_K_SIZE_BYTES storing the message
 */
 void PQCLEAN_HQC192_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *message) {
    uint8_t codeword_bits[PARAM_N1] = {0}; // must start at zero: it doubles as the shift register
    uint8_t message_bits[PARAM_K];

    unpack_message(message_bits, message);
    lfsr_encode(codeword_bits, message_bits);
    pack_codeword(codeword, codeword_bits);
 }


 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite joiner1995decoding). <br>
 * We use the letter p for rho which is initialized at -1/2. <br>
 * The array X_sigma_p represents the polynomial X^(2(mu-rho))*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA - 1 coefficients. <br>
 * Every selection is made with all-ones/all-zeros masks rather than branches on d or on the degrees.
 *
 * @returns the degree of the ELP sigma
 * @param[in,out] sigma Array of at least PARAM_DELTA + 1 elements receiving the ELP (indices up to PARAM_DELTA are
 *                      updated with XOR, so the entries above index 0 must be zero on entry)
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    sigma[0] = 1;
    size_t deg_sigma = 0;
    size_t deg_sigma_p = 0;
    uint16_t sigma_copy[PARAM_DELTA - 1] = {0};
    size_t deg_sigma_copy = 0;
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; // starts as the polynomial X
    int32_t pp = -1; // 2*rho
    uint16_t d_p = 1; // discrepancy associated with sigma_p
    uint16_t d = syndromes[0]; // current discrepancy

    for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
        deg_sigma_copy = deg_sigma;

        // sigma <- sigma + (d / d_p) * X_sigma_p, restricted to the coefficients that can be non-zero at this step
        uint16_t dd = PQCLEAN_HQC192_CLEAN_gf_mul(d, PQCLEAN_HQC192_CLEAN_gf_inverse(d_p)); // 0 if(d == 0)
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            sigma[i] ^= PQCLEAN_HQC192_CLEAN_gf_mul(dd, X_sigma_p[i]);
        }

        size_t deg_X = 2 * mu - pp; // 2*(mu-rho)
        size_t deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        int16_t mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        int16_t mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        int16_t mask12 = mask1 & mask2;
        deg_sigma = (mask12 & deg_X_sigma_p) ^ (~mask12 & deg_sigma);

        // The last iteration only needs the final sigma and its degree
        if (mu == PARAM_DELTA - 1) {
            break;
        }

        // Update pp, d_p and X_sigma_p if needed
        pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
        d_p = (mask12 & d) ^ (~mask12 & d_p);
        // Shift the selected polynomial (saved sigma or current X_sigma_p) up by two positions,
        // walking downwards so that X_sigma_p[i - 1] is read before it is overwritten
        for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
            X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }
        X_sigma_p[1] = 0;
        X_sigma_p[0] = 0;
        deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p);

        // Compute the next discrepancy
        d = syndromes[2 * mu + 2];
        for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
            d ^= PQCLEAN_HQC192_CLEAN_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Retrieves the message message from the codeword codeword
 *
 * Since we performed a systematic encoding, the message is the last PARAM_K bits of the codeword.
 * The message therefore starts PARAM_N1 - PARAM_K bits into the codeword; each output word is
 * assembled from the high part of one codeword word and, when the offset is not a multiple of 64,
 * the low part of the next one. <br>
 * A word-aligned offset is handled separately because shifting a 64-bit value by 64 is undefined in C.
 * As before, bits of the last output word beyond PARAM_K are not cleared.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the message
 * @param[in] codeword Array of size VEC_N1_SIZE_BYTES storing the codeword
 */
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
    const size_t parity_bits = PARAM_N1 - PARAM_K;
    const unsigned int shift = (unsigned int) (parity_bits % 64); // bit offset inside a codeword word
    const unsigned int tail_bits = PARAM_K % 64; // message bits in the last output word (0 = full word)
    size_t index = parity_bits / 64;

    for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
        uint64_t word = codeword[index] >> shift;
        ++index;
        if (shift != 0) {
            // Complete the word with the low bits of the next codeword word
            word |= codeword[index] << (64 - shift);
        }
        message[i] = word;
    }

    // Last word (64 - shift is the number of bits provided by codeword[index])
    uint64_t last = codeword[index] >> shift;
    if ((shift != 0) && ((tail_bits == 0) || (64 - shift < tail_bits))) {
        // codeword[index] alone does not hold all remaining message bits
        last |= codeword[index + 1] << (64 - shift);
    }
    message[VEC_K_SIZE_64 - 1] = last;
 }


 /**
 * @brief Computes the 2*PARAM_DELTA syndromes from the received vector vector
 *
 * Syndromes are the sum of powers of alpha weighted by vector's coefficients.
 * To do so, we use the additive FFT transpose, which takes as input a family w of GF(2^PARAM_M) elements
 * and outputs the weighted power sums of these w. <br>
 * Therefore, this requires twisting and applying a permutation before feeding vector to the PQCLEAN_HQC192_CLEAN_fft transpose. <br>
 * For more details see Bernstein, Chou and Schwabe's explanations:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * @param[out] syndromes Array of size 2^(PARAM_FFT_T) receiving the 2*PARAM_DELTA syndromes
 * @param[in] vector Array of size VEC_N1_SIZE_BYTES storing the received word
 */
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector) {
    // One GF(2^PARAM_M) element per field position, filled by the preprocessing step
    uint16_t family[1 << PARAM_M];

    PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(family, vector);
    PQCLEAN_HQC192_CLEAN_fft_t(syndromes, family, 2 * PARAM_DELTA);
 }


 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQC192_CLEAN_fft for more details.
 *
 * @param[out] error Array of VEC_N1_SIZE_BYTES elements receiving the error polynomial
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint64_t *error, const uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0}; // w will receive the evaluation of sigma in all field elements

    PQCLEAN_HQC192_CLEAN_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQC192_CLEAN_fft_retrieve_bch_error_poly(error, w);
 }



 /**
 * @brief Decodes the received word
 *
 * This function relies on four steps:
 *    <ol>
 *    <li> The first step, done by additive FFT transpose, is the computation of the 2*PARAM_DELTA syndromes.
 *    <li> The second step is the computation of the error-locator polynomial sigma.
 *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 *    <li> The fourth step is the correction of the errors in the received polynomial.
 *    </ol>
 * For a more complete picture on BCH decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * Note that the correction is applied in place: <b>vector</b> is modified.
 *
 * @param[out] message Array of size VEC_K_SIZE_BYTES receiving the decoded message
 * @param[in,out] vector Array of size VEC_N1_SIZE_BYTES storing the received word; holds the corrected word on return
 */
 void PQCLEAN_HQC192_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector) {
    uint64_t error_pattern[(1 << PARAM_M) / 8] = {0};
    // The locator has degree at most PARAM_DELTA but the FFT requires the extra room
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint16_t syndrome_table[1 << PARAM_FFT_T] = {0};

    // Step 1: the 2*PARAM_DELTA syndromes of the received word
    compute_syndromes(syndrome_table, vector);

    // Step 2: the error locator polynomial
    compute_elp(locator, syndrome_table);

    // Step 3: the error polynomial, from the roots of the locator
    compute_roots(error_pattern, locator);

    // Step 4: add the error polynomial to the received polynomial
    PQCLEAN_HQC192_CLEAN_vect_add(vector, vector, error_pattern, VEC_N1_SIZE_64);

    // The encoding is systematic, so the message can be read off the corrected codeword
    message_from_codeword(message, vector);
 }
--- a/crypto_kem/hqc-192/clean/bch.h
+++ b/crypto_kem/hqc-192/clean/bch.h
@@ -0,0 +1,23 @@
 #ifndef BCH_H
 #define BCH_H


 /**
 * @file bch.h
 * Header file of bch.c
 */

 #include "parameters.h"
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Systematic BCH encoding of a PARAM_K-bit message into a PARAM_N1-bit codeword.
 // Bits are OR-ed into codeword, so it must be zero-initialised by the caller.
 void PQCLEAN_HQC192_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *message);

 // BCH decoding; vector is corrected in place before the message is extracted from it.
 void PQCLEAN_HQC192_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);


 // Computes the generator polynomial of the BCH code; returns its degree and updates *t
 // with the real correction capacity.
 size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);


 #endif
--- a/crypto_kem/hqc-192/clean/code.c
+++ b/crypto_kem/hqc-192/clean/code.c
@@ -0,0 +1,49 @@
 #include "bch.h"
 #include "code.h"
 #include "parameters.h"
 #include "repetition.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of tensor code
 */



 /**
 *
 * @brief Encoding the message m to a code word em using the tensor code
 *
 * The message is first encoded with the BCH code into a zero-initialised intermediate
 * buffer; that BCH code word is then encoded with the repetition code to obtain
 * the tensor code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC192_CLEAN_code_encode(uint64_t *em, const uint64_t *m) {
    // Intermediate BCH code word
    uint64_t bch_codeword[VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQC192_CLEAN_bch_code_encode(bch_codeword, m);
    PQCLEAN_HQC192_CLEAN_repetition_code_encode(em, bch_codeword);
 }



 /**
 * @brief Decoding the code word em to a message m using the tensor code
 *
 * The repetition code is decoded first, into a zero-initialised intermediate buffer;
 * the resulting word is then decoded with the BCH code.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_CLEAN_code_decode(uint64_t *m, const uint64_t *em) {
    // Output of the repetition decoder, input of the BCH decoder
    uint64_t bch_word[VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQC192_CLEAN_repetition_code_decode(bch_word, em);
    PQCLEAN_HQC192_CLEAN_bch_code_decode(m, bch_word);
 }
--- a/crypto_kem/hqc-192/clean/code.h
+++ b/crypto_kem/hqc-192/clean/code.h
@@ -0,0 +1,20 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */

 #include "parameters.h"
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Tensor-code encoding: BCH encoding followed by repetition encoding.
 void PQCLEAN_HQC192_CLEAN_code_encode(uint64_t *em, const uint64_t *message);

 // Tensor-code decoding: repetition decoding followed by BCH decoding.
 void PQCLEAN_HQC192_CLEAN_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-192/clean/fft.c
+++ b/crypto_kem/hqc-192/clean/fft.c
@@ -0,0 +1,627 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 // Prototypes of the file-local helpers used by the additive FFT and its transpose.
 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size);
 static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f);
 static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The entries are the powers of two 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2^1, in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    uint16_t *out = betas;

    for (size_t shift = PARAM_M - 1 ; shift > 0 ; --shift) {
        *out++ = 1 << shift;
    }
 }



 /**
 * @brief Computes the subset sums of the given set
 *
 * The array subset_sums is such that its ith element is
 * the subset sum (XOR) of the set elements selected by the binary form of i. <br>
 * The table is built by doubling: once the sums over the first e elements are known,
 * the next block is obtained by XOR-ing element e into each of them.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t filled = 1; // number of sums already available

    subset_sums[0] = 0;

    for (size_t e = 0 ; e < set_size ; ++e) {
        uint16_t *upper = subset_sums + filled;

        for (size_t j = 0 ; j < filled ; ++j) {
            upper[j] = set[e] ^ subset_sums[j];
        }
        filled <<= 1;
    }
 }



 /**
 * @brief Transpose of the linear radix conversion
 *
 * This is a direct transposition of the radix function
 * implemented following the process of transposing a linear function as exposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * Sizes up to m_f = 4 use hard-coded formulas; larger sizes split f0 and f1 into halves,
 * recurse with m_f - 1 and recombine the two results.
 *
 * @param[out] f Array of size a power of 2
 * @param[in] f0 Array half the size of f
 * @param[in] f1 Array half the size of f
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f) {
    switch (m_f) {
    // 16 outputs from 8 + 8 inputs; later outputs reuse earlier ones, so the statement order matters
    case 4:
        f[0] = f0[0];
        f[1] = f1[0];
        f[2] = f0[1] ^ f1[0];
        f[3] = f[2] ^ f1[1];
        f[4] = f[2] ^ f0[2];
        f[5] = f[3] ^ f1[2];
        f[6] = f[4] ^ f0[3] ^ f1[2];
        f[7] = f[3] ^ f0[3] ^ f1[3];
        f[8] = f[4] ^ f0[4];
        f[9] = f[5] ^ f1[4];
        f[10] = f[6] ^ f0[5] ^ f1[4];
        f[11] = f[7] ^ f0[5] ^ f1[4] ^ f1[5];
        f[12] = f[8] ^ f0[5] ^ f0[6] ^ f1[4];
        f[13] = f[7] ^ f[9] ^ f[11] ^ f1[6];
        f[14] = f[6] ^ f0[6] ^ f0[7] ^ f1[6];
        f[15] = f[7] ^ f0[7] ^ f1[7];
        return;

    // 8 outputs from 4 + 4 inputs
    case 3:
        f[0] = f0[0];
        f[1] = f1[0];
        f[2] = f0[1] ^ f1[0];
        f[3] = f[2] ^ f1[1];
        f[4] = f[2] ^ f0[2];
        f[5] = f[3] ^ f1[2];
        f[6] = f[4] ^ f0[3] ^ f1[2];
        f[7] = f[3] ^ f0[3] ^ f1[3];
        return;

    // 4 outputs from 2 + 2 inputs
    case 2:
        f[0] = f0[0];
        f[1] = f1[0];
        f[2] = f0[1] ^ f1[0];
        f[3] = f[2] ^ f1[1];
        return;

    // 2 outputs: plain interleaving
    case 1:
        f[0] = f0[0];
        f[1] = f1[0];
        return;

    // General case: recursion on the two halves of f0 and f1
    default:
        ;

        // n is a quarter of the size of f
        size_t n = 1 << (m_f - 2);

        uint16_t Q0[1 << (PARAM_FFT_T - 2)];
        uint16_t Q1[1 << (PARAM_FFT_T - 2)];
        uint16_t R0[1 << (PARAM_FFT_T - 2)];
        uint16_t R1[1 << (PARAM_FFT_T - 2)];

        // NOTE(review): `1 << 2 * (PARAM_FFT_T - 2)` parses as 1 << (2 * (PARAM_FFT_T - 2));
        // radix() sizes its Q and R as 2 * (1 << (PARAM_FFT - 2)) - confirm which size is intended here.
        uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)];
        uint16_t R[1 << 2 * (PARAM_FFT_T - 2)];

        // Upper halves of f0/f1 go to Q0/Q1, lower halves to R0/R1 (2 * n bytes = n elements each)
        memcpy(Q0, f0 + n, 2 * n);
        memcpy(Q1, f1 + n, 2 * n);
        memcpy(R0, f0, 2 * n);
        memcpy(R1, f1, 2 * n);

        radix_t (Q, Q0, Q1, m_f - 1);
        radix_t (R, R0, R1, m_f - 1);

        // f = [R (2n elements) | second half of R | second half of Q] before the XOR corrections
        memcpy(f, R, 4 * n);
        memcpy(f + 2 * n, R + n, 2 * n);
        memcpy(f + 3 * n, Q + n, 2 * n);

        // Fold the first half of Q into the third quarter, then the third quarter into the fourth
        for (size_t i = 0 ; i < n ; ++i) {
            f[2 * n + i] ^= Q[i];
            f[3 * n + i] ^= f[2 * n + i];
        }
    }
 }



 /**
 * @brief Recursively computes syndromes of family w
 *
 * This function is a subroutine of the function fft_t
 *
 * The recursion stops when m_f == 1, where the two outputs are computed directly.
 * Otherwise w is split into u and v, both are processed recursively with m - 1 and m_f - 1
 * (v only when f has more than 3 coefficients), the results are merged with radix_t and
 * finally scaled by the powers of betas[m - 1] when that value is not 1.
 *
 * @param[out] f Array receiving the syndromes
 * @param[in] w Array storing the family
 * @param[in] f_coeffs Length of syndromes vector
 * @param[in] m 2^m is the smallest power of 2 greater or equal to the length of family w
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the length of f
 * @param[in] betas FFT constants
 */
 static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    size_t k = 1 << (m - 1); // half the length of w
    uint16_t gammas[PARAM_M - 2];
    uint16_t deltas[PARAM_M - 2];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t f0[1 << (PARAM_FFT_T - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT_T - 2)] = {0};

    // Step 1
    if (m_f == 1) {
        // f[0] is the plain sum of the whole family
        f[0] = 0;
        for (size_t i = 0 ; i < (1U << m) ; ++i) {
            f[0] ^= w[i];
        }
        f[1] = 0;

        // f[1] is the sum of w[index] weighted by the subset sum of betas selected by index;
        // the subset sums are built on the fly (note: the inner loop variable k shadows the outer k)
        uint16_t betas_sums[1 << (PARAM_M - 1)];
        betas_sums[0] = 0;
        for (size_t j = 0 ; j < m ; ++j) {
            for (size_t k = 0 ; k < (1U << j) ; ++k) {
                size_t index = (1 << j) + k;
                betas_sums[index] = betas_sums[k] ^ betas[j];
                f[1] ^= PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[index], w[index]);
            }
        }

        return;
    }

    // Compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1] and deltas[i] = gammas[i]^2 + gammas[i]
    for (uint8_t i = 0 ; i < m - 1 ; ++i) {
        gammas[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], PQCLEAN_HQC192_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas subset sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    /* Step 6: Compute u and v from w (aka w)
     * w[i] = u[i] + G[i].v[i]
     * w[k+i] = w[i] + v[i] = u[i] + (G[i]+1).v[i]
     * Transpose:
     * u[i] = w[i] + w[k+i]
     * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case
        // Step 5: Compute f0 from u and f1 from v
        // Here f1 has a single coefficient: the sum of all v[i], accumulated directly into f1[0]
        f1[1] = 0;
        u[0] = w[0] ^ w[k];
        f1[0] = w[k];
        for (size_t i = 1 ; i < k ; ++i) {
            u[i] = w[i] ^ w[k + i];
            f1[0] ^= PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i];
        }
        fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);
    } else {
        uint16_t v[1 << (PARAM_M - 2)] = {0};

        // Index 0 is handled apart: gammas_sums[0] is 0, so v[0] is just w[k]
        u[0] = w[0] ^ w[k];
        v[0] = w[k];

        for (size_t i = 1 ; i < k ; ++i) {
            u[i] = w[i] ^ w[k + i];
            v[i] = PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i];
        }

        // Step 5: Compute f0 from u and f1 from v
        fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);
        fft_t_rec(f1, v, f_coeffs / 2, m - 1, m_f - 1, deltas);
    }

    // Step 3: Compute g from g0 and g1
    radix_t(f, f0, f1, m_f);

    // Step 2: compute f from g
    // f[i] is multiplied by betas[m - 1]^i; nothing to do when betas[m - 1] is 1
    if (betas[m - 1] != 1) {
        uint16_t beta_m_pow = 1;
        for (size_t i = 1 ; i < (1U << m_f) ; ++i) {
            beta_m_pow = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, f[i]);
        }
    }
 }



 /**
 * @brief Computes the syndromes f of the family w
 *
 * Since the syndromes linear map is the transpose of multipoint evaluation,
 * it uses exactly the same constants, either hardcoded or precomputed by compute_fft_lut(...). <br>
 * This follows directives from Bernstein, Chou and Schwabe given here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * This is the top level of the recursion: the family is split into two halves,
 * each half is processed by fft_t_rec and the results are merged by radix_t.
 *
 * @param[out] f Array of 2^(PARAM_FFT_T) elements receiving the syndromes
 * @param[in] w Array of PARAM_GF_MUL_ORDER+1 elements
 * @param[in] f_coeffs Length of syndromes vector f
 */
 void PQCLEAN_HQC192_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) {
    // Transposed from Gao and Mateer algorithm
    const size_t half = 1 << (PARAM_M - 1);
    uint16_t basis[PARAM_M - 1];
    uint16_t basis_sums[1 << (PARAM_M - 1)];
    uint16_t folded[1 << (PARAM_M - 1)] = {0};
    uint16_t twisted[1 << (PARAM_M - 1)] = {0};
    uint16_t next_basis[PARAM_M - 1];
    uint16_t even_part[1 << (PARAM_FFT_T - 1)];
    uint16_t odd_part[1 << (PARAM_FFT_T - 1)];

    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    /* Step 6: Compute folded (u) and twisted (v) from w
     *
     * The forward transform had:
     * w[i] = u[i] + G[i].v[i]
     * w[half+i] = w[i] + v[i] = u[i] + (G[i]+1).v[i]
     * so its transpose is:
     * u[i] = w[i] + w[half+i]
     * v[i] = G[i].w[i] + (G[i]+1).w[half+i] = G[i].u[i] + w[half+i]
     * Index 0 is written out separately, without the multiplication. */
    folded[0] = w[0] ^ w[half];
    twisted[0] = w[half];
    for (size_t i = 1 ; i < half ; ++i) {
        const uint16_t *upper = w + half;

        folded[i] = w[i] ^ upper[i];
        twisted[i] = PQCLEAN_HQC192_CLEAN_gf_mul(basis_sums[i], folded[i]) ^ upper[i];
    }

    // Basis of the next recursion level: beta^2 + beta for every beta
    for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) {
        next_basis[i] = PQCLEAN_HQC192_CLEAN_gf_square(basis[i]) ^ basis[i];
    }

    // Step 5: Compute f0 from u and f1 from v
    fft_t_rec(even_part, folded, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT_T - 1, next_basis);
    fft_t_rec(odd_part, twisted, f_coeffs / 2, PARAM_M - 1, PARAM_FFT_T - 1, next_basis);

    // Step 3: Compute g from g0 and g1
    radix_t(f, even_part, odd_part, PARAM_FFT_T);

    // Step 2: beta_m = 1 so f = g
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * Sizes up to m_f = 4 use hard-coded formulas; larger sizes build two half-size
 * polynomials Q and R from f, recurse with m_f - 1 and concatenate the results.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    // 16 coefficients; some outputs are reused by later lines, so the statement order matters
    case 4:
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    // 8 coefficients
    case 3:
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        return;

    // 4 coefficients
    case 2:
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        return;

    // 2 coefficients: plain de-interleaving
    case 1:
        f0[0] = f[0];
        f1[0] = f[1];
        return;

    // General case: recursion on two half-size polynomials
    default:
        ;
        // n is a quarter of the size of f
        size_t n = 1 << (m_f - 2);

        uint16_t Q[2 * (1 << (PARAM_FFT - 2))];
        uint16_t R[2 * (1 << (PARAM_FFT - 2))];

        uint16_t Q0[1 << (PARAM_FFT - 2)];
        uint16_t Q1[1 << (PARAM_FFT - 2)];
        uint16_t R0[1 << (PARAM_FFT - 2)];
        uint16_t R1[1 << (PARAM_FFT - 2)];

        // Both halves of Q start as the last quarter of f; R starts as the first half of f
        // (byte counts: 2 * n bytes = n elements, 4 * n bytes = 2n elements)
        memcpy(Q, f + 3 * n, 2 * n);
        memcpy(Q + n, f + 3 * n, 2 * n);
        memcpy(R, f, 4 * n);

        // Add the third quarter of f to the first half of Q, then that half of Q to the second half of R
        for (size_t i = 0 ; i < n ; ++i) {
            Q[i] ^= f[2 * n + i];
            R[n + i] ^= Q[i];
        }

        radix(Q0, Q1, Q, m_f - 1);
        radix(R0, R1, R, m_f - 1);

        // f0 = [R0 | Q0] and f1 = [R1 | Q1], n elements each
        memcpy(f0, R0, 2 * n);
        memcpy(f0 + n, Q0, 2 * n);
        memcpy(f1, R1, 2 * n);
        memcpy(f1 + n, Q1, 2 * n);
    }
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * Recursive subroutine of the function fft. <br>
 * f is modified in place: when betas[m - 1] differs from 1, coefficient i of f is
 * multiplied by betas[m - 1]^i before the radix conversion.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in] f Array of 2^{m_f} coefficients
 * @param[in] f_coeffs Number of coefficients of f (one more than its degree)
 * @param[in] m Number of betas
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)];
    uint16_t f1[1 << (PARAM_FFT - 2)];
    uint16_t gammas[PARAM_M - 2];
    uint16_t deltas[PARAM_M - 2];
    uint16_t gammas_sums[1 << (PARAM_M - 2)];
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    const size_t half = 1 << (m - 1);

    // Step 1: f = f[0] + f[1].x is evaluated directly on every subset sum
    if (m_f == 1) {
        uint16_t scaled_betas[PARAM_M - (PARAM_FFT - 1)];
        for (size_t b = 0; b < m; ++b) {
            scaled_betas[b] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[b], f[1]);
        }

        w[0] = f[0];
        for (size_t level = 0; level < m; ++level) {
            const size_t span = (size_t) 1 << level;
            for (size_t t = 0; t < span; ++t) {
                w[span + t] = w[t] ^ scaled_betas[level];
            }
        }

        return;
    }

    // Step 2: compute g by twisting f with the powers of the last beta
    if (betas[m - 1] != 1) {
        uint16_t twist = 1;
        for (size_t i = 1; i < (1U << m_f); ++i) {
            twist = PQCLEAN_HQC192_CLEAN_gf_mul(twist, betas[m - 1]);
            f[i] = PQCLEAN_HQC192_CLEAN_gf_mul(twist, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: gammas are the betas divided by the last beta, deltas = gammas^2 + gammas
    for (uint8_t i = 0; i < m - 1; ++i) {
        gammas[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], PQCLEAN_HQC192_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Subset sums of the gammas
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    if (f_coeffs > 3) {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        memcpy(w + half, v, half * sizeof(uint16_t));
        w[0] = u[0];
        w[half] ^= u[0];
        for (size_t i = 1; i < half; ++i) {
            const uint16_t low = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], v[i]);
            w[i] = low;
            w[half + i] ^= low;
        }
        return;
    }

    // At most 3 coefficients in f: f1 is constant, so no second recursive call is needed
    w[0] = u[0];
    w[half] = u[0] ^ f1[0];
    for (size_t i = 1; i < half; ++i) {
        w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], f1[0]);
        w[half + i] = w[i] ^ f1[0];
    }
 }



 /**
 * @brief Evaluates f on all field elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * f itself is left untouched; the local halves f0 and f1 obtained from the radix conversion
 * are the arrays handed to fft_rec, which may twist them at each level.
 *
 * @param[out] w Array of 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number of coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1];
    uint16_t betas_sums[1 << (PARAM_M - 1)];
    uint16_t f0[1 << (PARAM_FFT - 1)];
    uint16_t f1[1 << (PARAM_FFT - 1)];
    uint16_t deltas[PARAM_M - 1];
    uint16_t u[1 << (PARAM_M - 1)];
    uint16_t v[1 << (PARAM_M - 1)];
    const size_t half = 1 << (PARAM_M - 1);

    // Follows Gao and Mateer algorithm
    compute_fft_betas(betas);

    // Step 1: PARAM_FFT > 1, nothing to do
    // Step 2: beta_m = 1, nothing to do

    // Step 3
    radix(f0, f1, f, PARAM_FFT);

    // Step 4: deltas = betas^2 + betas
    for (size_t b = 0; b < PARAM_M - 1; ++b) {
        deltas[b] = PQCLEAN_HQC192_CLEAN_gf_square(betas[b]) ^ betas[b];
    }

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    // Subset sums of the betas (the gammas of this top-level call), needed for the recombination
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 6, 7 and error polynomial computation
    memcpy(w + half, v, half * sizeof(uint16_t));

    // Evaluation at 0
    w[0] = u[0];

    // Evaluation at 1
    w[half] ^= u[0];

    // Evaluations at the remaining field elements
    for (size_t i = 1; i < half; ++i) {
        const uint16_t low = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[i], v[i]);
        w[i] = low;
        w[half + i] ^= low;
    }
 }



 /**
 * @brief Arranges the received word vector in a form w such that applying the additive FFT transpose to w yields the BCH syndromes of the received word vector.
 *
 * Since the received word vector gives coefficients of the primitive element alpha, we twist accordingly. <br>
 * Furthermore, the additive FFT transpose needs elements indexed by their decomposition on the chosen basis,
 * so we apply the adequate permutation.
 *
 * @param[out] w Array of size 2^PARAM_M
 * @param[in] vector Received word, PARAM_N1 bits packed into VEC_N1_SIZE_64 64-bit words
 */
 void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector) {
    uint16_t r[1 << PARAM_M];
    uint16_t gammas[PARAM_M - 1];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    const size_t half = 1 << (PARAM_M - 1);

    // Unpack the PARAM_N1 bits of the received word, one bit per entry of r
    for (size_t pos = 0; pos < PARAM_N1; ++pos) {
        r[pos] = (uint8_t) ((vector[pos / 64] >> (pos % 64)) & 1);
    }

    // Complete r with zeros
    memset(r + PARAM_N1, 0, sizeof(uint16_t) * ((1 << PARAM_M) - PARAM_N1));

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // Twist and permute r adequately to obtain w.
    // -bit is either 0 or all ones, so "-bit & x" selects x without branching.
    w[0] = 0;
    w[half] = -r[0] & 1;
    for (size_t idx = 1; idx < half; ++idx) {
        const uint16_t elt = gammas_sums[idx];
        const uint16_t elt_plus_one = elt ^ 1;

        w[idx] = -r[PQCLEAN_HQC192_CLEAN_gf_log(elt)] & elt;
        w[half + idx] = -r[PQCLEAN_HQC192_CLEAN_gf_log(elt_plus_one)] & elt_plus_one;
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every field element whose evaluation in w is zero, one bit of error is toggled (XOR).
 * The bit for a nonzero element e sits at position PARAM_GF_MUL_ORDER - log(e); the bit for
 * w[0] sits at position 0. Position p is bit (p % 64) of word (p / 64).
 *
 * @param[in,out] error Array of 64-bit words that is XORed into; positions up to PARAM_GF_MUL_ORDER
 *                      are touched, so it must hold at least PARAM_GF_MUL_ORDER / 64 + 1 words
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQC192_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1];
    uint16_t gammas_sums[1 << (PARAM_M - 1)];
    size_t k = 1 << (PARAM_M - 1);
    size_t index = PARAM_GF_MUL_ORDER;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // 1 ^ ((uint16_t) -x >> 15) is 1 when x == 0 and 0 for any nonzero x below 2^15,
    // which gives a branch-free "is zero" test on the evaluations.
    error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15);

    // w[k] is the evaluation at 1 (log 0), i.e. position PARAM_GF_MUL_ORDER.
    // The word index must be index / 64, like every other position written below.
    uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15);
    error[index / 64] ^= bit << (index % 64);

    for (size_t i = 1 ; i < k ; ++i) {
        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i]);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15);
        error[index / 64] ^= bit << (index % 64);

        index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i] ^ 1);
        bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15);
        error[index / 64] ^= bit << (index % 64);
    }
 }
--- a/crypto_kem/hqc-192/clean/fft.h
+++ b/crypto_kem/hqc-192/clean/fft.h
@@ -0,0 +1,25 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQC192_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs);

 void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector);


 void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 void PQCLEAN_HQC192_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w);


 #endif
--- a/crypto_kem/hqc-192/clean/gf.c
+++ b/crypto_kem/hqc-192/clean/gf.c
--- a/crypto_kem/hqc-192/clean/gf.h
+++ b/crypto_kem/hqc-192/clean/gf.h
@@ -0,0 +1,29 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQC192_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m);


 uint16_t PQCLEAN_HQC192_CLEAN_gf_log(uint16_t elt);

 uint16_t PQCLEAN_HQC192_CLEAN_gf_mul(uint16_t a, uint16_t b);

 uint16_t PQCLEAN_HQC192_CLEAN_gf_square(uint16_t a);

 uint16_t PQCLEAN_HQC192_CLEAN_gf_inverse(uint16_t a);

 uint16_t PQCLEAN_HQC192_CLEAN_gf_mod(uint16_t i);


 #endif
--- a/crypto_kem/hqc-192/clean/gf2x.c
+++ b/crypto_kem/hqc-192/clean/gf2x.c
@@ -0,0 +1,155 @@
 #include "gf2x.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "randombytes.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * \file gf2x.c
 * \brief Implementation of multiplication of two polynomials
 */


 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
 static void reduce(uint64_t *o, const uint64_t *a);
 static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

 /**
 * @brief Exchange two entries of a table
 *
 * After the call, tab[elt1] holds the former value of tab[elt2] and vice versa.
 * Passing the same index twice leaves the table unchanged.
 *
 * @param[in] tab Pointer to the table
 * @param[in] elt1 Index of the first element
 * @param[in] elt2 Index of the second element
 */
 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t *first = &tab[elt1];
    uint16_t *second = &tab[elt2];
    const uint16_t saved = *first;

    *first = *second;
    *second = saved;
 }



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * The bits of a at positions PARAM_N and above are folded back onto the low bits:
 * each output word is the matching input word XORed with the input shifted down by PARAM_N bits.
 * The last output word is then masked with RED_MASK.
 *
 * @param[in] a Pointer to the polynomial a(x); words 0 .. 2 * VEC_N_SIZE_64 - 1 are read
 * @param[out] o Pointer to the result (VEC_N_SIZE_64 words)
 */
 static void reduce(uint64_t *o, const uint64_t *a) {
    const int bit_offset = PARAM_N & 63;

    for (uint32_t word = 0; word < VEC_N_SIZE_64; word++) {
        // Bits PARAM_N + 64 * word and above straddle two consecutive input words
        const uint64_t from_low_word = a[word + VEC_N_SIZE_64 - 1] >> bit_offset;
        const uint64_t from_high_word = (uint64_t) (a[word + VEC_N_SIZE_64] << (64 - bit_offset));

        o[word] = a[word] ^ from_low_word ^ from_high_word;
    }

    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Computes the product of the sparse polynomial a1 with the dense polynomial a2
 *
 *  o(x) ^= a1(x)a2(x)
 *
 * The product is XORed into o, so the caller provides the initial content of o. <br>
 * A table of the 16 copies of a2 shifted left by 0..15 bits is built first. Each monomial x^d of a1
 * then adds the copy shifted by (d mod 16) bits into o at an offset of (d / 16) 16-bit words. <br>
 * Both the placement of the 16 copies inside the table and the order in which the monomials of a1
 * are processed are shuffled with values drawn from ctx.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial a1 (list of the degrees of its monomials)
 * @param[in] a2 Pointer to the dense polynomial a2(x) (VEC_N_SIZE_64 words)
 * @param[in] weight Hamming weight of the sparse polynomial a1 (number of entries of a1)
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
 static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t carry;
    uint32_t dec, s;
    uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
    uint16_t permuted_table[16];
    uint16_t permutation_table[16];
    uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;

    // Start from the identity permutation of the 16 table slots
    for (uint32_t i = 0 ; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    // Shuffle the slots: entry i is exchanged with an entry chosen among positions i..15
    for (uint32_t i = 0 ; i < 15 ; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    // Slot for shift 0: plain copy of a2 followed by a zero word
    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    // Slots for shifts 1..15: a2 shifted left by i bits, the bits shifted out of a word are carried into the next one
    for (uint32_t i = 1 ; i < 16 ; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    // Start from the identity ordering of the monomials of a1
    for (uint32_t i = 0 ; i < weight ; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    // Shuffle the processing order of the monomials in the same way as the table slots
    for (uint32_t i = 0 ; i + 1 < weight ; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0 ; i < weight ; i++) {
        // Split the degree into a bit shift (low 4 bits) and an offset counted in 16-bit words
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        // NOTE(review): o is accessed through a uint16_t pointer; a 16-bit word offset only matches
        // a shift of 16 * s bits if the 64-bit words are stored little-endian - confirm target platforms.
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        // XOR the selected shifted copy into o, 16 bits at a time, lowest 16 bits of each word first
        for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
            *res_16++ ^= (uint16_t) (pt[j] >> 48);
        }
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * Multiplies a sparse polynomial <b>a1</b> (of Hamming weight equal to <b>weight</b>)
 * by a dense polynomial <b>a2</b>. The unreduced product is accumulated in a zeroed scratch
 * buffer and then reduced modulo \f$ X^n - 1\f$ into <b>o</b>.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial
 * @param[in] a2 Pointer to the dense polynomial
 * @param[in] weight Integer that is the weight of the sparse polynomial
 * @param[in] ctx Pointer to the randomness context
 */
 void PQCLEAN_HQC192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    // The convolution XORs into its output, so the scratch buffer has to start at zero
    uint64_t product[2 * VEC_N_SIZE_64 + 1] = {0};

    fast_convolution_mult(product, a1, a2, weight, ctx);
    reduce(o, product);
 }
--- a/crypto_kem/hqc-192/clean/gf2x.h
+++ b/crypto_kem/hqc-192/clean/gf2x.h
@@ -0,0 +1,18 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */

 #include "nistseedexpander.h"
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 void PQCLEAN_HQC192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


 #endif
--- a/crypto_kem/hqc-192/clean/hqc.c
+++ b/crypto_kem/hqc-192/clean/hqc.c
@@ -0,0 +1,143 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key is composed of the syndrome <b>s</b> as well as the <b>seed</b> used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the <b>seed</b> used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * Two fresh seeds of SEED_BYTES bytes are drawn from randombytes, the secret one first.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQC192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct sk_seedexpander;
    AES_XOF_struct pk_seedexpander;
    uint8_t sk_seed[SEED_BYTES] = {0};
    uint8_t pk_seed[SEED_BYTES] = {0};
    uint64_t x[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};

    // Create seed_expanders for public key and secret key
    // (each seed buffer is handed over in two parts: its start and the bytes from offset 32 on)
    randombytes(sk_seed, SEED_BYTES);
    seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(pk_seed, SEED_BYTES);
    seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute secret key: x is drawn first, then y, both from the secret seed expander.
    // Parsing the secret key back regenerates them in this same order.
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA);
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA);

    // Compute public key: s = x + y.h
    // (the secret seed expander is reused as the randomness context of the multiplication)
    PQCLEAN_HQC192_CLEAN_vect_set_random(&pk_seedexpander, h);
    PQCLEAN_HQC192_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &sk_seedexpander);
    PQCLEAN_HQC192_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64);

    // Parse keys to string (pk must be serialized first since sk embeds it)
    PQCLEAN_HQC192_CLEAN_hqc_public_key_to_string(pk, pk_seed, s);
    PQCLEAN_HQC192_CLEAN_hqc_secret_key_to_string(sk, sk_seed, pk);

 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>.
 * All the randomness used here is derived from <b>theta</b>, so the same (m, theta, pk) input
 * goes through the same sequence of draws.
 *
 * @param[out] u Vector u (first part of the ciphertext), VEC_N_SIZE_64 words
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption (bytes from offset 32 on are read as well)
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct seedexpander;
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};
    uint64_t r1[VEC_N_SIZE_64] = {0};
    uint32_t r2[PARAM_OMEGA_R] = {0};
    uint64_t e[VEC_N_SIZE_64] = {0};
    uint64_t tmp1[VEC_N_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_SIZE_64] = {0};

    // Create seed_expander from theta
    seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQC192_CLEAN_hqc_public_key_from_string(h, s, pk);

    // Generate r1, r2 and e (in this order, from the same seed expander)
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&seedexpander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E);

    // Compute u = r1 + r2.h
    PQCLEAN_HQC192_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &seedexpander);
    PQCLEAN_HQC192_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

    // Compute v = m.G by encoding the message, then bring it from PARAM_N1N2 to PARAM_N bits in tmp1
    PQCLEAN_HQC192_CLEAN_code_encode(v, m);
    PQCLEAN_HQC192_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

    // Compute v = m.G + s.r2 + e, then bring the result back to PARAM_N1N2 bits
    PQCLEAN_HQC192_CLEAN_vect_mul(tmp2, r2, s, PARAM_OMEGA_R, &seedexpander);
    PQCLEAN_HQC192_CLEAN_vect_add(tmp2, e, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQC192_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQC192_CLEAN_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y and decodes it. A fresh random seed is drawn on every call, only to
 * provide the randomness context of the multiplication.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC192_CLEAN_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint64_t x[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t tmp1[VEC_N_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_SIZE_64] = {0};
    AES_XOF_struct perm_seedexpander;
    uint8_t perm_seed[SEED_BYTES] = {0};

    // Retrieve x, y, pk from secret key (only y is used below)
    PQCLEAN_HQC192_CLEAN_hqc_secret_key_from_string(x, y, pk, sk);

    // Fresh seed expander used as the randomness context of the multiplication
    randombytes(perm_seed, SEED_BYTES);
    seedexpander_init(&perm_seedexpander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute v - u.y (v is first brought from PARAM_N1N2 to PARAM_N bits)
    PQCLEAN_HQC192_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQC192_CLEAN_vect_mul(tmp2, y, u, PARAM_OMEGA, &perm_seedexpander);
    PQCLEAN_HQC192_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);


    // Compute m by decoding v - u.y
    PQCLEAN_HQC192_CLEAN_code_decode(m, tmp2);
 }
--- a/crypto_kem/hqc-192/clean/hqc.h
+++ b/crypto_kem/hqc-192/clean/hqc.h
@@ -0,0 +1,21 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 #include <stdint.h>

 void PQCLEAN_HQC192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 void PQCLEAN_HQC192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint64_t *m, unsigned char *theta, const unsigned char *pk);

 void PQCLEAN_HQC192_CLEAN_hqc_pke_decrypt(uint64_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-192/clean/kem.c
+++ b/crypto_kem/hqc-192/clean/kem.c
@@ -0,0 +1,138 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper around the HQC_PKE key generation.
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (always)
 */
 int PQCLEAN_HQC192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

    PQCLEAN_HQC192_CLEAN_hqc_pke_keygen(pk, sk);
    return 0;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn, encrypted with randomness theta = SHA3-512(m), and the
 * shared secret is SHA-512(m || u || v). The ciphertext carries u, v and d = SHA-512(m).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 (always)
 */
 int PQCLEAN_HQC192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    PQCLEAN_HQC192_CLEAN_vect_set_random_from_randombytes(m);

    // Computing theta = SHA3-512(m)
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQC192_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d = SHA-512(m)
    sha512(d, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Computing shared secret: ss = SHA-512(m || u || v)
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES, u, VEC_N_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQC192_CLEAN_hqc_ciphertext_to_string(ct, u, v, d);


    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted to m', which is re-encrypted with theta' = SHA3-512(m').
 * The shared secret SHA-512(m' || u || v) is kept only if the re-encryption (u', v') and
 * d' = SHA-512(m') match the received u, v and d; otherwise ss is set to all zeros.
 *
 * The three checks are always evaluated and combined without short-circuiting, and d is
 * compared byte by byte without early exit, so the work done here does not depend on
 * which part of the ciphertext mismatches.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQC192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    int8_t result = -1;
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t m[VEC_K_SIZE_64] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQC192_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk (the public key is stored right after the seed)
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQC192_CLEAN_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, (uint8_t *) m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQC192_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, (unsigned char *) m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES, u, VEC_N_SIZE_BYTES);
    memcpy(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, v, VEC_N1N2_SIZE_BYTES);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    // d and d' are compared by accumulating every byte difference (no early exit)
    uint8_t d_diff = 0;
    for (size_t i = 0 ; i < SHA512_BYTES ; i++) {
        d_diff |= (uint8_t) (d[i] ^ d2[i]);
    }
    // (d_diff - 1) wraps to all ones only when d_diff == 0, so its top bit is the "equal" flag
    uint8_t d_equal = (uint8_t) (((uint32_t) d_diff - 1) >> 31);
    uint8_t u_equal = (uint8_t) (PQCLEAN_HQC192_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0);
    uint8_t v_equal = (uint8_t) (PQCLEAN_HQC192_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0);

    // Bitwise AND: all three checks are always evaluated
    result = (int8_t) (u_equal & v_equal & d_equal);

    // Keep ss when result == 1, clear it when result == 0
    uint8_t keep_mask = (uint8_t) (0 - result);
    for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) {
        ss[i] &= keep_mask;
    }

    // Map 1 (success) to 0 and 0 (failure) to -1
    result--;


    return result;
 }
--- a/crypto_kem/hqc-192/clean/parameters.h
+++ b/crypto_kem/hqc-192/clean/parameters.h
@@ -0,0 +1,123 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H
 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */

 #include "api.h"
 #include "api.h"
 #include "vector.h"


 /* Integer division of a by b rounded up. Both arguments are evaluated more than once. */
 #define CEIL_DIVIDE(a, b)  (((a)/(b)) + ((a) % (b) == 0 ? 0 : 1)) /*!< Divide a by b and ceil the result*/
 /* Mask whose (a % size) low bits are set. The shifted constant is unsigned long long
  * (at least 64 bits on every platform), so shift counts up to 63 stay defined even
  * where unsigned long is only 32 bits wide. */
 #define BITMASK(a, size) ((1ULL << ((a) % (size))) - 1) /*!< Create a mask*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of BCH code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of the repetition code)
  #define PARAM_N1N2                            Define the parameter n1 * n2 of the scheme (length of the tensor code)
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_T                               Define a threshold for decoding repetition code word (PARAM_T = (PARAM_N2 - 1) / 2)

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the BCH code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimial form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the BCH code
  #define PARAM_G                               Define the size of the generator polynomial of BCH code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=57
                                                The smallest power of 2 greater than 57+1 is 64=2^6
  #define PARAM_FFT_T                           The additive FFT transpose computes a (2^PARAM_FFT_T)-sized syndrome vector
                                                We want to compute 2*PARAM_DELTA=114 syndromes
                                                The smallest power of 2 greater than 114 is 2^7
  #define PARAM_BCH_POLY                        Generator polynomial of the BCH code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 /* Code and weight parameters. PARAM_N1N2 = PARAM_N1 * PARAM_N2 = 766 * 59. */
 #define PARAM_N                                 45197
 #define PARAM_N1                                766
 #define PARAM_N2                                59
 #define PARAM_N1N2                              45194
 #define PARAM_OMEGA                             101
 #define PARAM_OMEGA_E                           117
 #define PARAM_OMEGA_R                           117
 #define PARAM_SECURITY                          192
 #define PARAM_DFR_EXP                           192

 /* Byte sizes of the serialized objects, taken from the constants declared in api.h. */
 #define SECRET_KEY_BYTES                        PQCLEAN_HQC192_CLEAN_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQC192_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQC192_CLEAN_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQC192_CLEAN_CRYPTO_CIPHERTEXTBYTES

 /* 16768087 = 371 * PARAM_N, the largest multiple of PARAM_N that does not exceed 2^24.
  * NOTE(review): presumably the bound for rejection sampling of 24-bit values - confirm in vector.c. */
 #define UTILS_REJECTION_THRESHOLD               16768087
 /* Storage sizes in bytes, rounded up. */
 #define VEC_K_SIZE_BYTES                        CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_N1_SIZE_BYTES                       CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 /* Storage sizes in 64-bit words, rounded up. */
 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 64)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 64)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 /* (PARAM_N2 - 1) / 2 = (59 - 1) / 2 */
 #define PARAM_T                                 29

 /* BCH code and field parameters.
  * PARAM_GF_POLY 0x409 encodes x^10 + x^3 + 1; PARAM_GF_MUL_ORDER = 2^PARAM_M - 1;
  * PARAM_G = PARAM_N1 - PARAM_K + 1 = 766 - 256 + 1. */
 #define PARAM_DELTA                             57
 #define PARAM_M                                 10
 #define PARAM_GF_POLY                           0x409
 #define PARAM_GF_MUL_ORDER                      1023
 #define PARAM_K                                 256
 #define PARAM_G                                 511
 #define PARAM_FFT                               6
 #define PARAM_FFT_T                             7
 #define PARAM_BCH_POLY { \
        1,1,0,0,0,0,1,0,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,1,1,0,1,1, \
        1,1,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0, \
        0,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, \
        1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0,1,1,0,1,0,1,0,1,0,1,1,1,1,0,1,0, \
        0,1,1,0,1,0,1,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,1,1,1,0, \
        1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1,0,0,1,1,0,1,0,0,0,0,1,0, \
        0,1,0,0,1,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,0,1,0,1, \
        1,1,1,1,1,0,1,0,1,0,1,1,0,0,0,1,1,0,0,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0, \
        1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,1,1, \
        1,1,1,0,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,0,1,0,0,1,1,1,1,1,0,1,0,1, \
        0,0,0,0,1,0,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,0,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1, \
        1,0,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,0,0,1,1, \
        0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,1,0,1, \
        1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,1 \
    };

 /* 0x1fff keeps the 13 low bits of a word; 13 = PARAM_N % 64, the number of bits used in the last word of a PARAM_N-bit vector. */
 #define RED_MASK                                0x0000000000001fffUL
 #define SHA512_BYTES                            64
 /* Seed buffers are passed to seedexpander_init in two parts: the buffer start and the bytes from offset 32 on. */
 #define SEED_BYTES                              40
 /* 4294967295 = 2^32 - 1 */
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/crypto_kem/hqc-192/clean/parsing.c
+++ b/crypto_kem/hqc-192/clean/parsing.c
@@ -0,0 +1,121 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */



 /**
 * @brief Serializes a secret key into a byte string
 *
 * The string starts with the SEED_BYTES bytes of the secret seed and is followed by the
 * PUBLIC_KEY_BYTES bytes of the serialized public key.
 *
 * @param[out] sk Destination string; SEED_BYTES + PUBLIC_KEY_BYTES bytes are written
 * @param[in] sk_seed Seed of the secret key; SEED_BYTES bytes are read
 * @param[in] pk Serialized public key; PUBLIC_KEY_BYTES bytes are read
 */
 void PQCLEAN_HQC192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_field = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_field, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Rebuilds the secret vectors and the public key from a secret key string
 *
 * The first SEED_BYTES bytes of <b>sk</b> seed a seed expander from which <b>x</b> and then
 * <b>y</b> are sampled, each with weight PARAM_OMEGA. The remaining PUBLIC_KEY_BYTES bytes
 * are copied out as the public key string.
 *
 * @param[out] x Dense uint64_t representation of vector x; the sampler ORs bits into it, so it should be zero on entry
 * @param[out] y Positions (uint32_t) of the non-zero coordinates of vector y
 * @param[out] pk String receiving the public key; PUBLIC_KEY_BYTES bytes are written
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQC192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
    uint8_t seed_copy[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *pk_field = sk + SEED_BYTES;

    memcpy(seed_copy, sk, SEED_BYTES);
    seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);

    // x is drawn before y: both consume the same expander stream, so the order matters
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&expander, y, PARAM_OMEGA);

    memcpy(pk, pk_field, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Serializes a public key into a byte string
 *
 * The string starts with the SEED_BYTES bytes of the public seed and is followed by the
 * VEC_N_SIZE_BYTES bytes of the syndrome <b>s</b>.
 *
 * @param[out] pk Destination string; SEED_BYTES + VEC_N_SIZE_BYTES bytes are written
 * @param[in] pk_seed Seed of the public key; SEED_BYTES bytes are read
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQC192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *syndrome_field = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    memcpy(syndrome_field, s, VEC_N_SIZE_BYTES);
 }



 /**
 * @brief Rebuilds the vectors h and s from a public key string
 *
 * The first SEED_BYTES bytes of <b>pk</b> seed a seed expander from which <b>h</b> is
 * generated; the following VEC_N_SIZE_BYTES bytes are copied into <b>s</b>.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQC192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed_copy[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *syndrome_field = pk + SEED_BYTES;

    memcpy(seed_copy, pk, SEED_BYTES);
    seedexpander_init(&expander, seed_copy, seed_copy + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQC192_CLEAN_vect_set_random(&expander, h);

    memcpy(s, syndrome_field, VEC_N_SIZE_BYTES);
 }


 /**
 * @brief Serializes a ciphertext into a byte string
 *
 * The three components are stored back to back: <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQC192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *cursor = ct;

    memcpy(cursor, u, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;

    memcpy(cursor, v, VEC_N1N2_SIZE_BYTES);
    cursor += VEC_N1N2_SIZE_BYTES;

    memcpy(cursor, d, SHA512_BYTES);
 }


 /**
 * @brief Splits a ciphertext string into its components
 *
 * Reads, in this order, <b>u</b> (VEC_N_SIZE_BYTES bytes), <b>v</b> (VEC_N1N2_SIZE_BYTES bytes)
 * and the hash <b>d</b> (SHA512_BYTES bytes) from <b>ct</b>.
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQC192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *cursor = ct;

    memcpy(u, cursor, VEC_N_SIZE_BYTES);
    cursor += VEC_N_SIZE_BYTES;

    memcpy(v, cursor, VEC_N1N2_SIZE_BYTES);
    cursor += VEC_N1N2_SIZE_BYTES;

    memcpy(d, cursor, SHA512_BYTES);
 }
--- a/crypto_kem/hqc-192/clean/parsing.h
+++ b/crypto_kem/hqc-192/clean/parsing.h
@@ -0,0 +1,29 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 *
 * Conversions between the byte-string form of keys and ciphertexts and the
 * vectors they are made of.
 */

 #include <stdint.h>

 /* Secret key string: secret seed followed by the public key string. */
 void PQCLEAN_HQC192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 /* Regenerates x and y from the seed stored in sk and copies out the public key string. */
 void PQCLEAN_HQC192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


 /* Public key string: public seed followed by the syndrome s. */
 void PQCLEAN_HQC192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 /* Regenerates h from the seed stored in pk and copies out the syndrome s. */
 void PQCLEAN_HQC192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 /* Ciphertext string: u, then v, then the hash d. */
 void PQCLEAN_HQC192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 /* Splits a ciphertext string back into u, v and the hash d. */
 void PQCLEAN_HQC192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/crypto_kem/hqc-192/clean/repetition.c
+++ b/crypto_kem/hqc-192/clean/repetition.c
@@ -0,0 +1,91 @@
 #include "parameters.h"
 #include "repetition.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 /**
 * @file repetition.c
 * @brief Implementation of repetition codes
 */

 /*
 * Mask selecting the PARAM_N2 low-order bits of a 64-bit word.
 *
 * The shifted operand is explicitly 64 bits wide. With `1UL` the shift is undefined on
 * platforms where unsigned long has 32 bits as soon as PARAM_N2 >= 32, and the code words
 * handled in this file are wider than that (the encoder masks span 59 bits).
 */
 #define MASK_N2                              ((((uint64_t) 1) << PARAM_N2) - 1)

 static inline int32_t popcount(uint64_t n);

 /**
 * @brief XORs the repetition of one message bit into the code word
 *
 * The repeated bit starts at bit position <b>start</b> of <b>em</b> and may spill into the
 * following 64-bit word. Both words are always read and written, even when the XOR
 * value is zero.
 *
 * @param[out] em Pointer to an array that is the code word
 * @param[in] bit Message bit (0 or 1) to repeat
 * @param[in] start Bit position in em where the repetition begins
 */
 static inline void xor_repeated_bit(uint64_t *em, uint8_t bit, uint32_t start) {
    static const uint64_t spread[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}};
    uint16_t offset = (start & 0x3f);
    uint64_t *word = em + (start >> 6);

    word[0] ^= spread[bit][0] << offset;
    // part of the repetition that does not fit in word[0]
    word[1] ^= spread[bit][1] >> ((63 - offset));
 }

 /**
 * @brief Encodes each bit of the message m with the repetition code
 *
 * Message bit number k is repeated over the PARAM_N2 code word bits starting at position
 * PARAM_N2 * k. The repetitions are XOR-ed into <b>em</b>, so the content <b>em</b> has on
 * entry is combined with the encoding.
 *
 * @param[out] em Pointer to an array that is the code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC192_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
    size_t word, shift;

    // all message words but the last one are complete
    for (word = 0 ; word < VEC_N1_SIZE_64 - 1 ; word++) {
        for (shift = 0 ; shift < 64 ; shift++) {
            uint8_t bit = (m[word] >> shift) & 0x1;
            uint32_t start = PARAM_N2 * ((word << 6) + shift);
            xor_repeated_bit(em, bit, start);
        }
    }

    // the last message word only holds PARAM_N1 mod 64 bits
    for (shift = 0 ; shift < (PARAM_N1 & 0x3f) ; shift++) {
        uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> shift) & 0x1;
        uint32_t start = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + shift);
        xor_repeated_bit(em, bit, start);
    }
 }



 /**
 * @brief Computes the Hamming weight of the 64-bit integer n
 *
 * Bit counts are accumulated in parallel over fields of 2, 4 and then 8 bits; a final
 * multiplication adds the eight byte counts into the most significant byte
 * (Henry S. Warren, "Hacker's Delight", chap. 5, p. 66).
 *
 * @param[in] n a 64-bit integer
 * @returns the Hamming weight of n
 */
 static inline int32_t popcount(uint64_t n) {
    const uint64_t pairs = 0x5555555555555555UL;
    const uint64_t nibbles = 0x3333333333333333UL;
    const uint64_t bytes = 0x0f0f0f0f0f0f0f0fUL;
    uint64_t acc = n - ((n >> 1) & pairs);

    acc = (acc & nibbles) + ((acc >> 2) & nibbles);
    acc = (acc + (acc >> 4)) & bytes;
    acc *= 0x0101010101010101UL;

    return (int32_t) (acc >> 56);
 }



 /**
 * @brief Decodes a code word into a message using the repetition code
 *
 * Majority decoding: as PARAM_N2 = 2 * PARAM_T + 1, a block of PARAM_N2 bits is decoded
 * to 1 when its Hamming weight is greater than PARAM_T and to 0 otherwise.
 * Decoded bits are OR-ed into <b>m</b>, so bits already set in <b>m</b> on entry stay set.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t out_bit = 0;

    for (size_t first = 0 ; first < PARAM_N1N2 - PARAM_N2 + 1 ; first += PARAM_N2) {
        size_t last = first + PARAM_N2 - 1;
        size_t first_word = first >> 6;
        size_t last_word = last >> 6;

        // bits of the block held in the word where it starts
        uint64_t low_part = (em[first_word] >> (first & 63)) & MASK_N2;
        // bits of the word where the block ends, moved to the top of the word
        uint64_t high_part = em[last_word] << (63 - (last & 63));
        // 1 when the block spans two words; multiplying keeps the selection branch-free
        int64_t straddles = (last_word == (first_word + 1));

        uint64_t ones = popcount(low_part | (high_part * straddles));

        m[out_bit >> 6] |= ((uint64_t) (ones > PARAM_T)) << (out_bit & 63);
        out_bit++;
    }
 }
--- a/crypto_kem/hqc-192/clean/repetition.h
+++ b/crypto_kem/hqc-192/clean/repetition.h
@@ -0,0 +1,19 @@
 #ifndef REPETITION_H
 #define REPETITION_H


 /**
 * @file repetition.h
 * @brief Header file for repetition.c
 */

 #include <stdint.h>

 /* XORs the repetition-code encoding of the message m into the code word em. */
 void PQCLEAN_HQC192_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m);

 /* Majority-decodes the code word em and ORs the resulting bits into the message m. */
 void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em);


 #endif
--- a/crypto_kem/hqc-192/clean/vector.c
+++ b/crypto_kem/hqc-192/clean/vector.c
@@ -0,0 +1,226 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */


 /**
 * @brief Samples the support of a vector of a given Hamming weight
 *
 * The vector is stored by position: <b>v</b> receives <b>weight</b> pairwise distinct
 * coordinates in the interval [0, PARAM_N - 1].
 * Each coordinate is obtained by rejection sampling on 24-bit values taken from the seed expander:
 *  1. Read three bytes and assemble them (most significant byte first) into \f$ x \in [0, 2^{24}[ \f$.
 *  2. If \f$ x \geq \f$ UTILS_REJECTION_THRESHOLD, go to 1.
 *  3. The candidate is \f$ r = x \mod \f$ PARAM_N.
 *
 * UTILS_REJECTION_THRESHOLD is precomputed in parameters.h as the largest multiple of PARAM_N
 * not exceeding \f$ 2^{24} \f$. A candidate already present in <b>v</b> is discarded and a new one is drawn.
 * The byte pool is refilled from the seed expander every time it has been used up.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array receiving the <b>weight</b> coordinates
 * @param[in] weight Integer that is the Hamming weight
 */
 void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    size_t pool_size = 3 * weight;
    size_t cursor = 0;
    uint32_t filled = 0;

    seedexpander(ctx, pool, pool_size);

    while (filled < weight) {
        uint32_t candidate = 0;
        uint8_t duplicate = 0;

        do {
            if (cursor == pool_size) {
                seedexpander(ctx, pool, pool_size);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;

        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;

        // scan every coordinate accepted so far, without stopping at the first match
        for (uint32_t k = 0 ; k < filled ; k++) {
            duplicate |= (v[k] == candidate);
        }

        if (!duplicate) {
            v[filled] = candidate;
            filled++;
        }
    }
 }



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * <b>weight</b> pairwise distinct coordinates in the interval [0, PARAM_N - 1] are sampled and the
 * corresponding bits are set in <b>v</b>. Bits are OR-ed in, so bits already set in <b>v</b> on entry stay set.
 * Each coordinate is obtained by rejection sampling on 24-bit values taken from the seed expander:
 *  1. Read three bytes and assemble them (most significant byte first) into \f$ x \in [0, 2^{24}[ \f$.
 *  2. If \f$ x \geq \f$ UTILS_REJECTION_THRESHOLD, go to 1.
 *  3. The candidate is \f$ r = x \mod \f$ PARAM_N.
 *
 * UTILS_REJECTION_THRESHOLD is precomputed in parameters.h as the largest multiple of PARAM_N
 * not exceeding \f$ 2^{24} \f$. A candidate that was already drawn is discarded and a new one is drawn.
 * The byte pool is refilled from the seed expander every time it has been used up.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array in which the sampled bits are set
 * @param[in] weight Integer that is the Hamming weight
 */
 void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    uint32_t support[PARAM_OMEGA_R] = {0};
    size_t pool_size = 3 * weight;
    size_t cursor = 0;
    uint32_t filled = 0;

    seedexpander(ctx, pool, pool_size);

    while (filled < weight) {
        uint32_t candidate = 0;
        uint8_t duplicate = 0;

        do {
            if (cursor == pool_size) {
                seedexpander(ctx, pool, pool_size);
                cursor = 0;
            }

            candidate  = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;

        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;

        // scan every coordinate accepted so far, without stopping at the first match
        for (uint32_t k = 0 ; k < filled ; k++) {
            duplicate |= (support[k] == candidate);
        }

        if (!duplicate) {
            support[filled] = candidate;
            filled++;
        }
    }

    // turn the list of coordinates into bits of v
    for (uint16_t i = 0 ; i < weight ; ++i) {
        uint32_t position = support[i];
        v[position >> 6] |= ((uint64_t) 1) << (position & 63);
    }
 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seed expander and copied into <b>v</b>; the last
 * 64-bit word is then masked with BITMASK(PARAM_N, 64) to drop the extra bits.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array receiving the vector
 */
 void PQCLEAN_HQC192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *last_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, stream, sizeof stream);
    memcpy(v, stream, sizeof stream);

    *last_word &= BITMASK(PARAM_N, 64);
 }



 /**
 * @brief Generates a random vector
 *
 * VEC_K_SIZE_BYTES bytes are obtained from the randombytes function and copied into <b>v</b>.
 *
 * @param[out] v Pointer to an array receiving VEC_K_SIZE_BYTES random bytes
 */
 void PQCLEAN_HQC192_CLEAN_vect_set_random_from_randombytes(uint64_t *v) {
    uint8_t fresh[VEC_K_SIZE_BYTES] = {0};

    randombytes(fresh, sizeof fresh);
    memcpy(v, fresh, sizeof fresh);
 }



 /**
 * @brief Adds two binary vectors (word-wise XOR)
 *
 * Words are processed from index 0 upwards, each output word being written right after
 * its two input words are read.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Number of 64-bit words in each vector
 */
 void PQCLEAN_HQC192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t word = 0;

    while (word < size) {
        o[word] = v1[word] ^ v2[word];
        word++;
    }
 }


 /**
 * @brief Compares two vectors in constant time
 *
 * Every one of the <b>size</b> bytes is read and the differences are accumulated with bitwise
 * operations only, so the running time does not depend on where (or whether) the vectors
 * differ. memcmp() offers no such guarantee and may stop at the first mismatching byte.
 *
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the number of bytes to compare
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 int PQCLEAN_HQC192_CLEAN_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    const uint8_t *lhs = (const uint8_t *) v1;
    const uint8_t *rhs = (const uint8_t *) v2;
    uint64_t diff = 0;

    for (uint32_t i = 0 ; i < size ; ++i) {
        diff |= (uint64_t) (lhs[i] ^ rhs[i]);
    }

    // diff is in [0, 255]: its two's complement negation has the top bit set iff diff != 0
    return (int) ((~diff + 1) >> 63);
 }



 /**
 * @brief Resizes a vector so that it contains <b>size_o</b> bits
 *
 * When the output is not smaller than the input, the CEIL_DIVIDE(size_v, 8) bytes of the input
 * are copied. When it is smaller, VEC_N1N2_SIZE_BYTES bytes are copied and, if <b>size_o</b> is not
 * a multiple of 64, only the <b>size_o</b> mod 64 low-order bits of word VEC_N1N2_SIZE_64 - 1 are
 * kept. Note that the shrinking case uses the N1N2 sizes whatever the value of <b>size_o</b>.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQC192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    uint32_t kept_bits = size_o % 64;
    if (kept_bits != 0) {
        // clear the 64 - kept_bits high-order bits of the last word in one step
        o[VEC_N1N2_SIZE_64 - 1] &= (~(uint64_t) 0) >> (64 - kept_bits);
    }
 }
--- a/crypto_kem/hqc-192/clean/vector.h
+++ b/crypto_kem/hqc-192/clean/vector.h
@@ -0,0 +1,31 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */

 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 /* Samples weight distinct coordinates in [0, PARAM_N - 1] and stores them in v. */
 void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight);

 /* Samples weight distinct coordinates in [0, PARAM_N - 1] and sets the matching bits of v. */
 void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 /* Fills v with PARAM_N bits taken from the seed expander. */
 void PQCLEAN_HQC192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 /* Fills v with VEC_K_SIZE_BYTES bytes taken from randombytes(). */
 void PQCLEAN_HQC192_CLEAN_vect_set_random_from_randombytes(uint64_t *v);


 /* o = v1 XOR v2 over size 64-bit words. */
 void PQCLEAN_HQC192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 /* Returns 0 when the first size bytes of v1 and v2 are equal, a non-zero value otherwise. */
 int PQCLEAN_HQC192_CLEAN_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size);

 /* Copies v into o, truncating or keeping it according to the bit sizes size_v and size_o. */
 void PQCLEAN_HQC192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif