Some MS compiler fixes

4 years ago · a3c627fe6b
--- a/crypto_kem/hqc-128/avx2/bch.c
+++ b/crypto_kem/hqc-128/avx2/bch.c
@@ -13,104 +13,11 @@
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus using one masked subtraction.
 *
 * Only meaningful when i is less than 2*modulus, so that the result is
 * either i or i-modulus. The choice between the two is made with a mask
 * built from bit 15 of (i - modulus) instead of a comparison; this assumes
 * the values are small enough (below 2^15) for that bit to signal wrap-around.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    // 16-bit difference; bit 15 ends up set when i < modulus
    const uint16_t diff = (uint16_t) (i - modulus);

    // restore = 0xffff if the subtraction wrapped (i < modulus), 0 otherwise
    const uint16_t restore = (uint16_t) (0U - (uint16_t) (diff >> 15));

    // Add the modulus back only in the wrapped case
    return (uint16_t) (diff + (restore & modulus));
 }



 /**
 * @brief Computes the odd binary cyclotomic cosets modulo 2^m-1 for integers less than upper_bound.
 *
 * For every odd i below upper_bound that is not yet marked, i becomes the
 * representative of its class: cosets[i] is set to i, and so is every index
 * reached by doubling modulo PARAM_GF_MUL_ORDER (PARAM_M - 1 doublings).
 * Entries equal to 0 are treated as "not yet assigned", so the caller is
 * expected to pass a zeroed array.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound The upper bound
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    uint16_t leader;

    // Walk the odd candidates only
    for (leader = 1; leader < upper_bound; leader += 2) {
        uint16_t member = leader;
        size_t remaining;

        // Skip indices already claimed by an earlier class
        if (cosets[leader] != 0) {
            continue;
        }

        cosets[leader] = leader;

        // Fill in the rest of the class by repeated doubling
        for (remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = leader;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The caller must ensure 2*t does not exceed PARAM_GF_MUL_ORDER, since the
 * coset table holds exactly PARAM_GF_MUL_ORDER entries.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all ones when bch_poly[j] is nonzero (coefficients below 2^15),
            // so the log[0] lookup made for a zero coefficient is discarded
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity.
    // The index is bounded so the scan can never read past the end of cosets.
    while (2 * *t + 1 < (size_t) PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Computes the values alpha^ij for decoding syndromes
 *
--- a/crypto_kem/hqc-128/avx2/bch.h
+++ b/crypto_kem/hqc-128/avx2/bch.h
@@ -15,8 +15,6 @@
 void PQCLEAN_HQC128_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);

 void PQCLEAN_HQC128_AVX2_table_alphaij_generation(const uint16_t *exp);


--- a/crypto_kem/hqc-128/avx2/fft.c
+++ b/crypto_kem/hqc-128/avx2/fft.c
@@ -48,7 +48,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -325,7 +325,8 @@ void PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    uint64_t bit;
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-128/avx2/repetition.c
+++ b/crypto_kem/hqc-128/avx2/repetition.c
@@ -23,8 +23,10 @@
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    size_t t = 0;
    uint32_t b, bn, bi, c, cn, ci;
    uint64_t cx, ones;
    uint64_t mask;

    for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
        bn = b >> 6;
@@ -33,9 +35,9 @@ void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
        cn = c >> 6;
        ci = c & 63;
        cx = em[cn] << (63 - ci);
        int64_t verif = (cn == (bn + 1));
        ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx * verif));
        m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
        mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
        ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
        m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
        t++;
    }
 }
--- a/crypto_kem/hqc-128/clean/bch.c
+++ b/crypto_kem/hqc-128/clean/bch.c
@@ -11,8 +11,6 @@
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
@@ -21,97 +19,6 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus using one masked subtraction.
 *
 * Only meaningful when i is less than 2*modulus, so that the result is
 * either i or i-modulus. The choice between the two is made with a mask
 * built from bit 15 of (i - modulus) instead of a comparison; this assumes
 * the values are small enough (below 2^15) for that bit to signal wrap-around.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    // 16-bit difference; bit 15 ends up set when i < modulus
    const uint16_t diff = (uint16_t) (i - modulus);

    // restore = 0xffff if the subtraction wrapped (i < modulus), 0 otherwise
    const uint16_t restore = (uint16_t) (0U - (uint16_t) (diff >> 15));

    // Add the modulus back only in the wrapped case
    return (uint16_t) (diff + (restore & modulus));
 }



 /**
 * @brief Computes the odd binary cyclotomic cosets modulo 2^m-1 for integers less than upper_bound.
 *
 * For every odd i below upper_bound that is not yet marked, i becomes the
 * representative of its class: cosets[i] is set to i, and so is every index
 * reached by doubling modulo PARAM_GF_MUL_ORDER (PARAM_M - 1 doublings).
 * Entries equal to 0 are treated as "not yet assigned", so the caller is
 * expected to pass a zeroed array.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound The upper bound
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    uint16_t leader;

    // Walk the odd candidates only
    for (leader = 1; leader < upper_bound; leader += 2) {
        uint16_t member = leader;
        size_t remaining;

        // Skip indices already claimed by an earlier class
        if (cosets[leader] != 0) {
            continue;
        }

        cosets[leader] = leader;

        // Fill in the rest of the class by repeated doubling
        for (remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = leader;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The caller must ensure 2*t does not exceed PARAM_GF_MUL_ORDER, since the
 * coset table holds exactly PARAM_GF_MUL_ORDER entries.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all ones when bch_poly[j] is nonzero (coefficients below 2^15),
            // so the log[0] lookup made for a zero coefficient is discarded
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity.
    // The index is bounded so the scan can never read past the end of cosets.
    while (2 * *t + 1 < (size_t) PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Unpacks the message message to the array message_unpacked where each byte stores a bit of the message
 *
--- a/crypto_kem/hqc-128/clean/bch.h
+++ b/crypto_kem/hqc-128/clean/bch.h
@@ -17,7 +17,4 @@ void PQCLEAN_HQC128_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *me
 void PQCLEAN_HQC128_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);


 #endif
--- a/crypto_kem/hqc-128/clean/fft.c
+++ b/crypto_kem/hqc-128/clean/fft.c
@@ -51,7 +51,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -134,7 +134,8 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
    uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0};
    uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0};

    size_t i, n;
    uint16_t n;
    size_t i;

    n = 1 << (m_f - 2);
    memcpy(Q0, f0 + n, 2 * n);
@@ -627,7 +628,8 @@ void PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uin
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    uint64_t bit;
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-128/clean/gf2x.c
+++ b/crypto_kem/hqc-128/clean/gf2x.c
@@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;
    uint16_t i, j;

    for (uint32_t i = 0; i < 16; i++) {
    for (i = 0; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0; i < 15; i++) {
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    for (uint32_t i = 1; i < 16; i++) {
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0; i + 1 < weight; i++) {
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
--- a/crypto_kem/hqc-128/clean/repetition.c
+++ b/crypto_kem/hqc-128/clean/repetition.c
@@ -20,27 +20,26 @@ static inline int32_t popcount(uint64_t n);
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC128_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
    static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};
    for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
        for (size_t j = 0; j < 64; j++) {
            uint8_t bit = (m[i] >> j) & 0x1;
            uint32_t pos_r = PARAM_N2 * ((i << 6) + j);
            uint16_t idx_r = (pos_r & 0x3f);
            uint64_t *p64 = em;
            p64 += pos_r >> 6;
            *p64 ^= mask[bit][0] << idx_r;
            *(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
    uint16_t i, j, bit, idx_r;
    uint32_t pos_r;
    uint64_t *p64 = em;
    const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};
    for (i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
        for (j = 0; j < 64; j++) {
            bit = (m[i] >> j) & 0x1;
            pos_r = PARAM_N2 * ((i << 6) + j);
            idx_r = (pos_r & 0x3f);
            p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
            p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
        }
    }

    for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) {
        uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
        uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
        uint16_t idx_r = (pos_r & 0x3f);
        uint64_t *p64 = em;
        p64 += pos_r >> 6;
        *p64 ^= mask[bit][0] << idx_r;
        *(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
    for (j = 0; j < (PARAM_N1 & 0x3f); j++) {
        bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
        pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
        idx_r = (pos_r & 0x3f);
        p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
        p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
    }
 }

@@ -74,8 +73,10 @@ static inline int32_t popcount(uint64_t n) {
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    size_t t = 0;
    uint32_t b, bn, bi, c, cn, ci;
    uint64_t cx, ones;
    uint64_t mask;

    for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
        bn = b >> 6;
@@ -84,9 +85,9 @@ void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
        cn = c >> 6;
        ci = c & 63;
        cx = em[cn] << (63 - ci);
        int64_t verif = (cn == (bn + 1));
        ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx * verif));
        m[t >> 6] |= ((uint64_t) (ones > PARAM_T)) << (t & 63);
        mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
        ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
        m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
        t++;
    }
 }
--- a/crypto_kem/hqc-192/avx2/bch.c
+++ b/crypto_kem/hqc-192/avx2/bch.c
@@ -13,104 +13,11 @@
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus using one masked subtraction.
 *
 * Only meaningful when i is less than 2*modulus, so that the result is
 * either i or i-modulus. The choice between the two is made with a mask
 * built from bit 15 of (i - modulus) instead of a comparison; this assumes
 * the values are small enough (below 2^15) for that bit to signal wrap-around.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    // 16-bit difference; bit 15 ends up set when i < modulus
    const uint16_t diff = (uint16_t) (i - modulus);

    // restore = 0xffff if the subtraction wrapped (i < modulus), 0 otherwise
    const uint16_t restore = (uint16_t) (0U - (uint16_t) (diff >> 15));

    // Add the modulus back only in the wrapped case
    return (uint16_t) (diff + (restore & modulus));
 }



 /**
 * @brief Computes the odd binary cyclotomic cosets modulo 2^m-1 for integers less than upper_bound.
 *
 * For every odd i below upper_bound that is not yet marked, i becomes the
 * representative of its class: cosets[i] is set to i, and so is every index
 * reached by doubling modulo PARAM_GF_MUL_ORDER (PARAM_M - 1 doublings).
 * Entries equal to 0 are treated as "not yet assigned", so the caller is
 * expected to pass a zeroed array.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound The upper bound
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    uint16_t leader;

    // Walk the odd candidates only
    for (leader = 1; leader < upper_bound; leader += 2) {
        uint16_t member = leader;
        size_t remaining;

        // Skip indices already claimed by an earlier class
        if (cosets[leader] != 0) {
            continue;
        }

        cosets[leader] = leader;

        // Fill in the rest of the class by repeated doubling
        for (remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = leader;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The caller must ensure 2*t does not exceed PARAM_GF_MUL_ORDER, since the
 * coset table holds exactly PARAM_GF_MUL_ORDER entries.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all ones when bch_poly[j] is nonzero (coefficients below 2^15),
            // so the log[0] lookup made for a zero coefficient is discarded
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity.
    // The index is bounded so the scan can never read past the end of cosets.
    while (2 * *t + 1 < (size_t) PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Computes the values alpha^ij for decoding syndromes
 *
--- a/crypto_kem/hqc-192/avx2/bch.h
+++ b/crypto_kem/hqc-192/avx2/bch.h
@@ -15,8 +15,6 @@
 void PQCLEAN_HQC192_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);

 void PQCLEAN_HQC192_AVX2_table_alphaij_generation(const uint16_t *exp);


--- a/crypto_kem/hqc-192/avx2/fft.c
+++ b/crypto_kem/hqc-192/avx2/fft.c
@@ -325,7 +325,8 @@ void PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    uint64_t bit;
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-192/avx2/repetition.c
+++ b/crypto_kem/hqc-192/avx2/repetition.c
@@ -23,8 +23,10 @@
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    size_t t = 0;
    uint32_t b, bn, bi, c, cn, ci;
    uint64_t cx, ones;
    uint64_t mask;

    for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
        bn = b >> 6;
@@ -33,9 +35,9 @@ void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
        cn = c >> 6;
        ci = c & 63;
        cx = em[cn] << (63 - ci);
        int64_t verif = (cn == (bn + 1));
        ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx * verif));
        m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
        mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
        ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
        m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
        t++;
    }
 }
--- a/crypto_kem/hqc-192/clean/bch.c
+++ b/crypto_kem/hqc-192/clean/bch.c
@@ -11,8 +11,6 @@
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
@@ -21,97 +19,6 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus using one masked subtraction.
 *
 * Only meaningful when i is less than 2*modulus, so that the result is
 * either i or i-modulus. The choice between the two is made with a mask
 * built from bit 15 of (i - modulus) instead of a comparison; this assumes
 * the values are small enough (below 2^15) for that bit to signal wrap-around.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    // 16-bit difference; bit 15 ends up set when i < modulus
    const uint16_t diff = (uint16_t) (i - modulus);

    // restore = 0xffff if the subtraction wrapped (i < modulus), 0 otherwise
    const uint16_t restore = (uint16_t) (0U - (uint16_t) (diff >> 15));

    // Add the modulus back only in the wrapped case
    return (uint16_t) (diff + (restore & modulus));
 }



 /**
 * @brief Computes the odd binary cyclotomic cosets modulo 2^m-1 for integers less than upper_bound.
 *
 * For every odd i below upper_bound that is not yet marked, i becomes the
 * representative of its class: cosets[i] is set to i, and so is every index
 * reached by doubling modulo PARAM_GF_MUL_ORDER (PARAM_M - 1 doublings).
 * Entries equal to 0 are treated as "not yet assigned", so the caller is
 * expected to pass a zeroed array.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound The upper bound
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    uint16_t leader;

    // Walk the odd candidates only
    for (leader = 1; leader < upper_bound; leader += 2) {
        uint16_t member = leader;
        size_t remaining;

        // Skip indices already claimed by an earlier class
        if (cosets[leader] != 0) {
            continue;
        }

        cosets[leader] = leader;

        // Fill in the rest of the class by repeated doubling
        for (remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = leader;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The caller must ensure 2*t does not exceed PARAM_GF_MUL_ORDER, since the
 * coset table holds exactly PARAM_GF_MUL_ORDER entries.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all ones when bch_poly[j] is nonzero (coefficients below 2^15),
            // so the log[0] lookup made for a zero coefficient is discarded
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity.
    // The index is bounded so the scan can never read past the end of cosets.
    while (2 * *t + 1 < (size_t) PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Unpacks the message message to the array message_unpacked where each byte stores a bit of the message
 *
--- a/crypto_kem/hqc-192/clean/bch.h
+++ b/crypto_kem/hqc-192/clean/bch.h
@@ -17,7 +17,4 @@ void PQCLEAN_HQC192_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *me
 void PQCLEAN_HQC192_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);


 #endif
--- a/crypto_kem/hqc-192/clean/fft.c
+++ b/crypto_kem/hqc-192/clean/fft.c
@@ -51,7 +51,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -134,7 +134,8 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
    uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0};
    uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0};

    size_t i, n;
    uint16_t n;
    size_t i;

    n = 1 << (m_f - 2);
    memcpy(Q0, f0 + n, 2 * n);
--- a/crypto_kem/hqc-192/clean/gf2x.c
+++ b/crypto_kem/hqc-192/clean/gf2x.c
@@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;
    uint16_t i, j;

    for (uint32_t i = 0; i < 16; i++) {
    for (i = 0; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0; i < 15; i++) {
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    for (uint32_t i = 1; i < 16; i++) {
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0; i + 1 < weight; i++) {
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
--- a/crypto_kem/hqc-192/clean/repetition.c
+++ b/crypto_kem/hqc-192/clean/repetition.c
@@ -20,27 +20,26 @@ static inline int32_t popcount(uint64_t n);
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQC192_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
    static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}};
    for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
        for (size_t j = 0; j < 64; j++) {
            uint8_t bit = (m[i] >> j) & 0x1;
            uint32_t pos_r = PARAM_N2 * ((i << 6) + j);
            uint16_t idx_r = (pos_r & 0x3f);
            uint64_t *p64 = em;
            p64 += pos_r >> 6;
            *p64 ^= mask[bit][0] << idx_r;
            *(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
    uint16_t i, j, bit, idx_r;
    uint32_t pos_r;
    uint64_t *p64 = em;
    const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}};
    for (i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
        for (j = 0; j < 64; j++) {
            bit = (m[i] >> j) & 0x1;
            pos_r = PARAM_N2 * ((i << 6) + j);
            idx_r = (pos_r & 0x3f);
            p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
            p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
        }
    }

    for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) {
        uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
        uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
        uint16_t idx_r = (pos_r & 0x3f);
        uint64_t *p64 = em;
        p64 += pos_r >> 6;
        *p64 ^= mask[bit][0] << idx_r;
        *(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
    for (j = 0; j < (PARAM_N1 & 0x3f); j++) {
        bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
        pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
        idx_r = (pos_r & 0x3f);
        p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
        p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
    }
 }

@@ -74,8 +73,11 @@ static inline int32_t popcount(uint64_t n) {
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    size_t t = 0;
    uint32_t b, bn, bi, c, cn, ci;
    uint64_t cx, ones;
    uint64_t mask;

    for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
        bn = b >> 6;
        bi = b & 63;
@@ -83,9 +85,9 @@ void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
        cn = c >> 6;
        ci = c & 63;
        cx = em[cn] << (63 - ci);
        int64_t verif = (cn == (bn + 1));
        ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx * verif));
        m[t >> 6] |= ((uint64_t) (ones > PARAM_T)) << (t & 63);
        mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
        ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
        m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
        t++;
    }
 }
--- a/crypto_kem/hqc-256/avx2/bch.c
+++ b/crypto_kem/hqc-256/avx2/bch.c
@@ -13,104 +13,11 @@
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * Only valid when i is less than 2*modulus, so the result is either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;

    // wrapped is 1 when the subtraction underflowed (top bit of diff set), 0 otherwise
    uint16_t wrapped = diff >> 15;

    // select is 0xffff when wrapped (i.e. i < modulus), 0 otherwise
    uint16_t select = (uint16_t) (0 - wrapped);

    // add the modulus back only when the subtraction underflowed
    return (uint16_t) (diff + (select & modulus));
 }



 /**
 * @brief Fills in the binary cyclotomic cosets modulo 2^m-1 of the odd integers below upper_bound.
 *
 * On return, cosets[k] holds the representative of the coset containing k for every index reached.
 * An entry equal to 0 is treated as "not yet assigned to a coset".
 * @param[out] cosets Array of size 2^m-1 receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd integers whose cosets are computed
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // A non-zero entry means rep already belongs to an earlier coset
        if (cosets[rep] != 0) {
            continue;
        }

        cosets[rep] = rep;

        // Visit the remaining PARAM_M - 1 members 2*rep, 4*rep, ... of rep's coset
        uint16_t member = rep;
        for (size_t left = PARAM_M - 1; left != 0; --left) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC256_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    const size_t cosets_len = sizeof cosets / sizeof cosets[0];
    size_t deg_bch_poly = 0;

    // A zero entry means "in no coset"; take the byte count from the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all-ones when bch_poly[j] is non-zero (values up to 0x8000) and 0 when it is zero,
            // so the a^i * bch_poly[j] term is dropped for a zero coefficient
            int16_t mask = (int16_t) -(((uint16_t) -bch_poly[j]) >> 15);
            bch_poly[j] = (uint16_t) ((mask & exp[mod((uint16_t) (log[bch_poly[j]] + i), PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1]);
        }
        bch_poly[0] = exp[mod((uint16_t) (log[bch_poly[0]] + i), PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity (never index past the end of cosets)
    while (2 * *t + 1 < cosets_len && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Computes the values alpha^ij for decoding syndromes
 *
--- a/crypto_kem/hqc-256/avx2/bch.h
+++ b/crypto_kem/hqc-256/avx2/bch.h
@@ -15,8 +15,6 @@
 void PQCLEAN_HQC256_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC256_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);

 void PQCLEAN_HQC256_AVX2_table_alphaij_generation(const uint16_t *exp);


--- a/crypto_kem/hqc-256/avx2/fft.c
+++ b/crypto_kem/hqc-256/avx2/fft.c
@@ -325,7 +325,8 @@ void PQCLEAN_HQC256_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    uint64_t bit;
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-256/avx2/repetition.c
+++ b/crypto_kem/hqc-256/avx2/repetition.c
@@ -22,9 +22,11 @@
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC256_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    uint32_t t = 0;
    uint32_t b, bn, bi, c, cn, ci;
    uint64_t cx, ones;
    uint64_t cy;
    uint64_t mask;

    for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
        bn = b >> 6;
@@ -33,10 +35,10 @@ void PQCLEAN_HQC256_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
        cn = c >> 6;
        ci = c & 63;
        cx = em[cn] << (63 - ci);
        int64_t verif = (cn == (bn + 1));
        mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
        cy = em[bn + 1];
        ones = _mm_popcnt_u64((em[bn] >> bi) | (cx * (1 - verif))) + _mm_popcnt_u64((1 - verif) * cy + verif * cx);
        m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
        ones = _mm_popcnt_u64((em[bn] >> bi) | (cx & mask)) + _mm_popcnt_u64((mask & cy) + (~mask & cx));
        m[t >> 6] |= (uint64_t) (((((int64_t)PARAM_T - ones) >> 63) & 1) << (t & 63)); // 1 << (t&63) if ones > PARAM_T else 0
        t++;
    }
 }
--- a/crypto_kem/hqc-256/clean/bch.c
+++ b/crypto_kem/hqc-256/clean/bch.c
@@ -11,8 +11,6 @@
 */


 static uint16_t mod(uint16_t i, uint16_t modulus);
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
 static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
@@ -21,97 +19,6 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
 static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
 static void compute_roots(uint64_t *error, const uint16_t *sigma);

 /**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * Only valid when i is less than 2*modulus, so the result is either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
 static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;

    // wrapped is 1 when the subtraction underflowed (top bit of diff set), 0 otherwise
    uint16_t wrapped = diff >> 15;

    // select is 0xffff when wrapped (i.e. i < modulus), 0 otherwise
    uint16_t select = (uint16_t) (0 - wrapped);

    // add the modulus back only when the subtraction underflowed
    return (uint16_t) (diff + (select & modulus));
 }



 /**
 * @brief Fills in the binary cyclotomic cosets modulo 2^m-1 of the odd integers below upper_bound.
 *
 * On return, cosets[k] holds the representative of the coset containing k for every index reached.
 * An entry equal to 0 is treated as "not yet assigned to a coset".
 * @param[out] cosets Array of size 2^m-1 receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd integers whose cosets are computed
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // A non-zero entry means rep already belongs to an earlier coset
        if (cosets[rep] != 0) {
            continue;
        }

        cosets[rep] = rep;

        // Visit the remaining PARAM_M - 1 members 2*rep, 4*rep, ... of rep's coset
        uint16_t member = rep;
        for (size_t left = PARAM_M - 1; left != 0; --left) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
 }



 /**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
 size_t PQCLEAN_HQC256_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    const size_t cosets_len = sizeof cosets / sizeof cosets[0];
    size_t deg_bch_poly = 0;

    // A zero entry means "in no coset"; take the byte count from the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all-ones when bch_poly[j] is non-zero (values up to 0x8000) and 0 when it is zero,
            // so the a^i * bch_poly[j] term is dropped for a zero coefficient
            int16_t mask = (int16_t) -(((uint16_t) -bch_poly[j]) >> 15);
            bch_poly[j] = (uint16_t) ((mask & exp[mod((uint16_t) (log[bch_poly[j]] + i), PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1]);
        }
        bch_poly[0] = exp[mod((uint16_t) (log[bch_poly[0]] + i), PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity (never index past the end of cosets)
    while (2 * *t + 1 < cosets_len && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
 }



 /**
 * @brief Unpacks the message message to the array message_unpacked where each byte stores a bit of the message
 *
--- a/crypto_kem/hqc-256/clean/bch.h
+++ b/crypto_kem/hqc-256/clean/bch.h
@@ -17,7 +17,4 @@ void PQCLEAN_HQC256_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *me
 void PQCLEAN_HQC256_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);


 size_t PQCLEAN_HQC256_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);


 #endif
--- a/crypto_kem/hqc-256/clean/fft.c
+++ b/crypto_kem/hqc-256/clean/fft.c
@@ -51,7 +51,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -134,7 +134,8 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
    uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0};
    uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0};

    size_t i, n;
    uint16_t n;
    size_t i;

    n = 1 << (m_f - 2);
    memcpy(Q0, f0 + n, 2 * n);
--- a/crypto_kem/hqc-256/clean/gf2x.c
+++ b/crypto_kem/hqc-256/clean/gf2x.c
@@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;
    uint16_t i, j;

    for (uint32_t i = 0; i < 16; i++) {
    for (i = 0; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0; i < 15; i++) {
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    for (uint32_t i = 1; i < 16; i++) {
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0; i + 1 < weight; i++) {
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
--- a/crypto_kem/hqc-256/clean/repetition.c
+++ b/crypto_kem/hqc-256/clean/repetition.c
@@ -80,9 +80,11 @@ static inline int32_t popcount(uint64_t n) {
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQC256_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
    size_t t = 0, b, bn, bi, c, cn, ci;
    uint32_t t = 0;
    uint32_t b, bn, bi, c, cn, ci;
    uint64_t cx, ones;
    uint64_t cy;
    uint64_t mask;

    for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
        bn = b >> 6;
@@ -91,10 +93,10 @@ void PQCLEAN_HQC256_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
        cn = c >> 6;
        ci = c & 63;
        cx = em[cn] << (63 - ci);
        int64_t verif = (cn == (bn + 1));
        mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
        cy = em[bn + 1];
        ones = popcount((em[bn] >> bi) | (cx * (1 - verif))) + popcount((1 - verif) * cy + verif * cx);
        m[t >> 6] |= ((uint64_t) (ones > PARAM_T)) << (t & 63);
        ones = popcount((em[bn] >> bi) | (cx & mask)) + popcount((mask & cy) + (~mask & cx));
        m[t >> 6] |= (uint64_t) (((((int64_t)PARAM_T - ones) >> 63) & 1) << (t & 63)); // 1 << (t&63) if ones > PARAM_T else 0
        t++;
    }
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/fft.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/fft.c
@@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs
 void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-rmrs-128/clean/fft.c
+++ b/crypto_kem/hqc-rmrs-128/clean/fft.c
@@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff
 void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-rmrs-128/clean/gf2x.c
+++ b/crypto_kem/hqc-rmrs-128/clean/gf2x.c
@@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;
    uint16_t i, j;

    for (uint32_t i = 0; i < 16; i++) {
    for (i = 0; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0; i < 15; i++) {
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    for (uint32_t i = 1; i < 16; i++) {
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0; i + 1 < weight; i++) {
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
--- a/crypto_kem/hqc-rmrs-192/avx2/fft.c
+++ b/crypto_kem/hqc-rmrs-192/avx2/fft.c
@@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs
 void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-rmrs-192/clean/fft.c
+++ b/crypto_kem/hqc-rmrs-192/clean/fft.c
@@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff
 void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-rmrs-192/clean/gf2x.c
+++ b/crypto_kem/hqc-rmrs-192/clean/gf2x.c
@@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;
    uint16_t i, j;

    for (uint32_t i = 0; i < 16; i++) {
    for (i = 0; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0; i < 15; i++) {
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    for (uint32_t i = 1; i < 16; i++) {
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0; i + 1 < weight; i++) {
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
--- a/crypto_kem/hqc-rmrs-256/avx2/fft.c
+++ b/crypto_kem/hqc-rmrs-256/avx2/fft.c
@@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
    size_t i, j;
    uint16_t i, j;
    subset_sums[0] = 0;

    for (i = 0; i < set_size; ++i) {
@@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs
 void PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-rmrs-256/clean/fft.c
+++ b/crypto_kem/hqc-rmrs-256/clean/fft.c
@@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff
 void PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    size_t i, k, index;
    uint16_t k;
    size_t i, index;

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);
--- a/crypto_kem/hqc-rmrs-256/clean/gf2x.c
+++ b/crypto_kem/hqc-rmrs-256/clean/gf2x.c
@@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t *pt;
    uint16_t *res_16;
    uint16_t i, j;

    for (uint32_t i = 0; i < 16; i++) {
    for (i = 0; i < 16; i++) {
        permuted_table[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    for (uint32_t i = 0; i < 15; i++) {
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    for (uint32_t i = 1; i < 16; i++) {
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (uint32_t i = 0; i + 1 < weight; i++) {
    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
    }

    for (uint32_t i = 0; i < weight; i++) {
    for (i = 0; i < weight; i++) {
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        res_16 = ((uint16_t *) o) + s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            *res_16++ ^= (uint16_t) pt[j];
            *res_16++ ^= (uint16_t) (pt[j] >> 16);
            *res_16++ ^= (uint16_t) (pt[j] >> 32);
--- a/test/duplicate_consistency/hqc-128_avx2.yml
+++ b/test/duplicate_consistency/hqc-128_avx2.yml
@@ -28,7 +28,6 @@ consistency_checks:
      - repetition.h
      - vector.h
      - bch.c
      - fft.c
      - gf.c
      - hqc.c
      - kem.c
@@ -54,7 +53,6 @@ consistency_checks:
      - repetition.h
      - vector.h
      - bch.c
      - fft.c
      - gf.c
      - hqc.c
      - kem.c
--- a/test/duplicate_consistency/hqc-128_clean.yml
+++ b/test/duplicate_consistency/hqc-128_clean.yml
@@ -21,7 +21,6 @@ consistency_checks:
      - vector.h
      - bch.c
      - code.c
      - fft.c
      - gf2x.c
      - gf.c
      - hqc.c
@@ -48,7 +47,6 @@ consistency_checks:
      - vector.h
      - bch.c
      - code.c
      - fft.c
      - gf2x.c
      - gf.c
      - hqc.c
--- a/test/duplicate_consistency/hqc-rmrs-128_avx2.yml
+++ b/test/duplicate_consistency/hqc-rmrs-128_avx2.yml
@@ -23,7 +23,6 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
 - source:
    scheme: hqc-rmrs-192
    implementation: avx2
@@ -56,7 +55,6 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
 - source:
    scheme: hqc-rmrs-256
    implementation: avx2
--- a/test/duplicate_consistency/hqc-rmrs-128_clean.yml
+++ b/test/duplicate_consistency/hqc-rmrs-128_clean.yml
@@ -26,7 +26,6 @@ consistency_checks:
      - reed_solomon.h
      - vector.h
      - code.c
      - fft.c
      - gf2x.c
      - gf.c
      - hqc.c
@@ -60,7 +59,6 @@ consistency_checks:
      - reed_solomon.h
      - vector.h
      - code.c
      - fft.c
      - gf2x.c
      - gf.c
      - hqc.c
--- a/test/duplicate_consistency/hqc-rmrs-192_avx2.yml
+++ b/test/duplicate_consistency/hqc-rmrs-192_avx2.yml
@@ -11,7 +11,6 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
 - source:
    scheme: hqc-rmrs-256
    implementation: clean
@@ -23,7 +22,6 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
 - source:
    scheme: hqc-rmrs-256
    implementation: avx2
--- a/test/duplicate_consistency/hqc-rmrs-192_clean.yml
+++ b/test/duplicate_consistency/hqc-rmrs-192_clean.yml
@@ -11,7 +11,6 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
 - source:
    scheme: hqc-rmrs-256
    implementation: clean
@@ -45,4 +44,3 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
--- a/test/duplicate_consistency/hqc-rmrs-256_avx2.yml
+++ b/test/duplicate_consistency/hqc-rmrs-256_avx2.yml
@@ -11,4 +11,3 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c
--- a/test/duplicate_consistency/hqc-rmrs-256_clean.yml
+++ b/test/duplicate_consistency/hqc-rmrs-256_clean.yml
@@ -11,4 +11,3 @@ consistency_checks:
      - reed_muller.h
      - reed_solomon.h
      - code.c
      - fft.c