Removes some variations

3 years ago · bdcc631260
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -219,6 +219,76 @@ define_kem_alg(kyber512_clean
 if(${ARCH} STREQUAL "ARCH_x86_64")

 # Compile for the Haswell microarchitecture; the AVX2 sources listed below rely on it.
 # NOTE(review): CMAKE_C_FLAGS is directory-wide rather than target-scoped, so
 # this likely reaches the non-AVX2 targets too -- confirm that is intended.
 string(APPEND CMAKE_C_FLAGS " -march=haswell")

 # Dilithium2, AVX2 implementation. The implementation directory doubles as the
 # include path and as the prefix of every source file.
 set(INC_AVX2_DILITHIUM2 crypto_sign/dilithium2/avx2)
 set(SRC_AVX2_DILITHIUM2
     ${INC_AVX2_DILITHIUM2}/consts.c
     ${INC_AVX2_DILITHIUM2}/f1600x4.S
     ${INC_AVX2_DILITHIUM2}/fips202x4.c
     ${INC_AVX2_DILITHIUM2}/invntt.S
     ${INC_AVX2_DILITHIUM2}/ntt.S
     ${INC_AVX2_DILITHIUM2}/packing.c
     ${INC_AVX2_DILITHIUM2}/pointwise.S
     ${INC_AVX2_DILITHIUM2}/poly.c
     ${INC_AVX2_DILITHIUM2}/polyvec.c
     ${INC_AVX2_DILITHIUM2}/rejsample.c
     ${INC_AVX2_DILITHIUM2}/rounding.c
     ${INC_AVX2_DILITHIUM2}/shuffle.S
     ${INC_AVX2_DILITHIUM2}/sign.c
     ${INC_AVX2_DILITHIUM2}/symmetric-shake.c
 )
 # Register the signature algorithm: name, namespace prefix, sources, include dirs.
 define_sig_alg(
   dilithium2_avx2 PQCLEAN_DILITHIUM2_AVX2
   "${SRC_AVX2_DILITHIUM2}" "${INC_AVX2_DILITHIUM2}")

 # Dilithium3, AVX2 implementation. The implementation directory doubles as the
 # include path and as the prefix of every source file.
 set(INC_AVX2_DILITHIUM3 crypto_sign/dilithium3/avx2)
 set(SRC_AVX2_DILITHIUM3
     ${INC_AVX2_DILITHIUM3}/consts.c
     ${INC_AVX2_DILITHIUM3}/f1600x4.S
     ${INC_AVX2_DILITHIUM3}/fips202x4.c
     ${INC_AVX2_DILITHIUM3}/invntt.S
     ${INC_AVX2_DILITHIUM3}/ntt.S
     ${INC_AVX2_DILITHIUM3}/packing.c
     ${INC_AVX2_DILITHIUM3}/pointwise.S
     ${INC_AVX2_DILITHIUM3}/poly.c
     ${INC_AVX2_DILITHIUM3}/polyvec.c
     ${INC_AVX2_DILITHIUM3}/rejsample.c
     ${INC_AVX2_DILITHIUM3}/rounding.c
     ${INC_AVX2_DILITHIUM3}/shuffle.S
     ${INC_AVX2_DILITHIUM3}/sign.c
     ${INC_AVX2_DILITHIUM3}/symmetric-shake.c
 )
 # Register the signature algorithm: name, namespace prefix, sources, include dirs.
 define_sig_alg(
   dilithium3_avx2 PQCLEAN_DILITHIUM3_AVX2
   "${SRC_AVX2_DILITHIUM3}" "${INC_AVX2_DILITHIUM3}")

 # Dilithium5, AVX2 implementation. The implementation directory doubles as the
 # include path and as the prefix of every source file.
 set(INC_AVX2_DILITHIUM5 crypto_sign/dilithium5/avx2)
 set(SRC_AVX2_DILITHIUM5
     ${INC_AVX2_DILITHIUM5}/consts.c
     ${INC_AVX2_DILITHIUM5}/f1600x4.S
     ${INC_AVX2_DILITHIUM5}/fips202x4.c
     ${INC_AVX2_DILITHIUM5}/invntt.S
     ${INC_AVX2_DILITHIUM5}/ntt.S
     ${INC_AVX2_DILITHIUM5}/packing.c
     ${INC_AVX2_DILITHIUM5}/pointwise.S
     ${INC_AVX2_DILITHIUM5}/poly.c
     ${INC_AVX2_DILITHIUM5}/polyvec.c
     ${INC_AVX2_DILITHIUM5}/rejsample.c
     ${INC_AVX2_DILITHIUM5}/rounding.c
     ${INC_AVX2_DILITHIUM5}/shuffle.S
     ${INC_AVX2_DILITHIUM5}/sign.c
     ${INC_AVX2_DILITHIUM5}/symmetric-shake.c
 )
 # Register the signature algorithm: name, namespace prefix, sources, include dirs.
 define_sig_alg(
   dilithium5_avx2 PQCLEAN_DILITHIUM5_AVX2
   "${SRC_AVX2_DILITHIUM5}" "${INC_AVX2_DILITHIUM5}")

 set(
  SRC_AVX2_KYBER512
  crypto_kem/kyber512/avx2/cbd.c
--- a/crypto_kem/frodokem1344aes/META.yml
+++ b/crypto_kem/frodokem1344aes/META.yml
@@ -1,28 +0,0 @@
 name: FrodoKEM-1344-AES
 type: kem
 claimed-nist-level: 5
 claimed-security: IND-CCA2
 length-public-key: 21520
 length-secret-key: 43088
 length-ciphertext: 21632
 length-shared-secret: 32
 nistkat-sha256: 2f4f1c352c1b343cce386c54234ca39fe29b48e45c66300f7311f5d3060d82b3
 principal-submitters:
  - Michael Naehrig, Microsoft Research
 auxiliary-submitters:
 - Erdem Alkim
 - Joppe W. Bos, NXP Semiconductors
 - Léo Ducas, CWI
 - Patrick Longa, Microsoft Research
 - Ilya Mironov, Google
 - Valeria Nikolaenko
 - Chris Peikert, University of Michigan
 - Ananth Raghunathan, Google
 - Douglas Stebila, University of Waterloo
 - Karen Easterbrook, Microsoft Research
 - Brian LaMacchia, Microsoft Research
 implementations:
 - name: clean
  version: https://github.com/microsoft/PQCrypto-LWEKE/commit/669522db63850fa64d1a24a47e138e80a59349db
 - name: opt
  version: https://github.com/microsoft/PQCrypto-LWEKE/commit/669522db63850fa64d1a24a47e138e80a59349db
--- a/crypto_kem/frodokem1344aes/clean/LICENSE
+++ b/crypto_kem/frodokem1344aes/clean/LICENSE
@@ -1,21 +0,0 @@
 MIT License

 Copyright (c) Microsoft Corporation. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE
--- a/crypto_kem/frodokem1344aes/clean/Makefile.Microsoft_nmake
+++ b/crypto_kem/frodokem1344aes/clean/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake
 #
 # It builds the FrodoKEM-1344-AES "clean" implementation as a static library.

 # Output library and the object files it is assembled from.
 LIBRARY=libfrodokem1344aes_clean.lib
 OBJECTS=kem.obj matrix_aes.obj noise.obj util.obj

 # MSVC flags: optimise (/O2), add the shared "common" directory to the include
 # path (/I), and use warning level 4 (/W4) with warnings treated as errors (/WX).
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the static library.
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive the objects into the library; $@ is the target, $** all of its dependents.
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove build outputs; the leading '-' tells nmake to ignore DEL failures.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/frodokem1344aes/clean/api.h
+++ b/crypto_kem/frodokem1344aes/clean/api.h
@@ -1,20 +0,0 @@
 #ifndef PQCLEAN_FRODOKEM1344AES_CLEAN_API_H
 #define PQCLEAN_FRODOKEM1344AES_CLEAN_API_H

 /*
  * Public interface of the FrodoKEM-1344-AES "clean" implementation:
  * buffer sizes, the algorithm name, and the three KEM operations.
  */

 #include <stddef.h>
 #include <stdint.h>

 /* Buffer sizes in bytes; the trailing comments give the formula behind each constant. */
 #define PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_SECRETKEYBYTES  43088     // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
 #define PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_PUBLICKEYBYTES  21520     // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
 #define PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_BYTES              32
 #define PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_CIPHERTEXTBYTES 21632     // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

 /* Human-readable algorithm identifier. */
 #define PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_ALGNAME "FrodoKEM-1344-AES"

 /*
  * Key generation: writes the public key to pk and the secret key to sk.
  * The buffers are presumably expected to hold CRYPTO_PUBLICKEYBYTES and
  * CRYPTO_SECRETKEYBYTES bytes respectively -- confirm against the implementation.
  */
 int PQCLEAN_FRODOKEM1344AES_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

 /*
  * Encapsulation: given public key pk, writes a ciphertext to ct and the
  * shared secret to ss (presumably CRYPTO_CIPHERTEXTBYTES and CRYPTO_BYTES bytes).
  */
 int PQCLEAN_FRODOKEM1344AES_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

 /*
  * Decapsulation: given ciphertext ct and secret key sk, writes the shared
  * secret to ss (presumably CRYPTO_BYTES bytes).
  */
 int PQCLEAN_FRODOKEM1344AES_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

 #endif
--- a/crypto_kem/frodokem1344aes/clean/common.h
+++ b/crypto_kem/frodokem1344aes/clean/common.h
@@ -1,21 +0,0 @@
 #ifndef COMMON_H
 #define COMMON_H

 /*
  * Internal helper prototypes shared by the FrodoKEM-1344-AES clean sources
  * (kem.c, matrix_aes.c, noise.c, util.c).
  */

 /* The prototypes below use fixed-width integers and size_t, so pull in their
  * definitions here instead of relying on the includer's include order. */
 #include <stddef.h>
 #include <stdint.h>

 /* Matrix products against the matrix A that is expanded from seed_A:
  * out = A*s + e and out = s*A + e respectively. */
 int PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 int PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);

 /* Noise sampling: replaces the n pseudo-random 16-bit values in s with samples, in place. */
 void PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(uint16_t *s, size_t n);

 /* Small matrix arithmetic: out = b*s, out = s*b + e, and (presumably
  * element-wise, judging by the names) out = a + b and out = a - b. */
 void PQCLEAN_FRODOKEM1344AES_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);

 /* Message encoding/decoding and bit packing of matrices.
  * NOTE(review): exact formats are defined by the implementations, not by this header. */
 void PQCLEAN_FRODOKEM1344AES_CLEAN_key_encode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_key_decode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);

 /* Comparison, selection and wiping helpers used on secret data.
  * NOTE(review): presumably constant-time (the "ct_" prefix); verify in the implementations. */
 int8_t PQCLEAN_FRODOKEM1344AES_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
 void PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(uint8_t *mem, size_t n);

 /* Conversions between host-order uint16_t values and their little-endian byte layout. */
 uint16_t PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(uint16_t n);
 uint16_t PQCLEAN_FRODOKEM1344AES_CLEAN_UINT16_TO_LE(uint16_t n);

 #endif
--- a/crypto_kem/frodokem1344aes/clean/kem.c
+++ b/crypto_kem/frodokem1344aes/clean/kem.c
@@ -1,237 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "fips202.h"
 #include "randombytes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Generates a FrodoKEM key pair.
 //   pk: output, seed_A followed by the packed matrix B.
 //   sk: output, s || pk || S (little-endian uint16_t) || H(pk).
 // Always returns 0. Secret intermediates are wiped before returning.
 int PQCLEAN_FRODOKEM1344AES_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // FrodoKEM's key generation
    // Outputs: public key pk (               BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
    //          secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
    uint8_t *pk_seedA = &pk[0];
    uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *sk_s = &sk[0];
    uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0};           // contains secret data
    uint16_t *E = &S[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A];    // contains secret data via randomness_s and randomness_seedSE
    uint8_t *randomness_s = &randomness[0];                 // contains secret data
    uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
    uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];           // contains secret data

    // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
    randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
    shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

    // Generate S and E, and compute B = A*S + E. Generate A on-the-fly
    // The SHAKE input is the domain-separation byte 0x5F followed by seedSE.
    shake_input_seedSE[0] = 0x5F;
    memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
    shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    // Interpret the SHAKE output bytes as little-endian 16-bit words (covers both S and E).
    for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(S[i]);
    }
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR);
    // pk starts with seed_A (pk_seedA == &pk[0]), so passing pk hands over the seed.
    PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_as_plus_e(B, S, E, pk);

    // Encode the second part of the public key
    PQCLEAN_FRODOKEM1344AES_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Add s, pk and S to the secret key
    memcpy(sk_s, randomness_s, CRYPTO_BYTES);
    memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_UINT16_TO_LE(S[i]);
    }
    // The byte count 2 * PARAMS_N * PARAMS_NBAR covers exactly the PARAMS_N * PARAMS_NBAR 16-bit entries of S (E is not stored).
    memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

    // Add H(pk) to the secret key
    shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

    // Cleanup:
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    // Only s and seedSE are wiped; the trailing randomness_z only feeds the public seed_A.
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 // Encapsulates a fresh shared secret under public key pk.
 //   ct: output ciphertext, packed Bp (c1) followed by packed C (c2).
 //   ss: output shared secret, F(ct || k), CRYPTO_BYTES bytes.
 //   pk: input public key, seed_A followed by packed B.
 // Always returns 0. Secret intermediates are wiped before returning.
 int PQCLEAN_FRODOKEM1344AES_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // FrodoKEM's key encapsulation
    const uint8_t *pk_seedA = &pk[0];
    const uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *ct_c1 = &ct[0];
    uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];          // contains secret data
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                    // contains secret data via mu
    uint8_t *pkh = &G2in[0];
    uint8_t *mu = &G2in[BYTES_PKHASH];                        // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                          // contains secret data
    uint8_t *seedSE = &G2out[0];                              // contains secret data
    uint8_t *k = &G2out[CRYPTO_BYTES];                        // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];       // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];            // contains secret data
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];             // contains secret data

    // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
    shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
    randombytes(mu, BYTES_MU);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the domain-separation byte 0x96 followed by seedSE.
    shake_input_seedSE[0] = 0x96;
    memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    // Interpret the SHAKE output bytes as little-endian 16-bit words (covers Sp, Ep and Epp).
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
    PQCLEAN_FRODOKEM1344AES_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Generate Epp, and compute V = Sp*B + Epp
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp);

    // Encode mu, and compute C = V + enc(mu) (mod q)
    // NOTE(review): mu points into a uint8_t array and is cast to uint16_t *; whether this is
    // alignment/aliasing-safe depends on how key_encode reads its input -- confirm.
    PQCLEAN_FRODOKEM1344AES_CLEAN_key_encode(C, (uint16_t *)mu);
    PQCLEAN_FRODOKEM1344AES_CLEAN_add(C, V, C);
    PQCLEAN_FRODOKEM1344AES_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

    // Compute ss = F(ct||KK)
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
    memcpy(Fin_k, k, CRYPTO_BYTES);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Sp, Ep and Epp are adjacent regions of the Sp array, so the three calls below wipe all of it.
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(mu, BYTES_MU);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 // Decapsulates ciphertext ct with secret key sk.
 //   ss: output shared secret, CRYPTO_BYTES bytes.
 //   ct: input ciphertext, packed Bp (c1) followed by packed C (c2).
 //   sk: input secret key, s || pk || S (little-endian uint16_t) || H(pk).
 // The ciphertext is re-derived from the decoded message and compared with ct; on a match
 // ss = F(ct || k'), otherwise ss = F(ct || s). Always returns 0, so a failed
 // check is not reported to the caller.
 int PQCLEAN_FRODOKEM1344AES_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    // FrodoKEM's key decapsulation
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];                  // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];             // contains secret data
    const uint8_t *ct_c1 = &ct[0];
    const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    const uint8_t *sk_s = &sk[0];
    const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint16_t S[PARAMS_N * PARAMS_NBAR];                      // contains secret data
    const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    const uint8_t *pk_seedA = &sk_pk[0];
    const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                   // contains secret data via muprime
    uint8_t *pkh = &G2in[0];
    uint8_t *muprime = &G2in[BYTES_PKHASH];                  // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                         // contains secret data
    uint8_t *seedSEprime = &G2out[0];                        // contains secret data
    uint8_t *kprime = &G2out[CRYPTO_BYTES];                  // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];      // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];           // contains secret data
    uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES];       // contains secret data

    // Load S from the secret key, assembling each 16-bit entry from two little-endian bytes.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
    }

    // Compute W = C - Bp*S (mod q), and decode the randomness mu
    PQCLEAN_FRODOKEM1344AES_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_CLEAN_mul_bs(W, Bp, S);
    PQCLEAN_FRODOKEM1344AES_CLEAN_sub(W, C, W);
    // NOTE(review): muprime points into a uint8_t array and is cast to uint16_t *; whether this is
    // alignment/aliasing-safe depends on how key_decode writes its output -- confirm.
    PQCLEAN_FRODOKEM1344AES_CLEAN_key_decode((uint16_t *)muprime, W);

    // Generate (seedSE' || k') = G_2(pkh || mu')
    memcpy(pkh, sk_pkh, BYTES_PKHASH);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the domain-separation byte 0x96 followed by seedSE'.
    shake_input_seedSEprime[0] = 0x96;
    memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

    // Generate Epp, and compute W = Sp*B + Epp
    PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp);

    // Encode mu, and compute CC = W + enc(mu') (mod q)
    PQCLEAN_FRODOKEM1344AES_CLEAN_key_encode(CC, (uint16_t *)muprime);
    PQCLEAN_FRODOKEM1344AES_CLEAN_add(CC, W, CC);

    // Prepare input to F
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

    // Reducing BBp modulo q
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
    }

    // If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
    // Needs to avoid branching on secret data as per:
    //     Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
    //     primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
    int8_t selector = PQCLEAN_FRODOKEM1344AES_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM1344AES_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
    // If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
    PQCLEAN_FRODOKEM1344AES_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Sp, Ep and Epp are adjacent regions of the Sp array, so together these calls wipe all of it.
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(muprime, BYTES_MU);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    return 0;
 }
--- a/crypto_kem/frodokem1344aes/clean/matrix_aes.c
+++ b/crypto_kem/frodokem1344aes/clean/matrix_aes.c
@@ -1,95 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: matrix arithmetic functions used by the KEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "aes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Computes out = A*s + e, where A (N x N) is expanded from seed_A with AES-128 in ECB mode.
 //   out:    result; written as PARAMS_N rows of PARAMS_NBAR entries.
 //   s:      read as s[k * PARAMS_N + j], i.e. PARAMS_NBAR rows of PARAMS_N entries.
 //   e:      added to the product; same layout as out.
 //   seed_A: AES-128 key used to generate A.
 // Always returns 1.
 int PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
    // Inputs: s, e (N x N_BAR)
    // Output: out = A*s + e (N x N_BAR)
    int i, j, k;
    // NOTE(review): the whole matrix lives on the stack: PARAMS_N * PARAMS_N int16_t values,
    // roughly 3.4 MiB if PARAMS_N is 1344 -- confirm the target's stack can hold it.
    int16_t A[PARAMS_N * PARAMS_N] = {0};
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);
    // Seed every stripe of PARAMS_STRIPE_STEP entries with its (row, column) position;
    // the remaining entries of the stripe stay zero. These stripes are the AES plaintexts.
    for (i = 0; i < PARAMS_N; i++) {
        for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
            A[i * PARAMS_N + j] = (int16_t) i;                            // Loading values in the little-endian order
            A[i * PARAMS_N + j + 1] = (int16_t) j;
        }
    }
    for (i = 0; i < PARAMS_N * PARAMS_N; i++) {
        A[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_UINT16_TO_LE(A[i]);
    }

    // Encrypt the buffer in place, AES_BLOCKBYTES bytes per block, to obtain A.
    aes128_ecb((uint8_t *) A, (uint8_t *) A, PARAMS_N * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
    aes128_ctx_release(&ctx128);

    // Convert the AES output bytes back into host-order 16-bit values.
    for (i = 0; i < PARAMS_N * PARAMS_N; i++) {
        A[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(A[i]);
    }
    // Start from e so that the products below accumulate on top of it.
    memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));

    for (i = 0; i < PARAMS_N; i++) {                            // Matrix multiplication-addition A*s + e
        for (k = 0; k < PARAMS_NBAR; k++) {
            uint16_t sum = 0;
            for (j = 0; j < PARAMS_N; j++) {
                sum += A[i * PARAMS_N + j] * s[k * PARAMS_N + j];
            }
            out[i * PARAMS_NBAR + k] += sum;                    // Adding e. No explicit reduction modulo q here: the uint16_t arithmetic wraps modulo 2^16.
        }
    }

    return 1;
 }


 // Computes out = s*A + e, where A (N x N) is expanded from seed_A with AES-128 in ECB mode.
 //   out:    result; written as PARAMS_NBAR rows of PARAMS_N entries.
 //   s:      read as s[k * PARAMS_N + j], i.e. PARAMS_NBAR rows of PARAMS_N entries.
 //   e:      added to the product; same layout as out.
 //   seed_A: AES-128 key used to generate A.
 // Always returns 1.
 int PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate the full matrix A (N x N) exactly as in mul_add_as_plus_e,
    // then read it column by column while multiplying by s' on the left.
    // Inputs: s', e' (N_BAR x N)
    // Output: out = s'*A + e' (N_BAR x N)
    int i, j, k;
    // NOTE(review): the whole matrix lives on the stack: PARAMS_N * PARAMS_N int16_t values,
    // roughly 3.4 MiB if PARAMS_N is 1344 -- confirm the target's stack can hold it.
    int16_t A[PARAMS_N * PARAMS_N] = {0};
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);
    // Seed every stripe of PARAMS_STRIPE_STEP entries with its (row, column) position;
    // the remaining entries of the stripe stay zero. These stripes are the AES plaintexts.
    for (i = 0; i < PARAMS_N; i++) {
        for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
            A[i * PARAMS_N + j] = (int16_t) i;                            // Loading values in the little-endian order
            A[i * PARAMS_N + j + 1] = (int16_t) j;
        }
    }
    for (i = 0; i < PARAMS_N * PARAMS_N; i++) {
        A[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_UINT16_TO_LE(A[i]);
    }

    // Encrypt the buffer in place, AES_BLOCKBYTES bytes per block, to obtain A.
    aes128_ecb((uint8_t *) A, (uint8_t *) A, PARAMS_N * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
    aes128_ctx_release(&ctx128);

    // Convert the AES output bytes back into host-order 16-bit values.
    for (i = 0; i < PARAMS_N * PARAMS_N; i++) {
        A[i] = PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(A[i]);
    }
    // Start from e so that the products below accumulate on top of it.
    memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));

    for (i = 0; i < PARAMS_N; i++) {                            // Matrix multiplication-addition s*A + e (A is read by column i)
        for (k = 0; k < PARAMS_NBAR; k++) {
            uint16_t sum = 0;
            for (j = 0; j < PARAMS_N; j++) {
                sum += A[j * PARAMS_N + i] * s[k * PARAMS_N + j];
            }
            out[k * PARAMS_N + i] += sum;                       // Adding e. No explicit reduction modulo q here: the uint16_t arithmetic wraps modulo 2^16.
        }
    }

    return 1;
 }
--- a/crypto_kem/frodokem1344aes/clean/noise.c
+++ b/crypto_kem/frodokem1344aes/clean/noise.c
@@ -1,35 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: noise sampling functions
 *********************************************************************************************/

 #include <stdint.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Cumulative distribution table of the noise distribution; contents and length come from params.h.
 static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

 // Turns n pseudo-random 16-bit values into noise samples, in place.
 //   s: on input, n pseudo-random values; on output, n samples (negative values wrap as uint16_t).
 //   n: number of entries of s to process.
 // Each sample is computed without branches or table look-ups that depend on the random value.
 void PQCLEAN_FRODOKEM1344AES_CLEAN_sample_n(uint16_t *s, size_t n) {
    // Fills vector s with n samples from the noise distribution which requires 16 bits to sample.
    // The distribution is specified by its CDF.
    // Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output.
    size_t i;
    unsigned int j;

    for (i = 0; i < n; ++i) {
        uint16_t sample = 0;
        uint16_t prnd = s[i] >> 1;    // Drop the least significant bit
        uint16_t sign = s[i] & 0x1;    // Pick the least significant bit

        // No need to compare with the last value.
        for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) {
            // Constant time comparison: 1 if CDF_TABLE[j] < prnd, 0 otherwise. Uses the fact that CDF_TABLE[j] and prnd fit in 15 bits.
            sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15;
        }
        // Assuming that sign is either 0 or 1, flips sample iff sign = 1
        // (two's-complement negation: XOR with all ones, then add one).
        s[i] = ((-sign) ^ sample) + sign;
    }
 }
--- a/crypto_kem/frodokem1344aes/clean/params.h
+++ b/crypto_kem/frodokem1344aes/clean/params.h
@@ -1,27 +0,0 @@
 #ifndef PARAMS_H
 #define PARAMS_H

 // Internal parameters of the FrodoKEM-1344-AES clean implementation.

 // Short aliases for the namespaced size macros (expected to be provided by api.h,
 // which therefore has to be included alongside this header).
 #define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_SECRETKEYBYTES
 #define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define CRYPTO_BYTES PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_BYTES
 #define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM1344AES_CLEAN_CRYPTO_CIPHERTEXTBYTES

 // Matrix dimensions (N, NBAR) and modulus q = 2^PARAMS_LOGQ.
 // PARAMS_EXTRACTED_BITS feeds the BYTES_MU formula below.
 // NOTE(review): PARAMS_STRIPE_STEP is presumably the number of 16-bit entries per AES block;
 // the role of PARAMS_PARALLEL is not evident from this header -- confirm against the sources.
 #define PARAMS_N 1344
 #define PARAMS_NBAR 8
 #define PARAMS_LOGQ 16
 #define PARAMS_Q (1 << PARAMS_LOGQ)
 #define PARAMS_EXTRACTED_BITS 4
 #define PARAMS_STRIPE_STEP 8
 #define PARAMS_PARALLEL 4
 // Byte lengths of the seed for A, of the message mu, and of the public-key hash.
 #define BYTES_SEED_A 16
 #define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
 #define BYTES_PKHASH CRYPTO_BYTES

 // Selecting SHAKE XOF function for the KEM and noise sampling
 #define shake     shake256

 // CDF table
 #define CDF_TABLE_DATA {9142, 23462, 30338, 32361, 32725, 32765, 32767}
 #define CDF_TABLE_LEN 7

 #endif
--- a/crypto_kem/frodokem1344aes/clean/util.c
+++ b/crypto_kem/frodokem1344aes/clean/util.c
@@ -1,264 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: additional functions for FrodoKEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 static inline uint8_t min(uint8_t x, uint8_t y) {
    // Smaller of the two arguments (y when they are equal).
    return (x < y) ? x : y;
 }

 uint16_t PQCLEAN_FRODOKEM1344AES_CLEAN_LE_TO_UINT16(uint16_t n) {
    // Interpret the two bytes that make up n in memory as a little-endian
    // 16-bit number and return it as a native value.
    const uint8_t *raw = (const uint8_t *) &n;
    uint16_t low = raw[0];
    uint16_t high = raw[1];

    return (uint16_t)(low | (high << 8));
 }

 uint16_t PQCLEAN_FRODOKEM1344AES_CLEAN_UINT16_TO_LE(uint16_t n) {
    // Build a 16-bit object whose in-memory bytes are n in little-endian
    // order: low byte first, high byte second.
    uint16_t encoded;
    uint8_t *dst = (uint8_t *) &encoded;

    dst[0] = (uint8_t)(n & 0xFF);
    dst[1] = (uint8_t)(n >> 8);
    return encoded;
 }

 void PQCLEAN_FRODOKEM1344AES_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
    // Right-multiplication by s: out = b*s, every entry masked to PARAMS_LOGQ bits.
    //   b   : N_BAR x N, read as b[row * PARAMS_N + t]
    //   s   : read as s[col * PARAMS_N + t], so the N values for one output column are contiguous
    //   out : N_BAR x N_BAR, written as out[row * PARAMS_NBAR + col]
    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *b_row = &b[row * PARAMS_N];

        for (int col = 0; col < PARAMS_NBAR; col++) {
            const uint16_t *s_col = &s[col * PARAMS_N];
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            *cell = 0;
            for (int t = 0; t < PARAMS_N; t++) {
                // One factor is widened so the product is formed in 32 bits; only the low 16 are kept.
                *cell += (uint16_t)(b_row[t] * (uint32_t)s_col[t]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
    // Left-multiplication by s with an added error term: out = s*b + e,
    // every entry masked to PARAMS_LOGQ bits.
    //   b   : N x N_BAR, read as b[t * PARAMS_NBAR + col]
    //   s   : N_BAR x N, read as s[row * PARAMS_N + t]
    //   e   : N_BAR x N_BAR
    //   out : N_BAR x N_BAR
    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *s_row = &s[row * PARAMS_N];

        for (int col = 0; col < PARAMS_NBAR; col++) {
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            // The accumulator starts at the error term and then collects the dot product.
            *cell = e[row * PARAMS_NBAR + col];
            for (int t = 0; t < PARAMS_N; t++) {
                *cell += (uint16_t)(s_row[t] * (uint32_t)b[t * PARAMS_NBAR + col]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Entry-wise sum of two N_BAR x N_BAR matrices: out = a + b,
    // each entry masked to PARAMS_LOGQ bits.
    const size_t entries = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < entries) {
        out[idx] = (a[idx] + b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Entry-wise difference of two N_BAR x N_BAR matrices: out = a - b,
    // each entry masked to PARAMS_LOGQ bits.
    const size_t entries = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < entries) {
        out[idx] = (a[idx] - b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_key_encode(uint16_t *out, const uint16_t *in) {
    // Encoding: spread the input bit string over N_BAR x N_BAR matrix entries.
    // The input is consumed byte by byte; every group of PARAMS_EXTRACTED_BITS input
    // bits is moved into the top PARAMS_EXTRACTED_BITS bits of one 16-bit output entry.
    const uint8_t *src = (const uint8_t *)in;
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint64_t piece_mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    uint16_t *dst = out;

    for (unsigned int word = 0; word < word_count; word++) {
        // Gather PARAMS_EXTRACTED_BITS bytes, lowest-addressed byte in the low bits.
        uint64_t chunk = 0;
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            chunk |= ((uint64_t)src[word * PARAMS_EXTRACTED_BITS + byte]) << (8 * byte);
        }

        // Peel off one piece per output entry, starting with the low bits.
        for (unsigned int piece = 0; piece < pieces_per_word; piece++) {
            *dst++ = (uint16_t)((chunk & piece_mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
            chunk >>= PARAMS_EXTRACTED_BITS;
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_key_decode(uint16_t *out, const uint16_t *in) {
    // Decoding: round every matrix entry to its top PARAMS_EXTRACTED_BITS bits and
    // concatenate the results into a byte string written through out.
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint16_t piece_mask = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint16_t q_mask = ((uint16_t)1 << PARAMS_LOGQ) - 1;
    uint8_t *dst = (uint8_t *)out;
    unsigned int entry = 0;

    for (unsigned int word = 0; word < word_count; word++) {
        uint64_t chunk = 0;

        for (unsigned int piece = 0; piece < pieces_per_word; piece++) {
            // Add half of the dropped range, then shift: rounded = floor(in * 2^-(LOGQ - EXTRACTED_BITS) + 0.5)
            uint16_t rounded = ((in[entry] & q_mask) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
            chunk |= ((uint64_t)(rounded & piece_mask)) << (PARAMS_EXTRACTED_BITS * piece);
            entry++;
        }

        // Emit the collected bits, low byte first.
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            dst[word * PARAMS_EXTRACTED_BITS + byte] = (chunk >> (8 * byte)) & 0xFF;
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
    // Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
    // If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
    //
    // Parameters:
    //   out    - destination byte buffer; all outlen bytes are zeroed before any bit is written
    //   outlen - size of out in bytes
    //   in     - source vector of 16-bit elements
    //   inlen  - number of elements in in
    //   lsb    - number of low-order bits taken from each input element
    // Bit order: the highest of the lsb bits of an element is emitted first, and every
    // output byte is filled from its most significant bit downwards.
    memset(out, 0, outlen);

    size_t i = 0;            // whole bytes already filled in
    size_t j = 0;            // whole uint16_t already copied
    uint16_t w = 0;          // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb in w

    // Keep going while there is room in out and either unread input or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |        |        |********|********|
                              ^
                              j
        w : |   ****|
                ^
               bits
        out:|**|**|**|**|**|**|**|**|* |
                                    ^^
                                    ib
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < 8) {
            // Move as many bits as both the current byte can take and w still holds.
            int nbits = min(8 - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask);  // the bits to copy from w to out
            out[i] = out[i] + (t << (8 - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            // w is empty: refill it from the next input element, if any.
            if (bits == 0) {
                if (j < inlen) {
                    w = in[j];
                    bits = lsb;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == 8) {  // out[i] is filled in
            i++;
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
    // Unpack the input char vector into a uint16_t output vector, copying lsb bits
    // for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
    //
    // Parameters:
    //   out    - destination vector; all outlen elements are zeroed before any bit is written
    //   outlen - number of 16-bit elements in out
    //   in     - source byte buffer
    //   inlen  - number of bytes in in
    //   lsb    - number of bits stored into the low end of each output element
    // Bit order: every input byte is consumed from its most significant bit downwards,
    // and the first bits read land in the highest of the lsb positions of an element.
    memset(out, 0, outlen * sizeof(uint16_t));

    size_t i = 0;            // whole uint16_t already filled in
    size_t j = 0;            // whole bytes already copied
    uint8_t w = 0;           // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb bits of w

    // Keep going while there is room in out and either unread input or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |  |  |  |  |  |  |**|**|...
                              ^
                              j
        w : | *|
              ^
              bits
        out:|   *****|   *****|   ***  |        |...
                              ^   ^
                              i   b
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < lsb) {
            // Move as many bits as both the current element can take and w still holds.
            int nbits = min(lsb - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (w >> (bits - nbits)) & mask;  // the bits to copy from w to out
            out[i] = out[i] + (t << (lsb - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            // w is empty: refill it from the next input byte, if any.
            if (bits == 0) {
                if (j < inlen) {
                    w = in[j];
                    bits = 8;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == lsb) {  // out[i] is filled in
            i++;
        }
    }
 }


 int8_t PQCLEAN_FRODOKEM1344AES_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    // Compare two arrays of 16-bit words without branching on their contents.
    //   a, b - arrays of len uint16_t elements each
    //   len  - number of elements to compare
    // Returns 0 if the arrays are equal (or len is 0), -1 otherwise.
    uint16_t r = 0;

    // Accumulate every differing bit; the loop always visits all len elements.
    for (size_t i = 0; i < len; i++) {
        r |= a[i] ^ b[i];
    }

    // For an unsigned x, (x | -x) has its top bit set exactly when x != 0.
    // Working in uint32_t avoids the implementation-defined right shift of a
    // negative value and the out-of-range narrowing to int8_t.
    uint32_t x = r;
    uint32_t differs = (x | ((uint32_t)0 - x)) >> 31;  // 1 if any bit differed, 0 otherwise

    return (int8_t)(-(int32_t)differs);
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Branch-free choice between two byte arrays, written to r.
    // selector == 0 copies a into r; selector == -1 copies b into r.
    const uint8_t keep_b = (uint8_t)selector;
    const uint8_t keep_a = (uint8_t)~selector;
    size_t pos = 0;

    while (pos < len) {
        r[pos] = (uint8_t)((keep_a & a[pos]) | (keep_b & b[pos]));
        pos++;
    }
 }


 void PQCLEAN_FRODOKEM1344AES_CLEAN_clear_bytes(uint8_t *mem, size_t n) {
    // Zero the first n bytes at mem.
    // Every store goes through a volatile-qualified pointer so the compiler
    // is told not to optimize the clearing away.
    volatile uint8_t *cursor = mem;

    while (n > 0) {
        *cursor = 0;
        cursor++;
        n--;
    }
 }
--- a/crypto_kem/frodokem1344aes/opt/LICENSE
+++ b/crypto_kem/frodokem1344aes/opt/LICENSE
@@ -1,21 +0,0 @@
 MIT License

 Copyright (c) Microsoft Corporation. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE
--- a/crypto_kem/frodokem1344aes/opt/Makefile.Microsoft_nmake
+++ b/crypto_kem/frodokem1344aes/opt/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 # Static library produced by this Makefile and the object files archived into it.
 LIBRARY=libfrodokem1344aes_opt.lib
 OBJECTS=kem.obj matrix_aes.obj noise.obj util.obj

 # Compiler flags: optimise (/O2), add ..\..\..\common to the include path,
 # and use warning level 4 with warnings treated as errors (/W4 /WX).
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the library.
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive the objects; $@ is the target name and $** is the full list of dependents.
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove build products; the leading '-' lets nmake continue if DEL fails.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/frodokem1344aes/opt/api.h
+++ b/crypto_kem/frodokem1344aes/opt/api.h
@@ -1,20 +0,0 @@
 // Public KEM interface of the FrodoKEM-1344-AES "opt" implementation.
 #ifndef PQCLEAN_FRODOKEM1344AES_OPT_API_H
 #define PQCLEAN_FRODOKEM1344AES_OPT_API_H

 #include <stddef.h>
 #include <stdint.h>

 // Buffer sizes in bytes; the trailing comments give the formula behind each literal.
 #define PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_SECRETKEYBYTES  43088     // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
 #define PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_PUBLICKEYBYTES  21520     // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
 #define PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_BYTES              32
 #define PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_CIPHERTEXTBYTES 21632     // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

 #define PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_ALGNAME "FrodoKEM-1344-AES"

 // Key generation: fills pk (CRYPTO_PUBLICKEYBYTES bytes) and sk (CRYPTO_SECRETKEYBYTES bytes).
 // The implementation in kem.c always returns 0.
 int PQCLEAN_FRODOKEM1344AES_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

 // Encapsulation: reads pk, writes the ciphertext ct (CRYPTO_CIPHERTEXTBYTES bytes)
 // and the shared secret ss (CRYPTO_BYTES bytes). The implementation in kem.c always returns 0.
 int PQCLEAN_FRODOKEM1344AES_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

 // Decapsulation: reads ct and sk, writes the shared secret ss (CRYPTO_BYTES bytes).
 // The implementation in kem.c always returns 0; a ciphertext that fails the internal
 // re-encryption check yields a secret derived from the secret value s stored in sk
 // rather than an error code.
 int PQCLEAN_FRODOKEM1344AES_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

 #endif
--- a/crypto_kem/frodokem1344aes/opt/common.h
+++ b/crypto_kem/frodokem1344aes/opt/common.h
@@ -1,21 +0,0 @@
 // Internal helper prototypes shared by the FrodoKEM-1344-AES "opt" sources.
 #ifndef COMMON_H
 #define COMMON_H

 // The prototypes below use size_t and the fixed-width integer types, so pull
 // them in here instead of relying on the include order of every user.
 #include <stddef.h>
 #include <stdint.h>

 // Matrix generation and multiplication (matrix_aes.c): A is expanded from seed_A on the fly.
 // out = A*s + e and out = s*A + e respectively.
 int PQCLEAN_FRODOKEM1344AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 int PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);

 // Noise sampling (noise.c): replaces the n pseudo-random words in s by noise samples, in place.
 void PQCLEAN_FRODOKEM1344AES_OPT_sample_n(uint16_t *s, size_t n);

 // Small-matrix arithmetic, message encoding, bit packing and constant-time helpers.
 // NOTE(review): presumably implemented in this variant's util.c - confirm.
 void PQCLEAN_FRODOKEM1344AES_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
 void PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
 void PQCLEAN_FRODOKEM1344AES_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM1344AES_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM1344AES_OPT_key_encode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM1344AES_OPT_key_decode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM1344AES_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
 void PQCLEAN_FRODOKEM1344AES_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
 int8_t PQCLEAN_FRODOKEM1344AES_OPT_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
 void PQCLEAN_FRODOKEM1344AES_OPT_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
 void PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(uint8_t *mem, size_t n);
 uint16_t PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(uint16_t n);
 uint16_t PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(uint16_t n);

 #endif
--- a/crypto_kem/frodokem1344aes/opt/kem.c
+++ b/crypto_kem/frodokem1344aes/opt/kem.c
@@ -1,237 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "fips202.h"
 #include "randombytes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM1344AES_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // FrodoKEM's key generation
    // Outputs: public key pk (               BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
    //          secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
    // Returns: always 0.
    //
    // Layout written below:
    //   pk = seed_A || packed B
    //   sk = s || pk || S (16-bit words stored little-endian) || H(pk)
    uint8_t *pk_seedA = &pk[0];
    uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *sk_s = &sk[0];
    uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0};           // contains secret data
    uint16_t *E = &S[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A];    // contains secret data via randomness_s and randomness_seedSE
    uint8_t *randomness_s = &randomness[0];                 // contains secret data
    uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
    uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];           // contains secret data

    // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
    randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
    shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

    // Generate S and E, and compute B = A*S + E. Generate A on-the-fly
    // The XOF input is the byte 0x5F followed by seedSE; its output fills S and E (E is the second half of S[]).
    shake_input_seedSE[0] = 0x5F;
    memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
    shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(S[i]);
    }
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(S, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(E, PARAMS_N * PARAMS_NBAR);
    // pk starts with seed_A (pk_seedA == &pk[0]), so passing pk hands over the seed.
    PQCLEAN_FRODOKEM1344AES_OPT_mul_add_as_plus_e(B, S, E, pk);

    // Encode the second part of the public key
    PQCLEAN_FRODOKEM1344AES_OPT_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Add s, pk and S to the secret key
    memcpy(sk_s, randomness_s, CRYPTO_BYTES);
    memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(S[i]);
    }
    // The length is in bytes: PARAMS_N * PARAMS_NBAR words of 2 bytes each, i.e. S without E.
    memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

    // Add H(pk) to the secret key
    shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

    // Cleanup:
    // Only the first 2 * CRYPTO_BYTES of randomness (s and seedSE) are wiped; the remainder is the seed z for seed_A.
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(randomness, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM1344AES_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // FrodoKEM's key encapsulation
    // Input:   public key pk (CRYPTO_PUBLICKEYBYTES bytes)
    // Outputs: ciphertext ct = c1 || c2 (CRYPTO_CIPHERTEXTBYTES bytes)
    //          shared secret ss (CRYPTO_BYTES bytes)
    // Returns: always 0.
    const uint8_t *pk_seedA = &pk[0];
    const uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *ct_c1 = &ct[0];
    uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp share one buffer so a single XOF call can fill all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];          // contains secret data
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                    // contains secret data via mu
    uint8_t *pkh = &G2in[0];
    uint8_t *mu = &G2in[BYTES_PKHASH];                        // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                          // contains secret data
    uint8_t *seedSE = &G2out[0];                              // contains secret data
    uint8_t *k = &G2out[CRYPTO_BYTES];                        // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];       // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];            // contains secret data
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];             // contains secret data

    // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
    shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
    randombytes(mu, BYTES_MU);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
    // The XOF input is the byte 0x96 followed by seedSE.
    shake_input_seedSE[0] = 0x96;
    memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
    PQCLEAN_FRODOKEM1344AES_OPT_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Generate Epp, and compute V = Sp*B + Epp
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sb_plus_e(V, B, Sp, Epp);

    // Encode mu, and compute C = V + enc(mu) (mod q)
    PQCLEAN_FRODOKEM1344AES_OPT_key_encode(C, (uint16_t *)mu);
    PQCLEAN_FRODOKEM1344AES_OPT_add(C, V, C);
    PQCLEAN_FRODOKEM1344AES_OPT_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

    // Compute ss = F(ct||KK)
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
    memcpy(Fin_k, k, CRYPTO_BYTES);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Wipes every buffer marked "contains secret data" above.
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(mu, BYTES_MU);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM1344AES_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    // FrodoKEM's key decapsulation
    // Inputs:  ciphertext ct = c1 || c2 (CRYPTO_CIPHERTEXTBYTES bytes)
    //          secret key sk = s || pk || S || pkh
    // Output:  shared secret ss (CRYPTO_BYTES bytes)
    // Returns: always 0. When the re-encryption check fails, ss is derived from s instead of k'.
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp share one buffer so a single XOF call can fill all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];                  // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];             // contains secret data
    const uint8_t *ct_c1 = &ct[0];
    const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    const uint8_t *sk_s = &sk[0];
    const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint16_t S[PARAMS_N * PARAMS_NBAR];                      // contains secret data
    const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    const uint8_t *pk_seedA = &sk_pk[0];
    const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                   // contains secret data via muprime
    uint8_t *pkh = &G2in[0];
    uint8_t *muprime = &G2in[BYTES_PKHASH];                  // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                         // contains secret data
    uint8_t *seedSEprime = &G2out[0];                        // contains secret data
    uint8_t *kprime = &G2out[CRYPTO_BYTES];                  // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];      // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];           // contains secret data
    uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES];       // contains secret data

    // Rebuild S from its little-endian byte encoding in the secret key.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
    }

    // Compute W = C - Bp*S (mod q), and decode the randomness mu
    PQCLEAN_FRODOKEM1344AES_OPT_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_OPT_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_OPT_mul_bs(W, Bp, S);
    PQCLEAN_FRODOKEM1344AES_OPT_sub(W, C, W);
    PQCLEAN_FRODOKEM1344AES_OPT_key_decode((uint16_t *)muprime, W);

    // Generate (seedSE' || k') = G_2(pkh || mu')
    memcpy(pkh, sk_pkh, BYTES_PKHASH);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
    // Same 0x96-prefixed XOF input as in encapsulation, so an honest ciphertext reproduces Sp, Ep and Epp.
    shake_input_seedSEprime[0] = 0x96;
    memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

    // Generate Epp, and compute W = Sp*B + Epp
    PQCLEAN_FRODOKEM1344AES_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM1344AES_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sb_plus_e(W, B, Sp, Epp);

    // Encode mu, and compute CC = W + enc(mu') (mod q)
    PQCLEAN_FRODOKEM1344AES_OPT_key_encode(CC, (uint16_t *)muprime);
    PQCLEAN_FRODOKEM1344AES_OPT_add(CC, W, CC);

    // Prepare input to F
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

    // Reducing BBp modulo q
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
    }

    // If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
    // Needs to avoid branching on secret data as per:
    //     Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
    //     primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
    // Both comparisons are always evaluated; their results are combined with a bitwise OR.
    int8_t selector = PQCLEAN_FRODOKEM1344AES_OPT_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM1344AES_OPT_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
    // If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
    PQCLEAN_FRODOKEM1344AES_OPT_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Wipes every buffer marked "contains secret data" above.
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(muprime, BYTES_MU);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    return 0;
 }
--- a/crypto_kem/frodokem1344aes/opt/matrix_aes.c
+++ b/crypto_kem/frodokem1344aes/opt/matrix_aes.c
@@ -1,127 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: matrix arithmetic functions used by the KEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "aes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM1344AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
    // Inputs: s, e (N x N_BAR)
    //         seed_A: key for the AES-128 instance that expands A
    // Output: out = A*s + e (N x N_BAR)
    // Returns: always 1.
    int k;
    uint16_t i, j;
    int16_t a_row[4 * PARAMS_N];

    // Start from out = e. The copy is done element by element: accessing the
    // uint16_t arrays through uint32_t pointers would break the strict-aliasing
    // rule and assume 4-byte alignment of out and e.
    for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i++) {
        out[i] = e[i];
    }

    int16_t a_row_temp[4 * PARAMS_N] = {0};                     // Take four lines of A at once
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);

    // AES input template: every group of PARAMS_STRIPE_STEP words holds the row index in
    // word 0 and the column index in word 1; the remaining words stay zero.
    // The column indices never change, so they are written once here.
    for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
        a_row_temp[j + 1 + 0 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(j);     // Loading values in the little-endian order
        a_row_temp[j + 1 + 1 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(j);
        a_row_temp[j + 1 + 2 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(j);
        a_row_temp[j + 1 + 3 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(j);
    }

    for (i = 0; i < PARAMS_N; i += 4) {
        for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {    // Go through A, four rows at a time
            a_row_temp[j + 0 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(i + 0); // Loading values in the little-endian order
            a_row_temp[j + 1 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(i + 1);
            a_row_temp[j + 2 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(i + 2);
            a_row_temp[j + 3 * PARAMS_N] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(i + 3);
        }
        // Encrypt the template to obtain rows i .. i+3 of A.
        aes128_ecb((uint8_t *)a_row, (uint8_t *)a_row_temp, 4 * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
        for (k = 0; k < 4 * PARAMS_N; k++) {
            a_row[k] = PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(a_row[k]);
        }
        for (k = 0; k < PARAMS_NBAR; k++) {
            uint16_t sum[4] = {0};
            for (j = 0; j < PARAMS_N; j++) {                    // Matrix-vector multiplication
                uint16_t sp = s[k * PARAMS_N + j];
                sum[0] += a_row[0 * PARAMS_N + j] * sp;         // Go through four lines with same s
                sum[1] += a_row[1 * PARAMS_N + j] * sp;
                sum[2] += a_row[2 * PARAMS_N + j] * sp;
                sum[3] += a_row[3 * PARAMS_N + j] * sp;
            }
            out[(i + 0)*PARAMS_NBAR + k] += sum[0];
            out[(i + 2)*PARAMS_NBAR + k] += sum[2];
            out[(i + 1)*PARAMS_NBAR + k] += sum[1];
            out[(i + 3)*PARAMS_NBAR + k] += sum[3];
        }
    }
    aes128_ctx_release(&ctx128);
    return 1;
 }




 int PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
    // Inputs: s', e' (N_BAR x N)
    //         seed_A: key for the AES-128 instance that expands A
    // Output: out = s'*A + e' (N_BAR x N)
    // Returns: always 1.
    int j;
    uint16_t i, kk;

    // Start from out = e'. The copy is done element by element: accessing the
    // uint16_t arrays through uint32_t pointers would break the strict-aliasing
    // rule and assume 4-byte alignment of out and e.
    for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i++) {
        out[i] = e[i];
    }

    int k;
    uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
    uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP];
    uint16_t a_cols_temp[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);

    // AES input template: one group of PARAMS_STRIPE_STEP words per row of A, with the row
    // index in word 0. The row indices never change, so they are written once here.
    for (i = 0, j = 0; i < PARAMS_N; i++, j += PARAMS_STRIPE_STEP) {
        a_cols_temp[j] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(i);                       // Loading values in the little-endian order
    }

    for (kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) {     // Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time.
        // Word 1 of every group carries the index of the first column of the current stripe.
        for (i = 0; i < (PARAMS_N * PARAMS_STRIPE_STEP); i += PARAMS_STRIPE_STEP) {
            a_cols_temp[i + 1] = PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(kk);              // Loading values in the little-endian order
        }

        aes128_ecb((uint8_t *)a_cols, (uint8_t *)a_cols_temp, PARAMS_N * PARAMS_STRIPE_STEP * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);

        for (i = 0; i < PARAMS_N; i++) {                        // Transpose a_cols to have access to it in the column-major order.
            for (k = 0; k < PARAMS_STRIPE_STEP; k++) {
                a_cols_t[k * PARAMS_N + i] = PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(a_cols[i * PARAMS_STRIPE_STEP + k]);
            }
        }

        for (i = 0; i < PARAMS_NBAR; i++) {
            // Handle PARAMS_PARALLEL columns of the stripe per pass over row i of s'.
            for (k = 0; k < PARAMS_STRIPE_STEP; k += PARAMS_PARALLEL) {
                uint16_t sum[PARAMS_PARALLEL] = {0};
                for (j = 0; j < PARAMS_N; j++) {                // Matrix-vector multiplication
                    uint16_t sp = s[i * PARAMS_N + j];
                    sum[0] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 0) * PARAMS_N + j]);
                    sum[1] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 1) * PARAMS_N + j]);
                    sum[2] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 2) * PARAMS_N + j]);
                    sum[3] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 3) * PARAMS_N + j]);
                }
                out[i * PARAMS_N + kk + k + 0] += sum[0];
                out[i * PARAMS_N + kk + k + 2] += sum[2];
                out[i * PARAMS_N + kk + k + 1] += sum[1];
                out[i * PARAMS_N + kk + k + 3] += sum[3];
            }
        }
    }
    aes128_ctx_release(&ctx128);
    return 1;
 }
--- a/crypto_kem/frodokem1344aes/opt/noise.c
+++ b/crypto_kem/frodokem1344aes/opt/noise.c
@@ -1,35 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: noise sampling functions
 *********************************************************************************************/

 #include <stdint.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

 void PQCLEAN_FRODOKEM1344AES_OPT_sample_n(uint16_t *s, size_t n) {
    // In-place noise sampler: each of the n 16-bit pseudo-random words in s is replaced
    // by one sample drawn according to the cumulative distribution in CDF_TABLE.
    // The low bit of a word selects the sign, the remaining 15 bits select the magnitude.
    const unsigned int last = (unsigned int)(CDF_TABLE_LEN - 1);   // the final table entry is never compared

    for (size_t idx = 0; idx < n; ++idx) {
        const uint16_t word = s[idx];
        const uint16_t negate = word & 0x1;   // least significant bit: sign selector
        const uint16_t value = word >> 1;     // remaining 15 bits: value looked up in the CDF
        uint16_t count = 0;
        unsigned int t;

        for (t = 0; t < last; t++) {
            // Branch-free comparison: contributes 1 when CDF_TABLE[t] < value, else 0.
            // Relies on both operands fitting in 15 bits, so bit 15 of the difference is the borrow.
            count += (uint16_t)(CDF_TABLE[t] - value) >> 15;
        }

        // With negate in {0, 1}: leaves count unchanged for 0, yields -count (mod 2^16) for 1.
        s[idx] = ((-negate) ^ count) + negate;
    }
 }
--- a/crypto_kem/frodokem1344aes/opt/params.h
+++ b/crypto_kem/frodokem1344aes/opt/params.h
@@ -1,27 +0,0 @@
 #ifndef PARAMS_H
 #define PARAMS_H

 // Parameter set used by the sources in crypto_kem/frodokem1344aes/opt.

 // Short aliases for the namespaced size constants. The PQCLEAN_..._CRYPTO_* macros
 // are not defined in this header (presumably they come from api.h -- confirm).
 #define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_SECRETKEYBYTES
 #define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_PUBLICKEYBYTES
 #define CRYPTO_BYTES PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_BYTES
 #define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM1344AES_OPT_CRYPTO_CIPHERTEXTBYTES

 // Matrix dimensions (N and N_BAR), log2 of the modulus q, and the number of bits
 // encoded into each matrix entry by key_encode / recovered by key_decode.
 #define PARAMS_N 1344
 #define PARAMS_NBAR 8
 #define PARAMS_LOGQ 16
 #define PARAMS_Q (1 << PARAMS_LOGQ)
 #define PARAMS_EXTRACTED_BITS 4
 // Number of columns of A generated per stripe, and number of running sums kept
 // side by side in the stripe multiplication loop.
 #define PARAMS_STRIPE_STEP 8
 #define PARAMS_PARALLEL 4
 // Byte lengths: seed for A, the encoded message mu (4 * 8 * 8 / 8 = 32 bytes), and the hash of the public key.
 #define BYTES_SEED_A 16
 #define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
 #define BYTES_PKHASH CRYPTO_BYTES

 // Selecting SHAKE XOF function for the KEM and noise sampling
 #define shake     shake256

 // CDF table
 // CDF_TABLE_DATA is the initializer for the sampler's table; CDF_TABLE_LEN must equal its entry count.
 #define CDF_TABLE_DATA {9142, 23462, 30338, 32361, 32725, 32765, 32767}
 #define CDF_TABLE_LEN 7

 #endif
--- a/crypto_kem/frodokem1344aes/opt/util.c
+++ b/crypto_kem/frodokem1344aes/opt/util.c
@@ -1,264 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: additional functions for FrodoKEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Returns the smaller of two 8-bit values (y when they are equal).
 static inline uint8_t min(uint8_t x, uint8_t y) {
    return (x < y) ? x : y;
 }

 uint16_t PQCLEAN_FRODOKEM1344AES_OPT_LE_TO_UINT16(uint16_t n) {
    // Reads the two bytes of n in memory order and interprets them as a
    // little-endian 16-bit value (first byte = low 8 bits, second byte = high 8 bits).
    const uint8_t *raw = (const uint8_t *) &n;
    const uint16_t low = raw[0];
    const uint16_t high = raw[1];

    return (uint16_t)(low | (high << 8));
 }

 uint16_t PQCLEAN_FRODOKEM1344AES_OPT_UINT16_TO_LE(uint16_t n) {
    // Builds a 16-bit object whose in-memory bytes are n in little-endian order:
    // the low 8 bits of n go to the first byte, the high 8 bits to the second.
    uint16_t encoded;
    uint8_t *raw = (uint8_t *) &encoded;

    raw[1] = (uint8_t)(n >> 8);
    raw[0] = (uint8_t)(n & 0xFF);
    return encoded;
 }

 void PQCLEAN_FRODOKEM1344AES_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
    // Computes out = b * s, reduced modulo 2^PARAMS_LOGQ.
    // b is read as PARAMS_NBAR rows of PARAMS_N entries. s is read as s[col * PARAMS_N + t],
    // i.e. the PARAMS_N entries that multiply into output column col are stored contiguously.
    // out receives PARAMS_NBAR x PARAMS_NBAR entries, row by row.
    int row, col, t;

    for (row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *b_row = &b[row * PARAMS_N];

        for (col = 0; col < PARAMS_NBAR; col++) {
            const uint16_t *s_vec = &s[col * PARAMS_N];
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            *cell = 0;
            for (t = 0; t < PARAMS_N; t++) {
                // Widen one operand to 32 bits so the product is computed unsigned, then keep the low 16 bits.
                *cell += (uint16_t)(b_row[t] * (uint32_t)s_vec[t]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
    // Computes out = s * b + e, reduced modulo 2^PARAMS_LOGQ.
    // s is read as PARAMS_NBAR rows of PARAMS_N entries, b as PARAMS_N rows of PARAMS_NBAR entries,
    // and e / out as PARAMS_NBAR x PARAMS_NBAR entries stored row by row.
    int row, col, t;

    for (row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *s_row = &s[row * PARAMS_N];

        for (col = 0; col < PARAMS_NBAR; col++) {
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            *cell = e[row * PARAMS_NBAR + col];   // start from the error term
            for (t = 0; t < PARAMS_N; t++) {
                // Widen one operand to 32 bits so the product is computed unsigned, then keep the low 16 bits.
                *cell += (uint16_t)(s_row[t] * (uint32_t)b[t * PARAMS_NBAR + col]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Entry-wise sum of two PARAMS_NBAR x PARAMS_NBAR matrices: out = a + b,
    // with every entry masked to PARAMS_LOGQ bits.
    size_t idx = 0;

    while (idx < (PARAMS_NBAR * PARAMS_NBAR)) {
        out[idx] = (a[idx] + b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Entry-wise difference of two PARAMS_NBAR x PARAMS_NBAR matrices: out = a - b,
    // with every entry masked to PARAMS_LOGQ bits.
    size_t idx = 0;

    while (idx < (PARAMS_NBAR * PARAMS_NBAR)) {
        out[idx] = (a[idx] - b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_key_encode(uint16_t *out, const uint16_t *in) {
    // Encoding: reads the input as a byte string and spreads it over
    // PARAMS_NBAR * PARAMS_NBAR output entries, PARAMS_EXTRACTED_BITS bits per entry,
    // placing each piece in the most significant bits of a PARAMS_LOGQ-bit value.
    const uint8_t *src = (const uint8_t *)in;
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint64_t piece_mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    unsigned int w, k, out_idx = 0;

    for (w = 0; w < word_count; w++) {
        uint64_t word = 0;

        // Gather PARAMS_EXTRACTED_BITS input bytes into one word, first byte in the low bits.
        for (k = 0; k < PARAMS_EXTRACTED_BITS; k++) {
            word |= ((uint64_t)src[w * PARAMS_EXTRACTED_BITS + k]) << (8 * k);
        }
        // Peel off eight pieces, lowest bits first, and scale each one up.
        for (k = 0; k < pieces_per_word; k++) {
            out[out_idx] = (uint16_t)((word & piece_mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
            out_idx++;
            word >>= PARAMS_EXTRACTED_BITS;
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_key_decode(uint16_t *out, const uint16_t *in) {
    // Decoding: rounds each of the PARAMS_NBAR * PARAMS_NBAR input entries to its
    // PARAMS_EXTRACTED_BITS most significant bits and writes the concatenated pieces
    // to the output, which is filled as a byte string.
    uint8_t *dst = (uint8_t *)out;
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint16_t piece_mask = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint16_t q_mask = ((uint16_t)1 << PARAMS_LOGQ) - 1;
    unsigned int w, k, in_idx = 0;

    for (w = 0; w < word_count; w++) {
        uint64_t word = 0;

        for (k = 0; k < pieces_per_word; k++) {
            // piece = floor(in * 2^-(PARAMS_LOGQ - PARAMS_EXTRACTED_BITS) + 0.5)
            uint16_t piece = ((in[in_idx] & q_mask) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
            word |= ((uint64_t)(piece & piece_mask)) << (PARAMS_EXTRACTED_BITS * k);
            in_idx++;
        }
        // Emit the word as PARAMS_EXTRACTED_BITS bytes, low byte first.
        for (k = 0; k < PARAMS_EXTRACTED_BITS; k++) {
            dst[w * PARAMS_EXTRACTED_BITS + k] = (word >> (8 * k)) & 0xFF;
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
    // Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
    // If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
    // Bits are emitted most-significant first: the highest of the lsb bits of an element is
    // written first, and each output byte is filled from its top bit downwards.
    // The output is zeroed up front, so bits that are never written stay 0.
    memset(out, 0, outlen);

    size_t i = 0;            // whole bytes already filled in
    size_t j = 0;            // whole uint16_t already copied
    uint16_t w = 0;          // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb in w

    // Continue while there is room in out and there is still input, either unread
    // elements of in or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |        |        |********|********|
                              ^
                              j
        w : |   ****|
                ^
               bits
        out:|**|**|**|**|**|**|**|**|* |
                                    ^^
                                    ib
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < 8) {
            // Copy as many bits as both the current output byte and the leftover allow.
            // On the very first pass bits is 0, so nothing is copied and w is simply loaded below.
            int nbits = min(8 - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask);  // the bits to copy from w to out
            out[i] = out[i] + (t << (8 - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            if (bits == 0) {
                // Leftover consumed: refill from the next input element, if any.
                if (j < inlen) {
                    w = in[j];
                    bits = lsb;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == 8) {  // out[i] is filled in
            i++;
        }
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
    // Unpack the input char vector into a uint16_t output vector, copying lsb bits
    // for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
    // Bits are consumed most-significant first from each input byte, and each output
    // element is filled from bit (lsb - 1) downwards.
    // The output is zeroed up front, so bits that are never written stay 0.
    memset(out, 0, outlen * sizeof(uint16_t));

    size_t i = 0;            // whole uint16_t already filled in
    size_t j = 0;            // whole bytes already copied
    uint8_t w = 0;           // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb bits of w

    // Continue while there is room in out and there is still input, either unread
    // bytes of in or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |  |  |  |  |  |  |**|**|...
                              ^
                              j
        w : | *|
              ^
              bits
        out:|   *****|   *****|   ***  |        |...
                              ^   ^
                              i   b
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < lsb) {
            // Copy as many bits as both the current output element and the leftover allow.
            // On the very first pass bits is 0, so nothing is copied and w is simply loaded below.
            int nbits = min(lsb - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (w >> (bits - nbits)) & mask;  // the bits to copy from w to out
            out[i] = out[i] + (t << (lsb - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            if (bits == 0) {
                // Leftover consumed: refill from the next input byte, if any.
                if (j < inlen) {
                    w = in[j];
                    bits = 8;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == lsb) {  // out[i] is filled in
            i++;
        }
    }
 }


 int8_t PQCLEAN_FRODOKEM1344AES_OPT_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    // Compares two arrays of len 16-bit words without branching on their contents.
    // Returns 0 if every pair of words matches, -1 otherwise.
    uint16_t diff = 0;
    size_t idx = 0;

    // OR together the XOR of every pair; diff ends up nonzero iff some pair differs.
    while (idx < len) {
        diff |= a[idx] ^ b[idx];
        idx++;
    }

    // Collapse "diff != 0" into an all-ones word: negating either half of diff gives a
    // negative value whenever that half is nonzero, and the shift replicates the sign bit.
    diff = (-(int16_t)(diff >> 1) | -(int16_t)(diff & 1)) >> (8 * sizeof(uint16_t) - 1);
    return (int8_t)diff;
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Branch-free choice between two len-byte arrays:
    // selector == 0 copies a into r, selector == -1 copies b into r.
    const uint8_t take_b = (uint8_t)selector;   // 0x00 or 0xFF for the two supported selectors
    size_t idx = 0;

    while (idx < len) {
        r[idx] = (uint8_t)((a[idx] & ~take_b) | (b[idx] & take_b));
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM1344AES_OPT_clear_bytes(uint8_t *mem, size_t n) {
    // Zeroes the first n bytes at mem.
    // The stores go through a volatile-qualified pointer so that the compiler
    // is told not to drop them as dead writes.
    volatile uint8_t *target = mem;
    size_t idx = 0;

    while (idx < n) {
        target[idx] = 0;
        idx++;
    }
 }
--- a/crypto_kem/frodokem640aes/META.yml
+++ b/crypto_kem/frodokem640aes/META.yml
@@ -1,28 +0,0 @@
 name: FrodoKEM-640-AES
 type: kem
 claimed-nist-level: 1
 claimed-security: IND-CCA2
 length-public-key: 9616
 length-secret-key: 19888
 length-ciphertext: 9720
 length-shared-secret: 16
 nistkat-sha256: c1f006531583896c47416e10707d1c8e487fe549df304d7a9c43155d5e47b8b6
 principal-submitters:
  - Michael Naehrig, Microsoft Research
 auxiliary-submitters:
 - Erdem Alkim
 - Joppe W. Bos, NXP Semiconductors
 - Léo Ducas, CWI
 - Patrick Longa, Microsoft Research
 - Ilya Mironov, Google
 - Valeria Nikolaenko
 - Chris Peikert, University of Michigan
 - Ananth Raghunathan, Google
 - Douglas Stebila, University of Waterloo
 - Karen Easterbrook, Microsoft Research
 - Brian LaMacchia, Microsoft Research
 implementations:
 - name: clean
  version: https://github.com/microsoft/PQCrypto-LWEKE/commit/669522db63850fa64d1a24a47e138e80a59349db
 - name: opt
  version: https://github.com/microsoft/PQCrypto-LWEKE/commit/669522db63850fa64d1a24a47e138e80a59349db
--- a/crypto_kem/frodokem640aes/clean/LICENSE
+++ b/crypto_kem/frodokem640aes/clean/LICENSE
@@ -1,21 +0,0 @@
 MIT License

 Copyright (c) Microsoft Corporation. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/crypto_kem/frodokem640aes/clean/Makefile.Microsoft_nmake
+++ b/crypto_kem/frodokem640aes/clean/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 # Name of the static library to produce and the object files that go into it.
 LIBRARY=libfrodokem640aes_clean.lib
 OBJECTS=kem.obj matrix_aes.obj noise.obj util.obj

 # Compiler flags: optimize (/O2), add the repository's common directory to the
 # include path, and build at warning level 4 with warnings treated as errors.
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the library.
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive all object files ($** = every dependent) into the library ($@ = the target).
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove build outputs; the leading '-' lets nmake continue if a file is missing.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/frodokem640aes/clean/api.h
+++ b/crypto_kem/frodokem640aes/clean/api.h
@@ -1,20 +0,0 @@
 #ifndef PQCLEAN_FRODOKEM640AES_CLEAN_API_H
 #define PQCLEAN_FRODOKEM640AES_CLEAN_API_H

 // Public interface of the FrodoKEM-640-AES "clean" implementation:
 // buffer sizes in bytes, the algorithm name, and the three KEM entry points.

 #include <stddef.h>
 #include <stdint.h>

 #define PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_SECRETKEYBYTES  19888     // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
 #define PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_PUBLICKEYBYTES   9616     // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
 #define PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_BYTES              16
 #define PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_CIPHERTEXTBYTES  9720     // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

 #define PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_ALGNAME "FrodoKEM-640-AES"

 // Key generation: writes the public key to pk and the secret key to sk.
 int PQCLEAN_FRODOKEM640AES_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

 // Encapsulation: writes a ciphertext to ct and the shared secret to ss for public key pk.
 int PQCLEAN_FRODOKEM640AES_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

 // Decapsulation: writes to ss the shared secret derived from ciphertext ct and secret key sk.
 int PQCLEAN_FRODOKEM640AES_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

 #endif
--- a/crypto_kem/frodokem640aes/clean/common.h
+++ b/crypto_kem/frodokem640aes/clean/common.h
@@ -1,21 +0,0 @@
 #ifndef COMMON_H
 #define COMMON_H

 // Internal helper prototypes shared by the FrodoKEM-640-AES "clean" sources.
 // The prototypes below use size_t and the fixed-width integer types, so their
 // headers are included here rather than relying on every includer to have
 // pulled them in beforehand.
 #include <stddef.h>
 #include <stdint.h>

 // Generate-and-multiply with the matrix A expanded from seed_A:
 // out = A*s + e and out = s*A + e respectively.
 int PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 int PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 // Noise sampling over n 16-bit words, performed in place on s.
 void PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(uint16_t *s, size_t n);
 // Small matrix arithmetic used by encapsulation and decapsulation.
 void PQCLEAN_FRODOKEM640AES_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
 void PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
 void PQCLEAN_FRODOKEM640AES_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM640AES_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
 // Message encoding into / decoding from matrix entries.
 void PQCLEAN_FRODOKEM640AES_CLEAN_key_encode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM640AES_CLEAN_key_decode(uint16_t *out, const uint16_t *in);
 // Bit packing between 16-bit vectors and byte strings, lsb bits per element.
 void PQCLEAN_FRODOKEM640AES_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
 void PQCLEAN_FRODOKEM640AES_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
 // Helpers for secret data: comparison yielding a selector, selection between two
 // buffers by that selector, and zeroing of memory.
 int8_t PQCLEAN_FRODOKEM640AES_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
 void PQCLEAN_FRODOKEM640AES_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
 void PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(uint8_t *mem, size_t n);
 // Conversions between host-order 16-bit values and their little-endian byte layout.
 uint16_t PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(uint16_t n);
 uint16_t PQCLEAN_FRODOKEM640AES_CLEAN_UINT16_TO_LE(uint16_t n);

 #endif
--- a/crypto_kem/frodokem640aes/clean/kem.c
+++ b/crypto_kem/frodokem640aes/clean/kem.c
@@ -1,237 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "fips202.h"
 #include "randombytes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM640AES_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // FrodoKEM's key generation
    // Outputs: public key pk (               BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
    //          secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
    // Layout written below: pk = seed_A || packed B, and sk = s || pk || S || H(pk).
    // Always returns 0.
    uint8_t *pk_seedA = &pk[0];
    uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *sk_s = &sk[0];
    uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    // S and E share one buffer so that a single XOF call can fill both halves.
    uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0};           // contains secret data
    uint16_t *E = &S[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A];    // contains secret data via randomness_s and randomness_seedSE
    uint8_t *randomness_s = &randomness[0];                 // contains secret data
    uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
    uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];           // contains secret data

    // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
    randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
    shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

    // Generate S and E, and compute B = A*S + E. Generate A on-the-fly
    // The XOF input is the constant byte 0x5F followed by seedSE.
    shake_input_seedSE[0] = 0x5F;
    memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
    shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    // Interpret the XOF output bytes as little-endian 16-bit words before sampling.
    for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(S[i]);
    }
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR);
    // pk currently starts with seed_A, so it is passed as the seed argument.
    PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_as_plus_e(B, S, E, pk);

    // Encode the second part of the public key
    PQCLEAN_FRODOKEM640AES_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Add s, pk and S to the secret key
    memcpy(sk_s, randomness_s, CRYPTO_BYTES);
    memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
    // Store S in little-endian byte order; only the S half of the buffer is converted and copied.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM640AES_CLEAN_UINT16_TO_LE(S[i]);
    }
    memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);   // 2 bytes per entry, PARAMS_N * PARAMS_NBAR entries

    // Add H(pk) to the secret key
    shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

    // Cleanup:
    // Only the first 2 * CRYPTO_BYTES of randomness (s and seedSE) are cleared; the trailing z part is left as is.
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM640AES_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // FrodoKEM's key encapsulation
    // Input:   public key pk (seed_A || packed B).
    // Outputs: ciphertext ct = c1 || c2 (packed Bp followed by packed C) and shared secret ss (CRYPTO_BYTES bytes).
    // Always returns 0.
    const uint8_t *pk_seedA = &pk[0];
    const uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *ct_c1 = &ct[0];
    uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp are laid out back to back in one buffer so that a single XOF call can fill all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];          // contains secret data
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                    // contains secret data via mu
    uint8_t *pkh = &G2in[0];
    uint8_t *mu = &G2in[BYTES_PKHASH];                        // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                          // contains secret data
    uint8_t *seedSE = &G2out[0];                              // contains secret data
    uint8_t *k = &G2out[CRYPTO_BYTES];                        // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];       // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];            // contains secret data
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];             // contains secret data

    // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
    // pkh and mu are adjacent in G2in, so hashing G2in hashes their concatenation.
    shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
    randombytes(mu, BYTES_MU);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
    // The XOF input is the constant byte 0x96 followed by seedSE.
    shake_input_seedSE[0] = 0x96;
    memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    // Interpret the XOF output bytes as little-endian 16-bit words before sampling.
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
    PQCLEAN_FRODOKEM640AES_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Generate Epp, and compute V = Sp*B + Epp
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp);

    // Encode mu, and compute C = V + enc(mu) (mod q)
    PQCLEAN_FRODOKEM640AES_CLEAN_key_encode(C, (uint16_t *)mu);
    PQCLEAN_FRODOKEM640AES_CLEAN_add(C, V, C);
    PQCLEAN_FRODOKEM640AES_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

    // Compute ss = F(ct||KK)
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
    memcpy(Fin_k, k, CRYPTO_BYTES);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Zero every local buffer (or part of one) that is marked above as holding secret data.
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(mu, BYTES_MU);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM640AES_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    // FrodoKEM's key decapsulation
    // Inputs:  ciphertext ct = c1 || c2 and secret key sk = s || pk || S || H(pk).
    // Output:  shared secret ss (CRYPTO_BYTES bytes).
    // The ciphertext is re-derived from the decoded message and compared with ct; the comparison
    // result selects, without branching, whether k' or the secret value s is hashed into ss.
    // Always returns 0.
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp are laid out back to back in one buffer so that a single XOF call can fill all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];                  // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];             // contains secret data
    const uint8_t *ct_c1 = &ct[0];
    const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    const uint8_t *sk_s = &sk[0];
    const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint16_t S[PARAMS_N * PARAMS_NBAR];                      // contains secret data
    const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    const uint8_t *pk_seedA = &sk_pk[0];
    const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                   // contains secret data via muprime
    uint8_t *pkh = &G2in[0];
    uint8_t *muprime = &G2in[BYTES_PKHASH];                  // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                         // contains secret data
    uint8_t *seedSEprime = &G2out[0];                        // contains secret data
    uint8_t *kprime = &G2out[CRYPTO_BYTES];                  // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];      // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];           // contains secret data
    uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES];       // contains secret data

    // Rebuild S from its little-endian byte encoding in the secret key (low byte first).
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
    }

    // Compute W = C - Bp*S (mod q), and decode the randomness mu
    PQCLEAN_FRODOKEM640AES_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_CLEAN_mul_bs(W, Bp, S);
    PQCLEAN_FRODOKEM640AES_CLEAN_sub(W, C, W);
    PQCLEAN_FRODOKEM640AES_CLEAN_key_decode((uint16_t *)muprime, W);

    // Generate (seedSE' || k') = G_2(pkh || mu')
    // pkh and mu' are adjacent in G2in, so hashing G2in hashes their concatenation.
    memcpy(pkh, sk_pkh, BYTES_PKHASH);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
    // The XOF input is the constant byte 0x96 followed by seedSE'.
    shake_input_seedSEprime[0] = 0x96;
    memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    // Interpret the XOF output bytes as little-endian 16-bit words before sampling.
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

    // Generate Epp, and compute W = Sp*B + Epp
    PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp);

    // Encode mu, and compute CC = W + enc(mu') (mod q)
    PQCLEAN_FRODOKEM640AES_CLEAN_key_encode(CC, (uint16_t *)muprime);
    PQCLEAN_FRODOKEM640AES_CLEAN_add(CC, W, CC);

    // Prepare input to F
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

    // Reducing BBp modulo q
    // Bp was unpacked with PARAMS_LOGQ bits per entry, so BBp is masked to the same width before comparing.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
    }

    // If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
    // Needs to avoid branching on secret data as per:
    //     Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
    //     primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
    // Both comparisons are always evaluated and their results OR-ed, so a mismatch in either gives -1.
    int8_t selector = PQCLEAN_FRODOKEM640AES_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM640AES_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
    // If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
    PQCLEAN_FRODOKEM640AES_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Zero every local buffer (or part of one) that is marked above as holding secret data.
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(muprime, BYTES_MU);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    return 0;
 }
--- a/crypto_kem/frodokem640aes/clean/matrix_aes.c
+++ b/crypto_kem/frodokem640aes/clean/matrix_aes.c
@@ -1,95 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: matrix arithmetic functions used by the KEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "aes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Computes out = A*s + e, where the N x N matrix A is expanded from seed_A with AES-128 in ECB mode.
    // Inputs: s and e, PARAMS_N * PARAMS_NBAR entries each; s is read as s[k * PARAMS_N + j].
    // Output: out, written as out[i * PARAMS_NBAR + k].
    // The whole matrix A is held in a local array. Always returns 1.
    int16_t A[PARAMS_N * PARAMS_N] = {0};
    aes128ctx aes_ctx;
    int row, col, bar, idx;

    aes128_ecb_keyexp(&aes_ctx, seed_A);

    // AES input: the first two entries of every PARAMS_STRIPE_STEP-wide stripe carry (row, column); the rest stay zero.
    for (row = 0; row < PARAMS_N; row++) {
        int16_t *stripe_row = &A[row * PARAMS_N];
        for (col = 0; col < PARAMS_N; col += PARAMS_STRIPE_STEP) {
            stripe_row[col] = (int16_t) row;
            stripe_row[col + 1] = (int16_t) col;
        }
    }
    // Put every entry into little-endian byte order before encrypting
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM640AES_CLEAN_UINT16_TO_LE(A[idx]);
    }

    // Encrypt in place to obtain A, then release the AES context
    aes128_ecb((uint8_t *) A, (uint8_t *) A, PARAMS_N * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &aes_ctx);
    aes128_ctx_release(&aes_ctx);

    // Read the ciphertext back as little-endian 16-bit words
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(A[idx]);
    }

    // Start from e, then accumulate A*s on top of it
    memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));

    for (row = 0; row < PARAMS_N; row++) {
        const int16_t *a_row = &A[row * PARAMS_N];
        for (bar = 0; bar < PARAMS_NBAR; bar++) {
            const uint16_t *s_run = &s[bar * PARAMS_N];
            uint16_t acc = 0;
            for (col = 0; col < PARAMS_N; col++) {
                acc += a_row[col] * s_run[col];
            }
            // No reduction modulo 2^15 here; the extra bits are dealt with when the result is packed.
            out[row * PARAMS_NBAR + bar] += acc;
        }
    }

    return 1;
 }


 int PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Computes out = s'*A + e', where the N x N matrix A is expanded from seed_A with AES-128 in ECB mode.
    // Inputs: s', e' (N_BAR x N), row-major: s is read as s[k * PARAMS_N + j].
    // Output: out (N_BAR x N), written as out[k * PARAMS_N + i].
    // The whole matrix A is held in a local array and read column-wise. Always returns 1.
    int16_t A[PARAMS_N * PARAMS_N] = {0};
    aes128ctx aes_ctx;
    int row, col, bar, idx;

    aes128_ecb_keyexp(&aes_ctx, seed_A);

    // AES input: the first two entries of every PARAMS_STRIPE_STEP-wide stripe carry (row, column); the rest stay zero.
    for (row = 0; row < PARAMS_N; row++) {
        int16_t *stripe_row = &A[row * PARAMS_N];
        for (col = 0; col < PARAMS_N; col += PARAMS_STRIPE_STEP) {
            stripe_row[col] = (int16_t) row;
            stripe_row[col + 1] = (int16_t) col;
        }
    }
    // Put every entry into little-endian byte order before encrypting
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM640AES_CLEAN_UINT16_TO_LE(A[idx]);
    }

    // Encrypt in place to obtain A, then release the AES context
    aes128_ecb((uint8_t *) A, (uint8_t *) A, PARAMS_N * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &aes_ctx);
    aes128_ctx_release(&aes_ctx);

    // Read the ciphertext back as little-endian 16-bit words
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(A[idx]);
    }

    // Start from e', then accumulate s'*A on top of it
    memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));

    for (col = 0; col < PARAMS_N; col++) {
        for (bar = 0; bar < PARAMS_NBAR; bar++) {
            const uint16_t *s_row = &s[bar * PARAMS_N];
            uint16_t acc = 0;
            for (row = 0; row < PARAMS_N; row++) {
                acc += A[row * PARAMS_N + col] * s_row[row];
            }
            // No reduction modulo 2^15 here; the extra bits are dealt with when the result is packed.
            out[bar * PARAMS_N + col] += acc;
        }
    }

    return 1;
 }
--- a/crypto_kem/frodokem640aes/clean/noise.c
+++ b/crypto_kem/frodokem640aes/clean/noise.c
@@ -1,35 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: noise sampling functions
 *********************************************************************************************/

 #include <stdint.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Cumulative distribution table of the noise distribution (values supplied by params.h)
 static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

 void PQCLEAN_FRODOKEM640AES_CLEAN_sample_n(uint16_t *s, size_t n) {
    // Replaces each of the n 16-bit pseudo-random values in s by a sample from the noise
    // distribution described by CDF_TABLE. The conversion happens in place.
    // Per element: the lowest bit selects the sign, the remaining 15 bits are compared against the table.
    for (size_t idx = 0; idx < n; idx++) {
        const uint16_t raw = s[idx];
        const uint16_t sign = raw & 0x1;   // least significant bit
        const uint16_t prnd = raw >> 1;    // remaining 15 bits
        uint16_t sample = 0;

        // The last table entry is never compared against.
        for (unsigned int t = 0; t + 1 < (unsigned int) CDF_TABLE_LEN; t++) {
            // Branch-free comparison: adds 1 when CDF_TABLE[t] < prnd, 0 otherwise.
            // Relies on both operands fitting in 15 bits, so bit 15 of the difference is the borrow.
            sample += (uint16_t)(CDF_TABLE[t] - prnd) >> 15;
        }

        // With sign in {0, 1}: yields sample when sign == 0 and its negation when sign == 1
        s[idx] = ((-sign) ^ sample) + sign;
    }
 }
--- a/crypto_kem/frodokem640aes/clean/params.h
+++ b/crypto_kem/frodokem640aes/clean/params.h
@@ -1,27 +0,0 @@
 #ifndef PARAMS_H
 #define PARAMS_H

 // Parameter set for the FrodoKEM-640-AES "clean" implementation.

 // Short aliases for the namespaced size constants (presumably provided by api.h -- confirm).
 #define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_SECRETKEYBYTES
 #define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define CRYPTO_BYTES PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_BYTES
 #define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM640AES_CLEAN_CRYPTO_CIPHERTEXTBYTES

 // Matrix dimensions: A is N x N, the secret/error matrices are N x N_BAR or N_BAR x N.
 #define PARAMS_N 640
 #define PARAMS_NBAR 8
 // Modulus q = 2^PARAMS_LOGQ
 #define PARAMS_LOGQ 15
 #define PARAMS_Q (1 << PARAMS_LOGQ)
 // Number of bits carried by each matrix entry in key_encode / key_decode
 #define PARAMS_EXTRACTED_BITS 2
 // Column step between the (row, column) index pairs written into the AES input when generating A
 #define PARAMS_STRIPE_STEP 8
 // NOTE(review): no use of PARAMS_PARALLEL is visible in this part of the sources -- confirm where it is consumed.
 #define PARAMS_PARALLEL 4
 // Length in bytes of the seed for A
 #define BYTES_SEED_A 16
 // Length in bytes of mu: PARAMS_EXTRACTED_BITS bits for each of the N_BAR x N_BAR entries
 #define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
 // Length in bytes of the public-key hash
 #define BYTES_PKHASH CRYPTO_BYTES

 // Selecting SHAKE XOF function for the KEM and noise sampling
 #define shake     shake128

 // CDF table
 // CDF_TABLE_LEN must equal the number of entries in CDF_TABLE_DATA.
 #define CDF_TABLE_DATA {4643, 13363, 20579, 25843, 29227, 31145, 32103, 32525, 32689, 32745, 32762, 32766, 32767}
 #define CDF_TABLE_LEN 13

 #endif
--- a/crypto_kem/frodokem640aes/clean/util.c
+++ b/crypto_kem/frodokem640aes/clean/util.c
@@ -1,264 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: additional functions for FrodoKEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 static inline uint8_t min(uint8_t x, uint8_t y) {
    // Smaller of the two 8-bit values
    return (x < y) ? x : y;
 }

 uint16_t PQCLEAN_FRODOKEM640AES_CLEAN_LE_TO_UINT16(uint16_t n) {
    // Interprets the two bytes stored in n as a little-endian 16-bit value:
    // the byte at the lower address is the low byte of the result.
    const uint8_t *bytes = (const uint8_t *) &n;
    const uint16_t low = bytes[0];
    const uint16_t high = bytes[1];

    return (uint16_t)(low | (high << 8));
 }

 uint16_t PQCLEAN_FRODOKEM640AES_CLEAN_UINT16_TO_LE(uint16_t n) {
    // Returns a 16-bit object whose in-memory bytes are n in little-endian order:
    // low byte of n at the lower address, high byte at the higher address.
    uint16_t encoded;
    uint8_t *dst = (uint8_t *) &encoded;

    dst[0] = (uint8_t)(n & 0xFF);
    dst[1] = (uint8_t)((n & 0xFF00) >> 8);
    return encoded;
 }

 void PQCLEAN_FRODOKEM640AES_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
    // Computes out = b*s, reduced modulo q = 2^PARAMS_LOGQ.
    // Inputs: b (N_BAR x N), read as b[i * PARAMS_N + k]; s is read as s[j * PARAMS_N + k].
    // Output: out (N_BAR x N_BAR), written as out[i * PARAMS_NBAR + j].
    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *b_row = &b[row * PARAMS_N];

        for (int col = 0; col < PARAMS_NBAR; col++) {
            const uint16_t *s_run = &s[col * PARAMS_N];
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            *cell = 0;
            for (int t = 0; t < PARAMS_N; t++) {
                // The product is formed in 32 bits and truncated to 16 before accumulating
                *cell += (uint16_t)(b_row[t] * (uint32_t)s_run[t]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
    // Computes out = s*b + e, reduced modulo q = 2^PARAMS_LOGQ.
    // Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR), all row-major.
    // Output: out (N_BAR x N_BAR), row-major.
    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *s_row = &s[row * PARAMS_N];

        for (int col = 0; col < PARAMS_NBAR; col++) {
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            // Start from the error term, then add the inner product on top
            *cell = e[row * PARAMS_NBAR + col];
            for (int t = 0; t < PARAMS_N; t++) {
                *cell += (uint16_t)(s_row[t] * (uint32_t)b[t * PARAMS_NBAR + col]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Element-wise sum of two N_BAR x N_BAR matrices, reduced modulo q = 2^PARAMS_LOGQ.
    // Each element is read before it is written, so out may be the same array as a or b.
    const size_t count = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < count) {
        out[idx] = (a[idx] + b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Element-wise difference a - b of two N_BAR x N_BAR matrices, reduced modulo q = 2^PARAMS_LOGQ.
    // Each element is read before it is written, so out may be the same array as a or b.
    const size_t count = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < count) {
        out[idx] = (a[idx] - b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_key_encode(uint16_t *out, const uint16_t *in) {
    // Spreads the bytes of in over the N_BAR x N_BAR entries of out. Each entry receives
    // PARAMS_EXTRACTED_BITS bits, shifted up so they end just below bit PARAMS_LOGQ.
    // in is read byte-wise; (PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8 bytes are consumed.
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint64_t piece_mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint8_t *src = (const uint8_t *)in;
    unsigned int w, p;

    for (w = 0; w < word_count; w++) {
        // Gather PARAMS_EXTRACTED_BITS input bytes into one word, first byte lowest
        uint64_t word = 0;
        for (p = 0; p < PARAMS_EXTRACTED_BITS; p++) {
            word |= ((uint64_t)src[w * PARAMS_EXTRACTED_BITS + p]) << (8 * p);
        }
        // Emit eight entries from the word, lowest bits first
        for (p = 0; p < pieces_per_word; p++) {
            out[w * pieces_per_word + p] = (uint16_t)((word & piece_mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
            word >>= PARAMS_EXTRACTED_BITS;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_key_decode(uint16_t *out, const uint16_t *in) {
    // Recovers PARAMS_EXTRACTED_BITS bits from each of the N_BAR x N_BAR entries of in and
    // packs them into out, which is written byte-wise
    // ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8 bytes in total).
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint16_t extract_mask = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint16_t q_mask = ((uint16_t)1 << PARAMS_LOGQ) - 1;
    uint8_t *dst = (uint8_t *)out;
    unsigned int src = 0;
    unsigned int w, p;

    for (w = 0; w < word_count; w++) {
        uint64_t gathered = 0;

        for (p = 0; p < pieces_per_word; p++) {
            // rounded = floor(in * 2^{-(PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)} + 0.5)
            uint16_t rounded = ((in[src] & q_mask) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
            gathered |= ((uint64_t)(rounded & extract_mask)) << (PARAMS_EXTRACTED_BITS * p);
            src++;
        }
        // Write the gathered bits out, lowest byte first
        for (p = 0; p < PARAMS_EXTRACTED_BITS; p++) {
            dst[w * PARAMS_EXTRACTED_BITS + p] = (gathered >> (8 * p)) & 0xFF;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
    // Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
    // If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
    //
    // out:    destination buffer; all outlen bytes are zeroed first
    // outlen: size of out in bytes
    // in:     source elements; the lsb least significant bits of each are used
    // inlen:  number of elements in in
    // lsb:    number of bits taken from each element
    //
    // Bits are emitted most-significant-first: within each element the highest of the lsb bits
    // goes out first, and each output byte is filled from its top bit downwards.
    memset(out, 0, outlen);

    size_t i = 0;            // whole bytes already filled in
    size_t j = 0;            // whole uint16_t already copied
    uint16_t w = 0;          // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb in w

    // Continue while there is room in out and either unread input or leftover bits in w
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |        |        |********|********|
                              ^
                              j
        w : |   ****|
                ^
               bits
        out:|**|**|**|**|**|**|**|**|* |
                                    ^^
                                    ib
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < 8) {
            // Move as many bits as both the current output byte and the leftover allow
            int nbits = min(8 - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask);  // the bits to copy from w to out
            out[i] = out[i] + (t << (8 - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            if (bits == 0) {
                // Leftover exhausted: reload from the next input element, if any
                if (j < inlen) {
                    w = in[j];
                    bits = lsb;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == 8) {  // out[i] is filled in
            i++;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
    // Unpack the input char vector into a uint16_t output vector, copying lsb bits
    // for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
    //
    // out:    destination elements; all outlen elements are zeroed first
    // outlen: number of elements in out
    // in:     packed source bytes
    // inlen:  size of in in bytes
    // lsb:    number of bits assembled into each output element
    //
    // Bits are consumed most-significant-first from each input byte, and each output
    // element is filled from bit (lsb - 1) downwards.
    memset(out, 0, outlen * sizeof(uint16_t));

    size_t i = 0;            // whole uint16_t already filled in
    size_t j = 0;            // whole bytes already copied
    uint8_t w = 0;           // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb bits of w

    // Continue while there is room in out and either unread input or leftover bits in w
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |  |  |  |  |  |  |**|**|...
                              ^
                              j
        w : | *|
              ^
              bits
        out:|   *****|   *****|   ***  |        |...
                              ^   ^
                              i   b
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < lsb) {
            // Move as many bits as both the current output element and the leftover allow
            int nbits = min(lsb - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (w >> (bits - nbits)) & mask;  // the bits to copy from w to out
            out[i] = out[i] + (t << (lsb - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            if (bits == 0) {
                // Leftover exhausted: reload from the next input byte, if any
                if (j < inlen) {
                    w = in[j];
                    bits = 8;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == lsb) {  // out[i] is filled in
            i++;
        }
    }
 }


 int8_t PQCLEAN_FRODOKEM640AES_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    // Compare two uint16_t arrays of len elements without any branch that depends on their contents.
    // Returns 0 if all elements are equal (including len == 0), -1 otherwise.
    uint16_t diff = 0;

    // Accumulate every differing bit; diff stays 0 only if all elements match.
    for (size_t i = 0; i < len; i++) {
        diff |= a[i] ^ b[i];
    }

    // Collapse diff to 0 or 1 using only non-negative arithmetic: diff <= 0xFFFF, so
    // diff + 0xFFFF carries into bit 16 exactly when diff != 0. This avoids right-shifting
    // a negative value and narrowing an out-of-range value, both of which are
    // implementation-defined in C.
    uint32_t nonzero = ((uint32_t)diff + 0xFFFFu) >> 16;

    // 0 -> 0, 1 -> -1; both values are representable in int8_t.
    return (int8_t)(0 - (int)nonzero);
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Copies len bytes into r, choosing the source by bit mask rather than by branch:
    // selector == 0 copies a, selector == -1 copies b.
    const uint8_t pick_b = (uint8_t) selector;
    const uint8_t pick_a = (uint8_t) ~selector;
    size_t idx = 0;

    while (idx < len) {
        r[idx] = (uint8_t)((pick_a & a[idx]) | (pick_b & b[idx]));
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM640AES_CLEAN_clear_bytes(uint8_t *mem, size_t n) {
    // Zeroes the first n bytes at mem, one byte at a time in ascending order.
    // The stores go through a volatile-qualified pointer so the compiler is told not to drop them.
    volatile uint8_t *sink = mem;
    size_t done = 0;

    while (done < n) {
        sink[done] = 0;
        done++;
    }
 }
--- a/crypto_kem/frodokem640aes/opt/LICENSE
+++ b/crypto_kem/frodokem640aes/opt/LICENSE
@@ -1,21 +0,0 @@
 MIT License

 Copyright (c) Microsoft Corporation. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/crypto_kem/frodokem640aes/opt/Makefile.Microsoft_nmake
+++ b/crypto_kem/frodokem640aes/opt/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 # Static library produced by this directory and the object files it is built from.
 LIBRARY=libfrodokem640aes_opt.lib
 OBJECTS=kem.obj matrix_aes.obj noise.obj util.obj

 # /O2: optimize; /I: add the shared "common" directory to the include path;
 # /W4 /WX: warning level 4 with warnings treated as errors.
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the library.
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive all objects into the library ($@ is the target, $** all of its dependents).
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove build products; the leading '-' lets nmake continue if a file is missing.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/frodokem640aes/opt/api.h
+++ b/crypto_kem/frodokem640aes/opt/api.h
@@ -1,20 +0,0 @@
 #ifndef PQCLEAN_FRODOKEM640AES_OPT_API_H
 #define PQCLEAN_FRODOKEM640AES_OPT_API_H

 // Public API of the FrodoKEM-640-AES "opt" implementation: buffer sizes and the three KEM operations.

 #include <stddef.h>
 #include <stdint.h>

 // Buffer sizes in bytes. With PARAMS_N = 640, PARAMS_NBAR = 8 and PARAMS_LOGQ = 15 the formulas on the right
 // evaluate to 16 + 9616 + 10240 + 16, 16 + 9600 and 9600 + 120 respectively.
 #define PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_SECRETKEYBYTES  19888     // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
 #define PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_PUBLICKEYBYTES   9616     // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
 // Length of the shared secret
 #define PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_BYTES              16
 #define PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_CIPHERTEXTBYTES  9720     // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

 #define PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_ALGNAME "FrodoKEM-640-AES"

 // Key generation: writes the public key to pk and the secret key to sk. Returns 0.
 int PQCLEAN_FRODOKEM640AES_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

 // Encapsulation: from public key pk, writes the ciphertext to ct and the shared secret to ss. Returns 0.
 int PQCLEAN_FRODOKEM640AES_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

 // Decapsulation: from ciphertext ct and secret key sk, writes the shared secret to ss. Returns 0 even when
 // the ciphertext does not verify; in that case ss is derived from the secret value s stored in sk instead.
 int PQCLEAN_FRODOKEM640AES_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

 #endif
--- a/crypto_kem/frodokem640aes/opt/common.h
+++ b/crypto_kem/frodokem640aes/opt/common.h
@@ -1,21 +0,0 @@
 #ifndef COMMON_H
 #define COMMON_H

 // Internal helper routines shared by the FrodoKEM-640-AES "opt" sources.
 // The fixed-width integer types and size_t used in the prototypes are pulled in here,
 // so the header does not depend on what its includer happened to include first.
 #include <stddef.h>
 #include <stdint.h>

 // Matrix arithmetic with A generated from seed_A: out = A*s + e and out = s*A + e
 int PQCLEAN_FRODOKEM640AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 int PQCLEAN_FRODOKEM640AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 // Noise sampling: converts the n values in s in place
 void PQCLEAN_FRODOKEM640AES_OPT_sample_n(uint16_t *s, size_t n);
 // Small-matrix arithmetic: products with b, and element-wise add/sub
 void PQCLEAN_FRODOKEM640AES_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
 void PQCLEAN_FRODOKEM640AES_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
 void PQCLEAN_FRODOKEM640AES_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM640AES_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
 // Message encoding into / decoding from matrix entries
 void PQCLEAN_FRODOKEM640AES_OPT_key_encode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM640AES_OPT_key_decode(uint16_t *out, const uint16_t *in);
 // Bit packing of uint16_t vectors, lsb bits per element
 void PQCLEAN_FRODOKEM640AES_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
 void PQCLEAN_FRODOKEM640AES_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
 // Comparison / selection helpers used by decapsulation, and memory clearing
 int8_t PQCLEAN_FRODOKEM640AES_OPT_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
 void PQCLEAN_FRODOKEM640AES_OPT_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
 void PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(uint8_t *mem, size_t n);
 // Little-endian conversions for 16-bit values
 uint16_t PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(uint16_t n);
 uint16_t PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(uint16_t n);

 #endif
--- a/crypto_kem/frodokem640aes/opt/kem.c
+++ b/crypto_kem/frodokem640aes/opt/kem.c
@@ -1,237 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "fips202.h"
 #include "randombytes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM640AES_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // FrodoKEM's key generation
    // Outputs: public key pk (               BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
    //          secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
    // Layout written here: pk = seed_A || packed B;  sk = s || pk || S (little-endian 16-bit entries) || H(pk).
    // Always returns 0.
    uint8_t *pk_seedA = &pk[0];
    uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *sk_s = &sk[0];
    uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0};           // contains secret data
    uint16_t *E = &S[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A];    // contains secret data via randomness_s and randomness_seedSE
    uint8_t *randomness_s = &randomness[0];                 // contains secret data
    uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
    uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];           // contains secret data

    // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
    randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
    shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

    // Generate S and E, and compute B = A*S + E. Generate A on-the-fly
    // The SHAKE input is the byte 0x5F followed by seedSE; S and E share one output buffer (E is its second half).
    shake_input_seedSE[0] = 0x5F;
    memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
    shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(S[i]);
    }
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(S, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(E, PARAMS_N * PARAMS_NBAR);
    // pk is passed as the seed pointer: seed_A sits at the start of pk (pk_seedA == &pk[0])
    PQCLEAN_FRODOKEM640AES_OPT_mul_add_as_plus_e(B, S, E, pk);

    // Encode the second part of the public key
    PQCLEAN_FRODOKEM640AES_OPT_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Add s, pk and S to the secret key
    memcpy(sk_s, randomness_s, CRYPTO_BYTES);
    memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
    // Only the S half of the buffer is converted and stored; E is not written to the secret key
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(S[i]);
    }
    // 2 * PARAMS_N * PARAMS_NBAR bytes == PARAMS_N * PARAMS_NBAR 16-bit entries
    memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

    // Add H(pk) to the secret key
    shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

    // Cleanup:
    // Only the first 2 * CRYPTO_BYTES of randomness (s and seedSE) are cleared; the randomness_z part is left as is.
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(randomness, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM640AES_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // FrodoKEM's key encapsulation
    // Input:   public key pk = seed_A || packed B
    // Outputs: ciphertext ct = packed Bp (c1) || packed C (c2), and shared secret ss (CRYPTO_BYTES bytes)
    // Always returns 0.
    const uint8_t *pk_seedA = &pk[0];
    const uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *ct_c1 = &ct[0];
    uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];          // contains secret data
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                    // contains secret data via mu
    uint8_t *pkh = &G2in[0];
    uint8_t *mu = &G2in[BYTES_PKHASH];                        // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                          // contains secret data
    uint8_t *seedSE = &G2out[0];                              // contains secret data
    uint8_t *k = &G2out[CRYPTO_BYTES];                        // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];       // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];            // contains secret data
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];             // contains secret data

    // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
    shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
    randombytes(mu, BYTES_MU);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the byte 0x96 followed by seedSE; Sp, Ep and Epp share one output buffer.
    shake_input_seedSE[0] = 0x96;
    memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
    PQCLEAN_FRODOKEM640AES_OPT_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Generate Epp, and compute V = Sp*B + Epp
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_OPT_mul_add_sb_plus_e(V, B, Sp, Epp);

    // Encode mu, and compute C = V + enc(mu) (mod q)
    // C is both an input and the output of the add below.
    PQCLEAN_FRODOKEM640AES_OPT_key_encode(C, (uint16_t *)mu);
    PQCLEAN_FRODOKEM640AES_OPT_add(C, V, C);
    PQCLEAN_FRODOKEM640AES_OPT_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

    // Compute ss = F(ct||KK)
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
    memcpy(Fin_k, k, CRYPTO_BYTES);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Sp, Ep and Epp are cleared separately, together covering the whole shared buffer.
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(mu, BYTES_MU);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM640AES_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    // FrodoKEM's key decapsulation
    // Inputs:  ciphertext ct = c1 || c2, secret key sk = s || pk || S || pkh
    // Output:  shared secret ss (CRYPTO_BYTES bytes)
    // The ciphertext is re-derived from the decoded mu' and compared with ct; ss is F(ct || k') when they
    // match and F(ct || s) otherwise. Always returns 0.
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];                  // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];             // contains secret data
    const uint8_t *ct_c1 = &ct[0];
    const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    const uint8_t *sk_s = &sk[0];
    const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint16_t S[PARAMS_N * PARAMS_NBAR];                      // contains secret data
    const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    const uint8_t *pk_seedA = &sk_pk[0];
    const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                   // contains secret data via muprime
    uint8_t *pkh = &G2in[0];
    uint8_t *muprime = &G2in[BYTES_PKHASH];                  // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                         // contains secret data
    uint8_t *seedSEprime = &G2out[0];                        // contains secret data
    uint8_t *kprime = &G2out[CRYPTO_BYTES];                  // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];      // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];           // contains secret data
    uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES];       // contains secret data

    // Load S from the secret key, assembling each 16-bit entry from two little-endian bytes
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
    }

    // Compute W = C - Bp*S (mod q), and decode the randomness mu
    PQCLEAN_FRODOKEM640AES_OPT_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_OPT_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_OPT_mul_bs(W, Bp, S);
    PQCLEAN_FRODOKEM640AES_OPT_sub(W, C, W);
    PQCLEAN_FRODOKEM640AES_OPT_key_decode((uint16_t *)muprime, W);

    // Generate (seedSE' || k') = G_2(pkh || mu')
    memcpy(pkh, sk_pkh, BYTES_PKHASH);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the byte 0x96 followed by seedSE'; Sp, Ep and Epp share one output buffer.
    shake_input_seedSEprime[0] = 0x96;
    memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

    // Generate Epp, and compute W = Sp*B + Epp
    // W is reused here; the value decoded into mu' above is no longer needed.
    PQCLEAN_FRODOKEM640AES_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM640AES_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM640AES_OPT_mul_add_sb_plus_e(W, B, Sp, Epp);

    // Encode mu, and compute CC = W + enc(mu') (mod q)
    PQCLEAN_FRODOKEM640AES_OPT_key_encode(CC, (uint16_t *)muprime);
    PQCLEAN_FRODOKEM640AES_OPT_add(CC, W, CC);

    // Prepare input to F
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

    // Reducing BBp modulo q
    // Needed so BBp can be compared with Bp, whose unpacked entries hold only PARAMS_LOGQ bits.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
    }

    // If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
    // Needs to avoid branching on secret data as per:
    //     Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
    //     primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
    // Both comparisons are always evaluated and combined with a bitwise OR (no short-circuit).
    int8_t selector = PQCLEAN_FRODOKEM640AES_OPT_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM640AES_OPT_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
    // If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
    PQCLEAN_FRODOKEM640AES_OPT_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(muprime, BYTES_MU);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    return 0;
 }
--- a/crypto_kem/frodokem640aes/opt/matrix_aes.c
+++ b/crypto_kem/frodokem640aes/opt/matrix_aes.c
@@ -1,127 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: matrix arithmetic functions used by the KEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "aes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM640AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
    // A is never stored in full: four rows at a time are produced by AES-128-ECB keyed with seed_A.
    // Inputs: s, e (N x N_BAR). s is read as s[k * PARAMS_N + j], i.e. one run of PARAMS_N
    //         entries per output column k.
    // Output: out = A*s + e (N x N_BAR)
    // Returns 1 unconditionally.
    int k;
    uint16_t i, j;
    int16_t a_row[4 * PARAMS_N];

    // Start from out = e. A byte-wise copy is used instead of copying through uint32_t
    // pointers: the buffers are uint16_t arrays, so a 32-bit access is neither guaranteed
    // to be aligned nor allowed by the effective-type rules. memmove keeps out == e valid.
    memmove(out, e, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));

    int16_t a_row_temp[4 * PARAMS_N] = {0};                     // Take four lines of A at once
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);

    // The column index of each AES input block is the same for every group of four rows,
    // so it is written once here; only the row index changes in the main loop.
    for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
        a_row_temp[j + 1 + 0 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(j);     // Loading values in the little-endian order
        a_row_temp[j + 1 + 1 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(j);
        a_row_temp[j + 1 + 2 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(j);
        a_row_temp[j + 1 + 3 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(j);
    }

    for (i = 0; i < PARAMS_N; i += 4) {
        for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {    // Go through A, four rows at a time
            a_row_temp[j + 0 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(i + 0); // Loading values in the little-endian order
            a_row_temp[j + 1 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(i + 1);
            a_row_temp[j + 2 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(i + 2);
            a_row_temp[j + 3 * PARAMS_N] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(i + 3);
        }
        // Encrypt the index blocks to obtain rows i .. i+3 of A.
        aes128_ecb((uint8_t *)a_row, (uint8_t *)a_row_temp, 4 * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
        for (k = 0; k < 4 * PARAMS_N; k++) {
            a_row[k] = PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(a_row[k]);
        }
        for (k = 0; k < PARAMS_NBAR; k++) {
            uint16_t sum[4] = {0};
            for (j = 0; j < PARAMS_N; j++) {                    // Matrix-vector multiplication
                uint16_t sp = s[k * PARAMS_N + j];
                sum[0] += a_row[0 * PARAMS_N + j] * sp;         // Go through four lines with same s
                sum[1] += a_row[1 * PARAMS_N + j] * sp;
                sum[2] += a_row[2 * PARAMS_N + j] * sp;
                sum[3] += a_row[3 * PARAMS_N + j] * sp;
            }
            out[(i + 0)*PARAMS_NBAR + k] += sum[0];
            out[(i + 2)*PARAMS_NBAR + k] += sum[2];
            out[(i + 1)*PARAMS_NBAR + k] += sum[1];
            out[(i + 3)*PARAMS_NBAR + k] += sum[3];
        }
    }
    aes128_ctx_release(&ctx128);
    return 1;
 }




 int PQCLEAN_FRODOKEM640AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
    // A is never stored in full: PARAMS_STRIPE_STEP columns at a time are produced by
    // AES-128-ECB keyed with seed_A.
    // Inputs: s', e' (N_BAR x N)
    // Output: out = s'*A + e' (N_BAR x N)
    // Returns 1 unconditionally.
    int j;
    uint16_t i, kk;

    // Start from out = e'. A byte-wise copy is used instead of copying through uint32_t
    // pointers: the buffers are uint16_t arrays, so a 32-bit access is neither guaranteed
    // to be aligned nor allowed by the effective-type rules. memmove keeps out == e valid.
    memmove(out, e, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));

    int k;
    uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
    uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP];
    uint16_t a_cols_temp[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);

    // The row index of each AES input block never changes, so it is written once here;
    // only the column index (kk) is refreshed per stripe in the main loop.
    for (i = 0, j = 0; i < PARAMS_N; i++, j += PARAMS_STRIPE_STEP) {
        a_cols_temp[j] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(i);                       // Loading values in the little-endian order
    }

    for (kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) {     // Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time.
        for (i = 0; i < (PARAMS_N * PARAMS_STRIPE_STEP); i += PARAMS_STRIPE_STEP) {
            a_cols_temp[i + 1] = PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(kk);              // Loading values in the little-endian order
        }

        // Encrypt the index blocks to obtain columns kk .. kk+PARAMS_STRIPE_STEP-1 of A.
        aes128_ecb((uint8_t *)a_cols, (uint8_t *)a_cols_temp, PARAMS_N * PARAMS_STRIPE_STEP * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);

        for (i = 0; i < PARAMS_N; i++) {                        // Transpose a_cols to have access to it in the column-major order.
            for (k = 0; k < PARAMS_STRIPE_STEP; k++) {
                a_cols_t[k * PARAMS_N + i] = PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(a_cols[i * PARAMS_STRIPE_STEP + k]);
            }
        }

        for (i = 0; i < PARAMS_NBAR; i++) {
            for (k = 0; k < PARAMS_STRIPE_STEP; k += PARAMS_PARALLEL) {
                uint16_t sum[PARAMS_PARALLEL] = {0};
                for (j = 0; j < PARAMS_N; j++) {                // Matrix-vector multiplication
                    uint16_t sp = s[i * PARAMS_N + j];
                    // The uint32_t cast keeps the product unsigned; only its low 16 bits are accumulated.
                    sum[0] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 0) * PARAMS_N + j]);
                    sum[1] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 1) * PARAMS_N + j]);
                    sum[2] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 2) * PARAMS_N + j]);
                    sum[3] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 3) * PARAMS_N + j]);
                }
                out[i * PARAMS_N + kk + k + 0] += sum[0];
                out[i * PARAMS_N + kk + k + 2] += sum[2];
                out[i * PARAMS_N + kk + k + 1] += sum[1];
                out[i * PARAMS_N + kk + k + 3] += sum[3];
            }
        }
    }
    aes128_ctx_release(&ctx128);
    return 1;
 }
--- a/crypto_kem/frodokem640aes/opt/noise.c
+++ b/crypto_kem/frodokem640aes/opt/noise.c
@@ -1,35 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: noise sampling functions
 *********************************************************************************************/

 #include <stdint.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

 void PQCLEAN_FRODOKEM640AES_OPT_sample_n(uint16_t *s, size_t n) {
    // Replaces each of the n 16-bit pseudo-random words in s by a sample from the noise
    // distribution described by CDF_TABLE. The input words are overwritten in place.
    for (size_t idx = 0; idx < n; idx++) {
        const uint16_t raw = s[idx];
        const uint16_t sign_bit = raw & 0x1;   // least significant bit selects the sign
        const uint16_t value = raw >> 1;       // remaining 15 bits are compared against the CDF
        uint16_t count = 0;

        // Count the table entries that are smaller than value. The last entry is skipped.
        for (unsigned int t = 0; t + 1 < (unsigned int)CDF_TABLE_LEN; t++) {
            // Branch-free comparison: both operands fit in 15 bits, so bit 15 of the
            // 16-bit difference is set exactly when CDF_TABLE[t] < value.
            count += (uint16_t)(CDF_TABLE[t] - value) >> 15;
        }

        // sign_bit is 0 or 1: the result is count when it is 0 and -count (mod 2^16) when it is 1.
        s[idx] = ((-sign_bit) ^ count) + sign_bit;
    }
 }
--- a/crypto_kem/frodokem640aes/opt/params.h
+++ b/crypto_kem/frodokem640aes/opt/params.h
@@ -1,27 +0,0 @@
 #ifndef PARAMS_H
 #define PARAMS_H

 // Unprefixed aliases for the size constants carrying the PQCLEAN_FRODOKEM640AES_OPT_ prefix.
 #define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_SECRETKEYBYTES
 #define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_PUBLICKEYBYTES
 #define CRYPTO_BYTES PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_BYTES
 #define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM640AES_OPT_CRYPTO_CIPHERTEXTBYTES

 // Matrix dimensions (N, N_BAR) and modulus q = 2^PARAMS_LOGQ.
 #define PARAMS_N 640
 #define PARAMS_NBAR 8
 #define PARAMS_LOGQ 15
 #define PARAMS_Q (1 << PARAMS_LOGQ)
 // Number of bits carried by each matrix entry in key_encode/key_decode.
 #define PARAMS_EXTRACTED_BITS 2
 // Number of 16-bit words in one AES block (used as the stride when generating A).
 #define PARAMS_STRIPE_STEP 8
 // Number of columns accumulated together in the inner multiplication loop.
 #define PARAMS_PARALLEL 4
 #define BYTES_SEED_A 16
 // Byte length of mu: PARAMS_EXTRACTED_BITS bits for each of the N_BAR x N_BAR entries.
 #define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
 #define BYTES_PKHASH CRYPTO_BYTES

 // Selecting SHAKE XOF function for the KEM and noise sampling
 #define shake     shake128

 // CDF table
 // CDF_TABLE_LEN must equal the number of entries in CDF_TABLE_DATA; all entries fit in 15 bits.
 #define CDF_TABLE_DATA {4643, 13363, 20579, 25843, 29227, 31145, 32103, 32525, 32689, 32745, 32762, 32766, 32767}
 #define CDF_TABLE_LEN 13

 #endif
--- a/crypto_kem/frodokem640aes/opt/util.c
+++ b/crypto_kem/frodokem640aes/opt/util.c
@@ -1,264 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: additional functions for FrodoKEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 static inline uint8_t min(uint8_t x, uint8_t y) {
    // Smaller of the two 8-bit values; y is returned when they are equal.
    return (x < y) ? x : y;
 }

 uint16_t PQCLEAN_FRODOKEM640AES_OPT_LE_TO_UINT16(uint16_t n) {
    // Reads the two bytes of n as stored in memory and combines them as a
    // little-endian value: first byte is the low half, second byte the high half.
    const uint8_t *bytes = (const uint8_t *) &n;
    const uint16_t low = bytes[0];
    const uint16_t high = bytes[1];
    return (uint16_t)(low | (high << 8));
 }

 uint16_t PQCLEAN_FRODOKEM640AES_OPT_UINT16_TO_LE(uint16_t n) {
    // Returns a 16-bit object whose in-memory bytes are the little-endian
    // encoding of n: low byte first, high byte second.
    uint16_t encoded;
    uint8_t *dst = (uint8_t *) &encoded;
    dst[0] = (uint8_t)(n & 0xFF);
    dst[1] = (uint8_t)((n & 0xFF00) >> 8);
    return encoded;
 }

 void PQCLEAN_FRODOKEM640AES_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
    // Multiply by s on the right
    // Inputs: b (N_BAR x N, row-major); s is read as s[j * PARAMS_N + k], i.e. N_BAR runs
    //         of N entries, one run per output column j.
    // Output: out = b*s (N_BAR x N_BAR), every entry reduced modulo 2^PARAMS_LOGQ
    const uint16_t q_mask = (1 << PARAMS_LOGQ) - 1;

    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *b_row = &b[row * PARAMS_N];
        for (int col = 0; col < PARAMS_NBAR; col++) {
            const uint16_t *s_run = &s[col * PARAMS_N];
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            *cell = 0;
            for (int t = 0; t < PARAMS_N; t++) {
                // uint32_t keeps the product unsigned; only its low 16 bits are accumulated.
                *cell += (uint16_t)(b_row[t] * (uint32_t)s_run[t]);
            }
            *cell = (uint32_t)(*cell) & q_mask;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
    // Multiply by s on the left and add e
    // Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR), all row-major
    // Output: out = s*b + e (N_BAR x N_BAR), every entry reduced modulo 2^PARAMS_LOGQ
    const uint16_t q_mask = (1 << PARAMS_LOGQ) - 1;

    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *s_row = &s[row * PARAMS_N];
        for (int col = 0; col < PARAMS_NBAR; col++) {
            const int pos = row * PARAMS_NBAR + col;
            uint16_t *cell = &out[pos];

            *cell = e[pos];
            for (int t = 0; t < PARAMS_N; t++) {
                // uint32_t keeps the product unsigned; only its low 16 bits are accumulated.
                *cell += (uint16_t)(s_row[t] * (uint32_t)b[t * PARAMS_NBAR + col]);
            }
            *cell = (uint32_t)(*cell) & q_mask;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Entry-wise sum of two N_BAR x N_BAR matrices, reduced modulo 2^PARAMS_LOGQ.
    // Inputs: a, b (N_BAR x N_BAR)
    // Output: out = a + b
    const size_t entries = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < entries) {
        out[idx] = (a[idx] + b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Entry-wise difference of two N_BAR x N_BAR matrices, reduced modulo 2^PARAMS_LOGQ.
    // Inputs: a, b (N_BAR x N_BAR)
    // Output: out = a - b
    const size_t entries = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < entries) {
        out[idx] = (a[idx] - b[idx]) & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_key_encode(uint16_t *out, const uint16_t *in) {
    // Encoding: the input is read byte by byte. Each group of PARAMS_EXTRACTED_BITS bytes
    // is split into eight PARAMS_EXTRACTED_BITS-bit pieces (lowest bits first), and each
    // piece is placed in the top bits of one output entry (below bit PARAMS_LOGQ).
    // Writes PARAMS_NBAR * PARAMS_NBAR entries to out.
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint64_t piece_mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint8_t *src = (const uint8_t *)in;
    unsigned int out_idx = 0;

    for (unsigned int w = 0; w < word_count; w++) {
        // Gather the next PARAMS_EXTRACTED_BITS input bytes, little-endian, into one word.
        uint64_t word = 0;
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            word |= ((uint64_t)src[w * PARAMS_EXTRACTED_BITS + byte]) << (8 * byte);
        }
        // Emit its eight pieces, lowest bits first.
        for (unsigned int piece = 0; piece < pieces_per_word; piece++) {
            out[out_idx] = (uint16_t)((word & piece_mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
            word >>= PARAMS_EXTRACTED_BITS;
            out_idx++;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_key_decode(uint16_t *out, const uint16_t *in) {
    // Decoding: every input entry is rounded to its top PARAMS_EXTRACTED_BITS bits
    // (relative to PARAMS_LOGQ); eight such pieces are packed, lowest bits first, into
    // PARAMS_EXTRACTED_BITS output bytes. The output is written byte by byte.
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint16_t piece_mask = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint16_t q_mask = ((uint16_t)1 << PARAMS_LOGQ) - 1;
    uint8_t *dst = (uint8_t *)out;
    unsigned int in_idx = 0;

    for (unsigned int w = 0; w < word_count; w++) {
        uint64_t word = 0;
        for (unsigned int piece = 0; piece < pieces_per_word; piece++) {
            // rounded = floor(in * 2^-(PARAMS_LOGQ - PARAMS_EXTRACTED_BITS) + 0.5)
            uint16_t rounded = ((in[in_idx] & q_mask) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
            word |= ((uint64_t)(rounded & piece_mask)) << (PARAMS_EXTRACTED_BITS * piece);
            in_idx++;
        }
        // Store the packed word as PARAMS_EXTRACTED_BITS little-endian bytes.
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            dst[w * PARAMS_EXTRACTED_BITS + byte] = (word >> (8 * byte)) & 0xFF;
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
    // Packs the lsb least significant bits of each of the inlen input words into a
    // contiguous bit stream, most significant bit first, stored in out.
    // out is zeroed first; at most outlen bytes are written, so the stream is cut off
    // after outlen * 8 bits when the input is longer than that.
    memset(out, 0, outlen);

    size_t out_pos = 0;          // output bytes completely filled so far
    size_t in_pos = 0;           // input words loaded so far
    uint16_t pending = 0;        // bits loaded from the input but not yet written
    uint8_t pending_bits = 0;    // how many low bits of pending are still to be written

    while (out_pos < outlen && (in_pos < inlen || ((in_pos == inlen) && (pending_bits > 0)))) {
        uint8_t filled = 0;      // bits of out[out_pos] written in this pass
        while (filled < 8) {
            // Move as many bits as fit into the current byte and are available in pending.
            int take = min(8 - filled, pending_bits);
            uint16_t take_mask = (1 << take) - 1;
            uint8_t chunk = (uint8_t) ((pending >> (pending_bits - take)) & take_mask);
            out[out_pos] = out[out_pos] + (chunk << (8 - filled - take));
            filled += (uint8_t) take;
            pending_bits -= (uint8_t) take;
            pending &= ~(take_mask << pending_bits);  // drop the bits just written

            if (pending_bits != 0) {
                continue;
            }
            if (in_pos >= inlen) {
                break;  // no more input words to load
            }
            pending = in[in_pos];
            pending_bits = lsb;
            in_pos++;
        }
        if (filled == 8) {
            out_pos++;  // current byte is complete
        }
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
    // Reads the inlen input bytes as a contiguous bit stream, most significant bit first,
    // and stores consecutive groups of lsb bits in the low bits of successive output words.
    // out is zeroed first; at most outlen words are written.
    // outlen must be at least ceil(inlen * 8 / lsb).
    memset(out, 0, outlen * sizeof(uint16_t));

    size_t out_pos = 0;          // output words completely filled so far
    size_t in_pos = 0;           // input bytes loaded so far
    uint8_t pending = 0;         // bits loaded from the input but not yet written
    uint8_t pending_bits = 0;    // how many low bits of pending are still to be written

    while (out_pos < outlen && (in_pos < inlen || ((in_pos == inlen) && (pending_bits > 0)))) {
        uint8_t filled = 0;      // bits of out[out_pos] written in this pass
        while (filled < lsb) {
            // Move as many bits as fit into the current word and are available in pending.
            int take = min(lsb - filled, pending_bits);
            uint16_t take_mask = (1 << take) - 1;
            uint8_t chunk = (pending >> (pending_bits - take)) & take_mask;
            out[out_pos] = out[out_pos] + (chunk << (lsb - filled - take));
            filled += (uint8_t) take;
            pending_bits -= (uint8_t) take;
            pending &= ~(take_mask << pending_bits);  // drop the bits just written

            if (pending_bits != 0) {
                continue;
            }
            if (in_pos >= inlen) {
                break;  // no more input bytes to load
            }
            pending = in[in_pos];
            pending_bits = 8;
            in_pos++;
        }
        if (filled == lsb) {
            out_pos++;  // current word is complete
        }
    }
 }


 int8_t PQCLEAN_FRODOKEM640AES_OPT_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    // Compare two arrays of len 16-bit words without branching on their contents.
    // Returns 0 if the arrays are equal, -1 otherwise (0 when len == 0).
    uint16_t diff = 0;

    // Accumulate every differing bit; the loop always runs over all len entries.
    for (size_t i = 0; i < len; i++) {
        diff |= (uint16_t)(a[i] ^ b[i]);
    }

    // Collapse diff to a single bit using unsigned arithmetic only, so the result does
    // not depend on how the compiler shifts negative values or narrows out-of-range
    // integers: for any non-zero d, (d | -d) has its top bit set.
    uint32_t d = diff;
    uint32_t neg_d = (uint32_t)0 - d;
    uint32_t nonzero = (d | neg_d) >> 31;   // 1 if any bit differed, 0 otherwise

    return (int8_t)(-(int)nonzero);
 }


 void PQCLEAN_FRODOKEM640AES_OPT_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Copies len bytes into r from a or from b without branching on selector:
    // selector == 0 copies a, selector == -1 copies b.
    const uint8_t take_b = (uint8_t)selector;   // 0x00 or 0xFF for the two documented values
    const uint8_t take_a = (uint8_t)~take_b;    // bitwise complement of take_b
    size_t idx = 0;

    while (idx < len) {
        r[idx] = (take_a & a[idx]) | (take_b & b[idx]);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM640AES_OPT_clear_bytes(uint8_t *mem, size_t n) {
    // Overwrites the first n bytes at mem with zero.
    // The writes go through a volatile-qualified pointer so that the compiler
    // does not remove the clearing as a dead store.
    volatile uint8_t *cursor = mem;

    while (n > 0) {
        *cursor = 0;
        cursor++;
        n--;
    }
 }
--- a/crypto_kem/frodokem976aes/META.yml
+++ b/crypto_kem/frodokem976aes/META.yml
@@ -1,28 +0,0 @@
 name: FrodoKEM-976-AES
 type: kem
 claimed-nist-level: 3
 claimed-security: IND-CCA2
 length-public-key: 15632
 length-secret-key: 31296
 length-ciphertext: 15744
 length-shared-secret: 24
 nistkat-sha256: 7e415ab659d0d08d8f43135e1e9d75a8b342f52b65e8326ebf8135521b987615
 principal-submitters:
  - Michael Naehrig, Microsoft Research
 auxiliary-submitters:
 - Erdem Alkim
 - Joppe W. Bos, NXP Semiconductors
 - Léo Ducas, CWI
 - Patrick Longa, Microsoft Research
 - Ilya Mironov, Google
 - Valeria Nikolaenko
 - Chris Peikert, University of Michigan
 - Ananth Raghunathan, Google
 - Douglas Stebila, University of Waterloo
 - Karen Easterbrook, Microsoft Research
 - Brian LaMacchia, Microsoft Research
 implementations:
 - name: clean
  version: https://github.com/microsoft/PQCrypto-LWEKE/commit/669522db63850fa64d1a24a47e138e80a59349db
 - name: opt
  version: https://github.com/microsoft/PQCrypto-LWEKE/commit/669522db63850fa64d1a24a47e138e80a59349db
--- a/crypto_kem/frodokem976aes/clean/LICENSE
+++ b/crypto_kem/frodokem976aes/clean/LICENSE
@@ -1,21 +0,0 @@
 MIT License

 Copyright (c) Microsoft Corporation. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE
--- a/crypto_kem/frodokem976aes/clean/Makefile.Microsoft_nmake
+++ b/crypto_kem/frodokem976aes/clean/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 # Static library produced by this directory and the object files that go into it.
 LIBRARY=libfrodokem976aes_clean.lib
 OBJECTS=kem.obj matrix_aes.obj noise.obj util.obj

 # /I adds the shared "common" directory three levels up to the include path.
 # /W4 with /WX: high warning level, with warnings treated as errors.
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the library.
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive all object files ($**) into the library ($@).
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # The leading '-' lets nmake continue when a file to delete does not exist.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/frodokem976aes/clean/api.h
+++ b/crypto_kem/frodokem976aes/clean/api.h
@@ -1,20 +0,0 @@
 #ifndef PQCLEAN_FRODOKEM976AES_CLEAN_API_H
 #define PQCLEAN_FRODOKEM976AES_CLEAN_API_H

 // Public interface of this FrodoKEM-976-AES implementation: buffer sizes in bytes
 // and the three KEM entry points.

 #include <stddef.h>
 #include <stdint.h>

 #define PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_SECRETKEYBYTES  31296     // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
 #define PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_PUBLICKEYBYTES  15632     // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
 #define PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_BYTES              24
 #define PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_CIPHERTEXTBYTES  15744     // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

 #define PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_ALGNAME "FrodoKEM-976-AES"

 // Key generation: writes the public key to pk and the secret key to sk.
 int PQCLEAN_FRODOKEM976AES_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

 // Encapsulation: writes the ciphertext to ct and the shared secret to ss, using public key pk.
 int PQCLEAN_FRODOKEM976AES_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

 // Decapsulation: writes the shared secret to ss, given ciphertext ct and secret key sk.
 int PQCLEAN_FRODOKEM976AES_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

 #endif
--- a/crypto_kem/frodokem976aes/clean/common.h
+++ b/crypto_kem/frodokem976aes/clean/common.h
@@ -1,21 +0,0 @@
 #ifndef COMMON_H
 #define COMMON_H

 // Internal helper functions used by the KEM code (see kem.c for the callers).
 // The fixed-width integer types and size_t used in the prototypes below are pulled in
 // here so that this header can be included on its own, in any order.
 #include <stddef.h>
 #include <stdint.h>

 // Matrix products with the matrix A expanded from seed_A.
 int PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 int PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 // Noise sampling; operates on s in place.
 void PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(uint16_t *s, size_t n);
 // Small matrix arithmetic.
 void PQCLEAN_FRODOKEM976AES_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
 void PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
 void PQCLEAN_FRODOKEM976AES_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM976AES_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
 // Message encoding/decoding and bit packing.
 void PQCLEAN_FRODOKEM976AES_CLEAN_key_encode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM976AES_CLEAN_key_decode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM976AES_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
 void PQCLEAN_FRODOKEM976AES_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
 // Comparison, selection and memory clearing helpers.
 int8_t PQCLEAN_FRODOKEM976AES_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
 void PQCLEAN_FRODOKEM976AES_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
 void PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(uint8_t *mem, size_t n);
 // 16-bit little-endian conversions.
 uint16_t PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(uint16_t n);
 uint16_t PQCLEAN_FRODOKEM976AES_CLEAN_UINT16_TO_LE(uint16_t n);

 #endif
--- a/crypto_kem/frodokem976aes/clean/kem.c
+++ b/crypto_kem/frodokem976aes/clean/kem.c
@@ -1,237 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "fips202.h"
 #include "randombytes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM976AES_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // FrodoKEM's key generation
    // Outputs: public key pk (               BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
    //          secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
    // Secret key layout: s || pk || S || H(pk). Always returns 0.
    uint8_t *pk_seedA = &pk[0];
    uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *sk_s = &sk[0];
    uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0};           // contains secret data
    uint16_t *E = &S[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A];    // contains secret data via randomness_s and randomness_seedSE
    uint8_t *randomness_s = &randomness[0];                 // contains secret data
    uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
    uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];           // contains secret data

    // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
    randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
    shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

    // Generate S and E, and compute B = A*S + E. Generate A on-the-fly
    // The SHAKE input is the byte 0x5F followed by seedSE; a single SHAKE call fills both
    // S and E, which share one buffer.
    shake_input_seedSE[0] = 0x5F;
    memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
    shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(S[i]);
    }
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR);
    // pk starts with seed_A (pk_seedA == &pk[0]), so pk is passed as the seed pointer.
    PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_as_plus_e(B, S, E, pk);

    // Encode the second part of the public key
    PQCLEAN_FRODOKEM976AES_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Add s, pk and S to the secret key
    memcpy(sk_s, randomness_s, CRYPTO_BYTES);
    memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM976AES_CLEAN_UINT16_TO_LE(S[i]);
    }
    // The length is in bytes: PARAMS_N * PARAMS_NBAR 16-bit entries, i.e. only the S half
    // of the buffer (E is not stored).
    memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

    // Add H(pk) to the secret key
    shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

    // Cleanup:
    // Only the first 2 * CRYPTO_BYTES of randomness (s and seedSE) are wiped; the trailing
    // randomness_z bytes are not marked as secret above.
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 int PQCLEAN_FRODOKEM976AES_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // FrodoKEM's key encapsulation
    // Input:   public key pk (seed_A followed by the packed matrix B)
    // Outputs: ciphertext ct = c1 || c2 (CRYPTO_CIPHERTEXTBYTES bytes), shared secret ss (CRYPTO_BYTES bytes)
    // Always returns 0.
    const uint8_t *pk_seedA = &pk[0];
    const uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *ct_c1 = &ct[0];
    uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];          // contains secret data
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                    // contains secret data via mu
    uint8_t *pkh = &G2in[0];
    uint8_t *mu = &G2in[BYTES_PKHASH];                        // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                          // contains secret data
    uint8_t *seedSE = &G2out[0];                              // contains secret data
    uint8_t *k = &G2out[CRYPTO_BYTES];                        // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];       // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];            // contains secret data
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];             // contains secret data

    // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
    shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
    randombytes(mu, BYTES_MU);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the byte 0x96 followed by seedSE; a single SHAKE call fills
    // Sp, Ep and Epp, which share one buffer.
    shake_input_seedSE[0] = 0x96;
    memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
    PQCLEAN_FRODOKEM976AES_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Generate Epp, and compute V = Sp*B + Epp
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp);

    // Encode mu, and compute C = V + enc(mu) (mod q)
    // C is used both as an input and as the output of the addition.
    PQCLEAN_FRODOKEM976AES_CLEAN_key_encode(C, (uint16_t *)mu);
    PQCLEAN_FRODOKEM976AES_CLEAN_add(C, V, C);
    PQCLEAN_FRODOKEM976AES_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

    // Compute ss = F(ct||KK)
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
    memcpy(Fin_k, k, CRYPTO_BYTES);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Wipes every buffer marked "contains secret data" above; within G2in and Fin only
    // the secret parts (mu and Fin_k) are cleared.
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(mu, BYTES_MU);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 // FrodoKEM-976-AES decapsulation.
 //
 // Inputs:  ct - ciphertext, read as c1 (packed Bp) followed by c2 (packed C)
 //          sk - secret key, read as s || pk (seed_A || b) || S (16-bit little-endian words) || pkh
 // Output:  ss - shared secret, CRYPTO_BYTES bytes
 //
 // The function decodes mu' from the ciphertext, re-derives (seedSE' || k') from pkh || mu',
 // re-encrypts mu' and compares the result with the received ciphertext without branching.
 // ss is then F(ct || k') when the two match and F(ct || s) otherwise.
 // Always returns 0.
 int PQCLEAN_FRODOKEM976AES_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    // FrodoKEM's key decapsulation
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp share one buffer so that a single SHAKE call can fill all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];                  // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];             // contains secret data
    // Views into the ciphertext and the secret key; no data is copied here.
    const uint8_t *ct_c1 = &ct[0];
    const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    const uint8_t *sk_s = &sk[0];
    const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint16_t S[PARAMS_N * PARAMS_NBAR];                      // contains secret data
    const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    const uint8_t *pk_seedA = &sk_pk[0];
    const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                   // contains secret data via muprime
    uint8_t *pkh = &G2in[0];
    uint8_t *muprime = &G2in[BYTES_PKHASH];                  // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                         // contains secret data
    uint8_t *seedSEprime = &G2out[0];                        // contains secret data
    uint8_t *kprime = &G2out[CRYPTO_BYTES];                  // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];      // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];           // contains secret data
    uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES];       // contains secret data

    // Load S from the secret key, assembling each 16-bit word from two bytes (low byte first)
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
    }

    // Compute W = C - Bp*S (mod q), and decode the randomness mu
    PQCLEAN_FRODOKEM976AES_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_CLEAN_mul_bs(W, Bp, S);
    PQCLEAN_FRODOKEM976AES_CLEAN_sub(W, C, W);
    PQCLEAN_FRODOKEM976AES_CLEAN_key_decode((uint16_t *)muprime, W);

    // Generate (seedSE' || k') = G_2(pkh || mu')
    memcpy(pkh, sk_pkh, BYTES_PKHASH);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
    shake_input_seedSEprime[0] = 0x96;                      // prefix byte placed in front of seedSE' before hashing
    memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    // Interpret the SHAKE output bytes as little-endian 16-bit words regardless of host byte order
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

    // Generate Epp, and compute W = Sp*B + Epp
    PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp);

    // Encode mu, and compute CC = W + enc(mu') (mod q)
    PQCLEAN_FRODOKEM976AES_CLEAN_key_encode(CC, (uint16_t *)muprime);
    PQCLEAN_FRODOKEM976AES_CLEAN_add(CC, W, CC);

    // Prepare input to F
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

    // Reducing BBp modulo q
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
    }

    // If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
    // Needs to avoid branching on secret data as per:
    //     Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
    //     primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
    int8_t selector = PQCLEAN_FRODOKEM976AES_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM976AES_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
    // If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
    PQCLEAN_FRODOKEM976AES_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup: wipe every local buffer marked as secret above.
    // The Sp, Ep and Epp calls together cover the whole shared Sp allocation.
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(muprime, BYTES_MU);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    return 0;
 }
--- a/crypto_kem/frodokem976aes/clean/matrix_aes.c
+++ b/crypto_kem/frodokem976aes/clean/matrix_aes.c
@@ -1,95 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: matrix arithmetic functions used by the KEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "aes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 int PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Computes out = A*s + e, where the N x N matrix A is expanded from seed_A with AES-128 in ECB mode.
    // Inputs: s, indexed as s[k * PARAMS_N + j], and e, indexed like out.
    // Output: out, indexed as out[i * PARAMS_NBAR + k].
    // Always returns 1.
    int16_t A[PARAMS_N * PARAMS_N] = {0};
    aes128ctx ctx128;
    int row, col, k, idx;

    // Place (row, column) in the first two slots of every PARAMS_STRIPE_STEP-wide group; the rest stay zero.
    for (row = 0; row < PARAMS_N; row++) {
        int16_t *a_row = &A[row * PARAMS_N];
        for (col = 0; col < PARAMS_N; col += PARAMS_STRIPE_STEP) {
            a_row[col] = (int16_t) row;
            a_row[col + 1] = (int16_t) col;
        }
    }
    // Store the plaintext words in little-endian byte order before encrypting.
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM976AES_CLEAN_UINT16_TO_LE(A[idx]);
    }

    // Encrypt the whole matrix in place, then drop the key schedule.
    aes128_ecb_keyexp(&ctx128, seed_A);
    aes128_ecb((uint8_t *) A, (uint8_t *) A, sizeof(A) / AES_BLOCKBYTES, &ctx128);
    aes128_ctx_release(&ctx128);

    // Read the ciphertext words back as little-endian values.
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(A[idx]);
    }

    // Start from e, then add one dot product per output entry.
    memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));
    for (row = 0; row < PARAMS_N; row++) {
        const int16_t *a_row = &A[row * PARAMS_N];
        for (k = 0; k < PARAMS_NBAR; k++) {
            const uint16_t *s_vec = &s[k * PARAMS_N];
            uint16_t acc = 0;
            for (col = 0; col < PARAMS_N; col++) {
                acc += a_row[col] * s_vec[col];
            }
            // No explicit reduction: the uint16_t accumulator and output simply wrap.
            out[row * PARAMS_NBAR + k] += acc;
        }
    }

    return 1;
 }


 int PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Computes out = s'*A + e', where the N x N matrix A is expanded from seed_A with AES-128 in ECB mode.
    // Inputs: s' and e', both indexed as [k * PARAMS_N + column] (N_BAR x N).
    // Output: out, indexed the same way (N_BAR x N).
    // Always returns 1.
    int16_t A[PARAMS_N * PARAMS_N] = {0};
    aes128ctx ctx128;
    int row, col, k, idx;

    // Place (row, column) in the first two slots of every PARAMS_STRIPE_STEP-wide group; the rest stay zero.
    for (row = 0; row < PARAMS_N; row++) {
        int16_t *a_row = &A[row * PARAMS_N];
        for (col = 0; col < PARAMS_N; col += PARAMS_STRIPE_STEP) {
            a_row[col] = (int16_t) row;
            a_row[col + 1] = (int16_t) col;
        }
    }
    // Store the plaintext words in little-endian byte order before encrypting.
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM976AES_CLEAN_UINT16_TO_LE(A[idx]);
    }

    // Encrypt the whole matrix in place, then drop the key schedule.
    aes128_ecb_keyexp(&ctx128, seed_A);
    aes128_ecb((uint8_t *) A, (uint8_t *) A, sizeof(A) / AES_BLOCKBYTES, &ctx128);
    aes128_ctx_release(&ctx128);

    // Read the ciphertext words back as little-endian values.
    for (idx = 0; idx < PARAMS_N * PARAMS_N; idx++) {
        A[idx] = PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(A[idx]);
    }

    // Start from e', then add s'[k] . A[:, col] to every output entry.
    memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));
    for (col = 0; col < PARAMS_N; col++) {
        for (k = 0; k < PARAMS_NBAR; k++) {
            const uint16_t *s_vec = &s[k * PARAMS_N];
            uint16_t acc = 0;
            for (row = 0; row < PARAMS_N; row++) {
                acc += A[row * PARAMS_N + col] * s_vec[row];
            }
            // No explicit reduction: the uint16_t accumulator and output simply wrap.
            out[k * PARAMS_N + col] += acc;
        }
    }

    return 1;
 }
--- a/crypto_kem/frodokem976aes/clean/noise.c
+++ b/crypto_kem/frodokem976aes/clean/noise.c
@@ -1,35 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: noise sampling functions
 *********************************************************************************************/

 #include <stdint.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Cumulative distribution table of the noise distribution; contents come from params.h.
 static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

 void PQCLEAN_FRODOKEM976AES_CLEAN_sample_n(uint16_t *s, size_t n) {
    // Replaces each of the n 16-bit pseudo-random words in s with a sample from the noise distribution.
    // Bit 0 of a word chooses the sign; the remaining 15 bits are compared against the CDF table.
    // Every table entry except the last is always visited, so the loop count does not depend on the data.
    for (size_t idx = 0; idx < n; ++idx) {
        const uint16_t word = s[idx];
        const uint16_t prnd = word >> 1;     // upper 15 bits: the value looked up in the CDF
        const uint16_t sign = word & 0x1;    // lowest bit: 1 means negate the sample
        uint16_t sample = 0;

        // Count the entries below prnd: bit 15 of (CDF_TABLE[t] - prnd) is set exactly in that case,
        // because both operands fit in 15 bits. The last entry never needs to be compared.
        for (unsigned int t = 0; t < (unsigned int)(CDF_TABLE_LEN - 1); t++) {
            sample += (uint16_t)(CDF_TABLE[t] - prnd) >> 15;
        }

        // Two's-complement negation when sign == 1, identity when sign == 0.
        s[idx] = ((-sign) ^ sample) + sign;
    }
 }
--- a/crypto_kem/frodokem976aes/clean/params.h
+++ b/crypto_kem/frodokem976aes/clean/params.h
@@ -1,27 +0,0 @@
 // Parameter set for the FrodoKEM-976-AES "clean" implementation.
 #ifndef PARAMS_H
 #define PARAMS_H

 // Short aliases for the prefixed size constants (the PQCLEAN_..._CRYPTO_* macros are not defined in this header).
 #define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_SECRETKEYBYTES
 #define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define CRYPTO_BYTES PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_BYTES
 #define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM976AES_CLEAN_CRYPTO_CIPHERTEXTBYTES

 // Matrix dimensions: A is N x N; the secret and error matrices have N_BAR as their other dimension.
 #define PARAMS_N 976
 #define PARAMS_NBAR 8
 // Modulus q = 2^LOGQ.
 #define PARAMS_LOGQ 16
 #define PARAMS_Q (1 << PARAMS_LOGQ)
 // Number of message bits carried by each of the N_BAR x N_BAR encoded matrix entries.
 #define PARAMS_EXTRACTED_BITS 3
 // Column stride used when seeding the AES input for matrix A (see matrix_aes.c).
 #define PARAMS_STRIPE_STEP 8
 // NOTE(review): not referenced by the clean sources shown here; presumably used by other variants.
 #define PARAMS_PARALLEL 4
 // Length in bytes of the seed that matrix A is expanded from.
 #define BYTES_SEED_A 16
 // Length in bytes of the encoded message mu.
 #define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
 // Length in bytes of the public-key hash stored in the secret key.
 #define BYTES_PKHASH CRYPTO_BYTES

 // Selecting SHAKE XOF function for the KEM and noise sampling
 #define shake     shake256

 // CDF table
 // Initialiser and length of the table consumed by the noise sampler in noise.c.
 #define CDF_TABLE_DATA {5638, 15915, 23689, 28571, 31116, 32217, 32613, 32731, 32760, 32766, 32767}
 #define CDF_TABLE_LEN 11

 #endif
--- a/crypto_kem/frodokem976aes/clean/util.c
+++ b/crypto_kem/frodokem976aes/clean/util.c
@@ -1,264 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: additional functions for FrodoKEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Returns the smaller of two 8-bit values; y is returned when they are equal.
 static inline uint8_t min(uint8_t x, uint8_t y) {
    return (x < y) ? x : y;
 }

 // Interprets the two bytes stored in n as a little-endian 16-bit value,
 // independent of the host byte order.
 uint16_t PQCLEAN_FRODOKEM976AES_CLEAN_LE_TO_UINT16(uint16_t n) {
    const uint8_t *raw = (const uint8_t *) &n;
    const uint8_t low = raw[0];
    const uint8_t high = raw[1];
    return (uint16_t)((high << 8) | low);
 }

 // Returns a 16-bit object whose in-memory bytes are the little-endian encoding of n
 // (low byte first), independent of the host byte order.
 uint16_t PQCLEAN_FRODOKEM976AES_CLEAN_UINT16_TO_LE(uint16_t n) {
    uint16_t encoded;
    uint8_t *raw = (uint8_t *) &encoded;
    raw[1] = (uint8_t)(n >> 8);
    raw[0] = (uint8_t)(n & 0xFF);
    return encoded;
 }

 void PQCLEAN_FRODOKEM976AES_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
    // Computes out = b*s (N_BAR x N_BAR), reduced to PARAMS_LOGQ bits.
    // b is read as b[row * PARAMS_N + k]; s is read as s[col * PARAMS_N + k].
    const int q_mask = (1 << PARAMS_LOGQ) - 1;
    int row, col, k;

    for (row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *b_vec = &b[row * PARAMS_N];
        for (col = 0; col < PARAMS_NBAR; col++) {
            const uint16_t *s_vec = &s[col * PARAMS_N];
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            // Accumulate the dot product directly in the output cell.
            *cell = 0;
            for (k = 0; k < PARAMS_N; k++) {
                *cell += (uint16_t)(b_vec[k] * (uint32_t)s_vec[k]);
            }
            *cell = (uint32_t)(*cell) & q_mask;
        }
    }
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
    // Computes out = s*b + e (N_BAR x N_BAR), reduced to PARAMS_LOGQ bits.
    // s is read as s[row * PARAMS_N + j] (N_BAR x N); b is read as b[j * PARAMS_NBAR + col] (N x N_BAR);
    // e is indexed like out.
    const int q_mask = (1 << PARAMS_LOGQ) - 1;
    int row, col, j;

    for (row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *s_vec = &s[row * PARAMS_N];
        for (col = 0; col < PARAMS_NBAR; col++) {
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            // Seed the cell with the error term, then accumulate the dot product in place.
            *cell = e[row * PARAMS_NBAR + col];
            for (j = 0; j < PARAMS_N; j++) {
                *cell += (uint16_t)(s_vec[j] * (uint32_t)b[j * PARAMS_NBAR + col]);
            }
            *cell = (uint32_t)(*cell) & q_mask;
        }
    }
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Element-wise out = a + b over N_BAR x N_BAR entries, masked to PARAMS_LOGQ bits.
    // Each entry is read before it is written, so out may be the same array as a or b.
    const size_t count = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < count) {
        const int total = a[idx] + b[idx];
        out[idx] = total & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    // Element-wise out = a - b over N_BAR x N_BAR entries, masked to PARAMS_LOGQ bits.
    // Each entry is read before it is written, so out may be the same array as a or b.
    const size_t count = PARAMS_NBAR * PARAMS_NBAR;
    size_t idx = 0;

    while (idx < count) {
        const int difference = a[idx] - b[idx];
        out[idx] = difference & ((1 << PARAMS_LOGQ) - 1);
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_key_encode(uint16_t *out, const uint16_t *in) {
    // Encodes a bit string into N_BAR x N_BAR matrix entries.
    // in is read byte-wise: every PARAMS_EXTRACTED_BITS input bytes form one word, which is split into
    // 8 pieces of PARAMS_EXTRACTED_BITS bits; each piece lands in the top bits of one output entry.
    const uint8_t *src = (const uint8_t *)in;
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint64_t piece_mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    uint16_t *dst = out;

    for (unsigned int w = 0; w < word_count; w++) {
        // Gather the input bytes of this word, lowest byte first.
        uint64_t word = 0;
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            word |= ((uint64_t)src[w * PARAMS_EXTRACTED_BITS + byte]) << (8 * byte);
        }
        // Emit the pieces from least to most significant.
        for (unsigned int piece = 0; piece < pieces_per_word; piece++) {
            *dst++ = (uint16_t)((word & piece_mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
            word >>= PARAMS_EXTRACTED_BITS;
        }
    }
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_key_decode(uint16_t *out, const uint16_t *in) {
    // Decodes N_BAR x N_BAR matrix entries back into a bit string (inverse of key_encode).
    // Each entry is rounded to its PARAMS_EXTRACTED_BITS most significant bits; 8 such pieces are
    // collected into one word, which is written to out byte-wise, lowest byte first.
    const unsigned int pieces_per_word = 8;
    const unsigned int word_count = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint16_t piece_mask = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint16_t q_mask = ((uint16_t)1 << PARAMS_LOGQ) - 1;
    const uint16_t *src = in;
    uint8_t *dst = (uint8_t *)out;

    for (unsigned int w = 0; w < word_count; w++) {
        uint64_t word = 0;
        for (unsigned int piece = 0; piece < pieces_per_word; piece++) {
            // rounded = floor(entry * 2^-(LOGQ - EXTRACTED_BITS) + 0.5)
            uint16_t rounded = ((*src & q_mask) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
            word |= ((uint64_t)(rounded & piece_mask)) << (PARAMS_EXTRACTED_BITS * piece);
            src++;
        }
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            dst[w * PARAMS_EXTRACTED_BITS + byte] = (word >> (8 * byte)) & 0xFF;
        }
    }
 }


 // Bit-packs 16-bit words into a byte string, most significant bit first.
 //
 //   out, outlen - destination buffer and its size in bytes (zeroed on entry)
 //   in, inlen   - source words and their count
 //   lsb         - number of low-order bits taken from every source word
 void PQCLEAN_FRODOKEM976AES_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
    // Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
    // If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
    memset(out, 0, outlen);

    size_t i = 0;            // whole bytes already filled in
    size_t j = 0;            // whole uint16_t already copied
    uint16_t w = 0;          // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb in w

    // Continue while there is room in out and either unread input or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |        |        |********|********|
                              ^
                              j
        w : |   ****|
                ^
               bits
        out:|**|**|**|**|**|**|**|**|* |
                                    ^^
                                    ib
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < 8) {
            // Move as many bits as both the current output byte and the leftover word allow.
            int nbits = min(8 - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask);  // the bits to copy from w to out
            out[i] = out[i] + (t << (8 - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            // Refill w from the next input word once its bits are used up.
            if (bits == 0) {
                if (j < inlen) {
                    w = in[j];
                    bits = lsb;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == 8) {  // out[i] is filled in
            i++;
        }
    }
 }


 // Unpacks a most-significant-bit-first byte string into 16-bit words (inverse of pack).
 //
 //   out, outlen - destination words and their count (zeroed on entry)
 //   in, inlen   - source bytes and their count
 //   lsb         - number of bits that make up every destination word
 void PQCLEAN_FRODOKEM976AES_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
    // Unpack the input char vector into a uint16_t output vector, copying lsb bits
    // for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
    memset(out, 0, outlen * sizeof(uint16_t));

    size_t i = 0;            // whole uint16_t already filled in
    size_t j = 0;            // whole bytes already copied
    uint8_t w = 0;           // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb bits of w

    // Continue while there is room in out and either unread input or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |  |  |  |  |  |  |**|**|...
                              ^
                              j
        w : | *|
              ^
              bits
        out:|   *****|   *****|   ***  |        |...
                              ^   ^
                              i   b
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < lsb) {
            // Move as many bits as both the current output word and the leftover byte allow.
            int nbits = min(lsb - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (w >> (bits - nbits)) & mask;  // the bits to copy from w to out
            out[i] = out[i] + (t << (lsb - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            // Refill w from the next input byte once its bits are used up.
            if (bits == 0) {
                if (j < inlen) {
                    w = in[j];
                    bits = 8;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == lsb) {  // out[i] is filled in
            i++;
        }
    }
 }


 int8_t PQCLEAN_FRODOKEM976AES_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    // Compares two arrays of len 16-bit words without branching on their contents.
    // Returns 0 if the arrays are equal (including len == 0), -1 otherwise.
    uint16_t diff = 0;

    // Accumulate every differing bit; the loop always runs over all len entries.
    for (size_t i = 0; i < len; i++) {
        diff |= a[i] ^ b[i];
    }

    // Collapse diff to a single bit using unsigned arithmetic only: for any x != 0,
    // x | (0 - x) has its top bit set. This avoids right-shifting a negative signed
    // value and narrowing 0xFFFF to int8_t, both of which are implementation-defined in C.
    uint32_t wide = diff;
    uint32_t nonzero = (wide | (uint32_t)(0u - wide)) >> 31;   // 1 if the arrays differ, 0 otherwise

    return (int8_t)(-(int)nonzero);
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Copies one of two len-byte arrays into r without branching on selector:
    // selector == 0 copies a, selector == -1 copies b.
    const uint8_t mask = (uint8_t)selector;   // 0x00 keeps a, 0xFF switches to b
    size_t idx = 0;

    while (idx < len) {
        const uint8_t from_a = a[idx];
        const uint8_t from_b = b[idx];
        // XOR-ing a with the masked difference yields b where mask bits are set and a elsewhere.
        r[idx] = (uint8_t)(from_a ^ (mask & (from_a ^ from_b)));
        idx++;
    }
 }


 void PQCLEAN_FRODOKEM976AES_CLEAN_clear_bytes(uint8_t *mem, size_t n) {
    // Zeroes the first n bytes at mem.
    // The writes go through a volatile pointer so the compiler does not optimize the clearing away.
    volatile uint8_t *cursor = mem;

    while (n > 0) {
        *cursor = 0;
        cursor++;
        n--;
    }
 }
--- a/crypto_kem/frodokem976aes/opt/LICENSE
+++ b/crypto_kem/frodokem976aes/opt/LICENSE
@@ -1,21 +0,0 @@
 MIT License

 Copyright (c) Microsoft Corporation. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/crypto_kem/frodokem976aes/opt/Makefile.Microsoft_nmake
+++ b/crypto_kem/frodokem976aes/opt/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake
 #
 # It archives the object files listed in OBJECTS into the static library named by LIBRARY.
 # No explicit compile rule is given; the .obj files are presumably built by nmake's
 # predefined .c inference rule, which picks up CFLAGS.

 # Output library and the objects it is made of.
 LIBRARY=libfrodokem976aes_opt.lib
 OBJECTS=kem.obj matrix_aes.obj noise.obj util.obj

 # /I adds the shared ..\..\..\common directory to the include path;
 # /W4 /WX select warning level 4 and treat warnings as errors.
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 # Default target: build the library.
 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive the objects; $@ is the target name and $** expands to all of its dependents.
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove build products; the leading '-' lets nmake continue if a file is missing.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/frodokem976aes/opt/api.h
+++ b/crypto_kem/frodokem976aes/opt/api.h
@@ -1,20 +0,0 @@
 // Public API of the FrodoKEM-976-AES "opt" implementation: buffer sizes, algorithm name,
 // and the three KEM entry points.
 #ifndef PQCLEAN_FRODOKEM976AES_OPT_API_H
 #define PQCLEAN_FRODOKEM976AES_OPT_API_H

 #include <stddef.h>
 #include <stdint.h>

 // Sizes, in bytes, of the buffers exchanged through the functions below.
 #define PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_SECRETKEYBYTES  31296     // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
 #define PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_PUBLICKEYBYTES  15632     // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
 #define PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_BYTES              24     // shared-secret length
 #define PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_CIPHERTEXTBYTES  15744     // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

 #define PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_ALGNAME "FrodoKEM-976-AES"

 // Key generation: writes a public key to pk and a secret key to sk.
 // NOTE(review): return-value convention is not visible from this header; confirm against kem.c.
 int PQCLEAN_FRODOKEM976AES_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

 // Encapsulation: from public key pk, writes a ciphertext to ct and a shared secret to ss.
 int PQCLEAN_FRODOKEM976AES_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

 // Decapsulation: from ciphertext ct and secret key sk, writes the shared secret to ss.
 int PQCLEAN_FRODOKEM976AES_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

 #endif
--- a/crypto_kem/frodokem976aes/opt/common.h
+++ b/crypto_kem/frodokem976aes/opt/common.h
@@ -1,21 +0,0 @@
 // Internal helper prototypes shared by the FrodoKEM-976-AES "opt" sources.
 // This header includes nothing itself, so uint8_t/uint16_t/int8_t/size_t must already be
 // declared (e.g. via <stdint.h> and <stddef.h>) wherever it is included.
 // NOTE(review): the one-line descriptions below follow the function names and the same-named
 // helpers of the clean variant; confirm against the opt sources.
 #ifndef COMMON_H
 #define COMMON_H

 // Matrix products with the matrix A expanded from seed_A: out = A*s + e and out = s*A + e.
 int PQCLEAN_FRODOKEM976AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 int PQCLEAN_FRODOKEM976AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
 // Noise sampling: replaces n pseudo-random words in s with noise samples.
 void PQCLEAN_FRODOKEM976AES_OPT_sample_n(uint16_t *s, size_t n);
 // Small matrix arithmetic: out = b*s, out = s*b + e, out = a + b, out = a - b.
 void PQCLEAN_FRODOKEM976AES_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
 void PQCLEAN_FRODOKEM976AES_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
 void PQCLEAN_FRODOKEM976AES_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
 void PQCLEAN_FRODOKEM976AES_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
 // Message encoding into matrix entries and the matching decoding.
 void PQCLEAN_FRODOKEM976AES_OPT_key_encode(uint16_t *out, const uint16_t *in);
 void PQCLEAN_FRODOKEM976AES_OPT_key_decode(uint16_t *out, const uint16_t *in);
 // Bit-packing of 16-bit words into bytes (lsb bits per word) and the reverse.
 void PQCLEAN_FRODOKEM976AES_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
 void PQCLEAN_FRODOKEM976AES_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
 // Constant-time comparison and selection, and non-elidable memory clearing.
 int8_t PQCLEAN_FRODOKEM976AES_OPT_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
 void PQCLEAN_FRODOKEM976AES_OPT_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
 void PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(uint8_t *mem, size_t n);
 // Conversions between host-order 16-bit values and their little-endian byte layout.
 uint16_t PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(uint16_t n);
 uint16_t PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(uint16_t n);

 #endif
--- a/crypto_kem/frodokem976aes/opt/kem.c
+++ b/crypto_kem/frodokem976aes/opt/kem.c
@@ -1,237 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "fips202.h"
 #include "randombytes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Generates a FrodoKEM key pair into the caller-provided buffers pk and sk.
 //
 // pk is written as seed_A || pack(B) with B = A*S + E.
 // sk is written as s || pk || S (little-endian 16-bit words) || H(pk).
 // Randomness comes from a single randombytes() call; S, E and the secret part of
 // that randomness are wiped before returning. Always returns 0.
 int PQCLEAN_FRODOKEM976AES_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
    // FrodoKEM's key generation
    // Outputs: public key pk (               BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
    //          secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
    uint8_t *pk_seedA = &pk[0];
    uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *sk_s = &sk[0];
    uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    // S and E share one buffer so that a single SHAKE call can fill both halves.
    uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0};           // contains secret data
    uint16_t *E = &S[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A];    // contains secret data via randomness_s and randomness_seedSE
    uint8_t *randomness_s = &randomness[0];                 // contains secret data
    uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
    uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];           // contains secret data

    // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
    randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
    shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

    // Generate S and E, and compute B = A*S + E. Generate A on-the-fly
    // The SHAKE input is the one-byte prefix 0x5F followed by seedSE.
    shake_input_seedSE[0] = 0x5F;
    memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
    shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    // Interpret the SHAKE output bytes as little-endian 16-bit words before sampling.
    for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(S[i]);
    }
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(S, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(E, PARAMS_N * PARAMS_NBAR);
    // pk is the same address as pk_seedA (pk_seedA = &pk[0]), so this passes seed_A.
    PQCLEAN_FRODOKEM976AES_OPT_mul_add_as_plus_e(B, S, E, pk);

    // Encode the second part of the public key
    PQCLEAN_FRODOKEM976AES_OPT_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Add s, pk and S to the secret key
    memcpy(sk_s, randomness_s, CRYPTO_BYTES);
    memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
    // Only the S half (first PARAMS_N * PARAMS_NBAR words) is stored, so only it is converted.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(S[i]);
    }
    // 2 * PARAMS_N * PARAMS_NBAR is a byte count: PARAMS_N * PARAMS_NBAR words of two bytes each.
    memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

    // Add H(pk) to the secret key
    shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

    // Cleanup:
    // Only the first 2 * CRYPTO_BYTES of randomness (s and seedSE) are wiped; the
    // trailing randomness_z bytes are left as they are.
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(randomness, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 // Encapsulates a fresh shared secret under the public key pk.
 //
 // ct receives pack(Bp) || pack(C) and ss receives F(ct || k), where
 // (seedSE || k) = G_2(H(pk) || mu) for a random mu drawn with randombytes().
 // All secret intermediates are wiped before returning. Always returns 0.
 int PQCLEAN_FRODOKEM976AES_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
    // FrodoKEM's key encapsulation
    const uint8_t *pk_seedA = &pk[0];
    const uint8_t *pk_b = &pk[BYTES_SEED_A];
    uint8_t *ct_c1 = &ct[0];
    uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp live back to back in one buffer so a single SHAKE call fills all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];               // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];          // contains secret data
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                    // contains secret data via mu
    uint8_t *pkh = &G2in[0];
    uint8_t *mu = &G2in[BYTES_PKHASH];                        // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                          // contains secret data
    uint8_t *seedSE = &G2out[0];                              // contains secret data
    uint8_t *k = &G2out[CRYPTO_BYTES];                        // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];       // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];            // contains secret data
    uint8_t shake_input_seedSE[1 + CRYPTO_BYTES];             // contains secret data

    // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
    shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
    randombytes(mu, BYTES_MU);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the one-byte prefix 0x96 followed by seedSE.
    shake_input_seedSE[0] = 0x96;
    memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
    // Interpret the SHAKE output bytes as little-endian 16-bit words before sampling.
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
    PQCLEAN_FRODOKEM976AES_OPT_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

    // Generate Epp, and compute V = Sp*B + Epp
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_OPT_mul_add_sb_plus_e(V, B, Sp, Epp);

    // Encode mu, and compute C = V + enc(mu) (mod q)
    // mu is a byte buffer; the cast only satisfies key_encode's uint16_t * parameter type.
    PQCLEAN_FRODOKEM976AES_OPT_key_encode(C, (uint16_t *)mu);
    PQCLEAN_FRODOKEM976AES_OPT_add(C, V, C);
    PQCLEAN_FRODOKEM976AES_OPT_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

    // Compute ss = F(ct||KK)
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
    memcpy(Fin_k, k, CRYPTO_BYTES);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Sp, Ep and Epp are wiped separately; together the three calls cover the whole Sp buffer.
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(mu, BYTES_MU);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
    return 0;
 }


 // Decapsulates the ciphertext ct with the secret key sk and writes the shared secret to ss.
 //
 // The function decodes mu' from ct, re-runs the encryption with mu' and compares the
 // result with ct. On a match ss = F(ct || k'); otherwise ss = F(ct || s), where s is the
 // value stored at the start of sk. The choice is made without branching.
 // All secret intermediates are wiped before returning. Always returns 0.
 int PQCLEAN_FRODOKEM976AES_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
    // FrodoKEM's key decapsulation
    uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
    uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0};              // contains secret data
    uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
    uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
    // Sp, Ep and Epp live back to back in one buffer so a single SHAKE call fills all three.
    uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
    uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR];                  // contains secret data
    uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR];             // contains secret data
    const uint8_t *ct_c1 = &ct[0];
    const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
    const uint8_t *sk_s = &sk[0];
    const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
    const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
    uint16_t S[PARAMS_N * PARAMS_NBAR];                      // contains secret data
    const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
    const uint8_t *pk_seedA = &sk_pk[0];
    const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
    uint8_t G2in[BYTES_PKHASH + BYTES_MU];                   // contains secret data via muprime
    uint8_t *pkh = &G2in[0];
    uint8_t *muprime = &G2in[BYTES_PKHASH];                  // contains secret data
    uint8_t G2out[2 * CRYPTO_BYTES];                         // contains secret data
    uint8_t *seedSEprime = &G2out[0];                        // contains secret data
    uint8_t *kprime = &G2out[CRYPTO_BYTES];                  // contains secret data
    uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES];      // contains secret data via Fin_k
    uint8_t *Fin_ct = &Fin[0];
    uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES];           // contains secret data
    uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES];       // contains secret data

    // Rebuild S from the little-endian byte pairs stored in the secret key.
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
    }

    // Compute W = C - Bp*S (mod q), and decode the randomness mu
    PQCLEAN_FRODOKEM976AES_OPT_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_OPT_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_OPT_mul_bs(W, Bp, S);
    PQCLEAN_FRODOKEM976AES_OPT_sub(W, C, W);
    // muprime is a byte buffer; the cast only satisfies key_decode's uint16_t * parameter type.
    PQCLEAN_FRODOKEM976AES_OPT_key_decode((uint16_t *)muprime, W);

    // Generate (seedSE' || k') = G_2(pkh || mu')
    memcpy(pkh, sk_pkh, BYTES_PKHASH);
    shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

    // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
    // The SHAKE input is the one-byte prefix 0x96 followed by seedSE'.
    shake_input_seedSEprime[0] = 0x96;
    memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
    shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    // Interpret the SHAKE output bytes as little-endian 16-bit words before sampling.
    for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
        Sp[i] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(Sp[i]);
    }
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

    // Generate Epp, and compute W = Sp*B + Epp
    PQCLEAN_FRODOKEM976AES_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
    PQCLEAN_FRODOKEM976AES_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
    PQCLEAN_FRODOKEM976AES_OPT_mul_add_sb_plus_e(W, B, Sp, Epp);

    // Encode mu, and compute CC = W + enc(mu') (mod q)
    PQCLEAN_FRODOKEM976AES_OPT_key_encode(CC, (uint16_t *)muprime);
    PQCLEAN_FRODOKEM976AES_OPT_add(CC, W, CC);

    // Prepare input to F
    memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

    // Reducing BBp modulo q
    for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
        BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
    }

    // If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
    // Needs to avoid branching on secret data as per:
    //     Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
    //     primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
    // Both comparisons are always evaluated; OR-ing the results gives -1 if either one differs.
    int8_t selector = PQCLEAN_FRODOKEM976AES_OPT_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM976AES_OPT_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
    // If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
    PQCLEAN_FRODOKEM976AES_OPT_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
    shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

    // Cleanup:
    // Sp, Ep and Epp are wiped separately; together the three calls cover the whole Sp buffer.
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(muprime, BYTES_MU);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(Fin_k, CRYPTO_BYTES);
    PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
    return 0;
 }
--- a/crypto_kem/frodokem976aes/opt/matrix_aes.c
+++ b/crypto_kem/frodokem976aes/opt/matrix_aes.c
@@ -1,127 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: matrix arithmetic functions used by the KEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "aes.h"

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Computes out = A*s + e, where A is expanded from seed_A with AES-128 in ECB mode.
 //
 // out, s and e each hold PARAMS_N * PARAMS_NBAR 16-bit words; seed_A is used as the
 // AES-128 key. A is never stored in full: four rows are generated per outer iteration
 // and consumed immediately. Arithmetic wraps modulo 2^16 through the uint16_t sums.
 // Always returns 1.
 int PQCLEAN_FRODOKEM976AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
    // Inputs: s, e (N x N_BAR)
    // Output: out = A*s + e (N x N_BAR)
    int k;
    uint16_t i, j;
    int16_t a_row[4 * PARAMS_N];

    // Start from out = e. The copy is done word by word: reading the uint16_t buffers
    // through uint32_t pointers would break the aliasing rules, require 4-byte alignment
    // the parameter types do not promise, and cast away the const qualifier of e.
    for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i++) {
        out[i] = e[i];
    }

    int16_t a_row_temp[4 * PARAMS_N] = {0};                     // Take four lines of A at once
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);

    // Each group of PARAMS_STRIPE_STEP words is one AES input block. The column index j
    // goes into its second word once, up front, because it is the same for every row.
    for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
        a_row_temp[j + 1 + 0 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);     // Loading values in the little-endian order
        a_row_temp[j + 1 + 1 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);
        a_row_temp[j + 1 + 2 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);
        a_row_temp[j + 1 + 3 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);
    }

    for (i = 0; i < PARAMS_N; i += 4) {
        // The row index goes into the first word of every block and changes per iteration.
        for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {    // Go through A, four rows at a time
            a_row_temp[j + 0 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 0); // Loading values in the little-endian order
            a_row_temp[j + 1 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 1);
            a_row_temp[j + 2 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 2);
            a_row_temp[j + 3 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 3);
        }
        aes128_ecb((uint8_t *)a_row, (uint8_t *)a_row_temp, 4 * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
        // The AES output is a little-endian byte stream; convert it to host-order words.
        for (k = 0; k < 4 * PARAMS_N; k++) {
            a_row[k] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(a_row[k]);
        }
        for (k = 0; k < PARAMS_NBAR; k++) {
            uint16_t sum[4] = {0};
            for (j = 0; j < PARAMS_N; j++) {                    // Matrix-vector multiplication
                uint16_t sp = s[k * PARAMS_N + j];
                sum[0] += a_row[0 * PARAMS_N + j] * sp;         // Go through four lines with same s
                sum[1] += a_row[1 * PARAMS_N + j] * sp;
                sum[2] += a_row[2 * PARAMS_N + j] * sp;
                sum[3] += a_row[3 * PARAMS_N + j] * sp;
            }
            out[(i + 0)*PARAMS_NBAR + k] += sum[0];
            out[(i + 2)*PARAMS_NBAR + k] += sum[2];
            out[(i + 1)*PARAMS_NBAR + k] += sum[1];
            out[(i + 3)*PARAMS_NBAR + k] += sum[3];
        }
    }
    aes128_ctx_release(&ctx128);
    return 1;
 }




 // Computes out = s*A + e, where A is expanded from seed_A with AES-128 in ECB mode.
 //
 // out, s and e each hold PARAMS_NBAR * PARAMS_N 16-bit words; seed_A is used as the
 // AES-128 key. A is generated PARAMS_STRIPE_STEP columns at a time and consumed
 // immediately. Arithmetic wraps modulo 2^16 through the uint16_t sums.
 // Always returns 1.
 int PQCLEAN_FRODOKEM976AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
    // Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
    // Inputs: s', e' (N_BAR x N)
    // Output: out = s'*A + e' (N_BAR x N)
    int j;
    uint16_t i, kk;

    // Start from out = e. The copy is done word by word: reading the uint16_t buffers
    // through uint32_t pointers would break the aliasing rules, require 4-byte alignment
    // the parameter types do not promise, and cast away the const qualifier of e.
    for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i++) {
        out[i] = e[i];
    }

    int k;
    uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
    uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP];
    uint16_t a_cols_temp[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
    aes128ctx ctx128;

    aes128_ecb_keyexp(&ctx128, seed_A);

    // One AES input block per row of A: the row index goes into the first word of each
    // block once, up front, because it does not depend on the column stripe.
    for (i = 0, j = 0; i < PARAMS_N; i++, j += PARAMS_STRIPE_STEP) {
        a_cols_temp[j] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i);                       // Loading values in the little-endian order
    }

    for (kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) {     // Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time.
        // The stripe's first column index goes into the second word of every block.
        for (i = 0; i < (PARAMS_N * PARAMS_STRIPE_STEP); i += PARAMS_STRIPE_STEP) {
            a_cols_temp[i + 1] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(kk);              // Loading values in the little-endian order
        }

        aes128_ecb((uint8_t *)a_cols, (uint8_t *)a_cols_temp, PARAMS_N * PARAMS_STRIPE_STEP * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);

        for (i = 0; i < PARAMS_N; i++) {                        // Transpose a_cols to have access to it in the column-major order.
            for (k = 0; k < PARAMS_STRIPE_STEP; k++) {
                a_cols_t[k * PARAMS_N + i] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(a_cols[i * PARAMS_STRIPE_STEP + k]);
            }
        }

        for (i = 0; i < PARAMS_NBAR; i++) {
            for (k = 0; k < PARAMS_STRIPE_STEP; k += PARAMS_PARALLEL) {
                // The four explicit sums below assume PARAMS_PARALLEL is 4.
                uint16_t sum[PARAMS_PARALLEL] = {0};
                for (j = 0; j < PARAMS_N; j++) {                // Matrix-vector multiplication
                    uint16_t sp = s[i * PARAMS_N + j];
                    sum[0] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 0) * PARAMS_N + j]);
                    sum[1] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 1) * PARAMS_N + j]);
                    sum[2] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 2) * PARAMS_N + j]);
                    sum[3] += (uint16_t)(sp * (uint32_t)a_cols_t[(k + 3) * PARAMS_N + j]);
                }
                out[i * PARAMS_N + kk + k + 0] += sum[0];
                out[i * PARAMS_N + kk + k + 2] += sum[2];
                out[i * PARAMS_N + kk + k + 1] += sum[1];
                out[i * PARAMS_N + kk + k + 3] += sum[3];
            }
        }
    }
    aes128_ctx_release(&ctx128);
    return 1;
 }
--- a/crypto_kem/frodokem976aes/opt/noise.c
+++ b/crypto_kem/frodokem976aes/opt/noise.c
@@ -1,35 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: noise sampling functions
 *********************************************************************************************/

 #include <stdint.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

 // Replaces each of the n 16-bit pseudo-random words in s with a noise sample.
 // The lowest bit of a word selects the sign; the remaining 15 bits are compared
 // against the CDF table to obtain the magnitude. The table scan is branch-free.
 void PQCLEAN_FRODOKEM976AES_OPT_sample_n(uint16_t *s, size_t n) {
    // The last table entry never needs to be compared against.
    const unsigned int compared_entries = (unsigned int)(CDF_TABLE_LEN - 1);

    for (size_t idx = 0; idx < n; idx++) {
        const uint16_t word = s[idx];
        const uint16_t rnd15 = word >> 1;      // upper 15 bits: the uniform value
        const uint16_t negate = word & 0x1;    // lowest bit: 1 means the sample is negated
        uint16_t magnitude = 0;

        for (unsigned int t = 0; t < compared_entries; t++) {
            // Both operands fit in 15 bits, so bit 15 of the 16-bit difference is 1
            // exactly when CDF_TABLE[t] < rnd15.
            const uint16_t diff = (uint16_t)(CDF_TABLE[t] - rnd15);
            magnitude += diff >> 15;
        }

        // Two's-complement negation when negate == 1, identity when negate == 0.
        s[idx] = ((-negate) ^ magnitude) + negate;
    }
 }
--- a/crypto_kem/frodokem976aes/opt/params.h
+++ b/crypto_kem/frodokem976aes/opt/params.h
@@ -1,27 +0,0 @@
 #ifndef PARAMS_H
 #define PARAMS_H

 // Parameter set for the FrodoKEM-976-AES "opt" implementation.

 // Short aliases for the prefixed size constants (presumably defined in api.h, which
 // the .c files include before this header - confirm).
 #define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_SECRETKEYBYTES
 #define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_PUBLICKEYBYTES
 #define CRYPTO_BYTES PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_BYTES
 #define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM976AES_OPT_CRYPTO_CIPHERTEXTBYTES

 // Matrix dimensions: A is PARAMS_N x PARAMS_N; the secret and error matrices have
 // PARAMS_NBAR as their other dimension.
 #define PARAMS_N 976
 #define PARAMS_NBAR 8
 // log2 of the modulus q; with 16 the uint16_t arithmetic wraps modulo q by itself.
 #define PARAMS_LOGQ 16
 #define PARAMS_Q (1 << PARAMS_LOGQ)
 // Number of message bits carried by each entry in key_encode/key_decode.
 #define PARAMS_EXTRACTED_BITS 3
 // Number of 16-bit words per AES block, and of columns of A handled per stripe in mul_add_sa_plus_e.
 #define PARAMS_STRIPE_STEP 8
 // Number of running sums kept in the inner loop of mul_add_sa_plus_e.
 #define PARAMS_PARALLEL 4
 // Length in bytes of seed_A, which is used as the AES-128 key when expanding A.
 #define BYTES_SEED_A 16
 // Length in bytes of the encoded message mu.
 #define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
 // Length in bytes of the public-key hash stored in the secret key.
 #define BYTES_PKHASH CRYPTO_BYTES

 // Selecting SHAKE XOF function for the KEM and noise sampling
 #define shake     shake256

 // CDF table
 // Values fit in 15 bits; sample_n compares against all entries except the last.
 #define CDF_TABLE_DATA {5638, 15915, 23689, 28571, 31116, 32217, 32613, 32731, 32760, 32766, 32767}
 #define CDF_TABLE_LEN 11

 #endif
--- a/crypto_kem/frodokem976aes/opt/util.c
+++ b/crypto_kem/frodokem976aes/opt/util.c
@@ -1,264 +0,0 @@
 /********************************************************************************************
 * FrodoKEM: Learning with Errors Key Encapsulation
 *
 * Abstract: additional functions for FrodoKEM
 *********************************************************************************************/

 #include <stdint.h>
 #include <string.h>

 #include "api.h"
 #include "common.h"
 #include "params.h"

 // Returns the smaller of the two 8-bit values.
 static inline uint8_t min(uint8_t x, uint8_t y) {
    return (x < y) ? x : y;
 }

 // Interprets the two bytes stored in n as a little-endian 16-bit value and returns
 // it in host byte order.
 uint16_t PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(uint16_t n) {
    const uint8_t *raw = (const uint8_t *) &n;
    const uint16_t low = raw[0];     // first byte in memory is the least significant
    const uint16_t high = raw[1];
    return (uint16_t)(low | (high << 8));
 }

 // Returns a 16-bit word whose in-memory bytes are the little-endian encoding of n.
 uint16_t PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(uint16_t n) {
    uint16_t encoded;
    uint8_t *dst = (uint8_t *) &encoded;
    dst[1] = (uint8_t)(n >> 8);      // most significant byte goes second
    dst[0] = (uint8_t)(n & 0xFF);    // least significant byte goes first
    return encoded;
 }

 // Computes out = b*s reduced modulo q.
 // b is N_BAR x N (row-major), out is N_BAR x N_BAR; entry (row, col) is the dot
 // product of b's row `row` with the PARAMS_N words of s starting at col * PARAMS_N.
 void PQCLEAN_FRODOKEM976AES_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *b_row = &b[row * PARAMS_N];
        for (int col = 0; col < PARAMS_NBAR; col++) {
            const uint16_t *s_vec = &s[col * PARAMS_N];
            uint16_t *cell = &out[row * PARAMS_NBAR + col];

            // Accumulate directly in the output entry, wrapping modulo 2^16.
            *cell = 0;
            for (int t = 0; t < PARAMS_N; t++) {
                *cell += (uint16_t)(b_row[t] * (uint32_t)s_vec[t]);
            }
            *cell = (uint32_t)(*cell) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 // Computes out = s*b + e reduced modulo q.
 // s is N_BAR x N, b is N x N_BAR, e and out are N_BAR x N_BAR, all row-major.
 void PQCLEAN_FRODOKEM976AES_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
    for (int row = 0; row < PARAMS_NBAR; row++) {
        const uint16_t *s_row = &s[row * PARAMS_N];
        for (int col = 0; col < PARAMS_NBAR; col++) {
            const int idx = row * PARAMS_NBAR + col;

            // Seed the entry with the error term, then add the dot product in place.
            out[idx] = e[idx];
            for (int t = 0; t < PARAMS_N; t++) {
                out[idx] += (uint16_t)(s_row[t] * (uint32_t)b[t * PARAMS_NBAR + col]);
            }
            out[idx] = (uint32_t)(out[idx]) & ((1 << PARAMS_LOGQ) - 1);
        }
    }
 }


 // Entry-wise sum of two N_BAR x N_BAR matrices, reduced modulo q: out = a + b.
 void PQCLEAN_FRODOKEM976AES_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    const size_t entries = PARAMS_NBAR * PARAMS_NBAR;

    for (size_t idx = 0; idx != entries; idx++) {
        const int total = a[idx] + b[idx];
        out[idx] = total & ((1 << PARAMS_LOGQ) - 1);
    }
 }


 // Entry-wise difference of two N_BAR x N_BAR matrices, reduced modulo q: out = a - b.
 void PQCLEAN_FRODOKEM976AES_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
    const size_t entries = PARAMS_NBAR * PARAMS_NBAR;

    for (size_t idx = 0; idx != entries; idx++) {
        const int delta = a[idx] - b[idx];
        out[idx] = delta & ((1 << PARAMS_LOGQ) - 1);
    }
 }


 // Encodes a message into an N_BAR x N_BAR matrix of 16-bit words.
 // The input is read as bytes. Each group of PARAMS_EXTRACTED_BITS bytes is split into
 // eight pieces of PARAMS_EXTRACTED_BITS bits, least significant piece first, and each
 // piece is placed in the top bits of one output word.
 void PQCLEAN_FRODOKEM976AES_OPT_key_encode(uint16_t *out, const uint16_t *in) {
    const uint8_t *src = (const uint8_t *)in;
    const unsigned int pieces_per_group = 8;
    const unsigned int groups = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint64_t piece_mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    unsigned int out_idx = 0;

    for (unsigned int g = 0; g < groups; g++) {
        // Gather the group's bytes into one integer, first byte least significant.
        uint64_t chunk = 0;
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            chunk |= ((uint64_t)src[g * PARAMS_EXTRACTED_BITS + byte]) << (8 * byte);
        }
        // Peel the pieces off the bottom and shift each one into the top of a word.
        for (unsigned int p = 0; p < pieces_per_group; p++) {
            out[out_idx] = (uint16_t)((chunk & piece_mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
            chunk >>= PARAMS_EXTRACTED_BITS;
            out_idx++;
        }
    }
 }


 // Decodes an N_BAR x N_BAR matrix of 16-bit words back into message bytes.
 // Each word is rounded to its top PARAMS_EXTRACTED_BITS bits. Eight rounded values
 // are concatenated, first value least significant, and written out as
 // PARAMS_EXTRACTED_BITS bytes. The output is written as bytes.
 void PQCLEAN_FRODOKEM976AES_OPT_key_decode(uint16_t *out, const uint16_t *in) {
    const unsigned int pieces_per_group = 8;
    const unsigned int groups = (PARAMS_NBAR * PARAMS_NBAR) / 8;
    const uint16_t piece_mask = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1;
    const uint16_t q_mask = ((uint16_t)1 << PARAMS_LOGQ) - 1;
    uint8_t *dst = (uint8_t *)out;
    unsigned int in_idx = 0;

    for (unsigned int g = 0; g < groups; g++) {
        uint64_t packed = 0;
        for (unsigned int p = 0; p < pieces_per_group; p++) {
            // Add half of the dropped range before shifting so the value is rounded,
            // then mask so a carry out of the top wraps around to zero.
            const uint16_t rounded = ((in[in_idx] & q_mask) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
            packed |= ((uint64_t)(rounded & piece_mask)) << (PARAMS_EXTRACTED_BITS * p);
            in_idx++;
        }
        for (unsigned int byte = 0; byte < PARAMS_EXTRACTED_BITS; byte++) {
            dst[g * PARAMS_EXTRACTED_BITS + byte] = (packed >> (8 * byte)) & 0xFF;
        }
    }
 }


 // Packs the low `lsb` bits of each of the `inlen` input words into a byte string.
 //
 // out/outlen: destination buffer and its size in bytes; it is zeroed first.
 // in/inlen:   source words and their count.
 // lsb:        number of low-order bits taken from every input word.
 // Bits are emitted most significant first, both within each word and within each byte.
 void PQCLEAN_FRODOKEM976AES_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
    // Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
    // If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
    memset(out, 0, outlen);

    size_t i = 0;            // whole bytes already filled in
    size_t j = 0;            // whole uint16_t already copied
    uint16_t w = 0;          // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb in w

    // Continue while there is room in out and either unread input or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |        |        |********|********|
                              ^
                              j
        w : |   ****|
                ^
               bits
        out:|**|**|**|**|**|**|**|**|* |
                                    ^^
                                    ib
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < 8) {
            // Copy as many bits as fit in the current byte and are still available in w.
            int nbits = min(8 - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask);  // the bits to copy from w to out
            out[i] = out[i] + (t << (8 - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            // w is used up: fetch the next input word, or stop if there is none.
            if (bits == 0) {
                if (j < inlen) {
                    w = in[j];
                    bits = lsb;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == 8) {  // out[i] is filled in
            i++;
        }
    }
 }


 // Unpacks a byte string into words of `lsb` bits each.
 //
 // out/outlen: destination words and their count; the buffer is zeroed first.
 // in/inlen:   source bytes and their count.
 // lsb:        number of bits placed in the low-order end of every output word.
 // Bits are consumed most significant first, both within each byte and within each word.
 void PQCLEAN_FRODOKEM976AES_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
    // Unpack the input char vector into a uint16_t output vector, copying lsb bits
    // for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
    memset(out, 0, outlen * sizeof(uint16_t));

    size_t i = 0;            // whole uint16_t already filled in
    size_t j = 0;            // whole bytes already copied
    uint8_t w = 0;           // the leftover, not yet copied
    uint8_t bits = 0;        // the number of lsb bits of w

    // Continue while there is room in out and either unread input or leftover bits in w.
    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
        /*
        in: |  |  |  |  |  |  |**|**|...
                              ^
                              j
        w : | *|
              ^
              bits
        out:|   *****|   *****|   ***  |        |...
                              ^   ^
                              i   b
        */
        uint8_t b = 0;  // bits in out[i] already filled in
        while (b < lsb) {
            // Copy as many bits as the current word still needs and w still holds.
            int nbits = min(lsb - b, bits);
            uint16_t mask = (1 << nbits) - 1;
            uint8_t t = (w >> (bits - nbits)) & mask;  // the bits to copy from w to out
            out[i] = out[i] + (t << (lsb - b - nbits));
            b += (uint8_t) nbits;
            bits -= (uint8_t) nbits;
            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging

            // w is used up: fetch the next input byte, or stop if there is none.
            if (bits == 0) {
                if (j < inlen) {
                    w = in[j];
                    bits = 8;
                    j++;
                } else {
                    break;  // the input vector is exhausted
                }
            }
        }
        if (b == lsb) {  // out[i] is filled in
            i++;
        }
    }
 }


 // Compares two arrays of `len` 16-bit words without branching on their contents.
 // Returns 0 if all words are equal, -1 otherwise (also 0 when len is 0).
 int8_t PQCLEAN_FRODOKEM976AES_OPT_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    uint16_t r = 0;

    // Accumulate every differing bit; r stays 0 only if the arrays are identical.
    for (size_t i = 0; i < len; i++) {
        r |= a[i] ^ b[i];
    }

    // For a 16-bit r, (r | -r) has its top bit set exactly when r != 0. Working on
    // unsigned values avoids right-shifting a negative number and narrowing 0xFFFF to
    // int8_t, both of which are implementation-defined in C.
    const uint16_t negated = (uint16_t)(0u - r);
    const unsigned int nonzero = (unsigned int)(uint16_t)(r | negated) >> 15;

    // nonzero is 0 or 1, so the result is 0 or -1 and always representable.
    return (int8_t)(-(int)nonzero);
 }


 // Copies one of two byte arrays into r without branching on the selector.
 // selector == 0 copies a; selector == -1 copies b. len is the number of bytes.
 void PQCLEAN_FRODOKEM976AES_OPT_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
    // Turn the selector into two complementary byte masks.
    const uint8_t keep_b = (uint8_t)selector;
    const uint8_t keep_a = (uint8_t)~keep_b;

    for (size_t pos = 0; pos < len; pos++) {
        r[pos] = (uint8_t)((keep_a & a[pos]) | (keep_b & b[pos]));
    }
 }


 // Overwrites the first n bytes at mem with zero.
 // The writes go through a volatile pointer so the compiler does not drop them.
 void PQCLEAN_FRODOKEM976AES_OPT_clear_bytes(uint8_t *mem, size_t n) {
    volatile uint8_t *cursor = mem;
    size_t remaining = n;

    while (remaining > 0) {
        *cursor = 0;
        cursor++;
        remaining--;
    }
 }
--- a/crypto_kem/hqc-rmrs-128/META.yml
+++ b/crypto_kem/hqc-rmrs-128/META.yml
@@ -1,36 +0,0 @@
 name: HQC-RMRS-128
 type: kem
 claimed-nist-level: 1
 claimed-security: IND-CCA2
 length-ciphertext: 4481
 length-public-key: 2249
 length-secret-key: 2289
 length-shared-secret: 64
 nistkat-sha256: b9d10eda065c8ff31d40b929ad7f742889544363aa031096850009a882d9d827
 principal-submitters:
  - Carlos Aguilar Melchor
  - Nicolas Aragon
  - Slim Bettaieb
  - Olivier Blazy
  - Jurjen Bos
  - Jean-Christophe Deneuville
  - Philippe Gaborit
  - Edoardo Persichetti
  - Jean-Marc Robert
  - Pascal Véron
  - Gilles Zémor
  - Loïc Bidoux
 implementations:
    - name: clean
      version: hqc-submission_2020-10-01 via https://github.com/jschanck/package-pqclean/tree/c9181076/hqc
    - name: avx2
      version: hqc-submission_2020-10-01 via https://github.com/jschanck/package-pqclean/tree/c9181076/hqc
      supported_platforms:
          - architecture: x86_64
            operating_systems:
                - Linux
                - Darwin
            required_flags:
                - avx2
                - bmi1
                - pclmulqdq
--- a/crypto_kem/hqc-rmrs-128/avx2/LICENSE
+++ b/crypto_kem/hqc-rmrs-128/avx2/LICENSE
@@ -1 +0,0 @@
 Public Domain
--- a/crypto_kem/hqc-rmrs-128/avx2/api.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/api.h
@@ -1,25 +0,0 @@
 #ifndef PQCLEAN_HQCRMRS128_AVX2_API_H
 #define PQCLEAN_HQCRMRS128_AVX2_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_ALGNAME                      "HQC-RMRS-128"

 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES               2289
 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES               2249
 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES              4481

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32

 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/code.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/code.c
@@ -1,47 +0,0 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 * @brief Encodes the message m into the code word em of the concatenated code
 *
 * The message goes through the Reed-Solomon encoder first; its output is then
 * fed to the duplicated Reed-Muller encoder, which produces the final code word.
 *
 * @param[out] em Pointer to the buffer receiving the concatenated code word
 * @param[in] m Pointer to the message to encode
 */
 void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
    // Zero-initialised scratch buffer holding the Reed-Solomon code word
    uint8_t rs_codeword[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(rs_codeword, m);
    PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(em, rs_codeword);
 }



 /**
 * @brief Decodes the concatenated code word em back into the message m
 *
 * Mirror image of the encoder: the Reed-Muller decoder runs first and its
 * output is handed to the Reed-Solomon decoder, which writes the message.
 *
 * @param[out] m Pointer to the buffer receiving the decoded message
 * @param[in] em Pointer to the code word to decode
 */
 void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
    // Zero-initialised scratch buffer holding the Reed-Muller decoder output
    uint8_t rm_decoded[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(rm_decoded, em);
    PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(m, rm_decoded);
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/code.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/code.h
@@ -1,18 +0,0 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *message);

 void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/fft.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/fft.c
@@ -1,351 +0,0 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Fills betas with the basis (omitting 1) used by the additive FFT and its transpose
 *
 * The entries are the single-bit values 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2,
 * stored in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    uint16_t *out = betas;
    size_t shift;

    // Walk the exponent downwards; the exponent 0 (the value 1) is left out.
    for (shift = PARAM_M - 1; shift > 0; --shift) {
        *out = 1 << shift;
        ++out;
    }
 }



 /**
 * @brief Tabulates the XOR-sums of all subsets of a set
 *
 * Entry number idx of subset_sums is the XOR of those set elements whose
 * position is a set bit in the binary representation of idx.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    uint16_t elem, low;

    // The empty subset sums to zero.
    subset_sums[0] = 0;

    // Adding element number elem doubles the table: the new upper half is the
    // already computed lower half with set[elem] XORed in.
    for (elem = 0; elem < set_size; ++elem) {
        const int span = 1 << elem;
        uint16_t *upper = subset_sums + span;

        for (low = 0; low < span; ++low) {
            upper[low] = set[elem] ^ subset_sums[low];
        }
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * The cases m_f = 1, 2, 3 and 4 are fully unrolled; every other value of m_f
 * is delegated to radix_big. Inside an unrolled case the statement order
 * matters: several outputs are built from f0/f1 entries assigned earlier in
 * the same case.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    // 16 input coefficients f[0..15] -> f0[0..7] and f1[0..7]
    case 4:
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    // 8 input coefficients f[0..7] -> f0[0..3] and f1[0..3]
    case 3:
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    // 4 input coefficients f[0..3] -> f0[0..1] and f1[0..1]
    case 2:
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        break;

    // 2 input coefficients: f0 and f1 are the two coefficients themselves
    case 1:
        f0[0] = f[0];
        f1[0] = f[1];
        break;

    // Any other size: generic recursive conversion
    default:
        radix_big(f0, f1, f, m_f);
        break;
    }
 }

 /**
 * @brief Generic radix conversion used by radix() for sizes it does not unroll
 *
 * With n = 2^(m_f - 2), the input f (4n coefficients) is rearranged into two
 * polynomials Q and R of 2n coefficients each; both are converted with
 * radix() at size m_f - 1 and the four resulting halves are interleaved
 * into f0 and f1.
 *
 * @param[out] f0 Array receiving 2n coefficients
 * @param[out] f1 Array receiving 2n coefficients
 * @param[in] f Array of 4n coefficients
 * @param[in] m_f Size exponent of f (n = 2^(m_f - 2))
 */
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

    const size_t n = (size_t) 1 << (m_f - 2);
    // Size in bytes of n coefficients (uint16_t occupies 2 bytes)
    const size_t n_bytes = n * sizeof(uint16_t);
    size_t idx;

    // Q starts as two copies of the top quarter of f, R as the lower half of f
    memcpy(Q, f + 3 * n, n_bytes);
    memcpy(Q + n, f + 3 * n, n_bytes);
    memcpy(R, f, 2 * n_bytes);

    for (idx = 0; idx != n; ++idx) {
        Q[idx] ^= f[2 * n + idx];
        R[n + idx] ^= Q[idx];
    }

    radix(Q0, Q1, Q, m_f - 1);
    radix(R0, R1, R, m_f - 1);

    // Lower halves of the outputs come from R, upper halves from Q
    memcpy(f0, R0, n_bytes);
    memcpy(f0 + n, Q0, n_bytes);
    memcpy(f1, R1, n_bytes);
    memcpy(f1 + n, Q1, n_bytes);
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS128_AVX2_fft.
 * It recurses on the two halves f0 and f1 produced by radix() with one beta
 * fewer (m - 1) and a halved size exponent (m_f - 1).
 *
 * @param[out] w Array receiving 2^m evaluations
 * @param[in,out] f Array of coefficients; it is overwritten in Step 2 when betas[m - 1] != 1
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f Size exponent of f: Step 2 visits f[1] .. f[2^m_f - 1], and m_f == 1 is the base case
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t gammas[PARAM_M - 2] = {0};
    uint16_t deltas[PARAM_M - 2] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

    uint16_t beta_m_pow;
    size_t i, j, k;
    size_t x;

    // Step 1
    // Base case: f = f[0] + f[1].x, so w[s] = f[0] ^ (subset sum s of betas) * f[1].
    if (m_f == 1) {
        for (i = 0; i < m; ++i) {
            tmp[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], f[1]);
        }

        // Build the 2^m outputs by doubling: each new tmp[j] extends the table
        w[0] = f[0];
        x = 1;
        for (j = 0; j < m; ++j) {
            for (k = 0; k < x; ++k) {
                w[x + k] = w[k] ^ tmp[j];
            }
            x <<= 1;
        }

        return;
    }

    // Step 2: compute g
    // Twist f in place: f[i] is multiplied by betas[m - 1]^i (skipped when that beta is 1)
    if (betas[m - 1] != 1) {
        beta_m_pow = 1;
        x = 1;
        x <<= m_f;
        for (i = 1; i < x; ++i) {
            beta_m_pow = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
    for (i = 0; i + 1 < m; ++i) {
        gammas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS128_AVX2_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    // k = 2^(m - 1): number of evaluations returned by each recursive call
    k = 1;
    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        // f1[0] is used directly, so the second recursive call is skipped
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        // Copy k uint16_t values (2 * k bytes) of v into the upper half of w,
        // then combine: w[i] = u[i] ^ gammas_sums[i] * v[i], w[k + i] = w[i] ^ v[i]
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * The input f is const and is not modified here; the twisting done at each level by fft_rec
 * is applied to the local halves f0 and f1.
 *
 * @param[out] w Array receiving 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1] = {0};
    uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t f0[1 << (PARAM_FFT - 1)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 1)] = {0};
    uint16_t deltas[PARAM_M - 1] = {0};
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};

    size_t i, k;

    // Follows Gao and Mateer algorithm
    compute_fft_betas(betas);

    // Step 1: PARAM_FFT > 1, nothing to do

    // Compute gammas sums
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 2: beta_m = 1, nothing to do

    // Step 3
    radix(f0, f1, f, PARAM_FFT);

    // Step 4: Compute deltas
    // deltas[i] = betas[i]^2 + betas[i]
    for (i = 0; i < PARAM_M - 1; ++i) {
        deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(betas[i]) ^ betas[i];
    }

    // Step 5
    // Evaluate both halves; f0 receives the extra coefficient when f_coeffs is odd
    fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    k = 1 << (PARAM_M - 1);
    // Step 6, 7 and error polynomial computation
    // Copies k uint16_t values (2 * k bytes) of v into the upper half of w
    memcpy(w + k, v, 2 * k);

    // Check if 0 is root
    w[0] = u[0];

    // Check if 1 is root
    w[k] ^= u[0];

    // Find other roots
    // w[i] = u[i] ^ betas_sums[i] * v[i]; w[k + i] = w[i] ^ v[i]
    for (i = 1; i < k; ++i) {
        w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas_sums[i], v[i]);
        w[k + i] ^= w[i];
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * Each position is updated with an XOR (error[...] ^= bit), so the result depends on
 * the previous content of error; presumably the caller passes a zeroed array - TODO confirm.
 *
 * @param[in,out] error Array with the error; one bit is XORed into an entry for every evaluation inspected
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t k;
    size_t i, index;

    // Rebuild the same basis and subset sums that PQCLEAN_HQCRMRS128_AVX2_fft uses
    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    k = 1 << (PARAM_M - 1);
    // 1 ^ ((uint16_t) -x >> 15) is 1 when x == 0 and 0 when 0 < x <= 0x8000,
    // i.e. a branch-free "x is zero" test for values in that range.
    error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15);
    error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15);

    for (i = 1; i < k; ++i) {
        // w[i] is paired with the element gammas_sums[i]; its position is derived from gf_log
        index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]];
        error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15);

        // w[k + i] is paired with the element gammas_sums[i] ^ 1
        index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1];
        error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15);
    }
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/fft.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/fft.h
@@ -1,18 +0,0 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>
 #include <stdint.h>

 void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/gf.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/gf.c
@@ -1,176 +0,0 @@
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 /**
 * @file gf.c
 * Galois field implementation with multiplication using the pclmulqdq instruction
 */


 static uint16_t gf_reduce(uint64_t x, size_t deg_x);



 /**
 * Reduces polynomial x modulo primitive polynomial GF_POLY.
 *
 * Each step splits x at bit PARAM_M into a high part (mod) and a low part, then
 * XORs mod back into the low part once per low-order term of PARAM_GF_POLY,
 * shifted by the exponent of that term.
 * The 64-bit working value is truncated to 16 bits on return.
 * @returns x mod GF_POLY
 * @param[in] x Polynomial of degree less than 64
 * @param[in] deg_x The degree of polynomial x
 */
 static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
    uint16_t z1, z2, rmdr, dist;
    uint64_t mod;
    size_t steps, i, j;

    // Deduce the number of steps of reduction
    steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2);

    // Reduce
    for (i = 0; i < steps; ++i) {
        // High part above bit PARAM_M goes to mod, x keeps the low PARAM_M bits
        mod = x >> PARAM_M;
        x &= (1 << PARAM_M) - 1;
        // Contribution of the constant term: mod shifted by 0
        x ^= mod;

        // Walk the remaining set bits of the polynomial from lowest to highest.
        // rmdr has bit 0 toggled so the constant term is not visited again;
        // presumably PARAM_GF_POLY_WT is the number of terms of PARAM_GF_POLY,
        // making the loop skip the constant and the leading term - TODO confirm.
        z1 = 0;
        rmdr = PARAM_GF_POLY ^ 1;
        for (j = PARAM_GF_POLY_WT - 2; j; --j) {
            z2 = __tzcnt_u16(rmdr);      // position of the lowest remaining term
            dist = (uint16_t) (z2 - z1); // extra shift since the previous term
            mod <<= dist;                // mod is now shifted by z2 in total
            x ^= mod;
            rmdr ^= 1 << z2;             // clear the term just processed
            z1 = z2;
        }
    }

    return x;
 }



 /**
 * Computes the product of two elements of GF(2^GF_M).
 *
 * The carry-less product of a and b is formed with the pclmulqdq instruction
 * and then reduced by gf_reduce.
 * @returns the product a*b
 * @param[in] a Element of GF(2^GF_M)
 * @param[in] b Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b) {
    // Carry-less multiplication of the low 64-bit lanes holding a and b
    const __m128i product = _mm_clmulepi64_si128(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b), 0);
    // Low 32 bits of the unreduced product
    const uint32_t unreduced = (uint32_t) _mm_cvtsi128_si32(product);

    return gf_reduce(unreduced, 2 * (PARAM_M - 1));
 }



 /**
 * Compute 16 products in GF(2^GF_M).
 *
 * The 256-bit inputs are split into 128-bit halves. In each half, the 16-bit
 * elements are multiplied with carry-less multiplications: the even-indexed
 * elements of a 64-bit lane are isolated with CONST128_MASKL, the odd-indexed
 * ones with CONST128_MASKH, and the MIDDLEMASK constants keep only the 16-bit
 * field holding a_i*b_i while discarding the cross terms. The products are then
 * packed back into 16-bit lanes and reduced with the red[] table.
 * NOTE(review): this assumes every element is below 2^8 so that an unreduced
 * product fits in 16 bits - confirm against callers.
 *
 * @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M)
 * @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers
 * @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer
 *
 */
 __m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b) {
    __m128i al = _mm256_extractf128_si256(a, 0);
    __m128i ah = _mm256_extractf128_si256(a, 1);
    __m128i bl = _mm256_extractf128_si256(b, 0);
    __m128i bh = _mm256_extractf128_si256(b, 1);

    // Low 64-bit lanes of al/bl (selector 0x0): products of elements 0..3
    __m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0);
    abl0 &= CONST128_MIDDLEMASKL;
    abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

    // High 64-bit lanes of al/bl (selector 0x11): products of elements 4..7
    __m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11);
    abh0 &= CONST128_MIDDLEMASKL;
    abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

    // Pack the four products of each register (one per 32-bit field) into
    // consecutive 16-bit lanes: abl0 -> low 64 bits, abh0 -> high 64 bits
    abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL);
    abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH);

    // Same computation for the upper 128-bit halves ah/bh (elements 8..15)
    __m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0);
    abl1 &= CONST128_MIDDLEMASKL;
    abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

    __m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11);
    abh1 &= CONST128_MIDDLEMASKL;
    abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

    abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL);
    abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH);

    // Reassemble the 16 unreduced products: abl1 is the high half, abl0 the low half
    __m256i ret = _mm256_set_m128i(abl1, abl0);

    // aux starts with bit 8 set in every 16-bit lane and moves up one bit per round
    __m256i aux = CONST256_MR0;

    // Reduction: for each of the bits 8..14, XOR red[i] into the lanes where that bit is set
    for (int32_t i = 0; i < 7; i++) {
        ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux);
        aux = aux << 1;
    }

    // Keep only the low 8 bits of every lane
    ret &= CONST256_LASTMASK;
    return ret;
 }



 /**
 * Computes the square of an element of GF(2^GF_M).
 *
 * Bit i of a is moved to bit 2*i (zeros are interleaved between the bits of a)
 * and the spread value is then reduced by gf_reduce.
 * @returns a^2
 * @param[in] a Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a) {
    const uint32_t wide = a;
    uint32_t spread = wide & 1;   // bit 0 stays in place

    for (size_t bit = 1; bit < PARAM_M; ++bit) {
        // (wide << bit) carries bit number `bit` of a at position 2*bit
        spread ^= (wide << bit) & (1 << 2 * bit);
    }

    return gf_reduce(spread, 2 * (PARAM_M - 1));
 }



 /**
 * Computes the inverse of an element of GF(2^8) as a^254,
 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254.
 * Each local below is named after the power of a it holds.
 * @returns the inverse of a
 * @param[in] a Element of GF(2^GF_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a) {
    const uint16_t a2 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a);
    const uint16_t a3 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a2, a);
    const uint16_t a4 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a2);
    const uint16_t a7 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a4, a3);
    const uint16_t a11 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a4, a7);
    const uint16_t a15 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a11, a4);
    const uint16_t a30 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a15);
    const uint16_t a60 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a30);
    const uint16_t a120 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a60);
    const uint16_t a127 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a120, a7);

    return PQCLEAN_HQCRMRS128_AVX2_gf_square(a127); /* a^254 */
 }



 /**
 * Reduces i modulo 2^GF_M-1 without branching.
 * i must be less than 2*(2^GF_M-1), so a single conditional subtraction
 * is enough: the result is either i or i-2^GF_M+1.
 * @returns i mod (2^GF_M-1)
 * @param[in] i The integer whose modulo is taken
 */
 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i) {
    // Subtract unconditionally; the 16-bit result wraps when i < GF_MUL_ORDER
    const uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // Top bit of the wrapped difference: 1 if (i < GF_MUL_ORDER), else 0
    const uint16_t borrow = reduced >> 15;

    // GF_MUL_ORDER when the subtraction has to be undone, 0 otherwise
    const uint16_t add_back = ((uint16_t) (0 - borrow)) & PARAM_GF_MUL_ORDER;

    return reduced + add_back;
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/gf.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/gf.h
@@ -1,69 +0,0 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <immintrin.h>
 #include <stddef.h>
 #include <stdint.h>

 #define _mm256_set_m128i(v0, v1)  _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

 /**
 * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
 * The last two elements are needed by the PQCLEAN_HQCRMRS128_AVX2_gf_mul function
 * (for example if both elements to multiply are zero).
 */
 static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



 /**
 * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
 * The logarithm of 0 is set to 0 by convention.
 */
 static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };

 /**
 * Masks needed for the computation of 16 mult in GF(2^M)
 */
 #define CONST256_MR0      _mm256_set1_epi64x((long long) 0x0100010001000100)
 #define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff)
 #define CONST128_MASKL       _mm_set1_epi64x((long long) 0x0000ffff0000ffff)
 #define CONST128_MASKH       _mm_set1_epi64x((long long) 0xffff0000ffff0000)
 #define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff)
 #define CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000)
 #define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff)
 #define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100)

 /**
 * Reduction constants: red[i] holds x^(8+i) modulo x^8+x^4+x^3+x^2+1 for i = 0..6
 * (e.g. red[0] = 0x001d = x^4+x^3+x^2+1), replicated in each of the sixteen
 * 16-bit lanes of a 256-bit register.
 */
 static const __m256i red[7] = {
    {0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL},
    {0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL},
    {0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL},
    {0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL},
    {0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL},
    {0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL},
    {0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL},

 };


 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b);

 __m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b);

 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a);

 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a);

 uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/gf2x.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/gf2x.c
@@ -1,369 +0,0 @@
 #include "gf2x.h"
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * \file gf2x.c
 * \brief AVX2 implementation of multiplication of two polynomials
 */



 #define VEC_N_SPLIT_3x3 CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256)
 #define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3)

 static inline void reduce(uint64_t *o, const __m256i *a);
 static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B);
 static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B);
 static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B);


 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * This function computes the modular reduction of the polynomial a(x):
 * the part of a at bit position PARAM_N and above is shifted down by PARAM_N
 * bits and XORed onto the low part, then the last word is masked with RED_MASK.
 * A first loop handles four 64-bit words per iteration with AVX2, a second
 * loop finishes the remaining words one at a time.
 *
 * @param[out] o Pointer to the result
 * @param[in] a256 Pointer to the polynomial a(x); it is also dereferenced directly
 *                 as __m256i (a256[i2]) - presumably 32-byte aligned, TODO confirm
 */
 static inline void reduce(uint64_t *o, const __m256i *a256) {
    size_t i, i2;
    __m256i r256, carry256;
    __m256i *o256 = (__m256i *)o;
    const uint64_t *a64 = (const uint64_t *)a256;
    uint64_t r, carry;

    // Vector part: i indexes the 64-bit source word, i2 the 256-bit output block
    i2 = 0;
    for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) {
        // Four words of (a >> PARAM_N): low bits come from word i, high bits from word i + 1
        r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i]));
        r256 = _mm256_srli_epi64(r256, PARAM_N & 63);
        carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1]));
        carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63);
        r256 ^= carry256;
        _mm256_storeu_si256(&o256[i2], a256[i2] ^ r256);
        i2 += 1;
    }

    // Scalar tail: convert i from a source-word index to an output-word index
    i = i - (PARAM_N >> 6);
    for (; i < (PARAM_N >> 6) + 1; i++) {
        r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63);
        carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63);
        r ^= carry;
        o[i] = a64[i] ^ r;
    }

    // Clear the bits of the last word selected out by RED_MASK
    o[PARAM_N >> 6] &= RED_MASK;
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 * A(x) and B(x) are stored in 128-bit registers
 * This function computes A(x)*B(x) using Karatsuba
 *
 * A and B each span two 128-bit words (A[0..1], B[0..1]); the product is
 * written to four 128-bit words C[0..3]. Each 128x128-bit sub-product is
 * itself built from three 64x64-bit carry-less multiplications.
 *
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) {
    __m128i D1[2];
    __m128i D0[2], D2[2];
    __m128i Al = _mm_loadu_si128(A);
    __m128i Ah = _mm_loadu_si128(A + 1);
    __m128i Bl = _mm_loadu_si128(B);
    __m128i Bh = _mm_loadu_si128(B + 1);

    //  Compute Al.Bl=D0
    // DD0 = low*low, DD2 = high*high, DD1 = middle term from (low+high)*(low+high);
    // shuffle 0x4e swaps the two 64-bit lanes so the XOR yields low+high
    __m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0);
    __m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11);
    __m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e));
    __m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e));
    __m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    //  Compute Ah.Bh=D2
    DD0 = _mm_clmulepi64_si128(Ah, Bh, 0);
    DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11);
    AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e));
    BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Compute AlpAh.BlpBh=D1
    // Initialisation of AlpAh and BlpBh
    __m128i AlpAh = _mm_xor_si128(Al, Ah);
    __m128i BlpBh = _mm_xor_si128(Bl, Bh);
    DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0);
    DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11);
    AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e));
    BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e));
    DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
    D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
    D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

    // Final computation of C
    // Karatsuba recombination: the two middle words receive D0 ^ D1 ^ D2 contributions
    __m128i middle = _mm_xor_si128(D0[1], D2[0]);
    C[0] = D0[0];
    C[1] = middle ^ D0[0] ^ D1[0];
    C[2] = middle ^ D1[1] ^ D2[1];
    C[3] = D2[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_1: A and B each span two 256-bit
 * words, the product spans four 256-bit words.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[2], cross[2], high[2];
    __m256i sum_a, sum_b, overlap;

    // Products of the low words (A[0], B[0]) and of the high words (A[1], B[1])
    karat_mult_1((__m128i *) low, (const __m128i *) A, (const __m128i *) B);
    karat_mult_1((__m128i *) high, (const __m128i *) (A + 1), (const __m128i *) (B + 1));

    // Product of the sums (A[0] + A[1]) * (B[0] + B[1])
    sum_a = A[0] ^ A[1];
    sum_b = B[0] ^ B[1];
    karat_mult_1((__m128i *) cross, (__m128i *) &sum_a, (__m128i *) &sum_b);

    // Words where the low and the high product overlap
    overlap = low[1] ^ high[0];

    C[0] = low[0];
    C[1] = _mm256_xor_si256(_mm256_xor_si256(overlap, low[0]), cross[0]);
    C[2] = _mm256_xor_si256(_mm256_xor_si256(overlap, cross[1]), high[1]);
    C[3] = high[1];
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * One Karatsuba level on top of karat_mult_2: A and B each span four 256-bit
 * words, the product spans eight 256-bit words.
 * @param[out] C Pointer to the result
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[4], cross[4], high[4];
    __m256i sum_a[2], sum_b[2];
    size_t w;

    // Products of the low halves and of the high halves
    karat_mult_2(low, A, B);
    karat_mult_2(high, A + 2, B + 2);

    // Product of the sums of both halves
    for (w = 0; w < 2; w++) {
        sum_a[w] = A[w] ^ A[w + 2];
        sum_b[w] = B[w] ^ B[w + 2];
    }
    karat_mult_2(cross, sum_a, sum_b);

    // Recombination, two output words per quarter of C
    for (w = 0; w < 2; w++) {
        const __m256i overlap = _mm256_xor_si256(low[w + 2], high[w]);

        C[w]     = low[w];
        C[w + 2] = overlap ^ low[w] ^ cross[w];
        C[w + 4] = overlap ^ cross[w + 2] ^ high[w + 2];
        C[w + 6] = high[w + 2];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x) for eight-word operands
 *
 * One Karatsuba level built on karat_mult_4. Each operand is read as eight
 * 256-bit words and the product is written as sixteen 256-bit words.
 * @param[out] C Pointer to the result (16 words are written)
 * @param[in] A Pointer to the polynomial A(x) (8 words are read)
 * @param[in] B Pointer to the polynomial B(x) (8 words are read)
 */
 static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i lo[8], mid[8], hi[8], sum_a[4], sum_b[4];
    __m256i overlap;
    size_t k;

    // Low half + high half of each operand
    for (k = 0; k < 4; k++) {
        sum_a[k] = _mm256_xor_si256(A[k], A[k + 4]);
        sum_b[k] = _mm256_xor_si256(B[k], B[k + 4]);
    }

    // The three half-size products: low*low, high*high, sum*sum
    karat_mult_4(lo, A, B);
    karat_mult_4(hi, A + 4, B + 4);
    karat_mult_4(mid, sum_a, sum_b);

    // Recombine the partial products word by word
    for (k = 0; k < 4; k++) {
        overlap = _mm256_xor_si256(lo[k + 4], hi[k]);

        C[k]      = lo[k];
        C[k + 4]  = _mm256_xor_si256(_mm256_xor_si256(overlap, lo[k]), mid[k]);
        C[k + 8]  = _mm256_xor_si256(_mm256_xor_si256(overlap, mid[k + 4]), hi[k + 4]);
        C[k + 12] = hi[k + 4];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * This function computes A(x)*B(x) using a three-way Karatsuba split.
 * Each operand is read as three consecutive limbs of VEC_N_SPLIT_3x3 256-bit
 * words. Six limb products are computed with karat_mult_8 and recombined by
 * XOR into a local buffer, which is then copied to C.
 *
 * NOTE(review): karat_mult_8 reads 8 words per operand, so this presumably
 * requires VEC_N_SPLIT_3x3 == 8 - confirm against its definition.
 *
 * @param[out] C Pointer to the result (2 * VEC_N_SPLIT_3 words are written)
 * @param[in] A Pointer to the polynomial A(x) (3 * VEC_N_SPLIT_3x3 words are read)
 * @param[in] B Pointer to the polynomial B(x) (3 * VEC_N_SPLIT_3x3 words are read)
 */
 static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) {
    size_t i, j;
    const __m256i *a0, *b0, *a1, *b1, *a2, *b2;
    __m256i aa01[VEC_N_SPLIT_3x3], bb01[VEC_N_SPLIT_3x3], aa02[VEC_N_SPLIT_3x3], bb02[VEC_N_SPLIT_3x3], aa12[VEC_N_SPLIT_3x3], bb12[VEC_N_SPLIT_3x3];
    __m256i D0[2 * VEC_N_SPLIT_3x3], D1[2 * VEC_N_SPLIT_3x3], D2[2 * VEC_N_SPLIT_3x3], D3[2 * VEC_N_SPLIT_3x3], D4[2 * VEC_N_SPLIT_3x3], D5[2 * VEC_N_SPLIT_3x3];
    __m256i ro256[6 * VEC_N_SPLIT_3x3];
    __m256i middle0;

    // Limb pointers: operand = limb0 | limb1 | limb2, each VEC_N_SPLIT_3x3 words
    a0 = A;
    a1 = A + VEC_N_SPLIT_3x3;
    a2 = A + (VEC_N_SPLIT_3x3 << 1);

    b0 = B;
    b1 = B + VEC_N_SPLIT_3x3;
    b2 = B + (VEC_N_SPLIT_3x3 << 1);

    // Pairwise limb sums (XOR) used for the three cross products
    for (i = 0; i < VEC_N_SPLIT_3x3; i++) {
        aa01[i] = a0[i] ^ a1[i];
        bb01[i] = b0[i] ^ b1[i];

        aa12[i] = a2[i] ^ a1[i];
        bb12[i] = b2[i] ^ b1[i];

        aa02[i] = a0[i] ^ a2[i];
        bb02[i] = b0[i] ^ b2[i];
    }

    // D0 = a0*b0, D1 = a1*b1, D2 = a2*b2
    karat_mult_8(D0, a0, b0);
    karat_mult_8(D1, a1, b1);
    karat_mult_8(D2, a2, b2);

    // D3 = (a0+a1)*(b0+b1), D4 = (a0+a2)*(b0+b2), D5 = (a1+a2)*(b1+b2)
    karat_mult_8(D3, aa01, bb01);
    karat_mult_8(D4, aa02, bb02);
    karat_mult_8(D5, aa12, bb12);

    // Recombination, with X standing for a shift by one limb:
    //   D0 + (D3+D0+D1) X + (D4+D0+D1+D2) X^2 + (D5+D1+D2) X^3 + D2 X^4
    // Index i addresses the low half of a product and j its high half; each
    // output limb is the low half of one term XOR the high half of the previous.
    for (i = 0; i < VEC_N_SPLIT_3x3; i++) {
        j = i + VEC_N_SPLIT_3x3;
        middle0 = D0[i] ^ D1[i] ^ D0[j];
        ro256[i] = D0[i];
        ro256[j]  = D3[i] ^ middle0;
        ro256[j + VEC_N_SPLIT_3x3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0;
        // middle0 is reused for the terms shared by the two upper limbs
        middle0 = D1[j] ^ D2[i] ^ D2[j];
        ro256[j + (VEC_N_SPLIT_3x3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0;
        ro256[i + (VEC_N_SPLIT_3x3 << 2)] = D5[j] ^ middle0;
        ro256[j + (VEC_N_SPLIT_3x3 << 2)] = D2[j];
    }

    // NOTE(review): ro256 holds 6 * VEC_N_SPLIT_3x3 words while this loop copies
    // 2 * VEC_N_SPLIT_3; assumes VEC_N_SPLIT_3 == 3 * VEC_N_SPLIT_3x3 - confirm.
    for (i = 0; i < 2 * VEC_N_SPLIT_3; i++) {
        C[i] = ro256[i];
    }
 }



 /**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * This function computes A(x)*B(x) using a three-way Karatsuba split whose
 * limb products are themselves computed by karat_three_way_mult (hence a
 * nine-way split overall). Each operand's arr64 storage is read as three
 * consecutive limbs of VEC_N_SPLIT_3 256-bit words.
 * @param[out] C Pointer to the result (6 * VEC_N_SPLIT_3 words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
 static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B) {
    size_t i, j;
    const __m256i *a0, *b0, *a1, *b1, *a2, *b2;
    __m256i aa01[VEC_N_SPLIT_3], bb01[VEC_N_SPLIT_3], aa02[VEC_N_SPLIT_3], bb02[VEC_N_SPLIT_3], aa12[VEC_N_SPLIT_3], bb12[VEC_N_SPLIT_3];
    __m256i D0[2 * VEC_N_SPLIT_3], D1[2 * VEC_N_SPLIT_3], D2[2 * VEC_N_SPLIT_3], D3[2 * VEC_N_SPLIT_3], D4[2 * VEC_N_SPLIT_3], D5[2 * VEC_N_SPLIT_3];
    __m256i middle0;

    // View the 64-bit storage as 256-bit words and locate the three limbs
    a0 = (__m256i *)(A->arr64);
    a1 = a0 + VEC_N_SPLIT_3;
    a2 = a0 + (2 * VEC_N_SPLIT_3);

    b0 = (__m256i *)(B->arr64);
    b1 = b0 + VEC_N_SPLIT_3;
    b2 = b0 + (2 * VEC_N_SPLIT_3);

    // Pairwise limb sums (XOR) used for the three cross products
    for (i = 0; i < VEC_N_SPLIT_3; i++) {
        aa01[i] = a0[i] ^ a1[i];
        bb01[i] = b0[i] ^ b1[i];

        aa12[i] = a2[i] ^ a1[i];
        bb12[i] = b2[i] ^ b1[i];

        aa02[i] = a0[i] ^ a2[i];
        bb02[i] = b0[i] ^ b2[i];
    }

    // D0 = a0*b0, D1 = a1*b1, D2 = a2*b2
    karat_three_way_mult(D0, a0, b0);
    karat_three_way_mult(D1, a1, b1);
    karat_three_way_mult(D2, a2, b2);

    // D3 = (a0+a1)*(b0+b1), D4 = (a0+a2)*(b0+b2), D5 = (a1+a2)*(b1+b2)
    karat_three_way_mult(D3, aa01, bb01);
    karat_three_way_mult(D4, aa02, bb02);
    karat_three_way_mult(D5, aa12, bb12);

    // Recombination, with X standing for a shift by one limb:
    //   D0 + (D3+D0+D1) X + (D4+D0+D1+D2) X^2 + (D5+D1+D2) X^3 + D2 X^4
    // Index i addresses the low half of a product and j its high half.
    for (i = 0; i < VEC_N_SPLIT_3; i++) {
        j = i + VEC_N_SPLIT_3;
        middle0 = D0[i] ^ D1[i] ^ D0[j];
        C[i] = D0[i];
        C[j]  = D3[i] ^ middle0;
        C[j + VEC_N_SPLIT_3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0;
        // middle0 is reused for the terms shared by the two upper limbs
        middle0 = D1[j] ^ D2[i] ^ D2[j];
        C[j + (VEC_N_SPLIT_3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0;
        C[i + (VEC_N_SPLIT_3 << 2)] = D5[j] ^ middle0;
        C[j + (VEC_N_SPLIT_3 << 2)] = D2[j];
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * This function multiplies the polynomials <b>a1</b> and <b>a2</b> with
 * karat_mult9 and then reduces the double-length product modulo
 * \f$ X^n - 1\f$ with reduce().
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to a polynomial
 * @param[in] a2 Pointer to a polynomial
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) {
    // The unreduced product spans 2 * PARAM_N_MULT bits. The scratch buffer is
    // an array of 256-bit words, so its length is that bit count divided by 256.
    // Sizing it by the raw bit count would reserve (and zero) over a megabyte
    // of stack on every call.
    __m256i a1_times_a2[CEIL_DIVIDE(2 * PARAM_N_MULT + 1, 256)] = {0};
    karat_mult9(a1_times_a2, a1, a2);
    reduce(o, a1_times_a2);
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/gf2x.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/gf2x.h
@@ -1,21 +0,0 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
 #include "parameters.h"
 #include <immintrin.h>
 #include <stdint.h>

 /**
 * @brief Storage for one polynomial operand of vect_mul
 *
 * arr64 holds the coefficients as VEC_N_256_SIZE_64 64-bit words. The
 * __m256i member is not used to carry data by anything in this header;
 * presumably it exists so the union gets the alignment of a 256-bit vector
 * (NOTE(review): confirm against the loads performed in gf2x.c).
 */
 typedef union {
    uint64_t arr64[VEC_N_256_SIZE_64];
    __m256i dummy;
 } aligned_vec_t;

 /* Writes the product a1*a2 (computed in gf2x.c) to o. */
 void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/hqc.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/hqc.c
@@ -1,168 +0,0 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Key generation of the HQC_PKE IND_CPA scheme
 *
 * Two fresh seeds are drawn. The secret-key seed is expanded into the
 * fixed-weight vectors <b>x</b> and <b>y</b>; the public-key seed is expanded
 * into the vector <b>h</b>. The syndrome <b>s</b> = x + y.h is serialised
 * together with the public-key seed to form the public key.
 *
 * The secret key is the secret-key seed followed by the public key, which is
 * appended in order to respect the NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct sk_xof;
    AES_XOF_struct pk_xof;
    uint8_t sk_seed[SEED_BYTES] = {0};
    uint8_t pk_seed[SEED_BYTES] = {0};
    aligned_vec_t x = {0};
    aligned_vec_t y = {0};
    aligned_vec_t h = {0};
    aligned_vec_t s = {0};
    aligned_vec_t y_times_h = {0};

    // Seed expander for the secret key (drawn first), then for the public key
    randombytes(sk_seed, SEED_BYTES);
    seedexpander_init(&sk_xof, sk_seed, &sk_seed[32], SEEDEXPANDER_MAX_LENGTH);

    randombytes(pk_seed, SEED_BYTES);
    seedexpander_init(&pk_xof, pk_seed, &pk_seed[32], SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors x and y, both of weight PARAM_OMEGA
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_xof, x.arr64, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_xof, y.arr64, PARAM_OMEGA);

    // Public vector h and syndrome s = x + y.h
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&pk_xof, h.arr64);
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(y_times_h.arr64, &y, &h);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(s.arr64, x.arr64, y_times_h.arr64, VEC_N_256_SIZE_64);

    // Serialise: pk first, because sk embeds pk
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(pk, pk_seed, s.arr64);
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(sk, sk_seed, pk);

 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>, computed as
 * u = r1 + r2.h and v = m.G + s.r2 + e, where r1, r2 and e are fixed-weight
 * vectors derived from <b>theta</b> and h, s come from the public key.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct seedexpander;
    aligned_vec_t vh = {0};
    uint64_t *h = vh.arr64;
    aligned_vec_t vs = {0};
    uint64_t *s = vs.arr64;
    aligned_vec_t vr1 = {0};
    uint64_t *r1 = vr1.arr64;
    aligned_vec_t vr2 = {0};
    uint64_t *r2 = vr2.arr64;
    aligned_vec_t ve = {0};
    uint64_t *e = ve.arr64;
    aligned_vec_t vtmp1 = {0};
    uint64_t *tmp1 = vtmp1.arr64;
    aligned_vec_t vtmp2 = {0};
    uint64_t *tmp2 = vtmp2.arr64;
    aligned_vec_t vtmp3 = {0};
    uint64_t *tmp3 = vtmp3.arr64;

    // Create seed_expander from theta
    seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(h, s, pk);

    // Generate r1, r2 and e (in this order, all from the same seed expander)
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&seedexpander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E);



    // Compute u = r1 + r2.h
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(tmp1, &vr2, &vh);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(u, r1, tmp1, VEC_N_256_SIZE_64);

    // Compute v = m.G by encoding the message.
    // The codeword is written as bytes into v's own storage, converted in place
    // to 64-bit words, and then widened from PARAM_N1N2 to PARAM_N bits in tmp1
    // (tmp1 is free again once u has been produced above).
    PQCLEAN_HQCRMRS128_AVX2_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

    // Compute v = m.G + s.r2 + e, then truncate back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(tmp2, &vr2, &vs);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(tmp3, e, tmp2, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(tmp2, tmp1, tmp3, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Regenerates <b>x</b> and <b>y</b> from the secret key, computes v - u.y
 * (subtraction and addition coincide for binary vectors) and decodes the
 * result into the message.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    aligned_vec_t x = {0};
    aligned_vec_t y = {0};
    aligned_vec_t v_wide = {0};
    aligned_vec_t work = {0};
    aligned_vec_t u_times_y = {0};

    // Retrieve x, y, pk from secret key
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(x.arr64, y.arr64, pk, sk);

    // Widen v to PARAM_N bits and copy u into an aligned_vec_t for vect_mul
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(v_wide.arr64, PARAM_N, v, PARAM_N1N2);
    memcpy(work.arr64, u, sizeof(work.arr64));

    // work = v - u.y (the copy of u is no longer needed after the product)
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(u_times_y.arr64, &y, &work);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(work.arr64, v_wide.arr64, u_times_y.arr64, VEC_N_256_SIZE_64);


    // Serialise to bytes (reusing v_wide's storage) and decode into m
    PQCLEAN_HQCRMRS128_AVX2_store8_arr((uint8_t *)v_wide.arr64, VEC_N_SIZE_BYTES, work.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_code_decode(m, (uint8_t *)v_wide.arr64);
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/hqc.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/hqc.h
@@ -1,19 +0,0 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 /* Generates a key pair; pk and sk are output byte strings. */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 /* Encrypts m under pk using randomness derived from theta; outputs the ciphertext vectors u and v. */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

 /* Decrypts the ciphertext vectors (u, v) with sk; outputs the message m. */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/kem.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/kem.c
@@ -1,140 +0,0 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper: the key pair is produced entirely by the PKE key generation.
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (this function has no failure path)
 */
 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(pk, sk);
    return 0;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Draws a random message m, derives the encryption randomness theta from m,
 * encrypts m into (u, v), and outputs the ciphertext (u, v, d) with
 * d = SHA-512(m) and the shared secret ss = SHA-512(m || u || v).
 *
 * All working buffers have automatic storage, so concurrent calls do not
 * share any state inside this function.
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 (this function has no failure path)
 */
 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    // Must not be static: a function-local static buffer would be shared by
    // every caller and make concurrent encapsulations race on u.
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    randombytes(m, VEC_K_SIZE_BYTES);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Computing shared secret as the hash of m || u || v
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(ct, u, v, d);


    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Decrypts the ciphertext to a candidate message m, re-encrypts m with the
 * randomness derived from it, and compares the result (and the hash d) with
 * the received ciphertext. The shared secret is always computed from the
 * received u and v, then cleared if any comparison fails.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    uint8_t result;
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_256_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk (pk is stored right after the secret-key seed)
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, m, VEC_K_SIZE_BYTES);

    // Computing shared secret as the hash of m || u || v
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    result = PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_AVX2_vect_compare(d, d2, SHA512_BYTES);
    // Spread "any difference" into a full byte mask without branching: a nonzero
    // result becomes 0xFF, zero stays 0x00. This relies on the right shift of a
    // negative value being an arithmetic shift.
    result = (uint8_t) (-((int16_t) result) >> 15);
    // Clear the shared secret on failure (mask 0xFF), keep it on success (mask 0x00)
    for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
        ss[i] &= ~result;
    }


    // 0 when the mask is 0x00, -1 when it is 0xFF
    return -(result & 1);
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/parameters.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/parameters.h
@@ -1,111 +0,0 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H


 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
 #include "api.h"


 #define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Divide a by b and round the result up */

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_256                        Define the number of 256-bit elements needed to store PARAM_N bits

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_N_MULT                          PARAM_N rounded up, in bits, so that it splits into 9 parts that are each a multiple of 256 bits
  #define VEC_N_256_SIZE_64                     Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
  #define VEC_N1N2_256_SIZE_64                  Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_POLY_WT                      Hamming weight of PARAM_GF_POLY
  #define PARAM_GF_POLY_M2                      Distance between the primitive polynomial first two set bits
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=15
                                                The smallest power of 2 greater than 15+1 is 32=2^5
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                 17669
 #define PARAM_N1                                46
 #define PARAM_N2                                384
 #define PARAM_N1N2                              17664
 #define PARAM_OMEGA                             66
 #define PARAM_OMEGA_E                           75
 #define PARAM_OMEGA_R                           75
 #define PARAM_SECURITY                          128
 #define PARAM_DFR_EXP                           128

 #define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES

 #define UTILS_REJECTION_THRESHOLD               16767881
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        PARAM_K
 #define VEC_N1_SIZE_BYTES                       PARAM_N1
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_256                          CEIL_DIVIDE(PARAM_N, 256)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_N_MULT                            (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256))
 #define VEC_N_256_SIZE_64                       (PARAM_N_MULT / 64)
 #define VEC_N1N2_256_SIZE_64                    (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

 #define PARAM_DELTA                             15
 #define PARAM_M                                 8
 #define PARAM_GF_POLY                           0x11D
 #define PARAM_GF_POLY_WT                        5
 #define PARAM_GF_POLY_M2                        4
 #define PARAM_GF_MUL_ORDER                      255
 #define PARAM_K                                 16
 #define PARAM_G                                 31
 #define PARAM_FFT                               5
 #define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

 #define RED_MASK                                0x1f
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/parsing.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/parsing.c
@@ -1,186 +0,0 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */


 /**
 * @brief Store a 64-bit word as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer of at least 8 bytes
 * @param[in] in Word to serialise
 */
 void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in) {
    for (size_t k = 0; k < 8; k++) {
        out[k] = (unsigned char)((in >> (8 * k)) & 0xFF);
    }
 }


 /**
 * @brief Load 8 bytes as a 64-bit word, least significant byte first
 *
 * @param[in] in Source buffer of at least 8 bytes
 * @returns The word whose byte k (counting from the least significant) is in[k]
 */
 uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in) {
    uint64_t word = 0;

    // Walk from the most significant byte down so each shift makes room for the next
    for (size_t k = 8; k > 0; k--) {
        word = (word << 8) | in[k - 1];
    }

    return word;
 }

 /**
 * @brief Load a byte array into an array of 64-bit words, least significant byte first
 *
 * Complete groups of 8 input bytes are converted with load8 until either the
 * input or the output is exhausted. If fewer than 8 input bytes then remain and
 * there is still room in the output, they are packed into one more word whose
 * upper bytes are zero. Output words beyond that are left untouched.
 *
 * The encryption routine calls this with in8 and out64 pointing at the same
 * buffer, so the statement order below should not be changed casually.
 *
 * @param[out] out64 Destination array of 64-bit words
 * @param[in] outlen Number of words available in out64
 * @param[in] in8 Source byte array
 * @param[in] inlen Number of bytes available in in8
 */
 void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t index_in = 0;
    size_t index_out = 0;

    // first copy by 8 bytes
    if (inlen >= 8 && outlen >= 1) {
        while (index_out < outlen && index_in + 8 <= inlen) {
            out64[index_out] = PQCLEAN_HQCRMRS128_AVX2_load8(in8 + index_in);

            index_in += 8;
            index_out += 1;
        }
    }

    // we now need to do the last 7 bytes if necessary
    if (index_in >= inlen || index_out >= outlen) {
        return;
    }
    // Start from the last input byte (most significant of the partial word) and
    // shift in the remaining bytes down to in8[index_in]
    out64[index_out] = in8[inlen - 1];
    for (int8_t i = (int8_t)(inlen - index_in) - 2; i >= 0; i--) {
        out64[index_out] <<= 8;
        out64[index_out] |= in8[index_in + i];
    }
 }

 /**
 * @brief Store an array of 64-bit words as bytes, least significant byte first
 *
 * Output byte p is taken from word p / 8 at byte position p % 8. Writing stops
 * as soon as either outlen bytes have been produced or the byte to produce
 * would come from a word at index inlen or beyond.
 *
 * @param[out] out8 Destination byte array
 * @param[in] outlen Number of bytes available in out8
 * @param[in] in64 Source array of 64-bit words
 * @param[in] inlen Number of words available in in64
 */
 void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen && (pos >> 3) < inlen; pos++) {
        out8[pos] = (uint8_t)((in64[pos >> 3] >> ((pos & 7) << 3)) & 0xFF);
    }
 }


 /**
 * @brief Serialise a secret key
 *
 * The output is the secret-key seed (SEED_BYTES bytes) immediately followed by
 * the public key (PUBLIC_KEY_BYTES bytes), which is appended in order to
 * respect the NIST API.
 *
 * @param[out] sk String containing the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_slot, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Parse a serialised secret key
 *
 * Copies out the embedded public key and regenerates the secret vectors
 * <b>x</b> and <b>y</b> (in that order) from the secret-key seed stored at the
 * start of the string.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint64_t representation of vector y
 * @param[out] pk String containing the public key
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
    AES_XOF_struct xof;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *pk_slot = sk + SEED_BYTES;

    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, pk_slot, PUBLIC_KEY_BYTES);

    seedexpander_init(&xof, seed, &seed[32], SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&xof, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&xof, y, PARAM_OMEGA);
 }

 /**
 * @brief Serialise a public key
 *
 * The output is the public-key seed (SEED_BYTES bytes) followed by the
 * syndrome <b>s</b> stored as VEC_N_SIZE_BYTES bytes.
 *
 * @param[out] pk String containing the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *syndrome_slot = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(syndrome_slot, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
 }



 /**
 * @brief Parse a serialised public key
 *
 * Loads the syndrome <b>s</b> from the bytes that follow the seed and
 * regenerates the vector <b>h</b> from the seed stored at the start of the string.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    AES_XOF_struct xof;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *syndrome_slot = pk + SEED_BYTES;

    memcpy(seed, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(s, VEC_N_SIZE_64, syndrome_slot, VEC_N_SIZE_BYTES);

    seedexpander_init(&xof, seed, &seed[32], SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&xof, h);
 }


 /**
 * @brief Serialise a ciphertext
 *
 * The output is <b>u</b> (VEC_N_SIZE_BYTES bytes), then <b>v</b>
 * (VEC_N1N2_SIZE_BYTES bytes), then the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String containing the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *u_slot = ct;
    uint8_t *v_slot = u_slot + VEC_N_SIZE_BYTES;
    uint8_t *d_slot = v_slot + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_AVX2_store8_arr(u_slot, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(v_slot, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_slot, d, SHA512_BYTES);
 }


 /**
 * @brief Parse a serialised ciphertext
 *
 * Reads <b>u</b> (VEC_N_SIZE_BYTES bytes), then <b>v</b>
 * (VEC_N1N2_SIZE_BYTES bytes), then the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String containing the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *u_slot = ct;
    const uint8_t *v_slot = u_slot + VEC_N_SIZE_BYTES;
    const uint8_t *d_slot = v_slot + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_AVX2_load8_arr(u, VEC_N_SIZE_64, u_slot, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, v_slot, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_slot, SHA512_BYTES);
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/parsing.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/parsing.h
@@ -1,36 +0,0 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 *
 * Declares the helpers that convert between byte strings and the 64-bit word
 * vectors used internally, and the (de)serializers for keys and ciphertexts.
 */

 // <stddef.h> provides size_t, which the *_arr prototypes below use;
 // <stdint.h> alone is not guaranteed to declare it.
 #include <stddef.h>
 #include <stdint.h>

 // Store one 64-bit word into a byte buffer / load one back.
 // NOTE(review): byte order is defined in parsing.c, not visible here.
 void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in);

 uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in);

 // Array variants: outlen/inlen are element counts of the respective buffers
 // (64-bit words for the uint64_t side, bytes for the uint8_t side).
 void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

 void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


 // Secret key <-> byte string.
 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


 // Public key <-> byte string. The string holds a seed followed by s;
 // from_string regenerates h from the seed.
 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 // Ciphertext <-> byte string, laid out as u || v || d.
 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/reed_muller.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/reed_muller.c
@@ -1,389 +0,0 @@
 #include "parameters.h"
 #include "reed_muller.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file reed_muller.c
 * Constant time implementation of Reed-Muller code RM(1,7)
 */


 // Number of times each 128-bit RM(1,7) codeword is repeated:
 // PARAM_N2 bits per message byte, divided into 128-bit codewords (rounded up).
 #define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

 // Copy bit 0 of x into all bits of a 64 bit value:
 // yields 0 when bit 0 is clear and -1 (all ones) when it is set.
 #define BIT0MASK(x) (int64_t)(-((x) & 1))

 // Forward declarations of the file-local helpers defined below.
 // The `static` here gives all four internal linkage, including the ones
 // whose definitions only say `inline`.
 static void encode(uint8_t *word, uint8_t message);
 static void expand_and_sum(__m256i *dst, const uint64_t *src);
 static void hadamard(__m256i *src, __m256i *dst);
 static uint32_t find_peaks(__m256i *transform);



 /**
 * @brief Encode one message byte into one 128-bit RM(1,7) codeword
 *
 * Generator matrix of the code, one row per message bit
 * (note that bits are numbered big endian, i.e. low bits are at the left):
 * 0   aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1   cccccccc cccccccc cccccccc cccccccc
 * 2   f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3   ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4   ffff0000 ffff0000 ffff0000 ffff0000
 * 5   00000000 ffffffff 00000000 ffffffff
 * 6   00000000 00000000 ffffffff ffffffff
 * 7   ffffffff ffffffff ffffffff ffffffff
 *
 * The codeword is built as four 32-bit quarters and written to word
 * byte by byte, least significant byte first.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message to encode
 */
 static void encode(uint8_t *word, uint8_t message) {
    uint32_t quarter[4];
    uint32_t base;
    // rows 5 and 6 are constant within a quarter: they flip whole quarters
    const uint32_t flip5 = (uint32_t) BIT0MASK(message >> 5);
    const uint32_t flip6 = (uint32_t) BIT0MASK(message >> 6);

    // row 7 inverts everything; rows 0..4 look the same in every quarter
    base = (uint32_t) BIT0MASK(message >> 7);
    base ^= (uint32_t) (BIT0MASK(message >> 0) & 0xaaaaaaaa);
    base ^= (uint32_t) (BIT0MASK(message >> 1) & 0xcccccccc);
    base ^= (uint32_t) (BIT0MASK(message >> 2) & 0xf0f0f0f0);
    base ^= (uint32_t) (BIT0MASK(message >> 3) & 0xff00ff00);
    base ^= (uint32_t) (BIT0MASK(message >> 4) & 0xffff0000);

    // bit 5 flips quarters 1 and 3; bit 6 flips quarters 2 and 3
    quarter[0] = base;
    quarter[1] = base ^ flip5;
    quarter[2] = base ^ flip6;
    quarter[3] = base ^ flip5 ^ flip6;

    for (size_t q = 0; q < 4; q++) {
        for (size_t byte = 0; byte < 4; byte++) {
            word[4 * q + byte] = (quarter[q] >> (8 * byte)) & 0xff;
        }
    }
 }



 /**
 * @brief Add multiple codewords into expanded codeword
 *
 * Every bit of each of the MULTIPLICITY repeated 128-bit codewords is expanded
 * into its own 16-bit counter, and the copies are summed position by position,
 * so each of the 128 counters ends up in the range 0..MULTIPLICITY.
 *
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high
 *
 * The codeword bytes are read with memcpy, so src needs no particular
 * alignment and no pointer of a different type is dereferenced.
 *
 * @param[out] dst Structure that contain the expanded codeword (8 vectors of 16 counters)
 * @param[in] src Structure that contain the codeword (16 * MULTIPLICITY bytes)
 */
 static inline void expand_and_sum(__m256i *dst, const uint64_t *src) {
    const uint8_t *bytes = (const uint8_t *) src;
    uint16_t v[16];
    for (size_t part = 0; part < 8; part++) {
        dst[part] = _mm256_setzero_si256();
    }
    for (size_t copy = 0; copy < MULTIPLICITY; copy++) {
        for (size_t part = 0; part < 8; part++) {
            // 16-bit slice number `part` of copy number `copy`, in native byte order
            uint16_t slice;
            memcpy(&slice, bytes + 16 * copy + 2 * part, sizeof slice);
            for (size_t bit = 0; bit < 16; bit++) {
                v[bit] = (slice >> bit) & 1;
            }
            // explicit 16-bit lane addition (the counters are 16-bit values)
            dst[part] = _mm256_add_epi16(dst[part],
                                         _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
                                                 v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]));
        }
    }
 }



 /**
 * @brief Hadamard transform
 *
 * Computes the Hadamard transform of the 128 16-bit values in src and leaves
 * the result in dst. src is clobbered: the two buffers are used alternately
 * as input and output of the successive passes.
 * The method is easiest to follow on H(3) instead of H(7).
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * This takes three passes; each pass writes the pairwise sums to the lower
 * half of the output buffer and the pairwise differences to the upper half:
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3 (even indices 0, 2, 4, 6, then odd indices 1, 3, 5, 7):
 * a+b+c+d+e+f+g+h   a+b-c-d+e+f-g-h   a+b+c+d-e-f-g-h   a+b-c-d-e-f+g+h
 * a-b+c-d+e-f+g-h   a-b-c+d+e-f-g+h   a-b+c-d-e+f-g+h   a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * In the same way, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src Structure that contain the expanded codeword (overwritten)
 * @param[out] dst Structure that receives the transform
 */
 inline void hadamard(__m256i *src, __m256i *dst) {
    // data flows src -> dst -> src -> ... -> dst; seven passes end in dst
    __m256i *in = src;
    __m256i *out = dst;
    size_t passes_left = 7;

    while (passes_left-- > 0) {
        for (size_t k = 0; k < 4; k++) {
            const __m256i *pair = &in[2 * k];
            // hadd/hsub operate within each 128-bit lane, so the two middle
            // 64-bit blocks of the result are swapped back into order (0xd8)
            out[k] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(pair[0], pair[1]), 0xd8);
            out[k + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(pair[0], pair[1]), 0xd8);
        }
        // the output of this pass is the input of the next one
        __m256i *swap = in;
        in = out;
        out = swap;
    }
 }



 /**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the highest value,
 * and add 128 if the peak is positive
 * Notes on decoding
 * The standard "Green machine" decoder works as follows:
 * if the received codeword is W, compute (2 * W - 1) * H7
 * The entries of the resulting vector are always even and vary from
 * -128 (= the complement is a code word, add bit 7 to decode)
 * via 0 (this is a different codeword)
 * to 128 (this is the code word).
 *
 * Our decoding differs in two ways:
 * - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1)
 * - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY)
 * This implies that we have to subtract 64M (M=MULTIPLICITY)
 * from the first entry to make sure the first codewords is handled properly
 * and that the entries vary from -64M to 64M.
 * -64M or 64M stands for a perfect codeword.
 * If there are fewer than 32M errors, there is always a unique codeword
 * with an entry with absolute value > 32M;
 * this is because an error changes an entry by 1.
 * The highest number that seem to be decodable is 50 errors, so that the
 * highest entries in the hadamard transform can be as low as 12.
 * But this is different for the repeated code.
 * Because multiple codewords are added, this changes: the lowest value of the
 * hadamard transform of the sum of six words is seen to be as low as 43 (!),
 * which is way less than 12*6.
 *
 * It is possible that there are more errors, but the word is still uniquely
 * decodable: we found a word with distance of 50 from the nearest codeword.
 * That means that the highest entry can be as low as 14M.
 * Since we have to do binary search, we search for the range 1-64M
 * which can be done in 6+l2g(M) steps.
 * The binary search is based on (values>32M are unique):
 * M  32M     min>  max>  firstStep #steps
 * 2   64       1   64    33 +- 16    6
 * 4  128       1  128    65 +- 32    7
 * 6  192       1  192   129 +- 64    8
 *
 * As a check, we run a sample for M=6 to see the peak value; it ranged
 * from 43 to 147, so my analysis looks right. Also, it shows that decoding
 * far beyond the bound of 32M is needed.
 *
 * For the vectors, it would be tempting to use 8 bit ints,
 * because the values "almost" fit in there.
 * We could use some trickery to fit it in 8 bits, like saturated add or
 * division by 2 in a late step.
 * Unfortunately, these instructions do not exist.
 * the adds _mm512_adds_epi8 is available only on the latest processors,
 * and division, shift, mulhi are not available at all for 8 bits.
 * So, we use 16 bit ints.
 *
 * For the search of the optimal comparison value,
 * remember the transform contains 64M-d,
 * where d are the distances to the codewords.
 * The highest value gives the most likely codeword.
 * There is no fast vectorized way to find this value, so we search for the
 * maximum value itself.
 * In each pass, we collect a bit map of the transform values that are,
 * say >bound.  There are three cases:
 * bit map = 0: all code words are further away than 64M-bound (decrease bound)
 * bit map has one bit: one unique code word has distance < 64M-bound
 * bit map has multiple bits: multiple words (increase bound)
 * We will search for the lowest value of bound that gives a nonzero bit map.
 *
 * @param[in] transform Structure that contain the expanded codeword
 *                      (8 vectors of 16 signed 16-bit transform entries)
 * @returns the decoded value: bits 0-3 are the column of the peak inside its
 *          row, bits 4-6 the row, and bit 7 is set when the peak entry is
 *          non-negative
 */
 inline uint32_t find_peaks(__m256i *transform) {
    // working vectors: per-row absolute values, their element-wise maximum,
    // the broadcast comparison bound, and scratch masks
    __m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows;
    __m256i tmp = _mm256_setzero_si256();
    __m256i vect_mask;
    __m256i res;
    int32_t lower;
    int32_t width;
    uint32_t message;
    uint32_t mask;
    int8_t index;
    int8_t abs_value;
    int8_t mask1;
    int8_t mask2;
    uint16_t result;

    // compute absolute value of transform
    for (size_t i = 0; i < 8; i++) {
        abs_rows[i] = _mm256_abs_epi16(transform[i]);
    }
    // compute a vector of 16 elements which contains the maximum somewhere
    // (later used to compute bits 0 through 3 of message)
    max_abs_rows = abs_rows[0];
    for (size_t i = 1; i < 8; i++) {
        max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]);
    }

    // do binary search for the highest value that is lower than the maximum
    // loop invariant: lower gives bit map = 0, lower + width gives bit map > 0
    lower = 1;
    // this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6
    width = 1 << (5 + MULTIPLICITY / 2);
    // if you don't unroll this loop, it fits in the loop cache
    // uncomment the line below to speeding up the program by a few percent
    // #pragma GCC unroll 0
    while (width > 1) {
        width >>= 1;
        // compare with lower + width; put result in bitmap
        // make vector from value of new bound
        bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width));
        bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound);
        // step up if there are any matches
        // rely on compiler to use conditional move here
        // testz yields 1 when bitmap is all zero; the next line turns that
        // into 0 (no match) or 0xffffffff (at least one match) without branching
        mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
        mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
        lower += mask & width;
    }
    // here width == 1
    // lower+width contains the maximum value of the vector
    // or less, if the maximum is very high (which is OK)
    // normally, there is one maximum, but sometimes there are more
    // find where the maxima occur in the maximum vector
    // (each determines lower 4 bits of peak position)
    // construct vector filled with bound-1
    bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1));

    // find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message
    // find lowest value by searching backwards: rows are visited from 7 down to 0
    // and every match overwrites message, so the lowest matching row wins
    // (message starts at row 7, so the check of row 7 cannot change it)
    message = 0x70;
    for (size_t i = 0; i < 8; i++) {
        bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound);
        mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
        mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
        message ^= mask & (message ^ ((7 - i) << 4));
    }
    // we decided which row of the matrix contains the lowest match
    // select proper row
    index = message >> 4;

    // branch-free row selection: every row is read, but only row `index`
    // passes through the mask
    tmp = _mm256_setzero_si256();
    for (size_t i = 0; i < 8; i++) {
        // abs_value = |index - i|
        abs_value = (int8_t)(index - i);
        mask1 = abs_value >> 7;
        abs_value ^= mask1;
        abs_value -= mask1;
        // mask2 = 0 when i == index, 1 otherwise
        mask2 = ((uint8_t) - abs_value >> 7);
        // mask = all ones when i == index, 0 otherwise
        mask = (-1ULL) + mask2;
        vect_mask = _mm256_set1_epi32(mask);
        res = _mm256_and_si256(abs_rows[i], vect_mask);
        tmp = _mm256_or_si256(tmp, res);
    }

    active_row = tmp;

    // get the column number of the vector element
    // by setting the bits corresponding to the columns
    // and then adding elements within two groups of 8
    vect_mask = _mm256_cmpgt_epi16(active_row, bound);
    // column c that exceeds the bound keeps exactly bit c
    vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1);
    for (size_t i = 0; i < 3; i++) {
        vect_mask = _mm256_hadd_epi16(vect_mask, vect_mask);
    }
    // add low 4 bits of message: position of the lowest set column bit
    message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8));

    // set bit 7 if sign of biggest value is positive
    // make sure a jump isn't generated by the compiler
    // first pick the (signed) row message / 16 ...
    tmp = _mm256_setzero_si256();
    for (size_t i = 0; i < 8; i++) {
        mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63);
        vect_mask = _mm256_set1_epi32(mask);
        tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i]));
    }
    // ... then the element message % 16 inside that row
    result = 0;
    for (size_t i = 0; i < 16; i++) {
        mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63);
        result |= mask & ((uint16_t *)&tmp)[i];
    }
    // sign bit of the selected entry clear -> set bit 7 of message
    message |= (0x8000 & ~result) >> 8;
    return message;
 }



 /**
 * @brief Encodes a message with the repeated Reed-Muller code
 *
 * Each of the VEC_N1_SIZE_BYTES message bytes is encoded into one 128-bit
 * RM(1,7) codeword, which is then written MULTIPLICITY times in a row
 * (PARAM_N2 bits per message byte).
 *
 * @param[out] cdw Byte array receiving the encoded message
 *                 (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Byte array of size VEC_N1_SIZE_BYTES storing the message
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // start of the group of repeated codewords for message byte i
        uint8_t *first = &cdw[16 * i * MULTIPLICITY];

        encode(first, msg[i]);

        // the remaining copies are byte-for-byte duplicates of the first one
        size_t copy = 1;
        while (copy < MULTIPLICITY) {
            memcpy(first + 16 * copy, first, 16);
            copy++;
        }
    }
 }



 /**
 * @brief Decodes the received word
 *
 * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes codes @cite macwilliams1977theory
 *
 * For every message byte the MULTIPLICITY repeated codewords are summed into
 * 128 16-bit counters, Hadamard-transformed, corrected in entry 0 and handed
 * to find_peaks, whose result is the decoded byte.
 *
 * @param[out] msg Byte array of size VEC_N1_SIZE_BYTES receiving the decoded message
 * @param[in] cdw Byte array storing the received word
 *                (16 * MULTIPLICITY bytes per message byte)
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    __m256i expanded[8];
    __m256i transform[8];
    // only transform entry 0 (the lowest 16-bit lane) carries the 64 * MULTIPLICITY
    // excess that comes from summing 0/1 instead of -1/+1
    const __m256i first_entry_excess = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY);
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // collect the codewords
        expand_and_sum(expanded, (const uint64_t *)&cdw[16 * i * MULTIPLICITY]);
        // apply hadamard transform
        hadamard(expanded, transform);
        // fix the first entry to get the half Hadamard transform.
        // This must be a 16-bit lane subtraction: the `-=` operator on __m256i
        // works on 64-bit lanes, so whenever entry 0 is smaller than
        // 64 * MULTIPLICITY the borrow would leak into entry 1.
        transform[0] = _mm256_sub_epi16(transform[0], first_entry_excess);
        // finish the decoding
        msg[i] = (uint8_t) find_peaks(transform);
    }
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/reed_muller.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/reed_muller.h
@@ -1,18 +0,0 @@
 #ifndef REED_MULLER_H
 #define REED_MULLER_H


 /**
 * @file reed_muller.h
 * Header file of reed_muller.c
 *
 * Public interface of the repeated RM(1,7) Reed-Muller code: each message
 * byte corresponds to MULTIPLICITY copies of a 128-bit codeword.
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Encode the message bytes in msg into the codeword bytes in cdw.
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

 // Decode the received codeword bytes in cdw into the message bytes in msg.
 void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


 #endif
--- a/crypto_kem/hqc-rmrs-128/avx2/reed_solomon.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/reed_solomon.c
@@ -1,466 +0,0 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 /**
 * @file reed_solomon.c
 * Constant time implementation of Reed-Solomon codes
 */


 // Forward declarations of the file-local decoding steps defined below.
 // The `static` here gives compute_syndromes internal linkage even though
 // its definition omits the keyword.
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
 static void compute_roots(uint8_t *error, uint16_t *sigma);
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

 // Multiplier table for the first 16 syndromes, used by compute_syndromes:
 // row i is multiplied (16-bit lane by lane) with the broadcast codeword
 // symbol cdw[i + 1] and XORed into syndromes[0..15].
 // Each 64-bit literal packs four 16-bit lanes, lowest lane in the low bits.
 // NOTE(review): row 0 reads 0x02, 0x04, 0x08, 0x10, ..., 0x1d, i.e. it looks
 // like alpha^((i + 1) * j) for j = 1..16 in GF(2^8) - confirm against gf.h.
 // NOTE(review): the brace initializers rely on __m256i being a vector of
 // four 64-bit integers, as in GCC and Clang - confirm for other compilers.
 static const __m256i alpha_ij256_1[45] = {
    {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087},
    {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006},
    {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035},
    {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014},
    {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be},
    {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078},
    {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3},
    {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d},
    {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed},
    {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e},
    {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054},
    {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4},
    {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5},
    {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062},
    {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064},
    {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051},
    {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045},
    {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb},
    {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083},
    {0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020},
    {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d},
    {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0},
    {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee},
    {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba},
    {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e},
    {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb},
    {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9},
    {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd},
    {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec},
    {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9},
    {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052},
    {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1},
    {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1},
    {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc},
    {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c},
    {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2},
    {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048},
    {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016},
    {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad},
    {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074},
    {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9},
    {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025},
    {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 0x00ab00a9005b008c},
    {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de},
    {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f}
 };
 // Multiplier table for the remaining syndromes, used by compute_syndromes:
 // row i is multiplied (16-bit lane by lane) with the broadcast codeword
 // symbol cdw[i + 1]; the first 14 lanes of the accumulated result are stored
 // to syndromes[16..29]. The two highest lanes of every row are zero padding.
 // NOTE(review): row 0 starts 0x98, 0x2d, 0x5a, 0xb4, ..., which looks like the
 // continuation alpha^((i + 1) * j) for j = 17..30 - confirm against gf.h.
 static const __m256i alpha_ij256_2[45] = {
    {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x0000000000600030},
    {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x0000000000b90069},
    {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x0000000000df007f},
    {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00000000003b00f8},
    {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x000000000055004d},
    {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x00000000009600f1},
    {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x00000000005900e0},
    {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x00000000002c00f7},
    {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0000000000260040},
    {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x0000000000c1009c},
    {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00000000000f005f},
    {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x00000000001a00b6},
    {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x0000000000a900ec},
    {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x00000000009100aa},
    {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0000000000640096},
    {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00000000002400a2},
    {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x000000000001000b},
    {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00000000006000cd},
    {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x0000000000b900d4},
    {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x0000000000df005e},
    {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00000000003b0086},
    {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0000000000550085},
    {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x00000000009600d5},
    {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000000000059006e},
    {0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x00000000002c00ef},
    {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00000000002600fa},
    {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0000000000c1002d},
    {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x00000000000f0023},
    {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x00000000001a001e},
    {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0000000000a9001a},
    {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x00000000009100da},
    {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0000000000640063},
    {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0000000000240082},
    {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0000000000010045},
    {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x000000000060006c},
    {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x0000000000b9008f},
    {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x0000000000df0028},
    {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00000000003b00d3},
    {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x00000000005500ce},
    {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x0000000000960084},
    {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x00000000005900e5},
    {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x00000000002c0007},
    {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0000000000260090},
    {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x0000000000c10002},
    {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x00000000000f0060}
 };


 /**
 * @brief Encodes a message of PARAM_K bytes into a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * the encoding is systematic and uses a linear (PARAM_N1 - PARAM_K)-stage shift register
 * whose feedback connections are the coefficients RS_POLY_COEFS of the generator polynomial.
 * The first PARAM_N1 - PARAM_K bytes of cdw serve as the shift register and
 * are read before they are written, so the caller provides their initial state.
 * The message is copied unchanged behind them.
 *
 * @param[in,out] cdw Array of PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Array of PARAM_K bytes storing the message
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    // the unions give the 16-bit arrays the alignment of a __m256i
    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } products = {0};

    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } gen_poly = {{ RS_POLY_COEFS }};

    __m256i *products256 = (__m256i *)products.arr16;
    __m256i *gen_poly256 = (__m256i *)gen_poly.arr16;

    for (size_t i = 0; i < PARAM_K; ++i) {
        // feedback symbol: next message byte (highest first) plus the last register stage
        const uint8_t feedback = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]);
        const __m256i feedback256 = _mm256_set1_epi16(feedback);

        // feedback times every generator coefficient, two vectors of 16 lanes
        _mm256_storeu_si256(&products256[0], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(feedback256, gen_poly256[0]));
        _mm256_storeu_si256(&products256[1], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(feedback256, gen_poly256[1]));

        // shift the register by one stage while adding the products
        uint8_t carry = 0;
        for (size_t k = 0; k < PARAM_N1 - PARAM_K; k++) {
            const uint8_t shifted_in = carry;
            carry = cdw[k];
            cdw[k] = (uint8_t) (shifted_in ^ products.arr16[k]);
        }
    }

    // systematic part: the message itself follows the parity bytes
    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
 }



 /**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Every syndrome starts from cdw[0]; each further symbol cdw[i + 1] is
 * multiplied lane by lane with row i of alpha_ij256_1 / alpha_ij256_2 and
 * XORed in. The first table produces syndromes[0..15], the second one
 * syndromes[16..29].
 *
 * The accumulators live in registers and the results are written with
 * unaligned-safe stores, so syndromes needs no alignment beyond that of
 * uint16_t. Exactly 30 entries are written.
 *
 * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    __m256i first_syndromes256 = _mm256_set1_epi16(cdw[0]);
    __m256i last_syndromes256 = first_syndromes256;

    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
        const __m256i symbol256 = _mm256_set1_epi16(cdw[i + 1]);
        first_syndromes256 = _mm256_xor_si256(first_syndromes256,
                                              PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(symbol256, alpha_ij256_1[i]));
        last_syndromes256 = _mm256_xor_si256(last_syndromes256,
                                             PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(symbol256, alpha_ij256_2[i]));
    }

    // syndromes[0..15]
    _mm256_storeu_si256((__m256i *) syndromes, first_syndromes256);
    // syndromes[16..29]: only the low 14 lanes are meaningful, the top two
    // lanes of alpha_ij256_2 are padding and must not be written out
    memcpy(syndromes + 16, &last_syndromes256, 14 * sizeof(uint16_t));
 }



 /**
 * @brief Computes the error locator polynomial (ELP) sigma
 *
 * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
 * We use the letter p for rho which is initialized at -1. <br>
 * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
 * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
 * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
 * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
 * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
 * and we only need to save its first PARAM_DELTA coefficients
 * (indices 0 to PARAM_DELTA - 1), because X_sigma_p is built from them shifted by one.
 *
 * All data-dependent decisions are made with 16-bit masks (0 or 0xffff)
 * instead of branches; the only branch tests the loop counter mu.
 *
 * sigma[0] is set to 1 here and sigma[1..PARAM_DELTA] are only XORed into,
 * so the caller must pass them zeroed.
 *
 * @returns the degree of the ELP sigma
 * @param[out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP
 * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
 */
 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
    uint16_t deg_sigma = 0;
    uint16_t deg_sigma_p = 0;
    uint16_t deg_sigma_copy = 0;
    uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
    uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
    uint16_t pp = (uint16_t) -1; // 2*rho
    uint16_t d_p = 1;
    uint16_t d = syndromes[0];

    uint16_t mask1, mask2, mask12;
    uint16_t deg_X, deg_X_sigma_p;
    uint16_t dd;
    uint16_t mu;

    uint16_t i;

    sigma[0] = 1;
    for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
        // Save sigma in case we need it to update X_sigma_p
        // (PARAM_DELTA coefficients of 2 bytes each)
        memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
        deg_sigma_copy = deg_sigma;

        // dd = d / d_p, the factor applied to X_sigma_p in the update below
        dd = PQCLEAN_HQCRMRS128_AVX2_gf_mul(d, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(d_p));

        // sigma += dd * X_sigma_p, restricted to coefficients up to PARAM_DELTA
        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            sigma[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(dd, X_sigma_p[i]);
        }

        deg_X = mu - pp;
        deg_X_sigma_p = deg_X + deg_sigma_p;

        // mask1 = 0xffff if(d != 0) and 0 otherwise
        mask1 = -((uint16_t) - d >> 15);

        // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
        mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

        // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
        mask12 = mask1 & mask2;
        // masked select: deg_sigma becomes deg_X_sigma_p only when mask12 is set
        deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

        // last iteration: sigma is final, no next discrepancy is needed
        if (mu == (2 * PARAM_DELTA - 1)) {
            break;
        }

        // when the degree increased, remember this step as the new rho
        // together with its discrepancy
        pp ^= mask12 & (mu ^ pp);
        d_p ^= mask12 & (d ^ d_p);
        // X_sigma_p becomes X * (saved sigma) if the degree increased,
        // X * X_sigma_p otherwise; iterating downwards keeps the shift in place
        for (i = PARAM_DELTA; i; --i) {
            X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
        }

        deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
        // discrepancy for the next step:
        // d = syndromes[mu + 1] + sum_i sigma[i] * syndromes[mu + 1 - i]
        d = syndromes[mu + 1];

        for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
            d ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
        }
    }

    return deg_sigma;
 }



 /**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * See function PQCLEAN_HQCRMRS128_AVX2_fft for more details.
 *
 * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
 * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
 static void compute_roots(uint8_t *error, uint16_t *sigma) {
    uint16_t w[1 << PARAM_M] = {0};

    PQCLEAN_HQCRMRS128_AVX2_fft(w, sigma, PARAM_DELTA + 1);
    PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(error, w);
 }



 /**
 * @brief Computes the polynomial z(x)
 *
 * z[0] is 1. Coefficient i (1 <= i <= PARAM_DELTA) starts as sigma[i] when i <= degree and 0
 * otherwise. syndromes[0] is then added to z[1] unconditionally, and for i >= 2 the terms
 * syndromes[i - 1] and sigma[j] * syndromes[i - j - 1] (1 <= j < i) are added only when
 * i <= degree. The comparison with degree is done with a mask, not a branch.
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    size_t idx, term;
    uint16_t keep;
    uint16_t acc;

    z[0] = 1;

    // Copy sigma's coefficients up to its degree; higher ones are cleared
    for (idx = 1; idx <= PARAM_DELTA; ++idx) {
        // keep = 0xffff if(idx <= degree) and 0 otherwise
        keep = -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] = keep & sigma[idx];
    }

    z[1] ^= syndromes[0];

    for (idx = 2; idx <= PARAM_DELTA; ++idx) {
        keep = -((uint16_t) (idx - degree - 1) >> 15);
        acc = z[idx] ^ (keep & syndromes[idx - 1]);

        for (term = 1; term < idx; ++term) {
            acc ^= keep & PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[term], syndromes[idx - term - 1]);
        }

        z[idx] = acc;
    }
 }



 /**
 * @brief Computes the error values
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * The function works in three passes, all of them driven by masks rather than by
 * branches on the content of error:
 *  1. gather gf_exp[i] for every position i < PARAM_N1 with error[i] != 0 into beta_j,
 *  2. compute one value e_j per gathered position,
 *  3. scatter the e_j back to the positions where error[i] != 0.
 *
 * @param[out] error_values Array indexed up to PARAM_N1 - 1; values are accumulated with +=, so the caller is expected to pass it zeroed
 * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
 * @param[in] error Array whose first PARAM_N1 entries are read; a nonzero entry marks an error position
 */
 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
    uint16_t beta_j[PARAM_DELTA] = {0};
    uint16_t e_j[PARAM_DELTA] = {0};

    uint16_t delta_counter;
    uint16_t delta_real_value;
    uint16_t found;
    uint16_t mask1;
    uint16_t mask2;
    uint16_t tmp1;
    uint16_t tmp2;
    uint16_t inverse;
    uint16_t inverse_power_j;

    // Compute the beta_{j_i} page 31 of the documentation
    // Every slot j is visited for every position i; only slot delta_counter receives
    // gf_exp[i], and only when error[i] != 0.
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; i++) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            beta_j[j] += mask1 & mask2 & gf_exp[i];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
    // Number of slots of beta_j that were filled
    delta_real_value = delta_counter;

    // Compute the e_{j_i} page 31 of the documentation
    for (size_t i = 0; i < PARAM_DELTA; ++i) {
        tmp1 = 1;
        tmp2 = 1;
        inverse = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(beta_j[i]);
        inverse_power_j = 1;

        // tmp1 = 1 + sum over j of z[j] * inverse^j
        for (size_t j = 1; j <= PARAM_DELTA; ++j) {
            inverse_power_j = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, inverse);
            tmp1 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, z[j]);
        }
        // tmp2 = product of (1 + inverse * beta) over the PARAM_DELTA - 1 other slots of beta_j
        for (size_t k = 1; k < PARAM_DELTA; ++k) {
            tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
        }
        // Slots at or beyond delta_real_value were never filled: force their value to 0
        mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
        e_j[i] = mask1 & PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(tmp2));
    }

    // Place the delta e_{j_i} values at the right coordinates of the output vector
    // Same masked walk as the first pass, now adding e_j[delta_counter] at position i.
    delta_counter = 0;
    for (size_t i = 0; i < PARAM_N1; ++i) {
        found = 0;
        mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
        for (size_t j = 0; j < PARAM_DELTA; j++) {
            mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
            error_values[i] += mask1 & mask2 & e_j[j];
            found += mask1 & mask2 & 1;
        }
        delta_counter += found;
    }
 }



 /**
 * @brief Correct the errors
 *
 * XORs the low byte of each error value into the corresponding symbol of cdw.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error values
 */
 static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    size_t pos = 0;

    while (pos < PARAM_N1) {
        cdw[pos] = (uint8_t) (cdw[pos] ^ error_values[pos]);
        ++pos;
    }
 }



 /**
 * @brief Decodes the received word
 *
 * The decoder chains six steps:
 *    <ol>
 *    <li> Computation of the 2*PARAM_DELTA syndromes.
 *    <li> Computation of the error-locator polynomial sigma.
 *    <li> Location of the errors from sigma (done by additive FFT, see compute_roots).
 *    <li> Computation of the polynomial z(x).
 *    <li> Computation of the error values.
 *    <li> Correction of the errors in the received word.
 *    </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Array receiving the PARAM_K bytes of the decoded message
 * @param[in,out] cdw Array storing the received word; it is corrected in place before the message is copied out
 */
 void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    uint16_t synd[2 * PARAM_DELTA] = {0};
    // The locator has at most PARAM_DELTA + 1 coefficients; the FFT needs the full 2^PARAM_FFT room
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint8_t err_positions[1 << PARAM_M] = {0};
    uint16_t z_poly[PARAM_N1] = {0};
    uint16_t err_magnitudes[PARAM_N1] = {0};
    uint16_t locator_degree;

    // Steps 1 and 2: syndromes, then the error locator polynomial and its degree
    compute_syndromes(synd, cdw);
    locator_degree = compute_elp(locator, synd);

    // Step 3: error positions
    compute_roots(err_positions, locator);

    // Steps 4 and 5: z(x), then the error values
    compute_z_poly(z_poly, locator, locator_degree, synd);
    compute_error_values(err_magnitudes, z_poly, err_positions);

    // Step 6: fix the received word in place
    correct_errors(cdw, err_magnitudes);

    // The message is the PARAM_K bytes starting at offset PARAM_G - 1 of the corrected word
    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/reed_solomon.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/reed_solomon.h
--- a/crypto_kem/hqc-rmrs-128/avx2/vector.c
+++ b/crypto_kem/hqc-rmrs-128/avx2/vector.c
@@ -1,178 +0,0 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <immintrin.h>
 #include <stdint.h>
 #include <string.h>
 /**
 * @file vector.c
 * @brief Implementation of vectors sampling and some utilities for the HQC scheme
 */



 /**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function draws <b>weight</b> distinct positions in the interval [0, PARAM_N - 1] and flips the
 * corresponding bits of <b>v</b>. Suppose PARAM_N is equal to \f$ 70853 \f$; a position \f$ r\f$ is selected as follows:
 *  1. Three bytes from the seedexpander are assembled into a number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 *  2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times  70853\f$
 *  3. If \f$ x \geq t\f$, go to 1
 *  4. \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position equal to one already drawn is discarded and drawn again.
 *
 * The bits are XORed into <b>v</b>, 256 bits at a time, over CEIL_DIVIDE(PARAM_N, 256) blocks, so <b>v</b>
 * must provide that many 32-byte blocks and its previous content is kept underneath the new bits.
 * The local buffers hold PARAM_OMEGA_R entries, so <b>weight</b> must not exceed PARAM_OMEGA_R.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to the array receiving the bits
 * @param[in] weight Integer that is the Hamming weight
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    const size_t pool_size = 3 * weight;
    uint8_t pool[3 * PARAM_OMEGA_R] = {0};
    uint32_t support[PARAM_OMEGA_R] = {0};
    __m256i lane_bits[PARAM_OMEGA_R];
    __m256i block_ids[PARAM_OMEGA_R];
    const __m256i lane_ids = _mm256_set_epi64x(3, 2, 1, 0);
    size_t filled = 0;
    size_t cursor = pool_size; // forces a refill before the first draw

    // Draw positions until `weight` distinct ones have been collected
    while (filled < weight) {
        uint32_t candidate;
        uint8_t is_new = 1;

        do {
            if (cursor == pool_size) {
                seedexpander(ctx, pool, pool_size);
                cursor = 0;
            }

            candidate = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;

        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;
        support[filled] = candidate;

        // Keep the slot only if the position was not drawn before
        for (size_t prev = 0; prev < filled; ++prev) {
            if (support[prev] == candidate) {
                is_new = 0;
            }
        }
        filled += is_new;
    }

    // For each position, precompute the 256-bit block it belongs to and its bit inside that block
    for (size_t idx = 0; idx < weight; ++idx) {
        const uint64_t word = support[idx] >> 6;
        const uint64_t lane = (word & 0x3UL);
        const uint64_t bit = 1ULL << (support[idx] & 0x3f);
        const __m256i lane_select = _mm256_cmpeq_epi64(_mm256_set1_epi64x(lane), lane_ids);

        block_ids[idx] = _mm256_set1_epi64x(word >> 2);
        lane_bits[idx] = _mm256_and_si256(_mm256_set1_epi64x(bit), lane_select);
    }

    // Every block visits every position so the memory access pattern does not depend on the positions
    for (size_t blk = 0; blk < CEIL_DIVIDE(PARAM_N, 256); ++blk) {
        __m256i *slot = ((__m256i *)v) + blk;
        const __m256i blk_id = _mm256_set1_epi64x(blk);
        __m256i acc = _mm256_loadu_si256(slot);

        for (size_t idx = 0; idx < weight; ++idx) {
            const __m256i hit = _mm256_cmpeq_epi64(block_ids[idx], blk_id);
            acc = _mm256_xor_si256(acc, _mm256_and_si256(lane_bits[idx], hit));
        }
        _mm256_storeu_si256(slot, acc);
    }
 }



 /**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are taken from the seedexpander and loaded into the VEC_N_SIZE_64 words
 * of <b>v</b>; the extra bits of the last word are then dropped with RED_MASK.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N_SIZE_64, stream, VEC_N_SIZE_BYTES);

    // Drop the extra bits of the last word
    v[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Adds two vectors
 *
 * The addition is the word-wise XOR of <b>v1</b> and <b>v2</b>, processed from the first word to the last.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the number of 64-bit words in each vector
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint64_t *out = o;
    const uint64_t *lhs = v1;
    const uint64_t *rhs = v2;

    for (uint32_t left = size; left != 0; --left) {
        *out = *lhs ^ *rhs;
        ++out;
        ++lhs;
        ++rhs;
    }
 }



 /**
 * @brief Compares two vectors
 *
 * All <b>size</b> bytes are always read; the differences are accumulated with OR and the
 * result is derived from the accumulator without branching.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
 uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint64_t diff = 0;
    size_t idx = 0;

    while (idx < size) {
        diff |= (uint64_t) (v1[idx] ^ v2[idx]);
        ++idx;
    }

    // 0 - diff has its top bit set exactly when diff (at most 0xff) is nonzero
    return (uint8_t) ((0 - diff) >> 63);
 }



 /**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When the output is at least as large as the input, the CEIL_DIVIDE(size_v, 8) input bytes are copied.
 * Otherwise VEC_N1N2_SIZE_BYTES bytes are copied and, if <b>size_o</b> is not a multiple of 64, the
 * bits of word VEC_N1N2_SIZE_64 - 1 at positions size_o % 64 and above are cleared.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
 void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    uint32_t tail_bits;

    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    tail_bits = size_o % 64;
    if (tail_bits != 0) {
        // Keep only the tail_bits lowest bits of the last word
        o[VEC_N1N2_SIZE_64 - 1] &= (((uint64_t) 1) << tail_bits) - 1;
    }
 }
--- a/crypto_kem/hqc-rmrs-128/avx2/vector.h
+++ b/crypto_kem/hqc-rmrs-128/avx2/vector.h
@@ -1,27 +0,0 @@
 #ifndef VECTOR_H
 #define VECTOR_H


 /**
 * @file vector.h
 * @brief Header file for vector.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 /* Flips weight distinct random bit positions of v, drawn from the seed expander ctx. */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

 /* Fills v with random bits taken from the seed expander ctx. */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

 /* NOTE(review): the accompanying vector.c contains no definition of this function - confirm whether the declaration is still needed. */
 void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_from_randombytes(uint64_t *v);


 /* o = v1 XOR v2 over size 64-bit words. */
 void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

 /* Returns 0 when the size bytes of v1 and v2 are equal, 1 otherwise. */
 uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

 /* Copies v into o; size_o and size_v are expressed in bits. */
 void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/LICENSE
+++ b/crypto_kem/hqc-rmrs-128/clean/LICENSE
@@ -1 +0,0 @@
 Public Domain
--- a/crypto_kem/hqc-rmrs-128/clean/Makefile.Microsoft_nmake
+++ b/crypto_kem/hqc-rmrs-128/clean/Makefile.Microsoft_nmake
@@ -1,19 +0,0 @@
 # This Makefile can be used with Microsoft Visual Studio's nmake using the command:
 #    nmake /f Makefile.Microsoft_nmake

 # Static library produced by the default target, and the object files archived into it.
 LIBRARY=libhqc-rmrs-128_clean.lib
 OBJECTS=code.obj fft.obj gf2x.obj gf.obj hqc.obj kem.obj parsing.obj reed_muller.obj reed_solomon.obj vector.obj 

 # /O2 optimises, /I adds ..\..\..\common to the include path,
 # /W4 selects warning level 4 and /WX turns warnings into errors.
 CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX

 all: $(LIBRARY)

 # Make sure objects are recompiled if headers change.
 $(OBJECTS): *.h

 # Archive the objects: $@ is the target, $** expands to all of its dependents.
 $(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

 # Remove the build products; the leading '-' makes nmake ignore a failing DEL.
 clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
--- a/crypto_kem/hqc-rmrs-128/clean/api.h
+++ b/crypto_kem/hqc-rmrs-128/clean/api.h
@@ -1,25 +0,0 @@
 #ifndef PQCLEAN_HQCRMRS128_CLEAN_API_H
 #define PQCLEAN_HQCRMRS128_CLEAN_API_H
 /**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

 // Name of the algorithm
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_ALGNAME                      "HQC-RMRS-128"

 // Sizes, in bytes, of the secret key, public key, shared secret and ciphertext
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES               2289
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES               2249
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES                        64
 #define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES              4481

 // As a technicality, the public key is appended to the secret key in order to respect the NIST API.
 // Without this constraint, PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32
 // (2289 = 2249 public-key bytes + 40 further bytes).

 // Key generation: pk and sk are output buffers
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

 // Encapsulation: ct and ss are output buffers, pk is read-only
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

 // Decapsulation: ss is the output buffer, ct and sk are read-only
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/code.c
+++ b/crypto_kem/hqc-rmrs-128/clean/code.c
@@ -1,46 +0,0 @@
 #include "code.h"
 #include "parameters.h"
 #include "reed_muller.h"
 #include "reed_solomon.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file code.c
 * @brief Implementation of concatenated code
 */



 /**
 *
 * @brief Encoding the message m to a code word em using the concatenated code
 *
 * The message is first encoded with the Reed-Solomon code into an intermediate buffer of
 * VEC_N1_SIZE_BYTES bytes; that buffer is then encoded with the Reed-Muller code into em.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
 void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *m) {
    uint8_t rs_codeword[VEC_N1_SIZE_BYTES] = {0};

    // Outer code first, inner code second
    PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(rs_codeword, m);
    PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(em, rs_codeword);
 }



 /**
 * @brief Decoding the code word em to a message m using the concatenated code
 *
 * The Reed-Muller decoder writes an intermediate word of VEC_N1_SIZE_BYTES bytes, which the
 * Reed-Solomon decoder then turns into the message.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
 void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em) {
    uint8_t rs_word[VEC_N1_SIZE_BYTES] = {0};

    // Inner code first, outer code second
    PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(m, rs_word);
 }
--- a/crypto_kem/hqc-rmrs-128/clean/code.h
+++ b/crypto_kem/hqc-rmrs-128/clean/code.h
@@ -1,18 +0,0 @@
 #ifndef CODE_H
 #define CODE_H


 /**
 * @file code.h
 * Header file of code.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 /* Encodes message into the concatenated code word em (Reed-Solomon, then Reed-Muller). */
 void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *message);

 /* Decodes the concatenated code word em into the message m (Reed-Muller, then Reed-Solomon). */
 void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/fft.c
+++ b/crypto_kem/hqc-rmrs-128/clean/fft.c
@@ -1,351 +0,0 @@
 #include "fft.h"
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file fft.c
 * Implementation of the additive FFT and its transpose.
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 */


 static void compute_fft_betas(uint16_t *betas);
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


 /**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis is made of the powers of two 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2, in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
 static void compute_fft_betas(uint16_t *betas) {
    uint16_t *slot = betas;
    size_t shift = PARAM_M - 1;

    while (shift > 0) {
        *slot = 1 << shift;
        ++slot;
        --shift;
    }
 }



 /**
 * @brief Computes the subset sums of the given set
 *
 * Entry i of subset_sums is the XOR of the elements of set selected by the bits of i
 * (bit b of i selects set[b]). The table is built by doubling: once the first 2^b entries
 * are known, the next 2^b ones are obtained by adding set[b] to each of them.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
 static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    uint32_t filled = 1;

    subset_sums[0] = 0;

    for (uint16_t elem = 0; elem < set_size; ++elem) {
        uint16_t *upper = subset_sums + filled;

        for (uint32_t off = 0; off < filled; ++off) {
            upper[off] = set[elem] ^ subset_sums[off];
        }
        filled <<= 1;
    }
 }



 /**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * The cases m_f = 1 to 4 are unrolled by hand; any other value is handled by radix_big.
 * Within an unrolled case the statement order matters: some outputs are computed from
 * outputs assigned just before them.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
 static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    switch (m_f) {
    // 16 input coefficients, 8 coefficients in each output
    case 4:
        f0[4] = f[8] ^ f[12];
        f0[6] = f[12] ^ f[14];
        f0[7] = f[14] ^ f[15];
        f1[5] = f[11] ^ f[13];
        f1[6] = f[13] ^ f[14];
        f1[7] = f[15];
        f0[5] = f[10] ^ f[12] ^ f1[5];
        f1[4] = f[9] ^ f[13] ^ f0[5];

        f0[0] = f[0];
        f1[3] = f[7] ^ f[11] ^ f[15];
        f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
        f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
        f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
        f1[2] = f[3] ^ f1[1] ^ f0[3];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    // 8 input coefficients, 4 coefficients in each output
    case 3:
        f0[0] = f[0];
        f0[2] = f[4] ^ f[6];
        f0[3] = f[6] ^ f[7];
        f1[1] = f[3] ^ f[5] ^ f[7];
        f1[2] = f[5] ^ f[6];
        f1[3] = f[7];
        f0[1] = f[2] ^ f0[2] ^ f1[1];
        f1[0] = f[1] ^ f0[1];
        break;

    // 4 input coefficients, 2 coefficients in each output
    case 2:
        f0[0] = f[0];
        f0[1] = f[2] ^ f[3];
        f1[0] = f[1] ^ f0[1];
        f1[1] = f[3];
        break;

    // 2 input coefficients: f0 and f1 are the constants f[0] and f[1]
    case 1:
        f0[0] = f[0];
        f1[0] = f[1];
        break;

    // Larger inputs are split recursively
    default:
        radix_big(f0, f1, f, m_f);
        break;
    }
 }

 /**
 * @brief Recursive step of the radix conversion, used by radix for the values of m_f it does not unroll
 *
 * With n = 2^(m_f - 2), f is seen as four quarters of n coefficients. Two polynomials Q and R
 * of 2n coefficients are built from them, each is converted with radix at level m_f - 1, and
 * the results are concatenated: f0 = R0 | Q0 and f1 = R1 | Q1.
 * The local buffers are sized from PARAM_FFT, so m_f must not exceed PARAM_FFT.
 *
 * @param[out] f0 Array receiving 2n coefficients
 * @param[out] f1 Array receiving 2n coefficients
 * @param[in] f Array of 4n coefficients
 * @param[in] m_f Base-2 logarithm of the number of coefficients of f
 */
 static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

    size_t i, n;

    n = 1;
    n <<= (m_f - 2);
    // memcpy sizes are in bytes: 2 * n bytes is n uint16_t coefficients, 4 * n bytes is 2n
    // Both halves of Q start as the top quarter of f; R starts as the lower half of f
    memcpy(Q, f + 3 * n, 2 * n);
    memcpy(Q + n, f + 3 * n, 2 * n);
    memcpy(R, f, 4 * n);

    // Low half of Q gets the third quarter of f added; that sum is then added to the high half of R
    for (i = 0; i < n; ++i) {
        Q[i] ^= f[2 * n + i];
        R[n + i] ^= Q[i];
    }

    radix(Q0, Q1, Q, m_f - 1);
    radix(R0, R1, R, m_f - 1);

    // Each output is the R part (n coefficients) followed by the Q part (n coefficients)
    memcpy(f0, R0, 2 * n);
    memcpy(f0 + n, Q0, 2 * n);
    memcpy(f1, R1, 2 * n);
    memcpy(f1 + n, Q1, 2 * n);
 }



 /**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS128_CLEAN_fft.
 *
 * @param[out] w Array receiving 2^m evaluations
 * @param[in,out] f Array of 2^m_f coefficients; it is rescaled in place (step 2) when betas[m - 1] != 1
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f Base-2 logarithm of the size of f; the recursion stops at m_f == 1, where f is f[0] + f[1].x
 * @param[in] betas FFT constants
 */
 static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t gammas[PARAM_M - 2] = {0};
    uint16_t deltas[PARAM_M - 2] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

    uint16_t beta_m_pow;
    size_t i, j, k;
    size_t x;

    // Step 1
    // f = f[0] + f[1].x: w is f[0] plus every subset sum of the products betas[i] * f[1],
    // built by doubling the filled part of w for each beta.
    if (m_f == 1) {
        for (i = 0; i < m; ++i) {
            tmp[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], f[1]);
        }

        w[0] = f[0];
        x = 1;
        for (j = 0; j < m; ++j) {
            for (k = 0; k < x; ++k) {
                w[x + k] = w[k] ^ tmp[j];
            }
            x <<= 1;
        }

        return;
    }

    // Step 2: compute g
    // Coefficient i of f is multiplied by betas[m - 1]^i, in place.
    if (betas[m - 1] != 1) {
        beta_m_pow = 1;
        x = 1;
        x <<= m_f;
        for (i = 1; i < x; ++i) {
            beta_m_pow = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
    for (i = 0; i + 1 < m; ++i) {
        gammas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(betas[m - 1]));
        deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    // k = 2^(m - 1): w[0 .. k-1] and w[k .. 2k-1] are the two halves of the output
    k = 1;
    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6
        // w[i] = u[i] + gammas_sums[i] * v[i] and w[k + i] = w[i] + v[i]
        // (memcpy copies 2 * k bytes, i.e. the k uint16_t values of v)
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
 }



 /**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * Also note that f itself is left untouched here (it is const): the twisting done at each
 * recursion level is applied to the local arrays f0 and f1.
 *
 * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
 void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1] = {0};
    uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t f0[1 << (PARAM_FFT - 1)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 1)] = {0};
    uint16_t deltas[PARAM_M - 1] = {0};
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};

    size_t i, k;

    // Follows Gao and Mateer algorithm
    compute_fft_betas(betas);

    // Step 1: PARAM_FFT > 1, nothing to do

    // Compute gammas sums
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 2: beta_m = 1, nothing to do

    // Step 3
    radix(f0, f1, f, PARAM_FFT);

    // Step 4: Compute deltas
    // deltas[i] = betas[i]^2 + betas[i]
    for (i = 0; i < PARAM_M - 1; ++i) {
        deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(betas[i]) ^ betas[i];
    }

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    k = 1 << (PARAM_M - 1);
    // Step 6, 7 and error polynomial computation
    // (2 * k bytes, i.e. the k uint16_t values of v, go to the upper half of w)
    memcpy(w + k, v, 2 * k);

    // Check if 0 is root
    w[0] = u[0];

    // Check if 1 is root
    w[k] ^= u[0];

    // Find other roots
    // w[i] = u[i] + betas_sums[i] * v[i] and w[k + i] = w[i] + v[i]
    for (i = 1; i < k; ++i) {
        w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas_sums[i], v[i]);
        w[k + i] ^= w[i];
    }
 }



 /**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * Each evaluation equal to zero flips one entry of error. Entries are updated with XOR,
 * so the caller is presumably expected to pass a zeroed array.
 *
 * @param[in,out] error Array with the error
 * @param[in] w Array of size 2^PARAM_M
 */
 void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t k;
    size_t i, index;

    // Rebuild the evaluation points: w[i] belongs to gammas_sums[i], w[k + i] to gammas_sums[i] ^ 1
    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    k = 1 << (PARAM_M - 1);
    // 1 ^ ((uint16_t) - x >> 15) is 1 when x == 0 and 0 when 0 < x < 0x8000
    error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15);
    error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15);

    for (i = 1; i < k; ++i) {
        // PARAM_GF_MUL_ORDER - gf_log[x] is the logarithm of the inverse of x
        index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]];
        error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15);

        index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1];
        error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15);
    }
 }
--- a/crypto_kem/hqc-rmrs-128/clean/fft.h
+++ b/crypto_kem/hqc-rmrs-128/clean/fft.h
@@ -1,18 +0,0 @@
 #ifndef FFT_H
 #define FFT_H


 /**
 * @file fft.h
 * Header file of fft.c
 */

 #include <stddef.h>
 #include <stdint.h>

 /* Evaluates the polynomial f (f_coeffs coefficients) on all field elements; the evaluations are written to w. */
 void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

 /* Derives the error array from the evaluations w produced by the function above. */
 void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/gf.c
+++ b/crypto_kem/hqc-rmrs-128/clean/gf.c
@@ -1,63 +0,0 @@
 #include "gf.h"
 #include "parameters.h"
 #include <stdint.h>
 /**
 * @file gf.c
 * Galois field implementation with multiplication using lookup tables
 */


 /**
 * @brief Multiplies two elements of GF(2^PARAM_M)
 *
 * The product is looked up as gf_exp[(gf_log[a] + gf_log[b]) mod (2^PARAM_M - 1)].
 * A zero operand is handled with a mask rather than a branch: the lookup is still
 * performed and its result is forced to 0.
 *
 * @returns the product a*b
 * @param[in] a First element of GF(2^PARAM_M) to multiply
 * @param[in] b Second element of GF(2^PARAM_M) to multiply
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b) {
    uint16_t a_nonzero = (uint16_t) (-((int32_t) a) >> 31); // 0xffff if(a != 0) and 0 otherwise
    uint16_t b_nonzero = (uint16_t) (-((int32_t) b) >> 31); // 0xffff if(b != 0) and 0 otherwise
    uint16_t log_sum = PQCLEAN_HQCRMRS128_CLEAN_gf_mod(gf_log[a] + gf_log[b]);

    return a_nonzero & b_nonzero & gf_exp[log_sum];
 }



 /**
 * @brief Squares an element of GF(2^PARAM_M)
 *
 * The square is looked up as gf_exp[(2 * gf_log[a]) mod (2^PARAM_M - 1)]; the result is
 * forced to 0 with a mask when a is 0.
 *
 * @returns a^2
 * @param[in] a Element of GF(2^PARAM_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a) {
    // Unsigned mask, as in PQCLEAN_HQCRMRS128_CLEAN_gf_mul: storing 0xffff in an int16_t
    // would be an implementation-defined conversion.
    uint16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0
    return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(2 * gf_log[a])];
 }



 /**
 * @brief Computes the inverse of an element of GF(2^PARAM_M)
 *
 * The inverse is looked up as gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]]; the result is
 * forced to 0 with a mask when a is 0.
 *
 * @returns the inverse of a
 * @param[in] a Element of GF(2^PARAM_M)
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a) {
    // Unsigned mask, as in PQCLEAN_HQCRMRS128_CLEAN_gf_mul: storing 0xffff in an int16_t
    // would be an implementation-defined conversion.
    uint16_t mask = (uint16_t) (-((int32_t) a) >> 31); // a != 0
    return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]];
 }



 /**
 * @brief Returns i modulo 2^PARAM_M-1
 *
 * i must be less than 2*(2^PARAM_M-1).
 * Therefore, the return value is either i or i-2^PARAM_M+1.
 * The choice between the two is made with a mask, without branching on i.
 *
 * @returns i mod (2^PARAM_M-1)
 * @param[in] i The integer whose modulo is taken
 */
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i) {
    uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // went_negative = 0xffff if(i < PARAM_GF_MUL_ORDER): the subtraction wrapped around
    uint16_t went_negative = (uint16_t) (0 - (reduced >> 15));

    // Add PARAM_GF_MUL_ORDER back only when the subtraction wrapped
    return (uint16_t) (reduced + (went_negative & PARAM_GF_MUL_ORDER));
 }
--- a/crypto_kem/hqc-rmrs-128/clean/gf.h
+++ b/crypto_kem/hqc-rmrs-128/clean/gf.h
@@ -1,39 +0,0 @@
 #ifndef GF_H
 #define GF_H


 /**
 * @file gf.h
 * Header file of gf.c
 */

 #include <stddef.h>
 #include <stdint.h>


 /**
 * Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
 * The last two elements are needed by the PQCLEAN_HQCRMRS128_CLEAN_gf_mul function
 * (for example if both elements to multiply are zero).
 */
 static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



 /**
 * Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
 * The logarithm of 0 is set to 0 by convention.
 */
 static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };


 // Product a*b in GF(2^PARAM_M); the result is masked to 0 when a or b is 0.
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b);

 // Square a^2 in GF(2^PARAM_M); the result is masked to 0 when a is 0.
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a);

 // Inverse of a in GF(2^PARAM_M); the result is masked to 0 when a is 0.
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a);

 // i mod (2^PARAM_M-1); i must be less than 2*(2^PARAM_M-1).
 uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/gf2x.c
+++ b/crypto_kem/hqc-rmrs-128/clean/gf2x.c
@@ -1,154 +0,0 @@
 #include "gf2x.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include <stdint.h>
 /**
 * \file gf2x.c
 * \brief Implementation of multiplication of two polynomials
 */


 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
 static void reduce(uint64_t *o, const uint64_t *a);
 static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

 /**
 * @brief Exchange two entries of a table
 *
 * After the call tab[elt1] holds the former tab[elt2] and vice versa.
 *
 * @param[in,out] tab Pointer to the table
 * @param[in] elt1 Index of the first entry
 * @param[in] elt2 Index of the second entry
 */
 static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t first = tab[elt1];
    uint16_t second = tab[elt2];

    tab[elt1] = second;
    tab[elt2] = first;
 }



 /**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * The bits of a(x) at positions n and above are folded back onto the low
 * VEC_N_SIZE_64 words; the last output word is then masked with RED_MASK.
 *
 * @param[out] o Pointer to the reduced polynomial (VEC_N_SIZE_64 words)
 * @param[in] a Pointer to the polynomial a(x) to reduce (words 0 .. 2*VEC_N_SIZE_64-1 are read)
 */
 static void reduce(uint64_t *o, const uint64_t *a) {
    // First word that contains bits of degree >= PARAM_N.
    const uint64_t *high = a + (VEC_N_SIZE_64 - 1);

    for (size_t w = 0; w < VEC_N_SIZE_64; w++) {
        // The overflow bits straddle two input words: take the top of one and the bottom of the next.
        uint64_t from_current = high[w] >> (PARAM_N & 63);
        uint64_t from_next = (uint64_t) (high[w + 1] << (64 - (PARAM_N & 63)));

        o[w] = a[w] ^ from_current ^ from_next;
    }

    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
 }



 /**
 * @brief Computes the product of the sparse polynomial a1 with the dense polynomial a2(x)
 *
 *  o(x) = a1(x)a2(x)
 *
 * The product is XORed into o, so the caller provides the initial content of o
 * (PQCLEAN_HQCRMRS128_CLEAN_vect_mul passes a zeroed buffer). No reduction is done here.
 * Both the order in which the shifted copies of a2 are stored and the order in which the
 * monomials of a1 are processed are shuffled using bytes drawn from ctx.
 *
 * @param[out] o Pointer to the result, accessed as a byte string
 * @param[in] a1 Pointer to the sparse polynomial a1 (list of degrees of the monomials which appear in a1)
 * @param[in] a2 Pointer to the dense polynomial a2(x)
 * @param[in] weight Hamming weight of the sparse polynomial a1 (the local scratch arrays hold PARAM_OMEGA_E entries)
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
 static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t carry;
    uint32_t dec, s;
    // 16 copies of a2, each VEC_N_SIZE_64 + 1 words long: copy i is a2 shifted left by i bits.
    uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
    uint16_t permuted_table[16];
    uint16_t permutation_table[16];
    uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
    uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
    uint64_t tmp;
    uint64_t *pt;
    uint8_t *res;
    size_t i, j;

    // Start from the identity mapping shift -> table slot ...
    for (i = 0; i < 16; i++) {
        permuted_table[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

    // ... and shuffle it: entry i is exchanged with one of the entries i .. 15.
    for (i = 0; i < 15; i++) {
        swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
    }

    // Shift 0: plain copy of a2, with a zero overflow word.
    pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
    for (j = 0; j < VEC_N_SIZE_64; j++) {
        pt[j] = a2[j];
    }
    pt[VEC_N_SIZE_64] = 0x0;

    // Shifts 1 .. 15: carry propagates the bits shifted out of one word into the next.
    for (i = 1; i < 16; i++) {
        carry = 0;
        pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
        for (j = 0; j < VEC_N_SIZE_64; j++) {
            pt[j] = (a2[j] << i) ^ carry;
            carry = (a2[j] >> ((64 - i)));
        }
        pt[VEC_N_SIZE_64] = carry;
    }

    // Same shuffle construction for the order in which the monomials of a1 are visited.
    for (i = 0; i < weight; i++) {
        permuted_sparse_vect[i] = (uint16_t) i;
    }

    seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

    for (i = 0; i + 1 < weight; i++) {
        swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i)));
    }

    for (i = 0; i < weight; i++) {
        // Split the degree into a 4-bit shift (selects the table copy) and a 16-bit-word index.
        dec = a1[permuted_sparse_vect[i]] & 0xf;
        s = a1[permuted_sparse_vect[i]] >> 4;
        // 2 * s is the byte offset of that 16-bit word inside o.
        res = o + 2 * s;
        pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

        // XOR the selected shifted copy into o, eight bytes at a time.
        for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
            tmp = PQCLEAN_HQCRMRS128_CLEAN_load8(res);
            PQCLEAN_HQCRMRS128_CLEAN_store8(res, tmp ^ pt[j]);
            res += 8;
        }
    }
 }



 /**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * Multiplies the sparse polynomial <b>a1</b> (of Hamming weight <b>weight</b>, given as a list of
 * monomial degrees) by the dense polynomial <b>a2</b> and reduces the product modulo \f$ X^n - 1\f$.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial
 * @param[in] a2 Pointer to the dense polynomial
 * @param[in] weight Integer that is the weight of the sparse polynomial
 * @param[in] ctx Pointer to the randomness context
 */
 void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    // Unreduced product; the multiplication XORs into it, so it starts zeroed.
    uint64_t product[2 * VEC_N_SIZE_64 + 1] = {0};
    uint8_t *product_bytes = (uint8_t *) product;

    fast_convolution_mult(product_bytes, a1, a2, weight, ctx);

    // The product was written as a byte string; convert it in place to 64-bit words.
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(product, 2 * VEC_N_SIZE_64 + 1, product_bytes, sizeof(product));

    reduce(o, product);
 }
--- a/crypto_kem/hqc-rmrs-128/clean/gf2x.h
+++ b/crypto_kem/hqc-rmrs-128/clean/gf2x.h
@@ -1,16 +0,0 @@
 #ifndef GF2X_H
 #define GF2X_H


 /**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
 #include "nistseedexpander.h"
 #include "randombytes.h"
 #include <stdint.h>

 // o = a1 * a2 mod X^n - 1, where a1 is sparse (list of `weight` monomial degrees) and a2 is dense; ctx supplies the randomization.
 void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/hqc.c
+++ b/crypto_kem/hqc-rmrs-128/clean/hqc.c
@@ -1,144 +0,0 @@
 #include "code.h"
 #include "gf2x.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 /**
 * @file hqc.c
 * @brief Implementation of hqc.h
 */



 /**
 * @brief Key generation of the HQC_PKE IND_CPA scheme
 *
 * The public key consists of the <b>seed</b> from which the vector <b>h</b> is expanded,
 * followed by the syndrome <b>s</b> = x + y.h.
 *
 * The secret key consists of the <b>seed</b> from which the vectors <b>x</b> and <b>y</b> are expanded.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    uint8_t seed_sk[SEED_BYTES] = {0};
    uint8_t seed_pk[SEED_BYTES] = {0};
    AES_XOF_struct expander_sk;
    AES_XOF_struct expander_pk;
    uint64_t vec_x[VEC_N_SIZE_64] = {0};
    uint32_t support_y[PARAM_OMEGA] = {0};
    uint64_t vec_h[VEC_N_SIZE_64] = {0};
    uint64_t syndrome[VEC_N_SIZE_64] = {0};

    // Fresh seeds: the secret one first, then the public one.
    randombytes(seed_sk, SEED_BYTES);
    seedexpander_init(&expander_sk, seed_sk, seed_sk + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(seed_pk, SEED_BYTES);
    seedexpander_init(&expander_pk, seed_pk, seed_pk + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors: x as a dense bit vector, y as a list of coordinates.
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&expander_sk, vec_x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&expander_sk, support_y, PARAM_OMEGA);

    // Public part: h from the public seed, then syndrome = x + y.h
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&expander_pk, vec_h);
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(syndrome, support_y, vec_h, PARAM_OMEGA, &expander_sk);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(syndrome, vec_x, syndrome, VEC_N_SIZE_64);

    // Serialize; the secret key string embeds the public key string.
    PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(pk, seed_pk, syndrome);
    PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(sk, seed_sk, pk);
 }



 /**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>.
 * Every random value used here (r1, r2, e and the randomization of the
 * multiplications) is drawn from a seed expander initialised from <b>theta</b>.
 *
 * @param[out] u Vector u (first part of the ciphertext), VEC_N_SIZE_64 words
 * @param[out] v Vector v (second part of the ciphertext), VEC_N1N2_SIZE_64 words
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    // Named theta_expander so that the local does not shadow the
    // seedexpander() function declared in nistseedexpander.h.
    AES_XOF_struct theta_expander;
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};
    uint64_t r1[VEC_N_SIZE_64] = {0};
    uint32_t r2[PARAM_OMEGA_R] = {0};
    uint64_t e[VEC_N_SIZE_64] = {0};
    uint64_t tmp1[VEC_N_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_SIZE_64] = {0};

    // Create seed expander from theta
    seedexpander_init(&theta_expander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(h, s, pk);

    // Generate r1, r2 and e (r2 is kept as a list of coordinates because it is the sparse operand of vect_mul)
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&theta_expander, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&theta_expander, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&theta_expander, e, PARAM_OMEGA_E);

    // Compute u = r1 + r2.h
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &theta_expander);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

    // Compute v = m.G by encoding the message; the encoder writes bytes into v,
    // which are then converted in place to 64-bit words and widened to PARAM_N bits.
    PQCLEAN_HQCRMRS128_CLEAN_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

    // Compute v = m.G + s.r2 + e, then truncate back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(tmp2, r2, s, PARAM_OMEGA_R, &theta_expander);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(tmp2, e, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

 }



 /**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y and hands the result to the decoder. The multiplication is
 * randomized with a seed expander keyed from fresh random bytes.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint64_t tmp1[VEC_N_SIZE_64] = {0};
    uint64_t tmp2[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    AES_XOF_struct perm_seedexpander;
    uint8_t perm_seed[SEED_BYTES] = {0};

    // Retrieve x, y, pk from secret key.
    // Only y is used below: x lands in tmp1, which is overwritten by the resize further down.
    PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(tmp1, y, pk, sk);

    // Fresh randomness for the randomized multiplication
    randombytes(perm_seed, SEED_BYTES);
    seedexpander_init(&perm_seedexpander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute v - u.y (v is first widened from PARAM_N1N2 to PARAM_N bits)
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(tmp2, y, u, PARAM_OMEGA, &perm_seedexpander);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);


    // Compute m by decoding v - u.y; the decoder takes a byte string, so tmp1 is reused as a byte buffer
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_code_decode(m, (uint8_t *)tmp1);
 }
--- a/crypto_kem/hqc-rmrs-128/clean/hqc.h
+++ b/crypto_kem/hqc-rmrs-128/clean/hqc.h
@@ -1,19 +0,0 @@
 #ifndef HQC_H
 #define HQC_H


 /**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

 #include <stdint.h>

 // Generates a key pair; sk contains the secret seed followed by a copy of pk.
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

 // Encrypts m under pk into (u, v); all randomness is derived from theta.
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

 // Decrypts (u, v) with sk into m.
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/kem.c
+++ b/crypto_kem/hqc-rmrs-128/clean/kem.c
@@ -1,140 +0,0 @@
 #include "api.h"
 #include "fips202.h"
 #include "hqc.h"
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "sha2.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file kem.c
 * @brief Implementation of api.h
 */



 /**
 * @brief Key generation of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper around the HQC_PKE key generation.
 *
 * The public key holds the seed used to generate the vector <b>h</b> together with the syndrome <b>s</b>.
 *
 * The secret key holds the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (key generation always reports success)
 */
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {
    const int status = 0;

    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(pk, sk);

    return status;
 }



 /**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is encrypted with randomness theta = SHA3-512(m).
 * The ciphertext is (u, v, d) with d = SHA-512(m); the shared secret is SHA-512(m || u || v).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t msg[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char transcript[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *cursor = transcript;

    // Random message and the encryption randomness derived from it
    randombytes(msg, VEC_K_SIZE_BYTES);
    sha3_512(theta, msg, VEC_K_SIZE_BYTES);

    // Encrypt the message, then hash it into d
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u, v, msg, theta, pk);
    sha512(d, msg, VEC_K_SIZE_BYTES);

    // Shared secret: hash of msg || u || v
    memcpy(cursor, msg, VEC_K_SIZE_BYTES);
    cursor += VEC_K_SIZE_BYTES;
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(cursor, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    cursor += VEC_N_SIZE_BYTES;
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(cursor, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, transcript, sizeof(transcript));

    // Serialize the ciphertext
    PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(ct, u, v, d);

    return 0;
 }



 /**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted, the recovered message is re-encrypted, and the
 * result is compared with the received ciphertext. On mismatch the shared
 * secret is cleared and -1 is returned.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
 int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    uint8_t result;
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk (the public key is stored right after the secret seed)
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, m, VEC_K_SIZE_BYTES);

    // Computing shared secret from m and the received (u, v)
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    // NOTE(review): presumably vect_compare returns nonzero when its inputs differ - confirm in vector.c
    result = PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare(d, d2, SHA512_BYTES);
    // Stretch "any comparison flagged" into a full byte mask (0xff or 0x00) without branching
    result = (uint8_t) (-((int16_t) result) >> 15);
    // Clear the shared secret when the mask is set, keep it otherwise
    for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
        ss[i] &= ~result;
    }


    // 0 when the mask is clear, -1 when it is set
    return -(result & 1);
 }
--- a/crypto_kem/hqc-rmrs-128/clean/parameters.h
+++ b/crypto_kem/hqc-rmrs-128/clean/parameters.h
@@ -1,98 +0,0 @@
 #ifndef HQC_PARAMETERS_H
 #define HQC_PARAMETERS_H


 /**
 * @file parameters.h
 * @brief Parameters of the HQC_KEM IND-CCA2 scheme
 */
 #include "api.h"


 #define CEIL_DIVIDE(a, b)  (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

 /*
  #define PARAM_N                               Define the parameter n of the scheme
  #define PARAM_N1                              Define the parameter n1 of the scheme (length of Reed-Solomon code)
  #define PARAM_N2                              Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
  #define PARAM_N1N2                            Define the length in bits of the Concatenated code
  #define PARAM_OMEGA                           Define the parameter omega of the scheme
  #define PARAM_OMEGA_E                         Define the parameter omega_e of the scheme
  #define PARAM_OMEGA_R                         Define the parameter omega_r of the scheme
  #define PARAM_SECURITY                        Define the security level corresponding to the chosen parameters
  #define PARAM_DFR_EXP                         Define the decryption failure rate corresponding to the chosen parameters

  #define SECRET_KEY_BYTES                      Define the size of the secret key in bytes
  #define PUBLIC_KEY_BYTES                      Define the size of the public key in bytes
  #define SHARED_SECRET_BYTES                   Define the size of the shared secret in bytes
  #define CIPHERTEXT_BYTES                      Define the size of the ciphertext in bytes

  #define UTILS_REJECTION_THRESHOLD             Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
  #define VEC_N_SIZE_BYTES                      Define the size of the array used to store a PARAM_N sized vector in bytes
  #define VEC_K_SIZE_BYTES                      Define the size of the array used to store a PARAM_K sized vector in bytes
  #define VEC_N1_SIZE_BYTES                     Define the size of the array used to store a PARAM_N1 sized vector in bytes
  #define VEC_N1N2_SIZE_BYTES                   Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

  #define VEC_N_SIZE_64                         Define the size of the array used to store a PARAM_N sized vector in 64 bits
  #define VEC_K_SIZE_64                         Define the size of the array used to store a PARAM_K sized vector in 64 bits
  #define VEC_N1_SIZE_64                        Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
  #define VEC_N1N2_SIZE_64                      Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

  #define PARAM_DELTA                           Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
  #define PARAM_M                               Define a positive integer
  #define PARAM_GF_POLY                         Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
  #define PARAM_GF_MUL_ORDER                    Define the size of the multiplicative group of GF(2^PARAM_M),  i.e 2^PARAM_M -1
  #define PARAM_K                               Define the size of the information bits of the Reed-Solomon code
  #define PARAM_G                               Define the size of the generator polynomial of Reed-Solomon code
  #define PARAM_FFT                             The additive FFT takes a 2^PARAM_FFT polynomial as input
                                                We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=15
                                                The smallest power of 2 greater than 15+1 is 32=2^5
  #define RS_POLY_COEFS                         Coefficients of the generator polynomial of the Reed-Solomon code

  #define RED_MASK                              A mask for the higher bits of a vector
  #define SHA512_BYTES                          Define the size of SHA512 output in bytes
  #define SEED_BYTES                            Define the size of the seed in bytes
  #define SEEDEXPANDER_MAX_LENGTH               Define the seed expander max length
 */

 #define PARAM_N                                                             17669
 #define PARAM_N1                                46
 #define PARAM_N2                                384
 #define PARAM_N1N2                              17664
 #define PARAM_OMEGA                             66
 #define PARAM_OMEGA_E                           75
 #define PARAM_OMEGA_R                           75
 #define PARAM_SECURITY                          128
 #define PARAM_DFR_EXP                           128

 #define SECRET_KEY_BYTES                        PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES
 #define PUBLIC_KEY_BYTES                        PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES
 #define SHARED_SECRET_BYTES                     PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES
 #define CIPHERTEXT_BYTES                        PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES

 #define UTILS_REJECTION_THRESHOLD             16767881
 #define VEC_N_SIZE_BYTES                        CEIL_DIVIDE(PARAM_N, 8)
 #define VEC_K_SIZE_BYTES                        PARAM_K
 #define VEC_N1_SIZE_BYTES                       PARAM_N1
 #define VEC_N1N2_SIZE_BYTES                     CEIL_DIVIDE(PARAM_N1N2, 8)

 #define VEC_N_SIZE_64                           CEIL_DIVIDE(PARAM_N, 64)
 #define VEC_K_SIZE_64                           CEIL_DIVIDE(PARAM_K, 8)
 #define VEC_N1_SIZE_64                          CEIL_DIVIDE(PARAM_N1, 8)
 #define VEC_N1N2_SIZE_64                        CEIL_DIVIDE(PARAM_N1N2, 64)

 #define PARAM_DELTA                             15
 #define PARAM_M                                 8
 #define PARAM_GF_POLY                           0x11D
 #define PARAM_GF_MUL_ORDER                      255
 #define PARAM_K                                 16
 #define PARAM_G                                 31
 #define PARAM_FFT                               5
 #define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

 #define RED_MASK                                0x1f
 #define SHA512_BYTES                            64
 #define SEED_BYTES                              40
 #define SEEDEXPANDER_MAX_LENGTH                 4294967295

 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/parsing.c
+++ b/crypto_kem/hqc-rmrs-128/clean/parsing.c
@@ -1,186 +0,0 @@
 #include "nistseedexpander.h"
 #include "parameters.h"
 #include "parsing.h"
 #include "randombytes.h"
 #include "vector.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file parsing.c
 * @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
 */


 /**
 * @brief Store a 64-bit value as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer (8 bytes are written)
 * @param[in] in Value to store
 */
 void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in) {
    for (unsigned int k = 0; k < 8; k++) {
        out[k] = (unsigned char) ((in >> (8 * k)) & 0xFF);
    }
 }


 /**
 * @brief Load 8 bytes as a 64-bit value, least significant byte first
 *
 * @param[in] in Source buffer (8 bytes are read)
 * @returns the value whose byte k (counted from the least significant) is in[k]
 */
 uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in) {
    uint64_t value = 0;

    // Walk from the most significant byte down so each step shifts in one more byte.
    for (int k = 7; k >= 0; k--) {
        value = (value << 8) | in[k];
    }

    return value;
 }

 /**
 * @brief Convert a byte string into an array of 64-bit words (least significant byte first)
 *
 * Full groups of 8 input bytes are converted first; if fewer than 8 bytes
 * remain and there is still room in the output, they form one last word.
 * Conversion stops as soon as either the input or the output is exhausted.
 *
 * @param[out] out64 Destination array of 64-bit words
 * @param[in] outlen Number of words available in out64
 * @param[in] in8 Source byte string
 * @param[in] inlen Number of bytes in in8
 */
 void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t consumed = 0;
    size_t produced = 0;
    size_t remaining;

    // whole 8-byte groups
    for (; produced < outlen && consumed + 8 <= inlen; produced++, consumed += 8) {
        out64[produced] = PQCLEAN_HQCRMRS128_CLEAN_load8(in8 + consumed);
    }

    // nothing left to read, or nowhere left to write
    if (consumed >= inlen || produced >= outlen) {
        return;
    }

    // trailing bytes (at most 7): start from the last one and shift the others in below it
    remaining = inlen - consumed;
    out64[produced] = in8[inlen - 1];
    for (size_t k = remaining - 1; k > 0; k--) {
        out64[produced] <<= 8;
        out64[produced] |= in8[consumed + k - 1];
    }
 }

 /**
 * @brief Convert an array of 64-bit words into a byte string (least significant byte first)
 *
 * Output byte p is taken from word p / 8. Conversion stops when outlen bytes
 * have been written or when all inlen words have been consumed.
 *
 * @param[out] out8 Destination byte string
 * @param[in] outlen Number of bytes available in out8
 * @param[in] in64 Source array of 64-bit words
 * @param[in] inlen Number of words in in64
 */
 void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen && pos / 8 < inlen; pos++) {
        out8[pos] = (uint8_t) ((in64[pos / 8] >> (8 * (pos % 8))) & 0xFF);
    }
 }


 /**
 * @brief Serialize a secret key
 *
 * The output is the seed used to generate vectors <b>x</b> and <b>y</b>,
 * immediately followed by the public key (appended in order to respect NIST API).
 *
 * @param[out] sk String containing the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_part = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_part, pk, PUBLIC_KEY_BYTES);
 }

 /**
 * @brief Parse a secret key from a string
 *
 * The string holds the seed used to generate vectors <b>x</b> and <b>y</b>, followed by the public key.
 * The public key is copied out and x, then y, are regenerated from the seed.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint32_t representation of vector y
 * @param[out] pk String containing the public key
 * @param[in] sk String containing the secret key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct expander;

    // split the string into its two parts
    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // x is drawn before y from the same expander
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&expander, y, PARAM_OMEGA);
 }

 /**
 * @brief Serialize a public key
 *
 * The output is the seed used to generate the vector <b>h</b>, followed by the syndrome <b>s</b> as bytes.
 *
 * @param[out] pk String containing the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *syndrome_part = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(syndrome_part, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
 }



 /**
 * @brief Parse a public key from a string
 *
 * The string holds the seed used to generate the vector <b>h</b>, followed by the syndrome <b>s</b>.
 * s is read back from its bytes and h is regenerated from the seed.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *syndrome_part = pk + SEED_BYTES;

    memcpy(seed, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(s, VEC_N_SIZE_64, syndrome_part, VEC_N_SIZE_BYTES);

    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&expander, h);
 }


 /**
 * @brief Serialize a ciphertext
 *
 * The output is the concatenation of <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String containing the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *u_part = ct;
    uint8_t *v_part = u_part + VEC_N_SIZE_BYTES;
    uint8_t *d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(u_part, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(v_part, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_part, d, SHA512_BYTES);
 }


 /**
 * @brief Parse a ciphertext from a string
 *
 * The string is the concatenation of <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String containing the hash d
 * @param[in] ct String containing the ciphertext
 */
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *u_part = ct;
    const uint8_t *v_part = u_part + VEC_N_SIZE_BYTES;
    const uint8_t *d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(u, VEC_N_SIZE_64, u_part, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
 }
--- a/crypto_kem/hqc-rmrs-128/clean/parsing.h
+++ b/crypto_kem/hqc-rmrs-128/clean/parsing.h
@@ -1,36 +0,0 @@
 #ifndef PARSING_H
 #define PARSING_H


 /**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

 // <stddef.h> provides size_t, which the array helpers below use in their
 // prototypes; <stdint.h> alone is not required to declare it.
 #include <stddef.h>
 #include <stdint.h>

 // Conversions between byte strings and 64-bit words (single word and arrays).
 void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in);

 uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in);

 void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

 void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


 // Secret key <-> byte string.
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


 // Public key <-> byte string.
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


 // Ciphertext (u, v, d) <-> byte string.
 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

 void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


 #endif
--- a/crypto_kem/hqc-rmrs-128/clean/reed_muller.c
+++ b/crypto_kem/hqc-rmrs-128/clean/reed_muller.c
@@ -1,237 +0,0 @@
 #include "parameters.h"
 #include "reed_muller.h"
 #include <stdint.h>
 #include <string.h>
 /**
 * @file reed_muller.c
 * Constant time implementation of Reed-Muller code RM(1,7)
 */



 // number of repeated 128-bit code words per message byte, derived from PARAM_N2
 // (CEIL_DIVIDE and PARAM_N2 come from outside this file; presumably a rounding-up division)
 #define MULTIPLICITY                   CEIL_DIVIDE(PARAM_N2, 128)

 // copy bit 0 into all bits of a 32 bit value:
 // evaluates to 0 when bit 0 of x is clear and to -1 (all ones) when it is set
 #define BIT0MASK(x) (-((x) & 1))


 // forward declarations of the file-local helpers defined further down
 static void encode(uint8_t *word, uint8_t message);
 static void hadamard(uint16_t src[128], uint16_t dst[128]);
 static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]);
 static uint8_t find_peaks(const uint16_t transform[128]);



 /**
 * @brief Encode one message byte into one 128-bit RM(1,7) codeword
 *
 * The codeword is the XOR of the rows of the encoding matrix selected by the
 * set bits of the message.
 *
 * Encoding matrix of this code:
 * bit pattern (note that bits are numbered big endian)
 * 0   aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1   cccccccc cccccccc cccccccc cccccccc
 * 2   f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3   ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4   ffff0000 ffff0000 ffff0000 ffff0000
 * 5   ffffffff 00000000 ffffffff 00000000
 * 6   ffffffff ffffffff 00000000 00000000
 * 7   ffffffff ffffffff ffffffff ffffffff
 * (Warning: in the bit matrix above, low bits are at the left!)
 *
 * @param[out] word An RM(1,7) codeword (16 bytes are written)
 * @param[in] message A message
 */
 static void encode(uint8_t *word, uint8_t message) {
    // rows 0..4 repeat the same 32-bit pattern in all four quarters
    static const uint32_t row[5] = {
        0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
    };
    uint32_t quarter[4];
    uint32_t flip5, flip6;
    size_t b, q, k;

    // row 7 inverts every bit, so it seeds the common part
    quarter[0] = BIT0MASK(message >> 7);
    for (b = 0; b < 5; b++) {
        quarter[0] ^= BIT0MASK(message >> b) & row[b];
    }

    // row 5 inverts quarters 1 and 3, row 6 inverts quarters 2 and 3
    flip5 = BIT0MASK(message >> 5);
    flip6 = BIT0MASK(message >> 6);
    quarter[1] = quarter[0] ^ flip5;
    quarter[2] = quarter[0] ^ flip6;
    quarter[3] = quarter[1] ^ flip6;

    // every quarter is stored least significant byte first
    for (q = 0; q < 4; q++) {
        for (k = 0; k < 4; k++) {
            word[4 * q + k] = (uint8_t) (quarter[q] >> (8 * k));
        }
    }
 }



 /**
 * @brief Hadamard transform
 *
 * Computes the Hadamard transform of src and leaves the result in dst.
 * src is clobbered: both buffers serve as scratch space between passes.
 * The method is easiest to follow on H(3) instead of H(7):
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * This takes three passes, each of which sets the lower half of the
 * output buffer to the pairwise sums and the upper half to the pairwise
 * differences of its input:
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3:   a+b+c+d+e+f+g+h   a+b-c-d+e+f-g-h   a+b+c+d-e-f-g-h   a+b-c-d-e-f+g+h
 *                    a-b+c-d+e-f+g-h   a-b-c+d+e-f-g+h   a-b+c-d-e+f-g+h   a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * In the same way this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src Expanded codeword to transform; overwritten with intermediate values
 * @param[out] dst Receives the transformed expanded codeword
 */
 static void hadamard(uint16_t src[128], uint16_t dst[128]) {
    // data ping-pongs between the buffers, starting src -> dst;
    // seven is odd, so the last pass writes into dst
    uint16_t *from = src;
    uint16_t *into = dst;
    uint32_t remaining = 7;

    while (remaining > 0) {
        for (uint32_t pair = 0; pair < 64; pair++) {
            into[pair] = (uint16_t) (from[2 * pair] + from[2 * pair + 1]);
            into[pair + 64] = (uint16_t) (from[2 * pair] - from[2 * pair + 1]);
        }
        // exchange the roles of the two buffers for the next pass
        uint16_t *previous = from;
        from = into;
        into = previous;
        remaining--;
    }
 }



 /**
 * @brief Add multiple codewords into expanded codeword
 *
 * Every bit of a 128-bit codeword becomes one uint16_t counter; the
 * MULTIPLICITY copies found in src are summed bit position by bit position.
 * Memory is accessed in order.
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high for every summed copy
 *
 * @param[out] dest Structure that contain the expanded codeword
 * @param[in] src Structure that contain the codeword
 */
 static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) {
    size_t pos, rep;
    // the first copy initialises the counters
    for (pos = 0; pos < 128; pos++) {
        dest[pos] = (uint16_t) ((src[pos / 8] >> (pos % 8)) & 1);
    }
    // every further copy is accumulated on top
    for (rep = 1; rep < MULTIPLICITY; rep++) {
        const uint8_t *block = &src[16 * rep];
        for (pos = 0; pos < 128; pos++) {
            dest[pos] += (uint16_t) ((block[pos / 8] >> (pos % 8)) & 1);
        }
    }
 }



 /**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the entry
 * with the largest absolute value (entries are read as 16-bit two's complement),
 * and add 128 if the peak is positive.
 * If several entries share the largest absolute value, the one with the
 * smallest index (the lowest 7 bits of the result) is taken.
 * No branch depends on the data: every selection is done with masks.
 *
 * @param[in] transform Structure that contain the expanded codeword
 * @return index of the peak in bits 0..6, bit 7 set when the peak is not negative
 */
 static uint8_t find_peaks(const uint16_t transform[128]) {
    uint16_t best_mag = 0;
    uint16_t best_val = 0;
    uint16_t best_idx = 0;

    for (uint16_t idx = 0; idx < 128; idx++) {
        uint16_t val = transform[idx];
        // 0xffff when val is negative, 0 otherwise
        uint16_t sign = (uint16_t) (0 - (val >> 15));
        // two's complement absolute value: (val ^ sign) - sign
        uint16_t mag = (uint16_t) ((val ^ sign) - sign);
        // 0xffff when the current magnitude is strictly larger than the best so far
        uint16_t take = (uint16_t) (0 - (((uint16_t) (best_mag - mag)) >> 15));
        uint16_t keep = (uint16_t) ~take;

        best_val = (uint16_t) ((best_val & keep) | (val & take));
        best_idx = (uint16_t) ((best_idx & keep) | (idx & take));
        best_mag = (uint16_t) ((best_mag & keep) | (mag & take));
    }
    // (best_val >> 15) - 1 is all ones exactly when the peak's sign bit is clear
    best_idx |= 128 & ((best_val >> 15) - 1);
    return (uint8_t) best_idx;
 }




 /**
 * @brief Encodes a message with the duplicated Reed-Muller code
 *
 * The message consists of VEC_N1_SIZE_BYTES bytes; each byte is encoded into
 * MULTIPLICITY repeats of one 128-bit RM(1,7) codeword, i.e. 16 * MULTIPLICITY
 * bytes of cdw.
 *
 * @param[out] cdw Byte array receiving the encoded message (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Byte array storing the message (VEC_N1_SIZE_BYTES bytes are read)
 */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // start of the group of identical codewords belonging to msg[i]
        uint8_t *block = &cdw[16 * i * MULTIPLICITY];
        // only the first codeword is computed ...
        encode(block, msg[i]);
        // ... the remaining ones are plain copies of it
        for (size_t rep = 1; rep < MULTIPLICITY; rep++) {
            memcpy(block + 16 * rep, block, 16);
        }
    }
 }



 /**
 * @brief Decodes the received word
 *
 * Decoding uses the fast Hadamard transform; for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes @cite macwilliams1977theory
 *
 * Each group of 16 * MULTIPLICITY bytes of cdw yields one message byte.
 *
 * @param[out] msg Byte array receiving the decoded message (VEC_N1_SIZE_BYTES bytes are written)
 * @param[in] cdw Byte array storing the received word (16 * MULTIPLICITY bytes per message byte)
 */
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    uint16_t transform[128];
    uint16_t expanded[128];
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        const uint8_t *block = &cdw[16 * i * MULTIPLICITY];
        // sum the repeated codewords bit by bit
        expand_and_sum(expanded, block);
        // transform the sums; the result ends up in transform
        hadamard(expanded, transform);
        // the 0/1 (instead of -1/+1) representation leaves entry 0 too high by 64 per copy
        transform[0] = (uint16_t) (transform[0] - (64 * MULTIPLICITY));
        // the position and sign of the peak give the decoded byte
        msg[i] = find_peaks(transform);
    }
 }
--- a/crypto_kem/hqc-rmrs-128/clean/reed_muller.h
+++ b/crypto_kem/hqc-rmrs-128/clean/reed_muller.h
@@ -1,18 +0,0 @@
 #ifndef REED_MULLER_H
 #define REED_MULLER_H


 /**
 * @file reed_muller.h
 * Header file of reed_muller.c
 */
 #include "parameters.h"
 #include <stddef.h>
 #include <stdint.h>

 // Encodes VEC_N1_SIZE_BYTES message bytes from msg; each byte becomes
 // MULTIPLICITY copies of a 16-byte RM(1,7) codeword written to cdw.
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

 // Decodes cdw (same layout as produced by the encoder) into
 // VEC_N1_SIZE_BYTES message bytes written to msg.
 void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


 #endif