1
1
дзеркало https://github.com/henrydcase/pqc.git synced 2024-11-22 07:35:38 +00:00
This commit is contained in:
John M. Schanck 2020-09-10 18:52:20 -04:00 зафіксовано Kris Kwiatkowski
джерело 9d35c7bb57
коміт a3c627fe6b
44 змінених файлів з 165 додано та 722 видалено

@ -13,104 +13,11 @@
*/
static uint16_t mod(uint16_t i, uint16_t modulus);
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
static void compute_roots(uint64_t *error, const uint16_t *sigma);
/**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * The reduction is a single conditional subtraction, so i must be less than
 * 2*modulus; the result is then either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;
    // Bit 15 of the wrapped difference acts as the borrow flag (set when i < modulus)
    uint16_t borrow = (uint16_t) (diff >> 15);
    // All-ones when a borrow occurred, so the modulus is added back; zero otherwise
    uint16_t add_back = (uint16_t) (0 - borrow) & modulus;
    return (uint16_t) (diff + add_back);
}
/**
 * @brief Fills in the odd binary cyclotomic cosets modulo 2^m-1 for representatives below upper_bound.
 *
 * For every odd i < upper_bound that is not yet assigned, each index in the coset of i
 * (i, 2i, 4i, ... reduced modulo PARAM_GF_MUL_ORDER) receives i as its representative.
 * A zero entry means "not assigned", so the array is expected to be zeroed by the caller.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd representatives
 */
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // Skip integers that already belong to the class of a smaller representative
        if (cosets[rep] != 0) {
            continue;
        }

        uint16_t member = rep;
        cosets[member] = rep;

        // Walk the remaining PARAM_M - 1 members of the class by repeated doubling
        for (size_t remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
}
/**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The search for the real correction capacity never reads past the end of the coset table.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is 0xffff when bch_poly[j] is nonzero (coefficients are assumed to be
            // below 2^15), 0 otherwise; the table lookups happen in both cases
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity, stopping at the end of the table
    while (2 * *t + 1 < PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
}
/**
* @brief Computes the values alpha^ij for decoding syndromes
*

@ -15,8 +15,6 @@
void PQCLEAN_HQC128_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);
size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);
void PQCLEAN_HQC128_AVX2_table_alphaij_generation(const uint16_t *exp);

@ -48,7 +48,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -325,7 +325,8 @@ void PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
uint64_t bit;
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -23,8 +23,10 @@
* @param[in] em Pointer to an array that is the code word
*/
void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
size_t t = 0, b, bn, bi, c, cn, ci;
size_t t = 0;
uint32_t b, bn, bi, c, cn, ci;
uint64_t cx, ones;
uint64_t mask;
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
bn = b >> 6;
@ -33,9 +35,9 @@ void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
cn = c >> 6;
ci = c & 63;
cx = em[cn] << (63 - ci);
int64_t verif = (cn == (bn + 1));
ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx * verif));
m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
t++;
}
}

@ -11,8 +11,6 @@
*/
static uint16_t mod(uint16_t i, uint16_t modulus);
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
@ -21,97 +19,6 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
static void compute_roots(uint64_t *error, const uint16_t *sigma);
/**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * The reduction is a single conditional subtraction, so i must be less than
 * 2*modulus; the result is then either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;
    // Bit 15 of the wrapped difference acts as the borrow flag (set when i < modulus)
    uint16_t borrow = (uint16_t) (diff >> 15);
    // All-ones when a borrow occurred, so the modulus is added back; zero otherwise
    uint16_t add_back = (uint16_t) (0 - borrow) & modulus;
    return (uint16_t) (diff + add_back);
}
/**
 * @brief Fills in the odd binary cyclotomic cosets modulo 2^m-1 for representatives below upper_bound.
 *
 * For every odd i < upper_bound that is not yet assigned, each index in the coset of i
 * (i, 2i, 4i, ... reduced modulo PARAM_GF_MUL_ORDER) receives i as its representative.
 * A zero entry means "not assigned", so the array is expected to be zeroed by the caller.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd representatives
 */
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // Skip integers that already belong to the class of a smaller representative
        if (cosets[rep] != 0) {
            continue;
        }

        uint16_t member = rep;
        cosets[member] = rep;

        // Walk the remaining PARAM_M - 1 members of the class by repeated doubling
        for (size_t remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
}
/**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The search for the real correction capacity never reads past the end of the coset table.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is 0xffff when bch_poly[j] is nonzero (coefficients are assumed to be
            // below 2^15), 0 otherwise; the table lookups happen in both cases
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity, stopping at the end of the table
    while (2 * *t + 1 < PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
}
/**
* @brief Unpacks the message message to the array message_unpacked where each byte stores a bit of the message
*

@ -17,7 +17,4 @@ void PQCLEAN_HQC128_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *me
void PQCLEAN_HQC128_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);
size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);
#endif

@ -51,7 +51,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -134,7 +134,8 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0};
uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0};
size_t i, n;
uint16_t n;
size_t i;
n = 1 << (m_f - 2);
memcpy(Q0, f0 + n, 2 * n);
@ -627,7 +628,8 @@ void PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uin
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
uint64_t bit;
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t *pt;
uint16_t *res_16;
uint16_t i, j;
for (uint32_t i = 0; i < 16; i++) {
for (i = 0; i < 16; i++) {
permuted_table[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
for (uint32_t i = 0; i < 15; i++) {
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;
for (uint32_t i = 1; i < 16; i++) {
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
for (uint32_t i = 0; i + 1 < weight; i++) {
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res_16 = ((uint16_t *) o) + s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
*res_16++ ^= (uint16_t) pt[j];
*res_16++ ^= (uint16_t) (pt[j] >> 16);
*res_16++ ^= (uint16_t) (pt[j] >> 32);

@ -20,27 +20,26 @@ static inline int32_t popcount(uint64_t n);
* @param[in] m Pointer to an array that is the message
*/
void PQCLEAN_HQC128_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};
for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
for (size_t j = 0; j < 64; j++) {
uint8_t bit = (m[i] >> j) & 0x1;
uint32_t pos_r = PARAM_N2 * ((i << 6) + j);
uint16_t idx_r = (pos_r & 0x3f);
uint64_t *p64 = em;
p64 += pos_r >> 6;
*p64 ^= mask[bit][0] << idx_r;
*(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
uint16_t i, j, bit, idx_r;
uint32_t pos_r;
uint64_t *p64 = em;
const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};
for (i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
for (j = 0; j < 64; j++) {
bit = (m[i] >> j) & 0x1;
pos_r = PARAM_N2 * ((i << 6) + j);
idx_r = (pos_r & 0x3f);
p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
}
}
for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) {
uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
uint16_t idx_r = (pos_r & 0x3f);
uint64_t *p64 = em;
p64 += pos_r >> 6;
*p64 ^= mask[bit][0] << idx_r;
*(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
for (j = 0; j < (PARAM_N1 & 0x3f); j++) {
bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
idx_r = (pos_r & 0x3f);
p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
}
}
@ -74,8 +73,10 @@ static inline int32_t popcount(uint64_t n) {
* @param[in] em Pointer to an array that is the code word
*/
void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
size_t t = 0, b, bn, bi, c, cn, ci;
size_t t = 0;
uint32_t b, bn, bi, c, cn, ci;
uint64_t cx, ones;
uint64_t mask;
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
bn = b >> 6;
@ -84,9 +85,9 @@ void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
cn = c >> 6;
ci = c & 63;
cx = em[cn] << (63 - ci);
int64_t verif = (cn == (bn + 1));
ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx * verif));
m[t >> 6] |= ((uint64_t) (ones > PARAM_T)) << (t & 63);
mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
t++;
}
}

@ -13,104 +13,11 @@
*/
static uint16_t mod(uint16_t i, uint16_t modulus);
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
static void compute_roots(uint64_t *error, const uint16_t *sigma);
/**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * The reduction is a single conditional subtraction, so i must be less than
 * 2*modulus; the result is then either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;
    // Bit 15 of the wrapped difference acts as the borrow flag (set when i < modulus)
    uint16_t borrow = (uint16_t) (diff >> 15);
    // All-ones when a borrow occurred, so the modulus is added back; zero otherwise
    uint16_t add_back = (uint16_t) (0 - borrow) & modulus;
    return (uint16_t) (diff + add_back);
}
/**
 * @brief Fills in the odd binary cyclotomic cosets modulo 2^m-1 for representatives below upper_bound.
 *
 * For every odd i < upper_bound that is not yet assigned, each index in the coset of i
 * (i, 2i, 4i, ... reduced modulo PARAM_GF_MUL_ORDER) receives i as its representative.
 * A zero entry means "not assigned", so the array is expected to be zeroed by the caller.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd representatives
 */
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // Skip integers that already belong to the class of a smaller representative
        if (cosets[rep] != 0) {
            continue;
        }

        uint16_t member = rep;
        cosets[member] = rep;

        // Walk the remaining PARAM_M - 1 members of the class by repeated doubling
        for (size_t remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
}
/**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The search for the real correction capacity never reads past the end of the coset table.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is 0xffff when bch_poly[j] is nonzero (coefficients are assumed to be
            // below 2^15), 0 otherwise; the table lookups happen in both cases
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity, stopping at the end of the table
    while (2 * *t + 1 < PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
}
/**
* @brief Computes the values alpha^ij for decoding syndromes
*

@ -15,8 +15,6 @@
void PQCLEAN_HQC192_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);
size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);
void PQCLEAN_HQC192_AVX2_table_alphaij_generation(const uint16_t *exp);

@ -325,7 +325,8 @@ void PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
uint64_t bit;
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -23,8 +23,10 @@
* @param[in] em Pointer to an array that is the code word
*/
void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
size_t t = 0, b, bn, bi, c, cn, ci;
size_t t = 0;
uint32_t b, bn, bi, c, cn, ci;
uint64_t cx, ones;
uint64_t mask;
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
bn = b >> 6;
@ -33,9 +35,9 @@ void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
cn = c >> 6;
ci = c & 63;
cx = em[cn] << (63 - ci);
int64_t verif = (cn == (bn + 1));
ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx * verif));
m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
ones = _mm_popcnt_u64(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
t++;
}
}

@ -11,8 +11,6 @@
*/
static uint16_t mod(uint16_t i, uint16_t modulus);
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
@ -21,97 +19,6 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
static void compute_roots(uint64_t *error, const uint16_t *sigma);
/**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * The reduction is a single conditional subtraction, so i must be less than
 * 2*modulus; the result is then either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;
    // Bit 15 of the wrapped difference acts as the borrow flag (set when i < modulus)
    uint16_t borrow = (uint16_t) (diff >> 15);
    // All-ones when a borrow occurred, so the modulus is added back; zero otherwise
    uint16_t add_back = (uint16_t) (0 - borrow) & modulus;
    return (uint16_t) (diff + add_back);
}
/**
 * @brief Fills in the odd binary cyclotomic cosets modulo 2^m-1 for representatives below upper_bound.
 *
 * For every odd i < upper_bound that is not yet assigned, each index in the coset of i
 * (i, 2i, 4i, ... reduced modulo PARAM_GF_MUL_ORDER) receives i as its representative.
 * A zero entry means "not assigned", so the array is expected to be zeroed by the caller.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd representatives
 */
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // Skip integers that already belong to the class of a smaller representative
        if (cosets[rep] != 0) {
            continue;
        }

        uint16_t member = rep;
        cosets[member] = rep;

        // Walk the remaining PARAM_M - 1 members of the class by repeated doubling
        for (size_t remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
}
/**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The search for the real correction capacity never reads past the end of the coset table.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is 0xffff when bch_poly[j] is nonzero (coefficients are assumed to be
            // below 2^15), 0 otherwise; the table lookups happen in both cases
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity, stopping at the end of the table
    while (2 * *t + 1 < PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
}
/**
* @brief Unpacks the message message to the array message_unpacked where each byte stores a bit of the message
*

@ -17,7 +17,4 @@ void PQCLEAN_HQC192_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *me
void PQCLEAN_HQC192_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);
size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);
#endif

@ -51,7 +51,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -134,7 +134,8 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0};
uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0};
size_t i, n;
uint16_t n;
size_t i;
n = 1 << (m_f - 2);
memcpy(Q0, f0 + n, 2 * n);

@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t *pt;
uint16_t *res_16;
uint16_t i, j;
for (uint32_t i = 0; i < 16; i++) {
for (i = 0; i < 16; i++) {
permuted_table[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
for (uint32_t i = 0; i < 15; i++) {
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;
for (uint32_t i = 1; i < 16; i++) {
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
for (uint32_t i = 0; i + 1 < weight; i++) {
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res_16 = ((uint16_t *) o) + s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
*res_16++ ^= (uint16_t) pt[j];
*res_16++ ^= (uint16_t) (pt[j] >> 16);
*res_16++ ^= (uint16_t) (pt[j] >> 32);

@ -20,27 +20,26 @@ static inline int32_t popcount(uint64_t n);
* @param[in] m Pointer to an array that is the message
*/
void PQCLEAN_HQC192_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}};
for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
for (size_t j = 0; j < 64; j++) {
uint8_t bit = (m[i] >> j) & 0x1;
uint32_t pos_r = PARAM_N2 * ((i << 6) + j);
uint16_t idx_r = (pos_r & 0x3f);
uint64_t *p64 = em;
p64 += pos_r >> 6;
*p64 ^= mask[bit][0] << idx_r;
*(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
uint16_t i, j, bit, idx_r;
uint32_t pos_r;
uint64_t *p64 = em;
const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}};
for (i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
for (j = 0; j < 64; j++) {
bit = (m[i] >> j) & 0x1;
pos_r = PARAM_N2 * ((i << 6) + j);
idx_r = (pos_r & 0x3f);
p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
}
}
for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) {
uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
uint16_t idx_r = (pos_r & 0x3f);
uint64_t *p64 = em;
p64 += pos_r >> 6;
*p64 ^= mask[bit][0] << idx_r;
*(p64 + 1) ^= mask[bit][1] >> ((63 - idx_r));
for (j = 0; j < (PARAM_N1 & 0x3f); j++) {
bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
idx_r = (pos_r & 0x3f);
p64[pos_r >> 6] ^= mask[bit][0] << idx_r;
p64[(pos_r >> 6) + 1] ^= mask[bit][1] >> ((63 - idx_r));
}
}
@ -74,8 +73,11 @@ static inline int32_t popcount(uint64_t n) {
* @param[in] em Pointer to an array that is the code word
*/
void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
size_t t = 0, b, bn, bi, c, cn, ci;
size_t t = 0;
uint32_t b, bn, bi, c, cn, ci;
uint64_t cx, ones;
uint64_t mask;
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
bn = b >> 6;
bi = b & 63;
@ -83,9 +85,9 @@ void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
cn = c >> 6;
ci = c & 63;
cx = em[cn] << (63 - ci);
int64_t verif = (cn == (bn + 1));
ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx * verif));
m[t >> 6] |= ((uint64_t) (ones > PARAM_T)) << (t & 63);
mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
ones = popcount(((em[bn] >> bi) & MASK_N2) | (cx & ~mask));
m[t >> 6] |= (uint64_t) ((((PARAM_T - ones) >> 31) & 1) << (t & 63));
t++;
}
}

@ -13,104 +13,11 @@
*/
static uint16_t mod(uint16_t i, uint16_t modulus);
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
static void compute_syndromes(__m256i *syndromes, const uint64_t *rcv);
static void compute_roots(uint64_t *error, const uint16_t *sigma);
/**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * The reduction is a single conditional subtraction, so i must be less than
 * 2*modulus; the result is then either i or i-modulus.
 * @returns i mod (modulus)
 * @param[in] i The integer to reduce
 * @param[in] modulus The modulus
 */
static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = i - modulus;
    // Bit 15 of the wrapped difference acts as the borrow flag (set when i < modulus)
    uint16_t borrow = (uint16_t) (diff >> 15);
    // All-ones when a borrow occurred, so the modulus is added back; zero otherwise
    uint16_t add_back = (uint16_t) (0 - borrow) & modulus;
    return (uint16_t) (diff + add_back);
}
/**
 * @brief Fills in the odd binary cyclotomic cosets modulo 2^m-1 for representatives below upper_bound.
 *
 * For every odd i < upper_bound that is not yet assigned, each index in the coset of i
 * (i, 2i, 4i, ... reduced modulo PARAM_GF_MUL_ORDER) receives i as its representative.
 * A zero entry means "not assigned", so the array is expected to be zeroed by the caller.
 * @param[out] cosets Array receiving the coset representatives
 * @param[in] upper_bound Exclusive upper bound on the odd representatives
 */
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // Skip integers that already belong to the class of a smaller representative
        if (cosets[rep] != 0) {
            continue;
        }

        uint16_t member = rep;
        cosets[member] = rep;

        // Walk the remaining PARAM_M - 1 members of the class by repeated doubling
        for (size_t remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
}
/**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements. <br>
 * The search for the real correction capacity never reads past the end of the coset table.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
size_t PQCLEAN_HQC256_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps the byte count tied to the array itself
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is 0xffff when bch_poly[j] is nonzero (coefficients are assumed to be
            // below 2^15), 0 otherwise; the table lookups happen in both cases
            int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity, stopping at the end of the table
    while (2 * *t + 1 < PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
}
/**
* @brief Computes the values alpha^ij for decoding syndromes
*

@ -15,8 +15,6 @@
void PQCLEAN_HQC256_AVX2_bch_code_decode(uint64_t *message, uint64_t *vector);
size_t PQCLEAN_HQC256_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);
void PQCLEAN_HQC256_AVX2_table_alphaij_generation(const uint16_t *exp);

@ -325,7 +325,8 @@ void PQCLEAN_HQC256_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
uint64_t bit;
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -22,9 +22,11 @@
* @param[in] em Pointer to an array that is the code word
*/
void PQCLEAN_HQC256_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) {
size_t t = 0, b, bn, bi, c, cn, ci;
uint32_t t = 0;
uint32_t b, bn, bi, c, cn, ci;
uint64_t cx, ones;
uint64_t cy;
uint64_t mask;
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
bn = b >> 6;
@ -33,10 +35,10 @@ void PQCLEAN_HQC256_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
cn = c >> 6;
ci = c & 63;
cx = em[cn] << (63 - ci);
int64_t verif = (cn == (bn + 1));
mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
cy = em[bn + 1];
ones = _mm_popcnt_u64((em[bn] >> bi) | (cx * (1 - verif))) + _mm_popcnt_u64((1 - verif) * cy + verif * cx);
m[t >> 6] |= ((uint64_t)(ones > PARAM_T)) << (t & 63);
ones = _mm_popcnt_u64((em[bn] >> bi) | (cx & mask)) + _mm_popcnt_u64((mask & cy) + (~mask & cx));
m[t >> 6] |= (uint64_t) (((((int64_t)PARAM_T - ones) >> 63) & 1) << (t & 63)); // 1 << (t&63) if ones > PARAM_T else 0
t++;
}
}

@ -11,8 +11,6 @@
*/
static uint16_t mod(uint16_t i, uint16_t modulus);
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound);
static void unpack_message(uint8_t *message_unpacked, const uint64_t *message);
static void lfsr_encode(uint8_t *codeword, const uint8_t *message);
static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked);
@ -21,97 +19,6 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword);
static void compute_syndromes(uint16_t *syndromes, const uint64_t *vector);
static void compute_roots(uint64_t *error, const uint16_t *sigma);
/**
 * @brief Reduces i modulo the given modulus without branching.
 *
 * The caller must guarantee i < 2*modulus, so the result is
 * either i itself or i - modulus.
 * @returns i mod (modulus)
 * @param[in] i The value to reduce
 * @param[in] modulus The modulus
 */
static uint16_t mod(uint16_t i, uint16_t modulus) {
    uint16_t diff = (uint16_t) (i - modulus);
    // Top bit of the wrapped difference; treated as the "i < modulus" indicator
    uint16_t borrow = (uint16_t) (diff >> 15);
    // 0xffff when borrow is set, 0 otherwise
    uint16_t mask = (uint16_t) (0u - borrow);
    // Add the modulus back only when the subtraction went below zero
    return (uint16_t) (diff + (mask & modulus));
}
/**
 * @brief Fills in the odd binary cyclotomic cosets modulo 2^m-1 for integers below upper_bound.
 *
 * For every odd i < upper_bound that is not yet marked, i becomes the
 * representative of its coset: cosets[x] is set to i for i and for each
 * value reached by repeatedly doubling i modulo PARAM_GF_MUL_ORDER.
 * Entries that are already nonzero are treated as assigned and left alone.
 * @param[out] cosets Array receiving the coset representatives (expected to be zeroed by the caller)
 * @param[in] upper_bound The upper bound
 */
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
    for (uint16_t rep = 1; rep < upper_bound; rep += 2) {
        // Skip values that were already absorbed into an earlier coset
        if (cosets[rep] != 0) {
            continue;
        }

        cosets[rep] = rep;

        // Walk the remaining PARAM_M - 1 members of the coset by doubling
        uint16_t member = rep;
        for (size_t remaining = PARAM_M - 1; remaining != 0; --remaining) {
            member = mod(2 * member, PARAM_GF_MUL_ORDER);
            cosets[member] = rep;
        }
    }
}
/**
 * @brief Computes the generator polynomial of the primitive BCH code with given parameters.
 *
 * Code length is 2^m-1. <br>
 * Parameter t is the targeted correction capacity of the code
 * and receives the real correction capacity (which is at least equal to the target). <br>
 * exp and log are arrays giving antilog and log of GF(2^m) elements.
 *
 * The search for the real correction capacity stops at the end of the
 * cosets table, so it never reads past index PARAM_GF_MUL_ORDER - 1.
 * @returns the degree of the generator polynomial
 * @param[out] bch_poly Array of size (m*t + 1) receiving the coefficients of the generator polynomial
 * @param[in,out] t Targeted correction capacity; receives the real correction capacity
 * @param[in] exp Antilog table of GF(2^m)
 * @param[in] log Log table of GF(2^m)
 */
size_t PQCLEAN_HQC256_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log) {
    uint16_t cosets[PARAM_GF_MUL_ORDER];
    size_t deg_bch_poly = 0;

    // Clear the whole table; sizeof keeps this correct if the element type changes
    memset(cosets, 0, sizeof cosets);
    compute_cyclotomic_cosets(cosets, (uint16_t) (2 * *t));

    // Start with bch_poly(X) = 1
    bch_poly[0] = 1;

    for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
        if (cosets[i] == 0) {
            continue;
        }

        // Multiply bch_poly(X) by X-a^i
        for (size_t j = deg_bch_poly; j; --j) {
            // mask is all ones for a nonzero coefficient (values below 2^15) and
            // zero for a zero coefficient, so the table lookup result is
            // discarded when there is nothing to multiply
            int16_t mask = -(((uint16_t) -bch_poly[j]) >> 15);
            bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
        }
        bch_poly[0] = exp[mod(log[bch_poly[0]] + i, PARAM_GF_MUL_ORDER)];
        bch_poly[++deg_bch_poly] = 1;
    }

    // Determine the real correction capacity; the index check comes first so
    // the table is never read out of bounds
    while (2 * *t + 1 < (size_t) PARAM_GF_MUL_ORDER && cosets[2 * *t + 1] != 0) {
        ++*t;
    }

    return deg_bch_poly;
}
/**
* @brief Unpacks the message message to the array message_unpacked where each byte stores a bit of the message
*

@ -17,7 +17,4 @@ void PQCLEAN_HQC256_CLEAN_bch_code_encode(uint64_t *codeword, const uint64_t *me
void PQCLEAN_HQC256_CLEAN_bch_code_decode(uint64_t *message, uint64_t *vector);
size_t PQCLEAN_HQC256_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, const uint16_t *exp, const uint16_t *log);
#endif

@ -51,7 +51,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -134,7 +134,8 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0};
uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0};
size_t i, n;
uint16_t n;
size_t i;
n = 1 << (m_f - 2);
memcpy(Q0, f0 + n, 2 * n);

@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t *pt;
uint16_t *res_16;
uint16_t i, j;
for (uint32_t i = 0; i < 16; i++) {
for (i = 0; i < 16; i++) {
permuted_table[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
for (uint32_t i = 0; i < 15; i++) {
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;
for (uint32_t i = 1; i < 16; i++) {
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
for (uint32_t i = 0; i + 1 < weight; i++) {
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res_16 = ((uint16_t *) o) + s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
*res_16++ ^= (uint16_t) pt[j];
*res_16++ ^= (uint16_t) (pt[j] >> 16);
*res_16++ ^= (uint16_t) (pt[j] >> 32);

@ -80,9 +80,11 @@ static inline int32_t popcount(uint64_t n) {
* @param[in] em Pointer to an array that is the code word
*/
void PQCLEAN_HQC256_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) {
size_t t = 0, b, bn, bi, c, cn, ci;
uint32_t t = 0;
uint32_t b, bn, bi, c, cn, ci;
uint64_t cx, ones;
uint64_t cy;
uint64_t mask;
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
bn = b >> 6;
@ -91,10 +93,10 @@ void PQCLEAN_HQC256_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
cn = c >> 6;
ci = c & 63;
cx = em[cn] << (63 - ci);
int64_t verif = (cn == (bn + 1));
mask = (uint64_t) (-((int64_t) (cn ^ (bn + 1))) >> 63); // cn != bn+1
cy = em[bn + 1];
ones = popcount((em[bn] >> bi) | (cx * (1 - verif))) + popcount((1 - verif) * cy + verif * cx);
m[t >> 6] |= ((uint64_t) (ones > PARAM_T)) << (t & 63);
ones = popcount((em[bn] >> bi) | (cx & mask)) + popcount((mask & cy) + (~mask & cx));
m[t >> 6] |= (uint64_t) (((((int64_t)PARAM_T - ones) >> 63) & 1) << (t & 63)); // 1 << (t&63) if ones > PARAM_T else 0
t++;
}
}

@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs
void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff
void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t *pt;
uint16_t *res_16;
uint16_t i, j;
for (uint32_t i = 0; i < 16; i++) {
for (i = 0; i < 16; i++) {
permuted_table[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
for (uint32_t i = 0; i < 15; i++) {
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;
for (uint32_t i = 1; i < 16; i++) {
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
for (uint32_t i = 0; i + 1 < weight; i++) {
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res_16 = ((uint16_t *) o) + s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
*res_16++ ^= (uint16_t) pt[j];
*res_16++ ^= (uint16_t) (pt[j] >> 16);
*res_16++ ^= (uint16_t) (pt[j] >> 32);

@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs
void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff
void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t *pt;
uint16_t *res_16;
uint16_t i, j;
for (uint32_t i = 0; i < 16; i++) {
for (i = 0; i < 16; i++) {
permuted_table[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
for (uint32_t i = 0; i < 15; i++) {
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;
for (uint32_t i = 1; i < 16; i++) {
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
for (uint32_t i = 0; i + 1 < weight; i++) {
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res_16 = ((uint16_t *) o) + s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
*res_16++ ^= (uint16_t) pt[j];
*res_16++ ^= (uint16_t) (pt[j] >> 16);
*res_16++ ^= (uint16_t) (pt[j] >> 32);

@ -47,7 +47,7 @@ static void compute_fft_betas(uint16_t *betas) {
* @param[in] set_size Size of the array set
*/
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) {
size_t i, j;
uint16_t i, j;
subset_sums[0] = 0;
for (i = 0; i < set_size; ++i) {
@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs
void PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -324,7 +324,8 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff
void PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
size_t i, k, index;
uint16_t k;
size_t i, index;
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

@ -78,50 +78,51 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t *pt;
uint16_t *res_16;
uint16_t i, j;
for (uint32_t i = 0; i < 16; i++) {
for (i = 0; i < 16; i++) {
permuted_table[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
for (uint32_t i = 0; i < 15; i++) {
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;
for (uint32_t i = 1; i < 16; i++) {
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = i;
}
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
for (uint32_t i = 0; i + 1 < weight; i++) {
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
}
for (uint32_t i = 0; i < weight; i++) {
for (i = 0; i < weight; i++) {
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res_16 = ((uint16_t *) o) + s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
*res_16++ ^= (uint16_t) pt[j];
*res_16++ ^= (uint16_t) (pt[j] >> 16);
*res_16++ ^= (uint16_t) (pt[j] >> 32);

@ -28,7 +28,6 @@ consistency_checks:
- repetition.h
- vector.h
- bch.c
- fft.c
- gf.c
- hqc.c
- kem.c
@ -54,7 +53,6 @@ consistency_checks:
- repetition.h
- vector.h
- bch.c
- fft.c
- gf.c
- hqc.c
- kem.c

@ -21,7 +21,6 @@ consistency_checks:
- vector.h
- bch.c
- code.c
- fft.c
- gf2x.c
- gf.c
- hqc.c
@ -48,7 +47,6 @@ consistency_checks:
- vector.h
- bch.c
- code.c
- fft.c
- gf2x.c
- gf.c
- hqc.c

@ -23,7 +23,6 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c
- source:
scheme: hqc-rmrs-192
implementation: avx2
@ -56,7 +55,6 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c
- source:
scheme: hqc-rmrs-256
implementation: avx2

@ -26,7 +26,6 @@ consistency_checks:
- reed_solomon.h
- vector.h
- code.c
- fft.c
- gf2x.c
- gf.c
- hqc.c
@ -60,7 +59,6 @@ consistency_checks:
- reed_solomon.h
- vector.h
- code.c
- fft.c
- gf2x.c
- gf.c
- hqc.c

@ -11,7 +11,6 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c
- source:
scheme: hqc-rmrs-256
implementation: clean
@ -23,7 +22,6 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c
- source:
scheme: hqc-rmrs-256
implementation: avx2

@ -11,7 +11,6 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c
- source:
scheme: hqc-rmrs-256
implementation: clean
@ -45,4 +44,3 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c

@ -11,4 +11,3 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c

@ -11,4 +11,3 @@ consistency_checks:
- reed_muller.h
- reed_solomon.h
- code.c
- fft.c