@@ -49,7 +49,7 @@ static uint16_t mod(uint16_t i, uint16_t modulus) {
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
 // Compute the odd cyclotomic classes
-for (uint16_t i = 1 ; i < upper_bound ; i += 2) {
+for (uint16_t i = 1; i < upper_bound; i += 2) {
 if (cosets[i] == 0) { // If i does not already belong to a class
 uint16_t tmp = i;
 size_t j = PARAM_M;
@@ -87,13 +87,13 @@ size_t PQCLEAN_HQC128_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const
 // Start with bch_poly(X) = 1
 bch_poly[0] = 1;
-for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
+for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
 if (cosets[i] == 0) {
 continue;
 }
 // Multiply bch_poly(X) by X-a^i
-for (size_t j = deg_bch_poly ; j ; --j) {
+for (size_t j = deg_bch_poly; j; --j) {
 int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
 bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
 }
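/* Editor's note -- illustrative sketch, not part of the diff. The
 * `-((uint16_t) - bch_poly[j] >> 15)` expression above is a branch-free
 * nonzero test: for x < 2^15 (GF(2^PARAM_M) elements are well below that),
 * bit 15 of -x mod 2^16 is set exactly when x != 0. Standalone: */
#include <stdint.h>
static uint16_t nonzero_mask(uint16_t x) {
    /* 0xFFFF if x != 0, 0x0000 if x == 0 (precondition: x < 0x8000) */
    return (uint16_t) (-((uint16_t) - x >> 15));
}
/* nonzero_mask(0) == 0x0000, nonzero_mask(0x03ff) == 0xffff */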
@@ -129,10 +129,10 @@ void PQCLEAN_HQC128_AVX2_table_alphaij_generation(const uint16_t *exp) {
 // pre-computation of alpha^ij for i in [0, N1[ and j in [1, 2*PARAM_DELTA]
 // see comment of alpha_ij_table_init() function.
-for (uint16_t i = 0; i < PARAM_N1 ; ++i) {
+for (uint16_t i = 0; i < PARAM_N1; ++i) {
 tmp_value = 0;
 alpha_tmp = table_alpha_ij + i * (PARAM_DELTA << 1);
-for (uint16_t j = 0 ; j < (PARAM_DELTA << 1) ; j++) {
+for (uint16_t j = 0; j < (PARAM_DELTA << 1); j++) {
 tmp_value = PQCLEAN_HQC128_AVX2_gf_mod(tmp_value + i);
 alpha_tmp[j] = exp[tmp_value];
 }
@@ -168,13 +168,13 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 uint16_t d_p = 1;
 uint16_t d = syndromes[0];
-for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
+for (size_t mu = 0; mu < PARAM_DELTA; ++mu) {
 // Save sigma in case we need it to update X_sigma_p
 memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
 deg_sigma_copy = deg_sigma;
 uint16_t dd = PQCLEAN_HQC128_AVX2_gf_mul(d, PQCLEAN_HQC128_AVX2_gf_inverse(d_p)); // 0 if(d == 0)
-for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
+for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) {
 sigma[i] ^= PQCLEAN_HQC128_AVX2_gf_mul(dd, X_sigma_p[i]);
 }
@@ -198,7 +198,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 // Update pp, d_p and X_sigma_p if needed
 pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
 d_p = (mask12 & d) ^ (~mask12 & d_p);
-for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
+for (size_t i = PARAM_DELTA - 1; i; --i) {
 X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
 }
 X_sigma_p[1] = 0;
@@ -207,7 +207,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 // Compute the next discrepancy
 d = syndromes[2 * mu + 2];
-for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
+for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) {
 d ^= PQCLEAN_HQC128_AVX2_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
 }
 }
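/* Editor's note -- illustrative sketch, not part of the diff. compute_elp
 * is a constant-time Berlekamp-Massey: every data-dependent update goes
 * through a mask, as in the pp/d_p lines above. The underlying branch-free
 * select is simply: */
#include <stdint.h>
static uint16_t ct_select(uint16_t mask, uint16_t a, uint16_t b) {
    /* returns a when mask == 0xFFFF, b when mask == 0x0000 */
    return (uint16_t) ((mask & a) ^ (~mask & b));
}
/* e.g. ct_select(0xFFFF, 7, 9) == 7 and ct_select(0, 7, 9) == 9 */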
@@ -232,7 +232,7 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
 uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64));
 size_t index = val / 64;
-for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
+for (size_t i = 0; i < VEC_K_SIZE_64 - 1; ++i) {
 uint64_t message1 = (codeword[index] & mask1) >> val % 64;
 uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64);
 message[i] = message1 | message2;
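/* Editor's note -- illustrative sketch, not part of the diff. The loop
 * above reads 64 message bits starting at an unaligned bit offset by
 * stitching two adjacent words. A minimal version, assuming the offset is
 * not a multiple of 64 (a 64-bit shift would be undefined; the masks in
 * the real code cover that case): */
#include <stdint.h>
#include <stddef.h>
static uint64_t read64_at(const uint64_t *v, size_t off) {
    size_t idx = off / 64;
    size_t sh = off % 64; /* precondition: sh != 0 */
    return (v[idx] >> sh) | (v[idx + 1] << (64 - sh));
}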
@@ -282,7 +282,7 @@ void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) {
 // vectorized version of the separation of the coordinates of the vector v in order to put each coordinate in an unsigned char
 // aux is used to consider 4 elements in v at each step of the loop
 aux = (uint32_t *) rcv;
-for (i = 0 ; i < ((VEC_N1_SIZE_BYTES >> 2) << 2) ; i += 4) {
+for (i = 0; i < ((VEC_N1_SIZE_BYTES >> 2) << 2); i += 4) {
 // duplicate aux 8 times in y , i.e y= (aux aux aux .... aux)
 y = _mm256_set1_epi32(*aux);
 // shuffle the bytes of y so that if aux=(a0 a1 a2 a3)
@@ -294,11 +294,11 @@ void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) {
 }
 // Evaluation of the polynomial corresponding to the vector v in alpha^i for i in {1, ..., 2 * PARAM_DELTA}
-for (size_t j = 0 ; j < SYND_SIZE_256 ; ++j) {
+for (size_t j = 0; j < SYND_SIZE_256; ++j) {
 S = zero_256;
 alpha_tmp = table_alpha_ij + (j << 4);
-for (size_t i = 0 ; i < PARAM_N1 ; ++i) {
+for (size_t i = 0; i < PARAM_N1; ++i) {
 tmp_repeat = _mm256_set1_epi64x((long long)(tmp_array[i] != 0));
 L = _mm256_cmpeq_epi64(tmp_repeat, un_256);
 tmp_repeat = _mm256_lddqu_si256((__m256i *)(alpha_tmp + i * (PARAM_DELTA << 1)));
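/* Editor's sketch -- not part of the diff. Scalar reference for the AVX2
 * syndrome loop above: S_j = sum over set coordinates i of alpha^(i*j),
 * j = 1..2*PARAM_DELTA. gf_exp/mul_order stand in for the antilog table
 * assumed from the surrounding code; the real code replaces the branch on
 * v[i] with a cmpeq mask to stay constant-time. */
#include <stdint.h>
#include <stddef.h>
static void syndromes_ref(uint16_t *S, const uint8_t *v, size_t n1,
                          size_t two_delta, const uint16_t *gf_exp,
                          uint16_t mul_order) {
    for (size_t j = 1; j <= two_delta; ++j) {
        uint16_t s = 0;
        for (size_t i = 0; i < n1; ++i) {
            if (v[i]) {
                s ^= gf_exp[(i * j) % mul_order];
            }
        }
        S[j - 1] = s;
    }
}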
@@ -43,7 +43,7 @@ void PQCLEAN_HQC128_AVX2_code_encode(uint64_t *em, const uint64_t *m) {
 __m256i msg = _mm256_lddqu_si256((const __m256i *) m);
 colonne = ((__m256i *) gen_matrix);
-for (i = 0 ; i < PARAM_N1 - PARAM_K ; i++) {
+for (i = 0; i < PARAM_N1 - PARAM_K; i++) {
 // y is the and operation between m and ith column of G
 y = _mm256_and_si256(colonne[i], msg);
 // aux0 = (y2 y3 y0 y1)
@@ -69,8 +69,8 @@ void PQCLEAN_HQC128_AVX2_code_encode(uint64_t *em, const uint64_t *m) {
 /* now we add the message m */
 /* systematic encoding */
-for (int32_t i = 0 ; i < 4 ; i++) {
-for (int32_t j = 0 ; j < 64 ; j++) {
+for (int32_t i = 0; i < 4; i++) {
+for (int32_t j = 0; j < 64; j++) {
 uint8_t bit = (m[i] >> j) & 0x1;
 uint32_t pos_r = PARAM_N2 * ((PARAM_N1 - PARAM_K) + ((i << 6) + j));
 uint16_t idx_r = (pos_r & 0x3f);
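/* Editor's note -- illustrative sketch, not part of the diff. The
 * pos_r / idx_r pair above is the standard bit-addressing split for a
 * packed uint64_t array: word index pos >> 6, bit index pos & 0x3f. */
#include <stdint.h>
static void set_bit(uint64_t *v, uint32_t pos) {
    v[pos >> 6] |= (uint64_t) 1 << (pos & 0x3f);
}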
@@ -30,7 +30,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 */
 static void compute_fft_betas(uint16_t *betas) {
 size_t i;
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 betas[i] = 1 << (PARAM_M - 1 - i);
 }
 }
@@ -51,8 +51,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size
 size_t i, j;
 subset_sums[0] = 0;
-for (i = 0 ; i < set_size ; ++i) {
-for (j = 0 ; j < (1U << i) ; ++j) {
+for (i = 0; i < set_size; ++i) {
+for (j = 0; j < (1U << i); ++j) {
 subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j];
 }
 }
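/* Editor's note -- illustrative sketch, not part of the diff.
 * compute_subset_sums above enumerates all 2^k XOR-combinations of a
 * k-element set so that sums[s] is the XOR of set[i] over the bits i set
 * in s. Standalone: */
#include <stdint.h>
#include <stddef.h>
static void subset_sums(uint16_t *sums, const uint16_t *set, size_t k) {
    sums[0] = 0;
    for (size_t i = 0; i < k; ++i) {
        for (size_t j = 0; j < ((size_t) 1 << i); ++j) {
            sums[((size_t) 1 << i) + j] = set[i] ^ sums[j];
        }
    }
}
/* with set = {1, 2}: sums == {0, 1, 2, 3} */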
@@ -139,7 +139,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_
 memcpy(Q + n, f + 3 * n, 2 * n);
 memcpy(R, f, 4 * n);
-for (i = 0 ; i < n ; ++i) {
+for (i = 0; i < n; ++i) {
 Q[i] ^= f[2 * n + i];
 R[n + i] ^= Q[i];
 }
@@ -182,13 +182,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 // Step 1
 if (m_f == 1) {
-for (i = 0 ; i < m ; ++i) {
+for (i = 0; i < m; ++i) {
 tmp[i] = PQCLEAN_HQC128_AVX2_gf_mul(betas[i], f[1]);
 }
 w[0] = f[0];
-for (j = 0 ; j < m ; ++j) {
-for (k = 0 ; k < (1U << j) ; ++k) {
+for (j = 0; j < m; ++j) {
+for (k = 0; k < (1U << j); ++k) {
 w[(1 << j) + k] = w[k] ^ tmp[j];
 }
 }
@@ -199,7 +199,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 // Step 2: compute g
 if (betas[m - 1] != 1) {
 beta_m_pow = 1;
-for (i = 1 ; i < (1U << m_f) ; ++i) {
+for (i = 1; i < (1U << m_f); ++i) {
 beta_m_pow = PQCLEAN_HQC128_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
 f[i] = PQCLEAN_HQC128_AVX2_gf_mul(beta_m_pow, f[i]);
 }
@@ -209,7 +209,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 radix(f0, f1, f, m_f);
 // Step 4: compute gammas and deltas
-for (i = 0 ; i + 1 < m ; ++i) {
+for (i = 0; i + 1 < m; ++i) {
 gammas[i] = PQCLEAN_HQC128_AVX2_gf_mul(betas[i], PQCLEAN_HQC128_AVX2_gf_inverse(betas[m - 1]));
 deltas[i] = PQCLEAN_HQC128_AVX2_gf_square(gammas[i]) ^ gammas[i];
 }
@@ -224,7 +224,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
 w[0] = u[0];
 w[k] = u[0] ^ f1[0];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(gammas_sums[i], f1[0]);
 w[k + i] = w[i] ^ f1[0];
 }
@@ -235,7 +235,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 memcpy(w + k, v, 2 * k);
 w[0] = u[0];
 w[k] ^= u[0];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(gammas_sums[i], v[i]);
 w[k + i] ^= w[i];
 }
@@ -288,7 +288,7 @@ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
 radix(f0, f1, f, PARAM_FFT);
 // Step 4: Compute deltas
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 deltas[i] = PQCLEAN_HQC128_AVX2_gf_square(betas[i]) ^ betas[i];
 }
@@ -307,7 +307,7 @@ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
 w[k] ^= u[0];
 // Find other roots
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(betas_sums[i], v[i]);
 w[k + i] ^= w[i];
 }
@@ -337,7 +337,7 @@ void PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
 bit = 1 ^ ((uint16_t) - w[k] >> 15);
 error[index / 8] ^= bit << (index % 64);
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_AVX2_gf_log(gammas_sums[i]);
 bit = 1 ^ ((uint16_t) - w[i] >> 15);
 error[index / 64] ^= bit << (index % 64);
@@ -44,7 +44,7 @@ static inline void reduce(uint64_t *o, const uint64_t *a) {
 uint64_t r;
 uint64_t carry;
-for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) {
+for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) {
 r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63);
 carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63)));
 o[i] = a[i] ^ r ^ carry;
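/* Editor's note -- illustrative sketch, not part of the diff. reduce()
 * folds a 2*PARAM_N-coefficient product back modulo X^PARAM_N - 1; since
 * X^(PARAM_N+i) = X^i there, folding is a plain XOR. One coefficient per
 * byte for clarity (the real code folds 64 coefficients per word with the
 * shift/carry pair above): */
#include <stdint.h>
#include <stddef.h>
static void reduce_ref(uint8_t *o, const uint8_t *a, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        o[i] = a[i] ^ a[i + n];
    }
}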
@@ -192,7 +192,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_4( D0, A, B);
 karat_mult_4(D2, A + 4, B + 4);
-for (int32_t i = 0 ; i < 4 ; i++) {
+for (int32_t i = 0; i < 4; i++) {
 int is = i + 4;
 SAA[i] = A[i] ^ A[is];
 SBB[i] = B[i] ^ B[is];
@@ -200,7 +200,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_4(D1, SAA, SBB);
-for (int32_t i = 0 ; i < 4 ; i++) {
+for (int32_t i = 0; i < 4; i++) {
 int32_t is = i + 4;
 int32_t is2 = is + 4;
 int32_t is3 = is2 + 4;
@@ -231,7 +231,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_8( D0, A, B);
 karat_mult_8(D2, A + 8, B + 8);
-for (int32_t i = 0 ; i < 8 ; i++) {
+for (int32_t i = 0; i < 8; i++) {
 int32_t is = i + 8;
 SAA[i] = A[i] ^ A[is];
 SBB[i] = B[i] ^ B[is];
@@ -239,7 +239,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_8( D1, SAA, SBB);
-for (int32_t i = 0 ; i < 8 ; i++) {
+for (int32_t i = 0; i < 8; i++) {
 int32_t is = i + 8;
 int32_t is2 = is + 8;
 int32_t is3 = is2 + 8;
@@ -270,7 +270,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_16( D0, A, B);
 karat_mult_16(D2, A + 16, B + 16);
-for (int32_t i = 0 ; i < 16 ; i++) {
+for (int32_t i = 0; i < 16; i++) {
 int is = i + 16;
 SAA[i] = A[i] ^ A[is];
 SBB[i] = B[i] ^ B[is];
@@ -278,7 +278,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_16( D1, SAA, SBB);
-for (int32_t i = 0 ; i < 16 ; i++) {
+for (int32_t i = 0; i < 16; i++) {
 int32_t is = i + 16;
 int32_t is2 = is + 16;
 int32_t is3 = is2 + 16;
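/* Editor's sketch -- not part of the diff. One Karatsuba step as used by
 * karat_mult_8/16/32: writing A = A0 + A1*x^64 and B = B0 + B1*x^64 over
 * GF(2)[X], A*B = D0 ^ (D0^D1^D2)*x^64 ^ D2*x^128 with D1 = (A0^A1)(B0^B1),
 * so three half-size products replace four. clmul64 is a slow, plain-C
 * carry-less multiply standing in for the vpclmulqdq the real code uses. */
#include <stdint.h>
static void clmul64(uint64_t a, uint64_t b, uint64_t r[2]) {
    r[0] = 0;
    r[1] = 0;
    for (int i = 0; i < 64; ++i) {
        if ((b >> i) & 1) {
            r[0] ^= a << i;
            r[1] ^= (i == 0) ? 0 : (a >> (64 - i));
        }
    }
}
static void karat128(const uint64_t A[2], const uint64_t B[2], uint64_t C[4]) {
    uint64_t D0[2], D1[2], D2[2];
    clmul64(A[0], B[0], D0);               /* low halves  */
    clmul64(A[1], B[1], D2);               /* high halves */
    clmul64(A[0] ^ A[1], B[0] ^ B[1], D1); /* sum halves  */
    C[0] = D0[0];
    C[1] = D0[1] ^ D0[0] ^ D1[0] ^ D2[0];
    C[2] = D2[0] ^ D0[1] ^ D1[1] ^ D2[1];
    C[3] = D2[1];
}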
@@ -307,7 +307,7 @@ static inline void divByXplus1(__m256i *out, __m256i *in, int size) {
 B[0] = A[0];
-for (int32_t i = 1 ; i < 2 * (size << 2) ; i++) {
+for (int32_t i = 1; i < 2 * (size << 2); i++) {
 B[i] = B[i - 1] ^ A[i];
 }
 }
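/* Editor's note -- illustrative sketch, not part of the diff. divByXplus1
 * is exact division by (x + 1) over GF(2)[X] limbs via a running XOR:
 * from A = B*(x + 1) = B*x + B it follows B[i] = A[i] ^ B[i-1]. With byte
 * limbs instead of the code's 64-bit words: */
#include <stdint.h>
#include <stddef.h>
static void div_x_plus_1(uint8_t *B, const uint8_t *A, size_t n) {
    B[0] = A[0];
    for (size_t i = 1; i < n; ++i) {
        B[i] = B[i - 1] ^ A[i];
    }
}
/* e.g. A = {3, 6, 2, 7} = (3 + 5x + 7x^2)*(1 + x) gives B = {3, 5, 7, 0} */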
@@ -331,7 +331,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 const __m256i zero = _mm256_setzero_si256();
 int32_t T2 = T_TM3_3W_64 << 1;
-for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) {
+for (int32_t i = 0; i < T_TM3_3W_256 - 1; i++) {
 int32_t i4 = i << 2;
 int32_t i42 = i4 - 2;
 U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4]));
@@ -342,7 +342,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4]));
 }
-for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) {
+for (int32_t i = T_TM3_3W_256 - 1; i < T_TM3_3W_256; i++) {
 int32_t i4 = i << 2;
 int32_t i41 = i4 + 1;
 U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]);
@@ -356,8 +356,8 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 // Evaluation phase : x= X^64
 // P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty)
 // Evaluation: 5*2 add, 2*2 shift; 5 mul (n)
-//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0
-for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
+//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0
+for (int32_t i = 0; i < T_TM3_3W_256; i++) {
 W3[i] = U0[i] ^ U1[i] ^ U2[i];
 W2[i] = V0[i] ^ V1[i] ^ V2[i];
 }
@@ -365,7 +365,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 //W1 = W2 * W3
 karat_mult_32( W1, W2, W3);
-//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !)
+//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !)
 int64_t *U1_64 = ((int64_t *) U1);
 int64_t *U2_64 = ((int64_t *) U2);
@@ -381,7 +381,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 V1_64 = ((int64_t *) V1);
 V2_64 = ((int64_t *) V2);
-for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) {
+for (int32_t i = 1; i < T_TM3_3W_256; i++) {
 int i4 = i << 2;
 W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1]));
 W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2]));
@@ -390,46 +390,46 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2]));
 }
-//W3 = W3 + W0 ; W2 = W2 + W4
-for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
+//W3 = W3 + W0 ; W2 = W2 + W4
+for (int32_t i = 0; i < T_TM3_3W_256; i++) {
 W3[i] ^= W0[i];
 W2[i] ^= W4[i];
 }
-//W0 = W0 + U0 ; W4 = W4 + V0
-for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) {
+//W0 = W0 + U0 ; W4 = W4 + V0
+for (int32_t i = 0; i < T_TM3_3W_256; i++) {
 W0[i] ^= U0[i];
 W4[i] ^= V0[i];
 }
-//W3 = W3 * W2 ; W2 = W0 * W4
+//W3 = W3 * W2 ; W2 = W0 * W4
 karat_mult_32(tmp, W3, W2);
-for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
+for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) {
 W3[i] = tmp[i];
 }
 karat_mult_32(W2, W0, W4);
-//W4 = U2 * V2 ; W0 = U0 * V0
+//W4 = U2 * V2 ; W0 = U0 * V0
 karat_mult_32(W4, U2, V2);
 karat_mult_32(W0, U0, V0);
 // Interpolation phase
 // 9 add, 1 shift, 1 Smul, 2 Sdiv (2n)
 //W3 = W3 + W2
-for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
+for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) {
 W3[i] ^= W2[i];
 }
 //W1 = W1 + W0
-for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
+for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) {
 W1[i] ^= W0[i];
 }
 //W2 =(W2 + W0)/x -> x = X^64
 U1_64 = ((int64_t *) W2);
 U2_64 = ((int64_t *) W0);
-for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) {
+for (int32_t i = 0; i < (T_TM3_3W_256 << 1); i++) {
 int32_t i4 = i << 2;
 W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1]));
 W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1]));
@@ -440,7 +440,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 __m256i *U1_256 = (__m256i *) (U1_64 + 1);
 tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0);
-for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) {
+for (int32_t i = 1; i < (T_TM3_3W_256 << 1) - 1; i++) {
 tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]);
 }
@@ -454,7 +454,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 U2_64 = (int64_t *) W1;
 __m256i *U2_256 = (__m256i *) (U2_64 + 1);
-for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) {
+for (int32_t i = 0; i < 2 * (T_TM3_3W_256) - 1; i++) {
 tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]);
 }
@@ -462,19 +462,19 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 W3[2 * (T_TM3_3W_256) - 1] = zero;
 //W1 = W1 + W4 + W2
-for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
+for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) {
 W1[i] ^= W2[i] ^ W4[i];
 }
 //W2 = W2 + W3
-for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) {
+for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) {
 W2[i] ^= W3[i];
 }
 // Recomposition
 //W = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4
 //W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256)
-for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) {
+for (int32_t i = 0; i < (T_TM3_3W_256 << 1) - 1; i++) {
 ro256[i] = W0[i];
 ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i];
 ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i];
@@ -490,12 +490,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) {
 U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]);
 U2_256 = (__m256i *) (U2_64 - 2);
-for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) {
+for (int32_t i = 0; i < T_TM3_3W_256 << 1; i++) {
 _mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i]));
 _mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i]));
 }
-for (int32_t i = 0 ; i < 2 * VEC_N_SIZE_256 + 1 ; i++) {
+for (int32_t i = 0; i < 2 * VEC_N_SIZE_256 + 1; i++) {
 _mm256_storeu_si256(&Out[i], ro256[i]);
 }
 }
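/* Editor's note -- not part of the diff. TOOM3Mult above is Toom-Cook-3
 * over GF(2)[X]: A and B are split into three limbs, evaluated at the five
 * points 0, 1, x, 1 + x and infinity (with x = X^64, per the comments),
 * multiplied pointwise with karat_mult_32, and re-assembled. Because the
 * field has characteristic 2, interpolation costs only XORs plus exact
 * divisions by x and by x + 1 -- the latter is the divByXplus1 running-XOR
 * shown earlier. This point/variable mapping is the editor's reading of
 * the code, not an annotation from the authors. */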
@@ -128,7 +128,7 @@ int PQCLEAN_HQC128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *c
 // Abort if c != c' or d != d'
 result = (PQCLEAN_HQC128_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQC128_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && PQCLEAN_HQC128_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0);
-for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) {
+for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
 ss[i] = result * ss[i];
 }
 result--;
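/* Editor's note -- illustrative sketch, not part of the diff. The tail of
 * crypto_kem_dec above zeroizes the shared secret without branching:
 * 'result' is 1 when all comparisons matched and 0 otherwise, so the
 * multiply keeps or clears each byte, and result-- turns 1/0 into the
 * usual 0 (success) / -1 (failure) return value. In isolation: */
#include <stdint.h>
#include <stddef.h>
static int finalize_ss(uint8_t *ss, size_t len, int result) {
    for (size_t i = 0; i < len; i++) {
        ss[i] = (uint8_t) (result * ss[i]); /* keep on 1, clear on 0 */
    }
    return result - 1; /* 1 -> 0, 0 -> -1 */
}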
@@ -26,7 +26,7 @@ void PQCLEAN_HQC128_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em)
 size_t t = 0, b, bn, bi, c, cn, ci;
 uint64_t cx, ones;
-for (b = 0 ; b < PARAM_N1N2 - PARAM_N2 + 1 ; b += PARAM_N2) {
+for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
 bn = b >> 6;
 bi = b & 63;
 c = b + PARAM_N2 - 1;
@@ -45,7 +45,7 @@ void PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6
 seedexpander(ctx, rand_bytes, random_bytes_size);
-for (uint32_t i = 0 ; i < weight ; ++i) {
+for (uint32_t i = 0; i < weight; ++i) {
 exist = 0;
 do {
 if (j == random_bytes_size) {
@@ -61,7 +61,7 @@ void PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6
 random_data = random_data % PARAM_N;
-for (uint32_t k = 0 ; k < i ; k++) {
+for (uint32_t k = 0; k < i; k++) {
 if (tmp[k] == random_data) {
 exist = 1;
 }
@@ -74,7 +74,7 @@ void PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6
 }
 }
-for (uint32_t i = 0 ; i < weight ; i++) {
+for (uint32_t i = 0; i < weight; i++) {
 // we store the bloc number and bit position of each vb[i]
 uint64_t bloc = tmp[i] >> 6;
 bloc256[i] = _mm256_set1_epi64x(bloc >> 2);
@@ -86,11 +86,11 @@ void PQCLEAN_HQC128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6
 bit256[i] = bloc256 & mask256;
 }
-for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) {
+for (uint32_t i = 0; i < LOOP_SIZE; i++) {
 __m256i aux = _mm256_loadu_si256(((__m256i *)v) + i);
 __m256i i256 = _mm256_set1_epi64x(i);
-for (uint32_t j = 0 ; j < weight ; j++) {
+for (uint32_t j = 0; j < weight; j++) {
 __m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256);
 aux ^= bit256[j] & mask256;
 }
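/* Editor's note -- illustrative sketch, not part of the diff. The masked
 * scatter above writes the sampled support into v without a data-dependent
 * memory access pattern: every 256-bit block is combined with every
 * position via a cmpeq mask. A scalar model of the same idea (the compare
 * is assumed to compile branch-free): */
#include <stdint.h>
#include <stddef.h>
static void scatter_ct(uint64_t *v, size_t n_words, const uint32_t *pos, size_t weight) {
    for (size_t i = 0; i < n_words; i++) {
        uint64_t acc = v[i];
        for (size_t j = 0; j < weight; j++) {
            /* all-ones when position j falls in word i, else zero */
            uint64_t hit = (uint64_t) 0 - (uint64_t) ((pos[j] >> 6) == i);
            acc ^= hit & ((uint64_t) 1 << (pos[j] & 63));
        }
        v[i] = acc;
    }
}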
@@ -147,7 +147,7 @@ void PQCLEAN_HQC128_AVX2_vect_set_random_from_randombytes(uint64_t *v) {
 * @param[in] size Integer that is the size of the vectors
 */
 void PQCLEAN_HQC128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
-for (uint32_t i = 0 ; i < size ; ++i) {
+for (uint32_t i = 0; i < size; ++i) {
 o[i] = v1[i] ^ v2[i];
 }
 }
@@ -165,7 +165,7 @@ void PQCLEAN_HQC128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_
 int PQCLEAN_HQC128_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) {
 unsigned char diff = 0;
-for (uint32_t i = 0 ; i < size ; i++) {
+for (uint32_t i = 0; i < size; i++) {
 diff |= ((uint8_t *) v1)[i] ^ ((uint8_t *) v2)[i];
 }
 return diff != 0;
@@ -192,7 +192,7 @@ void PQCLEAN_HQC128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_
 memcpy(o, v, VEC_N1N2_SIZE_BYTES);
-for (int8_t i = 0 ; i < val ; ++i) {
+for (int8_t i = 0; i < val; ++i) {
 o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i);
 }
 } else {
@@ -50,7 +50,7 @@ static uint16_t mod(uint16_t i, uint16_t modulus) {
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
 // Compute the odd cyclotomic classes
-for (uint16_t i = 1 ; i < upper_bound ; i += 2) {
+for (uint16_t i = 1; i < upper_bound; i += 2) {
 if (cosets[i] == 0) { // If i does not already belong to a class
 uint16_t tmp = i;
 size_t j = PARAM_M;
@@ -88,13 +88,13 @@ size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, cons
 // Start with bch_poly(X) = 1
 bch_poly[0] = 1;
-for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
+for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
 if (cosets[i] == 0) {
 continue;
 }
 // Multiply bch_poly(X) by X-a^i
-for (size_t j = deg_bch_poly ; j ; --j) {
+for (size_t j = deg_bch_poly; j; --j) {
 int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
 bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
 }
@@ -119,13 +119,13 @@ size_t PQCLEAN_HQC128_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, cons
 * @param[in] message Array of PARAM_K bytes storing the packed message
 */
 static void unpack_message(uint8_t *message_unpacked, const uint64_t *message) {
-for (size_t i = 0 ; i < (VEC_K_SIZE_64 - (PARAM_K % 64 != 0)) ; ++i) {
-for (size_t j = 0 ; j < 64 ; ++j) {
+for (size_t i = 0; i < (VEC_K_SIZE_64 - (PARAM_K % 64 != 0)); ++i) {
+for (size_t j = 0; j < 64; ++j) {
 message_unpacked[j + 64 * i] = (message[i] >> j) & 0x0000000000000001;
 }
 }
-for (int8_t j = 0 ; j < PARAM_K % 64 ; ++j) {
+for (int8_t j = 0; j < PARAM_K % 64; ++j) {
 message_unpacked[j + 64 * (VEC_K_SIZE_64 - 1)] = (message[VEC_K_SIZE_64 - 1] >> j) & 0x0000000000000001;
 }
 }
@@ -142,10 +142,10 @@ static void lfsr_encode(uint8_t *codeword, const uint8_t *message) {
 uint8_t bch_poly[PARAM_G] = PARAM_BCH_POLY;
 // Compute the Parity-check digits
-for (int16_t i = PARAM_K - 1 ; i >= 0 ; --i) {
+for (int16_t i = PARAM_K - 1; i >= 0; --i) {
 gate_value = message[i] ^ codeword[PARAM_N1 - PARAM_K - 1];
-for (size_t j = PARAM_N1 - PARAM_K - 1 ; j ; --j) {
+for (size_t j = PARAM_N1 - PARAM_K - 1; j; --j) {
 codeword[j] = codeword[j - 1] ^ (-gate_value & bch_poly[j]);
 }
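/* Editor's sketch -- not part of the diff. lfsr_encode above is polynomial
 * division by the generator g(X) with an LFSR: each message bit (high
 * degree first) produces a feedback bit that conditionally XORs g into the
 * shift register. Generic version, one 0/1 coefficient per byte as in the
 * surrounding code; the r = PARAM_N1 - PARAM_K parity bits come out in
 * 'parity': */
#include <stdint.h>
#include <stddef.h>
static void lfsr_div(uint8_t *parity, const uint8_t *msg, size_t k, const uint8_t *g, size_t r) {
    for (size_t i = 0; i < r; ++i) {
        parity[i] = 0;
    }
    for (size_t i = k; i-- > 0;) {
        uint8_t feedback = msg[i] ^ parity[r - 1];
        for (size_t j = r - 1; j > 0; --j) {
            parity[j] = parity[j - 1] ^ (feedback & g[j]);
        }
        parity[0] = feedback & g[0];
    }
}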
@@ -165,13 +165,13 @@ static void lfsr_encode(uint8_t *codeword, const uint8_t *message) {
 * @param[in] codeword_unpacked Array of PARAM_N1 bytes storing the unpacked codeword
 */
 static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked) {
-for (size_t i = 0 ; i < (VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0)) ; ++i) {
-for (size_t j = 0 ; j < 64 ; ++j) {
+for (size_t i = 0; i < (VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0)); ++i) {
+for (size_t j = 0; j < 64; ++j) {
 codeword[i] |= ((uint64_t) codeword_unpacked[j + 64 * i]) << j;
 }
 }
-for (size_t j = 0 ; j < PARAM_N1 % 64 ; ++j) {
+for (size_t j = 0; j < PARAM_N1 % 64; ++j) {
 codeword[VEC_N1_SIZE_64 - 1] |= ((uint64_t) codeword_unpacked[j + 64 * (VEC_N1_SIZE_64 - 1)]) << j;
 }
 }
@@ -224,13 +224,13 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 uint16_t d_p = 1;
 uint16_t d = syndromes[0];
-for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
+for (size_t mu = 0; mu < PARAM_DELTA; ++mu) {
 // Save sigma in case we need it to update X_sigma_p
 memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
 deg_sigma_copy = deg_sigma;
 uint16_t dd = PQCLEAN_HQC128_CLEAN_gf_mul(d, PQCLEAN_HQC128_CLEAN_gf_inverse(d_p)); // 0 if(d == 0)
-for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
+for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) {
 sigma[i] ^= PQCLEAN_HQC128_CLEAN_gf_mul(dd, X_sigma_p[i]);
 }
@@ -254,7 +254,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 // Update pp, d_p and X_sigma_p if needed
 pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
 d_p = (mask12 & d) ^ (~mask12 & d_p);
-for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
+for (size_t i = PARAM_DELTA - 1; i; --i) {
 X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
 }
 X_sigma_p[1] = 0;
@@ -263,7 +263,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 // Compute the next discrepancy
 d = syndromes[2 * mu + 2];
-for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
+for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) {
 d ^= PQCLEAN_HQC128_CLEAN_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
 }
 }
@@ -288,7 +288,7 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
 uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64));
 size_t index = val / 64;
-for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
+for (size_t i = 0; i < VEC_K_SIZE_64 - 1; ++i) {
 uint64_t message1 = (codeword[index] & mask1) >> val % 64;
 uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64);
 message[i] = message1 | message2;
@@ -33,7 +33,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 */
 static void compute_fft_betas(uint16_t *betas) {
 size_t i;
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 betas[i] = 1 << (PARAM_M - 1 - i);
 }
 }
@@ -54,8 +54,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size
 size_t i, j;
 subset_sums[0] = 0;
-for (i = 0 ; i < set_size ; ++i) {
-for (j = 0 ; j < (1U << i) ; ++j) {
+for (i = 0; i < set_size; ++i) {
+for (j = 0; j < (1U << i); ++j) {
 subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j];
 }
 }
@@ -149,7 +149,7 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin
 memcpy(f + 2 * n, R + n, 2 * n);
 memcpy(f + 3 * n, Q + n, 2 * n);
-for (i = 0 ; i < n ; ++i) {
+for (i = 0; i < n; ++i) {
 f[2 * n + i] ^= Q[i];
 f[3 * n + i] ^= f[2 * n + i];
 }
@@ -185,14 +185,14 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m
 // Step 1
 if (m_f == 1) {
 f[0] = 0;
-for (i = 0 ; i < (1U << m) ; ++i) {
+for (i = 0; i < (1U << m); ++i) {
 f[0] ^= w[i];
 }
 f[1] = 0;
 betas_sums[0] = 0;
-for (j = 0 ; j < m ; ++j) {
-for (k = 0 ; k < (1U << j) ; ++k) {
+for (j = 0; j < m; ++j) {
+for (k = 0; k < (1U << j); ++k) {
 betas_sums[(1 << j) + k] = betas_sums[k] ^ betas[j];
 f[1] ^= PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[(1 << j) + k], w[(1 << j) + k]);
 }
@@ -202,7 +202,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m
 }
 // Compute gammas and deltas
-for (i = 0 ; i + 1 < m ; ++i) {
+for (i = 0; i + 1 < m; ++i) {
 gammas[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], PQCLEAN_HQC128_CLEAN_gf_inverse(betas[m - 1]));
 deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
 }
@@ -222,7 +222,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m
 f1[1] = 0;
 u[0] = w[0] ^ w[k];
 f1[0] = w[k];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 u[i] = w[i] ^ w[k + i];
 f1[0] ^= PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i];
 }
@@ -231,7 +231,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m
 u[0] = w[0] ^ w[k];
 v[0] = w[k];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 u[i] = w[i] ^ w[k + i];
 v[i] = PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i];
 }
@@ -247,7 +247,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m
 // Step 2: compute f from g
 if (betas[m - 1] != 1) {
 beta_m_pow = 1;
-for (i = 1 ; i < (1U << m_f) ; ++i) {
+for (i = 1; i < (1U << m_f); ++i) {
 beta_m_pow = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
 f[i] = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, f[i]);
 }
@@ -294,13 +294,13 @@ void PQCLEAN_HQC128_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs)
 k = 1 << (PARAM_M - 1);
 u[0] = w[0] ^ w[k];
 v[0] = w[k];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 u[i] = w[i] ^ w[k + i];
 v[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[i], u[i]) ^ w[k + i];
 }
 // Compute deltas
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(betas[i]) ^ betas[i];
 }
@@ -395,7 +395,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_
 memcpy(Q + n, f + 3 * n, 2 * n);
 memcpy(R, f, 4 * n);
-for (i = 0 ; i < n ; ++i) {
+for (i = 0; i < n; ++i) {
 Q[i] ^= f[2 * n + i];
 R[n + i] ^= Q[i];
 }
@@ -438,13 +438,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 // Step 1
 if (m_f == 1) {
-for (i = 0 ; i < m ; ++i) {
+for (i = 0; i < m; ++i) {
 tmp[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], f[1]);
 }
 w[0] = f[0];
-for (j = 0 ; j < m ; ++j) {
-for (k = 0 ; k < (1U << j) ; ++k) {
+for (j = 0; j < m; ++j) {
+for (k = 0; k < (1U << j); ++k) {
 w[(1 << j) + k] = w[k] ^ tmp[j];
 }
 }
@@ -455,7 +455,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 // Step 2: compute g
 if (betas[m - 1] != 1) {
 beta_m_pow = 1;
-for (i = 1 ; i < (1U << m_f) ; ++i) {
+for (i = 1; i < (1U << m_f); ++i) {
 beta_m_pow = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
 f[i] = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, f[i]);
 }
@@ -465,7 +465,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 radix(f0, f1, f, m_f);
 // Step 4: compute gammas and deltas
-for (i = 0 ; i + 1 < m ; ++i) {
+for (i = 0; i + 1 < m; ++i) {
 gammas[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], PQCLEAN_HQC128_CLEAN_gf_inverse(betas[m - 1]));
 deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
 }
@@ -480,7 +480,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
 w[0] = u[0];
 w[k] = u[0] ^ f1[0];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], f1[0]);
 w[k + i] = w[i] ^ f1[0];
 }
@@ -491,7 +491,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 memcpy(w + k, v, 2 * k);
 w[0] = u[0];
 w[k] ^= u[0];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], v[i]);
 w[k + i] ^= w[i];
 }
@@ -544,7 +544,7 @@ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
 radix(f0, f1, f, PARAM_FFT);
 // Step 4: Compute deltas
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(betas[i]) ^ betas[i];
 }
@@ -563,7 +563,7 @@ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
 w[k] ^= u[0];
 // Find other roots
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[i], v[i]);
 w[k + i] ^= w[i];
 }
@@ -588,14 +588,14 @@ void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6
 size_t i, j, k;
 // Unpack the received word vector into array r
-for (i = 0 ; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0) ; ++i) {
-for (j = 0 ; j < 64 ; ++j) {
+for (i = 0; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0); ++i) {
+for (j = 0; j < 64; ++j) {
 r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1);
 }
 }
 // Last byte
-for (j = 0 ; j < PARAM_N1 % 64 ; ++j) {
+for (j = 0; j < PARAM_N1 % 64; ++j) {
 r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1);
 }
@@ -609,7 +609,7 @@ void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6
 k = 1 << (PARAM_M - 1);
 w[0] = 0;
 w[k] = -r[0] & 1;
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = -r[PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i])] & gammas_sums[i];
 w[k + i] = -r[PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i] ^ 1)] & (gammas_sums[i] ^ 1);
 }
@@ -639,7 +639,7 @@ void PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uin
 bit = 1 ^ ((uint16_t) - w[k] >> 15);
 error[index / 8] ^= bit << (index % 64);
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i]);
 bit = 1 ^ ((uint16_t) - w[i] >> 15);
 error[index / 64] ^= bit << (index % 64);
@@ -45,7 +45,7 @@ static void reduce(uint64_t *o, const uint64_t *a) {
 uint64_t r;
 uint64_t carry;
-for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) {
+for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) {
 r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63);
 carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63)));
 o[i] = a[i] ^ r ^ carry;
@@ -79,49 +79,49 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
 uint64_t *pt;
 uint16_t *res_16;
-for (uint32_t i = 0 ; i < 16; i++) {
+for (uint32_t i = 0; i < 16; i++) {
 permuted_table[i] = i;
 }
 seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));
-for (uint32_t i = 0 ; i < 15 ; i++) {
+for (uint32_t i = 0; i < 15; i++) {
 swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
 }
 pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
-for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) {
+for (int32_t j = 0; j < VEC_N_SIZE_64; j++) {
 pt[j] = a2[j];
 }
 pt[VEC_N_SIZE_64] = 0x0;
-for (uint32_t i = 1 ; i < 16 ; i++) {
+for (uint32_t i = 1; i < 16; i++) {
 carry = 0;
 pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
-for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) {
+for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) {
 pt[j] = (a2[j] << i) ^ carry;
 carry = (a2[j] >> ((64 - i)));
 }
 pt[VEC_N_SIZE_64] = carry;
 }
-for (uint32_t i = 0 ; i < weight ; i++) {
+for (uint32_t i = 0; i < weight; i++) {
 permuted_sparse_vect[i] = i;
 }
 seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));
-for (uint32_t i = 0 ; i + 1 < weight ; i++) {
+for (uint32_t i = 0; i + 1 < weight; i++) {
 swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i));
 }
-for (uint32_t i = 0 ; i < weight ; i++) {
+for (uint32_t i = 0; i < weight; i++) {
 dec = a1[permuted_sparse_vect[i]] & 0xf;
 s = a1[permuted_sparse_vect[i]] >> 4;
 res_16 = ((uint16_t *) o) + s;
 pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));
-for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) {
+for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) {
 *res_16++ ^= (uint16_t) pt[j];
 *res_16++ ^= (uint16_t) (pt[j] >> 16);
 *res_16++ ^= (uint16_t) (pt[j] >> 32);
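/* Editor's sketch -- not part of the diff. fast_convolution_mult above
 * multiplies a sparse vector (positions a1) by a dense one (a2): it
 * precomputes the 16 one-bit shifts of a2, then, per support position,
 * XORs the right shifted copy into o at a 16-bit-aligned offset; the two
 * random permutations only mask the lookup order. The core operation,
 * o ^= a2 * X^pos, without the masking (o must provide n_words + 1 words
 * of headroom, as vect_mul's tmp buffer does): */
#include <stdint.h>
#include <stddef.h>
static void xor_shifted(uint64_t *o, const uint64_t *a2, size_t n_words, uint32_t pos) {
    uint32_t word = pos >> 6, sh = pos & 63;
    uint64_t carry = 0;
    for (size_t j = 0; j < n_words; j++) {
        o[word + j] ^= (a2[j] << sh) | carry;
        carry = sh ? (a2[j] >> (64 - sh)) : 0;
    }
    o[word + n_words] ^= carry;
}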
@@ -146,7 +146,7 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_
 */
 void PQCLEAN_HQC128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
 uint64_t tmp[2 * VEC_N_SIZE_64 + 1];
-for (uint32_t j = 0 ; j < 2 * VEC_N_SIZE_64 + 1 ; j++) {
+for (uint32_t j = 0; j < 2 * VEC_N_SIZE_64 + 1; j++) {
 tmp[j] = 0;
 }
@@ -128,7 +128,7 @@ int PQCLEAN_HQC128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *
 // Abort if c != c' or d != d'
 result = (PQCLEAN_HQC128_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQC128_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && memcmp(d, d2, SHA512_BYTES) == 0);
-for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) {
+for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
 ss[i] = result * ss[i];
 }
 result--;
@@ -21,8 +21,8 @@ static inline int32_t popcount(uint64_t n);
 */
 void PQCLEAN_HQC128_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) {
 static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFUL, 0x3FFFFFFFUL}};
-for (size_t i = 0 ; i < VEC_N1_SIZE_64 - 1 ; i++) {
-for (size_t j = 0 ; j < 64 ; j++) {
+for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) {
+for (size_t j = 0; j < 64; j++) {
 uint8_t bit = (m[i] >> j) & 0x1;
 uint32_t pos_r = PARAM_N2 * ((i << 6) + j);
 uint16_t idx_r = (pos_r & 0x3f);
@@ -33,7 +33,7 @@ void PQCLEAN_HQC128_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m
 }
 }
-for (size_t j = 0 ; j < (PARAM_N1 & 0x3f) ; j++) {
+for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) {
 uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1;
 uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j);
 uint16_t idx_r = (pos_r & 0x3f);
@@ -77,7 +77,7 @@ void PQCLEAN_HQC128_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em
 size_t t = 0, b, bn, bi, c, cn, ci;
 uint64_t cx, ones;
-for (b = 0 ; b < PARAM_N1N2 - PARAM_N2 + 1 ; b += PARAM_N2) {
+for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) {
 bn = b >> 6;
 bi = b & 63;
 c = b + PARAM_N2 - 1;
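/* Editor's sketch -- not part of the diff. repetition_code_decode above is
 * a majority vote per PARAM_N2-bit block, done with word-level popcounts
 * on the (possibly word-straddling) block. Bit-at-a-time reference: */
#include <stdint.h>
#include <stddef.h>
static uint8_t majority_bit(const uint64_t *em, size_t start, size_t n2) {
    size_t ones = 0;
    for (size_t p = start; p < start + n2; ++p) {
        ones += (em[p >> 6] >> (p & 63)) & 1;
    }
    return (uint8_t) (ones > n2 / 2);
}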
@@ -36,7 +36,7 @@ void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_st
 seedexpander(ctx, rand_bytes, random_bytes_size);
-for (uint32_t i = 0 ; i < weight ; ++i) {
+for (uint32_t i = 0; i < weight; ++i) {
 exist = 0;
 do {
 if (j == random_bytes_size) {
@@ -52,7 +52,7 @@ void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_st
 random_data = random_data % PARAM_N;
-for (uint32_t k = 0 ; k < i ; k++) {
+for (uint32_t k = 0; k < i; k++) {
 if (v[k] == random_data) {
 exist = 1;
 }
@@ -95,7 +95,7 @@ void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint
 seedexpander(ctx, rand_bytes, random_bytes_size);
-for (uint32_t i = 0 ; i < weight ; ++i) {
+for (uint32_t i = 0; i < weight; ++i) {
 exist = 0;
 do {
 if (j == random_bytes_size) {
@@ -111,7 +111,7 @@ void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint
 random_data = random_data % PARAM_N;
-for (uint32_t k = 0 ; k < i ; k++) {
+for (uint32_t k = 0; k < i; k++) {
 if (tmp[k] == random_data) {
 exist = 1;
 }
@@ -124,7 +124,7 @@ void PQCLEAN_HQC128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint
 }
 }
-for (uint16_t i = 0 ; i < weight ; ++i) {
+for (uint16_t i = 0; i < weight; ++i) {
 int32_t index = tmp[i] / 64;
 int32_t pos = tmp[i] % 64;
 v[index] |= ((uint64_t) 1) << pos;
@@ -178,7 +178,7 @@ void PQCLEAN_HQC128_CLEAN_vect_set_random_from_randombytes(uint64_t *v) {
 * @param[in] size Integer that is the size of the vectors
 */
 void PQCLEAN_HQC128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
-for (uint32_t i = 0 ; i < size ; ++i) {
+for (uint32_t i = 0; i < size; ++i) {
 o[i] = v1[i] ^ v2[i];
 }
 }
@@ -217,7 +217,7 @@ void PQCLEAN_HQC128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64
 memcpy(o, v, VEC_N1N2_SIZE_BYTES);
-for (int8_t i = 0 ; i < val ; ++i) {
+for (int8_t i = 0; i < val; ++i) {
 o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i);
 }
 } else {
@@ -49,7 +49,7 @@ static uint16_t mod(uint16_t i, uint16_t modulus) {
 */
 static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) {
 // Compute the odd cyclotomic classes
-for (uint16_t i = 1 ; i < upper_bound ; i += 2) {
+for (uint16_t i = 1; i < upper_bound; i += 2) {
 if (cosets[i] == 0) { // If i does not already belong to a class
 uint16_t tmp = i;
 size_t j = PARAM_M;
@@ -87,13 +87,13 @@ size_t PQCLEAN_HQC192_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const
 // Start with bch_poly(X) = 1
 bch_poly[0] = 1;
-for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) {
+for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) {
 if (cosets[i] == 0) {
 continue;
 }
 // Multiply bch_poly(X) by X-a^i
-for (size_t j = deg_bch_poly ; j ; --j) {
+for (size_t j = deg_bch_poly; j; --j) {
 int16_t mask = -((uint16_t) - bch_poly[j] >> 15);
 bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1];
 }
@@ -129,10 +129,10 @@ void PQCLEAN_HQC192_AVX2_table_alphaij_generation(const uint16_t *exp) {
 // pre-computation of alpha^ij for i in [0, N1[ and j in [1, 2*PARAM_DELTA]
 // see comment of alpha_ij_table_init() function.
-for (uint16_t i = 0; i < PARAM_N1 ; ++i) {
+for (uint16_t i = 0; i < PARAM_N1; ++i) {
 tmp_value = 0;
 alpha_tmp = table_alpha_ij + i * (PARAM_DELTA << 1);
-for (uint16_t j = 0 ; j < (PARAM_DELTA << 1) ; j++) {
+for (uint16_t j = 0; j < (PARAM_DELTA << 1); j++) {
 tmp_value = PQCLEAN_HQC192_AVX2_gf_mod(tmp_value + i);
 alpha_tmp[j] = exp[tmp_value];
 }
@@ -168,13 +168,13 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 uint16_t d_p = 1;
 uint16_t d = syndromes[0];
-for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) {
+for (size_t mu = 0; mu < PARAM_DELTA; ++mu) {
 // Save sigma in case we need it to update X_sigma_p
 memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1));
 deg_sigma_copy = deg_sigma;
 uint16_t dd = PQCLEAN_HQC192_AVX2_gf_mul(d, PQCLEAN_HQC192_AVX2_gf_inverse(d_p)); // 0 if(d == 0)
-for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
+for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) {
 sigma[i] ^= PQCLEAN_HQC192_AVX2_gf_mul(dd, X_sigma_p[i]);
 }
@@ -198,7 +198,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 // Update pp, d_p and X_sigma_p if needed
 pp = (mask12 & (2 * mu)) ^ (~mask12 & pp);
 d_p = (mask12 & d) ^ (~mask12 & d_p);
-for (size_t i = PARAM_DELTA - 1 ; i ; --i) {
+for (size_t i = PARAM_DELTA - 1; i; --i) {
 X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
 }
 X_sigma_p[1] = 0;
@@ -207,7 +207,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
 // Compute the next discrepancy
 d = syndromes[2 * mu + 2];
-for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) {
+for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) {
 d ^= PQCLEAN_HQC192_AVX2_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]);
 }
 }
@@ -232,7 +232,7 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword) {
 uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64));
 size_t index = val / 64;
-for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) {
+for (size_t i = 0; i < VEC_K_SIZE_64 - 1; ++i) {
 uint64_t message1 = (codeword[index] & mask1) >> val % 64;
 uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64);
 message[i] = message1 | message2;
@@ -282,7 +282,7 @@ void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) {
 // vectorized version of the separation of the coordinates of the vector v in order to put each coordinate in an unsigned char
 // aux is used to consider 4 elements in v at each step of the loop
 aux = (uint32_t *) rcv;
-for (i = 0 ; i < ((VEC_N1_SIZE_BYTES >> 2) << 2) ; i += 4) {
+for (i = 0; i < ((VEC_N1_SIZE_BYTES >> 2) << 2); i += 4) {
 // duplicate aux 8 times in y , i.e y= (aux aux aux .... aux)
 y = _mm256_set1_epi32(*aux);
 // shuffle the bytes of y so that if aux=(a0 a1 a2 a3)
@@ -294,11 +294,11 @@ void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) {
 }
 // Evaluation of the polynomial corresponding to the vector v in alpha^i for i in {1, ..., 2 * PARAM_DELTA}
-for (size_t j = 0 ; j < SYND_SIZE_256 ; ++j) {
+for (size_t j = 0; j < SYND_SIZE_256; ++j) {
 S = zero_256;
 alpha_tmp = table_alpha_ij + (j << 4);
-for (size_t i = 0 ; i < PARAM_N1 ; ++i) {
+for (size_t i = 0; i < PARAM_N1; ++i) {
 tmp_repeat = _mm256_set1_epi64x((long long)(tmp_array[i] != 0));
 L = _mm256_cmpeq_epi64(tmp_repeat, un_256);
 tmp_repeat = _mm256_lddqu_si256((__m256i *)(alpha_tmp + i * (PARAM_DELTA << 1)));
@@ -43,7 +43,7 @@ void PQCLEAN_HQC192_AVX2_code_encode(uint64_t *em, const uint64_t *m) {
 __m256i msg = _mm256_lddqu_si256((const __m256i *) m);
 colonne = ((__m256i *) gen_matrix);
-for (i = 0 ; i < PARAM_N1 - PARAM_K ; i++) {
+for (i = 0; i < PARAM_N1 - PARAM_K; i++) {
 // y is the and operation between m and ith column of G
 y = _mm256_and_si256(colonne[i], msg);
 // aux0 = (y2 y3 y0 y1)
@@ -69,8 +69,8 @@ void PQCLEAN_HQC192_AVX2_code_encode(uint64_t *em, const uint64_t *m) {
 /* now we add the message m */
 /* systematic encoding */
-for (int32_t i = 0 ; i < 4 ; i++) {
-for (int32_t j = 0 ; j < 64 ; j++) {
+for (int32_t i = 0; i < 4; i++) {
+for (int32_t j = 0; j < 64; j++) {
 uint8_t bit = (m[i] >> j) & 0x1;
 uint32_t pos_r = PARAM_N2 * ((PARAM_N1 - PARAM_K) + ((i << 6) + j));
 uint16_t idx_r = (pos_r & 0x3f);
@@ -30,7 +30,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 */
 static void compute_fft_betas(uint16_t *betas) {
 size_t i;
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 betas[i] = 1 << (PARAM_M - 1 - i);
 }
 }
@@ -51,8 +51,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size
 size_t i, j;
 subset_sums[0] = 0;
-for (i = 0 ; i < set_size ; ++i) {
-for (j = 0 ; j < (1U << i) ; ++j) {
+for (i = 0; i < set_size; ++i) {
+for (j = 0; j < (1U << i); ++j) {
 subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j];
 }
 }
@@ -139,7 +139,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_
 memcpy(Q + n, f + 3 * n, 2 * n);
 memcpy(R, f, 4 * n);
-for (i = 0 ; i < n ; ++i) {
+for (i = 0; i < n; ++i) {
 Q[i] ^= f[2 * n + i];
 R[n + i] ^= Q[i];
 }
@@ -182,13 +182,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 // Step 1
 if (m_f == 1) {
-for (i = 0 ; i < m ; ++i) {
+for (i = 0; i < m; ++i) {
 tmp[i] = PQCLEAN_HQC192_AVX2_gf_mul(betas[i], f[1]);
 }
 w[0] = f[0];
-for (j = 0 ; j < m ; ++j) {
-for (k = 0 ; k < (1U << j) ; ++k) {
+for (j = 0; j < m; ++j) {
+for (k = 0; k < (1U << j); ++k) {
 w[(1 << j) + k] = w[k] ^ tmp[j];
 }
 }
@@ -199,7 +199,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 // Step 2: compute g
 if (betas[m - 1] != 1) {
 beta_m_pow = 1;
-for (i = 1 ; i < (1U << m_f) ; ++i) {
+for (i = 1; i < (1U << m_f); ++i) {
 beta_m_pow = PQCLEAN_HQC192_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
 f[i] = PQCLEAN_HQC192_AVX2_gf_mul(beta_m_pow, f[i]);
 }
@@ -209,7 +209,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 radix(f0, f1, f, m_f);
 // Step 4: compute gammas and deltas
-for (i = 0 ; i + 1 < m ; ++i) {
+for (i = 0; i + 1 < m; ++i) {
 gammas[i] = PQCLEAN_HQC192_AVX2_gf_mul(betas[i], PQCLEAN_HQC192_AVX2_gf_inverse(betas[m - 1]));
 deltas[i] = PQCLEAN_HQC192_AVX2_gf_square(gammas[i]) ^ gammas[i];
 }
@@ -224,7 +224,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
 w[0] = u[0];
 w[k] = u[0] ^ f1[0];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(gammas_sums[i], f1[0]);
 w[k + i] = w[i] ^ f1[0];
 }
@@ -235,7 +235,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32
 memcpy(w + k, v, 2 * k);
 w[0] = u[0];
 w[k] ^= u[0];
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(gammas_sums[i], v[i]);
 w[k + i] ^= w[i];
 }
@@ -288,7 +288,7 @@ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
 radix(f0, f1, f, PARAM_FFT);
 // Step 4: Compute deltas
-for (i = 0 ; i < PARAM_M - 1 ; ++i) {
+for (i = 0; i < PARAM_M - 1; ++i) {
 deltas[i] = PQCLEAN_HQC192_AVX2_gf_square(betas[i]) ^ betas[i];
 }
@@ -307,7 +307,7 @@ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
 w[k] ^= u[0];
 // Find other roots
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(betas_sums[i], v[i]);
 w[k + i] ^= w[i];
 }
@@ -337,7 +337,7 @@ void PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint
 bit = 1 ^ ((uint16_t) - w[k] >> 15);
 error[index / 8] ^= bit << (index % 64);
-for (i = 1 ; i < k ; ++i) {
+for (i = 1; i < k; ++i) {
 index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_AVX2_gf_log(gammas_sums[i]);
 bit = 1 ^ ((uint16_t) - w[i] >> 15);
 error[index / 64] ^= bit << (index % 64);
@@ -45,7 +45,7 @@ static inline void reduce(uint64_t *o, const uint64_t *a) {
 uint64_t r;
 uint64_t carry;
-for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) {
+for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) {
 r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63);
 carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63)));
 o[i] = a[i] ^ r ^ carry;
@@ -193,7 +193,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_4( D0, A, B);
 karat_mult_4(D2, A + 4, B + 4);
-for (int32_t i = 0 ; i < 4 ; i++) {
+for (int32_t i = 0; i < 4; i++) {
 int is = i + 4;
 SAA[i] = A[i] ^ A[is];
 SBB[i] = B[i] ^ B[is];
@@ -201,7 +201,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_4(D1, SAA, SBB);
-for (int32_t i = 0 ; i < 4 ; i++) {
+for (int32_t i = 0; i < 4; i++) {
 int32_t is = i + 4;
 int32_t is2 = is + 4;
 int32_t is3 = is2 + 4;
@@ -232,7 +232,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_8( D0, A, B);
 karat_mult_8(D2, A + 8, B + 8);
-for (int32_t i = 0 ; i < 8 ; i++) {
+for (int32_t i = 0; i < 8; i++) {
 int32_t is = i + 8;
 SAA[i] = A[i] ^ A[is];
 SBB[i] = B[i] ^ B[is];
@@ -240,7 +240,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_8( D1, SAA, SBB);
-for (int32_t i = 0 ; i < 8 ; i++) {
+for (int32_t i = 0; i < 8; i++) {
 int32_t is = i + 8;
 int32_t is2 = is + 8;
 int32_t is3 = is2 + 8;
@@ -271,7 +271,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_16( D0, A, B);
 karat_mult_16(D2, A + 16, B + 16);
-for (int32_t i = 0 ; i < 16 ; i++) {
+for (int32_t i = 0; i < 16; i++) {
 int is = i + 16;
 SAA[i] = A[i] ^ A[is];
 SBB[i] = B[i] ^ B[is];
@@ -279,7 +279,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) {
 karat_mult_16( D1, SAA, SBB);
-for (int32_t i = 0 ; i < 16 ; i++) {
+for (int32_t i = 0; i < 16; i++) {
 int32_t is = i + 16;
 int32_t is2 = is + 16;
 int32_t is3 = is2 + 16;
@@ -309,7 +309,7 @@ static inline void karat_mult_64(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_32( D0, A, B); | |||
karat_mult_32(D2, A + 32, B + 32); | |||
for (int32_t i = 0 ; i < 32 ; i++) { | |||
for (int32_t i = 0; i < 32; i++) { | |||
int32_t is = i + 32; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -317,7 +317,7 @@ static inline void karat_mult_64(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_32( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 32 ; i++) { | |||
for (int32_t i = 0; i < 32; i++) { | |||
int32_t is = i + 32; | |||
int32_t is2 = is + 32; | |||
int32_t is3 = is2 + 32; | |||
@@ -347,7 +347,7 @@ static inline void divByXplus1(__m256i *out, __m256i *in, int size) { | |||
B[0] = A[0]; | |||
for (int32_t i = 1 ; i < 2 * (size << 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size << 2); i++) { | |||
B[i] = B[i - 1] ^ A[i]; | |||
} | |||
} | |||
@@ -371,7 +371,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 - 1; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i42 = i4 - 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
@@ -382,7 +382,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4])); | |||
} | |||
for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = T_TM3_3W_256 - 1; i < T_TM3_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i41 = i4 + 1; | |||
U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); | |||
@@ -396,8 +396,8 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase: x = X^64 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
@@ -405,7 +405,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//W1 = W2 * W3 | |||
karat_mult_64( W1, W2, W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
int64_t *U1_64 = ((int64_t *) U1); | |||
int64_t *U2_64 = ((int64_t *) U2); | |||
@@ -421,7 +421,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V1_64 = ((int64_t *) V1); | |||
V2_64 = ((int64_t *) V2); | |||
for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 1; i < T_TM3_3W_256; i++) { | |||
int i4 = i << 2; | |||
W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1])); | |||
W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2])); | |||
@@ -430,14 +430,14 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2])); | |||
} | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
@@ -445,31 +445,31 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
karat_mult_64(tmp, W3, W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
karat_mult_64( W2, W0, W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
karat_mult_64(W4, U2, V2); | |||
karat_mult_64(W0, U0, V0); | |||
// Interpolation phase | |||
// 9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x -> x = X^64 | |||
U1_64 = ((int64_t *) W2); | |||
U2_64 = ((int64_t *) W0); | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1); i++) { | |||
int32_t i4 = i << 2; | |||
W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1])); | |||
W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1])); | |||
@@ -480,7 +480,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
__m256i *U1_256 = (__m256i *) (U1_64 + 1); | |||
tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); | |||
for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 1; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); | |||
} | |||
@@ -494,7 +494,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = (int64_t *) W1; | |||
__m256i *U2_256 = (__m256i *) (U2_64 + 1); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256) - 1; i++) { | |||
tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]); | |||
} | |||
@@ -502,19 +502,19 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W3[2 * (T_TM3_3W_256) - 1] = zero; | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0 + W1*x + W2*x^2 + W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256) | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i]; | |||
ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i]; | |||
@@ -530,12 +530,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]); | |||
U2_256 = (__m256i *) (U2_64 - 2); | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 << 1; i++) { | |||
_mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i])); | |||
_mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i])); | |||
} | |||
for (int32_t i = 0 ; i < 2 * VEC_N_SIZE_256 + 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * VEC_N_SIZE_256 + 1; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQC192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *c | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQC192_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQC192_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && PQCLEAN_HQC192_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -26,7 +26,7 @@ void PQCLEAN_HQC192_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) | |||
size_t t = 0, b, bn, bi, c, cn, ci; | |||
uint64_t cx, ones; | |||
for (b = 0 ; b < PARAM_N1N2 - PARAM_N2 + 1 ; b += PARAM_N2) { | |||
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) { | |||
bn = b >> 6; | |||
bi = b & 63; | |||
c = b + PARAM_N2 - 1; | |||
@@ -44,7 +44,7 @@ void PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -60,7 +60,7 @@ void PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -73,7 +73,7 @@ void PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
} | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
// we store the block number and bit position of each chosen coordinate | |||
uint64_t bloc = tmp[i] >> 6; | |||
bloc256[i] = _mm256_set1_epi64x(bloc >> 2); | |||
@@ -85,11 +85,11 @@ void PQCLEAN_HQC192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
bit256[i] = bloc256 & mask256; | |||
} | |||
for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) { | |||
for (uint32_t i = 0; i < LOOP_SIZE; i++) { | |||
__m256i aux = _mm256_loadu_si256(((__m256i *)v) + i); | |||
__m256i i256 = _mm256_set1_epi64x(i); | |||
for (uint32_t j = 0 ; j < weight ; j++) { | |||
for (uint32_t j = 0; j < weight; j++) { | |||
__m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); | |||
aux ^= bit256[j] & mask256; | |||
} | |||
@@ -146,7 +146,7 @@ void PQCLEAN_HQC192_AVX2_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQC192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -164,7 +164,7 @@ void PQCLEAN_HQC192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_ | |||
int PQCLEAN_HQC192_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
unsigned char diff = 0; | |||
for (uint32_t i = 0 ; i < size ; i++) { | |||
for (uint32_t i = 0; i < size; i++) { | |||
diff |= ((uint8_t *) v1)[i] ^ ((uint8_t *) v2)[i]; | |||
} | |||
return diff != 0; | |||
@@ -191,7 +191,7 @@ void PQCLEAN_HQC192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_ | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -50,7 +50,7 @@ static uint16_t mod(uint16_t i, uint16_t modulus) { | |||
*/ | |||
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) { | |||
// Compute the odd cyclotomic classes | |||
for (uint16_t i = 1 ; i < upper_bound ; i += 2) { | |||
for (uint16_t i = 1; i < upper_bound; i += 2) { | |||
if (cosets[i] == 0) { // If i does not already belong to a class | |||
uint16_t tmp = i; | |||
size_t j = PARAM_M; | |||
@@ -88,13 +88,13 @@ size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, cons | |||
// Start with bch_poly(X) = 1 | |||
bch_poly[0] = 1; | |||
for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) { | |||
for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) { | |||
if (cosets[i] == 0) { | |||
continue; | |||
} | |||
// Multiply bch_poly(X) by X-a^i | |||
for (size_t j = deg_bch_poly ; j ; --j) { | |||
for (size_t j = deg_bch_poly; j; --j) { | |||
int16_t mask = -((uint16_t) - bch_poly[j] >> 15); | |||
bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1]; | |||
} | |||
@@ -119,13 +119,13 @@ size_t PQCLEAN_HQC192_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, cons | |||
* @param[in] message Array of PARAM_K bytes storing the packed message | |||
*/ | |||
static void unpack_message(uint8_t *message_unpacked, const uint64_t *message) { | |||
for (size_t i = 0 ; i < (VEC_K_SIZE_64 - (PARAM_K % 64 != 0)) ; ++i) { | |||
for (size_t j = 0 ; j < 64 ; ++j) { | |||
for (size_t i = 0; i < (VEC_K_SIZE_64 - (PARAM_K % 64 != 0)); ++i) { | |||
for (size_t j = 0; j < 64; ++j) { | |||
message_unpacked[j + 64 * i] = (message[i] >> j) & 0x0000000000000001; | |||
} | |||
} | |||
for (int8_t j = 0 ; j < PARAM_K % 64 ; ++j) { | |||
for (int8_t j = 0; j < PARAM_K % 64; ++j) { | |||
message_unpacked[j + 64 * (VEC_K_SIZE_64 - 1)] = (message[VEC_K_SIZE_64 - 1] >> j) & 0x0000000000000001; | |||
} | |||
} | |||
@@ -142,10 +142,10 @@ static void lfsr_encode(uint8_t *codeword, const uint8_t *message) { | |||
uint8_t bch_poly[PARAM_G] = PARAM_BCH_POLY; | |||
// Compute the Parity-check digits | |||
for (int16_t i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int16_t i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = message[i] ^ codeword[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = PARAM_N1 - PARAM_K - 1 ; j ; --j) { | |||
for (size_t j = PARAM_N1 - PARAM_K - 1; j; --j) { | |||
codeword[j] = codeword[j - 1] ^ (-gate_value & bch_poly[j]); | |||
} | |||
@@ -165,13 +165,13 @@ static void lfsr_encode(uint8_t *codeword, const uint8_t *message) { | |||
* @param[in] codeword_unpacked Array of PARAM_N1 bytes storing the unpacked codeword | |||
*/ | |||
static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked) { | |||
for (size_t i = 0 ; i < (VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0)) ; ++i) { | |||
for (size_t j = 0 ; j < 64 ; ++j) { | |||
for (size_t i = 0; i < (VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0)); ++i) { | |||
for (size_t j = 0; j < 64; ++j) { | |||
codeword[i] |= ((uint64_t) codeword_unpacked[j + 64 * i]) << j; | |||
} | |||
} | |||
for (size_t j = 0 ; j < PARAM_N1 % 64 ; ++j) { | |||
for (size_t j = 0; j < PARAM_N1 % 64; ++j) { | |||
codeword[VEC_N1_SIZE_64 - 1] |= ((uint64_t) codeword_unpacked[j + 64 * (VEC_N1_SIZE_64 - 1)]) << j; | |||
} | |||
} | |||
@@ -224,13 +224,13 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) { | |||
for (size_t mu = 0; mu < PARAM_DELTA; ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQC192_CLEAN_gf_mul(d, PQCLEAN_HQC192_CLEAN_gf_inverse(d_p)); // 0 if(d == 0) | |||
for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQC192_CLEAN_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -254,7 +254,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
// Update pp, d_p and X_sigma_p if needed | |||
pp = (mask12 & (2 * mu)) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA - 1 ; i ; --i) { | |||
for (size_t i = PARAM_DELTA - 1; i; --i) { | |||
X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
X_sigma_p[1] = 0; | |||
@@ -263,7 +263,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
// Compute the next discrepancy | |||
d = syndromes[2 * mu + 2]; | |||
for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQC192_CLEAN_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]); | |||
} | |||
} | |||
@@ -288,7 +288,7 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword) { | |||
uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64)); | |||
size_t index = val / 64; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64 - 1; ++i) { | |||
uint64_t message1 = (codeword[index] & mask1) >> val % 64; | |||
uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64); | |||
message[i] = message1 | message2; | |||
@@ -33,7 +33,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -54,8 +54,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -149,7 +149,7 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin | |||
memcpy(f + 2 * n, R + n, 2 * n); | |||
memcpy(f + 3 * n, Q + n, 2 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
f[2 * n + i] ^= Q[i]; | |||
f[3 * n + i] ^= f[2 * n + i]; | |||
} | |||
@@ -185,14 +185,14 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
// Step 1 | |||
if (m_f == 1) { | |||
f[0] = 0; | |||
for (i = 0 ; i < (1U << m) ; ++i) { | |||
for (i = 0; i < (1U << m); ++i) { | |||
f[0] ^= w[i]; | |||
} | |||
f[1] = 0; | |||
betas_sums[0] = 0; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
betas_sums[(1 << j) + k] = betas_sums[k] ^ betas[j]; | |||
f[1] ^= PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[(1 << j) + k], w[(1 << j) + k]); | |||
} | |||
@@ -202,7 +202,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
} | |||
// Compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], PQCLEAN_HQC192_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -222,7 +222,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
f1[1] = 0; | |||
u[0] = w[0] ^ w[k]; | |||
f1[0] = w[k]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
u[i] = w[i] ^ w[k + i]; | |||
f1[0] ^= PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; | |||
} | |||
@@ -231,7 +231,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
u[0] = w[0] ^ w[k]; | |||
v[0] = w[k]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
u[i] = w[i] ^ w[k + i]; | |||
v[i] = PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; | |||
} | |||
@@ -247,7 +247,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
// Step 2: compute f from g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -294,13 +294,13 @@ void PQCLEAN_HQC192_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) | |||
k = 1 << (PARAM_M - 1); | |||
u[0] = w[0] ^ w[k]; | |||
v[0] = w[k]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
u[i] = w[i] ^ w[k + i]; | |||
v[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[i], u[i]) ^ w[k + i]; | |||
} | |||
// Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -395,7 +395,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -438,13 +438,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -455,7 +455,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -465,7 +465,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], PQCLEAN_HQC192_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -480,7 +480,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -491,7 +491,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -544,7 +544,7 @@ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -563,7 +563,7 @@ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -588,14 +588,14 @@ void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 | |||
size_t i, j, k; | |||
// Unpack the received word vector into array r | |||
for (i = 0 ; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0) ; ++i) { | |||
for (j = 0 ; j < 64 ; ++j) { | |||
for (i = 0; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0); ++i) { | |||
for (j = 0; j < 64; ++j) { | |||
r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); | |||
} | |||
} | |||
// Last byte | |||
for (j = 0 ; j < PARAM_N1 % 64 ; ++j) { | |||
for (j = 0; j < PARAM_N1 % 64; ++j) { | |||
r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); | |||
} | |||
@@ -609,7 +609,7 @@ void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 | |||
k = 1 << (PARAM_M - 1); | |||
w[0] = 0; | |||
w[k] = -r[0] & 1; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = -r[PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i])] & gammas_sums[i]; | |||
w[k + i] = -r[PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i] ^ 1)] & (gammas_sums[i] ^ 1); | |||
} | |||
@@ -639,7 +639,7 @@ void PQCLEAN_HQC192_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uin | |||
bit = 1 ^ ((uint16_t) - w[k] >> 15); | |||
error[index / 64] ^= ((uint64_t) bit) << (index % 64); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i]); | |||
bit = 1 ^ ((uint16_t) - w[i] >> 15); | |||
error[index / 64] ^= ((uint64_t) bit) << (index % 64); | |||
@@ -45,7 +45,7 @@ static void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -79,49 +79,49 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
uint64_t *pt; | |||
uint16_t *res_16; | |||
for (uint32_t i = 0 ; i < 16; i++) { | |||
for (uint32_t i = 0; i < 16; i++) { | |||
permuted_table[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i < 15 ; i++) { | |||
for (uint32_t i = 0; i < 15; i++) { | |||
swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); | |||
} | |||
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); | |||
for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = a2[j]; | |||
} | |||
pt[VEC_N_SIZE_64] = 0x0; | |||
for (uint32_t i = 1 ; i < 16 ; i++) { | |||
for (uint32_t i = 1; i < 16; i++) { | |||
carry = 0; | |||
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = (a2[j] << i) ^ carry; | |||
carry = (a2[j] >> ((64 - i))); | |||
} | |||
pt[VEC_N_SIZE_64] = carry; | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
permuted_sparse_vect[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i + 1 < weight ; i++) { | |||
for (uint32_t i = 0; i + 1 < weight; i++) { | |||
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i)); | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
dec = a1[permuted_sparse_vect[i]] & 0xf; | |||
s = a1[permuted_sparse_vect[i]] >> 4; | |||
res_16 = ((uint16_t *) o) + s; | |||
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) { | |||
*res_16++ ^= (uint16_t) pt[j]; | |||
*res_16++ ^= (uint16_t) (pt[j] >> 16); | |||
*res_16++ ^= (uint16_t) (pt[j] >> 32); | |||
@@ -146,7 +146,7 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
*/ | |||
void PQCLEAN_HQC192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { | |||
uint64_t tmp[2 * VEC_N_SIZE_64 + 1]; | |||
for (uint32_t j = 0 ; j < 2 * VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < 2 * VEC_N_SIZE_64 + 1; j++) { | |||
tmp[j] = 0; | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQC192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char * | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQC192_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQC192_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && memcmp(d, d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -21,8 +21,8 @@ static inline int32_t popcount(uint64_t n); | |||
*/ | |||
void PQCLEAN_HQC192_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) { | |||
static const uint64_t mask[2][2] = {{0x0UL, 0x0UL}, {0x7FFFFFFFFFFFFFFUL, 0x3FFFFFFFFFFFFFFUL}}; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_64 - 1 ; i++) { | |||
for (size_t j = 0 ; j < 64 ; j++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) { | |||
for (size_t j = 0; j < 64; j++) { | |||
uint8_t bit = (m[i] >> j) & 0x1; | |||
uint32_t pos_r = PARAM_N2 * ((i << 6) + j); | |||
uint16_t idx_r = (pos_r & 0x3f); | |||
@@ -33,7 +33,7 @@ void PQCLEAN_HQC192_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m | |||
} | |||
} | |||
for (size_t j = 0 ; j < (PARAM_N1 & 0x3f) ; j++) { | |||
for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) { | |||
uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1; | |||
uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j); | |||
uint16_t idx_r = (pos_r & 0x3f); | |||
@@ -76,7 +76,7 @@ static inline int32_t popcount(uint64_t n) { | |||
void PQCLEAN_HQC192_CLEAN_repetition_code_decode(uint64_t *m, const uint64_t *em) { | |||
size_t t = 0, b, bn, bi, c, cn, ci; | |||
uint64_t cx, ones; | |||
for (b = 0 ; b < PARAM_N1N2 - PARAM_N2 + 1 ; b += PARAM_N2) { | |||
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) { | |||
bn = b >> 6; | |||
bi = b & 63; | |||
c = b + PARAM_N2 - 1; | |||
@@ -36,7 +36,7 @@ void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_st | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -52,7 +52,7 @@ void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_st | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (v[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -95,7 +95,7 @@ void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -111,7 +111,7 @@ void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -124,7 +124,7 @@ void PQCLEAN_HQC192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint | |||
} | |||
} | |||
for (uint16_t i = 0 ; i < weight ; ++i) { | |||
for (uint16_t i = 0; i < weight; ++i) { | |||
int32_t index = tmp[i] / 64; | |||
int32_t pos = tmp[i] % 64; | |||
v[index] |= ((uint64_t) 1) << pos; | |||
@@ -178,7 +178,7 @@ void PQCLEAN_HQC192_CLEAN_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQC192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQC192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64 | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -49,7 +49,7 @@ static uint16_t mod(uint16_t i, uint16_t modulus) { | |||
*/ | |||
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) { | |||
// Compute the odd cyclotomic classes | |||
for (uint16_t i = 1 ; i < upper_bound ; i += 2) { | |||
for (uint16_t i = 1; i < upper_bound; i += 2) { | |||
if (cosets[i] == 0) { // If i does not already belong to a class | |||
uint16_t tmp = i; | |||
size_t j = PARAM_M; | |||
@@ -87,13 +87,13 @@ size_t PQCLEAN_HQC256_AVX2_compute_bch_poly(uint16_t *bch_poly, size_t *t, const | |||
// Start with bch_poly(X) = 1 | |||
bch_poly[0] = 1; | |||
for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) { | |||
for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) { | |||
if (cosets[i] == 0) { | |||
continue; | |||
} | |||
// Multiply bch_poly(X) by X-a^i | |||
for (size_t j = deg_bch_poly ; j ; --j) { | |||
for (size_t j = deg_bch_poly; j; --j) { | |||
int16_t mask = -((uint16_t) - bch_poly[j] >> 15); | |||
bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1]; | |||
} | |||
@@ -129,10 +129,10 @@ void PQCLEAN_HQC256_AVX2_table_alphaij_generation(const uint16_t *exp) { | |||
// pre-computation of alpha^ij for i in [0, N1[ and j in [1, 2*PARAM_DELTA] | |||
// see comment of alpha_ij_table_init() function. | |||
for (uint16_t i = 0; i < PARAM_N1 ; ++i) { | |||
for (uint16_t i = 0; i < PARAM_N1; ++i) { | |||
tmp_value = 0; | |||
alpha_tmp = table_alpha_ij + i * (PARAM_DELTA << 1); | |||
for (uint16_t j = 0 ; j < (PARAM_DELTA << 1) ; j++) { | |||
for (uint16_t j = 0; j < (PARAM_DELTA << 1); j++) { | |||
tmp_value = PQCLEAN_HQC256_AVX2_gf_mod(tmp_value + i); | |||
alpha_tmp[j] = exp[tmp_value]; | |||
} | |||
@@ -168,13 +168,13 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) { | |||
for (size_t mu = 0; mu < PARAM_DELTA; ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQC256_AVX2_gf_mul(d, PQCLEAN_HQC256_AVX2_gf_inverse(d_p)); // 0 if(d == 0) | |||
for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQC256_AVX2_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -198,7 +198,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
// Update pp, d_p and X_sigma_p if needed | |||
pp = (mask12 & (2 * mu)) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA - 1 ; i ; --i) { | |||
for (size_t i = PARAM_DELTA - 1; i; --i) { | |||
X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
X_sigma_p[1] = 0; | |||
@@ -207,7 +207,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
// Compute the next discrepancy | |||
d = syndromes[2 * mu + 2]; | |||
for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQC256_AVX2_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]); | |||
} | |||
} | |||
@@ -232,7 +232,7 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword) { | |||
uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64)); | |||
size_t index = val / 64; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64 - 1; ++i) { | |||
uint64_t message1 = (codeword[index] & mask1) >> val % 64; | |||
uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64); | |||
message[i] = message1 | message2; | |||
@@ -282,7 +282,7 @@ void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) { | |||
// vectorized separation of the coordinates of the vector v, putting each coordinate in an unsigned char | |||
// aux is used to consider 4 elements in v at each step of the loop | |||
aux = (uint32_t *) rcv; | |||
for (i = 0 ; i < ((VEC_N1_SIZE_BYTES >> 2) << 2) ; i += 4) { | |||
for (i = 0; i < ((VEC_N1_SIZE_BYTES >> 2) << 2); i += 4) { | |||
// duplicate aux 8 times in y, i.e. y = (aux aux ... aux) | |||
y = _mm256_set1_epi32(*aux); | |||
// shuffle the bytes of y so that if aux=(a0 a1 a2 a3) | |||
@@ -294,11 +294,11 @@ void compute_syndromes(__m256i *syndromes, const uint64_t *rcv) { | |||
} | |||
// Evaluation of the polynomial corresponding to the vector v in alpha^i for i in {1, ..., 2 * PARAM_DELTA} | |||
for (size_t j = 0 ; j < SYND_SIZE_256 ; ++j) { | |||
for (size_t j = 0; j < SYND_SIZE_256; ++j) { | |||
S = zero_256; | |||
alpha_tmp = table_alpha_ij + (j << 4); | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
tmp_repeat = _mm256_set1_epi64x((long long)(tmp_array[i] != 0)); | |||
L = _mm256_cmpeq_epi64(tmp_repeat, un_256); | |||
tmp_repeat = _mm256_lddqu_si256((__m256i *)(alpha_tmp + i * (PARAM_DELTA << 1))); | |||
@@ -43,7 +43,7 @@ void PQCLEAN_HQC256_AVX2_code_encode(uint64_t *em, const uint64_t *m) { | |||
__m256i msg = _mm256_lddqu_si256((const __m256i *) m); | |||
colonne = ((__m256i *) gen_matrix); | |||
for (i = 0 ; i < PARAM_N1 - PARAM_K ; i++) { | |||
for (i = 0; i < PARAM_N1 - PARAM_K; i++) { | |||
// y is the AND between m and the ith column of G | |||
y = _mm256_and_si256(colonne[i], msg); | |||
// aux0 = (y2 y3 y0 y1) | |||
@@ -74,8 +74,8 @@ void PQCLEAN_HQC256_AVX2_code_encode(uint64_t *em, const uint64_t *m) { | |||
/* now we add the message m */ | |||
/* systematic encoding */ | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t j = 0 ; j < 64 ; j++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
for (int32_t j = 0; j < 64; j++) { | |||
uint8_t bit = (m[i] >> j) & 0x1; | |||
uint32_t pos_r = PARAM_N2 * ((PARAM_N1 - PARAM_K) + ((i << 6) + j)); | |||
uint16_t idx_r = (pos_r & 0x3f); | |||
@@ -30,7 +30,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -51,8 +51,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -139,7 +139,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -182,13 +182,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQC256_AVX2_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -199,7 +199,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQC256_AVX2_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQC256_AVX2_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -209,7 +209,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQC256_AVX2_gf_mul(betas[i], PQCLEAN_HQC256_AVX2_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQC256_AVX2_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -224,7 +224,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC256_AVX2_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -235,7 +235,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC256_AVX2_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -288,7 +288,7 @@ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQC256_AVX2_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -307,7 +307,7 @@ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC256_AVX2_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -337,7 +337,7 @@ void PQCLEAN_HQC256_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint | |||
bit = 1 ^ ((uint16_t) - w[k] >> 15); | |||
error[index / 64] ^= ((uint64_t) bit) << (index % 64); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC256_AVX2_gf_log(gammas_sums[i]); | |||
bit = 1 ^ ((uint16_t) - w[i] >> 15); | |||
error[index / 64] ^= ((uint64_t) bit) << (index % 64); | |||
@@ -50,7 +50,7 @@ static inline void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -198,7 +198,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4( D0, A, B); | |||
karat_mult_4(D2, A + 4, B + 4); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int is = i + 4; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -206,7 +206,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4(D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int32_t is = i + 4; | |||
int32_t is2 = is + 4; | |||
int32_t is3 = is2 + 4; | |||
@@ -237,7 +237,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D0, A, B); | |||
karat_mult_8(D2, A + 8, B + 8); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -245,7 +245,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
int32_t is2 = is + 8; | |||
int32_t is3 = is2 + 8; | |||
@@ -276,7 +276,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D0, A, B); | |||
karat_mult_16(D2, A + 16, B + 16); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int is = i + 16; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -284,7 +284,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int32_t is = i + 16; | |||
int32_t is2 = is + 16; | |||
int32_t is3 = is2 + 16; | |||
@@ -314,7 +314,7 @@ static inline void divByXplus1(__m256i *out, __m256i *in, int size) { | |||
B[0] = A[0]; | |||
for (int32_t i = 1 ; i < 2 * (size << 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size << 2); i++) { | |||
B[i] = B[i - 1] ^ A[i]; | |||
} | |||
} | |||
@@ -338,7 +338,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 - 1; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i42 = i4 - 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
@@ -349,7 +349,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4])); | |||
} | |||
for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = T_TM3_3W_256 - 1; i < T_TM3_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i41 = i4 + 1; | |||
U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); | |||
@@ -363,8 +363,8 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase: x = X^64 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
@@ -372,7 +372,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//W1 = W2 * W3 | |||
karat_mult_32( W1, W2, W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
int64_t *U1_64 = ((int64_t *) U1); | |||
int64_t *U2_64 = ((int64_t *) U2); | |||
@@ -388,7 +388,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V1_64 = ((int64_t *) V1); | |||
V2_64 = ((int64_t *) V2); | |||
for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 1; i < T_TM3_3W_256; i++) { | |||
int i4 = i << 2; | |||
W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1])); | |||
W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2])); | |||
@@ -397,46 +397,46 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2])); | |||
} | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
//W3 = W3 * W2 ; W2 = W0 * W4 | |||
karat_mult_32(tmp, W3, W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
karat_mult_32(W2, W0, W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
karat_mult_32(W4, U2, V2); | |||
karat_mult_32(W0, U0, V0); | |||
// Interpolation phase | |||
// 9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x -> x = X^64 | |||
U1_64 = ((int64_t *) W2); | |||
U2_64 = ((int64_t *) W0); | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1); i++) { | |||
int32_t i4 = i << 2; | |||
W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1])); | |||
W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1])); | |||
@@ -447,7 +447,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
__m256i *U1_256 = (__m256i *) (U1_64 + 1); | |||
tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); | |||
for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 1; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); | |||
} | |||
@@ -461,7 +461,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = (int64_t *) W1; | |||
__m256i *U2_256 = (__m256i *) (U2_64 + 1); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256) - 1; i++) { | |||
tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]); | |||
} | |||
@@ -469,19 +469,19 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W3[2 * (T_TM3_3W_256) - 1] = zero; | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0 + W1*x + W2*x^2 + W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256) | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i]; | |||
ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i]; | |||
@@ -497,12 +497,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]); | |||
U2_256 = (__m256i *) (U2_64 - 2); | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 << 1; i++) { | |||
_mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i])); | |||
_mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i])); | |||
} | |||
for (int32_t i = 0 ; i < 6 * T_TM3_3W_256 - 2 ; i++) { | |||
for (int32_t i = 0; i < 6 * T_TM3_3W_256 - 2; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
@@ -519,7 +519,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
*/ | |||
static inline void divByXplus1_256(__m256i *out, __m256i *in, int32_t size) { | |||
out[0] = in[0]; | |||
for (int32_t i = 1 ; i < 2 * (size + 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size + 2); i++) { | |||
out[i] = out[i - 1] ^ in[i]; | |||
} | |||
} | |||
@@ -542,7 +542,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3R_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
V0[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4])); | |||
@@ -552,7 +552,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2])); | |||
} | |||
for (int32_t i = T_TM3R_3W_256 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
for (int32_t i = T_TM3R_3W_256; i < T_TM3R_3W_256 + 2; i++) { | |||
U0[i] = zero; | |||
V0[i] = zero; | |||
U1[i] = zero; | |||
@@ -564,27 +564,27 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase: x = X^256 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
for (int32_t i = T_TM3R_3W_256 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
for (int32_t i = T_TM3R_3W_256; i < T_TM3R_3W_256 + 2; i++) { | |||
W2[i] = zero; | |||
W3[i] = zero; | |||
} | |||
//W1 = W2 * W3 | |||
TOOM3Mult(W1, (uint64_t *) W2, (uint64_t *) W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 + 2 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 + 2 !) | |||
W0[0] = zero; | |||
W4[0] = zero; | |||
W0[1] = U1[0]; | |||
W4[1] = V1[0]; | |||
for (int32_t i = 1 ; i < T_TM3R_3W_256 + 1 ; i++) { | |||
for (int32_t i = 1; i < T_TM3R_3W_256 + 1; i++) { | |||
W0[i + 1] = U1[i] ^ U2[i - 1]; | |||
W4[i + 1] = V1[i] ^ V2[i - 1]; | |||
} | |||
@@ -592,28 +592,28 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W0[T_TM3R_3W_256 + 1] = U2[T_TM3R_3W_256 - 1]; | |||
W4[T_TM3R_3W_256 + 1] = V2[T_TM3R_3W_256 - 1]; | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256 + 2; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256 + 2; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
//W3 = W3 * W2 ; W2 = W0 * W4 | |||
TOOM3Mult(tmp, (uint64_t *) W3, (uint64_t *) W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
TOOM3Mult(W2, (uint64_t *) W0, (uint64_t *) W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
TOOM3Mult(W4, (uint64_t *) U2, (uint64_t *) V2); | |||
TOOM3Mult(W0, (uint64_t *) U0, (uint64_t *) V0); | |||
@@ -621,17 +621,17 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2) - 1; i++) { | |||
int32_t i1 = i + 1; | |||
W2[i] = W2[i1] ^ W0[i1]; | |||
} | |||
@@ -639,7 +639,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W2[2 * (T_TM3R_3W_256 + 2) - 1] = zero; | |||
//W2 =(W2 + W3 + W4*(x^3+1))/(x+1) | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i]; | |||
} | |||
@@ -647,14 +647,14 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
tmp[2 * (T_TM3R_3W_256 + 2) + 1] = zero; | |||
tmp[2 * (T_TM3R_3W_256 + 2) + 2] = zero; | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256); i++) { | |||
tmp[i + 3] ^= W4[i]; | |||
} | |||
divByXplus1_256(W2, tmp, T_TM3R_3W_256); | |||
//W3 =(W3 + W1)/(x*(x+1)) | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2) - 1; i++) { | |||
int32_t i1 = i + 1; | |||
tmp[i] = W3[i1] ^ W1[i1]; | |||
} | |||
@@ -663,18 +663,18 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
divByXplus1_256(W3, tmp, T_TM3R_3W_256); | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0 + W1*x + W2*x^2 + W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256+2) | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + T_TM3R_3W_256] = W0[i + T_TM3R_3W_256] ^ W1[i]; | |||
ro256[i + 2 * T_TM3R_3W_256] = W1[i + T_TM3R_3W_256] ^ W2[i]; | |||
@@ -696,7 +696,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
ro256[3 + 5 * T_TM3R_3W_256] ^= W3[3 + 2 * T_TM3R_3W_256]; | |||
for (int32_t i = 0 ; i < 2 * VEC_N_SIZE_256 + 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * VEC_N_SIZE_256 + 1; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQC256_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *c | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQC256_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQC256_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && PQCLEAN_HQC256_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
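
The hunk above is the decapsulation abort path: `result` is 1 when the recomputed ciphertext and hashes all match and 0 otherwise, so `ss[i] = result * ss[i]` zeroes the shared secret on failure without a secret-dependent branch, and `result--` then maps that boolean onto the return-code convention. A minimal sketch of the same idiom, with `ok` standing in for `result` (names here are illustrative, not the library's API):

```c
#include <stdint.h>
#include <stddef.h>

/* Sketch: zero out a shared secret when ok == 0, keep it when ok == 1,
 * without branching on ok. */
static void cond_keep(uint8_t *ss, size_t len, uint8_t ok) {
    uint8_t mask = (uint8_t) -ok;   /* ok==1 -> 0xFF, ok==0 -> 0x00 */
    for (size_t i = 0; i < len; i++) {
        ss[i] &= mask;              /* same effect as ss[i] = ok * ss[i] */
    }
}
```

Branching on the comparison instead would leak the accept/reject decision through timing.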
@@ -26,7 +26,7 @@ void PQCLEAN_HQC256_AVX2_repetition_code_decode(uint64_t *m, const uint64_t *em) | |||
uint64_t cx, ones; | |||
uint64_t cy; | |||
for (b = 0 ; b < PARAM_N1N2 - PARAM_N2 + 1 ; b += PARAM_N2) { | |||
for (b = 0; b < PARAM_N1N2 - PARAM_N2 + 1; b += PARAM_N2) { | |||
bn = b >> 6; | |||
bi = b & 63; | |||
c = b + PARAM_N2 - 1; | |||
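
This hunk sits in the repetition-code decoder: each message bit was transmitted PARAM_N2 times, and the rest of the loop (not shown) counts the ones in each PARAM_N2-bit window to take a majority vote. A toy sketch of that vote, with an illustrative repetition factor rather than the hqc parameter:

```c
#include <stdint.h>

/* Toy sketch of repetition decoding: each message bit was repeated
 * REP times; decode by majority vote. REP = 5 is illustrative only. */
#define REP 5

static uint8_t repetition_decode_bit(const uint8_t *bits) {
    int ones = 0;
    for (int i = 0; i < REP; i++) {
        ones += bits[i] & 1;
    }
    return (uint8_t) (ones > REP / 2);  /* majority wins */
}
```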
@@ -44,7 +44,7 @@ void PQCLEAN_HQC256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -60,7 +60,7 @@ void PQCLEAN_HQC256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -73,7 +73,7 @@ void PQCLEAN_HQC256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
} | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
// we store the bloc number and bit position of each vb[i] | |||
uint64_t bloc = tmp[i] >> 6; | |||
bloc256[i] = _mm256_set1_epi64x(bloc >> 2); | |||
@@ -85,11 +85,11 @@ void PQCLEAN_HQC256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint6 | |||
bit256[i] = bloc256 & mask256; | |||
} | |||
for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) { | |||
for (uint32_t i = 0; i < LOOP_SIZE; i++) { | |||
__m256i aux = _mm256_loadu_si256(((__m256i *)v) + i); | |||
__m256i i256 = _mm256_set1_epi64x(i); | |||
for (uint32_t j = 0 ; j < weight ; j++) { | |||
for (uint32_t j = 0; j < weight; j++) { | |||
__m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); | |||
aux ^= bit256[j] & mask256; | |||
} | |||
@@ -146,7 +146,7 @@ void PQCLEAN_HQC256_AVX2_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQC256_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -164,7 +164,7 @@ void PQCLEAN_HQC256_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_ | |||
int PQCLEAN_HQC256_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
unsigned char diff = 0; | |||
for (uint32_t i = 0 ; i < size ; i++) { | |||
for (uint32_t i = 0; i < size; i++) { | |||
diff |= ((uint8_t *) v1)[i] ^ ((uint8_t *) v2)[i]; | |||
} | |||
return diff != 0; | |||
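
`vect_compare` ORs all byte differences together before testing, so its running time does not depend on where (or whether) the first mismatch occurs, unlike an early-exit `memcmp`. The same idiom in isolation:

```c
#include <stdint.h>
#include <stddef.h>

/* Sketch of the accumulate-then-test idiom: no early exit, so timing
 * is independent of the position of the first differing byte. */
static int ct_differ(const uint8_t *a, const uint8_t *b, size_t n) {
    uint8_t diff = 0;
    for (size_t i = 0; i < n; i++) {
        diff |= a[i] ^ b[i];
    }
    return diff != 0;   /* 1 if any byte differed */
}
```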
@@ -191,7 +191,7 @@ void PQCLEAN_HQC256_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_ | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -50,7 +50,7 @@ static uint16_t mod(uint16_t i, uint16_t modulus) { | |||
*/ | |||
static void compute_cyclotomic_cosets(uint16_t *cosets, uint16_t upper_bound) { | |||
// Compute the odd cyclotomic classes | |||
for (uint16_t i = 1 ; i < upper_bound ; i += 2) { | |||
for (uint16_t i = 1; i < upper_bound; i += 2) { | |||
if (cosets[i] == 0) { // If i does not already belong to a class | |||
uint16_t tmp = i; | |||
size_t j = PARAM_M; | |||
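
`compute_cyclotomic_cosets` marks, for each odd `i`, the whole orbit {i, 2i, 4i, ...} modulo 2^PARAM_M - 1; exponents in the same coset share a minimal polynomial, which is what the BCH generator construction below relies on. A standalone illustration of one coset over a small field (m = 4, so modulus 15, instead of PARAM_M):

```c
#include <stdio.h>

/* Print the 2-cyclotomic coset of i modulo 2^m - 1. Illustrative only. */
static void print_coset(unsigned i, unsigned m) {
    unsigned modulus = (1u << m) - 1;
    unsigned j = i;
    do {
        printf("%u ", j);
        j = (2 * j) % modulus;   /* multiply by 2 mod (2^m - 1) */
    } while (j != i);
    printf("\n");
}

int main(void) {
    print_coset(3, 4);   /* prints: 3 6 12 9 */
    return 0;
}
```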
@@ -88,13 +88,13 @@ size_t PQCLEAN_HQC256_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, cons | |||
// Start with bch_poly(X) = 1 | |||
bch_poly[0] = 1; | |||
for (uint16_t i = 1 ; i < PARAM_GF_MUL_ORDER ; ++i) { | |||
for (uint16_t i = 1; i < PARAM_GF_MUL_ORDER; ++i) { | |||
if (cosets[i] == 0) { | |||
continue; | |||
} | |||
// Multiply bch_poly(X) by X-a^i | |||
for (size_t j = deg_bch_poly ; j ; --j) { | |||
for (size_t j = deg_bch_poly; j; --j) { | |||
int16_t mask = -((uint16_t) - bch_poly[j] >> 15); | |||
bch_poly[j] = (mask & exp[mod(log[bch_poly[j]] + i, PARAM_GF_MUL_ORDER)]) ^ bch_poly[j - 1]; | |||
} | |||
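
The inner loop above multiplies the accumulated generator by the linear factor X + a^i (over GF(2^m) subtraction is XOR, so X - a^i = X + a^i): each coefficient becomes a^i * p_j + p_{j-1}, with the product taken through the log/exp tables and the `mask` handling p_j = 0 branch-free. A minimal sketch over GF(16) (primitive polynomial x^4 + x + 1), using an ordinary branch for readability where the library uses the mask:

```c
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static const uint16_t EXP[15] = {1,2,4,8,3,6,12,11,5,10,7,14,15,13,9};
static const uint16_t LOG[16] = {0,0,1,4,2,8,5,10,3,14,9,7,6,13,11,12};
#define ORDER 15u

/* Multiply p(X) of degree `deg` by (X + a^i) in place; p must have
 * room for the new leading coefficient. */
static void mul_by_linear_factor(uint16_t *p, size_t deg, uint16_t i) {
    for (size_t j = deg + 1; j; --j) {
        uint16_t scaled = p[j] ? EXP[(LOG[p[j]] + i) % ORDER] : 0;
        p[j] = scaled ^ p[j - 1];   /* c_j = a^i * p_j + p_{j-1} */
    }
    p[0] = p[0] ? EXP[(LOG[p[0]] + i) % ORDER] : 0;
}

int main(void) {
    uint16_t p[3] = {1, 1, 0};          /* p(X) = X + 1 */
    mul_by_linear_factor(p, 1, 1);      /* multiply by (X + a) */
    printf("%u %u %u\n", (unsigned) p[0], (unsigned) p[1], (unsigned) p[2]);
    /* prints: 2 3 1, i.e. (X+1)(X+a) = X^2 + (1+a)X + a with a = 2 */
    return 0;
}
```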
@@ -119,13 +119,13 @@ size_t PQCLEAN_HQC256_CLEAN_compute_bch_poly(uint16_t *bch_poly, size_t *t, cons | |||
* @param[in] message Array of PARAM_K bytes storing the packed message | |||
*/ | |||
static void unpack_message(uint8_t *message_unpacked, const uint64_t *message) { | |||
for (size_t i = 0 ; i < (VEC_K_SIZE_64 - (PARAM_K % 64 != 0)) ; ++i) { | |||
for (size_t j = 0 ; j < 64 ; ++j) { | |||
for (size_t i = 0; i < (VEC_K_SIZE_64 - (PARAM_K % 64 != 0)); ++i) { | |||
for (size_t j = 0; j < 64; ++j) { | |||
message_unpacked[j + 64 * i] = (message[i] >> j) & 0x0000000000000001; | |||
} | |||
} | |||
for (int8_t j = 0 ; j < PARAM_K % 64 ; ++j) { | |||
for (int8_t j = 0; j < PARAM_K % 64; ++j) { | |||
message_unpacked[j + 64 * (VEC_K_SIZE_64 - 1)] = (message[VEC_K_SIZE_64 - 1] >> j) & 0x0000000000000001; | |||
} | |||
} | |||
@@ -142,10 +142,10 @@ static void lfsr_encode(uint8_t *codeword, const uint8_t *message) { | |||
uint8_t bch_poly[PARAM_G] = PARAM_BCH_POLY; | |||
// Compute the Parity-check digits | |||
for (int16_t i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int16_t i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = message[i] ^ codeword[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = PARAM_N1 - PARAM_K - 1 ; j ; --j) { | |||
for (size_t j = PARAM_N1 - PARAM_K - 1; j; --j) { | |||
codeword[j] = codeword[j - 1] ^ (-gate_value & bch_poly[j]); | |||
} | |||
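
`lfsr_encode` is the textbook systematic encoder: the parity registers hold the running remainder of division by the generator, `gate_value` is the feedback bit, and `-gate_value & bch_poly[j]` ANDs the generator taps in without branching. The same structure on a toy (7,4) cyclic code with generator x^3 + x + 1 (parameters illustrative, not hqc's BCH code):

```c
#include <stdint.h>
#include <string.h>

#define N 7
#define K 4
#define R (N - K)   /* 3 parity bits */

static void lfsr_encode_toy(uint8_t cw[N], const uint8_t msg[K]) {
    static const uint8_t g[R] = {1, 1, 0};   /* low coefficients of g(x) */
    memset(cw, 0, N);
    for (int i = K - 1; i >= 0; --i) {
        uint8_t gate = msg[i] ^ cw[R - 1];   /* feedback bit */
        for (int j = R - 1; j; --j) {
            cw[j] = cw[j - 1] ^ (-gate & g[j]);   /* branchless tap */
        }
        cw[0] = -gate & g[0];
    }
    memcpy(cw + R, msg, K);   /* systematic part follows the parity */
}
```

For msg = x^3 this leaves parity 1 + x^2, i.e. the remainder of x^6 mod (x^3 + x + 1), as expected.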
@@ -165,13 +165,13 @@ static void lfsr_encode(uint8_t *codeword, const uint8_t *message) { | |||
* @param[in] codeword_unpacked Array of PARAM_N1 bytes storing the unpacked codeword | |||
*/ | |||
static void pack_codeword(uint64_t *codeword, const uint8_t *codeword_unpacked) { | |||
for (size_t i = 0 ; i < (VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0)) ; ++i) { | |||
for (size_t j = 0 ; j < 64 ; ++j) { | |||
for (size_t i = 0; i < (VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0)); ++i) { | |||
for (size_t j = 0; j < 64; ++j) { | |||
codeword[i] |= ((uint64_t) codeword_unpacked[j + 64 * i]) << j; | |||
} | |||
} | |||
for (size_t j = 0 ; j < PARAM_N1 % 64 ; ++j) { | |||
for (size_t j = 0; j < PARAM_N1 % 64; ++j) { | |||
codeword[VEC_N1_SIZE_64 - 1] |= ((uint64_t) codeword_unpacked[j + 64 * (VEC_N1_SIZE_64 - 1)]) << j; | |||
} | |||
} | |||
@@ -224,13 +224,13 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; mu < PARAM_DELTA ; ++mu) { | |||
for (size_t mu = 0; mu < PARAM_DELTA; ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA - 1)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQC256_CLEAN_gf_mul(d, PQCLEAN_HQC256_CLEAN_gf_inverse(d_p)); // 0 if(d == 0) | |||
for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQC256_CLEAN_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -254,7 +254,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
// Update pp, d_p and X_sigma_p if needed | |||
pp = (mask12 & (2 * mu)) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA - 1 ; i ; --i) { | |||
for (size_t i = PARAM_DELTA - 1; i; --i) { | |||
X_sigma_p[i + 1] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
X_sigma_p[1] = 0; | |||
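
`compute_elp` updates all of its state through the `(mask & a) ^ (~mask & b)` idiom: with `mask12` either all-ones or all-zero, each assignment selects one of two candidates in constant time, so the Berlekamp-Massey control flow does not depend on the secret syndromes. The idiom in isolation:

```c
#include <stdint.h>

/* Select a when mask is all-ones, b when mask is all-zero. */
static inline uint16_t ct_select(uint16_t mask, uint16_t a, uint16_t b) {
    return (mask & a) ^ (~mask & b);
}

/* Build such a mask without branching: 0xFFFF when x != 0, else 0. */
static inline uint16_t ct_mask_nonzero(uint16_t x) {
    return (uint16_t) -(uint16_t) (x != 0);
}
```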
@@ -263,7 +263,7 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
// Compute the next discrepancy | |||
d = syndromes[2 * mu + 2]; | |||
for (size_t i = 1 ; (i <= 2 * mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= 2 * mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQC256_CLEAN_gf_mul(sigma[i], syndromes[2 * mu + 2 - i]); | |||
} | |||
} | |||
@@ -288,7 +288,7 @@ static void message_from_codeword(uint64_t *message, const uint64_t *codeword) { | |||
uint64_t mask2 = (uint64_t) (0xffffffffffffffff >> (64 - val % 64)); | |||
size_t index = val / 64; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 - 1 ; ++i) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64 - 1; ++i) { | |||
uint64_t message1 = (codeword[index] & mask1) >> val % 64; | |||
uint64_t message2 = (codeword[++index] & mask2) << (64 - val % 64); | |||
message[i] = message1 | message2; | |||
@@ -33,7 +33,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -54,8 +54,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -149,7 +149,7 @@ static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uin | |||
memcpy(f + 2 * n, R + n, 2 * n); | |||
memcpy(f + 3 * n, Q + n, 2 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
f[2 * n + i] ^= Q[i]; | |||
f[3 * n + i] ^= f[2 * n + i]; | |||
} | |||
@@ -185,14 +185,14 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
// Step 1 | |||
if (m_f == 1) { | |||
f[0] = 0; | |||
for (i = 0 ; i < (1U << m) ; ++i) { | |||
for (i = 0; i < (1U << m); ++i) { | |||
f[0] ^= w[i]; | |||
} | |||
f[1] = 0; | |||
betas_sums[0] = 0; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
betas_sums[(1 << j) + k] = betas_sums[k] ^ betas[j]; | |||
f[1] ^= PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[(1 << j) + k], w[(1 << j) + k]); | |||
} | |||
@@ -202,7 +202,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
} | |||
// Compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas[i], PQCLEAN_HQC256_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -222,7 +222,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
f1[1] = 0; | |||
u[0] = w[0] ^ w[k]; | |||
f1[0] = w[k]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
u[i] = w[i] ^ w[k + i]; | |||
f1[0] ^= PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; | |||
} | |||
@@ -231,7 +231,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
u[0] = w[0] ^ w[k]; | |||
v[0] = w[k]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
u[i] = w[i] ^ w[k + i]; | |||
v[i] = PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; | |||
} | |||
@@ -247,7 +247,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m | |||
// Step 2: compute f from g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -294,13 +294,13 @@ void PQCLEAN_HQC256_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) | |||
k = 1 << (PARAM_M - 1); | |||
u[0] = w[0] ^ w[k]; | |||
v[0] = w[k]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
u[i] = w[i] ^ w[k + i]; | |||
v[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[i], u[i]) ^ w[k + i]; | |||
} | |||
// Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -395,7 +395,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -438,13 +438,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -455,7 +455,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -465,7 +465,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas[i], PQCLEAN_HQC256_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -480,7 +480,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -491,7 +491,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -544,7 +544,7 @@ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -563,7 +563,7 @@ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -588,14 +588,14 @@ void PQCLEAN_HQC256_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 | |||
size_t i, j, k; | |||
// Unpack the received word vector into array r | |||
for (i = 0 ; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0) ; ++i) { | |||
for (j = 0 ; j < 64 ; ++j) { | |||
for (i = 0; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0); ++i) { | |||
for (j = 0; j < 64; ++j) { | |||
r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); | |||
} | |||
} | |||
// Last byte | |||
for (j = 0 ; j < PARAM_N1 % 64 ; ++j) { | |||
for (j = 0; j < PARAM_N1 % 64; ++j) { | |||
r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); | |||
} | |||
@@ -609,7 +609,7 @@ void PQCLEAN_HQC256_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 | |||
k = 1 << (PARAM_M - 1); | |||
w[0] = 0; | |||
w[k] = -r[0] & 1; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = -r[PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i])] & gammas_sums[i]; | |||
w[k + i] = -r[PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i] ^ 1)] & (gammas_sums[i] ^ 1); | |||
} | |||
@@ -639,7 +639,7 @@ void PQCLEAN_HQC256_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uin | |||
bit = 1 ^ ((uint16_t) - w[k] >> 15); | |||
error[index / 8] ^= bit << (index % 64); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i]); | |||
bit = 1 ^ ((uint16_t) - w[i] >> 15); | |||
error[index / 64] ^= bit << (index % 64); | |||
@@ -45,7 +45,7 @@ static void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
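
`reduce` folds the upper half of a double-length product back onto the lower half, since X^PARAM_N = 1 modulo X^PARAM_N - 1; the `r`/`carry` pair re-aligns the wrapped words across the PARAM_N mod 64 bit boundary. A bit-at-a-time sketch of the same fold, assuming `a` holds the full 2n-1-bit product and `o` has ceil(n/64) words:

```c
#include <stdint.h>
#include <stddef.h>

/* Fold bit i of the product onto bit i - n for i >= n, one bit at a
 * time; the library does the same 64 bits at a time. */
static void reduce_naive(uint64_t *o, const uint64_t *a, size_t n) {
    size_t words = (n + 63) / 64;
    for (size_t i = 0; i < words; i++) {
        o[i] = a[i];
    }
    for (size_t i = n; i <= 2 * n - 2; i++) {
        uint64_t bit = (a[i / 64] >> (i % 64)) & 1;
        size_t t = i - n;                 /* X^i = X^(i-n) mod X^n - 1 */
        o[t / 64] ^= bit << (t % 64);
    }
    if (n % 64) {
        o[n / 64] &= ((uint64_t) 1 << (n % 64)) - 1;  /* clear slack bits */
    }
}
```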
@@ -79,49 +79,49 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
uint64_t *pt; | |||
uint16_t *res_16; | |||
for (uint32_t i = 0 ; i < 16; i++) { | |||
for (uint32_t i = 0; i < 16; i++) { | |||
permuted_table[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i < 15 ; i++) { | |||
for (uint32_t i = 0; i < 15; i++) { | |||
swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); | |||
} | |||
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); | |||
for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = a2[j]; | |||
} | |||
pt[VEC_N_SIZE_64] = 0x0; | |||
for (uint32_t i = 1 ; i < 16 ; i++) { | |||
for (uint32_t i = 1; i < 16; i++) { | |||
carry = 0; | |||
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = (a2[j] << i) ^ carry; | |||
carry = (a2[j] >> ((64 - i))); | |||
} | |||
pt[VEC_N_SIZE_64] = carry; | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
permuted_sparse_vect[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i + 1 < weight ; i++) { | |||
for (uint32_t i = 0; i + 1 < weight; i++) { | |||
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i)); | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
dec = a1[permuted_sparse_vect[i]] & 0xf; | |||
s = a1[permuted_sparse_vect[i]] >> 4; | |||
res_16 = ((uint16_t *) o) + s; | |||
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) { | |||
*res_16++ ^= (uint16_t) pt[j]; | |||
*res_16++ ^= (uint16_t) (pt[j] >> 16); | |||
*res_16++ ^= (uint16_t) (pt[j] >> 32); | |||
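
`fast_convolution_mult` multiplies a sparse fixed-weight polynomial, given by its support `a1`, with a dense one: the 16-entry table stores `a2` shifted by 0 through 15 bits, so each support position only needs 16-bit-aligned XORs, and the two random permutations shuffle the access order so the memory trace does not follow the secret support. A simplified sketch that drops the table and permutations and shifts directly (not constant-time, illustration only):

```c
#include <stdint.h>
#include <stddef.h>

/* Add a2(X) * X^pos into o for each support position of the sparse
 * factor; words = 64-bit words of a2, o must hold 2*words words. */
static void sparse_dense_mult(uint64_t *o, const uint32_t *support,
                              size_t weight, const uint64_t *a2, size_t words) {
    for (size_t i = 0; i < weight; i++) {
        size_t word_shift = support[i] / 64;
        size_t bit_shift = support[i] % 64;
        uint64_t carry = 0;
        for (size_t j = 0; j < words; j++) {
            uint64_t v = a2[j];
            o[j + word_shift] ^= (v << bit_shift) | carry;
            carry = bit_shift ? (v >> (64 - bit_shift)) : 0;
        }
        o[words + word_shift] ^= carry;
    }
}
```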
@@ -146,7 +146,7 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
*/ | |||
void PQCLEAN_HQC256_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { | |||
uint64_t tmp[2 * VEC_N_SIZE_64 + 1]; | |||
for (uint32_t j = 0 ; j < 2 * VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < 2 * VEC_N_SIZE_64 + 1; j++) { | |||
tmp[j] = 0; | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQC256_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char * | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQC256_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQC256_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && memcmp(d, d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -19,8 +19,8 @@ static inline int32_t popcount(uint64_t n); | |||
*/ | |||
void PQCLEAN_HQC256_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m) { | |||
static const uint64_t mask[2][3] = {{0x0UL, 0x0UL, 0x0UL}, {0xFFFFFFFFFFFFFFFFUL, 0xFFFFFFFFFFFFFFFFUL, 0x3FFFFFUL}}; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_64 - 1 ; i++) { | |||
for (size_t j = 0 ; j < 64 ; j++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_64 - 1; i++) { | |||
for (size_t j = 0; j < 64; j++) { | |||
uint8_t bit = (m[i] >> j) & 0x1; | |||
uint32_t pos_r = PARAM_N2 * ((i << 6) + j); | |||
uint16_t idx_r = (pos_r & 0x3f); | |||
@@ -35,7 +35,7 @@ void PQCLEAN_HQC256_CLEAN_repetition_code_encode(uint64_t *em, const uint64_t *m | |||
} | |||
} | |||
for (size_t j = 0 ; j < (PARAM_N1 & 0x3f) ; j++) { | |||
for (size_t j = 0; j < (PARAM_N1 & 0x3f); j++) { | |||
uint8_t bit = (m[VEC_N1_SIZE_64 - 1] >> j) & 0x1; | |||
uint32_t pos_r = PARAM_N2 * (((VEC_N1_SIZE_64 - 1) << 6) + j); | |||
uint16_t idx_r = (pos_r & 0x3f); | |||
@@ -36,7 +36,7 @@ void PQCLEAN_HQC256_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_st | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -52,7 +52,7 @@ void PQCLEAN_HQC256_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_st | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (v[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -95,7 +95,7 @@ void PQCLEAN_HQC256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -111,7 +111,7 @@ void PQCLEAN_HQC256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -124,7 +124,7 @@ void PQCLEAN_HQC256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint | |||
} | |||
} | |||
for (uint16_t i = 0 ; i < weight ; ++i) { | |||
for (uint16_t i = 0; i < weight; ++i) { | |||
int32_t index = tmp[i] / 64; | |||
int32_t pos = tmp[i] % 64; | |||
v[index] |= ((uint64_t) 1) << pos; | |||
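
The fixed-weight samplers above all share one shape: draw candidate positions from the seedexpander output, reduce modulo PARAM_N (keeping the slight modulo bias of the reference code), redraw on duplicates, then set the chosen bits. A compact sketch with a placeholder randomness source (`next_u32` is hypothetical, standing in for bytes drawn from the seedexpander):

```c
#include <stdint.h>

extern uint32_t next_u32(void);   /* placeholder randomness source */

/* Sample a vector of exact Hamming weight w over n bit positions. */
static void sample_fixed_weight(uint64_t *v, uint32_t *pos,
                                uint32_t w, uint32_t n) {
    for (uint32_t i = 0; i < w; i++) {
        int fresh;
        do {
            pos[i] = next_u32() % n;   /* modulo reduction, as upstream */
            fresh = 1;
            for (uint32_t k = 0; k < i; k++) {
                if (pos[k] == pos[i]) {
                    fresh = 0;         /* duplicate: draw again */
                }
            }
        } while (!fresh);
    }
    for (uint32_t i = 0; i < w; i++) {
        v[pos[i] / 64] |= (uint64_t) 1 << (pos[i] % 64);
    }
}
```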
@@ -178,7 +178,7 @@ void PQCLEAN_HQC256_CLEAN_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQC256_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQC256_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64 | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -29,7 +29,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -50,8 +50,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -138,7 +138,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -181,13 +181,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -198,7 +198,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -208,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS128_AVX2_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -223,7 +223,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -234,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -287,7 +287,7 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -306,7 +306,7 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -333,7 +333,7 @@ void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint1 | |||
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); | |||
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS128_AVX2_gf_log(gammas_sums[i]); | |||
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); | |||
@@ -44,7 +44,7 @@ static inline void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -192,7 +192,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4( D0, A, B); | |||
karat_mult_4(D2, A + 4, B + 4); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int is = i + 4; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -200,7 +200,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4(D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int32_t is = i + 4; | |||
int32_t is2 = is + 4; | |||
int32_t is3 = is2 + 4; | |||
@@ -231,7 +231,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D0, A, B); | |||
karat_mult_8(D2, A + 8, B + 8); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -239,7 +239,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
int32_t is2 = is + 8; | |||
int32_t is3 = is2 + 8; | |||
@@ -270,7 +270,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D0, A, B); | |||
karat_mult_16(D2, A + 16, B + 16); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int is = i + 16; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -278,7 +278,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int32_t is = i + 16; | |||
int32_t is2 = is + 16; | |||
int32_t is3 = is2 + 16; | |||
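
`karat_mult_8/16/32` are one Karatsuba step each: split A and B in halves, compute D0 and D2 on the halves, D1 on their XOR-sums, and recombine with the middle block D0 ^ D1 ^ D2 (over GF(2) the additions and subtractions of classical Karatsuba all become XOR). A scalar toy of the same step, with a shift-and-xor carryless multiply as the base case where the AVX2 code uses vectorized products:

```c
#include <stdint.h>

/* 32x32 -> 64-bit carryless (GF(2)[x]) multiply, shift-and-xor. */
static uint64_t clmul32(uint32_t a, uint32_t b) {
    uint64_t r = 0;
    for (int i = 0; i < 32; i++) {
        r ^= (uint64_t) (a & -(uint32_t) ((b >> i) & 1)) << i;
    }
    return r;
}

/* One Karatsuba step: 64x64 -> 128-bit carryless product as hi:lo. */
static void karat_mult64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) {
    uint64_t d0 = clmul32((uint32_t) a, (uint32_t) b);
    uint64_t d2 = clmul32((uint32_t) (a >> 32), (uint32_t) (b >> 32));
    uint64_t d1 = clmul32((uint32_t) a ^ (uint32_t) (a >> 32),
                          (uint32_t) b ^ (uint32_t) (b >> 32));
    uint64_t mid = d0 ^ d1 ^ d2;   /* middle coefficient block */
    *lo = d0 ^ (mid << 32);
    *hi = d2 ^ (mid >> 32);
}
```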
@@ -307,7 +307,7 @@ static inline void divByXplus1(__m256i *out, __m256i *in, int size) { | |||
B[0] = A[0]; | |||
for (int32_t i = 1 ; i < 2 * (size << 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size << 2); i++) { | |||
B[i] = B[i - 1] ^ A[i]; | |||
} | |||
} | |||
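
`divByXplus1` exploits that exact division by (x + 1) over GF(2) is a running XOR: if A(x) = (x + 1) * B(x) with block coefficients a_i, b_i, then a_i = b_i ^ b_{i-1}, hence b_i = a_0 ^ a_1 ^ ... ^ a_i. A small self-checking sketch with 64-bit blocks standing in for the 256-bit vectors:

```c
#include <stdint.h>
#include <stdio.h>

/* Quotient of A by (x + 1), valid when A is divisible by (x + 1). */
static void div_by_x_plus_1(uint64_t *b, const uint64_t *a, int n) {
    b[0] = a[0];
    for (int i = 1; i < n; i++) {
        b[i] = b[i - 1] ^ a[i];   /* running XOR of the a_i */
    }
}

int main(void) {
    uint64_t b[3] = {5, 7, 9};                              /* quotient */
    uint64_t a[4] = {b[0], b[0] ^ b[1], b[1] ^ b[2], b[2]}; /* (x+1)*B  */
    uint64_t q[4];
    div_by_x_plus_1(q, a, 4);
    printf("%llu %llu %llu %llu\n",   /* recovers: 5 7 9 0 */
           (unsigned long long) q[0], (unsigned long long) q[1],
           (unsigned long long) q[2], (unsigned long long) q[3]);
    return 0;
}
```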
@@ -331,7 +331,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 - 1; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i42 = i4 - 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
@@ -342,7 +342,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4])); | |||
} | |||
for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = T_TM3_3W_256 - 1; i < T_TM3_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i41 = i4 + 1; | |||
U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); | |||
@@ -356,8 +356,8 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase : x= X^64 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
@@ -365,7 +365,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//W1 = W2 * W3 | |||
karat_mult_32( W1, W2, W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
int64_t *U1_64 = ((int64_t *) U1); | |||
int64_t *U2_64 = ((int64_t *) U2); | |||
@@ -381,7 +381,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V1_64 = ((int64_t *) V1); | |||
V2_64 = ((int64_t *) V2); | |||
for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 1; i < T_TM3_3W_256; i++) { | |||
int i4 = i << 2; | |||
W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1])); | |||
W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2])); | |||
@@ -390,46 +390,46 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2])); | |||
} | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = W3 + W0; W2 = W2 + W4 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W0 = W0 + U0; W4 = W4 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
//W3 = W3 * W2 ; W2 = W0 * W4 | |||
//W3 = W3 * W2; W2 = W0 * W4 | |||
karat_mult_32(tmp, W3, W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
karat_mult_32(W2, W0, W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
//W4 = U2 * V2; W0 = U0 * V0 | |||
karat_mult_32(W4, U2, V2); | |||
karat_mult_32(W0, U0, V0); | |||
// Interpolation phase | |||
// 9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x -> x = X^64 | |||
U1_64 = ((int64_t *) W2); | |||
U2_64 = ((int64_t *) W0); | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1); i++) { | |||
int32_t i4 = i << 2; | |||
W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1])); | |||
W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1])); | |||
@@ -440,7 +440,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
__m256i *U1_256 = (__m256i *) (U1_64 + 1); | |||
tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); | |||
for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 1; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); | |||
} | |||
@@ -454,7 +454,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = (int64_t *) W1; | |||
__m256i *U2_256 = (__m256i *) (U2_64 + 1); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256) - 1; i++) { | |||
tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]); | |||
} | |||
@@ -462,19 +462,19 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W3[2 * (T_TM3_3W_256) - 1] = zero; | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256) | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i]; | |||
ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i]; | |||
@@ -490,12 +490,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]); | |||
U2_256 = (__m256i *) (U2_64 - 2); | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 << 1; i++) { | |||
_mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i])); | |||
_mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i])); | |||
} | |||
for (int32_t i = 0 ; i < 2 * VEC_N_SIZE_256 + 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * VEC_N_SIZE_256 + 1; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned cha | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQCRMRS128_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS128_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -79,10 +79,10 @@ static void encode(uint64_t *word, uint32_t message) { | |||
*/ | |||
inline void expand_and_sum(__m256i *dst, const uint64_t *src) { | |||
uint16_t v[16]; | |||
for (size_t part = 0 ; part < 8 ; part++) { | |||
for (size_t part = 0; part < 8; part++) { | |||
dst[part] = _mm256_setzero_si256(); | |||
} | |||
for (size_t copy = 0 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 0; copy < MULTIPLICITY; copy++) { | |||
for (size_t part = 0; part < 8; part++) { | |||
for (size_t bit = 0; bit < 16; bit++) { | |||
v[bit] = (((uint16_t *)(&src[2 * copy]))[part] >> bit) & 1; | |||
@@ -133,10 +133,10 @@ inline void hadamard(__m256i *src, __m256i *dst) { | |||
__m256i *p1 = src; | |||
__m256i *p2 = dst; | |||
__m256i *p3; | |||
for (size_t pass = 0 ; pass < 7 ; pass++) { | |||
for (size_t pass = 0; pass < 7; pass++) { | |||
// warning: hadd works "within lanes" as Intel call it | |||
// so you have to swap the middle 64 bit blocks of the result | |||
for (size_t part = 0 ; part < 4 ; part++) { | |||
for (size_t part = 0; part < 4; part++) { | |||
p2[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); | |||
p2[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); | |||
} | |||
@@ -223,13 +223,13 @@ inline int32_t find_peaks(__m256i *transform) { | |||
__m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows; | |||
__m256i peak_mask; | |||
// compute absolute value of transform | |||
for (size_t i = 0 ; i < 8 ; i++) { | |||
for (size_t i = 0; i < 8; i++) { | |||
abs_rows[i] = _mm256_abs_epi16(transform[i]); | |||
} | |||
// compute a vector of 16 elements which contains the maximum somewhere | |||
// (later used to compute bits 0 through 3 of message) | |||
max_abs_rows = abs_rows[0]; | |||
for (size_t i = 1 ; i < 8 ; i++) { | |||
for (size_t i = 1; i < 8; i++) { | |||
max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]); | |||
} | |||
@@ -263,7 +263,7 @@ inline int32_t find_peaks(__m256i *transform) { | |||
// find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message | |||
// find lowest value by searching backwards skip first check to save time | |||
size_t message = 0x70; | |||
for (int32_t i = 7 ; i >= 0 ; i--) { | |||
for (int32_t i = 7; i >= 0; i--) { | |||
bitmap = _mm256_cmpgt_epi16(abs_rows[i], bound); | |||
int message_mask = (-(int16_t)(_mm256_testz_si256(bitmap, bitmap) == 0)) >> 15; | |||
message ^= message_mask & (message ^ (unsigned)i << 4); | |||
@@ -297,7 +297,7 @@ inline int32_t find_peaks(__m256i *transform) { | |||
// and then adding elements within two groups of 8 | |||
peak_mask = _mm256_cmpgt_epi16(active_row, bound); | |||
peak_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1); | |||
for (int32_t i = 0 ; i < 3 ; i++) { | |||
for (int32_t i = 0; i < 3; i++) { | |||
peak_mask = _mm256_hadd_epi16(peak_mask, peak_mask); | |||
} | |||
// add low 4 bits of message | |||
@@ -337,12 +337,12 @@ inline int32_t find_peaks(__m256i *transform) { | |||
* @param[in] msg Array of size VEC_N1_SIZE_64 storing the message | |||
*/ | |||
void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// fill entries i * MULTIPLICITY to (i+1) * MULTIPLICITY | |||
// encode first word | |||
encode(&cdw[2 * i * MULTIPLICITY], ((uint8_t *)msg)[i]); | |||
// copy to other identical codewords | |||
for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
memcpy(&cdw[2 * (i * MULTIPLICITY + copy)], &cdw[2 * i * MULTIPLICITY], 2 * sizeof(uint64_t)); | |||
} | |||
} | |||
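
`encode` maps one message byte to a first-order Reed-Muller codeword: the codeword is the XOR of the generator rows selected by the message bits (the all-ones row plus the coordinate functions), and `reed_muller_encode` then replicates it MULTIPLICITY times. A toy RM(1,3) version of that row combination (hqc-rmrs uses RM(1,7), i.e. 128-bit words, with the same structure; bit conventions here are illustrative):

```c
#include <stdint.h>

/* RM(1,3): 4 message bits select generator rows; codeword is 8 bits. */
static uint8_t rm13_encode(uint8_t msg) {
    static const uint8_t rows[4] = {
        0xFF,   /* constant-1 row */
        0xAA,   /* coordinate x0  */
        0xCC,   /* coordinate x1  */
        0xF0,   /* coordinate x2  */
    };
    uint8_t cw = 0;
    for (int i = 0; i < 4; i++) {
        cw ^= (uint8_t) (-((msg >> i) & 1)) & rows[i];   /* branchless row select */
    }
    return cw;
}
```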
@@ -362,7 +362,7 @@ void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint64_t *cdw, const uint64_t *m | |||
void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint64_t *msg, const uint64_t *cdw) { | |||
__m256i expanded[8]; | |||
__m256i transform[8]; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// collect the codewords | |||
expand_and_sum(expanded, &cdw[2 * i * MULTIPLICITY]); | |||
// apply hadamard transform | |||
@@ -41,20 +41,20 @@ void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t * | |||
uint8_t msg_bytes[PARAM_K] = {0}; | |||
uint8_t cdw_bytes[PARAM_N1] = {0}; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 ; ++i) { | |||
for (size_t j = 0 ; j < 8 ; ++j) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { | |||
for (size_t j = 0; j < 8; ++j) { | |||
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); | |||
} | |||
} | |||
for (int i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = 0 ; j < PARAM_G ; ++j) { | |||
for (size_t j = 0; j < PARAM_G; ++j) { | |||
tmp[j] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(gate_value, PARAM_RS_POLY[j]); | |||
} | |||
for (size_t k = PARAM_N1 - PARAM_K - 1 ; k ; --k) { | |||
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { | |||
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; | |||
} | |||
@@ -74,8 +74,8 @@ void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t * | |||
* @param[in] cdw Array of size PARAM_N1 storing the received vector | |||
*/ | |||
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { | |||
for (size_t i = 0 ; i < 2 * PARAM_DELTA ; ++i) { | |||
for (size_t j = 1 ; j < PARAM_N1 ; ++j) { | |||
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { | |||
for (size_t j = 1; j < PARAM_N1; ++j) { | |||
syndromes[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); | |||
} | |||
syndromes[i] ^= cdw[0]; | |||
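
`compute_syndromes` evaluates the received word at consecutive powers of alpha, s_i = cdw(alpha^(i+1)), reading the needed powers alpha^((i+1)j) from the precomputed `alpha_ij_pow` table. The same evaluation via Horner's rule, with `gf_mul` as a placeholder field multiply rather than the library's table-driven approach:

```c
#include <stdint.h>
#include <stddef.h>

extern uint16_t gf_mul(uint16_t a, uint16_t b);   /* placeholder GF(2^8) multiply */

/* One syndrome: the received word evaluated at alpha_i = alpha^(i+1). */
static uint16_t syndrome(const uint8_t *cdw, size_t n, uint16_t alpha_i) {
    uint16_t s = 0;
    for (size_t j = n; j-- > 0; ) {    /* Horner: s = s*alpha_i + c_j */
        s = gf_mul(s, alpha_i) ^ cdw[j];
    }
    return s;
}
```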
@@ -111,14 +111,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; (mu < (2 * PARAM_DELTA)) ; ++mu) { | |||
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQCRMRS128_AVX2_gf_mul(d, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(d_p)); | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -141,14 +141,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
pp = (mask12 & mu) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA ; i ; --i) { | |||
for (size_t i = PARAM_DELTA; i; --i) { | |||
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p); | |||
d = syndromes[mu + 1]; | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]); | |||
} | |||
} | |||
@@ -189,18 +189,18 @@ static void compute_roots(uint8_t *error, uint16_t *sigma) { | |||
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) { | |||
z[0] = 1; | |||
for (size_t i = 1 ; i < PARAM_DELTA + 1 ; ++i) { | |||
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] = ((uint16_t)mask2) & sigma[i]; | |||
} | |||
z[1] ^= syndromes[0]; | |||
for (size_t i = 2 ; i <= PARAM_DELTA ; ++i) { | |||
for (size_t i = 2; i <= PARAM_DELTA; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]); | |||
for (size_t j = 1 ; j < i ; ++j) { | |||
for (size_t j = 1; j < i; ++j) { | |||
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]); | |||
} | |||
} | |||
@@ -226,10 +226,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
uint16_t delta_real_value; | |||
// Compute the beta_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_N1 ; i++) { | |||
for (size_t i = 0; i < PARAM_N1; i++) { | |||
uint16_t found = 0; | |||
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15; | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15; | |||
beta_j[j] += indexmask & valuemask & exp[i]; | |||
found += indexmask & valuemask & 1; | |||
@@ -239,17 +239,17 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
delta_real_value = delta_counter; | |||
// Compute the e_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_DELTA ; ++i) { | |||
for (size_t i = 0; i < PARAM_DELTA; ++i) { | |||
uint16_t tmp1 = 1; | |||
uint16_t tmp2 = 1; | |||
uint16_t inverse = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(beta_j[i]); | |||
uint16_t inverse_power_j = 1; | |||
for (size_t j = 1 ; j <= PARAM_DELTA ; ++j) { | |||
for (size_t j = 1; j <= PARAM_DELTA; ++j) { | |||
inverse_power_j = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, inverse); | |||
tmp1 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, z[j]); | |||
} | |||
for (size_t k = 1 ; k < PARAM_DELTA ; ++k) { | |||
for (size_t k = 1; k < PARAM_DELTA; ++k) { | |||
tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); | |||
} | |||
int16_t mask = ((int16_t) - (i < delta_real_value)) >> 15; | |||
@@ -258,10 +258,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
// Place the delta e_{j_i} values at the right coordinates of the output vector | |||
delta_counter = 0; | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
uint16_t found = 0; | |||
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15; | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15; | |||
error_values[i] += indexmask & valuemask & e_j[j]; | |||
found += indexmask & valuemask & 1; | |||
@@ -280,7 +280,7 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
* @param[in] error_values Array of PARAM_DELTA elements storing the error values | |||
*/ | |||
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
cdw[i] ^= error_values[i]; | |||
} | |||
} | |||
@@ -45,7 +45,7 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -61,7 +61,7 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -74,7 +74,7 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
} | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
// we store the bloc number and bit position of each vb[i] | |||
uint64_t bloc = tmp[i] >> 6; | |||
bloc256[i] = _mm256_set1_epi64x(bloc >> 2); | |||
@@ -86,11 +86,11 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
bit256[i] = bloc256 & mask256; | |||
} | |||
for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) { | |||
for (uint32_t i = 0; i < LOOP_SIZE; i++) { | |||
__m256i aux = _mm256_loadu_si256(((__m256i *)v) + i); | |||
__m256i i256 = _mm256_set1_epi64x(i); | |||
for (uint32_t j = 0 ; j < weight ; j++) { | |||
for (uint32_t j = 0; j < weight; j++) { | |||
__m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); | |||
aux ^= bit256[j] & mask256; | |||
} | |||
@@ -147,7 +147,7 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -165,7 +165,7 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uin | |||
int PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
unsigned char diff = 0; | |||
for (uint32_t i = 0 ; i < size ; i++) { | |||
for (uint32_t i = 0; i < size; i++) { | |||
diff |= ((uint8_t *) v1)[i] ^ ((uint8_t *) v2)[i]; | |||
} | |||
return diff != 0; | |||
@@ -192,7 +192,7 @@ void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uin | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -29,7 +29,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -50,8 +50,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -138,7 +138,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -181,13 +181,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -198,7 +198,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -208,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -223,7 +223,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -234,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -287,7 +287,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -306,7 +306,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -333,7 +333,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint | |||
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); | |||
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS128_CLEAN_gf_log(gammas_sums[i]); | |||
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); | |||
@@ -45,7 +45,7 @@ static void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -79,49 +79,49 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
uint64_t *pt; | |||
uint16_t *res_16; | |||
for (uint32_t i = 0 ; i < 16; i++) { | |||
for (uint32_t i = 0; i < 16; i++) { | |||
permuted_table[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i < 15 ; i++) { | |||
for (uint32_t i = 0; i < 15; i++) { | |||
swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); | |||
} | |||
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); | |||
for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = a2[j]; | |||
} | |||
pt[VEC_N_SIZE_64] = 0x0; | |||
for (uint32_t i = 1 ; i < 16 ; i++) { | |||
for (uint32_t i = 1; i < 16; i++) { | |||
carry = 0; | |||
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = (a2[j] << i) ^ carry; | |||
carry = (a2[j] >> ((64 - i))); | |||
} | |||
pt[VEC_N_SIZE_64] = carry; | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
permuted_sparse_vect[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i + 1 < weight ; i++) { | |||
for (uint32_t i = 0; i + 1 < weight; i++) { | |||
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i)); | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
dec = a1[permuted_sparse_vect[i]] & 0xf; | |||
s = a1[permuted_sparse_vect[i]] >> 4; | |||
res_16 = ((uint16_t *) o) + s; | |||
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) { | |||
*res_16++ ^= (uint16_t) pt[j]; | |||
*res_16++ ^= (uint16_t) (pt[j] >> 16); | |||
*res_16++ ^= (uint16_t) (pt[j] >> 32); | |||
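
fast_convolution_mult multiplies a sparse polynomial (given by its support) with a dense one: it precomputes the sixteen shifted copies a2 << r, then, for each support position p, XORs the (p & 0xf)-shifted copy into the result at 16-bit offset p >> 4. The seedexpander-driven swaps shuffle the table rows and the support order to decorrelate the memory-access pattern. A sketch of just the arithmetic core, with hypothetical sizes and without the permutations; like the original's uint16_t view of o, it assumes a little-endian target:

#include <stdint.h>

#define DENSE_WORDS 4u  /* hypothetical VEC_N_SIZE_64 */

/* acc: 2*DENSE_WORDS + 1 words, zero-initialized; support[i] < 64*DENSE_WORDS */
static void sparse_dense_mult(uint64_t *acc, const uint32_t *support,
                              uint32_t weight, const uint64_t *dense) {
    uint64_t table[16][DENSE_WORDS + 1];
    for (uint32_t r = 0; r < 16; r++) {
        uint64_t carry = 0;
        for (uint32_t j = 0; j < DENSE_WORDS; j++) {
            table[r][j] = (dense[j] << r) ^ carry;
            carry = r ? (dense[j] >> (64 - r)) : 0;
        }
        table[r][DENSE_WORDS] = carry;
    }
    for (uint32_t i = 0; i < weight; i++) {
        uint32_t dec = support[i] & 0xf;  /* residual shift within 16 bits */
        uint16_t *res_16 = ((uint16_t *) acc) + (support[i] >> 4);
        for (uint32_t j = 0; j < DENSE_WORDS + 1; j++) {
            /* XOR each 64-bit table word in four 16-bit slices */
            res_16[0] ^= (uint16_t) table[dec][j];
            res_16[1] ^= (uint16_t) (table[dec][j] >> 16);
            res_16[2] ^= (uint16_t) (table[dec][j] >> 32);
            res_16[3] ^= (uint16_t) (table[dec][j] >> 48);
            res_16 += 4;
        }
    }
}
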
@@ -146,7 +146,7 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
*/ | |||
void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { | |||
uint64_t tmp[2 * VEC_N_SIZE_64 + 1]; | |||
for (uint32_t j = 0 ; j < 2 * VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < 2 * VEC_N_SIZE_64 + 1; j++) { | |||
tmp[j] = 0; | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned ch | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQCRMRS128_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS128_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && memcmp(d, d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
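
The decapsulation hunks share one branchless accept/reject pattern: `result` is 1 exactly when all re-encryption checks pass, the multiply zeroes the shared secret on failure, and the final `result--` maps {1, 0} to return codes {0, -1}. The pattern in isolation (whether the short-circuiting `&&` chain above it is itself compiled branch-free is compiler-dependent):

#include <stdint.h>
#include <stddef.h>

static int finalize_shared_secret(uint8_t *ss, size_t len, int checks_passed) {
    int result = (checks_passed != 0);       /* 1 = accept, 0 = reject */
    for (size_t i = 0; i < len; i++) {
        ss[i] = (uint8_t) (result * ss[i]);  /* zeroed when rejected */
    }
    return result - 1;                       /* 0 on success, -1 on failure */
}
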
@@ -104,8 +104,8 @@ static void hadamard(uint16_t src[128], uint16_t dst[128]) { | |||
uint16_t *p1 = src; | |||
uint16_t *p2 = dst; | |||
uint16_t *p3; | |||
for (uint32_t pass = 0 ; pass < 7 ; pass++) { | |||
for (uint32_t i = 0 ; i < 64 ; i++) { | |||
for (uint32_t pass = 0; pass < 7; pass++) { | |||
for (uint32_t i = 0; i < 64; i++) { | |||
p2[i] = p1[2 * i] + p1[2 * i + 1]; | |||
p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; | |||
} | |||
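
The seven passes above are a 128-point Walsh-Hadamard transform: each pass butterflies adjacent pairs into (a+b, a-b) while ping-ponging between the two buffers. A minimal in-place version for any power-of-two length, equivalent up to the order in which coefficients are visited:

#include <stdint.h>
#include <stddef.h>

static void fwht(int16_t *x, size_t n) { /* n must be a power of two */
    for (size_t h = 1; h < n; h <<= 1) {
        for (size_t i = 0; i < n; i += 2 * h) {
            for (size_t j = i; j < i + h; j++) {
                int16_t a = x[j];
                int16_t b = x[j + h];
                x[j] = (int16_t) (a + b);
                x[j + h] = (int16_t) (a - b);
            }
        }
    }
}
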
@@ -133,15 +133,15 @@ static void hadamard(uint16_t src[128], uint16_t dst[128]) { | |||
*/ | |||
static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]) { | |||
// start with the first copy | |||
for (uint32_t part = 0 ; part < 4 ; part++) { | |||
for (uint32_t bit = 0 ; bit < 32 ; bit++) { | |||
for (uint32_t part = 0; part < 4; part++) { | |||
for (uint32_t bit = 0; bit < 32; bit++) { | |||
dest[part * 32 + bit] = (uint16_t) ((src[part] >> bit) & 1); | |||
} | |||
} | |||
// sum the rest of the copies | |||
for (uint32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (uint32_t part = 0 ; part < 4 ; part++) { | |||
for (uint32_t bit = 0 ; bit < 32 ; bit++) { | |||
for (uint32_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
for (uint32_t part = 0; part < 4; part++) { | |||
for (uint32_t bit = 0; bit < 32; bit++) { | |||
dest[part * 32 + bit] += (uint16_t) ((src[4 * copy + part] >> bit) & 1); | |||
} | |||
} | |||
@@ -164,7 +164,7 @@ static uint8_t find_peaks(const uint16_t transform[128]) { | |||
uint16_t peak = 0; | |||
uint16_t pos = 0; | |||
uint16_t t, abs, mask; | |||
for (uint16_t i = 0 ; i < 128 ; i++) { | |||
for (uint16_t i = 0; i < 128; i++) { | |||
t = transform[i]; | |||
abs = t ^ ((-(t >> 15)) & (t ^ -t)); // t = abs(t) | |||
mask = -(((uint16_t)(peak_abs - abs)) >> 15); | |||
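
find_peaks tracks the largest |transform[i]| without branching on secret data. The two idioms it combines, pulled out for readability; the values involved stay below 2^15, so the subtraction cannot wrap past the sign bit:

#include <stdint.h>

/* |t| for a two's-complement value carried in a uint16_t */
static uint16_t abs16(uint16_t t) {
    /* -(t >> 15) is 0xffff when the sign bit is set, 0 otherwise */
    return t ^ ((uint16_t) (-(t >> 15)) & (t ^ (uint16_t) -t));
}

/* all-ones mask exactly when peak_abs < abs */
static uint16_t gt_mask(uint16_t peak_abs, uint16_t abs) {
    return (uint16_t) (-(uint16_t) ((uint16_t) (peak_abs - abs) >> 15));
}
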
@@ -191,11 +191,11 @@ static uint8_t find_peaks(const uint16_t transform[128]) { | |||
void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { | |||
uint8_t *message_array = (uint8_t *) msg; | |||
uint32_t *codeArray = (uint32_t *) cdw; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// encode first word | |||
encode(&codeArray[4 * i * MULTIPLICITY], message_array[i]); | |||
// copy to other identical codewords | |||
for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
memcpy(&codeArray[4 * i * MULTIPLICITY + 4 * copy], &codeArray[4 * i * MULTIPLICITY], 4 * sizeof(uint32_t)); | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint64_t *msg, const uint64_t * | |||
uint32_t *codeArray = (uint32_t *) cdw; | |||
uint16_t expanded[128]; | |||
uint16_t transform[128]; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// collect the codewords | |||
expand_and_sum(expanded, &codeArray[4 * i * MULTIPLICITY]); | |||
// apply hadamard transform | |||
@@ -41,20 +41,20 @@ void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(uint64_t *cdw, const uint64_t | |||
uint8_t msg_bytes[PARAM_K] = {0}; | |||
uint8_t cdw_bytes[PARAM_N1] = {0}; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 ; ++i) { | |||
for (size_t j = 0 ; j < 8 ; ++j) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { | |||
for (size_t j = 0; j < 8; ++j) { | |||
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); | |||
} | |||
} | |||
for (int i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = 0 ; j < PARAM_G ; ++j) { | |||
for (size_t j = 0; j < PARAM_G; ++j) { | |||
tmp[j] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gate_value, PARAM_RS_POLY[j]); | |||
} | |||
for (size_t k = PARAM_N1 - PARAM_K - 1 ; k ; --k) { | |||
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { | |||
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; | |||
} | |||
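
The encoder above is the classic systematic Reed-Solomon construction: the parity bytes are msg(x) * x^(n-k) mod g(x), computed by clocking the message (highest coefficient first) through an LFSR whose taps are the generator's coefficients. A self-contained sketch; the field multiply is a schoolbook GF(2^8) multiply with a placeholder reduction polynomial (0x11d), standing in for the file's table-based gf_mul:

#include <stdint.h>
#include <stddef.h>

static uint8_t gf_mul(uint8_t a, uint8_t b) {
    uint16_t r = 0;
    for (int i = 0; i < 8; i++) {       /* carry-less multiply */
        r ^= (uint16_t) ((0u - ((b >> i) & 1u)) & ((uint16_t) a << i));
    }
    for (int i = 15; i >= 8; i--) {     /* fold degree 8..15 terms back down */
        r ^= (uint16_t) ((0u - ((r >> i) & 1u)) & (0x11du << (i - 8)));
    }
    return (uint8_t) r;
}

/* parity = msg(x) * x^(n-k) mod g(x); parity zero-initialized, g monic */
static void rs_parity(uint8_t *parity, const uint8_t *msg, size_t k,
                      size_t n_minus_k, const uint8_t *g) {
    for (size_t i = k; i-- > 0; ) {
        uint8_t gate = msg[i] ^ parity[n_minus_k - 1];   /* feedback symbol */
        for (size_t j = n_minus_k; j-- > 1; ) {
            parity[j] = parity[j - 1] ^ gf_mul(gate, g[j]);
        }
        parity[0] = gf_mul(gate, g[0]);
    }
}
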
@@ -74,8 +74,8 @@ void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(uint64_t *cdw, const uint64_t | |||
* @param[in] cdw Array of size PARAM_N1 storing the received vector | |||
*/ | |||
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { | |||
for (size_t i = 0 ; i < 2 * PARAM_DELTA ; ++i) { | |||
for (size_t j = 1 ; j < PARAM_N1 ; ++j) { | |||
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { | |||
for (size_t j = 1; j < PARAM_N1; ++j) { | |||
syndromes[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); | |||
} | |||
syndromes[i] ^= cdw[0]; | |||
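
Each syndrome here is the received word evaluated at a root of the generator: assuming the table stores alpha_ij_pow[i][j-1] = alpha^((i+1)*j), the loop computes s_i as the sum of cdw[j] * alpha^((i+1)*j), with the j = 0 term added separately since alpha^0 = 1. The same value can be obtained with Horner's rule at one multiplication per coefficient, a useful cross-check against the table (gf_mul as sketched above):

#include <stdint.h>
#include <stddef.h>

uint8_t gf_mul(uint8_t a, uint8_t b); /* e.g. the placeholder sketched earlier */

/* evaluate c(x) at x = alpha_pow, i.e. syndrome s_i with alpha_pow = alpha^(i+1) */
static uint8_t syndrome_horner(const uint8_t *c, size_t n, uint8_t alpha_pow) {
    uint8_t s = 0;
    for (size_t j = n; j-- > 0; ) {
        s = (uint8_t) (gf_mul(s, alpha_pow) ^ c[j]); /* s = s*x + c_j */
    }
    return s;
}
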
@@ -111,14 +111,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; (mu < (2 * PARAM_DELTA)) ; ++mu) { | |||
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(d_p)); | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -141,14 +141,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
pp = (mask12 & mu) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA ; i ; --i) { | |||
for (size_t i = PARAM_DELTA; i; --i) { | |||
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p); | |||
d = syndromes[mu + 1]; | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]); | |||
} | |||
} | |||
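
compute_elp runs all iterations of Berlekamp-Massey unconditionally; every data-dependent decision, such as whether to swap in the saved sigma and discrepancy, is taken through an all-ones/all-zero mask instead of a branch. The two primitives it leans on, in isolation:

#include <stdint.h>

/* mask ? a : b, with mask either 0x0000 or 0xffff */
static uint16_t ct_select(uint16_t mask, uint16_t a, uint16_t b) {
    return (uint16_t) ((mask & a) ^ (~mask & b));
}

/* all-ones mask when x < y, for operands below 2^15 */
static uint16_t ct_lt_mask(uint16_t x, uint16_t y) {
    return (uint16_t) (-(uint16_t) ((uint16_t) (x - y) >> 15));
}
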
@@ -189,18 +189,18 @@ static void compute_roots(uint8_t *error, uint16_t *sigma) { | |||
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) { | |||
z[0] = 1; | |||
for (size_t i = 1 ; i < PARAM_DELTA + 1 ; ++i) { | |||
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] = ((uint16_t)mask2) & sigma[i]; | |||
} | |||
z[1] ^= syndromes[0]; | |||
for (size_t i = 2 ; i <= PARAM_DELTA ; ++i) { | |||
for (size_t i = 2; i <= PARAM_DELTA; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]); | |||
for (size_t j = 1 ; j < i ; ++j) { | |||
for (size_t j = 1; j < i; ++j) { | |||
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]); | |||
} | |||
} | |||
@@ -226,10 +226,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
uint16_t delta_real_value; | |||
// Compute the beta_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_N1 ; i++) { | |||
for (size_t i = 0; i < PARAM_N1; i++) { | |||
uint16_t found = 0; | |||
uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 | |||
for (uint16_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (uint16_t j = 0; j < PARAM_DELTA; j++) { | |||
uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter | |||
beta_j[j] += indexmask & valuemask & exp[i]; | |||
found += indexmask & valuemask & 1; | |||
@@ -239,17 +239,17 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
delta_real_value = delta_counter; | |||
// Compute the e_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_DELTA ; ++i) { | |||
for (size_t i = 0; i < PARAM_DELTA; ++i) { | |||
uint16_t tmp1 = 1; | |||
uint16_t tmp2 = 1; | |||
uint16_t inverse = PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(beta_j[i]); | |||
uint16_t inverse_power_j = 1; | |||
for (size_t j = 1 ; j <= PARAM_DELTA ; ++j) { | |||
for (size_t j = 1; j <= PARAM_DELTA; ++j) { | |||
inverse_power_j = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, inverse); | |||
tmp1 ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, z[j]); | |||
} | |||
for (size_t k = 1 ; k < PARAM_DELTA ; ++k) { | |||
for (size_t k = 1; k < PARAM_DELTA; ++k) { | |||
tmp2 = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); | |||
} | |||
uint16_t mask = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value | |||
@@ -258,10 +258,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
// Place the delta e_{j_i} values at the right coordinates of the output vector | |||
delta_counter = 0; | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
uint16_t found = 0; | |||
uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter | |||
error_values[i] += indexmask & valuemask & e_j[j]; | |||
found += indexmask & valuemask & 1; | |||
@@ -280,7 +280,7 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
* @param[in] error_values Array of PARAM_DELTA elements storing the error values | |||
*/ | |||
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
cdw[i] ^= error_values[i]; | |||
} | |||
} | |||
@@ -36,7 +36,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XO | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -52,7 +52,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XO | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (v[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -95,7 +95,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -111,7 +111,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -124,7 +124,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
} | |||
} | |||
for (uint16_t i = 0 ; i < weight ; ++i) { | |||
for (uint16_t i = 0; i < weight; ++i) { | |||
int32_t index = tmp[i] / 64; | |||
int32_t pos = tmp[i] % 64; | |||
v[index] |= ((uint64_t) 1) << pos; | |||
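
Once `weight` distinct positions have been sampled, this final loop packs the coordinate list into the 64-bit-word bit vector. The packing step on its own:

#include <stdint.h>

/* v: zero-initialized, at least (largest position)/64 + 1 words */
static void set_bits(uint64_t *v, const uint32_t *pos, uint32_t weight) {
    for (uint32_t i = 0; i < weight; ++i) {
        v[pos[i] / 64] |= ((uint64_t) 1) << (pos[i] % 64);
    }
}
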
@@ -178,7 +178,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const ui | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -29,7 +29,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -50,8 +50,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
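
compute_subset_sums builds all 2^set_size subset XORs of the basis in 2^set_size XOR operations by doubling the filled prefix once per basis element: index s ends up holding the XOR of every set[i] whose bit i is set in s. A tiny worked instance:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int main(void) {
    uint16_t set[3] = {0x8, 0x4, 0x2};
    uint16_t out[8];
    out[0] = 0;
    for (size_t i = 0; i < 3; ++i) {
        for (size_t j = 0; j < (1U << i); ++j) {
            out[(1U << i) + j] = set[i] ^ out[j];
        }
    }
    assert(out[5] == (set[2] ^ set[0])); /* s = 0b101 selects set[2] and set[0] */
    return 0;
}
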
@@ -138,7 +138,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -181,13 +181,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -198,7 +198,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -208,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS192_AVX2_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -223,7 +223,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -234,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -287,7 +287,7 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -306,7 +306,7 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -333,7 +333,7 @@ void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint1 | |||
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); | |||
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS192_AVX2_gf_log(gammas_sums[i]); | |||
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); | |||
@@ -45,7 +45,7 @@ static inline void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -193,7 +193,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4( D0, A, B); | |||
karat_mult_4(D2, A + 4, B + 4); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int is = i + 4; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -201,7 +201,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4(D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int32_t is = i + 4; | |||
int32_t is2 = is + 4; | |||
int32_t is3 = is2 + 4; | |||
@@ -232,7 +232,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D0, A, B); | |||
karat_mult_8(D2, A + 8, B + 8); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -240,7 +240,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
int32_t is2 = is + 8; | |||
int32_t is3 = is2 + 8; | |||
@@ -271,7 +271,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D0, A, B); | |||
karat_mult_16(D2, A + 16, B + 16); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int is = i + 16; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -279,7 +279,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int32_t is = i + 16; | |||
int32_t is2 = is + 16; | |||
int32_t is3 = is2 + 16; | |||
@@ -309,7 +309,7 @@ static inline void karat_mult_64(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_32( D0, A, B); | |||
karat_mult_32(D2, A + 32, B + 32); | |||
for (int32_t i = 0 ; i < 32 ; i++) { | |||
for (int32_t i = 0; i < 32; i++) { | |||
int32_t is = i + 32; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -317,7 +317,7 @@ static inline void karat_mult_64(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_32( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 32 ; i++) { | |||
for (int32_t i = 0; i < 32; i++) { | |||
int32_t is = i + 32; | |||
int32_t is2 = is + 32; | |||
int32_t is3 = is2 + 32; | |||
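
karat_mult_8/16/32/64 all apply the same one-level Karatsuba split over GF(2)[x]: with D0 = A_lo*B_lo, D2 = A_hi*B_hi, and D1 = (A_lo ^ A_hi)*(B_lo ^ B_hi), the product is C = D0 ^ (D0 ^ D1 ^ D2)*x^n ^ D2*x^2n, so three half-size multiplications replace four. A scalar sketch of one level, with a placeholder bit-by-bit carry-less base multiply standing in for the vectorized kernels:

#include <stdint.h>

/* r = a * b in GF(2)[x], 64x64 -> 128 bits (placeholder base multiplier) */
static void clmul64(uint64_t r[2], uint64_t a, uint64_t b) {
    r[0] = 0;
    r[1] = 0;
    for (int i = 0; i < 64; i++) {
        uint64_t m = (uint64_t) 0 - ((b >> i) & 1u); /* all-ones if bit i of b */
        r[0] ^= (a << i) & m;
        r[1] ^= (i ? (a >> (64 - i)) : 0) & m;
    }
}

/* C (4 words) = A (2 words) * B (2 words) in GF(2)[x], one Karatsuba level */
static void karat_mult_2w(uint64_t C[4], const uint64_t A[2], const uint64_t B[2]) {
    uint64_t D0[2], D1[2], D2[2];
    clmul64(D0, A[0], B[0]);
    clmul64(D2, A[1], B[1]);
    clmul64(D1, A[0] ^ A[1], B[0] ^ B[1]);
    C[0] = D0[0];
    C[1] = D0[1] ^ D0[0] ^ D1[0] ^ D2[0]; /* low half of the middle term */
    C[2] = D2[0] ^ D0[1] ^ D1[1] ^ D2[1]; /* high half of the middle term */
    C[3] = D2[1];
}
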
@@ -347,7 +347,7 @@ static inline void divByXplus1(__m256i *out, __m256i *in, int size) { | |||
B[0] = A[0]; | |||
for (int32_t i = 1 ; i < 2 * (size << 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size << 2); i++) { | |||
B[i] = B[i - 1] ^ A[i]; | |||
} | |||
} | |||
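
divByXplus1 is an exact division by (x + 1), where x = X^64 as in the surrounding Toom-3 code, so the array is treated as a polynomial in x with 64-bit GF(2)[X] coefficients. The division then collapses to a running XOR prefix, valid whenever x + 1 divides the input (equivalently, when the XOR of all limbs is zero):

#include <stdint.h>
#include <stddef.h>

/* B = A / (x + 1) over GF(2)[X] limbs; requires the XOR of all A[i] to be 0 */
static void div_by_x_plus_1(uint64_t *B, const uint64_t *A, size_t limbs) {
    B[0] = A[0];
    for (size_t i = 1; i < limbs; i++) {
        B[i] = B[i - 1] ^ A[i]; /* (x + 1) * B telescopes back to A */
    }
}
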
@@ -371,7 +371,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 - 1; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i42 = i4 - 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
@@ -382,7 +382,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4])); | |||
} | |||
for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = T_TM3_3W_256 - 1; i < T_TM3_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i41 = i4 + 1; | |||
U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); | |||
@@ -396,8 +396,8 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase : x= X^64 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
@@ -405,7 +405,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//W1 = W2 * W3 | |||
karat_mult_64( W1, W2, W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
int64_t *U1_64 = ((int64_t *) U1); | |||
int64_t *U2_64 = ((int64_t *) U2); | |||
@@ -421,7 +421,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V1_64 = ((int64_t *) V1); | |||
V2_64 = ((int64_t *) V2); | |||
for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 1; i < T_TM3_3W_256; i++) { | |||
int i4 = i << 2; | |||
W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1])); | |||
W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2])); | |||
@@ -430,14 +430,14 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2])); | |||
} | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = W3 + W0; W2 = W2 + W4 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W0 = W0 + U0; W4 = W4 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
@@ -445,31 +445,31 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
karat_mult_64(tmp, W3, W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
karat_mult_64( W2, W0, W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
//W4 = U2 * V2; W0 = U0 * V0 | |||
karat_mult_64(W4, U2, V2); | |||
karat_mult_64(W0, U0, V0); | |||
// Interpolation phase | |||
// 9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x -> x = X^64 | |||
U1_64 = ((int64_t *) W2); | |||
U2_64 = ((int64_t *) W0); | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1); i++) { | |||
int32_t i4 = i << 2; | |||
W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1])); | |||
W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1])); | |||
@@ -480,7 +480,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
__m256i *U1_256 = (__m256i *) (U1_64 + 1); | |||
tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); | |||
for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 1; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); | |||
} | |||
@@ -494,7 +494,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = (int64_t *) W1; | |||
__m256i *U2_256 = (__m256i *) (U2_64 + 1); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256) - 1; i++) { | |||
tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]); | |||
} | |||
@@ -502,19 +502,19 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W3[2 * (T_TM3_3W_256) - 1] = zero; | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256) | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i]; | |||
ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i]; | |||
@@ -530,12 +530,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]); | |||
U2_256 = (__m256i *) (U2_64 - 2); | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 << 1; i++) { | |||
_mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i])); | |||
_mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i])); | |||
} | |||
for (int32_t i = 0 ; i < 2 * VEC_N_SIZE_256 + 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * VEC_N_SIZE_256 + 1; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
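
For reference, the Toom-3 bookkeeping above in one place, read off the code rather than the documented spec. The operands are split on the block boundary y as U = U0 + U1*y + U2*y^2, and with x = X^64 and all additions XOR (characteristic 2), the five products end up holding:

    W0 = U0*V0                                      = R(0)
    W1 = (U0^U1^U2) * (V0^V1^V2)                    = R(1)
    W2 = (U0 ^ U1*x ^ U2*x^2) * (V0 ^ V1*x ^ V2*x^2) = R(x)
    W3 = (U(1) ^ U1*x ^ U2*x^2) * (V(1) ^ V1*x ^ V2*x^2) = R(x+1)
    W4 = U2*V2                                      = R(infinity)

where U(x+1) = U(1) ^ (U1 ^ U2*x)*x follows from (x+1)^2 = x^2 + 1 in characteristic 2. Interpolation back to the coefficients of R(y) then costs only XORs plus exact divisions by x (a 64-bit limb shift) and by x + 1 (the prefix XOR of divByXplus1 above).
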
@@ -128,7 +128,7 @@ int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned cha | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQCRMRS192_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS192_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -79,10 +79,10 @@ static void encode(uint64_t *word, uint32_t message) { | |||
*/ | |||
inline void expand_and_sum(__m256i *dst, const uint64_t *src) { | |||
uint16_t v[16]; | |||
for (size_t part = 0 ; part < 8 ; part++) { | |||
for (size_t part = 0; part < 8; part++) { | |||
dst[part] = _mm256_setzero_si256(); | |||
} | |||
for (size_t copy = 0 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 0; copy < MULTIPLICITY; copy++) { | |||
for (size_t part = 0; part < 8; part++) { | |||
for (size_t bit = 0; bit < 16; bit++) { | |||
v[bit] = (((uint16_t *)(&src[2 * copy]))[part] >> bit) & 1; | |||
@@ -133,10 +133,10 @@ inline void hadamard(__m256i *src, __m256i *dst) { | |||
__m256i *p1 = src; | |||
__m256i *p2 = dst; | |||
__m256i *p3; | |||
for (size_t pass = 0 ; pass < 7 ; pass++) { | |||
for (size_t pass = 0; pass < 7; pass++) { | |||
// warning: hadd works "within lanes" as Intel call it | |||
// so you have to swap the middle 64 bit blocks of the result | |||
for (size_t part = 0 ; part < 4 ; part++) { | |||
for (size_t part = 0; part < 4; part++) { | |||
p2[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); | |||
p2[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); | |||
} | |||
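
A note on the 0xd8 shuffle flagged in the comment: _mm256_hadd_epi16 adds pairs within each 128-bit lane, so the pair-sums of the two source vectors come out interleaved as 64-bit blocks [a0, b0, a1, b1], and _mm256_permute4x64_epi64 with selector 0xd8 (blocks 0, 2, 1, 3) restores [a0, a1, b0, b1]. A small demo of the reordering, with hypothetical values; compile with -mavx2:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    int16_t in[16], out[16];
    for (int i = 0; i < 16; i++) {
        in[i] = (int16_t) i;
    }
    __m256i v = _mm256_loadu_si256((const __m256i *) in);
    __m256i h = _mm256_hadd_epi16(v, v);               /* lane-interleaved sums */
    __m256i fixed = _mm256_permute4x64_epi64(h, 0xd8); /* undo the interleave */
    _mm256_storeu_si256((__m256i *) out, fixed);
    for (int i = 0; i < 8; i++) {
        printf("%d ", out[i]); /* 1 5 9 13 17 21 25 29: pair sums in order */
    }
    printf("\n");
    return 0;
}
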
@@ -223,13 +223,13 @@ inline int32_t find_peaks(__m256i *transform) { | |||
__m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows; | |||
__m256i peak_mask; | |||
// compute absolute value of transform | |||
for (size_t i = 0 ; i < 8 ; i++) { | |||
for (size_t i = 0; i < 8; i++) { | |||
abs_rows[i] = _mm256_abs_epi16(transform[i]); | |||
} | |||
// compute a vector of 16 elements which contains the maximum somewhere | |||
// (later used to compute bits 0 through 3 of message) | |||
max_abs_rows = abs_rows[0]; | |||
for (size_t i = 1 ; i < 8 ; i++) { | |||
for (size_t i = 1; i < 8; i++) { | |||
max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]); | |||
} | |||
@@ -263,7 +263,7 @@ inline int32_t find_peaks(__m256i *transform) { | |||
// find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message | |||
// find lowest value by searching backwards skip first check to save time | |||
size_t message = 0x70; | |||
for (int32_t i = 7 ; i >= 0 ; i--) { | |||
for (int32_t i = 7; i >= 0; i--) { | |||
bitmap = _mm256_cmpgt_epi16(abs_rows[i], bound); | |||
int message_mask = (-(int16_t)(_mm256_testz_si256(bitmap, bitmap) == 0)) >> 15; | |||
message ^= message_mask & (message ^ (unsigned)i << 4); | |||
@@ -297,7 +297,7 @@ inline int32_t find_peaks(__m256i *transform) { | |||
// and then adding elements within two groups of 8 | |||
peak_mask = _mm256_cmpgt_epi16(active_row, bound); | |||
peak_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1); | |||
for (int32_t i = 0 ; i < 3 ; i++) { | |||
for (int32_t i = 0; i < 3; i++) { | |||
peak_mask = _mm256_hadd_epi16(peak_mask, peak_mask); | |||
} | |||
// add low 4 bits of message | |||
@@ -337,12 +337,12 @@ inline int32_t find_peaks(__m256i *transform) { | |||
* @param[in] msg Array of size VEC_N1_SIZE_64 storing the message | |||
*/ | |||
void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// fill entries i * MULTIPLICITY to (i+1) * MULTIPLICITY | |||
// encode first word | |||
encode(&cdw[2 * i * MULTIPLICITY], ((uint8_t *)msg)[i]); | |||
// copy to other identical codewords | |||
for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
memcpy(&cdw[2 * (i * MULTIPLICITY + copy)], &cdw[2 * i * MULTIPLICITY], 2 * sizeof(uint64_t)); | |||
} | |||
} | |||
@@ -362,7 +362,7 @@ void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint64_t *cdw, const uint64_t *m | |||
void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint64_t *msg, const uint64_t *cdw) { | |||
__m256i expanded[8]; | |||
__m256i transform[8]; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// collect the codewords | |||
expand_and_sum(expanded, &cdw[2 * i * MULTIPLICITY]); | |||
// apply hadamard transform | |||
@@ -41,20 +41,20 @@ void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t * | |||
uint8_t msg_bytes[PARAM_K] = {0}; | |||
uint8_t cdw_bytes[PARAM_N1] = {0}; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 ; ++i) { | |||
for (size_t j = 0 ; j < 8 ; ++j) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { | |||
for (size_t j = 0; j < 8; ++j) { | |||
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); | |||
} | |||
} | |||
for (int i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = 0 ; j < PARAM_G ; ++j) { | |||
for (size_t j = 0; j < PARAM_G; ++j) { | |||
tmp[j] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(gate_value, PARAM_RS_POLY[j]); | |||
} | |||
for (size_t k = PARAM_N1 - PARAM_K - 1 ; k ; --k) { | |||
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { | |||
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; | |||
} | |||
@@ -74,8 +74,8 @@ void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t * | |||
* @param[in] cdw Array of size PARAM_N1 storing the received vector | |||
*/ | |||
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { | |||
for (size_t i = 0 ; i < 2 * PARAM_DELTA ; ++i) { | |||
for (size_t j = 1 ; j < PARAM_N1 ; ++j) { | |||
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { | |||
for (size_t j = 1; j < PARAM_N1; ++j) { | |||
syndromes[i] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); | |||
} | |||
syndromes[i] ^= cdw[0]; | |||
@@ -111,14 +111,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; (mu < (2 * PARAM_DELTA)) ; ++mu) { | |||
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQCRMRS192_AVX2_gf_mul(d, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(d_p)); | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -141,14 +141,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
pp = (mask12 & mu) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA ; i ; --i) { | |||
for (size_t i = PARAM_DELTA; i; --i) { | |||
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p); | |||
d = syndromes[mu + 1]; | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]); | |||
} | |||
} | |||
@@ -189,18 +189,18 @@ static void compute_roots(uint8_t *error, uint16_t *sigma) { | |||
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) { | |||
z[0] = 1; | |||
for (size_t i = 1 ; i < PARAM_DELTA + 1 ; ++i) { | |||
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] = ((uint16_t)mask2) & sigma[i]; | |||
} | |||
z[1] ^= syndromes[0]; | |||
for (size_t i = 2 ; i <= PARAM_DELTA ; ++i) { | |||
for (size_t i = 2; i <= PARAM_DELTA; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]); | |||
for (size_t j = 1 ; j < i ; ++j) { | |||
for (size_t j = 1; j < i; ++j) { | |||
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]); | |||
} | |||
} | |||
@@ -226,10 +226,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
uint16_t delta_real_value; | |||
// Compute the beta_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_N1 ; i++) { | |||
for (size_t i = 0; i < PARAM_N1; i++) { | |||
uint16_t found = 0; | |||
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15; | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15; | |||
beta_j[j] += indexmask & valuemask & exp[i]; | |||
found += indexmask & valuemask & 1; | |||
@@ -239,17 +239,17 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
delta_real_value = delta_counter; | |||
// Compute the e_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_DELTA ; ++i) { | |||
for (size_t i = 0; i < PARAM_DELTA; ++i) { | |||
uint16_t tmp1 = 1; | |||
uint16_t tmp2 = 1; | |||
uint16_t inverse = PQCLEAN_HQCRMRS192_AVX2_gf_inverse(beta_j[i]); | |||
uint16_t inverse_power_j = 1; | |||
for (size_t j = 1 ; j <= PARAM_DELTA ; ++j) { | |||
for (size_t j = 1; j <= PARAM_DELTA; ++j) { | |||
inverse_power_j = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, inverse); | |||
tmp1 ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, z[j]); | |||
} | |||
for (size_t k = 1 ; k < PARAM_DELTA ; ++k) { | |||
for (size_t k = 1; k < PARAM_DELTA; ++k) { | |||
tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); | |||
} | |||
int16_t mask = ((int16_t) - (i < delta_real_value)) >> 15; | |||
@@ -258,10 +258,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
// Place the delta e_{j_i} values at the right coordinates of the output vector | |||
delta_counter = 0; | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
uint16_t found = 0; | |||
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15; | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15; | |||
error_values[i] += indexmask & valuemask & e_j[j]; | |||
found += indexmask & valuemask & 1; | |||
@@ -280,7 +280,7 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
* @param[in] error_values Array of PARAM_DELTA elements storing the error values | |||
*/ | |||
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
cdw[i] ^= error_values[i]; | |||
} | |||
} | |||
@@ -45,7 +45,7 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -61,7 +61,7 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -74,7 +74,7 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
} | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
// we store the bloc number and bit position of each vb[i] | |||
uint64_t bloc = tmp[i] >> 6; | |||
bloc256[i] = _mm256_set1_epi64x(bloc >> 2); | |||
@@ -86,11 +86,11 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
bit256[i] = bloc256 & mask256; | |||
} | |||
for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) { | |||
for (uint32_t i = 0; i < LOOP_SIZE; i++) { | |||
__m256i aux = _mm256_loadu_si256(((__m256i *)v) + i); | |||
__m256i i256 = _mm256_set1_epi64x(i); | |||
for (uint32_t j = 0 ; j < weight ; j++) { | |||
for (uint32_t j = 0; j < weight; j++) { | |||
__m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); | |||
aux ^= bit256[j] & mask256; | |||
} | |||
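
This loop is a constant-time scatter: instead of writing v[bloc] at a secret-dependent address, every 256-bit block of v is loaded, and each precomputed bit pattern is XORed in under a full-width equality mask. The original uses GCC/Clang vector operators (^=, &) directly on __m256i; the same with explicit intrinsics:

#include <immintrin.h>
#include <stdint.h>

static void ct_scatter(__m256i *v, uint32_t nblocks,
                       const __m256i *bloc256, const __m256i *bit256,
                       uint32_t weight) {
    for (uint32_t i = 0; i < nblocks; i++) {
        __m256i aux = _mm256_loadu_si256(v + i);
        __m256i i256 = _mm256_set1_epi64x((int64_t) i);
        for (uint32_t j = 0; j < weight; j++) {
            /* mask is all-ones in the 64-bit lanes where bloc256[j] == i */
            __m256i mask = _mm256_cmpeq_epi64(bloc256[j], i256);
            aux = _mm256_xor_si256(aux, _mm256_and_si256(bit256[j], mask));
        }
        _mm256_storeu_si256(v + i, aux);
    }
}
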
@@ -147,7 +147,7 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -165,7 +165,7 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uin | |||
int PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
unsigned char diff = 0; | |||
for (uint32_t i = 0 ; i < size ; i++) { | |||
for (uint32_t i = 0; i < size; i++) { | |||
diff |= ((uint8_t *) v1)[i] ^ ((uint8_t *) v2)[i]; | |||
} | |||
return diff != 0; | |||
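
vect_compare ORs all byte differences together, so the loop does the same work wherever (or whether) a mismatch occurs; only the final reduction to 0/1 remains. The trailing `diff != 0` usually compiles to a branchless setcc, but the arithmetic form makes that explicit:

#include <stdint.h>
#include <stddef.h>

static int ct_compare(const uint8_t *a, const uint8_t *b, size_t len) {
    uint8_t diff = 0;
    for (size_t i = 0; i < len; i++) {
        diff |= (uint8_t) (a[i] ^ b[i]);
    }
    /* top bit of -diff (as uint32) is set iff diff != 0 */
    return (int) ((uint32_t) (0u - diff) >> 31);
}
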
@@ -192,7 +192,7 @@ void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uin | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -29,7 +29,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -50,8 +50,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -138,7 +138,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -181,13 +181,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -198,7 +198,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -208,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -223,7 +223,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -234,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -287,7 +287,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -306,7 +306,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -333,7 +333,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint | |||
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); | |||
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS192_CLEAN_gf_log(gammas_sums[i]); | |||
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); | |||
@@ -45,7 +45,7 @@ static void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -79,49 +79,49 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
uint64_t *pt; | |||
uint16_t *res_16; | |||
for (uint32_t i = 0 ; i < 16; i++) { | |||
for (uint32_t i = 0; i < 16; i++) { | |||
permuted_table[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i < 15 ; i++) { | |||
for (uint32_t i = 0; i < 15; i++) { | |||
swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); | |||
} | |||
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); | |||
for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = a2[j]; | |||
} | |||
pt[VEC_N_SIZE_64] = 0x0; | |||
for (uint32_t i = 1 ; i < 16 ; i++) { | |||
for (uint32_t i = 1; i < 16; i++) { | |||
carry = 0; | |||
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = (a2[j] << i) ^ carry; | |||
carry = (a2[j] >> ((64 - i))); | |||
} | |||
pt[VEC_N_SIZE_64] = carry; | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
permuted_sparse_vect[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i + 1 < weight ; i++) { | |||
for (uint32_t i = 0; i + 1 < weight; i++) { | |||
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i)); | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
dec = a1[permuted_sparse_vect[i]] & 0xf; | |||
s = a1[permuted_sparse_vect[i]] >> 4; | |||
res_16 = ((uint16_t *) o) + s; | |||
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) { | |||
*res_16++ ^= (uint16_t) pt[j]; | |||
*res_16++ ^= (uint16_t) (pt[j] >> 16); | |||
*res_16++ ^= (uint16_t) (pt[j] >> 32); | |||
@@ -146,7 +146,7 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
*/ | |||
void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { | |||
uint64_t tmp[2 * VEC_N_SIZE_64 + 1]; | |||
for (uint32_t j = 0 ; j < 2 * VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < 2 * VEC_N_SIZE_64 + 1; j++) { | |||
tmp[j] = 0; | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned ch | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQCRMRS192_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS192_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && memcmp(d, d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -104,8 +104,8 @@ static void hadamard(uint16_t src[128], uint16_t dst[128]) { | |||
uint16_t *p1 = src; | |||
uint16_t *p2 = dst; | |||
uint16_t *p3; | |||
for (uint32_t pass = 0 ; pass < 7 ; pass++) { | |||
for (uint32_t i = 0 ; i < 64 ; i++) { | |||
for (uint32_t pass = 0; pass < 7; pass++) { | |||
for (uint32_t i = 0; i < 64; i++) { | |||
p2[i] = p1[2 * i] + p1[2 * i + 1]; | |||
p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; | |||
} | |||
@@ -133,15 +133,15 @@ static void hadamard(uint16_t src[128], uint16_t dst[128]) { | |||
*/ | |||
static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]) { | |||
// start with the first copy | |||
for (uint32_t part = 0 ; part < 4 ; part++) { | |||
for (uint32_t bit = 0 ; bit < 32 ; bit++) { | |||
for (uint32_t part = 0; part < 4; part++) { | |||
for (uint32_t bit = 0; bit < 32; bit++) { | |||
dest[part * 32 + bit] = (uint16_t) ((src[part] >> bit) & 1); | |||
} | |||
} | |||
// sum the rest of the copies | |||
for (uint32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (uint32_t part = 0 ; part < 4 ; part++) { | |||
for (uint32_t bit = 0 ; bit < 32 ; bit++) { | |||
for (uint32_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
for (uint32_t part = 0; part < 4; part++) { | |||
for (uint32_t bit = 0; bit < 32; bit++) { | |||
dest[part * 32 + bit] += (uint16_t) ((src[4 * copy + part] >> bit) & 1); | |||
} | |||
} | |||
@@ -164,7 +164,7 @@ static uint8_t find_peaks(const uint16_t transform[128]) { | |||
uint16_t peak = 0; | |||
uint16_t pos = 0; | |||
uint16_t t, abs, mask; | |||
for (uint16_t i = 0 ; i < 128 ; i++) { | |||
for (uint16_t i = 0; i < 128; i++) { | |||
t = transform[i]; | |||
abs = t ^ ((-(t >> 15)) & (t ^ -t)); // t = abs(t) | |||
mask = -(((uint16_t)(peak_abs - abs)) >> 15); | |||
@@ -191,11 +191,11 @@ static uint8_t find_peaks(const uint16_t transform[128]) { | |||
void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { | |||
uint8_t *message_array = (uint8_t *) msg; | |||
uint32_t *codeArray = (uint32_t *) cdw; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// encode first word | |||
encode(&codeArray[4 * i * MULTIPLICITY], message_array[i]); | |||
// copy to other identical codewords | |||
for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
memcpy(&codeArray[4 * i * MULTIPLICITY + 4 * copy], &codeArray[4 * i * MULTIPLICITY], 4 * sizeof(uint32_t)); | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint64_t *msg, const uint64_t * | |||
uint32_t *codeArray = (uint32_t *) cdw; | |||
uint16_t expanded[128]; | |||
uint16_t transform[128]; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// collect the codewords | |||
expand_and_sum(expanded, &codeArray[4 * i * MULTIPLICITY]); | |||
// apply hadamard transform | |||
@@ -41,20 +41,20 @@ void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(uint64_t *cdw, const uint64_t | |||
uint8_t msg_bytes[PARAM_K] = {0}; | |||
uint8_t cdw_bytes[PARAM_N1] = {0}; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 ; ++i) { | |||
for (size_t j = 0 ; j < 8 ; ++j) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { | |||
for (size_t j = 0; j < 8; ++j) { | |||
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); | |||
} | |||
} | |||
for (int i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = 0 ; j < PARAM_G ; ++j) { | |||
for (size_t j = 0; j < PARAM_G; ++j) { | |||
tmp[j] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gate_value, PARAM_RS_POLY[j]); | |||
} | |||
for (size_t k = PARAM_N1 - PARAM_K - 1 ; k ; --k) { | |||
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { | |||
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; | |||
} | |||
@@ -74,8 +74,8 @@ void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(uint64_t *cdw, const uint64_t | |||
* @param[in] cdw Array of size PARAM_N1 storing the received vector | |||
*/ | |||
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { | |||
for (size_t i = 0 ; i < 2 * PARAM_DELTA ; ++i) { | |||
for (size_t j = 1 ; j < PARAM_N1 ; ++j) { | |||
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { | |||
for (size_t j = 1; j < PARAM_N1; ++j) { | |||
syndromes[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); | |||
} | |||
syndromes[i] ^= cdw[0]; | |||
@@ -111,14 +111,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; (mu < (2 * PARAM_DELTA)) ; ++mu) { | |||
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(d_p)); | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -141,14 +141,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
pp = (mask12 & mu) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA ; i ; --i) { | |||
for (size_t i = PARAM_DELTA; i; --i) { | |||
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p); | |||
d = syndromes[mu + 1]; | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]); | |||
} | |||
} | |||
@@ -189,18 +189,18 @@ static void compute_roots(uint8_t *error, uint16_t *sigma) { | |||
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) { | |||
z[0] = 1; | |||
for (size_t i = 1 ; i < PARAM_DELTA + 1 ; ++i) { | |||
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] = ((uint16_t)mask2) & sigma[i]; | |||
} | |||
z[1] ^= syndromes[0]; | |||
for (size_t i = 2 ; i <= PARAM_DELTA ; ++i) { | |||
for (size_t i = 2; i <= PARAM_DELTA; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]); | |||
for (size_t j = 1 ; j < i ; ++j) { | |||
for (size_t j = 1; j < i; ++j) { | |||
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]); | |||
} | |||
} | |||
@@ -226,10 +226,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
uint16_t delta_real_value; | |||
// Compute the beta_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_N1 ; i++) { | |||
for (size_t i = 0; i < PARAM_N1; i++) { | |||
uint16_t found = 0; | |||
uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 | |||
for (uint16_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (uint16_t j = 0; j < PARAM_DELTA; j++) { | |||
uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter | |||
beta_j[j] += indexmask & valuemask & exp[i]; | |||
found += indexmask & valuemask & 1; | |||
@@ -239,17 +239,17 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
delta_real_value = delta_counter; | |||
// Compute the e_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_DELTA ; ++i) { | |||
for (size_t i = 0; i < PARAM_DELTA; ++i) { | |||
uint16_t tmp1 = 1; | |||
uint16_t tmp2 = 1; | |||
uint16_t inverse = PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(beta_j[i]); | |||
uint16_t inverse_power_j = 1; | |||
for (size_t j = 1 ; j <= PARAM_DELTA ; ++j) { | |||
for (size_t j = 1; j <= PARAM_DELTA; ++j) { | |||
inverse_power_j = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, inverse); | |||
tmp1 ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, z[j]); | |||
} | |||
for (size_t k = 1 ; k < PARAM_DELTA ; ++k) { | |||
for (size_t k = 1; k < PARAM_DELTA; ++k) { | |||
tmp2 = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); | |||
} | |||
uint16_t mask = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value | |||
@@ -258,10 +258,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
// Place the delta e_{j_i} values at the right coordinates of the output vector | |||
delta_counter = 0; | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
uint16_t found = 0; | |||
uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter | |||
error_values[i] += indexmask & valuemask & e_j[j]; | |||
found += indexmask & valuemask & 1; | |||
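The two inline comments name the predicates being built: an arithmetic right shift smears a comparison's sign bit into an all-ones or all-zero mask, so every (i, j) pair is visited and the mask decides which additions stick. The idiom in isolation (it assumes the usual arithmetic `>>` on signed values, which the original relies on too):

```c
#include <stdint.h>

/* All-ones (0xFFFF) iff x != 0: -(int32_t)x is negative exactly when
 * x != 0, and an arithmetic shift by 31 smears the sign bit. */
static uint16_t mask_nonzero(uint16_t x) {
    return (uint16_t) (-((int32_t) x) >> 31);
}

/* All-ones iff a == b: a ^ b is zero exactly on equality. */
static uint16_t mask_equal(uint16_t a, uint16_t b) {
    return (uint16_t) ~(-((int32_t) (a ^ b)) >> 31);
}

/* Usage mirroring the hunk: accumulate src into dst only when sel == idx
 * and flag != 0, with no data-dependent branch. */
static void masked_add(uint16_t *dst, uint16_t src, uint16_t sel, uint16_t idx, uint16_t flag) {
    *dst += mask_equal(sel, idx) & mask_nonzero(flag) & src;
}
```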
@@ -280,7 +280,7 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
* @param[in] error_values Array of PARAM_DELTA elements storing the error values | |||
*/ | |||
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
cdw[i] ^= error_values[i]; | |||
} | |||
} | |||
@@ -36,7 +36,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XO | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -52,7 +52,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XO | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (v[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -95,7 +95,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -111,7 +111,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -124,7 +124,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
} | |||
} | |||
for (uint16_t i = 0 ; i < weight ; ++i) { | |||
for (uint16_t i = 0; i < weight; ++i) { | |||
int32_t index = tmp[i] / 64; | |||
int32_t pos = tmp[i] % 64; | |||
v[index] |= ((uint64_t) 1) << pos; | |||
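Both fixed-weight samplers share one recipe: pull bytes from the seed expander, reduce each candidate modulo PARAM_N, reject duplicates against the positions already chosen, and finally scatter the support into the 64-bit words of v. A condensed sketch with the randomness abstracted behind a hypothetical next_candidate():

```c
#include <stdint.h>

extern uint32_t next_candidate(void); /* hypothetical: value in [0, PARAM_N) */

/* v must be zeroed by the caller; positions[] receives the support. */
void fixed_weight_sketch(uint64_t *v, uint32_t *positions, uint32_t weight) {
    for (uint32_t i = 0; i < weight; ++i) {
        uint32_t cand;
        int exist;
        do {
            cand = next_candidate();
            exist = 0;
            for (uint32_t k = 0; k < i; ++k) { /* duplicate check, as above */
                exist |= (positions[k] == cand);
            }
        } while (exist);
        positions[i] = cand;
    }
    for (uint32_t i = 0; i < weight; ++i) {
        v[positions[i] / 64] |= (uint64_t) 1 << (positions[i] % 64);
    }
}
```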
@@ -178,7 +178,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const ui | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -29,7 +29,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -50,8 +50,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
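compute_subset_sums builds, in place, the XOR of every subset of the beta basis: after pass i the first 2^(i+1) entries are final, and entry s is the combination selected by the bits of s. For set = {a, b} the result is {0, a, b, a^b}. The doubling step in isolation:

```c
/* subset_sums[s] = XOR of set[bit] over all bits set in s, for s < 2^n. */
void subset_sums_sketch(uint16_t *subset_sums, const uint16_t *set, size_t n) {
    subset_sums[0] = 0;
    for (size_t i = 0; i < n; ++i) {
        for (size_t j = 0; j < (1U << i); ++j) {
            /* new half = old half translated by the new element */
            subset_sums[(1U << i) + j] = set[i] ^ subset_sums[j];
        }
    }
}
```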
@@ -138,7 +138,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -181,13 +181,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -198,7 +198,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQCRMRS256_AVX2_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -208,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS256_AVX2_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -223,7 +223,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -234,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -287,7 +287,7 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -306,7 +306,7 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -333,7 +333,7 @@ void PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint1 | |||
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); | |||
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS256_AVX2_gf_log(gammas_sums[i]); | |||
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); | |||
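The error-poly update hides a branch-free zero test: a position is flipped exactly when the transform value w is zero. Written out (the bound matters: the trick is only valid for w < 0x8000, which holds here because w is a field element of fewer than 15 bits):

```c
/* 1 iff w == 0: (uint16_t)-w has its top bit set exactly when
 * 0 < w <= 0x8000, so xoring with 1 inverts the "nonzero" bit. */
static inline uint16_t is_zero_u16(uint16_t w) {
    return 1 ^ ((uint16_t) - w >> 15);
}
```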
@@ -50,7 +50,7 @@ static inline void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -198,7 +198,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4( D0, A, B); | |||
karat_mult_4(D2, A + 4, B + 4); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int is = i + 4; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -206,7 +206,7 @@ static inline void karat_mult_8(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_4(D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 4 ; i++) { | |||
for (int32_t i = 0; i < 4; i++) { | |||
int32_t is = i + 4; | |||
int32_t is2 = is + 4; | |||
int32_t is3 = is2 + 4; | |||
@@ -237,7 +237,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D0, A, B); | |||
karat_mult_8(D2, A + 8, B + 8); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -245,7 +245,7 @@ static inline void karat_mult_16(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_8( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 8 ; i++) { | |||
for (int32_t i = 0; i < 8; i++) { | |||
int32_t is = i + 8; | |||
int32_t is2 = is + 8; | |||
int32_t is3 = is2 + 8; | |||
@@ -276,7 +276,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D0, A, B); | |||
karat_mult_16(D2, A + 16, B + 16); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int is = i + 16; | |||
SAA[i] = A[i] ^ A[is]; | |||
SBB[i] = B[i] ^ B[is]; | |||
@@ -284,7 +284,7 @@ static inline void karat_mult_32(__m256i *C, __m256i *A, __m256i *B) { | |||
karat_mult_16( D1, SAA, SBB); | |||
for (int32_t i = 0 ; i < 16 ; i++) { | |||
for (int32_t i = 0; i < 16; i++) { | |||
int32_t is = i + 16; | |||
int32_t is2 = is + 16; | |||
int32_t is3 = is2 + 16; | |||
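karat_mult_8/16/32 are each a single Karatsuba layer over GF(2)[x]: split both operands in half, take three half-size products, recombine. Since addition is XOR there are no signs to track:

```latex
A = A_0 + A_1 x,\quad B = B_0 + B_1 x,\qquad
D_0 = A_0 B_0,\quad D_2 = A_1 B_1,\quad D_1 = (A_0 \oplus A_1)(B_0 \oplus B_1),
\\[2pt]
A B \;=\; D_0 \;\oplus\; \bigl(D_0 \oplus D_1 \oplus D_2\bigr)\,x \;\oplus\; D_2\,x^{2}.
```

The is/is2/is3 indices in the loop after each recursive call are just the four half-width output slots this recombination writes.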
@@ -314,7 +314,7 @@ static inline void divByXplus1(__m256i *out, __m256i *in, int size) { | |||
B[0] = A[0]; | |||
for (int32_t i = 1 ; i < 2 * (size << 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size << 2); i++) { | |||
B[i] = B[i - 1] ^ A[i]; | |||
} | |||
} | |||
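divByXplus1 is exact division by (x ⊕ 1) as a running XOR. If (x ⊕ 1)·B(x) = A(x), comparing coefficients (64-bit words here, 256-bit ones in divByXplus1_256 further down) gives

```latex
a_i = b_i \oplus b_{i-1}
\quad\Longrightarrow\quad
b_i = a_i \oplus b_{i-1}, \qquad b_{-1} = 0,
```

and the Toom-3 interpolation step guarantees the division is exact, so the prefix scan never leaves a remainder.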
@@ -338,7 +338,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 - 1; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i42 = i4 - 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
@@ -349,7 +349,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2 - 4])); | |||
} | |||
for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = T_TM3_3W_256 - 1; i < T_TM3_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
int32_t i41 = i4 + 1; | |||
U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); | |||
@@ -363,8 +363,8 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase : x= X^64 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
@@ -372,7 +372,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//W1 = W2 * W3 | |||
karat_mult_32( W1, W2, W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 !) | |||
int64_t *U1_64 = ((int64_t *) U1); | |||
int64_t *U2_64 = ((int64_t *) U2); | |||
@@ -388,7 +388,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V1_64 = ((int64_t *) V1); | |||
V2_64 = ((int64_t *) V2); | |||
for (int32_t i = 1 ; i < T_TM3_3W_256 ; i++) { | |||
for (int32_t i = 1; i < T_TM3_3W_256; i++) { | |||
int i4 = i << 2; | |||
W0[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 - 1])); | |||
W0[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 - 2])); | |||
@@ -397,46 +397,46 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W4[i] ^= _mm256_lddqu_si256((__m256i const *)(& V2_64[i4 - 2])); | |||
} | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W3 = W3 + W0; W2 = W2 + W4 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 ; i++) { | |||
//W0 = W0 + U0; W4 = W4 + V0 | |||
for (int32_t i = 0; i < T_TM3_3W_256; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
//W3 = W3 * W2 ; W2 = W0 * W4 | |||
//W3 = W3 * W2; W2 = W0 * W4 | |||
karat_mult_32(tmp, W3, W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
karat_mult_32(W2, W0, W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
//W4 = U2 * V2; W0 = U0 * V0 | |||
karat_mult_32(W4, U2, V2); | |||
karat_mult_32(W0, U0, V0); | |||
// Interpolation phase | |||
// 9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x -> x = X^64 | |||
U1_64 = ((int64_t *) W2); | |||
U2_64 = ((int64_t *) W0); | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1); i++) { | |||
int32_t i4 = i << 2; | |||
W2[i] = _mm256_lddqu_si256((__m256i const *)(& U1_64[i4 + 1])); | |||
W2[i] ^= _mm256_lddqu_si256((__m256i const *)(& U2_64[i4 + 1])); | |||
@@ -447,7 +447,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
__m256i *U1_256 = (__m256i *) (U1_64 + 1); | |||
tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); | |||
for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 1; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); | |||
} | |||
@@ -461,7 +461,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = (int64_t *) W1; | |||
__m256i *U2_256 = (__m256i *) (U2_64 + 1); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256) - 1; i++) { | |||
tmp[i] = _mm256_lddqu_si256(&U1_256[i]) ^ _mm256_lddqu_si256(&U2_256[i]); | |||
} | |||
@@ -469,19 +469,19 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W3[2 * (T_TM3_3W_256) - 1] = zero; | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3_3W_256); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256) | |||
for (int32_t i = 0 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { | |||
for (int32_t i = 0; i < (T_TM3_3W_256 << 1) - 1; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + 2 * T_TM3_3W_256 - 1] = W2[i]; | |||
ro256[i + 4 * T_TM3_3W_256 - 2] = W4[i]; | |||
@@ -497,12 +497,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
U2_64 = ((int64_t *) &ro256[3 * T_TM3_3W_256 - 1]); | |||
U2_256 = (__m256i *) (U2_64 - 2); | |||
for (int32_t i = 0 ; i < T_TM3_3W_256 << 1 ; i++) { | |||
for (int32_t i = 0; i < T_TM3_3W_256 << 1; i++) { | |||
_mm256_storeu_si256(&U1_256[i], W1[i] ^ _mm256_lddqu_si256(&U1_256[i])); | |||
_mm256_storeu_si256(&U2_256[i], W3[i] ^ _mm256_loadu_si256(&U2_256[i])); | |||
} | |||
for (int32_t i = 0 ; i < 6 * T_TM3_3W_256 - 2 ; i++) { | |||
for (int32_t i = 0; i < 6 * T_TM3_3W_256 - 2; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
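Seen end to end, TOOM3Mult is Toom-Cook-3 over GF(2)[x] with x = X^64: the five karat_mult_32 calls are the point multiplications, and after the evaluation XORs above, the W registers hold (a hedged reading of the code, consistent with its own comments):

```latex
W_0 = (UV)(0) = U_0 V_0,\quad
W_1 = (UV)(1),\quad
W_2 = (UV)(x),\quad
W_3 = (UV)(1 \oplus x),\quad
W_4 = (UV)(\infty) = U_2 V_2.
```

Interpolation then recovers the five coefficients of the degree-4 product from these values using only XORs plus two exact divisions, one by x (a one-word shift) and one by x ⊕ 1 (divByXplus1), and the recomposition overlap-adds the coefficients at the strides seen in the final loops.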
@@ -519,7 +519,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
*/ | |||
static inline void divByXplus1_256(__m256i *out, __m256i *in, int32_t size) { | |||
out[0] = in[0]; | |||
for (int32_t i = 1 ; i < 2 * (size + 2) ; i++) { | |||
for (int32_t i = 1; i < 2 * (size + 2); i++) { | |||
out[i] = out[i - 1] ^ in[i]; | |||
} | |||
} | |||
@@ -542,7 +542,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
const __m256i zero = _mm256_setzero_si256(); | |||
int32_t T2 = T_TM3R_3W_64 << 1; | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256; i++) { | |||
int32_t i4 = i << 2; | |||
U0[i] = _mm256_lddqu_si256((__m256i const *)(& A[i4])); | |||
V0[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4])); | |||
@@ -552,7 +552,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
V2[i] = _mm256_lddqu_si256((__m256i const *)(& B[i4 + T2])); | |||
} | |||
for (int32_t i = T_TM3R_3W_256 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
for (int32_t i = T_TM3R_3W_256; i < T_TM3R_3W_256 + 2; i++) { | |||
U0[i] = zero; | |||
V0[i] = zero; | |||
U1[i] = zero; | |||
@@ -564,27 +564,27 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
// Evaluation phase : x= X^256 | |||
// P(X): P0=(0); P1=(1); P2=(x); P3=(1+x); P4=(\infty) | |||
// Evaluation: 5*2 add, 2*2 shift; 5 mul (n) | |||
//W3 = U2 + U1 + U0 ; W2 = V2 + V1 + V0 | |||
//W3 = U2 + U1 + U0; W2 = V2 + V1 + V0 | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256; i++) { | |||
W3[i] = U0[i] ^ U1[i] ^ U2[i]; | |||
W2[i] = V0[i] ^ V1[i] ^ V2[i]; | |||
} | |||
for (int32_t i = T_TM3R_3W_256 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
for (int32_t i = T_TM3R_3W_256; i < T_TM3R_3W_256 + 2; i++) { | |||
W2[i] = zero; | |||
W3[i] = zero; | |||
} | |||
//W1 = W2 * W3 | |||
TOOM3Mult(W1, (uint64_t *) W2, (uint64_t *) W3); | |||
//W0 =(U1 + U2*x)*x ; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 + 2 !) | |||
//W0 =(U1 + U2*x)*x; W4 =(V1 + V2*x)*x (SIZE = T_TM3_3W_256 + 2 !) | |||
W0[0] = zero; | |||
W4[0] = zero; | |||
W0[1] = U1[0]; | |||
W4[1] = V1[0]; | |||
for (int32_t i = 1 ; i < T_TM3R_3W_256 + 1 ; i++) { | |||
for (int32_t i = 1; i < T_TM3R_3W_256 + 1; i++) { | |||
W0[i + 1] = U1[i] ^ U2[i - 1]; | |||
W4[i + 1] = V1[i] ^ V2[i - 1]; | |||
} | |||
@@ -592,28 +592,28 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W0[T_TM3R_3W_256 + 1] = U2[T_TM3R_3W_256 - 1]; | |||
W4[T_TM3R_3W_256 + 1] = V2[T_TM3R_3W_256 - 1]; | |||
//W3 = W3 + W0 ; W2 = W2 + W4 | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
//W3 = W3 + W0; W2 = W2 + W4 | |||
for (int32_t i = 0; i < T_TM3R_3W_256 + 2; i++) { | |||
W3[i] ^= W0[i]; | |||
W2[i] ^= W4[i]; | |||
} | |||
//W0 = W0 + U0 ; W4 = W4 + V0 | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 + 2 ; i++) { | |||
//W0 = W0 + U0; W4 = W4 + V0 | |||
for (int32_t i = 0; i < T_TM3R_3W_256 + 2; i++) { | |||
W0[i] ^= U0[i]; | |||
W4[i] ^= V0[i]; | |||
} | |||
//W3 = W3 * W2 ; W2 = W0 * W4 | |||
//W3 = W3 * W2; W2 = W0 * W4 | |||
TOOM3Mult(tmp, (uint64_t *) W3, (uint64_t *) W2); | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W3[i] = tmp[i]; | |||
} | |||
TOOM3Mult(W2, (uint64_t *) W0, (uint64_t *) W4); | |||
//W4 = U2 * V2 ; W0 = U0 * V0 | |||
//W4 = U2 * V2; W0 = U0 * V0 | |||
TOOM3Mult(W4, (uint64_t *) U2, (uint64_t *) V2); | |||
TOOM3Mult(W0, (uint64_t *) U0, (uint64_t *) V0); | |||
@@ -621,17 +621,17 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
//9 add, 1 shift, 1 Smul, 2 Sdiv (2n) | |||
//W3 = W3 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W3[i] ^= W2[i]; | |||
} | |||
//W1 = W1 + W0 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256); i++) { | |||
W1[i] ^= W0[i]; | |||
} | |||
//W2 =(W2 + W0)/x | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2) - 1; i++) { | |||
int32_t i1 = i + 1; | |||
W2[i] = W2[i1] ^ W0[i1]; | |||
} | |||
@@ -639,7 +639,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
W2[2 * (T_TM3R_3W_256 + 2) - 1] = zero; | |||
//W2 =(W2 + W3 + W4*(x^3+1))/(x+1) | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
tmp[i] = W2[i] ^ W3[i] ^ W4[i]; | |||
} | |||
@@ -647,14 +647,14 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
tmp[2 * (T_TM3R_3W_256 + 2) + 1] = zero; | |||
tmp[2 * (T_TM3R_3W_256 + 2) + 2] = zero; | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256); i++) { | |||
tmp[i + 3] ^= W4[i]; | |||
} | |||
divByXplus1_256(W2, tmp, T_TM3R_3W_256); | |||
//W3 =(W3 + W1)/(x*(x+1)) | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) - 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2) - 1; i++) { | |||
int32_t i1 = i + 1; | |||
tmp[i] = W3[i1] ^ W1[i1]; | |||
} | |||
@@ -663,18 +663,18 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
divByXplus1_256(W3, tmp, T_TM3R_3W_256); | |||
//W1 = W1 + W4 + W2 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W1[i] ^= W2[i] ^ W4[i]; | |||
} | |||
//W2 = W2 + W3 | |||
for (int32_t i = 0 ; i < 2 * (T_TM3R_3W_256 + 2) ; i++) { | |||
for (int32_t i = 0; i < 2 * (T_TM3R_3W_256 + 2); i++) { | |||
W2[i] ^= W3[i]; | |||
} | |||
// Recomposition | |||
//W = W0+ W1*x+ W2*x^2+ W3*x^3 + W4*x^4 | |||
//W0, W1, W4 of size 2*T_TM3_3W_256, W2 and W3 of size 2*(T_TM3_3W_256+2) | |||
for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { | |||
for (int32_t i = 0; i < T_TM3R_3W_256; i++) { | |||
ro256[i] = W0[i]; | |||
ro256[i + T_TM3R_3W_256] = W0[i + T_TM3R_3W_256] ^ W1[i]; | |||
ro256[i + 2 * T_TM3R_3W_256] = W1[i + T_TM3R_3W_256] ^ W2[i]; | |||
@@ -696,7 +696,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { | |||
ro256[3 + 5 * T_TM3R_3W_256] ^= W3[3 + 2 * T_TM3R_3W_256]; | |||
for (int32_t i = 0 ; i < 2 * VEC_N_SIZE_256 + 1 ; i++) { | |||
for (int32_t i = 0; i < 2 * VEC_N_SIZE_256 + 1; i++) { | |||
_mm256_storeu_si256(&Out[i], ro256[i]); | |||
} | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned cha | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQCRMRS256_AVX2_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS256_AVX2_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS256_AVX2_vect_compare((uint64_t *)d, (uint64_t *)d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
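The tail of decapsulation re-encrypts and compares, then keeps or clears the shared secret without branching on the outcome. A hedged reconstruction of how the pieces compose (the compare helpers themselves avoid early exit, see vect_compare further down; the decrement mirrors the `result--` above):

```c
#include <stddef.h>
#include <stdint.h>

/* result is 1 only when all three transcripts matched; multiplying each
 * byte by 0/1 zeroizes ss on failure with no secret-dependent branch. */
int decaps_tail_sketch(uint8_t *ss, size_t ss_len, int u_ok, int v_ok, int d_ok) {
    int result = (u_ok && v_ok && d_ok);
    for (size_t i = 0; i < ss_len; i++) {
        ss[i] = (uint8_t) (result * ss[i]);
    }
    return result - 1; /* 0 on success, -1 on failure */
}
```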
@@ -79,10 +79,10 @@ static void encode(uint64_t *word, uint32_t message) { | |||
*/ | |||
inline void expand_and_sum(__m256i *dst, const uint64_t *src) { | |||
uint16_t v[16]; | |||
for (size_t part = 0 ; part < 8 ; part++) { | |||
for (size_t part = 0; part < 8; part++) { | |||
dst[part] = _mm256_setzero_si256(); | |||
} | |||
for (size_t copy = 0 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 0; copy < MULTIPLICITY; copy++) { | |||
for (size_t part = 0; part < 8; part++) { | |||
for (size_t bit = 0; bit < 16; bit++) { | |||
v[bit] = (((uint16_t *)(&src[2 * copy]))[part] >> bit) & 1; | |||
@@ -133,10 +133,10 @@ inline void hadamard(__m256i *src, __m256i *dst) { | |||
__m256i *p1 = src; | |||
__m256i *p2 = dst; | |||
__m256i *p3; | |||
for (size_t pass = 0 ; pass < 7 ; pass++) { | |||
for (size_t pass = 0; pass < 7; pass++) { | |||
// warning: hadd works "within lanes" as Intel call it | |||
// so you have to swap the middle 64 bit blocks of the result | |||
for (size_t part = 0 ; part < 4 ; part++) { | |||
for (size_t part = 0; part < 4; part++) { | |||
p2[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); | |||
p2[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(p1[2 * part], p1[2 * part + 1]), 0xd8); | |||
} | |||
@@ -223,13 +223,13 @@ inline int32_t find_peaks(__m256i *transform) { | |||
__m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows; | |||
__m256i peak_mask; | |||
// compute absolute value of transform | |||
for (size_t i = 0 ; i < 8 ; i++) { | |||
for (size_t i = 0; i < 8; i++) { | |||
abs_rows[i] = _mm256_abs_epi16(transform[i]); | |||
} | |||
// compute a vector of 16 elements which contains the maximum somewhere | |||
// (later used to compute bits 0 through 3 of message) | |||
max_abs_rows = abs_rows[0]; | |||
for (size_t i = 1 ; i < 8 ; i++) { | |||
for (size_t i = 1; i < 8; i++) { | |||
max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]); | |||
} | |||
@@ -263,7 +263,7 @@ inline int32_t find_peaks(__m256i *transform) { | |||
// find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message | |||
// find lowest value by searching backwards skip first check to save time | |||
size_t message = 0x70; | |||
for (int32_t i = 7 ; i >= 0 ; i--) { | |||
for (int32_t i = 7; i >= 0; i--) { | |||
bitmap = _mm256_cmpgt_epi16(abs_rows[i], bound); | |||
int message_mask = (-(int16_t)(_mm256_testz_si256(bitmap, bitmap) == 0)) >> 15; | |||
message ^= message_mask & (message ^ (unsigned)i << 4); | |||
@@ -297,7 +297,7 @@ inline int32_t find_peaks(__m256i *transform) { | |||
// and then adding elements within two groups of 8 | |||
peak_mask = _mm256_cmpgt_epi16(active_row, bound); | |||
peak_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1); | |||
for (int32_t i = 0 ; i < 3 ; i++) { | |||
for (int32_t i = 0; i < 3; i++) { | |||
peak_mask = _mm256_hadd_epi16(peak_mask, peak_mask); | |||
} | |||
// add low 4 bits of message | |||
@@ -337,12 +337,12 @@ inline int32_t find_peaks(__m256i *transform) { | |||
* @param[in] msg Array of size VEC_N1_SIZE_64 storing the message | |||
*/ | |||
void PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// fill entries i * MULTIPLICITY to (i+1) * MULTIPLICITY | |||
// encode first word | |||
encode(&cdw[2 * i * MULTIPLICITY], ((uint8_t *)msg)[i]); | |||
// copy to other identical codewords | |||
for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
memcpy(&cdw[2 * (i * MULTIPLICITY + copy)], &cdw[2 * i * MULTIPLICITY], 2 * sizeof(uint64_t)); | |||
} | |||
} | |||
@@ -362,7 +362,7 @@ void PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(uint64_t *cdw, const uint64_t *m | |||
void PQCLEAN_HQCRMRS256_AVX2_reed_muller_decode(uint64_t *msg, const uint64_t *cdw) { | |||
__m256i expanded[8]; | |||
__m256i transform[8]; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// collect the codewords | |||
expand_and_sum(expanded, &cdw[2 * i * MULTIPLICITY]); | |||
// apply hadamard transform | |||
@@ -41,20 +41,20 @@ void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t * | |||
uint8_t msg_bytes[PARAM_K] = {0}; | |||
uint8_t cdw_bytes[PARAM_N1] = {0}; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 ; ++i) { | |||
for (size_t j = 0 ; j < 8 ; ++j) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { | |||
for (size_t j = 0; j < 8; ++j) { | |||
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); | |||
} | |||
} | |||
for (int i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = 0 ; j < PARAM_G ; ++j) { | |||
for (size_t j = 0; j < PARAM_G; ++j) { | |||
tmp[j] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(gate_value, PARAM_RS_POLY[j]); | |||
} | |||
for (size_t k = PARAM_N1 - PARAM_K - 1 ; k ; --k) { | |||
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { | |||
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; | |||
} | |||
@@ -74,8 +74,8 @@ void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t * | |||
* @param[in] cdw Array of size PARAM_N1 storing the received vector | |||
*/ | |||
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { | |||
for (size_t i = 0 ; i < 2 * PARAM_DELTA ; ++i) { | |||
for (size_t j = 1 ; j < PARAM_N1 ; ++j) { | |||
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { | |||
for (size_t j = 1; j < PARAM_N1; ++j) { | |||
syndromes[i] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); | |||
} | |||
syndromes[i] ^= cdw[0]; | |||
@@ -111,14 +111,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; (mu < (2 * PARAM_DELTA)) ; ++mu) { | |||
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQCRMRS256_AVX2_gf_mul(d, PQCLEAN_HQCRMRS256_AVX2_gf_inverse(d_p)); | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -141,14 +141,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
pp = (mask12 & mu) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA ; i ; --i) { | |||
for (size_t i = PARAM_DELTA; i; --i) { | |||
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p); | |||
d = syndromes[mu + 1]; | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]); | |||
} | |||
} | |||
@@ -189,18 +189,18 @@ static void compute_roots(uint8_t *error, uint16_t *sigma) { | |||
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) { | |||
z[0] = 1; | |||
for (size_t i = 1 ; i < PARAM_DELTA + 1 ; ++i) { | |||
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] = ((uint16_t)mask2) & sigma[i]; | |||
} | |||
z[1] ^= syndromes[0]; | |||
for (size_t i = 2 ; i <= PARAM_DELTA ; ++i) { | |||
for (size_t i = 2; i <= PARAM_DELTA; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]); | |||
for (size_t j = 1 ; j < i ; ++j) { | |||
for (size_t j = 1; j < i; ++j) { | |||
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS256_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]); | |||
} | |||
} | |||
@@ -226,10 +226,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
uint16_t delta_real_value; | |||
// Compute the beta_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_N1 ; i++) { | |||
for (size_t i = 0; i < PARAM_N1; i++) { | |||
uint16_t found = 0; | |||
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15; | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15; | |||
beta_j[j] += indexmask & valuemask & exp[i]; | |||
found += indexmask & valuemask & 1; | |||
@@ -239,17 +239,17 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
delta_real_value = delta_counter; | |||
// Compute the e_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_DELTA ; ++i) { | |||
for (size_t i = 0; i < PARAM_DELTA; ++i) { | |||
uint16_t tmp1 = 1; | |||
uint16_t tmp2 = 1; | |||
uint16_t inverse = PQCLEAN_HQCRMRS256_AVX2_gf_inverse(beta_j[i]); | |||
uint16_t inverse_power_j = 1; | |||
for (size_t j = 1 ; j <= PARAM_DELTA ; ++j) { | |||
for (size_t j = 1; j <= PARAM_DELTA; ++j) { | |||
inverse_power_j = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse_power_j, inverse); | |||
tmp1 ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse_power_j, z[j]); | |||
} | |||
for (size_t k = 1 ; k < PARAM_DELTA ; ++k) { | |||
for (size_t k = 1; k < PARAM_DELTA; ++k) { | |||
tmp2 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); | |||
} | |||
int16_t mask = ((int16_t) - (i < delta_real_value)) >> 15; | |||
@@ -258,10 +258,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
// Place the delta e_{j_i} values at the right coordinates of the output vector | |||
delta_counter = 0; | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
uint16_t found = 0; | |||
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15; | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15; | |||
error_values[i] += indexmask & valuemask & e_j[j]; | |||
found += indexmask & valuemask & 1; | |||
@@ -280,7 +280,7 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
* @param[in] error_values Array of PARAM_DELTA elements storing the error values | |||
*/ | |||
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
cdw[i] ^= error_values[i]; | |||
} | |||
} | |||
@@ -45,7 +45,7 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -61,7 +61,7 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -74,7 +74,7 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
} | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
// we store the bloc number and bit position of each vb[i] | |||
uint64_t bloc = tmp[i] >> 6; | |||
bloc256[i] = _mm256_set1_epi64x(bloc >> 2); | |||
@@ -86,11 +86,11 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, u | |||
bit256[i] = bloc256 & mask256; | |||
} | |||
for (uint32_t i = 0 ; i < LOOP_SIZE ; i++) { | |||
for (uint32_t i = 0; i < LOOP_SIZE; i++) { | |||
__m256i aux = _mm256_loadu_si256(((__m256i *)v) + i); | |||
__m256i i256 = _mm256_set1_epi64x(i); | |||
for (uint32_t j = 0 ; j < weight ; j++) { | |||
for (uint32_t j = 0; j < weight; j++) { | |||
__m256i mask256 = _mm256_cmpeq_epi64(bloc256[j], i256); | |||
aux ^= bit256[j] & mask256; | |||
} | |||
@@ -147,7 +147,7 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQCRMRS256_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -165,7 +165,7 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uin | |||
int PQCLEAN_HQCRMRS256_AVX2_vect_compare(const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
unsigned char diff = 0; | |||
for (uint32_t i = 0 ; i < size ; i++) { | |||
for (uint32_t i = 0; i < size; i++) { | |||
diff |= ((uint8_t *) v1)[i] ^ ((uint8_t *) v2)[i]; | |||
} | |||
return diff != 0; | |||
@@ -192,7 +192,7 @@ void PQCLEAN_HQCRMRS256_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uin | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||
@@ -29,7 +29,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
*/ | |||
static void compute_fft_betas(uint16_t *betas) { | |||
size_t i; | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
betas[i] = 1 << (PARAM_M - 1 - i); | |||
} | |||
} | |||
@@ -50,8 +50,8 @@ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size | |||
size_t i, j; | |||
subset_sums[0] = 0; | |||
for (i = 0 ; i < set_size ; ++i) { | |||
for (j = 0 ; j < (1U << i) ; ++j) { | |||
for (i = 0; i < set_size; ++i) { | |||
for (j = 0; j < (1U << i); ++j) { | |||
subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; | |||
} | |||
} | |||
@@ -138,7 +138,7 @@ static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_ | |||
memcpy(Q + n, f + 3 * n, 2 * n); | |||
memcpy(R, f, 4 * n); | |||
for (i = 0 ; i < n ; ++i) { | |||
for (i = 0; i < n; ++i) { | |||
Q[i] ^= f[2 * n + i]; | |||
R[n + i] ^= Q[i]; | |||
} | |||
@@ -181,13 +181,13 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 1 | |||
if (m_f == 1) { | |||
for (i = 0 ; i < m ; ++i) { | |||
for (i = 0; i < m; ++i) { | |||
tmp[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas[i], f[1]); | |||
} | |||
w[0] = f[0]; | |||
for (j = 0 ; j < m ; ++j) { | |||
for (k = 0 ; k < (1U << j) ; ++k) { | |||
for (j = 0; j < m; ++j) { | |||
for (k = 0; k < (1U << j); ++k) { | |||
w[(1 << j) + k] = w[k] ^ tmp[j]; | |||
} | |||
} | |||
@@ -198,7 +198,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
// Step 2: compute g | |||
if (betas[m - 1] != 1) { | |||
beta_m_pow = 1; | |||
for (i = 1 ; i < (1U << m_f) ; ++i) { | |||
for (i = 1; i < (1U << m_f); ++i) { | |||
beta_m_pow = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); | |||
f[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(beta_m_pow, f[i]); | |||
} | |||
@@ -208,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
radix(f0, f1, f, m_f); | |||
// Step 4: compute gammas and deltas | |||
for (i = 0 ; i + 1 < m ; ++i) { | |||
for (i = 0; i + 1 < m; ++i) { | |||
gammas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(betas[m - 1])); | |||
deltas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; | |||
} | |||
@@ -223,7 +223,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant | |||
w[0] = u[0]; | |||
w[k] = u[0] ^ f1[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gammas_sums[i], f1[0]); | |||
w[k + i] = w[i] ^ f1[0]; | |||
} | |||
@@ -234,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 | |||
memcpy(w + k, v, 2 * k); | |||
w[0] = u[0]; | |||
w[k] ^= u[0]; | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gammas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -287,7 +287,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff | |||
radix(f0, f1, f, PARAM_FFT); | |||
// Step 4: Compute deltas | |||
for (i = 0 ; i < PARAM_M - 1 ; ++i) { | |||
for (i = 0; i < PARAM_M - 1; ++i) { | |||
deltas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_square(betas[i]) ^ betas[i]; | |||
} | |||
@@ -306,7 +306,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff | |||
w[k] ^= u[0]; | |||
// Find other roots | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas_sums[i], v[i]); | |||
w[k + i] ^= w[i]; | |||
} | |||
@@ -333,7 +333,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint | |||
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); | |||
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); | |||
for (i = 1 ; i < k ; ++i) { | |||
for (i = 1; i < k; ++i) { | |||
index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS256_CLEAN_gf_log(gammas_sums[i]); | |||
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); | |||
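The whole FFT file is the Gao-Mateer additive FFT. The radix conversion splits f so that

```latex
f(x) \;=\; f_0\!\bigl(x^2 \oplus x\bigr) \;\oplus\; x\, f_1\!\bigl(x^2 \oplus x\bigr),
```

and because x ↦ x² ⊕ x is GF(2)-linear and two-to-one, evaluating f on a subspace reduces to evaluating f0 and f1 on a subspace of half the size (spanned by the deltas, with the gammas normalizing betas[m - 1] to 1). The combine step then reads, for each point γ in the half-size set, w(γ) = u ⊕ γ·v and w(γ ⊕ 1) = w(γ) ⊕ v, which is exactly the `w[i]` / `w[k + i]` pair in the loops above; fft_retrieve_error_poly finally maps each zero of the evaluated error-locator back to a codeword index through gf_log.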
@@ -45,7 +45,7 @@ static void reduce(uint64_t *o, const uint64_t *a) { | |||
uint64_t r; | |||
uint64_t carry; | |||
for (uint32_t i = 0 ; i < VEC_N_SIZE_64 ; i++) { | |||
for (uint32_t i = 0; i < VEC_N_SIZE_64; i++) { | |||
r = a[i + VEC_N_SIZE_64 - 1] >> (PARAM_N & 63); | |||
carry = (uint64_t) (a[i + VEC_N_SIZE_64] << (64 - (PARAM_N & 63))); | |||
o[i] = a[i] ^ r ^ carry; | |||
@@ -79,49 +79,49 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
uint64_t *pt; | |||
uint16_t *res_16; | |||
for (uint32_t i = 0 ; i < 16; i++) { | |||
for (uint32_t i = 0; i < 16; i++) { | |||
permuted_table[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i < 15 ; i++) { | |||
for (uint32_t i = 0; i < 15; i++) { | |||
swap(permuted_table + i, 0, permutation_table[i] % (16 - i)); | |||
} | |||
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1)); | |||
for (int32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (int32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = a2[j]; | |||
} | |||
pt[VEC_N_SIZE_64] = 0x0; | |||
for (uint32_t i = 1 ; i < 16 ; i++) { | |||
for (uint32_t i = 1; i < 16; i++) { | |||
carry = 0; | |||
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64; j++) { | |||
pt[j] = (a2[j] << i) ^ carry; | |||
carry = (a2[j] >> ((64 - i))); | |||
} | |||
pt[VEC_N_SIZE_64] = carry; | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
permuted_sparse_vect[i] = i; | |||
} | |||
seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t)); | |||
for (uint32_t i = 0 ; i + 1 < weight ; i++) { | |||
for (uint32_t i = 0; i + 1 < weight; i++) { | |||
swap(permuted_sparse_vect + i, 0, permutation_sparse_vect[i] % (weight - i)); | |||
} | |||
for (uint32_t i = 0 ; i < weight ; i++) { | |||
for (uint32_t i = 0; i < weight; i++) { | |||
dec = a1[permuted_sparse_vect[i]] & 0xf; | |||
s = a1[permuted_sparse_vect[i]] >> 4; | |||
res_16 = ((uint16_t *) o) + s; | |||
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1)); | |||
for (uint32_t j = 0 ; j < VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < VEC_N_SIZE_64 + 1; j++) { | |||
*res_16++ ^= (uint16_t) pt[j]; | |||
*res_16++ ^= (uint16_t) (pt[j] >> 16); | |||
*res_16++ ^= (uint16_t) (pt[j] >> 32); | |||
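fast_convolution_mult is a sparse-times-dense product in GF(2)[X]: the dense operand is pre-shifted by every amount in [0, 16), and each support position a1[i] = 16·s + dec then contributes the dec-shifted copy XORed in at half-word offset s. The two seed-derived permutations only randomize the visiting order against cache-timing. Dropping them for clarity (o must be the double-length buffer that reduce() later folds mod X^N - 1, and the uint16_t view of it follows the original's aliasing shortcut):

```c
#include <stdint.h>

#define W64 8 /* toy number of 64-bit words; really VEC_N_SIZE_64 */

void sparse_mult_sketch(uint64_t *o, const uint32_t *support, uint32_t weight,
                        const uint64_t *dense) {
    static uint64_t table[16][W64 + 1]; /* dense << d, one spill word each */
    for (uint32_t d = 0; d < 16; d++) {
        uint64_t carry = 0;
        for (uint32_t j = 0; j < W64; j++) {
            table[d][j] = (dense[j] << d) ^ carry;
            carry = d ? (dense[j] >> (64 - d)) : 0; /* avoid UB shift by 64 */
        }
        table[d][W64] = carry;
    }
    for (uint32_t i = 0; i < weight; i++) {
        uint32_t dec = support[i] & 0xf; /* bit shift within a half-word */
        uint32_t s = support[i] >> 4;    /* offset in 16-bit half-words  */
        uint16_t *res = (uint16_t *) o + s;
        for (uint32_t j = 0; j < W64 + 1; j++) { /* XOR the shifted copy in */
            *res++ ^= (uint16_t) table[dec][j];
            *res++ ^= (uint16_t) (table[dec][j] >> 16);
            *res++ ^= (uint16_t) (table[dec][j] >> 32);
            *res++ ^= (uint16_t) (table[dec][j] >> 48);
        }
    }
}
```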
@@ -146,7 +146,7 @@ static void fast_convolution_mult(uint64_t *o, const uint32_t *a1, const uint64_ | |||
*/ | |||
void PQCLEAN_HQCRMRS256_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) { | |||
uint64_t tmp[2 * VEC_N_SIZE_64 + 1]; | |||
for (uint32_t j = 0 ; j < 2 * VEC_N_SIZE_64 + 1 ; j++) { | |||
for (uint32_t j = 0; j < 2 * VEC_N_SIZE_64 + 1; j++) { | |||
tmp[j] = 0; | |||
} | |||
@@ -128,7 +128,7 @@ int PQCLEAN_HQCRMRS256_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned ch | |||
// Abort if c != c' or d != d' | |||
result = (PQCLEAN_HQCRMRS256_CLEAN_vect_compare(u, u2, VEC_N_SIZE_BYTES) == 0 && PQCLEAN_HQCRMRS256_CLEAN_vect_compare(v, v2, VEC_N1N2_SIZE_BYTES) == 0 && memcmp(d, d2, SHA512_BYTES) == 0); | |||
for (size_t i = 0 ; i < SHARED_SECRET_BYTES ; i++) { | |||
for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) { | |||
ss[i] = result * ss[i]; | |||
} | |||
result--; | |||
@@ -104,8 +104,8 @@ static void hadamard(uint16_t src[128], uint16_t dst[128]) { | |||
uint16_t *p1 = src; | |||
uint16_t *p2 = dst; | |||
uint16_t *p3; | |||
for (uint32_t pass = 0 ; pass < 7 ; pass++) { | |||
for (uint32_t i = 0 ; i < 64 ; i++) { | |||
for (uint32_t pass = 0; pass < 7; pass++) { | |||
for (uint32_t i = 0; i < 64; i++) { | |||
p2[i] = p1[2 * i] + p1[2 * i + 1]; | |||
p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; | |||
} | |||
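hadamard is a decimation-in-time fast Walsh-Hadamard transform: 128 = 2^7, so seven butterfly passes, each turning index pairs into a sum half and a difference half while ping-ponging between the two buffers (after the seventh, odd, pass the result sits in dst). Arithmetic is plain uint16_t wraparound; the peak search afterwards reinterprets values as signed. Compact form:

```c
/* 7-pass FWHT of length 128, mirroring the scalar hadamard() above. */
static void fwht128_sketch(uint16_t src[128], uint16_t dst[128]) {
    uint16_t *p1 = src, *p2 = dst, *p3;
    for (int pass = 0; pass < 7; pass++) {
        for (int i = 0; i < 64; i++) {
            p2[i]      = p1[2 * i] + p1[2 * i + 1]; /* sum half        */
            p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; /* difference half */
        }
        p3 = p1; p1 = p2; p2 = p3; /* this pass's output is next input */
    }
}
```

For a noisy RM(1,7) word summed over MULTIPLICITY copies, the transform's largest absolute entry sits at the index encoding the seven linear message bits, with its sign giving the eighth; that is what find_peaks extracts.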
@@ -133,15 +133,15 @@ static void hadamard(uint16_t src[128], uint16_t dst[128]) { | |||
*/ | |||
static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]) { | |||
// start with the first copy | |||
for (uint32_t part = 0 ; part < 4 ; part++) { | |||
for (uint32_t bit = 0 ; bit < 32 ; bit++) { | |||
for (uint32_t part = 0; part < 4; part++) { | |||
for (uint32_t bit = 0; bit < 32; bit++) { | |||
dest[part * 32 + bit] = (uint16_t) ((src[part] >> bit) & 1); | |||
} | |||
} | |||
// sum the rest of the copies | |||
for (uint32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (uint32_t part = 0 ; part < 4 ; part++) { | |||
for (uint32_t bit = 0 ; bit < 32 ; bit++) { | |||
for (uint32_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
for (uint32_t part = 0; part < 4; part++) { | |||
for (uint32_t bit = 0; bit < 32; bit++) { | |||
dest[part * 32 + bit] += (uint16_t) ((src[4 * copy + part] >> bit) & 1); | |||
} | |||
} | |||
@@ -164,7 +164,7 @@ static uint8_t find_peaks(const uint16_t transform[128]) { | |||
uint16_t peak = 0; | |||
uint16_t pos = 0; | |||
uint16_t t, abs, mask; | |||
for (uint16_t i = 0 ; i < 128 ; i++) { | |||
for (uint16_t i = 0; i < 128; i++) { | |||
t = transform[i]; | |||
abs = t ^ ((-(t >> 15)) & (t ^ -t)); // t = abs(t) | |||
mask = -(((uint16_t)(peak_abs - abs)) >> 15); | |||
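The scan keeps the running maximum of |transform[i]| without a data-dependent branch. Decomposed (all values here stay well below 2^15, so the subtraction's top bit is a clean borrow flag):

```c
/* |t| for a two's-complement value carried in a uint16_t. */
static uint16_t abs16(uint16_t t) {
    uint16_t sign = -(t >> 15);               /* 0x0000 or 0xFFFF          */
    return t ^ (sign & (t ^ (uint16_t) -t));  /* pick t or -t, branch-free */
}

/* Conditionally replace peak/pos when abs is strictly larger. */
static void max_update(uint16_t *peak, uint16_t *pos, uint16_t abs, uint16_t i) {
    uint16_t gt = -(((uint16_t) (*peak - abs)) >> 15); /* all-ones iff abs > *peak */
    *peak ^= gt & (*peak ^ abs);
    *pos  ^= gt & (*pos ^ i);
}
```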
@@ -191,11 +191,11 @@ static uint8_t find_peaks(const uint16_t transform[128]) { | |||
void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { | |||
uint8_t *message_array = (uint8_t *) msg; | |||
uint32_t *codeArray = (uint32_t *) cdw; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// encode first word | |||
encode(&codeArray[4 * i * MULTIPLICITY], message_array[i]); | |||
// copy to other identical codewords | |||
for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { | |||
for (size_t copy = 1; copy < MULTIPLICITY; copy++) { | |||
memcpy(&codeArray[4 * i * MULTIPLICITY + 4 * copy], &codeArray[4 * i * MULTIPLICITY], 4 * sizeof(uint32_t)); | |||
} | |||
} | |||
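encode() itself is not in this diff, but the code it emits is the first-order Reed-Muller code RM(1,7): each message byte becomes the truth table of an affine Boolean function on 7 variables, 128 bits packed into 4 uint32 words. A hedged reference version (the bit-to-coefficient convention is an assumption; the real encode() is an optimized equivalent and may order bits differently):

```c
/* RM(1,7): message bit k < 7 contributes the coordinate function j_k,
 * bit 7 the constant 1; cdw[0..3] must be zeroed by the caller. */
static void rm17_encode_sketch(uint32_t cdw[4], uint8_t msg) {
    for (int j = 0; j < 128; j++) {
        uint32_t bit = (msg >> 7) & 1;  /* affine (constant) term */
        for (int k = 0; k < 7; k++) {
            bit ^= ((msg >> k) & ((uint32_t) j >> k)) & 1; /* <m, j> over GF(2) */
        }
        cdw[j >> 5] |= bit << (j & 31);
    }
}
```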
@@ -217,7 +217,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_decode(uint64_t *msg, const uint64_t * | |||
uint32_t *codeArray = (uint32_t *) cdw; | |||
uint16_t expanded[128]; | |||
uint16_t transform[128]; | |||
for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { | |||
for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) { | |||
// collect the codewords | |||
expand_and_sum(expanded, &codeArray[4 * i * MULTIPLICITY]); | |||
// apply hadamard transform | |||
@@ -41,20 +41,20 @@ void PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_encode(uint64_t *cdw, const uint64_t | |||
uint8_t msg_bytes[PARAM_K] = {0}; | |||
uint8_t cdw_bytes[PARAM_N1] = {0}; | |||
for (size_t i = 0 ; i < VEC_K_SIZE_64 ; ++i) { | |||
for (size_t j = 0 ; j < 8 ; ++j) { | |||
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { | |||
for (size_t j = 0; j < 8; ++j) { | |||
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); | |||
} | |||
} | |||
for (int i = PARAM_K - 1 ; i >= 0 ; --i) { | |||
for (int i = PARAM_K - 1; i >= 0; --i) { | |||
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; | |||
for (size_t j = 0 ; j < PARAM_G ; ++j) { | |||
for (size_t j = 0; j < PARAM_G; ++j) { | |||
tmp[j] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gate_value, PARAM_RS_POLY[j]); | |||
} | |||
for (size_t k = PARAM_N1 - PARAM_K - 1 ; k ; --k) { | |||
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { | |||
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; | |||
} | |||
@@ -74,8 +74,8 @@ void PQCLEAN_HQCRMRS256_CLEAN_reed_solomon_encode(uint64_t *cdw, const uint64_t | |||
* @param[in] cdw Array of size PARAM_N1 storing the received vector | |||
*/ | |||
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { | |||
for (size_t i = 0 ; i < 2 * PARAM_DELTA ; ++i) { | |||
for (size_t j = 1 ; j < PARAM_N1 ; ++j) { | |||
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { | |||
for (size_t j = 1; j < PARAM_N1; ++j) { | |||
syndromes[i] ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); | |||
} | |||
syndromes[i] ^= cdw[0]; | |||
@@ -111,14 +111,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
uint16_t d_p = 1; | |||
uint16_t d = syndromes[0]; | |||
for (size_t mu = 0 ; (mu < (2 * PARAM_DELTA)) ; ++mu) { | |||
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { | |||
// Save sigma in case we need it to update X_sigma_p | |||
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); | |||
deg_sigma_copy = deg_sigma; | |||
uint16_t dd = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(d_p)); | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
sigma[i] ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(dd, X_sigma_p[i]); | |||
} | |||
@@ -141,14 +141,14 @@ static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { | |||
pp = (mask12 & mu) ^ (~mask12 & pp); | |||
d_p = (mask12 & d) ^ (~mask12 & d_p); | |||
for (size_t i = PARAM_DELTA ; i ; --i) { | |||
for (size_t i = PARAM_DELTA; i; --i) { | |||
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); | |||
} | |||
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p); | |||
d = syndromes[mu + 1]; | |||
for (size_t i = 1 ; (i <= mu + 1) && (i <= PARAM_DELTA) ; ++i) { | |||
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { | |||
d ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]); | |||
} | |||
} | |||
@@ -189,18 +189,18 @@ static void compute_roots(uint8_t *error, uint16_t *sigma) { | |||
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) { | |||
z[0] = 1; | |||
for (size_t i = 1 ; i < PARAM_DELTA + 1 ; ++i) { | |||
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] = ((uint16_t)mask2) & sigma[i]; | |||
} | |||
z[1] ^= syndromes[0]; | |||
for (size_t i = 2 ; i <= PARAM_DELTA ; ++i) { | |||
for (size_t i = 2; i <= PARAM_DELTA; ++i) { | |||
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15); | |||
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]); | |||
for (size_t j = 1 ; j < i ; ++j) { | |||
for (size_t j = 1; j < i; ++j) { | |||
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS256_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]); | |||
} | |||
} | |||
@@ -226,10 +226,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
uint16_t delta_real_value; | |||
// Compute the beta_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_N1 ; i++) { | |||
for (size_t i = 0; i < PARAM_N1; i++) { | |||
uint16_t found = 0; | |||
uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 | |||
for (uint16_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (uint16_t j = 0; j < PARAM_DELTA; j++) { | |||
uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter | |||
beta_j[j] += indexmask & valuemask & exp[i]; | |||
found += indexmask & valuemask & 1; | |||
@@ -239,17 +239,17 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
delta_real_value = delta_counter; | |||
// Compute the e_{j_i} page 31 of the documentation | |||
for (size_t i = 0 ; i < PARAM_DELTA ; ++i) { | |||
for (size_t i = 0; i < PARAM_DELTA; ++i) { | |||
uint16_t tmp1 = 1; | |||
uint16_t tmp2 = 1; | |||
uint16_t inverse = PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(beta_j[i]); | |||
uint16_t inverse_power_j = 1; | |||
for (size_t j = 1 ; j <= PARAM_DELTA ; ++j) { | |||
for (size_t j = 1; j <= PARAM_DELTA; ++j) { | |||
inverse_power_j = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(inverse_power_j, inverse); | |||
tmp1 ^= PQCLEAN_HQCRMRS256_CLEAN_gf_mul(inverse_power_j, z[j]); | |||
} | |||
for (size_t k = 1 ; k < PARAM_DELTA ; ++k) { | |||
for (size_t k = 1; k < PARAM_DELTA; ++k) { | |||
tmp2 = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); | |||
} | |||
uint16_t mask = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value | |||
@@ -258,10 +258,10 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
// Place the delta e_{j_i} values at the right coordinates of the output vector | |||
delta_counter = 0; | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
uint16_t found = 0; | |||
uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 | |||
for (size_t j = 0 ; j < PARAM_DELTA ; j++) { | |||
for (size_t j = 0; j < PARAM_DELTA; j++) { | |||
uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter | |||
error_values[i] += indexmask & valuemask & e_j[j]; | |||
found += indexmask & valuemask & 1; | |||
@@ -280,7 +280,7 @@ static void compute_error_values(uint16_t *error_values, const uint16_t *z, cons | |||
* @param[in] error_values Array of PARAM_DELTA elements storing the error values | |||
*/ | |||
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { | |||
for (size_t i = 0 ; i < PARAM_N1 ; ++i) { | |||
for (size_t i = 0; i < PARAM_N1; ++i) { | |||
cdw[i] ^= error_values[i]; | |||
} | |||
} | |||
@@ -36,7 +36,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XO | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -52,7 +52,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XO | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (v[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -95,7 +95,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
seedexpander(ctx, rand_bytes, random_bytes_size); | |||
for (uint32_t i = 0 ; i < weight ; ++i) { | |||
for (uint32_t i = 0; i < weight; ++i) { | |||
exist = 0; | |||
do { | |||
if (j == random_bytes_size) { | |||
@@ -111,7 +111,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
random_data = random_data % PARAM_N; | |||
for (uint32_t k = 0 ; k < i ; k++) { | |||
for (uint32_t k = 0; k < i; k++) { | |||
if (tmp[k] == random_data) { | |||
exist = 1; | |||
} | |||
@@ -124,7 +124,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, | |||
} | |||
} | |||
for (uint16_t i = 0 ; i < weight ; ++i) { | |||
for (uint16_t i = 0; i < weight; ++i) { | |||
int32_t index = tmp[i] / 64; | |||
int32_t pos = tmp[i] % 64; | |||
v[index] |= ((uint64_t) 1) << pos; | |||
@@ -178,7 +178,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_set_random_from_randombytes(uint64_t *v) { | |||
* @param[in] size Integer that is the size of the vectors | |||
*/ | |||
void PQCLEAN_HQCRMRS256_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) { | |||
for (uint32_t i = 0 ; i < size ; ++i) { | |||
for (uint32_t i = 0; i < size; ++i) { | |||
o[i] = v1[i] ^ v2[i]; | |||
} | |||
} | |||
@@ -217,7 +217,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const ui | |||
memcpy(o, v, VEC_N1N2_SIZE_BYTES); | |||
for (int8_t i = 0 ; i < val ; ++i) { | |||
for (int8_t i = 0; i < val; ++i) { | |||
o[VEC_N1N2_SIZE_64 - 1] &= (mask >> i); | |||
} | |||
} else { | |||